# Read the tab-delimited file "DOT - Cleaned Data.txt". The first line of the
# file is skipped, the following line supplies the column names, and text
# columns are kept as character vectors instead of factors.
data <- read.delim(
  file = "DOT - Cleaned Data.txt",
  skip = 1,
  header = TRUE,
  stringsAsFactors = FALSE
)
# Column names of the imported table.
nms <- colnames(data)
source("Cleanup_functions.R")
library(ggplot2)
library(reshape2)
library(stringr)
Ordering the levels of a factor variable:
# Demonstration: the level order of an ordered factor, not the alphabet,
# decides the result of `<` / `>` comparisons.
# Lines starting with `##` are the console output of the preceding call.
tt <- data.frame(nums = 1:4, letts = factor(letters[1:4]))
levels(tt$letts)
## [1] "a" "b" "c" "d"
ordered(tt$letts)
## [1] a b c d
## Levels: a < b < c < d

# Custom level order. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
tt$bb <- factor(tt$letts, levels = c("a", "c", "d", "b"), labels = c("a", "c", "d", "b"), ordered = TRUE)
ordered(tt$bb)
## [1] a b c d
## Levels: a < c < d < b

# A plain factor is not ordered until it is converted explicitly.
tt$upper <- factor(LETTERS[5:8])
ordered(tt$upper)
## [1] E F G H
## Levels: E < F < G < H
is.ordered(tt$upper)
## [1] FALSE
tt$upper <- as.ordered(tt$upper)
is.ordered(tt$upper)
## [1] TRUE
levels(tt$upper)
## [1] "E" "F" "G" "H"

# New order. The result stays ordered because tt$upper was already an ordered
# factor: factor() defaults `ordered` to is.ordered(x).
tt$upper <- factor(tt$upper, levels = LETTERS[8:5], labels = LETTERS[8:5])
is.ordered(tt$upper)
## [1] TRUE
levels(tt$upper)
## [1] "H" "G" "F" "E"

# Plain strings compare alphabetically ...
"E" < "G"
## [1] TRUE
tt$upper[1]
## [1] E
## Levels: H < G < F < E
# ... but the ordered factor compares by level position (H < G < F < E).
tt$upper[1] < "G"
## [1] FALSE
Putting count values on top of bars in a dodged barplot in ggplot2 http://stackoverflow.com/questions/10326729/dont-drop-zero-count-dodged-barplot
# Convert every column of `data` into an ordered factor sharing one level order.
#
# data: a data frame of several variables from the same question, all using
#       the same set of answer labels.
# ord:  a vector of level labels, listed from lowest to highest.
#
# Returns a data frame with the same number of rows and columns as `data`
# (columns named V1, V2, ...), each column an ordered factor with levels `ord`.
# Values that do not appear in `ord` become NA.
ordering.vars <- function(data, ord) {
  ret.df <- as.data.frame(matrix(nrow = nrow(data), ncol = ncol(data)))
  # seq_len() yields an empty sequence for a zero-column input, whereas
  # 1:ncol(data) would iterate over 1 and 0 and fail.
  for (column in seq_len(ncol(data))) {
    # drop = TRUE makes sure a plain vector is extracted, also for tibbles.
    ret.df[[column]] <- factor(
      data[, column, drop = TRUE],
      levels = ord,
      labels = ord,
      ordered = TRUE
    )
  }
  ret.df
}
# Education-level labels in the sequence stored in `ordEdu`.
# NOTE(review): presumably meant as the `ord` argument of ordering.vars();
# no use is visible in this part of the file - confirm.
ordEdu <- c(
  "early school leaver",
  "primary school grad",
  "secondary school grad",
  "university grad",
  "graduate degree",
  "vocational diploma",
  "NA education"
)
source("Cleanup_functions.R")
# Put the device-access variables on an ordered frequency scale; the level
# order is the reverse of `frequency_devices` (defined outside this section).
access.dev.ord <- ordering.vars(data = access.dev.vars, ord = rev(frequency_devices))

# Per-device flags: 1 when the answer ranks above "Once every few weeks",
# 0 when it does not, NA when the answer is missing.
prog.dev.desktop <- ifelse(as.ordered(access.dev.ord[, 1]) > "Once every few weeks", 1, 0)
prog.dev.laptop <- ifelse(as.ordered(access.dev.ord[, 2]) > "Once every few weeks", 1, 0)

# 1 when at least one of the two flags is 1, 0 when both are 0, otherwise NA.
prog.dev <- ifelse(prog.dev.desktop > 0 | prog.dev.laptop > 0, 1, 0)

# Only prog.dev == 1 counts as access. Testing is.na(prog.dev) alone would
# label respondents with prog.dev == 0 as "Have access". `%in%` never returns
# NA, so NA and 0 both fall into "Less/ No".
# NOTE(review): an earlier note here stated that every 0 ends up as NA in
# prog.dev; if that holds for this data set the result is unchanged.
`Have access` <- ifelse(prog.dev %in% 1, "Have access", "Less/ No")

# Stacked bar chart of access by gender. The plot data carries both variables
# so that aes() does not have to reach outside the data frame with `$`.
ggplot(
  data = data.frame(
    gender = data$GenderC,
    `Have access` = `Have access`,
    check.names = FALSE
  ),
  mapping = aes(x = gender, fill = `Have access`)
) +
  geom_bar() +
  ggtitle("Access to laptop or desktop computer at least once a week") +
  xlab("gender") +
  ylab("")
## Warning: Removed 10 rows containing non-finite values (stat_count).
# Per-device flags: 1 when the answer ranks above "Every few days", 0 when it
# does not, NA when the answer is missing.
com.dev.tablet <- ifelse(as.ordered(access.dev.ord[, 3]) > "Every few days", 1, 0)
com.dev.smartphone <- ifelse(as.ordered(access.dev.ord[, 4]) > "Every few days", 1, 0)

# 1 when at least one of the two flags is 1, 0 when both are 0, otherwise NA.
com.dev <- ifelse(com.dev.tablet > 0 | com.dev.smartphone > 0, 1, 0)

# Only com.dev == 1 counts as access. Testing is.na(com.dev) alone would label
# respondents with com.dev == 0 as "Have access". `%in%` never returns NA, so
# NA and 0 both fall into "Less/ No".
`Have access` <- ifelse(com.dev %in% 1, "Have access", "Less/ No")

# Stacked bar chart of access by gender. The plot data carries both variables
# so that aes() does not have to reach outside the data frame with `$`.
ggplot(
  data = data.frame(
    gender = data$GenderC,
    `Have access` = `Have access`,
    check.names = FALSE
  ),
  mapping = aes(x = gender, fill = `Have access`)
) +
  geom_bar() +
  ggtitle("Access to tablet or smartphone at least once a day") +
  xlab("gender") +
  ylab("")
## Warning: Removed 10 rows containing non-finite values (stat_count).
# Flags for columns 5 and 6 of access.dev.ord: 1 when the answer ranks above
# "Every few days", 0 when it does not, NA when the answer is missing.
com.dev.frm.phone <- ifelse(
  as.ordered(access.dev.ord[, 5]) > "Every few days", 1, 0
)
com.dev.bm.phone <- ifelse(
  as.ordered(access.dev.ord[, 6]) > "Every few days", 1, 0
)

# 5 when both the tablet and the smartphone flag are 1, otherwise 0.
com.dev.comb <- ifelse(
  com.dev.tablet == 1 & com.dev.smartphone == 1,
  5,
  0
)
table(com.dev.comb)
## com.dev.comb
## 0 5
## 430 97

# Code of the first device whose flag is 1, checked in the order tablet (1),
# smartphone (2), feature phone (3), basic phone (4); 0 when none is 1.
# An NA flag reached before a match makes the whole result NA.
com.dev.spread <- ifelse(
  com.dev.tablet == 1, 1,
  ifelse(
    com.dev.smartphone == 1, 2,
    ifelse(
      com.dev.frm.phone == 1, 3,
      ifelse(com.dev.bm.phone == 1, 4, 0)
    )
  )
)
# Attach readable labels to the numeric codes.
com.dev.spread <- factor(
  com.dev.spread,
  levels = 0:4,
  labels = c("Less/ No", "tablet", "smartphone", "feature phone", "basic phone")
)
table(com.dev.spread, exclude = NULL)
## com.dev.spread
## Less/ No tablet smartphone feature phone basic phone
## 27 98 274 27 94
## <NA>
## 47
# Cross-tabulate gender against the device category (NA values are dropped).
(tbl <- table(data$GenderC, com.dev.spread))
## com.dev.spread
## Less/ No tablet smartphone feature phone basic phone
## female 16 58 133 13 44
## male 9 39 138 14 46
vect <- as.vector(tbl)

# Count rows per device type and gender. dcast() builds the full type x group
# grid, so combinations with zero rows still get a 0 instead of vanishing.
# value.var is given explicitly so dcast() does not have to guess the value
# column (and print a message about it).
mobile.dev <- data.frame(type = factor(com.dev.spread), group = factor(data$GenderC))
dat <- dcast(mobile.dev, type ~ group, value.var = "group", fun.aggregate = length)

# Back to long format: one row per type/gender pair with its count in `value`.
dat.melt <- melt(dat, id.vars = "type", measure.vars = c("female", "male"))
dat.melt
## type variable value
## 1 Less/ No female 16
## 2 tablet female 58
## 3 smartphone female 133
## 4 feature phone female 13
## 5 basic phone female 44
## 6 <NA> female 22
## 7 Less/ No male 9
## 8 tablet male 39
## 9 smartphone male 138
## 10 feature phone male 14
## 11 basic phone male 46
## 12 <NA> male 25

# Dodged bars with the count printed above each bar; bars and labels use the
# same dodge width so the labels line up with their bars.
ggplot(dat.melt, aes(x = type, y = value, fill = variable)) +
  geom_bar(stat = "identity", colour = "black", position = position_dodge(width = .8), width = 0.7) +
  geom_text(aes(label = value), position = position_dodge(width = .8), vjust = -0.5)
1. Make a barplot as usual.
2. Make a two-way table of the variable and the fill variable, then build a data.frame from that table.
3. Pass that data.frame to `geom_text` so the labels are drawn in the correct positions.
# Only prog.dev == 1 counts as access. Testing is.na(prog.dev) alone would
# label respondents with prog.dev == 0 as "Have access". `%in%` never returns
# NA, so NA and 0 both fall into "Less/ No".
`Have access` <- ifelse(prog.dev %in% 1, "Have access", "Less/ No")
# making table
(tbl <- table(`Have access`, data$GenderC, useNA = "no"))
## NOTE(review): the output below was recorded with the earlier is.na()-based
## classification; the counts may differ if prog.dev contains 0s.
##
## Have access female male
## Have access 166 162
## Less/ No 120 109
# making data.frame from table
# Counts are laid out row by row (all genders for the first access level, then
# the next level), so `access` has to repeat each row name ncol(tbl) times.
# Recycling rownames(tbl) directly would alternate the labels and attach the
# wrong access level to half of the counts.
df.tbl <- data.frame(
  count = as.vector(t(tbl)),
  gender = factor(rep(colnames(tbl), times = nrow(tbl))),
  access = factor(rep(rownames(tbl), each = ncol(tbl)))
)
df.tbl
## count gender access
## 1 166 female Have access
## 2 162 male Have access
## 3 120 female Less/ No
## 4 109 male Less/ No

# Stacked bars with the count written inside each segment. The label layer
# uses its own data and mapping (inherit.aes = FALSE); grouping by `access`
# lets position "stack" place each label on the matching bar segment.
ggplot(
  data = data.frame(
    gender = data$GenderC,
    `Have access` = `Have access`,
    check.names = FALSE
  ),
  mapping = aes(x = gender, fill = `Have access`)
) +
  geom_bar() +
  ggtitle("Access to laptop or desktop computer at least once a week") +
  xlab("gender") +
  ylab("count") +
  geom_text(
    data = df.tbl,
    mapping = aes(x = gender, y = count, label = count, group = access),
    position = "stack",
    vjust = 1.5,
    inherit.aes = FALSE
  )
## Warning: Removed 10 rows containing non-finite values (stat_count).