# Restore the saved workshop objects into the current environment; the rest of
# the script expects this to provide a data frame named `census`.
load(file = "./data/data_workshop.Rda")

# Print a compact overview of the columns in `census`.
str(object = census)
## 'data.frame': 1000 obs. of 7 variables:
## $ gender : Factor w/ 2 levels "F","M": 1 1 1 1 2 1 1 1 2 2 ...
## $ race : Factor w/ 9 levels "Black ","E Asian ",..: 2 7 1 1 7 7 4 2 6 4 ...
## $ program : Factor w/ 3 levels "Academic","Applied",..: 1 1 2 1 1 1 1 2 1 1 ...
## $ progress: Factor w/ 4 levels "Having Difficulty",..: 2 2 1 3 2 3 2 3 2 2 ...
## $ mark : num 90 85 41 67 58 67 88 52 76 90 ...
## $ mark9 : num 94 78 41 53 67 69 90 50 85 90 ...
## $ absence : num 1 2 8 1 3 0 1 6 4 3 ...
library(ggplot2) # load ggplot2 library
# Bar charts of the `race` column of `census`.
#
# These are written with ggplot() + geom_bar() instead of qplot(): qplot() is
# deprecated, and its `position` argument has been ignored (with a warning)
# since ggplot2 2.0.0, so the dodged and filled variants must set `position`
# on the layer itself to have any effect.

# Counts per race.
ggplot(census, aes(x = race)) +
  geom_bar()

# Same bars with a constant fill colour (set on the layer, not mapped to data).
ggplot(census, aes(x = race)) +
  geom_bar(fill = "darkblue")

# Stacked bars: fill colour mapped to `progress`.
ggplot(census, aes(x = race, fill = progress)) +
  geom_bar()

# Stacked bars drawn with a greyscale palette.
ggplot(census, aes(x = race, fill = progress)) +
  geom_bar() +
  scale_fill_grey()

# One bar per `progress` level, placed side by side within each race.
ggplot(census, aes(x = race, fill = progress)) +
  geom_bar(position = "dodge")

# Bars rescaled to full height, so each shows the share of every `progress`
# level within a race (the axis runs from 0 to 1).
ggplot(census, aes(x = race, fill = progress)) +
  geom_bar(position = "fill") +
  ylab("percentage")

# Same proportional chart with the axes swapped (horizontal bars).
ggplot(census, aes(x = race, fill = progress)) +
  geom_bar(position = "fill") +
  ylab("percentage") +
  coord_flip()
# Histograms of `mark`.
# Written with ggplot() rather than the deprecated qplot(), matching the
# ggplot() calls used elsewhere in this script. The bin count is left at the
# geom_histogram() default, as in the qplot() version.

# Overall distribution of `mark`.
ggplot(census, aes(x = mark)) +
  geom_histogram()

# One panel per `progress` level, stacked in rows.
ggplot(census, aes(x = mark)) +
  geom_histogram() +
  facet_grid(progress ~ .)

# Grid of panels: `progress` in rows, `gender` in columns.
ggplot(census, aes(x = mark)) +
  geom_histogram() +
  facet_grid(progress ~ gender)
# `mark` against the categorical `progress` column.
# Written with ggplot() and explicit layers rather than the deprecated qplot().

# Plain points: not a useful picture (presumably because the points pile up on
# top of each other within each category; the variants below address that).
ggplot(census, aes(x = progress, y = mark)) +
  geom_point()

# Jittered points spread the observations out within each category.
ggplot(census, aes(x = progress, y = mark)) +
  geom_jitter()

# Jittered points with transparency, so dense regions appear darker.
ggplot(census, aes(x = progress, y = mark)) +
  geom_jitter(alpha = 1 / 3)

# Box plot summary per `progress` level.
ggplot(census, aes(x = progress, y = mark)) +
  geom_boxplot()

# Box plots with the jittered raw points overlaid; both layers use the same
# transparency, as the single qplot() call applied `alpha` to every geom.
ggplot(census, aes(x = progress, y = mark)) +
  geom_boxplot(alpha = 1 / 5) +
  geom_jitter(alpha = 1 / 5)
# Density curves of `mark`, filled by `progress`.
#
# Written with ggplot() + geom_density(): qplot() is deprecated and has ignored
# its `position` argument since ggplot2 2.0.0, which made the "stack" and
# "fill" variants identical to the first plot. `position` is now set on the
# layer, where it takes effect.

# Overlapping, fully opaque densities.
ggplot(census, aes(x = mark, fill = progress)) +
  geom_density()

# Overlapping densities made half transparent so all groups stay visible.
ggplot(census, aes(x = mark, fill = progress)) +
  geom_density(alpha = 1 / 2)

# Densities stacked on top of each other.
ggplot(census, aes(x = mark, fill = progress)) +
  geom_density(position = "stack")

# Stacked densities rescaled to a constant total height at every `mark` value.
ggplot(census, aes(x = mark, fill = progress)) +
  geom_density(position = "fill")
# Scatterplots of `mark` against `mark9`.
#
# Written with ggplot() and explicit layers instead of the deprecated qplot().
# This also lets `method = "lm"` go to geom_smooth() only; qplot() forwarded it
# to every geom, so the point layer warned about an unknown parameter.

# Basic scatterplot.
ggplot(census, aes(x = mark9, y = mark)) +
  geom_point()

# Shape 1 (open circles) for every point.
ggplot(census, aes(x = mark9, y = mark)) +
  geom_point(shape = 1)

# Open circles coloured by `program`.
ggplot(census, aes(x = mark9, y = mark, colour = program)) +
  geom_point(shape = 1)

# Open circles coloured by `program` and sized by `absence`.
ggplot(census, aes(x = mark9, y = mark, colour = program, size = absence)) +
  geom_point(shape = 1)

# Points with a fitted linear-model line.
ggplot(census, aes(x = mark9, y = mark)) +
  geom_point() +
  geom_smooth(method = "lm")

# Same plot, one panel (column) per `program`.
ggplot(census, aes(x = mark9, y = mark)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(. ~ program)

# Keep only the rows where both `mark` and `mark9` are greater than 0
# (i.e. drop the students with a 0 on either one), then refit.
census.sub <- subset(census, mark > 0 & mark9 > 0)
ggplot(census.sub, aes(x = mark9, y = mark)) +
  geom_point() +
  geom_smooth(method = "lm")
library(plyr) # load library plyr

# Mean `absence` for every race/program combination.
hm.df <- ddply(census, .(race, program), summarize, absence = mean(absence))

# Heatmap of the mean absence. The plot is stored in `htmap` because the last
# ggsave() call below saves it by name; previously `htmap` was never defined
# and that call failed. An assigned plot is not auto-printed, hence print().
htmap <- ggplot(hm.df, aes(x = race, y = program, fill = absence)) +
  geom_tile() +
  scale_fill_gradient2(high = "red", low = "white")
print(htmap)

library(maps) # load maps library

# Map polygons filled by `Murder`; the CSV must provide the `long`, `lat`,
# `group` and `Murder` columns used in the mapping below.
crime.map <- read.csv("./data/crime.map.csv") # read data
crime.plot <- ggplot(
  crime.map,
  aes(x = long, y = lat, group = group, fill = Murder)
) +
  geom_polygon(colour = "black")
print(crime.plot)

# Save the plots. Each call names the plot explicitly instead of relying on
# last_plot(), and spells out `filename` rather than the partially matched
# `file`. The output format is chosen from the file extension.
ggsave(filename = "plot.pdf", plot = crime.plot)
ggsave(filename = "plot.jpeg", plot = crime.plot, dpi = 72)
# NOTE(review): recent ggplot2 versions need the svglite package for .svg.
ggsave(filename = "plot.svg", plot = htmap, width = 10, height = 5)