# Bodong Chen, University of Toronto
# KNAER Data Visualization Workshop
# April 3, 2013

# Load the workshop data. The commands that follow expect this file to
# provide a data frame called `census`.
load("./data/data_workshop.Rda") # load data

# Inspect the columns and their types before plotting. The console output
# recorded during the workshop is kept below as comments so that the script
# can still be parsed and sourced.
str(census) # check data structure
#> 'data.frame': 1000 obs. of 7 variables:
#> $ gender : Factor w/ 2 levels "F","M": 1 1 1 1 2 1 1 1 2 2 ...
#> $ race : Factor w/ 9 levels "Black ","E Asian ",..: 2 7 1 1 7 7 4 2 6 4 ...
#> $ program : Factor w/ 3 levels "Academic","Applied",..: 1 1 2 1 1 1 1 2 1 1 ...
#> $ progress: Factor w/ 4 levels "Having Difficulty",..: 2 2 1 3 2 3 2 3 2 2 ...
#> $ mark : num 90 85 41 67 58 67 88 52 76 90 ...
#> $ mark9 : num 94 78 41 53 67 69 90 50 85 90 ...
#> $ absence : num 1 2 8 1 3 0 1 6 4 3 ...
library(ggplot2) # load ggplot2 library
# Bar charts: number of students per race category ----

# Plain counts.
qplot(race, data = census)

# Same counts with one fixed bar colour; I() marks "darkblue" as a constant
# rather than a variable to map.
qplot(race, data = census, fill = I("darkblue"))

# Split every bar by progress level (stacked by default), first with the
# default palette and then in greyscale.
qplot(race, data = census, fill = progress)
qplot(race, data = census, fill = progress) + scale_fill_grey()

# The `position` argument of qplot() has been deprecated since ggplot2 2.0.0,
# so the position adjustment is given to geom_bar() directly.

# Progress levels side by side within each race category.
ggplot(census, aes(x = race, fill = progress)) +
  geom_bar(position = "dodge")

# Each bar rescaled to full height, showing the share of every progress level.
ggplot(census, aes(x = race, fill = progress)) +
  geom_bar(position = "fill") +
  ylab("percentage")

# Same proportional chart drawn horizontally.
ggplot(census, aes(x = race, fill = progress)) +
  geom_bar(position = "fill") +
  ylab("percentage") +
  coord_flip()
# Histograms of `mark` ----

# All students in a single panel.
qplot(
  x = mark,
  data = census
)

# One row of panels per level of `progress`.
qplot(
  x = mark,
  data = census,
  facets = progress ~ .
)

# Rows by `progress`, columns by `gender`.
qplot(
  x = mark,
  data = census,
  facets = progress ~ gender
)
# `mark` against the categorical `progress` ----

# Default geom for an x/y pair (original note: results not good).
qplot(x = progress, y = mark, data = census)

# Jitter the points so they do not sit on top of each other.
qplot(x = progress, y = mark, data = census, geom = "jitter")

# Jittered points again, made semi-transparent with a constant alpha.
qplot(
  x = progress, y = mark, data = census,
  geom = "jitter",
  alpha = I(1 / 3)
)

# Box plot per progress level.
qplot(x = progress, y = mark, data = census, geom = "boxplot")

# Box plots with the jittered points drawn over them; the constant alpha is
# passed along with both geoms.
qplot(
  x = progress, y = mark, data = census,
  geom = c("boxplot", "jitter"),
  alpha = I(1 / 5)
)
# Distribution of `mark` by progress level ----

# Overlapping density curves, one per progress level.
qplot(mark, data = census, fill = progress, geom = "density")

# Bars scaled so that the tallest bar of every progress group has height 1.
# `ndensity` is computed by the binning stat, not by the counting stat that
# geom_bar() has used since ggplot2 2.0.0, so the plot is drawn as a histogram
# on the numeric mark. binwidth = 1 gives one bar per mark, assuming marks are
# whole numbers (TODO confirm against the data).
# On ggplot2 >= 3.4 `after_stat(ndensity)` is the preferred spelling of
# `..ndensity..`; the older form is kept because it works on every version.
ggplot(census, aes(x = mark)) +
  geom_histogram(
    aes(y = ..ndensity.., group = progress, fill = progress),
    binwidth = 1
  ) +
  xlab("Mark") +
  ylab("Frequency of Students by Progress") +
  ggtitle("Distribution of Marks by Progress")

# Semi-transparent densities so that overlapping curves stay visible.
qplot(mark, data = census, fill = progress, geom = "density", alpha = I(1 / 2))

# The `position` argument of qplot() has been deprecated since ggplot2 2.0.0,
# so stacked and filled densities are built with geom_density() directly.

# Densities stacked on top of each other.
ggplot(census, aes(x = mark, fill = progress)) +
  geom_density(position = "stack")

# Densities rescaled to fill the full height at every mark.
ggplot(census, aes(x = mark, fill = progress)) +
  geom_density(position = "fill")
# Scatter plots of `mark` against `mark9` ----

# Basic scatter plot.
qplot(x = mark9, y = mark, data = census)

# Constant point shape 1 for every point.
qplot(x = mark9, y = mark, data = census, shape = I(1))

# Colour the points by program.
qplot(
  x = mark9, y = mark, data = census,
  shape = I(1),
  colour = program
)

# Additionally size the points by number of absences.
qplot(
  x = mark9, y = mark, data = census,
  shape = I(1),
  colour = program,
  size = absence
)
# Scatter plots with a fitted straight line ----
# `method = "lm"` is given to geom_smooth() only. Passed through qplot() it is
# handed to every geom, and geom_point() has no such parameter (ggplot2 2.0 and
# later warn about the unknown parameter).

# All students in one panel.
ggplot(census, aes(x = mark9, y = mark)) +
  geom_point() +
  geom_smooth(method = "lm")

# One panel per program, laid out as columns.
ggplot(census, aes(x = mark9, y = mark)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(. ~ program)

# Filter data: keep only students whose `mark` and `mark9` are both above 0
census.sub <- subset(census, mark > 0 & mark9 > 0)

# Same fit on the filtered data.
ggplot(census.sub, aes(x = mark9, y = mark)) +
  geom_point() +
  geom_smooth(method = "lm")
library(plyr) # load library plyr

# Heatmap ----

# Mean absence for every race x program combination.
hm.df <- ddply(census, .(race, program), summarize, absence = mean(absence))

# The heatmap is stored in `htmap` so that it can be saved by name further
# down; the bare `htmap` line prints it, as the unassigned expression did.
htmap <- ggplot(hm.df, aes(race, program, fill = absence)) +
  geom_tile() +
  scale_fill_gradient2(high = "red", low = "white") # plot a heatmap
htmap

# Map ----

library(maps) # load maps library
crime.map <- read.csv("./data/crime.map.csv") # read data

# Polygons filled by the `Murder` column, with black outlines.
ggplot(crime.map, aes(x = long, y = lat, group = group, fill = Murder)) +
  geom_polygon(colour = "black")

# Saving plots ----

# With no `plot` argument ggsave() writes the most recent plot, which at this
# point is the map above.
ggsave(filename = "plot.pdf")
ggsave(filename = "plot.jpeg", dpi = 72)

# Save the heatmap explicitly, with a custom size.
ggsave(filename = "plot.svg", plot = htmap, width = 10, height = 5)