ggplot2graphics.R

etienne — Jun 6, 2013, 11:03 PM

# ggplot2 graphics tutorial
# Etienne Laliberte
# June 4, 2013


# the diamonds dataset
library(ggplot2)
data(diamonds)
head(diamonds)
  carat       cut color clarity depth table price    x    y    z
1  0.23     Ideal     E     SI2  61.5    55   326 3.95 3.98 2.43
2  0.21   Premium     E     SI1  59.8    61   326 3.89 3.84 2.31
3  0.23      Good     E     VS1  56.9    65   327 4.05 4.07 2.31
4  0.29   Premium     I     VS2  62.4    58   334 4.20 4.23 2.63
5  0.31      Good     J     SI2  63.3    58   335 4.34 4.35 2.75
6  0.24 Very Good     J    VVS2  62.8    57   336 3.94 3.96 2.48
# keep only the rows whose cut is "Fair" or "Good", then preview four columns
diam <- diamonds[diamonds$cut %in% c("Fair", "Good"), ]
head(diam[, c("carat", "cut", "color", "price")])
   carat  cut color price
3   0.23 Good     E   327
5   0.31 Good     J   335
9   0.22 Fair     E   337
11  0.30 Good     J   339
18  0.30 Good     J   351
19  0.30 Good     J   351


# when base graphics get frustrating
# Two side-by-side base-graphics scatterplots of price against carat, one for
# each cut in diam, with points coloured by the diamond's colour grade.
# par() returns the previous settings so they can be restored at the end.
old_par <- par(mfrow = c(1, 2))
# one rainbow colour per level of diam$color, looked up through the factor's
# integer codes instead of one assignment per level
rbow <- rainbow(nlevels(diam$color))
cols <- rbow[as.integer(diam$color)]
diam$cols <- cols
plot(diam$price[diam$cut == "Fair"] ~ diam$carat[diam$cut == "Fair"],
     main = "Fair", xlab = "carat", ylab = "price", pch = 19,
     col = diam$cols[diam$cut == "Fair"])
# the legend is drawn once, on the first panel only
legend("bottomright", legend = levels(diam$color), pch = 19, col = rbow)
plot(diam$price[diam$cut == "Good"] ~ diam$carat[diam$cut == "Good"],
     main = "Good", xlab = "carat", ylab = "price", pch = 19,
     col = diam$cols[diam$cut == "Good"])
# put the panel layout back the way it was
par(old_par)

plot of chunk unnamed-chunk-1



# multi-panel conditioning using ggplot2
# only one line of code!
qplot(x = carat, y = price, colour = color, facets = . ~ cut, data = diam)

plot of chunk unnamed-chunk-1



# getting started with qplot (quick plot)
# fix the RNG seed so that sample() draws the same rows on every run
set.seed(1410)
# dsmall: 100 rows of diamonds picked by sample(); used by the examples below
dsmall <- diamonds[sample(nrow(diamonds), 100), ]
qplot(x = carat, y = price, data = dsmall)

plot of chunk unnamed-chunk-1

qplot(x = log(carat), y = log(price), data = dsmall)

plot of chunk unnamed-chunk-1

qplot(x = carat, y = price, data = dsmall, log = "xy")

plot of chunk unnamed-chunk-1

# this does the same as above
qplot(x = carat, y = price, data = dsmall) + scale_y_log10() +
  scale_x_log10()

plot of chunk unnamed-chunk-1



# colour, size, shape
qplot(carat, price, data = dsmall, colour = color)

plot of chunk unnamed-chunk-1

qplot(x = carat, y = price, data = dsmall, size = x)

plot of chunk unnamed-chunk-1

qplot(carat, price, data = dsmall, shape = cut)

plot of chunk unnamed-chunk-1



# setting vs mapping
qplot(carat, price, data = dsmall, colour = I("blue") )

plot of chunk unnamed-chunk-1

# no I() here, unlike the call above: "blue" is handed to qplot as a value to
# map rather than as a fixed colour (presumably the points are then not drawn
# blue -- confirm on the rendered plot)
qplot(carat, price, data = dsmall, colour = "blue")

plot of chunk unnamed-chunk-1



# dealing with overplotting
qplot(carat, price, data = diamonds)

plot of chunk unnamed-chunk-1

qplot(carat, price, data = diamonds, alpha = I(1/100))

plot of chunk unnamed-chunk-1



# other 2D geoms
qplot(carat, price, data = dsmall, geom = c("point", "smooth"))
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-1

qplot(carat, price, data = dsmall, geom = "jitter",
      position = position_jitter(width = 1, height = 1))

plot of chunk unnamed-chunk-1

qplot(carat, price, data = dsmall, geom = "polygon")

plot of chunk unnamed-chunk-1

qplot(carat, price, data = dsmall, geom = "line")

plot of chunk unnamed-chunk-1

qplot(carat, price, data = dsmall, geom = "path")

plot of chunk unnamed-chunk-1



# smoothers
qplot(carat, price, data = dsmall, geom = c("point", "smooth"))
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-1

# same points + smoother, with the standard-error band switched off;
# FALSE is spelled out because F is an ordinary, reassignable variable
qplot(carat, price, data = dsmall, geom = c("point", "smooth"), se = FALSE)
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-1

qplot(carat, price, data = dsmall, geom = c("point", "smooth"), span = 0.2)
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-1

qplot(carat, price, data = dsmall, geom = c("point", "smooth"), method = "lm")

plot of chunk unnamed-chunk-1



# boxplots and jittered points
qplot(color, price, data = dsmall, geom = "boxplot")

plot of chunk unnamed-chunk-1

qplot(color, price, data = diamonds, geom = "boxplot")

plot of chunk unnamed-chunk-1

qplot(color, price, data = diamonds, geom = "jitter", alpha = I(1/10))

plot of chunk unnamed-chunk-1



# histograms and density plots
qplot(carat, data = diamonds, geom = "histogram") # play with binwidth (1, 0.1, 0.01)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1

qplot(carat, data = diamonds, geom = "density") # play with adjust (0.5, 1, 3)

plot of chunk unnamed-chunk-1



# splitting by groups
qplot(carat, data = diamonds, geom = "histogram", fill = color)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1

qplot(carat, data = diamonds, geom = "density", colour = color)

plot of chunk unnamed-chunk-1



# barplots
qplot(color, data = dsmall, geom = "bar")

plot of chunk unnamed-chunk-1

qplot(color, data = dsmall, geom = "bar", weight = carat, ylab = "sum carat")

plot of chunk unnamed-chunk-1



# barplots: stacking, filling, dodging
qplot(color, data = dsmall, geom = "bar", fill = cut, position = "stack")

plot of chunk unnamed-chunk-1

qplot(color, data = dsmall, geom = "bar", fill = cut, position = "fill")

plot of chunk unnamed-chunk-1

qplot(color, data = dsmall, geom = "bar", fill = cut, position = "dodge")

plot of chunk unnamed-chunk-1



# time series
economics[1:4,]# US economic data over 40 years
        date   pce    pop psavert uempmed unemploy
1 1967-06-30 507.8 198712     9.8     4.5     2944
2 1967-07-31 510.9 198911     9.8     4.7     2945
3 1967-08-31 516.7 199113     9.0     4.6     2958
4 1967-09-30 513.3 199311     9.8     4.9     3143
(p <- qplot(date, unemploy, data = economics, geom = "line", ylab = "unemployed (1000s)") )

plot of chunk unnamed-chunk-1

# uempmed over time, drawn as a line (axis label spelling corrected)
qplot(date, uempmed, data = economics, geom = "line",
      ylab = "unemployment (weeks)")

plot of chunk unnamed-chunk-1



# time series and groups
library(nlme)
data(Oxboys) ; Oxboys[1:3,]
Grouped Data: height ~ age | Subject
  Subject     age height Occasion
1       1 -1.0000  140.5        1
2       1 -0.7479  143.4        2
3       1 -0.4630  144.8        3
qplot(age, height, data = Oxboys, geom = "line")

plot of chunk unnamed-chunk-1

qplot(age, height, data = Oxboys, group = Subject, geom = "line")

plot of chunk unnamed-chunk-1



# facetting
qplot(carat, price, data = diamonds, facets = . ~ cut)

plot of chunk unnamed-chunk-1



# facet_grid vs facet_wrap
qplot(carat, price, data = dsmall) + facet_grid(. ~ cut)

plot of chunk unnamed-chunk-1

qplot(carat, price, data = dsmall) + facet_wrap(~ cut, nrow = 2)

plot of chunk unnamed-chunk-1



# facets and scales
qplot(carat, price, data = dsmall) + facet_wrap(~ cut, nrow = 1)

plot of chunk unnamed-chunk-1

qplot(carat, price, data = dsmall) + facet_wrap( ~ cut, nrow = 1, scales = "free")

plot of chunk unnamed-chunk-1



# qplot vs ggplot
qplot(x = carat, y = price, data = dsmall)

plot of chunk unnamed-chunk-1

# explicit layer() form of the scatterplot; stat and position are spelled out
# because layer() does not fill in defaults for them in current ggplot2
ggplot(data = dsmall, mapping = aes(x = carat, y = price)) +
  layer(geom = "point", stat = "identity", position = "identity")

plot of chunk unnamed-chunk-1

ggplot(dsmall, aes(carat, price)) +
      geom_point()

plot of chunk unnamed-chunk-1



# avoiding duplication
# a layer can be stored in a variable and added to more than one plot;
# FALSE is spelled out because F is an ordinary, reassignable variable
bestfit <- geom_smooth(method = "lm", se = FALSE, colour = "orange", size = 2)
qplot(carat, price, data = dsmall) + bestfit

plot of chunk unnamed-chunk-1

qplot(depth, price, data = dsmall) + bestfit

plot of chunk unnamed-chunk-1



# mapping: global vs local
# colour = cut is given to qplot itself, so it applies to the added smoother
# layer as well as to the points
qplot(carat, price, colour = cut, data = dsmall) +
  geom_smooth(method = "lm", se = FALSE)

plot of chunk unnamed-chunk-1

# colour = cut is given only to the point layer, so the smoother is not
# split by cut
qplot(carat, price, geom = "smooth", method = "lm", se = FALSE,
      data = dsmall) +
  geom_point(aes(colour = cut))

plot of chunk unnamed-chunk-1

# FYI, the following two lines do the same in 'full ggplot' grammar
# colour mapped in ggplot(): shared by the point and smoother layers
ggplot(dsmall, aes(carat, price, colour = cut)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

plot of chunk unnamed-chunk-1

# colour mapped inside geom_point() only: the smoother layer does not use it
ggplot(dsmall, aes(carat, price)) +
  geom_point(aes(colour = cut)) +
  geom_smooth(method = "lm", se = FALSE)

plot of chunk unnamed-chunk-1



# mapping vs setting
qplot(carat, price, data = dsmall) + geom_point(aes(colour = cut))

plot of chunk unnamed-chunk-1

qplot(carat, price, data = dsmall) + geom_point(colour = "red")

plot of chunk unnamed-chunk-1



# changing labels
# new column: carat scaled by 200 * 1000; the x label below presents it as
# micrograms (i.e. assumes 200 mg per carat)
dsmall$microg <- dsmall$carat * 200 * 1000
qplot(microg, price, colour = cut, data = dsmall) +
  # expression() gives a plotmath label, so mu is rendered as a Greek letter
  xlab(expression(Weight~(mu*g)) ) +
  ylab("Price ($US)") +
  # name sets the legend title; "\n" breaks it over two lines
  scale_colour_discrete(name = "What is\nthe cut?")

plot of chunk unnamed-chunk-1



# changing scales (e.g. colour)
p <- qplot(carat, price, colour = cut, data = dsmall) + geom_point()
p + scale_colour_brewer(palette = "Set1")

plot of chunk unnamed-chunk-1

p + scale_colour_manual(values = c("red","blue", "green", "orange", "purple"))

plot of chunk unnamed-chunk-1

p + scale_colour_grey(start = 0, end = 0.9) + theme_bw()

plot of chunk unnamed-chunk-1



# flipping axes
qplot(color, price, data = dsmall, geom = "boxplot") + coord_flip()

plot of chunk unnamed-chunk-1



# themes
qplot(carat, price, data = dsmall) + theme_grey()

plot of chunk unnamed-chunk-1

qplot(carat, price, data = dsmall) + theme_bw()

plot of chunk unnamed-chunk-1



# arranging multiple plots
library(gridExtra)
Loading required package: grid
p1 <- qplot(carat, price, data = dsmall)
p2 <- qplot(color, price, data = dsmall, geom = "boxplot")
grid.arrange(p1, p2, ncol = 2)

plot of chunk unnamed-chunk-1



# barplot with error bars
# means and standard errors
# standard error of the mean: sample standard deviation over sqrt(n)
se <- function(x) {
  n_obs <- length(x)
  sd(x) / sqrt(n_obs)
}

# lower end of the error bar: mean minus one standard error
se.min <- function(x) {
  centre <- mean(x)
  centre - se(x)
}

# upper end of the error bar: mean plus one standard error
se.max <- function(x) {
  centre <- mean(x)
  centre + se(x)
}

#plot
# bars: mean carat per colour; error bars: from se.min() to se.max()
p <- ggplot(dsmall, aes(color, carat)) +
  stat_summary(geom = "bar", fun.y = mean, fill = "orange") +
  stat_summary(
    geom = "errorbar", width = 0.5,
    fun.ymin = se.min, fun.ymax = se.max
  ) +
  coord_cartesian(ylim = c(0, 1.8))
p

plot of chunk unnamed-chunk-1



# barplots and error bars
# create a smaller dataset
library(plyr)
# se.max() of every numeric column within each colour ...
dsmall2 <- ddply(dsmall, .(color), numcolwise(se.max))
# ... of which only the first two columns are kept
dsmall2 <- dsmall2[, 1:2]
# one hand-assigned letter per colour: three "a", two "b", two "c"
dsmall2$lett <- rep(c("a", "b", "c"), times = c(3, 2, 2))
dsmall2
  color  carat lett
1     D 0.7365    a
2     E 0.6694    a
3     F 0.9159    a
4     G 1.0116    b
5     H 1.1941    b
6     I 1.5020    c
7     J 1.5341    c

# plot the barplot with the letters
p + geom_text(aes(label = lett, vjust = -0.5), data = dsmall2)

plot of chunk unnamed-chunk-1



# boxplots with letters
letts <- dsmall2$lett
# compute the boxplot statistics without drawing; row 5 of $stats is the
# upper whisker end of each group, used as the height for its letter
lettpos <- boxplot(carat ~ color, data = dsmall, plot = FALSE)
lettpos <- lettpos$stats[5, ]
lettdf <- data.frame(
  color = levels(dsmall$color),
  carat = lettpos,
  lett = letts
)
qplot(color, carat, data = dsmall, geom = "boxplot") +
  ylim(c(0, 2.75)) +
  geom_text(aes(label = lett, vjust = -0.5), data = lettdf)

plot of chunk unnamed-chunk-1



# map of australia
# load() restores the objects saved in ozdata.rda; the plot below expects an
# object named ozdata with long, lat and state columns
load("ozdata.rda") # check the data
# polygons drawn from long/lat and filled by state, with coord_equal() applied
ozplot <- qplot(long, lat, data = ozdata, geom = "polygon", fill = state) + coord_equal()
ozplot

plot of chunk unnamed-chunk-1


# fraud cases in australia
ozcrime <- read.table("ozcrime.txt", header = TRUE)
# state polygons underneath; on top, one red point per row of ozcrime sized
# by fraud, with the city name as a text label. long/lat are inherited from
# the plot-level mapping by every layer.
ggplot(ozdata, aes(long, lat)) +
  geom_polygon(aes(fill = state)) +
  geom_point(aes(size = fraud), data = ozcrime, colour = "red") +
  geom_text(aes(label = city, vjust = 1.5), data = ozcrime) +
  scale_size(range = c(3, 9)) +
  coord_fixed()

plot of chunk unnamed-chunk-1



# zooming in on SWA
# replace the coordinate system of ozplot with a zoomed one; both limits are
# written as c(min, max) so the latitude axis is not at risk of being drawn
# reversed
ozplot +
  coord_fixed(xlim = c(114, 118), ylim = c(-35.5, -31)) +
  theme(legend.position = "none")

plot of chunk unnamed-chunk-1




# saving output
p <- qplot(carat, price, data = dsmall)
# write the same plot twice at width 4 x height 3: once as PNG (dpi 300),
# once as PDF
ggsave(filename = "myplot.png", plot = p, width = 4, height = 3, dpi = 300)
ggsave(filename = "myplot.pdf", plot = p, width = 4, height = 3)