Stat545a-2013-hw5-Liu

Load R packages

# install.packages('ggplot2', dependencies = TRUE)
# install.packages('colorspace')
library(ggplot2)
library(plyr)
library(lattice)

download data

# Read the Gapminder five-year extract (tab-delimited) directly from its URL.
gdURL <- "http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt"
# stringsAsFactors = TRUE keeps `country` and `continent` as factors on
# R >= 4.0.0 as well, so the structure matches the str() output recorded below.
gDat <- read.delim(file = gdURL, stringsAsFactors = TRUE)
str(gDat)
## 'data.frame':    1704 obs. of  6 variables:
##  $ country  : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ year     : int  1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ pop      : num  8425333 9240934 10267083 11537966 13079460 ...
##  $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ lifeExp  : num  28.8 30.3 32 34 36.1 ...
##  $ gdpPercap: num  779 821 853 836 740 ...
# The working directory below is specific to one machine. Attempt the switch,
# but report and carry on instead of aborting the whole script when the
# folder is missing.
tryCatch(
    setwd("C:/Users/Yan/Dropbox/Important_File/2013-2014Courses/545_R/exercise/"),
    error = function(e) message("Skipping setwd(): ", conditionMessage(e))
)

one quantitative and one categorical variable

Include at least one stripplot-type of figure: one quantitative and one categorical variable. Bring in some extra information, for example by superposing groups, facetting (aka multi-panel conditioning), and/or overlaying, e.g. summary statistics.

# Stripplot of lifeExp: one point per row of gDat, placed by year and
# coloured by continent.
ggplot(data = gDat, mapping = aes(x = year, y = lifeExp, colour = continent)) +
    geom_point()

plot of chunk unnamed-chunk-3

# Linear-model trend of lifeExp over year, one line per continent colour
# (only the smoother layer is drawn, no raw points).
ggplot(data = gDat, mapping = aes(x = year, y = lifeExp, colour = continent)) +
    geom_smooth(method = "lm")

plot of chunk unnamed-chunk-3


# stripplot for each continent with median of lifeExp in red dots
#
# Helper that builds a stat_summary() layer drawing a single summary value.
#   fun  - summary function handed to stat_summary() (e.g. median).
#   geom - geom used to draw the summary; defaults to "point".
#   ...  - any further arguments, forwarded unchanged to stat_summary().
# Returns whatever stat_summary() returns (a layer to add to a ggplot).
#
# stat_summary()'s `fun.y` argument was deprecated in ggplot2 3.3.0 in favour
# of `fun`, so the summary function is passed as `fun`.
stat_sum_single <- function(fun, geom = "point", ...) {
    stat_summary(fun = fun, colour = "red", geom = geom, size = 3, ...)
}

# One panel per continent: raw points, the default smoother (loess here, per
# the message recorded below), and the median from stat_sum_single() in red.
ggplot(data = gDat, mapping = aes(x = year, y = lifeExp)) +
    geom_point() +
    geom_smooth() +
    facet_wrap(~continent) +
    stat_sum_single(median)
## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method. geom_smooth:
## method="auto" and size of largest group is <1000, so using loess. Use
## 'method = x' to change the smoothing method. geom_smooth: method="auto"
## and size of largest group is <1000, so using loess. Use 'method = x' to
## change the smoothing method. geom_smooth: method="auto" and size of
## largest group is <1000, so using loess. Use 'method = x' to change the
## smoothing method. geom_smooth: method="auto" and size of largest group is
## <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-3


# Stripplot of lifeExp against year, split into one panel per continent,
# with a linear-model fit drawn over the points.
ggplot(data = gDat, mapping = aes(x = year, y = lifeExp)) +
    geom_point() +
    geom_smooth(method = "lm") +
    facet_wrap(~continent)

plot of chunk unnamed-chunk-3


# stripplot for each continent with median of lifeExp in red dots
#
# Helper that builds a stat_summary() layer drawing a single summary value.
#   fun  - summary function handed to stat_summary() (e.g. median).
#   geom - geom used to draw the summary; defaults to "point".
#   ...  - any further arguments, forwarded unchanged to stat_summary().
# Returns whatever stat_summary() returns (a layer to add to a ggplot).
#
# stat_summary()'s `fun.y` argument was deprecated in ggplot2 3.3.0 in favour
# of `fun`, so the summary function is passed as `fun`.
stat_sum_single <- function(fun, geom = "point", ...) {
    stat_summary(fun = fun, colour = "red", geom = geom, size = 3, ...)
}

# Per-continent panels of lifeExp against year: points, the default smoother,
# and the red median layer produced by stat_sum_single().
ggplot(data = gDat, mapping = aes(x = year, y = lifeExp)) +
    geom_point() +
    geom_smooth() +
    facet_wrap(~continent) +
    stat_sum_single(median)
## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method. geom_smooth:
## method="auto" and size of largest group is <1000, so using loess. Use
## 'method = x' to change the smoothing method. geom_smooth: method="auto"
## and size of largest group is <1000, so using loess. Use 'method = x' to
## change the smoothing method. geom_smooth: method="auto" and size of
## largest group is <1000, so using loess. Use 'method = x' to change the
## smoothing method. geom_smooth: method="auto" and size of largest group is
## <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-3

two quantitative variables

Include at least one scatterplot: two quantitative variables. Do extras, e.g. superposing groups, facetting, overlaying with a fitted or external model.

# Base scatterplot of lifeExp against log10(gdpPercap), coloured by year
# (as a factor). Stored in `p` so later chunks can add layers to it.
# Written with ggplot() + geom_point() because qplot() is deprecated in
# ggplot2 3.4.0; mappings, axis labels and legend title are unchanged.
p <- ggplot(gDat, aes(x = log10(gdpPercap), y = lifeExp, colour = factor(year))) +
    geom_point()

# Scatterplot of lifeExp against log10(gdpPercap), coloured by continent.
# Uses ggplot() + geom_point() in place of the deprecated qplot() shortcut.
ggplot(gDat, aes(x = log10(gdpPercap), y = lifeExp, colour = factor(continent))) +
    geom_point()

plot of chunk unnamed-chunk-4

# Scatterplot of lifeExp against log10(gdpPercap) with a linear fit, laid out
# as one column of panels per continent. qplot(facets = . ~ continent) is
# spelled out as facet_grid(. ~ continent) because qplot() is deprecated.
ggplot(gDat, aes(x = log10(gdpPercap), y = lifeExp)) +
    geom_point() +
    facet_grid(. ~ continent) +
    geom_smooth(method = "lm")

plot of chunk unnamed-chunk-4


# gdpPercap (log10 axis) against lifeExp: semi-transparent points coloured by
# continent, plus a black smoother without its standard-error band.
ggplot(
    data = gDat,
    mapping = aes(x = gdpPercap, y = lifeExp, colour = factor(continent))
) +
    geom_point(alpha = 0.8) +
    geom_smooth(colour = "black", se = FALSE) +
    scale_x_log10()
## geom_smooth: method="auto" and size of largest group is >=1000, so using
## gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the
## smoothing method.

plot of chunk unnamed-chunk-4


# Same gdpPercap/lifeExp scatterplot, now coloured by year and split into one
# panel per continent; each panel carries a black smoother with no error band.
ggplot(
    data = gDat,
    mapping = aes(x = gdpPercap, y = lifeExp, colour = factor(year))
) +
    geom_point(alpha = 0.8) +
    geom_smooth(colour = "black", se = FALSE) +
    scale_x_log10() +
    facet_wrap(~continent)
## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method. geom_smooth:
## method="auto" and size of largest group is <1000, so using loess. Use
## 'method = x' to change the smoothing method. geom_smooth: method="auto"
## and size of largest group is <1000, so using loess. Use 'method = x' to
## change the smoothing method. geom_smooth: method="auto" and size of
## largest group is <1000, so using loess. Use 'method = x' to change the
## smoothing method. geom_smooth: method="auto" and size of largest group is
## <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-4


# scatterplot for lifeExp and gdpPercap with facet: one panel per continent,
# gdpPercap on a log10 x-axis, with a linear-model fit over the points.
# x is mapped to gdpPercap so the figure matches this description; mapping
# x = year here only repeated the year/lifeExp stripplot drawn earlier.
ggplot(gDat, aes(x = gdpPercap, y = lifeExp)) +
    geom_point() +
    geom_smooth(method = "lm") +
    scale_x_log10() +
    facet_wrap(~continent)

plot of chunk unnamed-chunk-4

# lifeExp against year in one panel per continent, with the default smoother
# (loess, according to the message recorded below).
ggplot(data = gDat, mapping = aes(x = year, y = lifeExp)) +
    geom_point() +
    geom_smooth() +
    facet_wrap(~continent)
## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method. geom_smooth:
## method="auto" and size of largest group is <1000, so using loess. Use
## 'method = x' to change the smoothing method. geom_smooth: method="auto"
## and size of largest group is <1000, so using loess. Use 'method = x' to
## change the smoothing method. geom_smooth: method="auto" and size of
## largest group is <1000, so using loess. Use 'method = x' to change the
## smoothing method. geom_smooth: method="auto" and size of largest group is
## <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-4

Plot regression intercept and slope for lifeExp~gdpPercap

# Add one linear-model line per continent (grouping overrides the colour
# grouping of `p`) to the stored scatterplot, with fullrange = TRUE.
p +
    geom_smooth(
        mapping = aes(group = continent),
        method = "lm",
        fullrange = TRUE
    )

plot of chunk unnamed-chunk-5


# Reference line with fixed constants (intercept 40, slope 4) on top of `p`,
# drawn with the coordinate axes flipped.
p +
    geom_abline(slope = 4, intercept = 40) +
    coord_flip()

plot of chunk unnamed-chunk-5

# Reference line with fixed constants (intercept 40, slope 3) on top of `p`,
# drawn in polar coordinates.
p +
    geom_abline(slope = 3, intercept = 40) +
    coord_polar()

plot of chunk unnamed-chunk-5

Compare density plots between lattice and ggplot2:

# ggplot2 density of lifeExp. `m` holds the base plot (x = lifeExp) and is
# reused below; here the density is filled by continent and drawn in one
# panel per continent.
m <- ggplot(data = gDat, mapping = aes(x = lifeExp))
m +
    geom_density(mapping = aes(fill = factor(continent)), size = 2) +
    facet_wrap(~continent)

plot of chunk unnamed-chunk-6

# Same filled densities as above, but overlaid in a single panel.
m +
    geom_density(mapping = aes(fill = factor(continent)), size = 2)

plot of chunk unnamed-chunk-6

# Density outlines coloured by continent; adjust = 5 is passed to
# geom_density() to change the amount of smoothing.
m1 <- ggplot(data = gDat, mapping = aes(x = lifeExp, colour = factor(continent)))
m1 +
    geom_density(adjust = 5)

plot of chunk unnamed-chunk-6


# lattice: density of lifeExp conditioned on continent (one panel each),
# without the individual data points and with a reference line.
densityplot(
    ~lifeExp | continent,
    data = gDat,
    plot.points = FALSE,
    ref = TRUE
)

plot of chunk unnamed-chunk-6

# lattice: densities of lifeExp superposed in one panel, grouped by continent
# (levels reordered via reorder(continent, lifeExp)) with an automatic key.
densityplot(
    ~lifeExp,
    data = gDat,
    groups = reorder(continent, lifeExp),
    auto.key = TRUE,
    plot.points = FALSE,
    ref = TRUE
)

plot of chunk unnamed-chunk-6

# Set the global option `rpubs.upload.method` to "internal".
# NOTE(review): presumably consumed by the RPubs publishing step when this
# report is uploaded — confirm; nothing else in this script reads it.
options(rpubs.upload.method = "internal")