Rebecca Johnston
Here I will be exploring some features of lattice, a multivariate data visualization package in R. I will use the gapminder data to provide examples of the graphical output from this package.
library(lattice)
library(plyr)
library(xtable)
# Location of the tab-delimited Gapminder excerpt (five-year intervals).
gdURL <- "http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt"
# Since R 4.0.0 strings are read as character vectors by default. Ask for
# factors explicitly so that `country` and `continent` are factors on every R
# version; later steps (droplevels(), nlevels()) rely on that.
gDat <- read.delim(file = gdURL, stringsAsFactors = TRUE)
str(gDat)
## 'data.frame': 1704 obs. of 6 variables:
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ pop : num 8425333 9240934 10267083 11537966 13079460 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ gdpPercap: num 779 821 853 836 740 ...
# Cross-tabulate continent against year to count the observations in each cell.
table(gDat$continent, gDat$year)
##
## 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 2002 2007
## Africa 52 52 52 52 52 52 52 52 52 52 52 52
## Americas 25 25 25 25 25 25 25 25 25 25 25 25
## Asia 33 33 33 33 33 33 33 33 33 33 33 33
## Europe 30 30 30 30 30 30 30 30 30 30 30 30
## Oceania 2 2 2 2 2 2 2 2 2 2 2 2
# Oceania contributes only two rows per year, so leave it out and discard the
# now-unused factor levels before tabulating what remains.
iDat <- droplevels(gDat[which(gDat$continent != "Oceania"), ])
table(iDat$continent)
##
## Africa Americas Asia Europe
## 624 300 396 360
I have chosen to create graphs to accompany code created by Yumian Hu. Not only did Yumian answer every single question from hw03 (so impressive!), she answered each question with clean code. I even learnt a few tricks from her .rmd file to create links within this very page. Thanks Yumian!
# Mean life expectancy for each year (rows) and continent (columns), computed
# with daply() and converted to a data frame so that xtable() can render it.
meanLife <- as.data.frame(daply(iDat, ~year + continent, summarize, mean(lifeExp))) # Code adapted from Yumian Hu
# Emit the table as HTML, keeping the row names (the years).
print(xtable(meanLife), type = "html", include.rownames = TRUE)
| | Africa | Americas | Asia | Europe |
|---|---|---|---|---|
| 1952 | 39.14 | 53.28 | 46.31 | 64.41 |
| 1957 | 41.27 | 55.96 | 49.32 | 66.70 |
| 1962 | 43.32 | 58.40 | 51.56 | 68.54 |
| 1967 | 45.33 | 60.41 | 54.66 | 69.74 |
| 1972 | 47.45 | 62.39 | 57.32 | 70.78 |
| 1977 | 49.58 | 64.39 | 59.61 | 71.94 |
| 1982 | 51.59 | 66.23 | 62.62 | 72.81 |
| 1987 | 53.34 | 68.09 | 64.85 | 73.64 |
| 1992 | 53.63 | 69.57 | 66.54 | 74.44 |
| 1997 | 53.60 | 71.15 | 68.02 | 75.51 |
| 2002 | 53.33 | 72.42 | 69.23 | 76.70 |
| 2007 | 54.81 | 73.61 | 70.73 | 77.65 |
# Long-format summary: one row per continent-year pair holding the mean life
# expectancy of that group.
meanLife <- ddply(
  iDat,
  .variables = c("continent", "year"),
  .fun = summarize,
  avgLifeExp = mean(lifeExp)
)
head(meanLife) # Too tall to display everything!
## continent year avgLifeExp
## 1 Africa 1952 39.14
## 2 Africa 1957 41.27
## 3 Africa 1962 43.32
## 4 Africa 1967 45.33
## 5 Africa 1972 47.45
## 6 Africa 1977 49.58
# Basic scatterplot of mean life expectancy over time, grouped by continent,
# with the default legend.
xyplot(
  avgLifeExp ~ year,
  data = meanLife,
  groups = continent,
  auto.key = TRUE
)
# Same plot with horizontal grid lines, points plus a regression line per
# continent, and the legend placed on the right with its rows reversed.
xyplot(
  avgLifeExp ~ year,
  data = meanLife,
  groups = continent,
  grid = "h",
  type = c("p", "r"),
  auto.key = list(space = "right", reverse.rows = TRUE)
)
# Minimum and maximum GDP per capita for every year-continent combination.
# Code adapted from Yumian Hu
# The grouping formula is written as `~year + continent` to match the other
# plyr calls in this file; it names the same two grouping variables, in the
# same order, as `year ~ continent`.
GDPpercap <- ddply(iDat, ~year + continent, summarize,
  minGDP = min(gdpPercap),
  maxGDP = max(gdpPercap)
)
head(GDPpercap) # Too long to show it all
## year continent minGDP maxGDP
## 1 1952 Africa 298.8 4725
## 2 1952 Americas 1397.7 13990
## 3 1952 Asia 331.0 108382
## 4 1952 Europe 973.5 14734
## 5 1957 Africa 336.0 5487
## 6 1957 Americas 1544.4 14847
# Render the full min/max table as HTML, leaving out the row names.
print(xtable(GDPpercap), type = "html", include.rownames = FALSE)
| year | continent | minGDP | maxGDP |
|---|---|---|---|
| 1952 | Africa | 298.85 | 4725.30 |
| 1952 | Americas | 1397.72 | 13990.48 |
| 1952 | Asia | 331.00 | 108382.35 |
| 1952 | Europe | 973.53 | 14734.23 |
| 1957 | Africa | 336.00 | 5487.10 |
| 1957 | Americas | 1544.40 | 14847.13 |
| 1957 | Asia | 350.00 | 113523.13 |
| 1957 | Europe | 1353.99 | 17909.49 |
| 1962 | Africa | 355.20 | 6757.03 |
| 1962 | Americas | 1662.14 | 16173.15 |
| 1962 | Asia | 388.00 | 95458.11 |
| 1962 | Europe | 1709.68 | 20431.09 |
| 1967 | Africa | 412.98 | 18772.75 |
| 1967 | Americas | 1452.06 | 19530.37 |
| 1967 | Asia | 349.00 | 80894.88 |
| 1967 | Europe | 2172.35 | 22966.14 |
| 1972 | Africa | 464.10 | 21011.50 |
| 1972 | Americas | 1654.46 | 21806.04 |
| 1972 | Asia | 357.00 | 109347.87 |
| 1972 | Europe | 2860.17 | 27195.11 |
| 1977 | Africa | 502.32 | 21951.21 |
| 1977 | Americas | 1874.30 | 24072.63 |
| 1977 | Asia | 371.00 | 59265.48 |
| 1977 | Europe | 3528.48 | 26982.29 |
| 1982 | Africa | 462.21 | 17364.28 |
| 1982 | Americas | 2011.16 | 25009.56 |
| 1982 | Asia | 424.00 | 33693.18 |
| 1982 | Europe | 3630.88 | 28397.72 |
| 1987 | Africa | 389.88 | 11864.41 |
| 1987 | Americas | 1823.02 | 29884.35 |
| 1987 | Asia | 385.00 | 28118.43 |
| 1987 | Europe | 3738.93 | 31540.97 |
| 1992 | Africa | 410.90 | 13522.16 |
| 1992 | Americas | 1456.31 | 32003.93 |
| 1992 | Asia | 347.00 | 34932.92 |
| 1992 | Europe | 2497.44 | 33965.66 |
| 1997 | Africa | 312.19 | 14722.84 |
| 1997 | Americas | 1341.73 | 35767.43 |
| 1997 | Asia | 415.00 | 40300.62 |
| 1997 | Europe | 3193.05 | 41283.16 |
| 2002 | Africa | 241.17 | 12521.71 |
| 2002 | Americas | 1270.36 | 39097.10 |
| 2002 | Asia | 611.00 | 36023.11 |
| 2002 | Europe | 4604.21 | 44683.98 |
| 2007 | Africa | 277.55 | 13206.48 |
| 2007 | Americas | 1201.64 | 42951.65 |
| 2007 | Asia | 944.00 | 47306.99 |
| 2007 | Europe | 5937.03 | 49357.19 |
# Minimum GDP per capita over time, grouped by continent.
xyplot(
  minGDP ~ year,
  data = GDPpercap,
  groups = continent,
  auto.key = TRUE
)
# Maximum GDP per capita over time, grouped by continent.
xyplot(
  maxGDP ~ year,
  data = GDPpercap,
  groups = continent,
  auto.key = TRUE
)
# Summarise the GDP-per-capita extremes of one chunk of data.
#
# x: a data frame with a numeric `gdpPercap` column (in this file, the rows of
#    one year-by-continent group passed in by ddply()).
# Returns a two-row data frame: `stat` labels the rows "Min" and "Max" (in that
# row order) and `gdpPerCap` holds the corresponding values.
minmax <- function(x) {
  stat <- c("Min", "Max")
  gdpPerCap <- c(min(x$gdpPercap), max(x$gdpPercap))
  # R >= 4.0.0 no longer converts strings to factors by default. Request it
  # explicitly so `stat` is a factor (levels sorted alphabetically: "Max",
  # "Min") on every R version.
  data.frame(stat, gdpPerCap, stringsAsFactors = TRUE)
}
# Apply minmax() to every year-continent group, giving two rows (Min and Max)
# per group in long format. The grouping formula uses the `~year + continent`
# form to match the other plyr calls in this file; it names the same grouping
# variables, in the same order, as `year ~ continent`.
gdpYear <- ddply(iDat, ~year + continent, minmax)
head(gdpYear)
## year continent stat gdpPerCap
## 1 1952 Africa Min 298.8
## 2 1952 Africa Max 4725.3
## 3 1952 Americas Min 1397.7
## 4 1952 Americas Max 13990.5
## 5 1952 Asia Min 331.0
## 6 1952 Asia Max 108382.4
# Inspect the column types; note the level order of `stat`.
str(gdpYear)
## 'data.frame': 96 obs. of 4 variables:
## $ year : int 1952 1952 1952 1952 1952 1952 1952 1952 1957 1957 ...
## $ continent: Factor w/ 4 levels "Africa","Americas",..: 1 1 2 2 3 3 4 4 1 1 ...
## $ stat : Factor w/ 2 levels "Max","Min": 2 1 2 1 2 1 2 1 2 1 ...
## $ gdpPerCap: num 299 4725 1398 13990 331 ...
# One panel per statistic, with the continents overlaid inside each panel.
xyplot(
  gdpPerCap ~ year | stat,
  data = gdpYear,
  groups = continent,
  auto.key = TRUE
)
# Refinement: draw points joined by lines, let each panel have its own y-axis
# range, and give the legend one column per continent.
xyplot(
  gdpPerCap ~ year | stat,
  data = gdpYear,
  groups = continent,
  type = c("p", "l"),
  scales = list(y = list(relation = "free")),
  auto.key = list(columns = nlevels(gdpYear$continent))
)
# Reorder the levels of `stat` by the smallest gdpPerCap seen in each level.
gdpYear$stat <- with(gdpYear, reorder(stat, gdpPerCap, FUN = min))
# Redraw the refined plot using the reordered panels.
xyplot(
  gdpPerCap ~ year | stat,
  data = gdpYear,
  groups = continent,
  type = c("p", "l"),
  scales = list(y = list(relation = "free")),
  auto.key = list(columns = nlevels(gdpYear$continent))
)