stat545a-2013-hw04
Import data
## Read the tab-delimited Gapminder extract straight from its URL.
gdURL <- "http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt"
gDat <- read.delim(gdURL)
library(plyr)
library(xtable)
library(lattice)
Drop Oceania right from the start.
## Keep only rows from the four continents listed; everything else is dropped.
gDat <- gDat[gDat$continent %in% c("Africa", "Americas", "Asia", "Europe"), ]
## Rebuild the factor so that levels with no remaining rows disappear.
gDat$continent <- factor(gDat$continent)
## Row count per remaining continent.
summary(gDat$continent)
## Africa Americas Asia Europe
## 624 300 396 360
View how life expectancy changes over time on each continent, in a wide format.
## Mean life expectancy with continents as rows and years as columns.
## The per-group function returns a bare number so that daply() assembles an
## ordinary numeric matrix. Routing the computation through summarize()
## returns a one-cell data frame per group instead, which daply() stores as a
## list-matrix: every cell is then formatted on its own (e.g. "53.6" next to
## "71.15") and the result does not behave like a numeric matrix downstream.
(LeByYear <- daply(gDat, ~continent + year, function(x) mean(x$lifeExp)))
## year
## continent 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997
## Africa 39.14 41.27 43.32 45.33 47.45 49.58 51.59 53.34 53.63 53.6
## Americas 53.28 55.96 58.4 60.41 62.39 64.39 66.23 68.09 69.57 71.15
## Asia 46.31 49.32 51.56 54.66 57.32 59.61 62.62 64.85 66.54 68.02
## Europe 64.41 66.7 68.54 69.74 70.78 71.94 72.81 73.64 74.44 75.51
## year
## continent 2002 2007
## Africa 53.33 54.81
## Americas 72.42 73.61
## Asia 69.23 70.73
## Europe 76.7 77.65
## LeByYear <- xtable(LeByYear) print(LeByYear, type = 'html',
## include.rownames = FALSE)
Get the maximum and minimum of GDP per capita for all continents in a “tall” format
## For each continent/year group, stack the smallest and largest GDP per
## capita as two rows labelled by `stat` (the "tall" layout).
rgGDPByContinent <- ddply(gDat, ~continent + year, function(cell) {
  gdpRange <- range(cell$gdpPercap)
  data.frame(gdpPercap = gdpRange, stat = c("min", "max"))
})
rgGDPByContinent
## continent year gdpPercap stat
## 1 Africa 1952 298.8 min
## 2 Africa 1952 4725.3 max
## 3 Africa 1957 336.0 min
## 4 Africa 1957 5487.1 max
## 5 Africa 1962 355.2 min
## 6 Africa 1962 6757.0 max
## 7 Africa 1967 413.0 min
## 8 Africa 1967 18772.8 max
## 9 Africa 1972 464.1 min
## 10 Africa 1972 21011.5 max
## 11 Africa 1977 502.3 min
## 12 Africa 1977 21951.2 max
## 13 Africa 1982 462.2 min
## 14 Africa 1982 17364.3 max
## 15 Africa 1987 389.9 min
## 16 Africa 1987 11864.4 max
## 17 Africa 1992 410.9 min
## 18 Africa 1992 13522.2 max
## 19 Africa 1997 312.2 min
## 20 Africa 1997 14722.8 max
## 21 Africa 2002 241.2 min
## 22 Africa 2002 12521.7 max
## 23 Africa 2007 277.6 min
## 24 Africa 2007 13206.5 max
## 25 Americas 1952 1397.7 min
## 26 Americas 1952 13990.5 max
## 27 Americas 1957 1544.4 min
## 28 Americas 1957 14847.1 max
## 29 Americas 1962 1662.1 min
## 30 Americas 1962 16173.1 max
## 31 Americas 1967 1452.1 min
## 32 Americas 1967 19530.4 max
## 33 Americas 1972 1654.5 min
## 34 Americas 1972 21806.0 max
## 35 Americas 1977 1874.3 min
## 36 Americas 1977 24072.6 max
## 37 Americas 1982 2011.2 min
## 38 Americas 1982 25009.6 max
## 39 Americas 1987 1823.0 min
## 40 Americas 1987 29884.4 max
## 41 Americas 1992 1456.3 min
## 42 Americas 1992 32003.9 max
## 43 Americas 1997 1341.7 min
## 44 Americas 1997 35767.4 max
## 45 Americas 2002 1270.4 min
## 46 Americas 2002 39097.1 max
## 47 Americas 2007 1201.6 min
## 48 Americas 2007 42951.7 max
## 49 Asia 1952 331.0 min
## 50 Asia 1952 108382.4 max
## 51 Asia 1957 350.0 min
## 52 Asia 1957 113523.1 max
## 53 Asia 1962 388.0 min
## 54 Asia 1962 95458.1 max
## 55 Asia 1967 349.0 min
## 56 Asia 1967 80894.9 max
## 57 Asia 1972 357.0 min
## 58 Asia 1972 109347.9 max
## 59 Asia 1977 371.0 min
## 60 Asia 1977 59265.5 max
## 61 Asia 1982 424.0 min
## 62 Asia 1982 33693.2 max
## 63 Asia 1987 385.0 min
## 64 Asia 1987 28118.4 max
## 65 Asia 1992 347.0 min
## 66 Asia 1992 34932.9 max
## 67 Asia 1997 415.0 min
## 68 Asia 1997 40300.6 max
## 69 Asia 2002 611.0 min
## 70 Asia 2002 36023.1 max
## 71 Asia 2007 944.0 min
## 72 Asia 2007 47307.0 max
## 73 Europe 1952 973.5 min
## 74 Europe 1952 14734.2 max
## 75 Europe 1957 1354.0 min
## 76 Europe 1957 17909.5 max
## 77 Europe 1962 1709.7 min
## 78 Europe 1962 20431.1 max
## 79 Europe 1967 2172.4 min
## 80 Europe 1967 22966.1 max
## 81 Europe 1972 2860.2 min
## 82 Europe 1972 27195.1 max
## 83 Europe 1977 3528.5 min
## 84 Europe 1977 26982.3 max
## 85 Europe 1982 3630.9 min
## 86 Europe 1982 28397.7 max
## 87 Europe 1987 3738.9 min
## 88 Europe 1987 31541.0 max
## 89 Europe 1992 2497.4 min
## 90 Europe 1992 33965.7 max
## 91 Europe 1997 3193.1 min
## 92 Europe 1997 41283.2 max
## 93 Europe 2002 4604.2 min
## 94 Europe 2002 44684.0 max
## 95 Europe 2007 5937.0 min
## 96 Europe 2007 49357.2 max
Examine “typical” life expectancy (namely, the trimmed mean of life expectancy, with trim level = 0.01) for different years.
## Trimmed mean of life expectancy (trim = 0.01), one row per year.
tmLeByYear <- ddply(
  .data = gDat,
  .variables = ~year,
  .fun = summarize,
  meanLifeExp = mean(lifeExp, trim = 0.01)
)
tmLeByYear
## year meanLifeExp
## 1 1952 48.74
## 2 1957 51.23
## 3 1962 53.37
## 4 1967 55.47
## 5 1972 57.48
## 6 1977 59.46
## 7 1982 61.40
## 8 1987 63.09
## 9 1992 64.16
## 10 1997 64.92
## 11 2002 65.57
## 12 2007 66.89
## Trimmed-mean life expectancy over time, drawn as points joined by a line.
## lattice formulas read `y ~ x`, so the response (life expectancy) goes on
## the left and time on the right; the previous `year ~ meanLifeExp` put time
## on the vertical axis.
## NOTE(review): `jitter.data` is an argument of panel.stripplot; panel.xyplot
## takes `jitter.x` / `jitter.y` instead -- confirm whether jitter is wanted.
xyplot(meanLifeExp ~ year, tmLeByYear, jitter.data = TRUE, grid = "h",
       type = c("p", "l"))
Inspect how life expectancy changes over time on each continent.
## Life expectancy against year, one colour and one fitted line per continent.
## The formula is `lifeExp ~ year` (y ~ x) so that type = "r" regresses life
## expectancy on time and each slope reads as growth per year; the previous
## `year ~ lifeExp` regressed time on life expectancy and inverted the axes.
## NOTE(review): `jitter.data` is an argument of panel.stripplot; panel.xyplot
## takes `jitter.x` / `jitter.y` instead -- confirm whether jitter is wanted.
xyplot(lifeExp ~ year, gDat, groups = continent, jitter.data = TRUE,
       auto.key = TRUE, grid = "h", type = c("p", "r"))
As shown in the figure, Asia is the continent with the fastest growth in life expectancy during 1950-2007, followed by the Americas. Africa has had the slowest increase in life expectancy over (roughly) the past 20 years. Europe has kept a high level of life expectancy and has grown slowly since 1950.
Depict the maximum and minimum of GDP per capita for all continents.
## Minimum and maximum GDP per capita over time, one panel per continent.
## `groups = stat` separates the "min" rows from the "max" rows; without a
## grouping variable the two series are drawn identically and auto.key has
## nothing to build a legend from.
xyplot(gdpPercap ~ year | continent, rgGDPByContinent, groups = stat,
       jitter.data = TRUE, auto.key = TRUE, grid = "h")
Spread of GDP per capita within the continents in 2007.
## Cross-section of the data for the year 2007 only.
tpGdpPercap <- subset(gDat, year == 2007)
## Violin outline with a slim box-and-whisker drawn on top of it for each
## continent; continents are ordered via reorder() on gdpPercap.
bwplot(
  gdpPercap ~ reorder(continent, gdpPercap),
  data = tpGdpPercap,
  panel = function(..., box.ratio) {
    ## Violins use the box.ratio handed to the panel by bwplot().
    panel.violin(..., box.ratio = box.ratio, varwidth = FALSE,
                 col = "transparent", border = "grey60")
    ## The boxes are drawn with a fixed, much smaller ratio.
    panel.bwplot(..., box.ratio = 0.1, fill = NULL)
  }
)
Depict the number and/or proportion of countries with low life expectancy over time by continent.
## Benchmark: a life expectancy at or below this value counts as "low".
bMark <- 48.2
## Per continent and year: how many rows fall at or below the benchmark, and
## what share of that group's rows they make up.
proLoLifeExp <- ddply(gDat, ~continent + year, function(cell) {
  nLow <- sum(cell$lifeExp <= bMark)
  c(count = nLow, prop = nLow / nrow(cell))
})
## Count of low-life-expectancy rows per year, one panel per continent.
xyplot(count ~ year | continent, data = proLoLifeExp,
       grid = "h", auto.key = TRUE, jitter.data = TRUE)
## Same layout, showing the proportion instead of the count.
xyplot(prop ~ year | continent, data = proLoLifeExp,
       grid = "h", auto.key = TRUE, jitter.data = TRUE)
## proLoLifeExp
Find countries with extremely low or high life expectancy in 1952.
## Cross-section of the data for the year 1952 only.
tDat <- subset(gDat, year == 1952)
## One strip per country. Countries are ordered by their life expectancy
## rather than alphabetically, so the extremes sit at the two ends of the
## axis and are easy to pick out. `ylab` restores the plain axis title that
## the reorder() call would otherwise replace with its own expression.
stripplot(reorder(country, lifeExp) ~ lifeExp, tDat, jitter.data = TRUE,
          auto.key = TRUE, grid = "h", ylab = "country")
TODO: how can this figure be made easier to read?
Find countries with sudden, substantial departures from the temporal trend in one of the quantitative measures
## Life expectancy against year for every row of the data, with points ("p"),
## a reference grid ("g") and a fitted regression line ("r").
xyplot(lifeExp ~ year, data = gDat,
       type = c("p", "g", "r"), auto.key = TRUE, jitter.data = TRUE)
## TODO: how to add labels to individual points?