STAT545a-2013-hw4_Liu
Read data into R
# The data file path below is relative, so run/knit this from the directory
# that contains gapminderDataFiveYear.txt. No machine-specific setwd() is used,
# which keeps the analysis runnable on other computers.
library(plyr)
library(lattice)
library(xtable)
library(knitr)
# install.packages('locfit')
# Gapminder data in five-year steps. stringsAsFactors = TRUE keeps country and
# continent as factors (as in the str() output below) on R >= 4.0.0, where the
# default for read.delim() changed to FALSE.
gDat <- read.delim("gapminderDataFiveYear.txt", stringsAsFactors = TRUE)
str(gDat)
## 'data.frame': 1704 obs. of 6 variables:
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ pop : num 8425333 9240934 10267083 11537966 13079460 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ gdpPercap: num 779 821 853 836 740 ...
I. Choose your own adventure
# Life expectancy against year, one panel per continent: points plus a
# regression line in each panel.
xyplot(
  lifeExp ~ year | continent,
  data = gDat,
  type = c("p", "r")
)
# Shift year by the earliest year in the data so the intercept of the pooled
# fit is the fitted life expectancy in that first year.
yearMin <- min(gDat[["year"]])
fit <- lm(lifeExp ~ I(year - yearMin), data = gDat)
# Diagnostic plots for the fitted linear model.
plot(fit)
# Pooled scatterplot of the same relationship with the fitted line overlaid.
plot(gDat$lifeExp ~ I(gDat$year - yearMin))
abline(fit)
II. Examine “typical” life expectancy for different years: the median and trimmed mean of life expectancy over the years
# Median life expectancy per year, rounded to 2 decimals. ddply() already
# returns a data frame, so no further conversion is needed.
Medlif <- ddply(gDat, ~year, summarize,
  median = round(median(lifeExp), 2)
)
# Single-panel plot (points, grid, regression line). No layout is supplied so
# the one panel fills the whole page instead of one cell of a multi-panel grid.
xyplot(median ~ year, Medlif, type = c("p", "g", "r"))
# Trimmed mean of life expectancy per year: trim = 0.05 drops 5% of the
# observations from each end before averaging; rounded to 2 decimals.
Meanlif <- ddply(gDat, ~year, summarize,
  trimean = round(mean(lifeExp, trim = 0.05), 2)
)
xyplot(trimean ~ year, Meanlif, type = c("p", "g", "r"))
IV. How is life expectancy changing over time on different continents?
# Median life expectancy for every continent/year combination (2 decimals).
Medlifbycon <- ddply(
  gDat, ~ continent + year, summarize,
  median = round(median(lifeExp), 2)
)
# One panel per continent on a 4-column by 2-row grid: points, grid lines and
# a regression line.
xyplot(
  median ~ year | continent,
  data = Medlifbycon,
  layout = c(4, 2),
  type = c("p", "g", "r")
)
# Same summary using the 5% trimmed mean instead of the median.
Meanlifbycon <- ddply(
  gDat, ~ continent + year, summarize,
  trimean = round(mean(lifeExp, trim = 0.05), 2)
)
xyplot(
  trimean ~ year | continent,
  data = Meanlifbycon,
  layout = c(4, 2),
  type = c("p", "g", "r")
)
V. Depict the maximum and minimum of GDP per capita for all continents in 2007.
# Keep only the observations for 2007.
hDat <- subset(gDat, year == 2007)
# table(hDat$year);head(hDat)
# For each continent, return the row of the country with the lowest GDP per
# capita in 2007.
ddply(hDat, ~continent, function(contDat) {
  poorest <- which.min(contDat$gdpPercap)
  contDat[poorest, c("country", "year", "continent", "gdpPercap")]
})
## country year continent gdpPercap
## 1 Congo, Dem. Rep. 2007 Africa 277.6
## 2 Haiti 2007 Americas 1201.6
## 3 Myanmar 2007 Asia 944.0
## 4 Albania 2007 Europe 5937.0
## 5 New Zealand 2007 Oceania 25185.0
# Jittered strip plot of GDP per capita by continent; the "a" layer joins the
# per-continent minima (fun = min).
stripplot(
  gdpPercap ~ continent,
  data = hDat,
  jitter.data = TRUE,
  grid = "h",
  type = c("p", "a"),
  fun = min
)
# For each continent, return the row of the country with the highest GDP per
# capita in 2007.
ddply(hDat, ~continent, function(contDat) {
  richest <- which.max(contDat$gdpPercap)
  contDat[richest, c("country", "year", "continent", "gdpPercap")]
})
## country year continent gdpPercap
## 1 Gabon 2007 Africa 13206
## 2 United States 2007 Americas 42952
## 3 Kuwait 2007 Asia 47307
## 4 Norway 2007 Europe 49357
## 5 Australia 2007 Oceania 34435
# Jittered strip plot of GDP per capita by continent; the "a" layer joins the
# per-continent maxima (fun = max).
stripplot(
  gdpPercap ~ continent,
  data = hDat,
  jitter.data = TRUE,
  grid = "h",
  type = c("p", "a"),
  fun = max
)
VI. Look at the spread of GDP per capita within the continents in 2007.
# Standard deviation of 2007 GDP per capita within each continent.
# The function given to ddply() returns a one-row data frame per continent
# holding the statistic as a column; a standard deviation is a measurement,
# not a row position, so it must not be used to index rows of x.
ddply(hDat, ~continent, function(x) {
  data.frame(sdGdpPercap = sd(x$gdpPercap))
})
# Three measures of spread of 2007 GDP per capita per continent, each rounded
# to 2 decimals: standard deviation, median absolute deviation (mad) and
# interquartile range (IQR). Columns are named after the statistic they hold.
sdGDP <- ddply(hDat, ~continent, summarize,
  sd = round(sd(gdpPercap), 2),
  mad = round(mad(gdpPercap), 2),
  IQR = round(IQR(gdpPercap), 2)
)
# Display the table so it appears in the report.
sdGDP
# Jittered strip plot of GDP per capita by continent; the "a" layer joins the
# per-continent averages.
stripplot(gdpPercap ~ continent, hDat,
  jitter.data = TRUE, grid = "h",
  type = c("p", "a")
)