# Location of the Gapminder five-year extract (tab-delimited text file).
gdURL <- "http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt"
# Read the file straight from the URL into a data frame.
gDat <- read.delim(gdURL)
library(lattice)
## Warning: package 'lattice' was built under R version 3.0.2
library(plyr)
## Warning: package 'plyr' was built under R version 3.0.2
library(xtable)
# Fraction of observations dropped from each end before averaging
# (passed to mean() as `trim`).
Trim <- 0.2
# Trimmed mean of life expectancy for each year, ordered by ascending mean.
Trimmed_meanlifeExp <- arrange(
  ddply(gDat, ~ year, summarize, tMean = mean(lifeExp, trim = Trim)),
  tMean
)
# Print `x` as an HTML table via xtable.
#
# x                 object to tabulate (handed to xtable()).
# ...               extra arguments, forwarded to both xtable() and print().
# digits            number of digits passed to xtable(); defaults to 0.
# include.rownames  whether row names are printed; defaults to FALSE.
htmlPrint <- function(x, ..., digits = 0, include.rownames = FALSE) {
  tbl <- xtable(x, digits = digits, ...)
  print(tbl, type = "html", include.rownames = include.rownames, ...)
}
# Trimmed_meanlifeExp is already sorted by tMean when it is built, so it is
# printed directly instead of being re-sorted a second time.
htmlPrint(Trimmed_meanlifeExp)
year | tMean |
---|---|
1952 | 48 |
1957 | 51 |
1962 | 53 |
1967 | 56 |
1972 | 58 |
1977 | 60 |
1982 | 62 |
1987 | 64 |
1992 | 66 |
1997 | 67 |
2002 | 68 |
2007 | 69 |
# Scatter plot of the yearly trimmed mean life expectancy.
xyplot(tMean ~ year, data = Trimmed_meanlifeExp)
# Mean life expectancy for every year/continent combination.
LifeExp_cont <- ddply(gDat, ~ year + continent, summarize,
                      avglifeExp = mean(lifeExp))
# One point per continent and year, with continents distinguished through
# `groups`. The argument name is written in full instead of relying on
# partial matching of `group`.
stripplot(avglifeExp ~ year, data = LifeExp_cont, groups = continent,
          auto.key = TRUE, grid = "h", jitter.data = TRUE)
We can see that, overall, lifeExp is increasing in all continents over time. However, there is a slight drop in Africa around 2000.
# Keep only the observations recorded for 2007.
subset2007 <- subset(gDat, year == 2007)
# Highest and lowest GDP per capita within each continent in 2007.
GDP_cont <- ddply(subset2007, ~ continent, summarize,
                  maxGDP = max(gdpPercap),
                  minGDP = min(gdpPercap))
## Plot max GDP with stripplot
stripplot(maxGDP ~ continent, data = GDP_cont, grid = "h", jitter.data = TRUE)
## Plot min GDP with stripplot
stripplot(minGDP ~ continent, data = GDP_cont, grid = "h", jitter.data = TRUE)
# Box plots of GDP per capita by year, one panel per continent (all years).
bwplot(gdpPercap ~ as.factor(year) | continent, data = gDat)
# Life expectancy at or below this threshold counts as "low".
lowlifeExp <- 60
# For every continent/year group, the percentage (0-100) of rows whose life
# expectancy is at or below the threshold. The share is multiplied by 100 so
# that it agrees with the 0-100 axis range and the "percentage" label used in
# the plot below; an unscaled proportion (0-1) would be squashed against the
# bottom of that axis.
ProplowlifeExp <- ddply(gDat, ~ continent + year, function(x) {
  c(lowlifeExp = 100 * mean(x$lifeExp <= lowlifeExp))
})
# `groups` is spelled out in full rather than relying on partial matching.
stripplot(lowlifeExp ~ year, data = ProplowlifeExp, groups = continent,
          auto.key = TRUE, ylim = c(0, 100),
          ylab = "percentage of Low LifeExp country")
My approach is to first fit a linear model of life expectancy on year for each country, and then find the country with the largest maxResid (the largest absolute residual divided by the residual standard error of the fit).
# Earliest year in the data; used as the origin of the year axis in the
# per-country fits so the intercept refers to that year.
yearMin <- min(gDat[["year"]])
# Fit lifeExp against (year - yearMin) by ordinary least squares on `x`.
#
# x: data frame with `lifeExp` and `year` columns.
# Reads `yearMin` from the enclosing environment.
#
# Returns a named numeric vector:
#   intercept - fitted value at year == yearMin
#   slope     - fitted change in lifeExp per year
#   maxResid  - largest absolute residual divided by the residual
#               standard error of the fit
jFun <- function(x) {
  fit <- lm(lifeExp ~ I(year - yearMin), data = x)
  # Strip the coefficient names so the output names are exactly those given
  # in the c() call below.
  est <- unname(coef(fit))
  worst <- max(abs(resid(fit)))
  c(intercept = est[1],
    slope = est[2],
    maxResid = worst / summary(fit)$sigma)
}
# One row per country: intercept, slope and scaled maximum residual from jFun.
linearModel <- ddply(gDat, ~ country, jFun)
# The row of the country whose linear fit has the largest scaled residual.
country_interest <- linearModel[which.max(linearModel[["maxResid"]]), ]
# Life expectancy over time for that country: points plus a regression line.
xyplot(lifeExp ~ year, data = gDat,
       subset = country %in% country_interest$country,
       type = c("p", "r"))