user — Sep 18, 2013, 2:03 PM
#class notes-Lecture # 4
gdURL<-"http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt"
gDat<-read.delim(gdURL)
str(gDat)
'data.frame': 1704 obs. of 6 variables:
$ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
$ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
$ pop : num 8425333 9240934 10267083 11537966 13079460 ...
$ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
$ lifeExp : num 28.8 30.3 32 34 36.1 ...
$ gdpPercap: num 779 821 853 836 740 ...
#read.table(gdURL, header =TRUE, sep="\t",quote="\"")
library(lattice)
library(plyr)
maxLeByCont <- ddply(gDat,~continent, summarize,maxLifeExp=max(lifeExp))
str(maxLeByCont)
'data.frame': 5 obs. of 2 variables:
$ continent : Factor w/ 5 levels "Africa","Americas",..: 1 2 3 4 5
$ maxLifeExp: num 76.4 80.7 82.6 81.8 81.2
minGDPByCont <- ddply(gDat,~continent, summarize,minGDP=min(gdpPercap))
minGDPByCont
continent minGDP
1 Africa 241.2
2 Americas 1201.6
3 Asia 331.0
4 Europe 973.5
5 Oceania 10039.6
str(minGDPByCont)
'data.frame': 5 obs. of 2 variables:
$ continent: Factor w/ 5 levels "Africa","Americas",..: 1 2 3 4 5
$ minGDP : num 241 1202 331 974 10040
length(unique(gDat$country))
[1] 142
ddply(gDat, ~continent, summarize, nUniqCoutries = length(unique(country)))
continent nUniqCoutries
1 Africa 52
2 Americas 25
3 Asia 33
4 Europe 30
5 Oceania 2
ddply(gDat, ~ continent, summarize, minlifeExp = min(lifeExp), maxlifeExp = max(lifeExp), gdpPercap = median(gdpPercap))
continent minlifeExp maxlifeExp gdpPercap
1 Africa 23.60 76.44 1192
2 Americas 37.58 80.65 5466
3 Asia 28.80 82.60 2647
4 Europe 43.59 81.76 12082
5 Oceania 69.12 81.23 17983
jCountry <- "France" # pick, but do not hard wire, on example
(jDat <- subset(gDat, country == jCountry))
country year pop continent lifeExp gdpPercap
529 France 1952 42459667 Europe 67.41 7030
530 France 1957 44310863 Europe 68.93 8663
531 France 1962 47124000 Europe 70.51 10560
532 France 1967 49569000 Europe 71.55 13000
533 France 1972 51732000 Europe 72.38 16107
534 France 1977 53165019 Europe 73.83 18293
535 France 1982 54433565 Europe 74.89 20294
536 France 1987 55630100 Europe 76.34 22066
537 France 1992 57374179 Europe 77.46 24704
538 France 1997 58623428 Europe 78.64 25890
539 France 2002 59925035 Europe 79.59 28926
540 France 2007 61083916 Europe 80.66 30470
xyplot(lifeExp ~ year, jDat, type = c("p", "r"))
jFit <- lm(lifeExp ~ year, jDat)
summary(jFit)
Call:
lm(formula = lifeExp ~ year, data = jDat)
Residuals:
Min 1Q Median 3Q Max
-0.3801 -0.1389 0.0124 0.1429 0.3349
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.98e+02 7.29e+00 -54.6 1.0e-13 ***
year 2.38e-01 3.68e-03 64.8 1.9e-14 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.22 on 10 degrees of freedom
Multiple R-squared: 0.998, Adjusted R-squared: 0.997
F-statistic: 4.2e+03 on 1 and 10 DF, p-value: 1.86e-14
(yearMin <- min(gDat$year))
[1] 1952
jFit <- lm(lifeExp ~ I(year - yearMin), jDat)
summary(jFit)
Call:
lm(formula = lifeExp ~ I(year - yearMin), data = jDat)
Residuals:
Min 1Q Median 3Q Max
-0.3801 -0.1389 0.0124 0.1429 0.3349
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 67.79013 0.11949 567.3 < 2e-16 ***
I(year - yearMin) 0.23850 0.00368 64.8 1.9e-14 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.22 on 10 degrees of freedom
Multiple R-squared: 0.998, Adjusted R-squared: 0.997
F-statistic: 4.2e+03 on 1 and 10 DF, p-value: 1.86e-14
#I inhabit interpretation of Objects
?I
starting httpd help server ... done
class(jFit)
[1] "lm"
model(jFit)
Error: could not find function "model"
names(jFit)
[1] "coefficients" "residuals" "effects" "rank"
[5] "fitted.values" "assign" "qr" "df.residual"
[9] "xlevels" "call" "terms" "model"
jFit$coefficients
(Intercept) I(year - yearMin)
67.7901 0.2385
coef(jFit)
(Intercept) I(year - yearMin)
67.7901 0.2385
jFun <- function(x) coef(lm(lifeExp ~ I(year - yearMin), x))
jFun(jDat) # trying out our new function ... yes still get same numbers
(Intercept) I(year - yearMin)
67.7901 0.2385
jFun <- function(x) {
estCoefs <- coef(lm(lifeExp ~ I(year - yearMin), x))
names(estCoefs) <- c("intercept", "slope")
return(estCoefs)
}
jFun(jDat) # trying out our improved function ... yes still get same numbers
intercept slope
67.7901 0.2385
jFun(subset(gDat, country == "Canada"))
intercept slope
68.8838 0.2189
jFun(subset(gDat, country == "Uruguay"))
intercept slope
65.7416 0.1833
jFun(subset(gDat, country == "India"))
intercept slope
39.2698 0.5053
jCoefs <- ddply(gDat, ~country, jFun)
str(jCoefs)
'data.frame': 142 obs. of 3 variables:
$ country : Factor w/ 142 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 10 ...
$ intercept: num 29.9 59.2 43.4 32.1 62.7 ...
$ slope : num 0.275 0.335 0.569 0.209 0.232 ...
tail(jCoefs)
country intercept slope
137 Venezuela 57.51 0.32972
138 Vietnam 39.01 0.67162
139 West Bank and Gaza 43.80 0.60110
140 Yemen, Rep. 30.13 0.60546
141 Zambia 47.66 -0.06043
142 Zimbabwe 55.22 -0.09302
install.packages("xtable", dependencies = TRUE)
Installing package into 'C:/Users/user/Documents/R/win-library/3.0' (as
'lib' is unspecified)
Error: trying to use CRAN without setting a mirror
library(xtable)
jCoefs <- ddply(gDat, ~country, jFun)
#jCoefs
jCoefs <- ddply(gDat, ~country + continent, jFun)
str(jCoefs)
'data.frame': 142 obs. of 4 variables:
$ country : Factor w/ 142 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 10 ...
$ continent: Factor w/ 5 levels "Africa","Americas",..: 3 4 1 1 2 5 4 3 3 4 ...
$ intercept: num 29.9 59.2 43.4 32.1 62.7 ...
$ slope : num 0.275 0.335 0.569 0.209 0.232 ...
tail(jCoefs)
country continent intercept slope
137 Venezuela Americas 57.51 0.32972
138 Vietnam Asia 39.01 0.67162
139 West Bank and Gaza Asia 43.80 0.60110
140 Yemen, Rep. Asia 30.13 0.60546
141 Zambia Africa 47.66 -0.06043
142 Zimbabwe Africa 55.22 -0.09302
set.seed(916)
foo<-jCoefs[sample(nrow(jCoefs),size =15),]
foo<-xtable(foo)
foo
% latex table generated in R 3.0.1 by xtable 1.7-1 package
% Wed Sep 18 14:03:55 2013
\begin{table}[ht]
\centering
\begin{tabular}{rllrr}
\hline
& country & continent & intercept & slope \\
\hline
73 & Lebanon & Asia & 58.69 & 0.26 \\
111 & Senegal & Africa & 36.75 & 0.50 \\
37 & Dominican Republic & Americas & 48.60 & 0.47 \\
97 & Oman & Asia & 37.21 & 0.77 \\
48 & Germany & Europe & 67.57 & 0.21 \\
70 & Korea, Dem. Rep. & Asia & 54.91 & 0.32 \\
82 & Mauritius & Africa & 55.37 & 0.35 \\
115 & Slovak Republic & Europe & 67.01 & 0.13 \\
27 & Comoros & Africa & 40.00 & 0.45 \\
5 & Argentina & Americas & 62.69 & 0.23 \\
22 & Central African Republic & Africa & 38.81 & 0.18 \\
38 & Ecuador & Americas & 49.07 & 0.50 \\
139 & West Bank and Gaza & Asia & 43.80 & 0.60 \\
39 & Egypt & Africa & 40.97 & 0.56 \\
88 & Myanmar & Asia & 41.41 & 0.43 \\
\hline
\end{tabular}
\end{table}
print(foo,type = "html",include.rownames=FALSE)
<!-- html table generated in R 3.0.1 by xtable 1.7-1 package -->
<!-- Wed Sep 18 14:03:55 2013 -->
<TABLE border=1>
<TR> <TH> country </TH> <TH> continent </TH> <TH> intercept </TH> <TH> slope </TH> </TR>
<TR> <TD> Lebanon </TD> <TD> Asia </TD> <TD align="right"> 58.69 </TD> <TD align="right"> 0.26 </TD> </TR>
<TR> <TD> Senegal </TD> <TD> Africa </TD> <TD align="right"> 36.75 </TD> <TD align="right"> 0.50 </TD> </TR>
<TR> <TD> Dominican Republic </TD> <TD> Americas </TD> <TD align="right"> 48.60 </TD> <TD align="right"> 0.47 </TD> </TR>
<TR> <TD> Oman </TD> <TD> Asia </TD> <TD align="right"> 37.21 </TD> <TD align="right"> 0.77 </TD> </TR>
<TR> <TD> Germany </TD> <TD> Europe </TD> <TD align="right"> 67.57 </TD> <TD align="right"> 0.21 </TD> </TR>
<TR> <TD> Korea, Dem. Rep. </TD> <TD> Asia </TD> <TD align="right"> 54.91 </TD> <TD align="right"> 0.32 </TD> </TR>
<TR> <TD> Mauritius </TD> <TD> Africa </TD> <TD align="right"> 55.37 </TD> <TD align="right"> 0.35 </TD> </TR>
<TR> <TD> Slovak Republic </TD> <TD> Europe </TD> <TD align="right"> 67.01 </TD> <TD align="right"> 0.13 </TD> </TR>
<TR> <TD> Comoros </TD> <TD> Africa </TD> <TD align="right"> 40.00 </TD> <TD align="right"> 0.45 </TD> </TR>
<TR> <TD> Argentina </TD> <TD> Americas </TD> <TD align="right"> 62.69 </TD> <TD align="right"> 0.23 </TD> </TR>
<TR> <TD> Central African Republic </TD> <TD> Africa </TD> <TD align="right"> 38.81 </TD> <TD align="right"> 0.18 </TD> </TR>
<TR> <TD> Ecuador </TD> <TD> Americas </TD> <TD align="right"> 49.07 </TD> <TD align="right"> 0.50 </TD> </TR>
<TR> <TD> West Bank and Gaza </TD> <TD> Asia </TD> <TD align="right"> 43.80 </TD> <TD align="right"> 0.60 </TD> </TR>
<TR> <TD> Egypt </TD> <TD> Africa </TD> <TD align="right"> 40.97 </TD> <TD align="right"> 0.56 </TD> </TR>
<TR> <TD> Myanmar </TD> <TD> Asia </TD> <TD align="right"> 41.41 </TD> <TD align="right"> 0.43 </TD> </TR>
</TABLE>