Exercise 1

Read in data

gap = read.csv("datafiles/gapminderData5.csv")
library(ggplot2)
library(nlme)

Use the tapply() function to estimate the mean life expectancy by country

tapply(gap$lifeExp, gap$country, mean)
##              Afghanistan                  Albania                  Algeria 
##                 37.47883                 68.43292                 59.03017 
##                   Angola                Argentina                Australia 
##                 37.88350                 69.06042                 74.66292 
##                  Austria                  Bahrain               Bangladesh 
##                 73.10325                 65.60567                 49.83408 
##                  Belgium                    Benin                  Bolivia 
##                 73.64175                 48.77992                 52.50458 
##   Bosnia and Herzegovina                 Botswana                   Brazil 
##                 67.70783                 54.59750                 62.23950 
##                 Bulgaria             Burkina Faso                  Burundi 
##                 69.74375                 44.69400                 44.81733 
##                 Cambodia                 Cameroon                   Canada 
##                 47.90275                 48.12850                 74.90275 
## Central African Republic                     Chad                    Chile 
##                 43.86692                 46.77358                 67.43092 
##                    China                 Colombia                  Comoros 
##                 61.78514                 63.89775                 52.38175 
##         Congo, Dem. Rep.              Congo, Rep.               Costa Rica 
##                 44.54375                 52.50192                 70.18142 
##            Cote d'Ivoire                  Croatia                     Cuba 
##                 48.43617                 70.05592                 71.04508 
##           Czech Republic                  Denmark                 Djibouti 
##                 71.51050                 74.37017                 46.38075 
##       Dominican Republic                  Ecuador                    Egypt 
##                 61.55450                 62.81683                 56.24300 
##              El Salvador        Equatorial Guinea                  Eritrea 
##                 59.63333                 42.96000                 45.99925 
##                 Ethiopia                  Finland                   France 
##                 44.47575                 72.99192                 74.34892 
##                    Gabon                   Gambia                  Germany 
##                 51.22050                 44.40058                 73.44442 
##                    Ghana                   Greece                Guatemala 
##                 52.34067                 73.73317                 56.72942 
##                   Guinea            Guinea-Bissau                    Haiti 
##                 43.23983                 39.21025                 50.16525 
##                 Honduras         Hong Kong, China                  Hungary 
##                 57.92083                 73.49283                 69.39317 
##                  Iceland                    India                Indonesia 
##                 76.51142                 53.16608                 54.33575 
##                     Iran                     Iraq                  Ireland 
##                 58.63658                 56.58175                 73.01725 
##                   Israel                    Italy                  Jamaica 
##                 73.64583                 74.01383                 68.74933 
##                    Japan                   Jordan                    Kenya 
##                 74.82692                 59.78642                 52.68100 
##         Korea, Dem. Rep.              Korea, Rep.                   Kuwait 
##                 63.60733                 65.00100                 68.92233 
##                  Lebanon                  Lesotho                  Liberia 
##                 65.86567                 50.00708                 42.47625 
##                    Libya               Madagascar                   Malawi 
##                 59.30417                 47.77058                 43.35158 
##                 Malaysia                     Mali               Mauritania 
##                 64.27958                 43.41350                 52.30208 
##                Mauritius                   Mexico                 Mongolia 
##                 64.95325                 65.40883                 55.89033 
##               Montenegro                  Morocco               Mozambique 
##                 70.29917                 57.60883                 40.37950 
##                  Myanmar                  Namibia                    Nepal 
##                 53.32167                 53.49133                 48.98633 
##              Netherlands              New Zealand                Nicaragua 
##                 75.64850                 73.98950                 58.34942 
##                    Niger                  Nigeria                   Norway 
##                 44.55867                 43.58133                 75.84300 
##                     Oman                 Pakistan                   Panama 
##                 58.44267                 54.88225                 67.80175 
##                 Paraguay                     Peru              Philippines 
##                 66.80908                 58.85933                 60.96725 
##                   Poland                 Portugal              Puerto Rico 
##                 70.17692                 70.41983                 72.73933 
##                  Reunion                  Romania                   Rwanda 
##                 66.64425                 68.29067                 41.48158 
##    Sao Tome and Principe             Saudi Arabia                  Senegal 
##                 57.89633                 58.67875                 50.62592 
##                   Serbia             Sierra Leone                Singapore 
##                 68.55100                 36.76917                 71.22025 
##          Slovak Republic                 Slovenia                  Somalia 
##                 70.69608                 71.60075                 40.98867 
##             South Africa                    Spain                Sri Lanka 
##                 53.99317                 74.20342                 66.52608 
##                    Sudan                Swaziland                   Sweden 
##                 48.40050                 49.00242                 76.17700 
##              Switzerland                    Syria                   Taiwan 
##                 75.56508                 61.34617                 70.33667 
##                 Tanzania                 Thailand                     Togo 
##                 47.91233                 62.20025                 51.49875 
##      Trinidad and Tobago                  Tunisia                   Turkey 
##                 66.82800                 60.72100                 59.69642 
##                   Uganda           United Kingdom            United States 
##                 47.61883                 73.92258                 73.47850 
##                  Uruguay                Venezuela                  Vietnam 
##                 70.78158                 66.58067                 57.47950 
##       West Bank and Gaza              Yemen, Rep.                   Zambia 
##                 60.32867                 46.78042                 45.99633 
##                 Zimbabwe 
##                 52.66317

Make a scatter plot of lifeExp vs. year for all countries

Note: please see attached image uploaded to canvas for the scatter plot

myplot = ggplot(gap, aes(x = year, y = lifeExp, group = country))
myplot = myplot + geom_point() + facet_wrap("country")
ggsave("myplot.pdf", width = 30, height = 30)

Edit the given code to make plots of life expectancy vs year for two countries (not the United States)

Life Expectancy vs Year for Mongolia

gap_sub <- subset(gap, country == 'Mongolia')
plot(gap_sub$year, gap_sub$lifeExp, main = "Life Expectancy for Mongolia", xlab = "year", ylab = "lifeExp", type ='l')

Life Expectancy vs Year for Afghanistan

gap_sub2 <- subset(gap, country == 'Afghanistan')
plot(gap_sub2$year, gap_sub2$lifeExp, main = "Life Expectancy for Afghanistan", xlab = "year", ylab = "lifeExp", type ='l')

center the year variable by subtracting 1952 by all values

gap$year <- (gap$year - 1952)

Model life expectancy as a function of year

gap.lm <- lm(lifeExp ~ year, data = gap)
summary(gap.lm)
## 
## Call:
## lm(formula = lifeExp ~ year, data = gap)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.949  -9.651   1.697  10.335  22.158 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 50.51208    0.53000   95.31   <2e-16 ***
## year         0.32590    0.01632   19.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.63 on 1702 degrees of freedom
## Multiple R-squared:  0.1898, Adjusted R-squared:  0.1893 
## F-statistic: 398.6 on 1 and 1702 DF,  p-value: < 2.2e-16
coef(gap.lm)
## (Intercept)        year 
##  50.5120841   0.3259038

The coefficient of the intercept is 50.5120841 and the coefficient of year is 0.3259038. The significance of both intercept and year is 2e-16. The F-statistic is 398.6 with an associated p value of 2.2e-16.The Intercept is the average life expectancy for the year 1952. The year coefficient shows life expectancy will increase by 0.3259038 per unit increase in year.

Build a random intercept model using the lme() function, using country as the grouping variable for the random effect

gap.lme1 = lme(lifeExp ~ year, random = ~ 1 | country, data = gap)

summarize

summary(gap.lme1)
## Linear mixed-effects model fit by REML
##   Data: gap 
##        AIC      BIC    logLik
##   9874.263 9896.022 -4933.132
## 
## Random effects:
##  Formula: ~1 | country
##         (Intercept) Residual
## StdDev:    11.09716 3.584196
## 
## Fixed effects:  lifeExp ~ year 
##                Value Std.Error   DF  t-value p-value
## (Intercept) 50.51208 0.9454671 1561 53.42553       0
## year         0.32590 0.0050305 1561 64.78581       0
##  Correlation: 
##      (Intr)
## year -0.146
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -6.1691513 -0.4910339  0.1216455  0.6030002  2.4194109 
## 
## Number of Observations: 1704
## Number of Groups: 142

The fixed effect coefficients are life expectancy (intercept) and year and their associated p-values are both 0.

Standard deviation for Intercept:11.09716

Standard deviation for Residual: 3.584196

Calculate the ICC for the random intercept in this model

VarCorr(gap.lme1)
## country = pdLogChol(1) 
##             Variance  StdDev   
## (Intercept) 123.14688 11.097156
## Residual     12.84646  3.584196

Math for intercept divided by sum of variances

123.14688/(123.14688+12.84646)
## [1] 0.9055361

The ICC = 0.9055361. The ICC score does not support the use of the random intercept model because the variance is less than 5%.

Build a third model with a random intercept and slope.

gap.lme2 = lme(lifeExp ~ year, random = ~ year | country, data = gap)

summary(gap.lme2)
## Linear mixed-effects model fit by REML
##   Data: gap 
##        AIC      BIC    logLik
##   8747.277 8779.915 -4367.639
## 
## Random effects:
##  Formula: ~year | country
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev     Corr  
## (Intercept) 12.3027957 (Intr)
## year         0.1582867 -0.434
## Residual     2.1807917       
## 
## Fixed effects:  lifeExp ~ year 
##                Value Std.Error   DF  t-value p-value
## (Intercept) 50.51208 1.0371995 1561 48.70045       0
## year         0.32590 0.0136312 1561 23.90865       0
##  Correlation: 
##      (Intr)
## year -0.439
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -8.08192929 -0.34891405  0.02690066  0.39742349  4.67388085 
## 
## Number of Observations: 1704
## Number of Groups: 142

Get AIC for three models

AIC(gap.lm)
## [1] 13201.73
AIC(gap.lme1)
## [1] 9874.263
AIC(gap.lme2)
## [1] 8747.277

The gap.lme2, random intercept and slope model is the best because it has the lowest AIC score.