library(alr4)
## Loading required package: car
## Loading required package: carData
## Loading required package: effects
## Warning: package 'effects' was built under R version 3.6.2
## Registered S3 methods overwritten by 'lme4':
## method from
## cooks.distance.influence.merMod car
## influence.merMod car
## dfbeta.influence.merMod car
## dfbetas.influence.merMod car
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
data(UN)
names(UN)
## [1] "region" "group" "fertility" "ppgdp"
## [5] "lifeExpF" "pctUrban" "infantMortality"
str(UN)
## 'data.frame': 213 obs. of 7 variables:
## $ region : Factor w/ 8 levels "Africa","Asia",..: 2 4 1 NA 1 3 5 2 3 8 ...
## $ group : Factor w/ 3 levels "oecd","other",..: 2 2 3 NA 3 2 2 2 2 1 ...
## $ fertility : num 5.97 1.52 2.14 NA 5.13 ...
## $ ppgdp : num 499 3677 4473 NA 4322 ...
## $ lifeExpF : num 49.5 80.4 75 NA 53.2 ...
## $ pctUrban : num 23 53 67 NA 59 100 93 64 47 89 ...
## $ infantMortality: num 124.5 16.6 21.5 11.3 96.2 ...
summary(UN)
## region group fertility ppgdp
## Africa :53 oecd : 31 Min. :1.134 Min. : 114.8
## Asia :50 other :115 1st Qu.:1.754 1st Qu.: 1283.0
## Europe :39 africa: 53 Median :2.262 Median : 4684.5
## Latin Amer:20 NA's : 14 Mean :2.761 Mean : 13012.0
## Caribbean :17 3rd Qu.:3.545 3rd Qu.: 15520.5
## (Other) :20 Max. :6.925 Max. :105095.4
## NA's :14 NA's :14 NA's :14
## lifeExpF pctUrban infantMortality
## Min. :48.11 Min. : 11.00 Min. : 1.916
## 1st Qu.:65.66 1st Qu.: 39.00 1st Qu.: 7.019
## Median :75.89 Median : 59.00 Median : 19.007
## Mean :72.29 Mean : 57.93 Mean : 29.440
## 3rd Qu.:79.58 3rd Qu.: 75.00 3rd Qu.: 44.477
## Max. :87.12 Max. :100.00 Max. :124.535
## NA's :14 NA's :14 NA's :6
sum(is.na(UN))
## [1] 90
newdata <- na.omit(UN)
currentmod<-mod0<- lm(lifeExpF~1, data=newdata)
mod1<-lm(lifeExpF~region, data=newdata)
mod2<-lm(lifeExpF~ppgdp,data=newdata)
mod3<-lm(lifeExpF~pctUrban,data=newdata)
mod4<-lm(lifeExpF~infantMortality,data=newdata)
mod5<-lm(lifeExpF~fertility,data=newdata)
anova(currentmod , mod1)
## Analysis of Variance Table
##
## Model 1: lifeExpF ~ 1
## Model 2: lifeExpF ~ region
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 192 19906.4
## 2 186 7278.9 6 12628 53.779 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pvals<-c(anova(currentmod,mod1)[6],
+ anova(currentmod,mod2)[6],
+ anova(currentmod,mod3)[6],
+ anova(currentmod,mod4)[6],
+ anova(currentmod,mod5)[6])
pvals
## $`Pr(>F)`
## [1] NA 4.222081e-38
##
## $`Pr(>F)`
## [1] NA 3.673818e-17
##
## $`Pr(>F)`
## [1] NA 6.337908e-20
##
## $`Pr(>F)`
## [1] NA 8.225623e-87
##
## $`Pr(>F)`
## [1] NA 5.719746e-49
#since pvals show mod4 is best, we change that to the new current mod
currentmod <- mod4
mod12<-lm(lifeExpF~infantMortality+region, data=newdata)
mod13<-lm(lifeExpF~infantMortality+group , data=newdata)
mod14<-lm(lifeExpF~infantMortality+fertility,data=newdata)
mod15<-lm(lifeExpF~infantMortality+pctUrban, data=newdata)
pvals2<-c(
+ anova(currentmod,mod12)[6],
+ anova(currentmod,mod13)[6],
+ anova(currentmod,mod14)[6],
+ anova(currentmod,mod15)[6])
pvals2
## $`Pr(>F)`
## [1] NA 3.543629e-07
##
## $`Pr(>F)`
## [1] NA 3.85907e-09
##
## $`Pr(>F)`
## [1] NA 0.08196719
##
## $`Pr(>F)`
## [1] NA 0.07870966
currentmod <- mod13
moda<-lm(lifeExpF~infantMortality+group +region, data=newdata)
modb<-lm(lifeExpF~infantMortality+group+ fertility,data=newdata)
modc<-lm(lifeExpF~infantMortality+group+pctUrban, data = newdata)
pvals3 <-c(
+ anova(currentmod,moda)[6],
+ anova(currentmod,modb)[6],
+ anova(currentmod,modc) [6])
pvals3
## $`Pr(>F)`
## [1] NA 0.004085691
##
## $`Pr(>F)`
## [1] NA 0.2436129
##
## $`Pr(>F)`
## [1] NA 0.08777467
modd <- lm(lifeExpF~infantMortality+group +region+fertility, data=newdata)
modf <- lm(lifeExpF~infantMortality+group+region+pctUrban, data = newdata)
pvals4 <- c(
+ anova(currentmod, modd) [6],
+ anova(currentmod, modf) [6])
pvals4
## $`Pr(>F)`
## [1] NA 0.008480809
##
## $`Pr(>F)`
## [1] NA 0.00531204
AIC(mod2)
## [1] 1376.609
AIC(mod3)
## [1] 1363.88
#the smaller AIC which is mod3 is better
summary(mod2)$call
## lm(formula = lifeExpF ~ ppgdp, data = newdata)
lm(formula = lifeExpF ~ ppgdp, data = newdata)
##
## Call:
## lm(formula = lifeExpF ~ ppgdp, data = newdata)
##
## Coefficients:
## (Intercept) ppgdp
## 6.807e+01 3.261e-04
summary(mod3)$call
## lm(formula = lifeExpF ~ pctUrban, data = newdata)
lm(formula = lifeExpF ~ pctUrban, data = newdata)
##
## Call:
## lm(formula = lifeExpF ~ pctUrban, data = newdata)
##
## Coefficients:
## (Intercept) pctUrban
## 57.0869 0.2626
AIC(mod0)
## [1] 1446.478
AIC(mod1)
## [1] 1264.308
AIC(mod2)
## [1] 1376.609
AIC(mod3)
## [1] 1363.88
AIC(mod4)
## [1] 1053.52
AIC(mod5)
## [1] 1229.366