library(alr4)
## Loading required package: car
## Loading required package: carData
## Loading required package: effects
## Warning: package 'effects' was built under R version 3.6.2
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
data(UN)
names(UN)
## [1] "region"          "group"           "fertility"       "ppgdp"          
## [5] "lifeExpF"        "pctUrban"        "infantMortality"
str(UN)
## 'data.frame':    213 obs. of  7 variables:
##  $ region         : Factor w/ 8 levels "Africa","Asia",..: 2 4 1 NA 1 3 5 2 3 8 ...
##  $ group          : Factor w/ 3 levels "oecd","other",..: 2 2 3 NA 3 2 2 2 2 1 ...
##  $ fertility      : num  5.97 1.52 2.14 NA 5.13 ...
##  $ ppgdp          : num  499 3677 4473 NA 4322 ...
##  $ lifeExpF       : num  49.5 80.4 75 NA 53.2 ...
##  $ pctUrban       : num  23 53 67 NA 59 100 93 64 47 89 ...
##  $ infantMortality: num  124.5 16.6 21.5 11.3 96.2 ...
summary(UN)
##         region      group       fertility         ppgdp         
##  Africa    :53   oecd  : 31   Min.   :1.134   Min.   :   114.8  
##  Asia      :50   other :115   1st Qu.:1.754   1st Qu.:  1283.0  
##  Europe    :39   africa: 53   Median :2.262   Median :  4684.5  
##  Latin Amer:20   NA's  : 14   Mean   :2.761   Mean   : 13012.0  
##  Caribbean :17                3rd Qu.:3.545   3rd Qu.: 15520.5  
##  (Other)   :20                Max.   :6.925   Max.   :105095.4  
##  NA's      :14                NA's   :14      NA's   :14        
##     lifeExpF        pctUrban      infantMortality  
##  Min.   :48.11   Min.   : 11.00   Min.   :  1.916  
##  1st Qu.:65.66   1st Qu.: 39.00   1st Qu.:  7.019  
##  Median :75.89   Median : 59.00   Median : 19.007  
##  Mean   :72.29   Mean   : 57.93   Mean   : 29.440  
##  3rd Qu.:79.58   3rd Qu.: 75.00   3rd Qu.: 44.477  
##  Max.   :87.12   Max.   :100.00   Max.   :124.535  
##  NA's   :14      NA's   :14       NA's   :6
sum(is.na(UN))
## [1] 90
newdata <- na.omit(UN)

currentmod<-mod0<- lm(lifeExpF~1, data=newdata)

mod1<-lm(lifeExpF~region, data=newdata)
mod2<-lm(lifeExpF~ppgdp,data=newdata)
mod3<-lm(lifeExpF~pctUrban,data=newdata)
mod4<-lm(lifeExpF~infantMortality,data=newdata)
mod5<-lm(lifeExpF~fertility,data=newdata)

anova(currentmod , mod1)
## Analysis of Variance Table
## 
## Model 1: lifeExpF ~ 1
## Model 2: lifeExpF ~ region
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1    192 19906.4                                  
## 2    186  7278.9  6     12628 53.779 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 pvals<-c(anova(currentmod,mod1)[6],
+ anova(currentmod,mod2)[6],
+ anova(currentmod,mod3)[6],
+ anova(currentmod,mod4)[6],
+ anova(currentmod,mod5)[6])
 
 pvals
## $`Pr(>F)`
## [1]           NA 4.222081e-38
## 
## $`Pr(>F)`
## [1]           NA 3.673818e-17
## 
## $`Pr(>F)`
## [1]           NA 6.337908e-20
## 
## $`Pr(>F)`
## [1]           NA 8.225623e-87
## 
## $`Pr(>F)`
## [1]           NA 5.719746e-49
 #since pvals show mod4 is best, we change that to the new current mod
 
 currentmod <- mod4
mod12<-lm(lifeExpF~infantMortality+region, data=newdata)
mod13<-lm(lifeExpF~infantMortality+group , data=newdata)
mod14<-lm(lifeExpF~infantMortality+fertility,data=newdata)
mod15<-lm(lifeExpF~infantMortality+pctUrban, data=newdata)

pvals2<-c(
+ anova(currentmod,mod12)[6],
+ anova(currentmod,mod13)[6],
+ anova(currentmod,mod14)[6],
+ anova(currentmod,mod15)[6])
pvals2
## $`Pr(>F)`
## [1]           NA 3.543629e-07
## 
## $`Pr(>F)`
## [1]          NA 3.85907e-09
## 
## $`Pr(>F)`
## [1]         NA 0.08196719
## 
## $`Pr(>F)`
## [1]         NA 0.07870966
currentmod <- mod13

moda<-lm(lifeExpF~infantMortality+group +region, data=newdata)
modb<-lm(lifeExpF~infantMortality+group+ fertility,data=newdata)
modc<-lm(lifeExpF~infantMortality+group+pctUrban, data = newdata)

pvals3 <-c(
+ anova(currentmod,moda)[6],
+ anova(currentmod,modb)[6],
+ anova(currentmod,modc) [6])
pvals3
## $`Pr(>F)`
## [1]          NA 0.004085691
## 
## $`Pr(>F)`
## [1]        NA 0.2436129
## 
## $`Pr(>F)`
## [1]         NA 0.08777467
modd <- lm(lifeExpF~infantMortality+group +region+fertility, data=newdata)
modf <- lm(lifeExpF~infantMortality+group+region+pctUrban, data = newdata)

pvals4 <- c(
+ anova(currentmod, modd) [6],
+ anova(currentmod, modf) [6])

pvals4
## $`Pr(>F)`
## [1]          NA 0.008480809
## 
## $`Pr(>F)`
## [1]         NA 0.00531204
AIC(mod2)
## [1] 1376.609
AIC(mod3)
## [1] 1363.88
#the smaller AIC which is mod3 is better

summary(mod2)$call
## lm(formula = lifeExpF ~ ppgdp, data = newdata)
lm(formula = lifeExpF ~ ppgdp, data = newdata)
## 
## Call:
## lm(formula = lifeExpF ~ ppgdp, data = newdata)
## 
## Coefficients:
## (Intercept)        ppgdp  
##   6.807e+01    3.261e-04
summary(mod3)$call
## lm(formula = lifeExpF ~ pctUrban, data = newdata)
lm(formula = lifeExpF ~ pctUrban, data = newdata)
## 
## Call:
## lm(formula = lifeExpF ~ pctUrban, data = newdata)
## 
## Coefficients:
## (Intercept)     pctUrban  
##     57.0869       0.2626
AIC(mod0)
## [1] 1446.478
AIC(mod1)
## [1] 1264.308
AIC(mod2)
## [1] 1376.609
AIC(mod3)
## [1] 1363.88
AIC(mod4)
## [1] 1053.52
AIC(mod5)
## [1] 1229.366