Mod 0 is basic mod. Test a new mod for each variable. Mod with lowest p value becomes new mod.
library(alr4)
## Warning: package 'alr4' was built under R version 3.6.3
## Loading required package: car
## Warning: package 'car' was built under R version 3.6.3
## Loading required package: carData
## Loading required package: effects
## Warning: package 'effects' was built under R version 3.6.3
## Registered S3 methods overwritten by 'lme4':
## method from
## cooks.distance.influence.merMod car
## influence.merMod car
## dfbeta.influence.merMod car
## dfbetas.influence.merMod car
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
data(UN)
names(UN)
## [1] "region" "group" "fertility" "ppgdp"
## [5] "lifeExpF" "pctUrban" "infantMortality"
newdata<- na.omit(UN)
currentmod<-mod0<- lm(lifeExpF~1, data=newdata)
mod1<-lm(lifeExpF~region, data=newdata)
mod2<-lm(lifeExpF~ppgdp,data=newdata)
mod3<-lm(lifeExpF~pctUrban,data=newdata)
mod4<-lm(lifeExpF~infantMortality,data=newdata)
mod5<-lm(lifeExpF~fertility,data=newdata)
anova(currentmod,mod1)
## Analysis of Variance Table
##
## Model 1: lifeExpF ~ 1
## Model 2: lifeExpF ~ region
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 192 19906.4
## 2 186 7278.9 6 12628 53.779 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pvals<-c(anova(currentmod,mod1)[6],
+ anova(currentmod,mod2)[6],
+ anova(currentmod,mod3)[6],
+ anova(currentmod,mod4)[6],
+ anova(currentmod,mod5)[6])
pvals
## $`Pr(>F)`
## [1] NA 4.222081e-38
##
## $`Pr(>F)`
## [1] NA 3.673818e-17
##
## $`Pr(>F)`
## [1] NA 6.337908e-20
##
## $`Pr(>F)`
## [1] NA 8.225623e-87
##
## $`Pr(>F)`
## [1] NA 5.719746e-49
currentmod <- mod4
Repeat this process until there are no new significant p values.
mod12<-lm(lifeExpF~infantMortality+region, data=newdata)
mod13<-lm(lifeExpF~infantMortality+group , data=newdata)
mod14<-lm(lifeExpF~infantMortality+fertility,data=newdata)
mod15<-lm(lifeExpF~infantMortality+pctUrban, data=newdata)
pvals<-c(
+ anova(currentmod,mod12)[6],
+ anova(currentmod,mod13)[6],
+ anova(currentmod,mod14)[6],
+ anova(currentmod,mod15)[6])
pvals
## $`Pr(>F)`
## [1] NA 3.543629e-07
##
## $`Pr(>F)`
## [1] NA 3.85907e-09
##
## $`Pr(>F)`
## [1] NA 0.08196719
##
## $`Pr(>F)`
## [1] NA 0.07870966
currentmod <- mod13
Forward is when you start with no variables and continue to add them. Backward is when you start with all the variables and substarct them
library(MASS)
## Warning: package 'MASS' was built under R version 3.6.3
modc<-lm(lifeExpF~infantMortality+group+pctUrban, data=newdata)
forwardmod <- stepAIC(modc, direction = "forward")
## Start: AIC=467.24
## lifeExpF ~ infantMortality + group + pctUrban
backwardmod <- stepAIC(modc, direction = "backward")
## Start: AIC=467.24
## lifeExpF ~ infantMortality + group + pctUrban
##
## Df Sum of Sq RSS AIC
## <none> 2062.8 467.24
## - pctUrban 1 32.3 2095.1 468.24
## - group 2 467.4 2530.2 502.66
## - infantMortality 1 3738.6 5801.4 664.81
BIGmod <- lm(lifeExpF ~ infantMortality * group * pctUrban, data = UN)
forwardmod <- stepAIC(BIGmod, direction = "forward")
## Start: AIC=459.16
## lifeExpF ~ infantMortality * group * pctUrban
backwardmod <- stepAIC(BIGmod, direction = "backward")
## Start: AIC=459.16
## lifeExpF ~ infantMortality * group * pctUrban
##
## Df Sum of Sq RSS AIC
## - infantMortality:group:pctUrban 2 6.221 1846.0 455.81
## <none> 1839.8 459.16
##
## Step: AIC=455.81
## lifeExpF ~ infantMortality + group + pctUrban + infantMortality:group +
## infantMortality:pctUrban + group:pctUrban
##
## Df Sum of Sq RSS AIC
## <none> 1846.0 455.81
## - infantMortality:group 2 44.111 1890.2 456.37
## - group:pctUrban 2 132.098 1978.1 465.15
## - infantMortality:pctUrban 1 181.645 2027.7 471.93
backwardmod$call
## lm(formula = lifeExpF ~ infantMortality + group + pctUrban +
## infantMortality:group + infantMortality:pctUrban + group:pctUrban,
## data = UN)
forwardmod$call
## lm(formula = lifeExpF ~ infantMortality * group * pctUrban, data = UN)
Forward and backward can sometimes produce different results