R Markdown

Mod 0 is basic mod. Test a new mod for each variable. Mod with lowest p value becomes new mod.

library(alr4)
## Warning: package 'alr4' was built under R version 3.6.3
## Loading required package: car
## Warning: package 'car' was built under R version 3.6.3
## Loading required package: carData
## Loading required package: effects
## Warning: package 'effects' was built under R version 3.6.3
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
data(UN)
names(UN)
## [1] "region"          "group"           "fertility"       "ppgdp"          
## [5] "lifeExpF"        "pctUrban"        "infantMortality"
newdata<- na.omit(UN)
currentmod<-mod0<- lm(lifeExpF~1, data=newdata)
mod1<-lm(lifeExpF~region, data=newdata)
 mod2<-lm(lifeExpF~ppgdp,data=newdata)
 mod3<-lm(lifeExpF~pctUrban,data=newdata)
 mod4<-lm(lifeExpF~infantMortality,data=newdata)
 mod5<-lm(lifeExpF~fertility,data=newdata)
 anova(currentmod,mod1)
## Analysis of Variance Table
## 
## Model 1: lifeExpF ~ 1
## Model 2: lifeExpF ~ region
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1    192 19906.4                                  
## 2    186  7278.9  6     12628 53.779 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  pvals<-c(anova(currentmod,mod1)[6],
            + anova(currentmod,mod2)[6],
            + anova(currentmod,mod3)[6],
            + anova(currentmod,mod4)[6],
            + anova(currentmod,mod5)[6])
 pvals
## $`Pr(>F)`
## [1]           NA 4.222081e-38
## 
## $`Pr(>F)`
## [1]           NA 3.673818e-17
## 
## $`Pr(>F)`
## [1]           NA 6.337908e-20
## 
## $`Pr(>F)`
## [1]           NA 8.225623e-87
## 
## $`Pr(>F)`
## [1]           NA 5.719746e-49
 currentmod <- mod4

Repeat this process until there are no new significant p values.

 mod12<-lm(lifeExpF~infantMortality+region, data=newdata)
 mod13<-lm(lifeExpF~infantMortality+group , data=newdata)
 mod14<-lm(lifeExpF~infantMortality+fertility,data=newdata)
 mod15<-lm(lifeExpF~infantMortality+pctUrban, data=newdata)
 pvals<-c(
   + anova(currentmod,mod12)[6],
   + anova(currentmod,mod13)[6],
   + anova(currentmod,mod14)[6],
   + anova(currentmod,mod15)[6])
  pvals
## $`Pr(>F)`
## [1]           NA 3.543629e-07
## 
## $`Pr(>F)`
## [1]          NA 3.85907e-09
## 
## $`Pr(>F)`
## [1]         NA 0.08196719
## 
## $`Pr(>F)`
## [1]         NA 0.07870966
  currentmod <- mod13

Forward is when you start with no variables and continue to add them. Backward is when you start with all the variables and substarct them

 library(MASS)
## Warning: package 'MASS' was built under R version 3.6.3
 modc<-lm(lifeExpF~infantMortality+group+pctUrban, data=newdata)
 forwardmod <- stepAIC(modc, direction = "forward")
## Start:  AIC=467.24
## lifeExpF ~ infantMortality + group + pctUrban
backwardmod <- stepAIC(modc, direction = "backward")
## Start:  AIC=467.24
## lifeExpF ~ infantMortality + group + pctUrban
## 
##                   Df Sum of Sq    RSS    AIC
## <none>                         2062.8 467.24
## - pctUrban         1      32.3 2095.1 468.24
## - group            2     467.4 2530.2 502.66
## - infantMortality  1    3738.6 5801.4 664.81
 BIGmod <- lm(lifeExpF ~ infantMortality * group * pctUrban, data = UN)
   forwardmod <- stepAIC(BIGmod, direction = "forward")
## Start:  AIC=459.16
## lifeExpF ~ infantMortality * group * pctUrban
   backwardmod <- stepAIC(BIGmod, direction = "backward")
## Start:  AIC=459.16
## lifeExpF ~ infantMortality * group * pctUrban
## 
##                                  Df Sum of Sq    RSS    AIC
## - infantMortality:group:pctUrban  2     6.221 1846.0 455.81
## <none>                                        1839.8 459.16
## 
## Step:  AIC=455.81
## lifeExpF ~ infantMortality + group + pctUrban + infantMortality:group + 
##     infantMortality:pctUrban + group:pctUrban
## 
##                            Df Sum of Sq    RSS    AIC
## <none>                                  1846.0 455.81
## - infantMortality:group     2    44.111 1890.2 456.37
## - group:pctUrban            2   132.098 1978.1 465.15
## - infantMortality:pctUrban  1   181.645 2027.7 471.93
   backwardmod$call
## lm(formula = lifeExpF ~ infantMortality + group + pctUrban + 
##     infantMortality:group + infantMortality:pctUrban + group:pctUrban, 
##     data = UN)
   forwardmod$call
## lm(formula = lifeExpF ~ infantMortality * group * pctUrban, data = UN)

Forward and backward can sometimes produce different results