Problem 9.20

bailsofhay — Dec 3, 2013, 5:09 PM

# Read the APPENC03 data set straight from the course web site.
data <- read.table(
  "http://www.stat.lsu.edu/exstweb/statlab/datasets/KNNLData/APPENC03.txt"
)
# Eight columns: an ID, the response y, and predictors x1 through x6.
names(data) <- c("ID", "y", paste0("x", 1:6))

# Full first-order model, used below as the upper scope of the search:
# x1 and x2 enter as-is; x3, x4 and x6 enter through logical indicator terms
# (x3 == 1, x4 == 1, and x6 equal to 1999, 2001 or 2002).
lm.datanew <- lm(
  y ~ x1 + x2 + I(x3 == 1) + I(x4 == 1) +
    I(x6 == 1999) + I(x6 == 2001) + I(x6 == 2002),
  data = data
)
print(lm.datanew)

Call:
lm(formula = y ~ x1 + x2 + I(x3 == 1) + I(x4 == 1) + I(x6 == 
    1999) + I(x6 == 2001) + I(x6 == 2002), data = data)

Coefficients:
      (Intercept)                 x1                 x2  
         3.02e+00          -2.47e-01          -9.65e-05  
   I(x3 == 1)TRUE     I(x4 == 1)TRUE  I(x6 == 1999)TRUE  
         4.09e-01           1.24e-01           1.32e-02  
I(x6 == 2001)TRUE  I(x6 == 2002)TRUE  
        -1.09e-01          -8.31e-02  

### Part a ###########
library(MASS)

## Null ##

# Intercept-only model: the starting point of the forward search.
Null <- lm(y ~ 1, data = data)
print(Null)

Call:
lm(formula = y ~ 1, data = data)

Coefficients:
(Intercept)  
       2.66  
# F test for adding each single term of the full model to the null model.
addterm(
  Null,
  scope = lm.datanew,
  test = "F"
)
Single term additions

Model:
y ~ 1
              Df Sum of Sq   RSS    AIC F Value   Pr(F)    
<none>                     2.446  -94.8                    
x1             1     0.087 2.359  -94.1     1.3    0.27    
x2             1     0.013 2.433  -93.0     0.2    0.67    
I(x3 == 1)     1     1.530 0.917 -128.1    56.7 9.6e-09 ***
I(x4 == 1)     1     0.228 2.219  -96.3     3.5    0.07 .  
I(x6 == 1999)  1     0.012 2.434  -93.0     0.2    0.69    
I(x6 == 2001)  1     0.057 2.389  -93.7     0.8    0.37    
I(x6 == 2002)  1     0.007 2.439  -92.9     0.1    0.76    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Step 1 ###
# Extend the null model with the x3 and x4 indicator terms, then test each
# remaining term of the full model for addition.
# NOTE(review): both indicators are added in one update(); a strict forward
# stepwise pass would add I(x3 == 1) alone and re-run addterm() before
# admitting I(x4 == 1) -- confirm the conditional test still supports it.
newmodel <- update(Null, . ~ . + I(x3 == 1) + I(x4 == 1))
addterm(
  newmodel,
  scope = lm.datanew,
  test = "F"
)
Single term additions

Model:
y ~ I(x3 == 1) + I(x4 == 1)
              Df Sum of Sq   RSS  AIC F Value Pr(F)  
<none>                     0.831 -130                
x1             1    0.1127 0.718 -133    5.02 0.032 *
x2             1    0.0253 0.805 -129    1.00 0.324  
I(x6 == 1999)  1    0.0197 0.811 -129    0.78 0.384  
I(x6 == 2001)  1    0.0631 0.768 -130    2.63 0.115  
I(x6 == 2002)  1    0.0245 0.806 -129    0.97 0.331  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Step 2 ###
# Add x1 to the Step 1 terms (rebuilt from the null model so that x1 is listed
# first in the formula), then test the remaining candidate terms.
newmodel2 <- update(Null, . ~ . + x1 + I(x3 == 1) + I(x4 == 1))
addterm(
  newmodel2,
  scope = lm.datanew,
  test = "F"
)
Single term additions

Model:
y ~ x1 + I(x3 == 1) + I(x4 == 1)
              Df Sum of Sq   RSS  AIC F Value Pr(F)
<none>                     0.718 -133              
x2             1    0.0003 0.718 -131   0.014  0.91
I(x6 == 1999)  1    0.0090 0.709 -131   0.394  0.53
I(x6 == 2001)  1    0.0379 0.680 -133   1.727  0.20
I(x6 == 2002)  1    0.0005 0.717 -131   0.020  0.89

##### Part b ########
# Yes, this is the same subset that was found in Problems 8.42 and 9.12. The suggested model is still Y = b0 + b1*x1 + b2*x3 + b3*x4.