Problem 8.42

bailsofhay — Dec 3, 2013, 5:08 PM

# Read in the KNNL Appendix C.3 data set and label the columns
data=read.table("http://www.stat.lsu.edu/exstweb/statlab/datasets/KNNLData/APPENC03.txt")
names(data)=c("ID","y","x1","x2","x3","x4","x5","x6")
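
# (Not part of the original solution) A quick sanity check that the import worked
# as expected -- column types and the first few rows:
str(data)
head(data)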


### Part a ####
# First-order model with indicator terms for x3, x4, and the year x6 (no indicator
# for x6 == 2000, which serves as the baseline year)
lm.data=lm(y~x1+x2+I(x3==1)+I(x4==1)+I(x6==1999)+I(x6==2001)+I(x6==2002), data =data)
lm.data

Call:
lm(formula = y ~ x1 + x2 + I(x3 == 1) + I(x4 == 1) + I(x6 == 
    1999) + I(x6 == 2001) + I(x6 == 2002), data = data)

Coefficients:
      (Intercept)                 x1                 x2  
         3.02e+00          -2.47e-01          -9.65e-05  
   I(x3 == 1)TRUE     I(x4 == 1)TRUE  I(x6 == 1999)TRUE  
         4.09e-01           1.24e-01           1.32e-02  
I(x6 == 2001)TRUE  I(x6 == 2002)TRUE  
        -1.09e-01          -8.31e-02  
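
# An equivalent way to write the same first-order model (a sketch, not part of the
# original solution), assuming x3 and x4 are 0/1 indicators and x6 only takes the
# values 1999-2002: treat them as factors and set the reference level of x6 to 2000
# so the dummies match the I(...) terms above. The fitted values are identical; only
# the coefficient labels change.
lm.data.alt = lm(y ~ x1 + x2 + factor(x3) + factor(x4) +
                   relevel(factor(x6), ref = "2000"), data = data)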

plot(lm.data$fitted.values, lm.data$residuals, ylab="Residuals", xlab="Fitted Values")

[Figure: residuals vs. fitted values for the first-order model]


# The fit looks reasonable overall, but a few large residuals stand out as potential
# outliers that may be pulling the fit. The error variance does not appear to be
# constant: the spread of the residuals grows as the fitted values increase, so the
# constant-variance assumption is questionable.
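
# Two quick follow-up diagnostics for the concerns noted above (a sketch, not part
# of the original solution): absolute residuals against fitted values to look at the
# spread, and a normal Q-Q plot to look at the outliers.
plot(lm.data$fitted.values, abs(lm.data$residuals),
     xlab = "Fitted Values", ylab = "|Residuals|")
qqnorm(lm.data$residuals)
qqline(lm.data$residuals)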

### Part b ####

# Second-order model: add the x1*x2 interaction and the squared terms for x1 and x2
lm.datanew=lm(y~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x3==1)+I(x4==1)+I(x6==1999)+I(x6==2001)+I(x6==2002), data =data)
lm.datanew

Call:
lm(formula = y ~ x1 + x2 + I(x1 * x2) + I(x1^2) + I(x2^2) + I(x3 == 
    1) + I(x4 == 1) + I(x6 == 1999) + I(x6 == 2001) + I(x6 == 
    2002), data = data)

Coefficients:
      (Intercept)                 x1                 x2  
         8.70e+00          -4.80e+00          -9.51e-04  
       I(x1 * x2)            I(x1^2)            I(x2^2)  
         1.63e-04           9.22e-01           5.52e-07  
   I(x3 == 1)TRUE     I(x4 == 1)TRUE  I(x6 == 1999)TRUE  
         3.94e-01           1.15e-01           1.24e-02  
I(x6 == 2001)TRUE  I(x6 == 2002)TRUE  
        -1.01e-01          -5.81e-02  
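
# The second-order model is the part (a) model plus three extra terms, so it can also
# be built with update(), which makes the nesting used in the F test below explicit
# (a sketch, not part of the original solution; the estimates are the same as above).
lm.datanew.alt = update(lm.data, . ~ . + I(x1 * x2) + I(x1^2) + I(x2^2))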



# Alternatives: Ho: B3 = B4 = B5 = 0 (the coefficients of x1*x2, x1^2, and x2^2),
#               Ha: not all of B3, B4, B5 equal zero
# Decision rule: if F* <= F(.95, 3, 25), conclude Ho; if F* > F(.95, 3, 25), conclude Ha

# General linear test: compare the second-order (full) model with the first-order (reduced) model
anova.data=anova(lm.datanew,lm.data)
anova.data
Analysis of Variance Table

Model 1: y ~ x1 + x2 + I(x1 * x2) + I(x1^2) + I(x2^2) + I(x3 == 1) + I(x4 == 
    1) + I(x6 == 1999) + I(x6 == 2001) + I(x6 == 2002)
Model 2: y ~ x1 + x2 + I(x3 == 1) + I(x4 == 1) + I(x6 == 1999) + I(x6 == 
    2001) + I(x6 == 2002)
  Res.Df   RSS Df Sum of Sq    F Pr(>F)
1     25 0.626                         
2     28 0.654 -3   -0.0281 0.37   0.77

Fs=anova.data[2,5]
Fs
[1] 0.374
Fc=qf(.95,3,25)
Fc
[1] 2.991

# Conclude Ho since F* = 0.374 is well below the critical value. The quadratic and
# interaction terms are not needed in the model.
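
# Cross-check (not part of the original solution): the same F* can be computed directly
# from the two error sums of squares in the anova table,
# F* = ((SSE_R - SSE_F)/3) / (SSE_F/25).
SSE.F = 0.626  # RSS of the second-order (full) model, 25 df
SSE.R = 0.654  # RSS of the first-order (reduced) model, 28 df
((SSE.R - SSE.F)/3) / (SSE.F/25)  # about 0.37, matching the anova F above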

### Part c ####

# Alternatives: Ho: B2 = B5 = B6 = B7 = 0 (the coefficients of x2 and the three year
#               indicators), Ha: not all of B2, B5, B6, B7 equal zero
# Decision rule: if F* <= F(.95, 4, 28), conclude Ho; if F* > F(.95, 4, 28), conclude Ha

# Reduced model for part (c): drop x2 and the three year indicators
newfit=lm(y~x1+I(x3==1)+I(x4==1), data=data)
newfit

Call:
lm(formula = y ~ x1 + I(x3 == 1) + I(x4 == 1), data = data)

Coefficients:
   (Intercept)              x1  I(x3 == 1)TRUE  I(x4 == 1)TRUE  
         3.185          -0.353           0.399           0.118  

# Compare the reduced model with the full first-order model from part (a)
anova.newfit=anova(newfit,lm.data)
anova.newfit
Analysis of Variance Table

Model 1: y ~ x1 + I(x3 == 1) + I(x4 == 1)
Model 2: y ~ x1 + x2 + I(x3 == 1) + I(x4 == 1) + I(x6 == 1999) + I(x6 == 
    2001) + I(x6 == 2002)
  Res.Df   RSS Df Sum of Sq    F Pr(>F)
1     32 0.718                         
2     28 0.654  4    0.0637 0.68   0.61

Fs=anova.newfit[2,5]
Fs
[1] 0.6817
Fc=qf(.95,4,28)
Fc
[1] 2.714

# Conclude Ho since F* = 0.682 is well below the critical value of 2.714, so B2, B5,
# B6, and B7 can all be dropped from the model.
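
# Cross-check (not part of the original solution): the same decision follows from the
# p-value. Comparing F* to the F(4, 28) distribution reproduces the Pr(>F) column of
# the anova table (about 0.61, well above 0.05).
pf(Fs, 4, 28, lower.tail = FALSE)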