Problem 8.42

bailsofhay — Dec 3, 2013, 5:08 PM

# Read in the KNNL Appendix C.3 data set and label the columns
data=read.table("http://www.stat.lsu.edu/exstweb/statlab/datasets/KNNLData/APPENC03.txt")
names(data)=c("ID","y","x1","x2","x3","x4","x5","x6")
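
# (Not part of the original solution) A quick sanity check that the import worked
# as expected -- column types and the first few rows:
str(data)
head(data)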


### Part a ####
# First-order model with indicator terms for x3, x4, and the year x6 (no indicator
# for x6 == 2000, which serves as the baseline year)
lm.data=lm(y~x1+x2+I(x3==1)+I(x4==1)+I(x6==1999)+I(x6==2001)+I(x6==2002), data =data)
lm.data

Call:
lm(formula = y ~ x1 + x2 + I(x3 == 1) + I(x4 == 1) + I(x6 == 
    1999) + I(x6 == 2001) + I(x6 == 2002), data = data)

Coefficients:
      (Intercept)                 x1                 x2  
         3.02e+00          -2.47e-01          -9.65e-05  
   I(x3 == 1)TRUE     I(x4 == 1)TRUE  I(x6 == 1999)TRUE  
         4.09e-01           1.24e-01           1.32e-02  
I(x6 == 2001)TRUE  I(x6 == 2002)TRUE  
        -1.09e-01          -8.31e-02  
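
# An equivalent way to write the same first-order model (a sketch, not part of the
# original solution), assuming x3 and x4 are 0/1 indicators and x6 only takes the
# values 1999-2002: treat them as factors and set the reference level of x6 to 2000
# so the dummies match the I(...) terms above. The fitted values are identical; only
# the coefficient labels change.
lm.data.alt = lm(y ~ x1 + x2 + factor(x3) + factor(x4) +
                   relevel(factor(x6), ref = "2000"), data = data)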

plot(lm.data$fitted.values, lm.data$residuals, ylab="Residuals", xlab="Fitted Values")

[Figure: residuals vs. fitted values for the first-order model]


# The fit looks reasonable overall, but a few large residuals stand out as potential
# outliers that may be pulling the fit. The error variance does not appear to be
# constant: the spread of the residuals grows as the fitted values increase, so the
# constant-variance assumption is questionable.
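
# Two quick follow-up diagnostics for the concerns noted above (a sketch, not part
# of the original solution): absolute residuals against fitted values to look at the
# spread, and a normal Q-Q plot to look at the outliers.
plot(lm.data$fitted.values, abs(lm.data$residuals),
     xlab = "Fitted Values", ylab = "|Residuals|")
qqnorm(lm.data$residuals)
qqline(lm.data$residuals)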

### Part b ####

# Second-order model: add the x1*x2 interaction and the squared terms for x1 and x2
lm.datanew=lm(y~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x3==1)+I(x4==1)+I(x6==1999)+I(x6==2001)+I(x6==2002), data =data)
lm.datanew

Call:
lm(formula = y ~ x1 + x2 + I(x1 * x2) + I(x1^2) + I(x2^2) + I(x3 == 
    1) + I(x4 == 1) + I(x6 == 1999) + I(x6 == 2001) + I(x6 == 
    2002), data = data)

Coefficients:
      (Intercept)                 x1                 x2  
         8.70e+00          -4.80e+00          -9.51e-04  
       I(x1 * x2)            I(x1^2)            I(x2^2)  
         1.63e-04           9.22e-01           5.52e-07  
   I(x3 == 1)TRUE     I(x4 == 1)TRUE  I(x6 == 1999)TRUE  
         3.94e-01           1.15e-01           1.24e-02  
I(x6 == 2001)TRUE  I(x6 == 2002)TRUE  
        -1.01e-01          -5.81e-02  
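
# The second-order model is the part (a) model plus three extra terms, so it can also
# be built with update(), which makes the nesting used in the F test below explicit
# (a sketch, not part of the original solution; the estimates are the same as above).
lm.datanew.alt = update(lm.data, . ~ . + I(x1 * x2) + I(x1^2) + I(x2^2))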



# Alternatives: Ho: B3 = B4 = B5 = 0 (the coefficients of x1*x2, x1^2, and x2^2),
#               Ha: not all of B3, B4, B5 equal zero
# Decision rule: if F* <= F(.95, 3, 25), conclude Ho; if F* > F(.95, 3, 25), conclude Ha

# General linear test: compare the second-order (full) model with the first-order (reduced) model
anova.data=anova(lm.datanew,lm.data)
anova.data
Analysis of Variance Table

Model 1: y ~ x1 + x2 + I(x1 * x2) + I(x1^2) + I(x2^2) + I(x3 == 1) + I(x4 == 
    1) + I(x6 == 1999) + I(x6 == 2001) + I(x6 == 2002)
Model 2: y ~ x1 + x2 + I(x3 == 1) + I(x4 == 1) + I(x6 == 1999) + I(x6 == 
    2001) + I(x6 == 2002)
  Res.Df   RSS Df Sum of Sq    F Pr(>F)
1     25 0.626                         
2     28 0.654 -3   -0.0281 0.37   0.77

Fs=anova.data[2,5]
Fs
[1] 0.374
Fc=qf(.95,3,25)
Fc
[1] 2.991

# Conclude Ho since F* = 0.374 is well below the critical value. The quadratic and
# interaction terms are not needed in the model.
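
# Cross-check (not part of the original solution): the same F* can be computed directly
# from the two error sums of squares in the anova table,
# F* = ((SSE_R - SSE_F)/3) / (SSE_F/25).
SSE.F = 0.626  # RSS of the second-order (full) model, 25 df
SSE.R = 0.654  # RSS of the first-order (reduced) model, 28 df
((SSE.R - SSE.F)/3) / (SSE.F/25)  # about 0.37, matching the anova F above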

### Part c ####

# Alternatives: Ho: B2 = B5 = B6 = B7 = 0 (the coefficients of x2 and the three year
#               indicators), Ha: not all of B2, B5, B6, B7 equal zero
# Decision rule: if F* <= F(.95, 4, 28), conclude Ho; if F* > F(.95, 4, 28), conclude Ha

# Reduced model for part (c): drop x2 and the three year indicators
newfit=lm(y~x1+I(x3==1)+I(x4==1), data=data)
newfit

Call:
lm(formula = y ~ x1 + I(x3 == 1) + I(x4 == 1), data = data)

Coefficients:
   (Intercept)              x1  I(x3 == 1)TRUE  I(x4 == 1)TRUE  
         3.185          -0.353           0.399           0.118  

# Compare the reduced model with the full first-order model from part (a)
anova.newfit=anova(newfit,lm.data)
anova.newfit
Analysis of Variance Table

Model 1: y ~ x1 + I(x3 == 1) + I(x4 == 1)
Model 2: y ~ x1 + x2 + I(x3 == 1) + I(x4 == 1) + I(x6 == 1999) + I(x6 == 
    2001) + I(x6 == 2002)
  Res.Df   RSS Df Sum of Sq    F Pr(>F)
1     32 0.718                         
2     28 0.654  4    0.0637 0.68   0.61

Fs=anova.newfit[2,5]
Fs
[1] 0.6817
Fc=qf(.95,4,28)
Fc
[1] 2.714

# Conclude Ho since F* = 0.682 is well below the critical value of 2.714, so B2, B5,
# B6, and B7 can all be dropped from the model.
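
# Cross-check (not part of the original solution): the same decision follows from the
# p-value. Comparing F* to the F(4, 28) distribution reproduces the Pr(>F) column of
# the anova table (about 0.61, well above 0.05).
pf(Fs, 4, 28, lower.tail = FALSE)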