bailsofhay — Dec 3, 2013, 5:08 PM
# Read the remote table (default read.table settings: no header row,
# whitespace-separated) and label its eight columns: an ID, the response y,
# and the predictors x1 through x6.
data <- read.table(
  file = "http://www.stat.lsu.edu/exstweb/statlab/datasets/KNNLData/APPENC03.txt"
)
names(data) <- c("ID", "y", paste0("x", 1:6))
### Part a ####
# First-order model for y: x1 and x2 enter linearly, x3 and x4 enter as
# indicators of the value 1, and x6 enters as three indicators
# (1999, 2001, 2002); any other x6 value is the baseline.
lm.data <- lm(
  formula = y ~ x1 + x2 + I(x3 == 1) + I(x4 == 1) +
    I(x6 == 1999) + I(x6 == 2001) + I(x6 == 2002),
  data = data
)
print(lm.data)
Call:
lm(formula = y ~ x1 + x2 + I(x3 == 1) + I(x4 == 1) + I(x6 ==
1999) + I(x6 == 2001) + I(x6 == 2002), data = data)
Coefficients:
(Intercept) x1 x2
3.02e+00 -2.47e-01 -9.65e-05
I(x3 == 1)TRUE I(x4 == 1)TRUE I(x6 == 1999)TRUE
4.09e-01 1.24e-01 1.32e-02
I(x6 == 2001)TRUE I(x6 == 2002)TRUE
-1.09e-01 -8.31e-02
# Residuals-versus-fitted scatter plot for the first-order model, used to
# judge constancy of the error variance and to spot outliers.
plot(
  x = lm.data[["fitted.values"]],
  y = lm.data[["residuals"]],
  xlab = "Fitted Values",
  ylab = "Residuals"
)
# The fit looks somewhat reasonable, but it could probably be improved, since the residuals have some outliers that are skewing the model. The variance does not appear to be constant, since the spread of the residuals increases as the fitted values increase.
### Part b ####
# Second-order model: the first-order terms plus the x1:x2 interaction and
# the squares of x1 and x2 (wrapped in I() so the arithmetic is taken
# literally instead of being read as formula operators).
lm.datanew <- lm(
  formula = y ~ x1 + x2 + I(x1 * x2) + I(x1^2) + I(x2^2) +
    I(x3 == 1) + I(x4 == 1) +
    I(x6 == 1999) + I(x6 == 2001) + I(x6 == 2002),
  data = data
)
print(lm.datanew)
Call:
lm(formula = y ~ x1 + x2 + I(x1 * x2) + I(x1^2) + I(x2^2) + I(x3 ==
1) + I(x4 == 1) + I(x6 == 1999) + I(x6 == 2001) + I(x6 ==
2002), data = data)
Coefficients:
(Intercept) x1 x2
8.70e+00 -4.80e+00 -9.51e-04
I(x1 * x2) I(x1^2) I(x2^2)
1.63e-04 9.22e-01 5.52e-07
I(x3 == 1)TRUE I(x4 == 1)TRUE I(x6 == 1999)TRUE
3.94e-01 1.15e-01 1.24e-02
I(x6 == 2001)TRUE I(x6 == 2002)TRUE
-1.01e-01 -5.81e-02
# Alternatives: Ho: B3=B4=B5=0, Ha: not all of B3, B4, B5 equal 0
# Decision rule: if F* <= F(.95,3,25), conclude Ho. If F* > F(.95,3,25), conclude Ha. (The denominator df is 25, the residual df of the full second-order model.)
# Partial F-test of the second-order terms. The models are passed
# full-first, so the comparison row (row 2) shows a negative Df and
# Sum of Sq; the F statistic in that row is still positive.
anova.data <- anova(lm.datanew, lm.data)
print(anova.data)
Analysis of Variance Table
Model 1: y ~ x1 + x2 + I(x1 * x2) + I(x1^2) + I(x2^2) + I(x3 == 1) + I(x4 ==
1) + I(x6 == 1999) + I(x6 == 2001) + I(x6 == 2002)
Model 2: y ~ x1 + x2 + I(x3 == 1) + I(x4 == 1) + I(x6 == 1999) + I(x6 ==
2001) + I(x6 == 2002)
Res.Df RSS Df Sum of Sq F Pr(>F)
1 25 0.626
2 28 0.654 -3 -0.0281 0.37 0.77
# Observed test statistic F*: the "F" column of the comparison row (row 2)
# of the ANOVA table.
Fs <- anova.data[2, "F"]
print(Fs)
[1] 0.374
# Critical value F(.95; 3, 25) for the partial F-test of the three
# second-order terms.
#   numerator df   = 3  (coefficients dropped under Ho)
#   denominator df = 25 (residual df of the full second-order model; see
#                        Res.Df for Model 1 in the ANOVA table)
# The earlier version used 30 denominator df, which does not match the model.
Fc <- qf(0.95, 3, 25)
Fc
# [1] 2.991
# Conclude Ho since F*<< F. We do not need the quadratics or interaction terms
### Part c ####
# Alternatives: Ho: B2=B5=B6=B7=0, Ha: not all of B2, B5, B6, B7 equal zero
# Decision Rule: if F* < or equal to F, conclude Ho, if F*> F, conclude Ha
# Reduced model for part (c): drops x2 and the three x6 indicators,
# keeping x1 and the x3 / x4 indicators.
newfit <- lm(
  formula = y ~ x1 + I(x3 == 1) + I(x4 == 1),
  data = data
)
print(newfit)
Call:
lm(formula = y ~ x1 + I(x3 == 1) + I(x4 == 1), data = data)
Coefficients:
(Intercept) x1 I(x3 == 1)TRUE I(x4 == 1)TRUE
3.185 -0.353 0.399 0.118
# Partial F-test comparing the reduced model (listed first) with the
# first-order model (listed second); row 2 holds the test of the four
# dropped coefficients.
anova.newfit <- anova(newfit, lm.data)
print(anova.newfit)
Analysis of Variance Table
Model 1: y ~ x1 + I(x3 == 1) + I(x4 == 1)
Model 2: y ~ x1 + x2 + I(x3 == 1) + I(x4 == 1) + I(x6 == 1999) + I(x6 ==
2001) + I(x6 == 2002)
Res.Df RSS Df Sum of Sq F Pr(>F)
1 32 0.718
2 28 0.654 4 0.0637 0.68 0.61
# Observed test statistic F*: the "F" column of the comparison row (row 2)
# of the part (c) ANOVA table.
Fs <- anova.newfit[2, "F"]
print(Fs)
[1] 0.6817
# Critical value F(.95; 4, 28): 4 numerator df for the four dropped
# coefficients, 28 denominator df from the first-order model's residual df.
Fc <- qf(p = 0.95, df1 = 4, df2 = 28)
print(Fc)
[1] 2.714
# Conclude Ho since F*<< F, so B2, B5, B6, and B7 can all be left out of the model.