#Assignment 10 - Why you should check residuals
#
#Sarah Rathwell-301084687
#
#Objective: Analyze residuals from model
#
# Session setup: widen console output so wide printed tables are not wrapped,
# then log the working directory and a timestamped banner for the run.
options(width = 200)
getwd()
## [1] "/Users/s_rathwell/Documents/SFU/Sp2014/Stat 340/Assignment 10"
# The workspace is intentionally NOT cleared with rm(list = ls()): that call
# deletes every object in the caller's global environment when the script is
# sourced, and it does not reset loaded packages or options anyway. Run the
# script in a fresh R session when a clean state is needed.
cat("Analyze residuals from a model ", date(), "\n")
## Analyze residuals from a model Fri Oct 24 10:07:11 2014
# Read the data (whitespace-delimited, no header row), name the columns,
# and print the first rows as a sanity check.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
res <- read.table(
  "http://www.stat.sfu.ca/~cschwarz/Stat-340/Assignments/Assign10/interesting.txt",
  header = FALSE,
  as.is = TRUE
)
col_names <- c("y", "x1", "x2", "x3", "x4", "x5", "x6")
# Fail loudly if the file layout is not the expected response + six predictors;
# assigning too few names would otherwise silently leave NA column names.
stopifnot(ncol(res) == length(col_names))
names(res) <- col_names
# head() never pads with NA rows, unlike res[1:5, ] on a short data frame.
head(res, 5)
## y x1 x2 x3 x4 x5 x6
## 1 1.674 0.03625 0.09306 -0.007742 0.39289 -0.003097 0.4277
## 2 -1.745 -0.12656 0.08588 -0.030573 -0.01575 -0.025589 -0.2546
## 3 -1.802 -0.07058 0.04854 -0.004661 -0.07054 -0.002070 -0.3531
## 4 -1.609 0.00657 0.11321 -0.059757 0.32730 -0.069982 -0.6304
## 5 0.613 -0.11583 -0.06065 0.077369 -0.44769 0.047839 0.5474
# Scatterplot matrix of the response and all six predictors.
pairs(
  ~ y + x1 + x2 + x3 + x4 + x5 + x6,
  data = res,
  main = "Simple Scatterplot Matrix"
)

# Fit the multiple linear regression of y on the six predictors x1 through x6
# as additive main effects (note: in formula syntax `x1:x6` would mean an
# interaction, which is not what is fitted here).
fit <- lm(y ~ x1 + x2 + x3 + x4 + x5 + x6, data = res)
summary(fit)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6, data = res)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.942 -1.014 0.492 0.784 3.055
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0134 0.0142 0.94 0.34
## x1 4.0394 0.2068 19.54 < 2e-16 ***
## x2 0.9667 0.1495 6.47 1.1e-10 ***
## x3 4.0089 0.3847 10.42 < 2e-16 ***
## x4 1.0145 0.0802 12.65 < 2e-16 ***
## x5 3.9816 0.8773 4.54 5.8e-06 ***
## x6 0.9876 0.0351 28.14 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1 on 4975 degrees of freedom
## Multiple R-squared: 0.311, Adjusted R-squared: 0.31
## F-statistic: 375 on 6 and 4975 DF, p-value: <2e-16
# List the components stored in the fitted lm object.
names(fit)
## [1] "coefficients" "residuals" "effects" "rank" "fitted.values" "assign" "qr" "df.residual" "xlevels" "call" "terms" "model"
# Extract predicted values and residuals through the extractor functions
# rather than reaching into the object with `$`: the extractors respect the
# model's na.action and are not subject to partial name matching.
fit.p <- fitted(fit)
fit.r <- resid(fit)
# Residuals against predicted values. xlim restricts the horizontal axis to
# [-2, 2], so points whose predicted value falls outside that range are not
# drawn.
plot(
  fit.p, fit.r,
  type = "p",
  xlim = c(-2, 2),
  ylab = "Residual",
  xlab = "Predicted",
  main = "Predicted vs Residual"
)
