#Assignment 10 - Why you should check residuals
#
#Sarah Rathwell-301084687
#
#Objective: Analyze residuals from model
#
# Widen the console so printed data frames and model components are not wrapped.
options(width = 200)
# Echo the working directory into the transcript (informational only).
getwd()
## [1] "/Users/s_rathwell/Documents/SFU/Sp2014/Stat 340/Assignment 10"
# NOTE: the workspace is deliberately NOT cleared with rm(list = ls()) here;
# doing so would silently delete the caller's objects whenever this file is
# source()d into an existing session.

cat("Analyze residuals from a model ", date(), '\n')
## Analyze residuals from a model  Fri Oct 24 10:07:11 2014
# Read the data file (no header row, keep character columns as-is), then
# attach column names: response y followed by predictors x1..x6.
res <- read.table(
  "http://www.stat.sfu.ca/~cschwarz/Stat-340/Assignments/Assign10/interesting.txt",
  header = FALSE,
  as.is = TRUE
)
# Guard the renaming: assigning 7 names to a different number of columns
# would otherwise leave NA column names without any warning.
stopifnot(ncol(res) == 7L)
names(res) <- c("y", "x1", "x2", "x3", "x4", "x5", "x6")
# Show the first five rows as a sanity check (head() never pads with NA rows).
head(res, 5)
##        y       x1       x2        x3       x4        x5      x6
## 1  1.674  0.03625  0.09306 -0.007742  0.39289 -0.003097  0.4277
## 2 -1.745 -0.12656  0.08588 -0.030573 -0.01575 -0.025589 -0.2546
## 3 -1.802 -0.07058  0.04854 -0.004661 -0.07054 -0.002070 -0.3531
## 4 -1.609  0.00657  0.11321 -0.059757  0.32730 -0.069982 -0.6304
## 5  0.613 -0.11583 -0.06065  0.077369 -0.44769  0.047839  0.5474
# Scatterplot matrix of the response and all six predictors, to inspect the
# pairwise relationships before fitting a model.
pairs(
  res[, c("y", "x1", "x2", "x3", "x4", "x5", "x6")],
  main = "Simple Scatterplot Matrix"
)

## plot of chunk unnamed-chunk-1

# Fit an additive linear model of y on x1 through x6 (main effects only,
# no interaction terms), then print the coefficient table and fit statistics.
fit <- lm(
  y ~ x1 + x2 + x3 + x4 + x5 + x6,
  data = res
)
summary(fit)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6, data = res)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -2.942 -1.014  0.492  0.784  3.055 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.0134     0.0142    0.94     0.34    
## x1            4.0394     0.2068   19.54  < 2e-16 ***
## x2            0.9667     0.1495    6.47  1.1e-10 ***
## x3            4.0089     0.3847   10.42  < 2e-16 ***
## x4            1.0145     0.0802   12.65  < 2e-16 ***
## x5            3.9816     0.8773    4.54  5.8e-06 ***
## x6            0.9876     0.0351   28.14  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1 on 4975 degrees of freedom
## Multiple R-squared:  0.311,  Adjusted R-squared:  0.31 
## F-statistic:  375 on 6 and 4975 DF,  p-value: <2e-16
# List the components stored on the fitted lm object.
names(fit)
##  [1] "coefficients"  "residuals"     "effects"       "rank"          "fitted.values" "assign"        "qr"            "df.residual"   "xlevels"       "call"          "terms"         "model"
# Pull the predicted (fitted) values and the residuals out of the model
# through the standard accessor functions.
fit.p <- fitted(fit)
fit.r <- residuals(fit)

# Residuals against predicted values; the x-axis is limited to about [-2, 2].
plot(
  x = fit.p,
  y = fit.r,
  type = "p",
  xlim = c(-2, 2),
  xlab = "Predicted",
  ylab = "Residual",
  main = "Predicted vs Residual"
)

## plot of chunk unnamed-chunk-1