Exercise 6.1
Constant Variance assumption for errors
# Load the `sat` data from the faraway package and regress the total SAT
# score on the four predictors; `lmod` is reused by the diagnostics below.
data("sat", package = "faraway")
lmod <- lm(formula = total ~ expend + salary + ratio + takers, data = sat)

# Residuals against fitted values, with a zero reference line, to look for
# non-constant variance or curvature.
plot(
  x = fitted(lmod), y = residuals(lmod),
  xlab = "Fitted", ylab = "Residuals"
)
abline(h = 0)

# Residuals against the `takers` predictor, with a zero reference line.
plot(
  x = sat[["takers"]], y = residuals(lmod),
  xlab = "takers", ylab = "Residuals"
)
abline(h = 0)

# F test for equal residual variance in two disjoint groups of states,
# split at the median of `takers`.
#
# The earlier call, var.test(residuals(lmod), residuals(lmod)[sat$takers]),
# used the takers values as positional indices into the residual vector:
# values larger than the number of rows produced NA, repeated values produced
# duplicated residuals, and the second sample overlapped the first, so the two
# samples were not independent as the F test requires.
# NOTE(review): the median is a convenient data-driven cut point; pick a
# different threshold if the residuals-vs-takers plot suggests a natural gap.
low_takers <- sat$takers <= median(sat$takers)
var.test(residuals(lmod)[low_takers], residuals(lmod)[!low_takers])
# NOTE: output not yet regenerated for the corrected call. The previously
# recorded result (F = 0.85642, num df = 49, denom df = 31, p-value = 0.616)
# belonged to the invalid comparison described above and should not be used.
Normality Assumption
# Normal Q-Q plot of the residuals with its reference line.
qqnorm(y = residuals(lmod), main = "", ylab = "Residuals")
qqline(y = residuals(lmod))

# Histogram of the same residuals as a second look at their distribution.
hist(residuals(lmod), main = "", xlab = "Residuals")

Leverage Points
# Leverages (hat values) of the fitted model, one per observation.
hatv <- hatvalues(model = lmod)
head(x = hatv, n = 6L)
## Alabama Alaska Arizona Arkansas California Colorado
## 0.09537668 0.18030612 0.04931612 0.05382878 0.28211791 0.03014533
# The leverages sum to the number of estimated coefficients (5 here).
sum(hatv)
## [1] 5
Outliers
# Simulated illustration of how one outlying point shifts a fitted line.
set.seed(123)
testdata <- data.frame(x = 1:10, y = 1:10 + rnorm(10))
p1 <- c(5.5, 12)  # extra point placed well above the trend
with_outlier <- rbind(testdata, p1)

# Fit with and without the extra point. The baseline fit gets its own name
# (`lmod0`) so the `lmod` model for the sat data is not overwritten; the
# original code drew abline(lmod), i.e. the 5-coefficient sat model, which
# triggered "only using the first two of 5 regression coefficients".
lmod0 <- lm(y ~ x, testdata)
lmod1 <- lm(y ~ x, with_outlier)

plot(y ~ x, with_outlier)
points(p1[1], p1[2], pch = 4, cex = 2)  # mark the added point
abline(lmod0)            # solid: fit without the added point
abline(lmod1, lty = 2)   # dashed: fit including the added point

Influential Points
# Change in the `takers` coefficient when each observation is left out.
# Select the column by name: column 2 of dfbeta(lmod) is `expend` (column 1
# is the intercept), so the positional index did not match the axis label.
plot(dfbeta(lmod)[, "takers"], ylab = "Change in takers coef")
abline(h = 0)

Structure of relationship between predictors and response
# Coefficient table for the full model, for reference.
summary(lmod)
##
## Call:
## lm(formula = total ~ expend + salary + ratio + takers, data = sat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -90.531 -20.855 -1.746 15.979 66.571
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1045.9715 52.8698 19.784 < 2e-16 ***
## expend 4.4626 10.5465 0.423 0.674
## salary 1.6379 2.3872 0.686 0.496
## ratio -3.6242 3.2154 -1.127 0.266
## takers -2.9045 0.2313 -12.559 2.61e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.7 on 45 degrees of freedom
## Multiple R-squared: 0.8246, Adjusted R-squared: 0.809
## F-statistic: 52.88 on 4 and 45 DF, p-value: < 2.2e-16

# Partial regression (added-variable) plot for `takers`.
# Both the response and `takers` are regressed on the OTHER predictors only;
# plotting one set of residuals against the other isolates the effect of
# `takers`. The original code left `takers` in the response model, which made
# `d` the full-model residuals (uncorrelated with `m`), so the reference line
# did not describe the scatter.
d <- residuals(lm(total ~ expend + salary + ratio, sat))
m <- residuals(lm(takers ~ expend + salary + ratio, sat))
plot(m, d, xlab = "takers residuals", ylab = "Sat Totals residuals")
# The least-squares slope of d on m equals the takers coefficient in lmod.
abline(0, coef(lmod)["takers"])
