Exercise 6.1

Constant variance assumption for the errors

# Load the sat data set from the faraway package and fit the full model
# of total score on all four predictors.
data("sat", package = "faraway")
lmod <- lm(total ~ expend + salary + ratio + takers, data = sat)

# Residuals against fitted values, with a horizontal reference line at zero.
plot(
  fitted(lmod),
  residuals(lmod),
  xlab = "Fitted",
  ylab = "Residuals"
)
abline(h = 0)

# Residuals against the takers predictor, with a horizontal reference
# line at zero.
plot(
  x = sat$takers,
  y = residuals(lmod),
  xlab = "takers",
  ylab = "Residuals"
)
abline(h = 0)

# F test for equal residual variance in two disjoint groups of states,
# split at the median of takers.
#
# The groups must be selected with a logical mask. Indexing the residuals
# with sat$takers itself treats the takers values as row positions, which
# yields NAs for values above the number of observations and compares the
# full residual vector with an arbitrary subset of itself.
high_takers <- sat$takers > median(sat$takers)
var.test(residuals(lmod)[high_takers], residuals(lmod)[!high_takers])
# NOTE: re-run this chunk to regenerate the printed test result; the
# previously recorded output belonged to the incorrect call and has been
# removed.

Normality Assumption

# Normal Q-Q plot of the residuals together with its reference line.
qqnorm(residuals(lmod), ylab = "Residuals", main = "")
qqline(residuals(lmod))

# Histogram of the same residuals.
hist(residuals(lmod), xlab = "Residuals", main = "")

Leverage Points

# Leverages: the diagonal elements of the hat matrix of lmod.
hatv <- hatvalues(model = lmod)

# First few leverages, labelled by state.
head(hatv)
##    Alabama     Alaska    Arizona   Arkansas California   Colorado 
## 0.09537668 0.18030612 0.04931612 0.05382878 0.28211791 0.03014533

# The leverages add up to the number of estimated coefficients
# (intercept plus four predictors).
sum(hatv)
## [1] 5

Outliers

# Simulated illustration of how one added point affects a simple
# regression line.
set.seed(123)
testdata <- data.frame(x = 1:10, y = 1:10 + rnorm(10))

# Fit on the simulated data alone. A separate name is used so that the
# SAT model stored in lmod stays available to the sections that follow.
lmod0 <- lm(y ~ x, testdata)

# Extra point (x = 5.5, y = 12) appended to the data, and the refit
# that includes it.
p1 <- c(5.5, 12)
lmod1 <- lm(y ~ x, rbind(testdata, p1))

plot(y ~ x, rbind(testdata, p1))
points(5.5, 12, pch = 4, cex = 2)  # mark the added point with a large cross
abline(lmod0)                      # solid: fit without the added point
abline(lmod1, lty = 2)             # dashed: fit with the added point

Influential Points

# Change in the takers coefficient when each observation is left out in
# turn. The column is selected by name: positional column 2 of dfbeta()
# belongs to expend (column 1 is the intercept), which would not match
# the axis label.
plot(dfbeta(lmod)[, "takers"], ylab = "Change in takers coef")
abline(h = 0)

Structure of the relationship between the predictors and the response

# Coefficient table and fit statistics for lmod; the recorded output
# follows. In this output takers is the only predictor whose coefficient
# is significant at the levels marked by the signif. codes.
summary(lmod)
## 
## Call:
## lm(formula = total ~ expend + salary + ratio + takers, data = sat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -90.531 -20.855  -1.746  15.979  66.571 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1045.9715    52.8698  19.784  < 2e-16 ***
## expend         4.4626    10.5465   0.423    0.674    
## salary         1.6379     2.3872   0.686    0.496    
## ratio         -3.6242     3.2154  -1.127    0.266    
## takers        -2.9045     0.2313 -12.559 2.61e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.7 on 45 degrees of freedom
## Multiple R-squared:  0.8246, Adjusted R-squared:  0.809 
## F-statistic: 52.88 on 4 and 45 DF,  p-value: < 2.2e-16
# Partial regression (added variable) plot for takers.
# d: residuals of the response regressed on every predictor except takers.
# m: residuals of takers regressed on those same predictors.
# takers must be left out of the model behind d; residuals from the full
# model are orthogonal to m, so the plot would show no relationship.
d <- residuals(lm(total ~ expend + salary + ratio, sat))
m <- residuals(lm(takers ~ expend + salary + ratio, sat))
plot(m, d, xlab = "takers residuals", ylab = "Sat Totals residuals")
# Line through the origin with the takers slope from the full model.
abline(0, coef(lmod)["takers"])