Import library and data.
# Load MASS, the package that supplies the Boston data frame.
library(MASS)
# Put Boston's columns on the search path so they can be referenced by bare
# name (for example medv or lstat) without a Boston$ prefix.
# NOTE(review): attach() is fragile -- same-named objects in the workspace
# mask the attached columns. Passing data = Boston to each call is the safer
# pattern; consider migrating.
attach(Boston)
# List the column names of Boston.
names(Boston)
## [1] "crim" "zn" "indus" "chas" "nox" "rm" "age"
## [8] "dis" "rad" "tax" "ptratio" "black" "lstat" "medv"
Fit a simple linear regression model using the lm() function. The basic syntax is lm(y~x,data), where y is the response, x is the predictor, and data is the data set in which these two variables are kept.
# Fit the simple linear regression of medv on lstat. Passing data = Boston
# makes lm() look the variables up in the data frame itself, so the fit does
# not depend on what happens to be on the search path.
lm.fit <- lm(medv ~ lstat, data = Boston)
# Printing the fitted object shows the call and the rounded coefficients.
lm.fit
##
## Call:
## lm(formula = medv ~ lstat, data = Boston)
##
## Coefficients:
## (Intercept) lstat
## 34.55 -0.95
# Extract the coefficient estimates with more digits than the print above.
coef(lm.fit)
## (Intercept) lstat
## 34.5538409 -0.9500494
# Full summary: residual quantiles, coefficient table with standard errors,
# t statistics and p-values, plus R-squared and the overall F-statistic.
summary(lm.fit)
##
## Call:
## lm(formula = medv ~ lstat, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.168 -3.990 -1.318 2.034 24.500
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.55384 0.56263 61.41 <2e-16 ***
## lstat -0.95005 0.03873 -24.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.216 on 504 degrees of freedom
## Multiple R-squared: 0.5441, Adjusted R-squared: 0.5432
## F-statistic: 601.6 on 1 and 504 DF, p-value: < 2.2e-16
# 95% confidence intervals for the intercept and the slope.
confint(lm.fit)
## 2.5 % 97.5 %
## (Intercept) 33.448457 35.6592247
## lstat -1.026148 -0.8739505
# Scatterplot of medv against lstat with the fitted regression line on top.
plot(medv ~ lstat, data = Boston)
abline(lm.fit)
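Explore further plotting options (line width, colour and plotting symbol), then examine regression diagnostics: residuals, studentized residuals and leverage.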
# Redraw the medv-versus-lstat scatterplot several times to try out
# graphical parameters.
# Regression line drawn three times the default width.
plot(medv ~ lstat, data = Boston)
abline(lm.fit, lwd = 3)
# Same thick regression line, coloured red.
plot(medv ~ lstat, data = Boston)
abline(lm.fit, lwd = 3, col = "red")
# Points coloured red.
plot(medv ~ lstat, data = Boston, col = "red")
# Points drawn with built-in plotting symbol number 20.
plot(medv ~ lstat, data = Boston, pch = 20)
# Points drawn with the literal character "+".
plot(medv ~ lstat, data = Boston, pch = "+")
# Built-in symbols 1 to 20, recycled across the observations.
plot(medv ~ lstat, data = Boston, pch = 1:20)
# Draw the four standard lm diagnostic plots together on one 2 x 2 page.
# par() returns the settings it replaces; they are restored straight after
# so the grid layout does not leak into the plots that follow.
old_par <- par(mfrow = c(2, 2))
plot(lm.fit)
par(old_par)
# Residuals against fitted values.
plot(predict(lm.fit), residuals(lm.fit))
# Studentized residuals against fitted values.
plot(predict(lm.fit), rstudent(lm.fit))
# Leverage (hat value) of every observation, in observation order.
plot(hatvalues(lm.fit))
# Observation with the largest leverage: the output shows its name and,
# beneath it, its position in the vector.
which.max(hatvalues(lm.fit))
## 375
## 375