Fit a simple linear regression model

Load the MASS library and attach the Boston data set.

library(MASS) # load the MASS package
# Put the columns of Boston on the search path so that the code below can refer
# to them by bare name (e.g. medv, lstat).
# NOTE(review): attach() is generally discouraged because it hides where
# variables come from; passing data = Boston to each call is the safer
# alternative -- consider migrating.
attach(Boston)
names(Boston) # list the variable names in Boston
##  [1] "crim"    "zn"      "indus"   "chas"    "nox"     "rm"      "age"    
##  [8] "dis"     "rad"     "tax"     "ptratio" "black"   "lstat"   "medv"

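Fit a simple linear regression model using the lm() function. The basic syntax is lm(y ~ x, data), where y is the response, x is the predictor, and data is the data set in which these two variables are kept. Because Boston was attached above, the data argument is omitted in the call below and medv and lstat are found directly.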

# Regress medv (response) on lstat (predictor); no data argument is given, so
# both variables are looked up via the attached Boston data set.
lm.fit <- lm(formula = medv ~ lstat)

Obtain information about the model

lm.fit # print the model call and the estimated coefficients
## 
## Call:
## lm(formula = medv ~ lstat)
## 
## Coefficients:
## (Intercept)        lstat  
##       34.55        -0.95
coef(lm.fit) # extract the coefficient estimates as a named vector
## (Intercept)       lstat 
##  34.5538409  -0.9500494
# Detailed summary: residual quantiles, coefficient table with standard errors,
# t values and p-values, residual standard error, R-squared and F-statistic.
summary(lm.fit)
## 
## Call:
## lm(formula = medv ~ lstat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.168  -3.990  -1.318   2.034  24.500 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.55384    0.56263   61.41   <2e-16 ***
## lstat       -0.95005    0.03873  -24.53   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.216 on 504 degrees of freedom
## Multiple R-squared:  0.5441, Adjusted R-squared:  0.5432 
## F-statistic: 601.6 on 1 and 504 DF,  p-value: < 2.2e-16
confint(lm.fit) # 95% confidence intervals for the intercept and the lstat slope
##                 2.5 %     97.5 %
## (Intercept) 33.448457 35.6592247
## lstat       -1.026148 -0.8739505

Plot the data together with the fitted regression line.

# Scatter plot of medv against lstat, then overlay the fitted regression line.
plot(x = lstat, y = medv)
abline(reg = lm.fit)

Explore further plotting options and regression diagnostics.

# Same scatter plot, with the regression line drawn three times as thick.
plot(x = lstat, y = medv)
abline(reg = lm.fit, lwd = 3)

# Thick regression line again, this time drawn in red.
plot(x = lstat, y = medv)
abline(reg = lm.fit, lwd = 3, col = "red")

# Draw the points in red.
plot(x = lstat, y = medv, col = "red")

# Use plotting symbol 20 for the points.
plot(x = lstat, y = medv, pch = 20)

# Use the "+" character as the plotting symbol.
plot(x = lstat, y = medv, pch = "+")

# Supply symbols 1 to 20 as a vector; the symbols are reused in order across
# the observations, so a point's symbol reflects only its row position.
plot(x = lstat, y = medv, pch = 1:20)

# Show the four diagnostic plots for the fit in a 2 x 2 grid. par() returns the
# previous settings, which are restored afterwards so that later plots are not
# squeezed into cells of the grid.
old_par <- par(mfrow = c(2, 2))
plot(lm.fit) # produce four diagnostic plots
par(old_par)

# Residuals against fitted values.
plot(x = predict(lm.fit), y = residuals(lm.fit))

# Studentized residuals against fitted values.
plot(x = predict(lm.fit), y = rstudent(lm.fit))

# Leverage (hat) value of each observation, plotted against its index.
plot(x = hatvalues(lm.fit))

which.max(hatvalues(lm.fit)) # position of the observation with the largest leverage statistic
## 375 
## 375