library(MASS)
# Preview the first six rows of the Boston housing data to check the columns.
head(Boston, n = 6L)
## crim zn indus chas nox rm age dis rad tax ptratio black lstat
## 1 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98
## 2 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14
## 3 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03
## 4 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94
## 5 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33
## 6 0.02985 0 2.18 0 0.458 6.430 58.7 6.0622 3 222 18.7 394.12 5.21
## medv
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7
# Per-column summary statistics (min, quartiles, mean, max) for every variable.
summary(object = Boston)
## crim zn indus chas
## Min. : 0.00632 Min. : 0.00 Min. : 0.46 Min. :0.00000
## 1st Qu.: 0.08205 1st Qu.: 0.00 1st Qu.: 5.19 1st Qu.:0.00000
## Median : 0.25651 Median : 0.00 Median : 9.69 Median :0.00000
## Mean : 3.61352 Mean : 11.36 Mean :11.14 Mean :0.06917
## 3rd Qu.: 3.67708 3rd Qu.: 12.50 3rd Qu.:18.10 3rd Qu.:0.00000
## Max. :88.97620 Max. :100.00 Max. :27.74 Max. :1.00000
## nox rm age dis
## Min. :0.3850 Min. :3.561 Min. : 2.90 Min. : 1.130
## 1st Qu.:0.4490 1st Qu.:5.886 1st Qu.: 45.02 1st Qu.: 2.100
## Median :0.5380 Median :6.208 Median : 77.50 Median : 3.207
## Mean :0.5547 Mean :6.285 Mean : 68.57 Mean : 3.795
## 3rd Qu.:0.6240 3rd Qu.:6.623 3rd Qu.: 94.08 3rd Qu.: 5.188
## Max. :0.8710 Max. :8.780 Max. :100.00 Max. :12.127
## rad tax ptratio black
## Min. : 1.000 Min. :187.0 Min. :12.60 Min. : 0.32
## 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40 1st Qu.:375.38
## Median : 5.000 Median :330.0 Median :19.05 Median :391.44
## Mean : 9.549 Mean :408.2 Mean :18.46 Mean :356.67
## 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20 3rd Qu.:396.23
## Max. :24.000 Max. :711.0 Max. :22.00 Max. :396.90
## lstat medv
## Min. : 1.73 Min. : 5.00
## 1st Qu.: 6.95 1st Qu.:17.02
## Median :11.36 Median :21.20
## Mean :12.65 Mean :22.53
## 3rd Qu.:16.95 3rd Qu.:25.00
## Max. :37.97 Max. :50.00
# Linear model for medv (median value of owner-occupied homes, in $1000s).
# Predictors:
#   * lstat and I(lstat^2): linear plus quadratic term for 'lstat'
#     (lower status of the population)
#   * chas, rm and chas:rm: main effects and interaction between 'rm'
#     (average number of rooms per dwelling) and 'chas', the dichotomous
#     Charles River dummy variable
model <- lm(
  formula = medv ~ lstat + I(lstat^2) + chas + rm + chas:rm,
  data = Boston
)
summary(model)
##
## Call:
## lm(formula = medv ~ lstat + I(lstat^2) + chas + rm + chas:rm,
## data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.5777 -3.1152 -0.3891 2.4852 26.6456
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.436162 3.124285 3.660 0.000279 ***
## lstat -1.867717 0.120111 -15.550 < 2e-16 ***
## I(lstat^2) 0.036932 0.003427 10.777 < 2e-16 ***
## chas 13.963145 6.707781 2.082 0.037884 *
## rm 4.241223 0.416695 10.178 < 2e-16 ***
## chas:rm -1.516264 1.024477 -1.480 0.139493
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.916 on 500 degrees of freedom
## Multiple R-squared: 0.7171, Adjusted R-squared: 0.7143
## F-statistic: 253.5 on 5 and 500 DF, p-value: < 2.2e-16
# The coefficient on lstat is negative and the coefficient on I(lstat^2) is
# positive, so the decrease in medv due to lstat slows down as lstat increases.

# Draw the standard lm diagnostic plots in a 2 x 2 grid. par() returns the
# previous settings, which are restored afterwards so that later plots are
# not silently forced into the same layout.
old_par <- par(mfrow = c(2, 2))
plot(model)
par(old_par)
# The residual diagnostics suggest approximately normally distributed errors,
# with some potential outliers. The outliers and the heteroscedasticity
# visible in the residual plots suggest that the model needs adjustment, for
# example variable transformations or robust regression techniques. Although
# the R-squared (about 0.72) indicates a good fit, addressing these issues may
# improve the model's reliability.