# load in packages we'll use
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.2
## Warning: package 'ggplot2' was built under R version 4.4.2
## Warning: package 'tibble' was built under R version 4.4.2
## Warning: package 'tidyr' was built under R version 4.4.2
## Warning: package 'readr' was built under R version 4.4.2
## Warning: package 'purrr' was built under R version 4.4.2
## Warning: package 'dplyr' was built under R version 4.4.2
## Warning: package 'forcats' was built under R version 4.4.2
## Warning: package 'lubridate' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## âś” dplyr 1.1.4 âś” readr 2.1.5
## âś” forcats 1.0.0 âś” stringr 1.5.1
## âś” ggplot2 3.5.1 âś” tibble 3.2.1
## âś” lubridate 1.9.3 âś” tidyr 1.3.1
## âś” purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## âś– dplyr::filter() masks stats::filter()
## âś– dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
setwd('C:\\Users\\Spencer\\Desktop\\R markdown\\Project')
# read in the test and train data
train <- read.table('train.csv',header = T,sep=",")
test <- read.table('test.csv',header=T,sep=",")
n = nrow(train)
p = ncol(train) -1
y <- vector('numeric', n)
y <- train[ , 'medv']
X <- matrix(0, n, p+1)
X[, 1] <- rep(1, n)
for(j in 1:p)
X[, j+1] <-train[,j]
# fit our model using all predictors
fullfit <- lm(medv~ ., data = train)
# get our summary of our initial model
summary(fullfit)
##
## Call:
## lm(formula = medv ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.4449 -2.9231 -0.6063 2.2156 24.5896
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.148911 6.210430 6.626 1.21e-10 ***
## crim -0.132999 0.036329 -3.661 0.000288 ***
## zn 0.039915 0.016965 2.353 0.019154 *
## indus 0.012679 0.074582 0.170 0.865107
## chas 2.967850 1.080076 2.748 0.006291 **
## nox -16.362308 5.058274 -3.235 0.001326 **
## rm 3.533316 0.490719 7.200 3.35e-12 ***
## age 0.003979 0.015979 0.249 0.803478
## dis -1.469930 0.239584 -6.135 2.18e-09 ***
## rad 0.348479 0.081878 4.256 2.64e-05 ***
## tax -0.012953 0.004602 -2.815 0.005140 **
## ptratio -1.074000 0.163417 -6.572 1.68e-10 ***
## black 0.007668 0.003256 2.355 0.019021 *
## lstat -0.586815 0.060211 -9.746 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.032 on 372 degrees of freedom
## Multiple R-squared: 0.7328, Adjusted R-squared: 0.7235
## F-statistic: 78.49 on 13 and 372 DF, p-value: < 2.2e-16
sample_indices <- sample(1:nrow(train), size = 0.8 * nrow(train))
train_set <- train[sample_indices, ]
test_set <- train[-sample_indices, ]
predictions <- predict(fullfit, newdata=test_set)
residuals <- test_set$medv - predictions
rmspe <- sqrt(mean(residuals^2))
rmspe
## [1] 5.272312
print(paste("Root Mean Square Prediction Error is", round(rmspe, 2)))
## [1] "Root Mean Square Prediction Error is 5.27"
null <- lm(medv ~ 1, data=train)
full <- lm(medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad + tax + ptratio + black + lstat, data=train)
sfit_f <- step(null, scope = list(lower=null, upper=full), direction='forward')
## Start: AIC=1744.66
## medv ~ 1
##
## Df Sum of Sq RSS AIC
## + lstat 1 19031.4 16229 1447.1
## + rm 1 16513.9 18746 1502.8
## + ptratio 1 10274.0 24986 1613.7
## + indus 1 7778.3 27482 1650.5
## + tax 1 7299.9 27960 1657.1
## + nox 1 6227.5 29033 1671.7
## + crim 1 5017.6 30243 1687.4
## + rad 1 4854.0 30406 1689.5
## + age 1 4583.2 30677 1692.9
## + zn 1 3978.6 31282 1700.5
## + black 1 3565.7 31695 1705.5
## + dis 1 1837.6 33423 1726.0
## + chas 1 1556.0 33704 1729.2
## <none> 35260 1744.7
##
## Step: AIC=1447.14
## medv ~ lstat
##
## Df Sum of Sq RSS AIC
## + rm 1 3185.8 13043 1364.8
## + ptratio 1 2440.6 13788 1386.2
## + dis 1 798.6 15430 1429.7
## + chas 1 793.4 15435 1429.8
## + age 1 278.4 15950 1442.5
## + crim 1 174.7 16054 1445.0
## + tax 1 129.3 16100 1446.0
## <none> 16229 1447.1
## + black 1 68.4 16160 1447.5
## + nox 1 63.0 16166 1447.6
## + zn 1 49.3 16180 1448.0
## + indus 1 26.6 16202 1448.5
## + rad 1 1.6 16227 1449.1
##
## Step: AIC=1364.79
## medv ~ lstat + rm
##
## Df Sum of Sq RSS AIC
## + ptratio 1 1389.47 11654 1323.3
## + chas 1 523.20 12520 1351.0
## + dis 1 424.12 12619 1354.0
## + crim 1 296.02 12747 1357.9
## + black 1 288.43 12755 1358.2
## + tax 1 172.62 12870 1361.6
## <none> 13043 1364.8
## + age 1 51.54 12992 1365.3
## + rad 1 42.86 13000 1365.5
## + zn 1 8.48 13035 1366.5
## + indus 1 7.52 13036 1366.6
## + nox 1 1.71 13041 1366.7
##
## Step: AIC=1323.31
## medv ~ lstat + rm + ptratio
##
## Df Sum of Sq RSS AIC
## + dis 1 690.27 10963 1301.7
## + chas 1 454.29 11199 1310.0
## + age 1 173.20 11480 1319.5
## + black 1 168.38 11485 1319.7
## + crim 1 128.19 11525 1321.0
## + zn 1 83.39 11570 1322.5
## + rad 1 61.68 11592 1323.3
## <none> 11654 1323.3
## + indus 1 52.35 11601 1323.6
## + nox 1 22.69 11631 1324.5
## + tax 1 0.05 11654 1325.3
##
## Step: AIC=1301.74
## medv ~ lstat + rm + ptratio + dis
##
## Df Sum of Sq RSS AIC
## + chas 1 326.43 10637 1292.1
## + nox 1 325.04 10638 1292.1
## + crim 1 281.34 10682 1293.7
## + black 1 246.26 10717 1295.0
## + tax 1 109.86 10854 1299.8
## + indus 1 107.90 10855 1299.9
## + zn 1 70.81 10892 1301.2
## <none> 10963 1301.7
## + age 1 21.05 10942 1303.0
## + rad 1 0.91 10962 1303.7
##
## Step: AIC=1292.07
## medv ~ lstat + rm + ptratio + dis + chas
##
## Df Sum of Sq RSS AIC
## + nox 1 353.46 10283 1281.0
## + crim 1 252.13 10385 1284.8
## + black 1 204.51 10432 1286.6
## + indus 1 128.89 10508 1289.4
## + tax 1 102.28 10535 1290.3
## + zn 1 69.33 10568 1291.5
## <none> 10637 1292.1
## + age 1 28.37 10608 1293.0
## + rad 1 2.96 10634 1294.0
##
## Step: AIC=1281.03
## medv ~ lstat + rm + ptratio + dis + chas + nox
##
## Df Sum of Sq RSS AIC
## + crim 1 172.314 10111 1276.5
## + black 1 128.202 10155 1278.2
## + zn 1 74.671 10209 1280.2
## + rad 1 59.254 10224 1280.8
## <none> 10283 1281.0
## + indus 1 15.132 10268 1282.5
## + tax 1 1.580 10282 1283.0
## + age 1 0.119 10283 1283.0
##
## Step: AIC=1276.5
## medv ~ lstat + rm + ptratio + dis + chas + nox + crim
##
## Df Sum of Sq RSS AIC
## + rad 1 229.717 9881.4 1269.6
## + zn 1 118.784 9992.3 1273.9
## + black 1 79.723 10031.4 1275.5
## <none> 10111.1 1276.5
## + tax 1 14.290 10096.8 1278.0
## + indus 1 11.632 10099.5 1278.1
## + age 1 1.105 10110.0 1278.5
##
## Step: AIC=1269.63
## medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad
##
## Df Sum of Sq RSS AIC
## + tax 1 178.766 9702.6 1264.6
## + black 1 153.147 9728.2 1265.6
## + zn 1 78.932 9802.5 1268.5
## <none> 9881.4 1269.6
## + indus 1 32.780 9848.6 1270.3
## + age 1 0.186 9881.2 1271.6
##
## Step: AIC=1264.59
## medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad +
## tax
##
## Df Sum of Sq RSS AIC
## + black 1 141.627 9561.0 1260.9
## + zn 1 138.071 9564.6 1261.0
## <none> 9702.6 1264.6
## + indus 1 0.564 9702.1 1266.6
## + age 1 0.040 9702.6 1266.6
##
## Step: AIC=1260.91
## medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad +
## tax + black
##
## Df Sum of Sq RSS AIC
## + zn 1 138.123 9422.9 1257.3
## <none> 9561.0 1260.9
## + indus 1 0.168 9560.8 1262.9
## + age 1 0.113 9560.9 1262.9
##
## Step: AIC=1257.29
## medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad +
## tax + black + zn
##
## Df Sum of Sq RSS AIC
## <none> 9422.9 1257.3
## + age 1 1.60238 9421.3 1259.2
## + indus 1 0.76375 9422.1 1259.3
sfit_stepwise <- step(null, scope=list(lower=null, upper=full), direction='both')
## Start: AIC=1744.66
## medv ~ 1
##
## Df Sum of Sq RSS AIC
## + lstat 1 19031.4 16229 1447.1
## + rm 1 16513.9 18746 1502.8
## + ptratio 1 10274.0 24986 1613.7
## + indus 1 7778.3 27482 1650.5
## + tax 1 7299.9 27960 1657.1
## + nox 1 6227.5 29033 1671.7
## + crim 1 5017.6 30243 1687.4
## + rad 1 4854.0 30406 1689.5
## + age 1 4583.2 30677 1692.9
## + zn 1 3978.6 31282 1700.5
## + black 1 3565.7 31695 1705.5
## + dis 1 1837.6 33423 1726.0
## + chas 1 1556.0 33704 1729.2
## <none> 35260 1744.7
##
## Step: AIC=1447.14
## medv ~ lstat
##
## Df Sum of Sq RSS AIC
## + rm 1 3185.8 13043 1364.8
## + ptratio 1 2440.6 13788 1386.2
## + dis 1 798.6 15430 1429.7
## + chas 1 793.4 15435 1429.8
## + age 1 278.4 15950 1442.5
## + crim 1 174.7 16054 1445.0
## + tax 1 129.3 16100 1446.0
## <none> 16229 1447.1
## + black 1 68.4 16160 1447.5
## + nox 1 63.0 16166 1447.6
## + zn 1 49.3 16180 1448.0
## + indus 1 26.6 16202 1448.5
## + rad 1 1.6 16227 1449.1
## - lstat 1 19031.4 35260 1744.7
##
## Step: AIC=1364.79
## medv ~ lstat + rm
##
## Df Sum of Sq RSS AIC
## + ptratio 1 1389.5 11654 1323.3
## + chas 1 523.2 12520 1351.0
## + dis 1 424.1 12619 1354.0
## + crim 1 296.0 12747 1357.9
## + black 1 288.4 12755 1358.2
## + tax 1 172.6 12870 1361.6
## <none> 13043 1364.8
## + age 1 51.5 12992 1365.3
## + rad 1 42.9 13000 1365.5
## + zn 1 8.5 13035 1366.5
## + indus 1 7.5 13036 1366.6
## + nox 1 1.7 13041 1366.7
## - rm 1 3185.8 16229 1447.1
## - lstat 1 5703.2 18746 1502.8
##
## Step: AIC=1323.31
## medv ~ lstat + rm + ptratio
##
## Df Sum of Sq RSS AIC
## + dis 1 690.3 10963 1301.7
## + chas 1 454.3 11199 1310.0
## + age 1 173.2 11480 1319.5
## + black 1 168.4 11485 1319.7
## + crim 1 128.2 11525 1321.0
## + zn 1 83.4 11570 1322.5
## + rad 1 61.7 11592 1323.3
## <none> 11654 1323.3
## + indus 1 52.3 11601 1323.6
## + nox 1 22.7 11631 1324.5
## + tax 1 0.1 11654 1325.3
## - ptratio 1 1389.5 13043 1364.8
## - rm 1 2134.6 13788 1386.2
## - lstat 1 4322.5 15976 1443.1
##
## Step: AIC=1301.74
## medv ~ lstat + rm + ptratio + dis
##
## Df Sum of Sq RSS AIC
## + chas 1 326.4 10637 1292.1
## + nox 1 325.0 10638 1292.1
## + crim 1 281.3 10682 1293.7
## + black 1 246.3 10717 1295.0
## + tax 1 109.9 10854 1299.8
## + indus 1 107.9 10855 1299.9
## + zn 1 70.8 10892 1301.2
## <none> 10963 1301.7
## + age 1 21.1 10942 1303.0
## + rad 1 0.9 10962 1303.7
## - dis 1 690.3 11654 1323.3
## - ptratio 1 1655.6 12619 1354.0
## - rm 1 1686.3 12650 1355.0
## - lstat 1 4979.5 15943 1444.3
##
## Step: AIC=1292.07
## medv ~ lstat + rm + ptratio + dis + chas
##
## Df Sum of Sq RSS AIC
## + nox 1 353.5 10283 1281.0
## + crim 1 252.1 10385 1284.8
## + black 1 204.5 10432 1286.6
## + indus 1 128.9 10508 1289.4
## + tax 1 102.3 10535 1290.3
## + zn 1 69.3 10568 1291.5
## <none> 10637 1292.1
## + age 1 28.4 10608 1293.0
## + rad 1 3.0 10634 1294.0
## - chas 1 326.4 10963 1301.7
## - dis 1 562.4 11199 1310.0
## - ptratio 1 1561.0 12198 1342.9
## - rm 1 1591.1 12228 1343.9
## - lstat 1 4820.1 15457 1434.3
##
## Step: AIC=1281.03
## medv ~ lstat + rm + ptratio + dis + chas + nox
##
## Df Sum of Sq RSS AIC
## + crim 1 172.3 10111 1276.5
## + black 1 128.2 10155 1278.2
## + zn 1 74.7 10209 1280.2
## + rad 1 59.3 10224 1280.8
## <none> 10283 1281.0
## + indus 1 15.1 10268 1282.5
## + tax 1 1.6 10282 1283.0
## + age 1 0.1 10283 1283.0
## - nox 1 353.5 10637 1292.1
## - chas 1 354.9 10638 1292.1
## - dis 1 910.8 11194 1311.8
## - ptratio 1 1572.2 11856 1333.9
## - rm 1 1625.9 11909 1335.7
## - lstat 1 3291.3 13575 1386.2
##
## Step: AIC=1276.5
## medv ~ lstat + rm + ptratio + dis + chas + nox + crim
##
## Df Sum of Sq RSS AIC
## + rad 1 229.72 9881.4 1269.6
## + zn 1 118.78 9992.3 1273.9
## + black 1 79.72 10031.4 1275.5
## <none> 10111.1 1276.5
## + tax 1 14.29 10096.8 1278.0
## + indus 1 11.63 10099.5 1278.1
## + age 1 1.10 10110.0 1278.5
## - crim 1 172.31 10283.4 1281.0
## - nox 1 273.64 10384.8 1284.8
## - chas 1 325.75 10436.9 1286.7
## - dis 1 945.44 11056.5 1309.0
## - ptratio 1 1407.41 11518.5 1324.8
## - rm 1 1696.10 11807.2 1334.4
## - lstat 1 2929.75 13040.9 1372.7
##
## Step: AIC=1269.63
## medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad
##
## Df Sum of Sq RSS AIC
## + tax 1 178.77 9702.6 1264.6
## + black 1 153.15 9728.2 1265.6
## + zn 1 78.93 9802.5 1268.5
## <none> 9881.4 1269.6
## + indus 1 32.78 9848.6 1270.3
## + age 1 0.19 9881.2 1271.6
## - rad 1 229.72 10111.1 1276.5
## - chas 1 293.77 10175.2 1278.9
## - crim 1 342.78 10224.2 1280.8
## - nox 1 462.91 10344.3 1285.3
## - dis 1 1024.55 10905.9 1305.7
## - rm 1 1525.55 11406.9 1323.0
## - ptratio 1 1637.08 11518.5 1326.8
## - lstat 1 2959.07 12840.5 1368.7
##
## Step: AIC=1264.59
## medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad +
## tax
##
## Df Sum of Sq RSS AIC
## + black 1 141.63 9561.0 1260.9
## + zn 1 138.07 9564.6 1261.0
## <none> 9702.6 1264.6
## + indus 1 0.56 9702.1 1266.6
## + age 1 0.04 9702.6 1266.6
## - tax 1 178.77 9881.4 1269.6
## - chas 1 238.59 9941.2 1272.0
## - nox 1 344.56 10047.2 1276.1
## - crim 1 345.42 10048.0 1276.1
## - rad 1 394.19 10096.8 1278.0
## - dis 1 1034.73 10737.4 1301.7
## - rm 1 1401.70 11104.3 1314.7
## - ptratio 1 1562.33 11265.0 1320.2
## - lstat 1 2889.72 12592.3 1363.2
##
## Step: AIC=1260.91
## medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad +
## tax + black
##
## Df Sum of Sq RSS AIC
## + zn 1 138.12 9422.9 1257.3
## <none> 9561.0 1260.9
## + indus 1 0.17 9560.8 1262.9
## + age 1 0.11 9560.9 1262.9
## - black 1 141.63 9702.6 1264.6
## - tax 1 167.25 9728.2 1265.6
## - chas 1 206.03 9767.0 1267.1
## - crim 1 305.67 9866.7 1271.1
## - nox 1 332.57 9893.6 1272.1
## - rad 1 439.61 10000.6 1276.3
## - dis 1 1016.30 10577.3 1297.9
## - rm 1 1493.16 11054.2 1314.9
## - ptratio 1 1578.70 11139.7 1317.9
## - lstat 1 2571.58 12132.6 1350.9
##
## Step: AIC=1257.29
## medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad +
## tax + black + zn
##
## Df Sum of Sq RSS AIC
## <none> 9422.9 1257.3
## + age 1 1.60 9421.3 1259.2
## + indus 1 0.76 9422.1 1259.3
## - zn 1 138.12 9561.0 1260.9
## - black 1 141.68 9564.6 1261.0
## - chas 1 196.08 9619.0 1263.2
## - tax 1 224.82 9647.7 1264.4
## - nox 1 284.72 9707.6 1266.8
## - crim 1 340.06 9762.9 1269.0
## - rad 1 472.93 9895.8 1274.2
## - ptratio 1 1101.85 10524.7 1298.0
## - dis 1 1120.08 10543.0 1298.7
## - rm 1 1366.46 10789.3 1307.6
## - lstat 1 2574.32 11997.2 1348.5
sfit_b <- step(full, direction='backward')
## Start: AIC=1261.2
## medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad +
## tax + ptratio + black + lstat
##
## Df Sum of Sq RSS AIC
## - indus 1 0.73 9421.3 1259.2
## - age 1 1.57 9422.1 1259.3
## <none> 9420.5 1261.2
## - zn 1 140.18 9560.7 1264.9
## - black 1 140.50 9561.0 1264.9
## - chas 1 191.21 9611.7 1267.0
## - tax 1 200.65 9621.2 1267.3
## - nox 1 264.98 9685.5 1269.9
## - crim 1 339.42 9760.0 1272.9
## - rad 1 458.72 9879.3 1277.5
## - dis 1 953.25 10373.8 1296.4
## - ptratio 1 1093.82 10514.4 1301.6
## - rm 1 1312.90 10733.4 1309.6
## - lstat 1 2405.41 11826.0 1347.0
##
## Step: AIC=1259.23
## medv ~ crim + zn + chas + nox + rm + age + dis + rad + tax +
## ptratio + black + lstat
##
## Df Sum of Sq RSS AIC
## - age 1 1.60 9422.9 1257.3
## <none> 9421.3 1259.2
## - zn 1 139.61 9560.9 1262.9
## - black 1 140.02 9561.3 1262.9
## - chas 1 194.79 9616.1 1265.1
## - tax 1 225.23 9646.5 1266.3
## - nox 1 274.98 9696.3 1268.3
## - crim 1 340.54 9761.8 1270.9
## - rad 1 474.51 9895.8 1276.2
## - dis 1 1002.51 10423.8 1296.3
## - ptratio 1 1102.09 10523.4 1299.9
## - rm 1 1320.87 10742.1 1307.9
## - lstat 1 2411.56 11832.8 1345.2
##
## Step: AIC=1257.29
## medv ~ crim + zn + chas + nox + rm + dis + rad + tax + ptratio +
## black + lstat
##
## Df Sum of Sq RSS AIC
## <none> 9422.9 1257.3
## - zn 1 138.12 9561.0 1260.9
## - black 1 141.68 9564.6 1261.0
## - chas 1 196.08 9619.0 1263.2
## - tax 1 224.82 9647.7 1264.4
## - nox 1 284.72 9707.6 1266.8
## - crim 1 340.06 9762.9 1269.0
## - rad 1 472.93 9895.8 1274.2
## - ptratio 1 1101.85 10524.7 1298.0
## - dis 1 1120.08 10543.0 1298.7
## - rm 1 1366.46 10789.3 1307.6
## - lstat 1 2574.32 11997.2 1348.5
cat("AIC from Forward Selection:", AIC(sfit_f), "\n")
## AIC from Forward Selection: 2354.713
cat("AIC from Backward Elimination:", AIC(sfit_b), "\n")
## AIC from Backward Elimination: 2354.713
cat("AIC from Stepwise Regression:", AIC(sfit_stepwise), "\n")
## AIC from Stepwise Regression: 2354.713
So we can conclude the best model under AIC criteria is medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad + tax + black + zn. This is agreed by all three approaches
selected_model <- lm(medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad + tax + black + zn, data=train)
summary(selected_model)
##
## Call:
## lm(formula = medv ~ lstat + rm + ptratio + dis + chas + nox +
## crim + rad + tax + black + zn, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.4900 -2.9318 -0.5881 2.1835 24.7178
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.027390 6.181207 6.637 1.12e-10 ***
## lstat -0.581911 0.057568 -10.108 < 2e-16 ***
## rm 3.543253 0.481127 7.364 1.14e-12 ***
## ptratio -1.067792 0.161466 -6.613 1.30e-10 ***
## dis -1.495080 0.224231 -6.668 9.36e-11 ***
## chas 2.992400 1.072645 2.790 0.005545 **
## nox -15.814417 4.704344 -3.362 0.000855 ***
## crim -0.133066 0.036220 -3.674 0.000274 ***
## rad 0.344314 0.079472 4.333 1.90e-05 ***
## tax -0.012638 0.004231 -2.987 0.003001 **
## black 0.007689 0.003242 2.371 0.018229 *
## zn 0.039036 0.016672 2.341 0.019736 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.019 on 374 degrees of freedom
## Multiple R-squared: 0.7328, Adjusted R-squared: 0.7249
## F-statistic: 93.23 on 11 and 374 DF, p-value: < 2.2e-16
sample_indices <- sample(1:nrow(train), size = 0.8 * nrow(train))
train_set <- train[sample_indices, ]
test_set <- train[-sample_indices, ]
predictions <- predict(selected_model, newdata=test_set)
residuals <- test_set$medv - predictions
rmspe <- sqrt(mean(residuals^2))
rmspe
## [1] 4.871127
print(paste("Root Mean Square Prediction Error is", round(rmspe, 2)))
## [1] "Root Mean Square Prediction Error is 4.87"
This root mean square prediction error is lower than with the full model, however it is still too high for, so lets try using our new fitted model along with the squares of each of the included variables.
new_model <- lm(medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad + tax + black + zn + I(lstat^2) + I(rm^2) + I(ptratio^2) + I(dis^2) + I(chas^2) + I(nox^2) + I(crim^2 )+ I(rad^2) + I(tax^2) + I(black^2) + I(zn^2), data=train)
summary(new_model)
##
## Call:
## lm(formula = medv ~ lstat + rm + ptratio + dis + chas + nox +
## crim + rad + tax + black + zn + I(lstat^2) + I(rm^2) + I(ptratio^2) +
## I(dis^2) + I(chas^2) + I(nox^2) + I(crim^2) + I(rad^2) +
## I(tax^2) + I(black^2) + I(zn^2), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.075 -2.146 -0.370 1.715 24.136
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.797e+02 2.503e+01 7.178 4.00e-12 ***
## lstat -1.498e+00 1.457e-01 -10.285 < 2e-16 ***
## rm -1.711e+01 3.129e+00 -5.469 8.45e-08 ***
## ptratio -7.434e+00 2.019e+00 -3.683 0.000266 ***
## dis -3.148e+00 6.317e-01 -4.983 9.69e-07 ***
## chas 2.999e+00 8.860e-01 3.385 0.000790 ***
## nox -1.916e+01 2.924e+01 -0.655 0.512747
## crim -3.938e-01 9.676e-02 -4.070 5.77e-05 ***
## rad 7.148e-01 2.425e-01 2.948 0.003403 **
## tax -3.747e-02 1.458e-02 -2.570 0.010564 *
## black 2.422e-02 1.306e-02 1.855 0.064427 .
## zn -7.964e-02 3.713e-02 -2.145 0.032606 *
## I(lstat^2) 2.722e-02 4.087e-03 6.661 1.01e-10 ***
## I(rm^2) 1.547e+00 2.447e-01 6.320 7.64e-10 ***
## I(ptratio^2) 1.836e-01 5.691e-02 3.226 0.001368 **
## I(dis^2) 1.894e-01 5.230e-02 3.622 0.000334 ***
## I(chas^2) NA NA NA NA
## I(nox^2) -5.960e+00 2.210e+01 -0.270 0.787565
## I(crim^2) 3.162e-03 1.229e-03 2.574 0.010444 *
## I(rad^2) -1.378e-02 9.906e-03 -1.391 0.165029
## I(tax^2) 3.131e-05 1.878e-05 1.668 0.096237 .
## I(black^2) -4.428e-05 2.905e-05 -1.524 0.128327
## I(zn^2) 8.972e-04 4.159e-04 2.157 0.031641 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.097 on 364 degrees of freedom
## Multiple R-squared: 0.8267, Adjusted R-squared: 0.8167
## F-statistic: 82.69 on 21 and 364 DF, p-value: < 2.2e-16
sample_indices <- sample(1:nrow(train), size = 0.8 * nrow(train))
train_set <- train[sample_indices, ]
test_set <- train[-sample_indices, ]
predictions <- predict(new_model, newdata=test_set)
residuals <- test_set$medv - predictions
rmspe <- sqrt(mean(residuals^2))
rmspe
## [1] 4.327575
print(paste("Root Mean Square Prediction Error is", round(rmspe, 2)))
## [1] "Root Mean Square Prediction Error is 4.33"
Now we have reduced the root mean square prediction error significantly from before, to reduce it further we can repeat the regression methods in order to eliminate insignificant variables.
null <- lm(medv ~ 1, data=train)
full <- lm(medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad + tax + black + zn + I(lstat^2) + I(rm^2) + I(ptratio^2) + I(dis^2) + I(chas^2) + I(nox^2) + I(crim^2 )+ I(rad^2) + I(tax^2) + I(black^2) + I(zn^2), data=train)
sfit_f <- step(null, scope = list(lower=null, upper=full), direction='forward')
## Start: AIC=1744.66
## medv ~ 1
##
## Df Sum of Sq RSS AIC
## + lstat 1 19031.4 16229 1447.1
## + I(rm^2) 1 17662.5 17598 1478.4
## + rm 1 16513.9 18746 1502.8
## + I(lstat^2) 1 13698.7 21562 1556.8
## + ptratio 1 10274.0 24986 1613.7
## + I(ptratio^2) 1 10270.8 24989 1613.8
## + tax 1 7299.9 27960 1657.1
## + I(tax^2) 1 6890.2 28370 1662.7
## + nox 1 6227.5 29033 1671.7
## + I(nox^2) 1 6097.9 29162 1673.4
## + I(rad^2) 1 5067.4 30193 1686.8
## + crim 1 5017.6 30243 1687.4
## + rad 1 4854.0 30406 1689.5
## + zn 1 3978.6 31282 1700.5
## + I(black^2) 1 3573.5 31687 1705.4
## + black 1 3565.7 31695 1705.5
## + I(zn^2) 1 2933.2 32327 1713.1
## + dis 1 1837.6 33423 1726.0
## + I(crim^2) 1 1711.2 33549 1727.5
## + chas 1 1556.0 33704 1729.2
## + I(chas^2) 1 1556.0 33704 1729.2
## + I(dis^2) 1 1149.7 34110 1733.9
## <none> 35260 1744.7
##
## Step: AIC=1447.14
## medv ~ lstat
##
## Df Sum of Sq RSS AIC
## + I(rm^2) 1 3984.1 12245 1340.4
## + I(lstat^2) 1 3564.7 12664 1353.4
## + rm 1 3185.8 13043 1364.8
## + ptratio 1 2440.6 13788 1386.2
## + I(ptratio^2) 1 2306.7 13922 1390.0
## + dis 1 798.6 15430 1429.7
## + chas 1 793.4 15435 1429.8
## + I(chas^2) 1 793.4 15435 1429.8
## + I(dis^2) 1 616.8 15612 1434.2
## + I(crim^2) 1 230.8 15998 1443.6
## + crim 1 174.7 16054 1445.0
## + tax 1 129.3 16100 1446.0
## <none> 16229 1447.1
## + I(tax^2) 1 73.1 16156 1447.4
## + I(zn^2) 1 68.5 16160 1447.5
## + black 1 68.4 16160 1447.5
## + nox 1 63.0 16166 1447.6
## + I(nox^2) 1 51.1 16178 1447.9
## + zn 1 49.3 16180 1448.0
## + I(black^2) 1 28.5 16200 1448.5
## + I(rad^2) 1 7.8 16221 1449.0
## + rad 1 1.6 16227 1449.1
##
## Step: AIC=1340.41
## medv ~ lstat + I(rm^2)
##
## Df Sum of Sq RSS AIC
## + rm 1 2179.07 10066 1266.8
## + I(lstat^2) 1 1916.12 10329 1276.7
## + ptratio 1 1216.03 11029 1302.0
## + I(ptratio^2) 1 1172.86 11072 1303.5
## + chas 1 478.41 11766 1327.0
## + I(chas^2) 1 478.41 11766 1327.0
## + I(crim^2) 1 358.37 11886 1330.9
## + crim 1 337.94 11907 1331.6
## + dis 1 330.92 11914 1331.8
## + black 1 317.34 11927 1332.3
## + I(dis^2) 1 252.89 11992 1334.3
## + tax 1 197.03 12048 1336.2
## + I(black^2) 1 194.79 12050 1336.2
## + I(tax^2) 1 152.18 12093 1337.6
## + I(rad^2) 1 85.34 12159 1339.7
## + rad 1 65.22 12180 1340.3
## <none> 12245 1340.4
## + I(zn^2) 1 17.08 12228 1341.9
## + zn 1 6.73 12238 1342.2
## + I(nox^2) 1 2.15 12243 1342.3
## + nox 1 0.70 12244 1342.4
##
## Step: AIC=1266.77
## medv ~ lstat + I(rm^2) + rm
##
## Df Sum of Sq RSS AIC
## + ptratio 1 932.34 9133.4 1231.2
## + I(ptratio^2) 1 916.96 9148.7 1231.9
## + I(lstat^2) 1 912.69 9153.0 1232.1
## + crim 1 489.13 9576.6 1249.5
## + chas 1 404.23 9661.5 1252.9
## + I(chas^2) 1 404.23 9661.5 1252.9
## + I(crim^2) 1 363.68 9702.0 1254.6
## + tax 1 317.55 9748.1 1256.4
## + I(tax^2) 1 273.29 9792.4 1258.1
## + black 1 267.05 9798.6 1258.4
## + I(black^2) 1 195.18 9870.5 1261.2
## + I(rad^2) 1 188.10 9877.6 1261.5
## + rad 1 164.77 9900.9 1262.4
## + dis 1 104.14 9961.6 1264.8
## + I(dis^2) 1 103.49 9962.2 1264.8
## + I(nox^2) 1 74.39 9991.3 1265.9
## + nox 1 66.01 9999.7 1266.2
## <none> 10065.7 1266.8
## + I(zn^2) 1 19.53 10046.2 1268.0
## + zn 1 14.24 10051.5 1268.2
##
## Step: AIC=1231.25
## medv ~ lstat + I(rm^2) + rm + ptratio
##
## Df Sum of Sq RSS AIC
## + I(lstat^2) 1 603.28 8530.1 1206.9
## + chas 1 361.56 8771.8 1217.7
## + I(chas^2) 1 361.56 8771.8 1217.7
## + crim 1 289.93 8843.4 1220.8
## + I(crim^2) 1 275.73 8857.6 1221.4
## + dis 1 248.85 8884.5 1222.6
## + I(dis^2) 1 214.30 8919.1 1224.1
## + black 1 171.39 8962.0 1225.9
## + I(black^2) 1 122.94 9010.4 1228.0
## + tax 1 52.80 9080.6 1231.0
## <none> 9133.4 1231.2
## + I(nox^2) 1 44.50 9088.9 1231.4
## + zn 1 38.18 9095.2 1231.6
## + I(tax^2) 1 27.60 9105.8 1232.1
## + nox 1 21.10 9112.3 1232.3
## + I(zn^2) 1 9.72 9123.6 1232.8
## + I(ptratio^2) 1 6.85 9126.5 1233.0
## + I(rad^2) 1 2.95 9130.4 1233.1
## + rad 1 0.91 9132.4 1233.2
##
## Step: AIC=1206.87
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2)
##
## Df Sum of Sq RSS AIC
## + dis 1 403.77 8126.3 1190.2
## + I(dis^2) 1 373.90 8156.2 1191.6
## + chas 1 351.58 8178.5 1192.6
## + I(chas^2) 1 351.58 8178.5 1192.6
## + crim 1 310.94 8219.1 1194.5
## + I(crim^2) 1 262.41 8267.7 1196.8
## + zn 1 147.94 8382.1 1202.1
## + black 1 146.37 8383.7 1202.2
## + I(black^2) 1 98.18 8431.9 1204.4
## + I(zn^2) 1 70.24 8459.8 1205.7
## <none> 8530.1 1206.9
## + tax 1 27.39 8502.7 1207.6
## + I(ptratio^2) 1 26.69 8503.4 1207.7
## + I(tax^2) 1 11.53 8518.5 1208.3
## + I(nox^2) 1 9.52 8520.6 1208.4
## + rad 1 0.82 8529.3 1208.8
## + nox 1 0.11 8530.0 1208.9
## + I(rad^2) 1 0.03 8530.1 1208.9
##
## Step: AIC=1190.15
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis
##
## Df Sum of Sq RSS AIC
## + crim 1 469.75 7656.6 1169.2
## + I(nox^2) 1 359.22 7767.1 1174.7
## + nox 1 327.87 7798.4 1176.2
## + I(crim^2) 1 318.93 7807.4 1176.7
## + chas 1 267.36 7858.9 1179.2
## + I(chas^2) 1 267.36 7858.9 1179.2
## + black 1 199.20 7927.1 1182.6
## + tax 1 162.12 7964.2 1184.4
## + I(black^2) 1 146.39 7979.9 1185.1
## + I(tax^2) 1 119.49 8006.8 1186.4
## <none> 8126.3 1190.2
## + I(rad^2) 1 41.26 8085.0 1190.2
## + rad 1 28.71 8097.6 1190.8
## + I(ptratio^2) 1 3.57 8122.7 1192.0
## + I(zn^2) 1 2.77 8123.5 1192.0
## + zn 1 0.12 8126.2 1192.2
## + I(dis^2) 1 0.01 8126.3 1192.2
##
## Step: AIC=1169.17
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim
##
## Df Sum of Sq RSS AIC
## + I(nox^2) 1 251.461 7405.1 1158.3
## + chas 1 231.137 7425.4 1159.3
## + I(chas^2) 1 231.137 7425.4 1159.3
## + nox 1 222.993 7433.6 1159.8
## + black 1 90.579 7566.0 1166.6
## + I(black^2) 1 62.115 7594.4 1168.0
## <none> 7656.6 1169.2
## + I(dis^2) 1 30.632 7625.9 1169.6
## + rad 1 26.536 7630.0 1169.8
## + tax 1 21.672 7634.9 1170.1
## + I(crim^2) 1 21.051 7635.5 1170.1
## + I(zn^2) 1 20.143 7636.4 1170.2
## + I(rad^2) 1 17.818 7638.7 1170.3
## + zn 1 8.498 7648.1 1170.7
## + I(tax^2) 1 4.821 7651.7 1170.9
## + I(ptratio^2) 1 3.818 7652.7 1171.0
##
## Step: AIC=1158.28
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2)
##
## Df Sum of Sq RSS AIC
## + chas 1 262.407 7142.7 1146.3
## + I(chas^2) 1 262.407 7142.7 1146.3
## + I(dis^2) 1 188.470 7216.6 1150.3
## + rad 1 172.516 7232.6 1151.2
## + I(rad^2) 1 146.636 7258.5 1152.6
## + black 1 55.269 7349.8 1157.4
## + I(ptratio^2) 1 40.096 7365.0 1158.2
## <none> 7405.1 1158.3
## + nox 1 34.248 7370.8 1158.5
## + I(black^2) 1 31.207 7373.9 1158.7
## + I(tax^2) 1 29.090 7376.0 1158.8
## + I(zn^2) 1 28.279 7376.8 1158.8
## + zn 1 12.085 7393.0 1159.7
## + tax 1 7.523 7397.6 1159.9
## + I(crim^2) 1 0.525 7404.6 1160.2
##
## Step: AIC=1146.35
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas
##
## Df Sum of Sq RSS AIC
## + I(dis^2) 1 208.230 6934.5 1136.9
## + rad 1 150.783 6991.9 1140.1
## + I(rad^2) 1 129.636 7013.1 1141.3
## + I(ptratio^2) 1 74.263 7068.4 1144.3
## + nox 1 45.825 7096.9 1145.9
## + black 1 38.429 7104.3 1146.3
## <none> 7142.7 1146.3
## + I(tax^2) 1 31.259 7111.4 1146.7
## + I(zn^2) 1 29.076 7113.6 1146.8
## + I(black^2) 1 18.775 7123.9 1147.3
## + zn 1 11.651 7131.0 1147.7
## + tax 1 10.476 7132.2 1147.8
## + I(crim^2) 1 0.009 7142.7 1148.3
##
## Step: AIC=1136.93
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2)
##
## Df Sum of Sq RSS AIC
## + rad 1 136.202 6798.3 1131.3
## + I(rad^2) 1 113.550 6820.9 1132.6
## + I(ptratio^2) 1 57.973 6876.5 1135.7
## + black 1 39.286 6895.2 1136.7
## <none> 6934.5 1136.9
## + nox 1 22.534 6911.9 1137.7
## + I(black^2) 1 21.724 6912.7 1137.7
## + I(tax^2) 1 12.481 6922.0 1138.2
## + I(crim^2) 1 3.499 6931.0 1138.7
## + I(zn^2) 1 2.391 6932.1 1138.8
## + tax 1 1.217 6933.2 1138.9
## + zn 1 0.067 6934.4 1138.9
##
## Step: AIC=1131.27
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2) + rad
##
## Df Sum of Sq RSS AIC
## + tax 1 183.413 6614.8 1122.7
## + I(tax^2) 1 146.174 6652.1 1124.9
## + I(crim^2) 1 131.321 6666.9 1125.7
## + I(ptratio^2) 1 86.151 6712.1 1128.3
## + black 1 80.031 6718.2 1128.7
## + I(black^2) 1 55.321 6742.9 1130.1
## <none> 6798.3 1131.3
## + I(rad^2) 1 18.332 6779.9 1132.2
## + nox 1 11.011 6787.2 1132.7
## + zn 1 2.631 6795.6 1133.1
## + I(zn^2) 1 0.488 6797.8 1133.2
##
## Step: AIC=1122.72
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2) + rad + tax
##
## Df Sum of Sq RSS AIC
## + I(crim^2) 1 140.521 6474.3 1116.4
## + I(ptratio^2) 1 133.801 6481.0 1116.8
## + black 1 72.337 6542.5 1120.5
## + I(black^2) 1 46.623 6568.2 1122.0
## <none> 6614.8 1122.7
## + I(zn^2) 1 10.197 6604.6 1124.1
## + I(tax^2) 1 10.193 6604.7 1124.1
## + nox 1 8.554 6606.3 1124.2
## + I(rad^2) 1 5.192 6609.7 1124.4
## + zn 1 0.256 6614.6 1124.7
##
## Step: AIC=1116.43
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2) + rad + tax + I(crim^2)
##
## Df Sum of Sq RSS AIC
## + I(ptratio^2) 1 148.973 6325.4 1109.4
## + black 1 48.958 6425.4 1115.5
## <none> 6474.3 1116.4
## + I(black^2) 1 28.377 6445.9 1116.7
## + I(zn^2) 1 14.954 6459.4 1117.5
## + I(tax^2) 1 13.472 6460.9 1117.6
## + nox 1 5.935 6468.4 1118.1
## + zn 1 1.241 6473.1 1118.3
## + I(rad^2) 1 0.002 6474.3 1118.4
##
## Step: AIC=1109.44
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2) + rad + tax + I(crim^2) + I(ptratio^2)
##
## Df Sum of Sq RSS AIC
## + black 1 49.291 6276.1 1108.4
## <none> 6325.4 1109.4
## + I(black^2) 1 30.357 6295.0 1109.6
## + I(tax^2) 1 19.210 6306.1 1110.3
## + I(zn^2) 1 3.603 6321.7 1111.2
## + zn 1 2.214 6323.1 1111.3
## + I(rad^2) 1 0.944 6324.4 1111.4
## + nox 1 0.894 6324.5 1111.4
##
## Step: AIC=1108.42
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2) + rad + tax + I(crim^2) + I(ptratio^2) +
## black
##
## Df Sum of Sq RSS AIC
## + I(black^2) 1 43.909 6232.2 1107.7
## <none> 6276.1 1108.4
## + I(tax^2) 1 19.342 6256.7 1109.2
## + I(zn^2) 1 3.999 6272.1 1110.2
## + zn 1 2.149 6273.9 1110.3
## + nox 1 1.501 6274.6 1110.3
## + I(rad^2) 1 0.662 6275.4 1110.4
##
## Step: AIC=1107.71
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2) + rad + tax + I(crim^2) + I(ptratio^2) +
## black + I(black^2)
##
## Df Sum of Sq RSS AIC
## <none> 6232.2 1107.7
## + I(tax^2) 1 21.2274 6210.9 1108.4
## + I(zn^2) 1 5.0666 6227.1 1109.4
## + nox 1 1.3188 6230.8 1109.6
## + zn 1 1.1863 6231.0 1109.6
## + I(rad^2) 1 0.4552 6231.7 1109.7
sfit_stepwise <- step(null, scope=list(lower=null, upper=full), direction='both')
## Start: AIC=1744.66
## medv ~ 1
##
## Df Sum of Sq RSS AIC
## + lstat 1 19031.4 16229 1447.1
## + I(rm^2) 1 17662.5 17598 1478.4
## + rm 1 16513.9 18746 1502.8
## + I(lstat^2) 1 13698.7 21562 1556.8
## + ptratio 1 10274.0 24986 1613.7
## + I(ptratio^2) 1 10270.8 24989 1613.8
## + tax 1 7299.9 27960 1657.1
## + I(tax^2) 1 6890.2 28370 1662.7
## + nox 1 6227.5 29033 1671.7
## + I(nox^2) 1 6097.9 29162 1673.4
## + I(rad^2) 1 5067.4 30193 1686.8
## + crim 1 5017.6 30243 1687.4
## + rad 1 4854.0 30406 1689.5
## + zn 1 3978.6 31282 1700.5
## + I(black^2) 1 3573.5 31687 1705.4
## + black 1 3565.7 31695 1705.5
## + I(zn^2) 1 2933.2 32327 1713.1
## + dis 1 1837.6 33423 1726.0
## + I(crim^2) 1 1711.2 33549 1727.5
## + chas 1 1556.0 33704 1729.2
## + I(chas^2) 1 1556.0 33704 1729.2
## + I(dis^2) 1 1149.7 34110 1733.9
## <none> 35260 1744.7
##
## Step: AIC=1447.14
## medv ~ lstat
##
## Df Sum of Sq RSS AIC
## + I(rm^2) 1 3984.1 12245 1340.4
## + I(lstat^2) 1 3564.7 12664 1353.4
## + rm 1 3185.8 13043 1364.8
## + ptratio 1 2440.6 13788 1386.2
## + I(ptratio^2) 1 2306.7 13922 1390.0
## + dis 1 798.6 15430 1429.7
## + chas 1 793.4 15435 1429.8
## + I(chas^2) 1 793.4 15435 1429.8
## + I(dis^2) 1 616.8 15612 1434.2
## + I(crim^2) 1 230.8 15998 1443.6
## + crim 1 174.7 16054 1445.0
## + tax 1 129.3 16100 1446.0
## <none> 16229 1447.1
## + I(tax^2) 1 73.1 16156 1447.4
## + I(zn^2) 1 68.5 16160 1447.5
## + black 1 68.4 16160 1447.5
## + nox 1 63.0 16166 1447.6
## + I(nox^2) 1 51.1 16178 1447.9
## + zn 1 49.3 16180 1448.0
## + I(black^2) 1 28.5 16200 1448.5
## + I(rad^2) 1 7.8 16221 1449.0
## + rad 1 1.6 16227 1449.1
## - lstat 1 19031.4 35260 1744.7
##
## Step: AIC=1340.41
## medv ~ lstat + I(rm^2)
##
## Df Sum of Sq RSS AIC
## + rm 1 2179.1 10066 1266.8
## + I(lstat^2) 1 1916.1 10329 1276.7
## + ptratio 1 1216.0 11029 1302.0
## + I(ptratio^2) 1 1172.9 11072 1303.5
## + chas 1 478.4 11766 1327.0
## + I(chas^2) 1 478.4 11766 1327.0
## + I(crim^2) 1 358.4 11886 1330.9
## + crim 1 337.9 11907 1331.6
## + dis 1 330.9 11914 1331.8
## + black 1 317.3 11927 1332.3
## + I(dis^2) 1 252.9 11992 1334.3
## + tax 1 197.0 12048 1336.2
## + I(black^2) 1 194.8 12050 1336.2
## + I(tax^2) 1 152.2 12093 1337.6
## + I(rad^2) 1 85.3 12159 1339.7
## + rad 1 65.2 12180 1340.3
## <none> 12245 1340.4
## + I(zn^2) 1 17.1 12228 1341.9
## + zn 1 6.7 12238 1342.2
## + I(nox^2) 1 2.1 12243 1342.3
## + nox 1 0.7 12244 1342.4
## - I(rm^2) 1 3984.1 16229 1447.1
## - lstat 1 5353.0 17598 1478.4
##
## Step: AIC=1266.77
## medv ~ lstat + I(rm^2) + rm
##
## Df Sum of Sq RSS AIC
## + ptratio 1 932.3 9133.4 1231.2
## + I(ptratio^2) 1 917.0 9148.7 1231.9
## + I(lstat^2) 1 912.7 9153.0 1232.1
## + crim 1 489.1 9576.6 1249.5
## + chas 1 404.2 9661.5 1252.9
## + I(chas^2) 1 404.2 9661.5 1252.9
## + I(crim^2) 1 363.7 9702.0 1254.6
## + tax 1 317.5 9748.1 1256.4
## + I(tax^2) 1 273.3 9792.4 1258.1
## + black 1 267.0 9798.6 1258.4
## + I(black^2) 1 195.2 9870.5 1261.2
## + I(rad^2) 1 188.1 9877.6 1261.5
## + rad 1 164.8 9900.9 1262.4
## + dis 1 104.1 9961.6 1264.8
## + I(dis^2) 1 103.5 9962.2 1264.8
## + I(nox^2) 1 74.4 9991.3 1265.9
## + nox 1 66.0 9999.7 1266.2
## <none> 10065.7 1266.8
## + I(zn^2) 1 19.5 10046.2 1268.0
## + zn 1 14.2 10051.5 1268.2
## - rm 1 2179.1 12244.8 1340.4
## - I(rm^2) 1 2977.4 13043.1 1364.8
## - lstat 1 6343.1 16408.8 1453.4
##
## Step: AIC=1231.25
## medv ~ lstat + I(rm^2) + rm + ptratio
##
## Df Sum of Sq RSS AIC
## + I(lstat^2) 1 603.3 8530.1 1206.9
## + chas 1 361.6 8771.8 1217.7
## + I(chas^2) 1 361.6 8771.8 1217.7
## + crim 1 289.9 8843.4 1220.8
## + I(crim^2) 1 275.7 8857.6 1221.4
## + dis 1 248.8 8884.5 1222.6
## + I(dis^2) 1 214.3 8919.1 1224.1
## + black 1 171.4 8962.0 1225.9
## + I(black^2) 1 122.9 9010.4 1228.0
## + tax 1 52.8 9080.6 1231.0
## <none> 9133.4 1231.2
## + I(nox^2) 1 44.5 9088.9 1231.4
## + zn 1 38.2 9095.2 1231.6
## + I(tax^2) 1 27.6 9105.8 1232.1
## + nox 1 21.1 9112.3 1232.3
## + I(zn^2) 1 9.7 9123.6 1232.8
## + I(ptratio^2) 1 6.8 9126.5 1233.0
## + I(rad^2) 1 3.0 9130.4 1233.1
## + rad 1 0.9 9132.4 1233.2
## - ptratio 1 932.3 10065.7 1266.8
## - rm 1 1895.4 11028.7 1302.0
## - I(rm^2) 1 2520.2 11653.6 1323.3
## - lstat 1 4999.3 14132.7 1397.8
##
## Step: AIC=1206.87
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2)
##
## Df Sum of Sq RSS AIC
## + dis 1 403.77 8126.3 1190.2
## + I(dis^2) 1 373.90 8156.2 1191.6
## + chas 1 351.58 8178.5 1192.6
## + I(chas^2) 1 351.58 8178.5 1192.6
## + crim 1 310.94 8219.1 1194.5
## + I(crim^2) 1 262.41 8267.7 1196.8
## + zn 1 147.94 8382.1 1202.1
## + black 1 146.37 8383.7 1202.2
## + I(black^2) 1 98.18 8431.9 1204.4
## + I(zn^2) 1 70.24 8459.8 1205.7
## <none> 8530.1 1206.9
## + tax 1 27.39 8502.7 1207.6
## + I(ptratio^2) 1 26.69 8503.4 1207.7
## + I(tax^2) 1 11.53 8518.5 1208.3
## + I(nox^2) 1 9.52 8520.6 1208.4
## + rad 1 0.82 8529.3 1208.8
## + nox 1 0.11 8530.0 1208.9
## + I(rad^2) 1 0.03 8530.1 1208.9
## - I(lstat^2) 1 603.28 9133.4 1231.2
## - ptratio 1 622.93 9153.0 1232.1
## - rm 1 1130.82 9660.9 1252.9
## - I(rm^2) 1 1512.53 10042.6 1267.9
## - lstat 1 1930.89 10461.0 1283.6
##
## Step: AIC=1190.15
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis
##
## Df Sum of Sq RSS AIC
## + crim 1 469.75 7656.6 1169.2
## + I(nox^2) 1 359.22 7767.1 1174.7
## + nox 1 327.87 7798.4 1176.2
## + I(crim^2) 1 318.93 7807.4 1176.7
## + chas 1 267.36 7858.9 1179.2
## + I(chas^2) 1 267.36 7858.9 1179.2
## + black 1 199.20 7927.1 1182.6
## + tax 1 162.12 7964.2 1184.4
## + I(black^2) 1 146.39 7979.9 1185.1
## + I(tax^2) 1 119.49 8006.8 1186.4
## <none> 8126.3 1190.2
## + I(rad^2) 1 41.26 8085.0 1190.2
## + rad 1 28.71 8097.6 1190.8
## + I(ptratio^2) 1 3.57 8122.7 1192.0
## + I(zn^2) 1 2.77 8123.5 1192.0
## + zn 1 0.12 8126.2 1192.2
## + I(dis^2) 1 0.01 8126.3 1192.2
## - dis 1 403.77 8530.1 1206.9
## - I(lstat^2) 1 758.21 8884.5 1222.6
## - ptratio 1 758.51 8884.8 1222.6
## - rm 1 782.87 8909.2 1223.7
## - I(rm^2) 1 1047.03 9173.3 1234.9
## - lstat 1 2291.65 10418.0 1284.0
##
## Step: AIC=1169.17
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim
##
## Df Sum of Sq RSS AIC
## + I(nox^2) 1 251.46 7405.1 1158.3
## + chas 1 231.14 7425.4 1159.3
## + I(chas^2) 1 231.14 7425.4 1159.3
## + nox 1 222.99 7433.6 1159.8
## + black 1 90.58 7566.0 1166.6
## + I(black^2) 1 62.12 7594.4 1168.0
## <none> 7656.6 1169.2
## + I(dis^2) 1 30.63 7625.9 1169.6
## + rad 1 26.54 7630.0 1169.8
## + tax 1 21.67 7634.9 1170.1
## + I(crim^2) 1 21.05 7635.5 1170.1
## + I(zn^2) 1 20.14 7636.4 1170.2
## + I(rad^2) 1 17.82 7638.7 1170.3
## + zn 1 8.50 7648.1 1170.7
## + I(tax^2) 1 4.82 7651.7 1170.9
## + I(ptratio^2) 1 3.82 7652.7 1171.0
## - crim 1 469.75 8126.3 1190.2
## - dis 1 562.59 8219.1 1194.5
## - ptratio 1 571.72 8228.3 1195.0
## - I(lstat^2) 1 822.99 8479.5 1206.6
## - rm 1 830.35 8486.9 1206.9
## - I(rm^2) 1 1112.08 8768.6 1219.5
## - lstat 1 2234.43 9891.0 1266.0
##
## Step: AIC=1158.28
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2)
##
## Df Sum of Sq RSS AIC
## + chas 1 262.41 7142.7 1146.3
## + I(chas^2) 1 262.41 7142.7 1146.3
## + I(dis^2) 1 188.47 7216.6 1150.3
## + rad 1 172.52 7232.6 1151.2
## + I(rad^2) 1 146.64 7258.5 1152.6
## + black 1 55.27 7349.8 1157.4
## + I(ptratio^2) 1 40.10 7365.0 1158.2
## <none> 7405.1 1158.3
## + nox 1 34.25 7370.8 1158.5
## + I(black^2) 1 31.21 7373.9 1158.7
## + I(tax^2) 1 29.09 7376.0 1158.8
## + I(zn^2) 1 28.28 7376.8 1158.8
## + zn 1 12.09 7393.0 1159.7
## + tax 1 7.52 7397.6 1159.9
## + I(crim^2) 1 0.52 7404.6 1160.2
## - I(nox^2) 1 251.46 7656.6 1169.2
## - crim 1 361.98 7767.1 1174.7
## - ptratio 1 634.90 8040.0 1188.0
## - I(lstat^2) 1 757.70 8162.8 1193.9
## - dis 1 813.10 8218.2 1196.5
## - rm 1 883.57 8288.7 1199.8
## - I(rm^2) 1 1175.54 8580.6 1213.2
## - lstat 1 1915.33 9320.4 1245.1
##
## Step: AIC=1146.35
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas
##
## Df Sum of Sq RSS AIC
## + I(dis^2) 1 208.23 6934.5 1136.9
## + rad 1 150.78 6991.9 1140.1
## + I(rad^2) 1 129.64 7013.1 1141.3
## + I(ptratio^2) 1 74.26 7068.4 1144.3
## + nox 1 45.82 7096.9 1145.9
## + black 1 38.43 7104.3 1146.3
## <none> 7142.7 1146.3
## + I(tax^2) 1 31.26 7111.4 1146.7
## + I(zn^2) 1 29.08 7113.6 1146.8
## + I(black^2) 1 18.78 7123.9 1147.3
## + zn 1 11.65 7131.0 1147.7
## + tax 1 10.48 7132.2 1147.8
## + I(crim^2) 1 0.01 7142.7 1148.3
## - chas 1 262.41 7405.1 1158.3
## - I(nox^2) 1 282.73 7425.4 1159.3
## - crim 1 323.05 7465.7 1161.4
## - ptratio 1 604.31 7747.0 1175.7
## - I(lstat^2) 1 722.40 7865.1 1181.5
## - dis 1 742.07 7884.8 1182.5
## - rm 1 885.93 8028.6 1189.5
## - I(rm^2) 1 1170.39 8313.1 1202.9
## - lstat 1 1830.67 8973.4 1232.4
##
## Step: AIC=1136.93
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2)
##
## Df Sum of Sq RSS AIC
## + rad 1 136.20 6798.3 1131.3
## + I(rad^2) 1 113.55 6820.9 1132.6
## + I(ptratio^2) 1 57.97 6876.5 1135.7
## + black 1 39.29 6895.2 1136.7
## <none> 6934.5 1136.9
## + nox 1 22.53 6911.9 1137.7
## + I(black^2) 1 21.72 6912.7 1137.7
## + I(tax^2) 1 12.48 6922.0 1138.2
## + I(crim^2) 1 3.50 6931.0 1138.7
## + I(zn^2) 1 2.39 6932.1 1138.8
## + tax 1 1.22 6933.2 1138.9
## + zn 1 0.07 6934.4 1138.9
## - I(dis^2) 1 208.23 7142.7 1146.3
## - chas 1 282.17 7216.6 1150.3
## - crim 1 418.13 7352.6 1157.5
## - dis 1 449.19 7383.7 1159.2
## - I(nox^2) 1 458.28 7392.7 1159.6
## - I(lstat^2) 1 635.31 7569.8 1168.8
## - ptratio 1 687.91 7622.4 1171.4
## - rm 1 766.87 7701.3 1175.4
## - I(rm^2) 1 1029.26 7963.7 1188.3
## - lstat 1 1714.29 8648.8 1220.2
##
## Step: AIC=1131.27
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2) + rad
##
## Df Sum of Sq RSS AIC
## + tax 1 183.41 6614.8 1122.7
## + I(tax^2) 1 146.17 6652.1 1124.9
## + I(crim^2) 1 131.32 6666.9 1125.7
## + I(ptratio^2) 1 86.15 6712.1 1128.3
## + black 1 80.03 6718.2 1128.7
## + I(black^2) 1 55.32 6742.9 1130.1
## <none> 6798.3 1131.3
## + I(rad^2) 1 18.33 6779.9 1132.2
## + nox 1 11.01 6787.2 1132.7
## + zn 1 2.63 6795.6 1133.1
## + I(zn^2) 1 0.49 6797.8 1133.2
## - rad 1 136.20 6934.5 1136.9
## - I(dis^2) 1 193.65 6991.9 1140.1
## - chas 1 259.90 7058.2 1143.8
## - dis 1 435.25 7233.5 1153.2
## - crim 1 551.40 7349.7 1159.4
## - I(nox^2) 1 586.35 7384.6 1161.2
## - I(lstat^2) 1 657.29 7455.5 1164.9
## - rm 1 723.95 7522.2 1168.3
## - ptratio 1 823.18 7621.4 1173.4
## - I(rm^2) 1 964.78 7763.0 1180.5
## - lstat 1 1754.16 8552.4 1217.9
##
## Step: AIC=1122.72
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2) + rad + tax
##
## Df Sum of Sq RSS AIC
## + I(crim^2) 1 140.52 6474.3 1116.4
## + I(ptratio^2) 1 133.80 6481.0 1116.8
## + black 1 72.34 6542.5 1120.5
## + I(black^2) 1 46.62 6568.2 1122.0
## <none> 6614.8 1122.7
## + I(zn^2) 1 10.20 6604.6 1124.1
## + I(tax^2) 1 10.19 6604.7 1124.1
## + nox 1 8.55 6606.3 1124.2
## + I(rad^2) 1 5.19 6609.7 1124.4
## + zn 1 0.26 6614.6 1124.7
## - tax 1 183.41 6798.3 1131.3
## - chas 1 209.99 6824.8 1132.8
## - I(dis^2) 1 263.04 6877.9 1135.8
## - rad 1 318.40 6933.2 1138.9
## - I(nox^2) 1 508.92 7123.8 1149.3
## - dis 1 532.79 7147.6 1150.6
## - crim 1 573.37 7188.2 1152.8
## - I(lstat^2) 1 645.10 7259.9 1156.6
## - rm 1 673.05 7287.9 1158.1
## - ptratio 1 789.24 7404.1 1164.2
## - I(rm^2) 1 892.67 7507.5 1169.6
## - lstat 1 1721.57 8336.4 1210.0
##
## Step: AIC=1116.43
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2) + rad + tax + I(crim^2)
##
## Df Sum of Sq RSS AIC
## + I(ptratio^2) 1 148.97 6325.4 1109.4
## + black 1 48.96 6425.4 1115.5
## <none> 6474.3 1116.4
## + I(black^2) 1 28.38 6445.9 1116.7
## + I(zn^2) 1 14.95 6459.4 1117.5
## + I(tax^2) 1 13.47 6460.9 1117.6
## + nox 1 5.93 6468.4 1118.1
## + zn 1 1.24 6473.1 1118.3
## + I(rad^2) 1 0.00 6474.3 1118.4
## - I(crim^2) 1 140.52 6614.8 1122.7
## - chas 1 179.07 6653.4 1125.0
## - tax 1 192.61 6666.9 1125.7
## - I(dis^2) 1 308.71 6783.0 1132.4
## - crim 1 357.40 6831.7 1135.2
## - rad 1 451.51 6925.8 1140.5
## - I(nox^2) 1 547.77 7022.1 1145.8
## - dis 1 601.73 7076.1 1148.7
## - rm 1 693.97 7168.3 1153.7
## - I(lstat^2) 1 743.80 7218.1 1156.4
## - ptratio 1 832.24 7306.6 1161.1
## - I(rm^2) 1 906.17 7380.5 1165.0
## - lstat 1 1817.11 8291.4 1209.9
##
## Step: AIC=1109.44
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2) + rad + tax + I(crim^2) + I(ptratio^2)
##
## Df Sum of Sq RSS AIC
## + black 1 49.29 6276.1 1108.4
## <none> 6325.4 1109.4
## + I(black^2) 1 30.36 6295.0 1109.6
## + I(tax^2) 1 19.21 6306.1 1110.3
## + I(zn^2) 1 3.60 6321.7 1111.2
## + zn 1 2.21 6323.1 1111.3
## + I(rad^2) 1 0.94 6324.4 1111.4
## + nox 1 0.89 6324.5 1111.4
## - I(ptratio^2) 1 148.97 6474.3 1116.4
## - I(crim^2) 1 155.69 6481.0 1116.8
## - ptratio 1 203.05 6528.4 1119.6
## - chas 1 212.92 6538.3 1120.2
## - tax 1 244.89 6570.2 1122.1
## - I(dis^2) 1 291.28 6616.6 1124.8
## - crim 1 381.18 6706.5 1130.0
## - rad 1 544.03 6869.4 1139.3
## - dis 1 582.91 6908.3 1141.5
## - rm 1 622.13 6947.5 1143.7
## - I(nox^2) 1 658.24 6983.6 1145.7
## - I(lstat^2) 1 790.36 7115.7 1152.9
## - I(rm^2) 1 798.32 7123.7 1153.3
## - lstat 1 1892.10 8217.5 1208.5
##
## Step: AIC=1108.42
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2) + rad + tax + I(crim^2) + I(ptratio^2) +
## black
##
## Df Sum of Sq RSS AIC
## + I(black^2) 1 43.91 6232.2 1107.7
## <none> 6276.1 1108.4
## + I(tax^2) 1 19.34 6256.7 1109.2
## - black 1 49.29 6325.4 1109.4
## + I(zn^2) 1 4.00 6272.1 1110.2
## + zn 1 2.15 6273.9 1110.3
## + nox 1 1.50 6274.6 1110.3
## + I(rad^2) 1 0.66 6275.4 1110.4
## - I(crim^2) 1 130.89 6406.9 1114.4
## - I(ptratio^2) 1 149.31 6425.4 1115.5
## - chas 1 196.77 6472.8 1118.3
## - ptratio 1 203.68 6479.7 1118.8
## - tax 1 236.68 6512.7 1120.7
## - I(dis^2) 1 284.56 6560.6 1123.5
## - crim 1 331.71 6607.8 1126.3
## - rad 1 555.02 6831.1 1139.1
## - dis 1 570.73 6846.8 1140.0
## - rm 1 596.46 6872.5 1141.5
## - I(nox^2) 1 641.51 6917.6 1144.0
## - I(lstat^2) 1 771.54 7047.6 1151.2
## - I(rm^2) 1 777.83 7053.9 1151.5
## - lstat 1 1820.54 8096.6 1204.7
##
## Step: AIC=1107.71
## medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) + dis + crim +
## I(nox^2) + chas + I(dis^2) + rad + tax + I(crim^2) + I(ptratio^2) +
## black + I(black^2)
##
## Df Sum of Sq RSS AIC
## <none> 6232.2 1107.7
## + I(tax^2) 1 21.23 6210.9 1108.4
## - I(black^2) 1 43.91 6276.1 1108.4
## + I(zn^2) 1 5.07 6227.1 1109.4
## - black 1 62.84 6295.0 1109.6
## + nox 1 1.32 6230.8 1109.6
## + zn 1 1.19 6231.0 1109.6
## + I(rad^2) 1 0.46 6231.7 1109.7
## - I(crim^2) 1 131.01 6363.2 1113.7
## - I(ptratio^2) 1 138.68 6370.8 1114.2
## - ptratio 1 189.84 6422.0 1117.3
## - chas 1 196.62 6428.8 1117.7
## - tax 1 247.24 6479.4 1120.7
## - I(dis^2) 1 267.89 6500.0 1122.0
## - crim 1 321.27 6553.4 1125.1
## - dis 1 546.03 6778.2 1138.1
## - rm 1 548.52 6780.7 1138.3
## - rad 1 553.21 6785.4 1138.5
## - I(nox^2) 1 636.00 6868.1 1143.2
## - I(rm^2) 1 729.22 6961.4 1148.4
## - I(lstat^2) 1 780.13 7012.3 1151.2
## - lstat 1 1837.52 8069.7 1205.5
sfit_b <- step(full, direction='backward')
## Start: AIC=1110.08
## medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad +
## tax + black + zn + I(lstat^2) + I(rm^2) + I(ptratio^2) +
## I(dis^2) + I(chas^2) + I(nox^2) + I(crim^2) + I(rad^2) +
## I(tax^2) + I(black^2) + I(zn^2)
##
##
## Step: AIC=1110.08
## medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad +
## tax + black + zn + I(lstat^2) + I(rm^2) + I(ptratio^2) +
## I(dis^2) + I(nox^2) + I(crim^2) + I(rad^2) + I(tax^2) + I(black^2) +
## I(zn^2)
##
## Df Sum of Sq RSS AIC
## - I(nox^2) 1 1.22 6111.4 1108.2
## - nox 1 7.21 6117.4 1108.5
## <none> 6110.2 1110.1
## - I(rad^2) 1 32.49 6142.7 1110.1
## - I(black^2) 1 39.00 6149.2 1110.5
## - I(tax^2) 1 46.69 6156.9 1111.0
## - black 1 57.75 6167.9 1111.7
## - zn 1 77.24 6187.4 1112.9
## - I(zn^2) 1 78.12 6188.3 1113.0
## - tax 1 110.88 6221.1 1115.0
## - I(crim^2) 1 111.23 6221.4 1115.0
## - rad 1 145.90 6256.1 1117.2
## - I(ptratio^2) 1 174.72 6284.9 1119.0
## - chas 1 192.31 6302.5 1120.0
## - I(dis^2) 1 220.20 6330.4 1121.8
## - ptratio 1 227.64 6337.8 1122.2
## - crim 1 278.06 6388.2 1125.3
## - dis 1 416.85 6527.0 1133.6
## - rm 1 501.99 6612.2 1138.6
## - I(rm^2) 1 670.51 6780.7 1148.3
## - I(lstat^2) 1 744.73 6854.9 1152.5
## - lstat 1 1775.54 7885.7 1206.5
##
## Step: AIC=1108.16
## medv ~ lstat + rm + ptratio + dis + chas + nox + crim + rad +
## tax + black + zn + I(lstat^2) + I(rm^2) + I(ptratio^2) +
## I(dis^2) + I(crim^2) + I(rad^2) + I(tax^2) + I(black^2) +
## I(zn^2)
##
## Df Sum of Sq RSS AIC
## <none> 6111.4 1108.2
## - I(rad^2) 1 34.59 6146.0 1108.3
## - I(black^2) 1 38.60 6150.0 1108.6
## - I(tax^2) 1 48.60 6160.0 1109.2
## - black 1 57.41 6168.8 1109.8
## - I(zn^2) 1 79.19 6190.6 1111.1
## - zn 1 83.66 6195.1 1111.4
## - I(crim^2) 1 111.35 6222.7 1113.1
## - tax 1 114.43 6225.8 1113.3
## - rad 1 153.57 6265.0 1115.7
## - chas 1 191.46 6302.9 1118.1
## - I(ptratio^2) 1 207.64 6319.0 1119.1
## - I(dis^2) 1 231.07 6342.5 1120.5
## - ptratio 1 265.57 6377.0 1122.6
## - crim 1 278.42 6389.8 1123.4
## - dis 1 469.00 6580.4 1134.7
## - rm 1 501.55 6613.0 1136.6
## - nox 1 612.83 6724.2 1143.0
## - I(rm^2) 1 670.25 6781.7 1146.3
## - I(lstat^2) 1 750.99 6862.4 1150.9
## - lstat 1 1791.80 7903.2 1205.4
summary(sfit_f)
##
## Call:
## lm(formula = medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) +
## dis + crim + I(nox^2) + chas + I(dis^2) + rad + tax + I(crim^2) +
## I(ptratio^2) + black + I(black^2), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.5396 -2.3272 -0.4128 1.7539 24.0970
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.599e+02 1.858e+01 8.610 < 2e-16 ***
## lstat -1.488e+00 1.427e-01 -10.431 < 2e-16 ***
## I(rm^2) 1.604e+00 2.441e-01 6.571 1.71e-10 ***
## rm -1.779e+01 3.122e+00 -5.699 2.47e-08 ***
## ptratio -6.061e+00 1.808e+00 -3.353 0.000883 ***
## I(lstat^2) 2.722e-02 4.006e-03 6.796 4.32e-11 ***
## dis -3.297e+00 5.798e-01 -5.686 2.65e-08 ***
## crim -4.127e-01 9.461e-02 -4.361 1.68e-05 ***
## I(nox^2) -2.030e+01 3.309e+00 -6.137 2.18e-09 ***
## chas 3.026e+00 8.868e-01 3.412 0.000716 ***
## I(dis^2) 1.972e-01 4.951e-02 3.983 8.21e-05 ***
## rad 4.265e-01 7.451e-02 5.723 2.17e-08 ***
## tax -1.346e-02 3.517e-03 -3.826 0.000153 ***
## I(crim^2) 3.365e-03 1.208e-03 2.785 0.005626 **
## I(ptratio^2) 1.463e-01 5.107e-02 2.865 0.004403 **
## black 2.520e-02 1.306e-02 1.929 0.054500 .
## I(black^2) -4.684e-05 2.905e-05 -1.612 0.107730
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.11 on 369 degrees of freedom
## Multiple R-squared: 0.8233, Adjusted R-squared: 0.8156
## F-statistic: 107.4 on 16 and 369 DF, p-value: < 2.2e-16
summary(sfit_stepwise)
##
## Call:
## lm(formula = medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) +
## dis + crim + I(nox^2) + chas + I(dis^2) + rad + tax + I(crim^2) +
## I(ptratio^2) + black + I(black^2), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.5396 -2.3272 -0.4128 1.7539 24.0970
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.599e+02 1.858e+01 8.610 < 2e-16 ***
## lstat -1.488e+00 1.427e-01 -10.431 < 2e-16 ***
## I(rm^2) 1.604e+00 2.441e-01 6.571 1.71e-10 ***
## rm -1.779e+01 3.122e+00 -5.699 2.47e-08 ***
## ptratio -6.061e+00 1.808e+00 -3.353 0.000883 ***
## I(lstat^2) 2.722e-02 4.006e-03 6.796 4.32e-11 ***
## dis -3.297e+00 5.798e-01 -5.686 2.65e-08 ***
## crim -4.127e-01 9.461e-02 -4.361 1.68e-05 ***
## I(nox^2) -2.030e+01 3.309e+00 -6.137 2.18e-09 ***
## chas 3.026e+00 8.868e-01 3.412 0.000716 ***
## I(dis^2) 1.972e-01 4.951e-02 3.983 8.21e-05 ***
## rad 4.265e-01 7.451e-02 5.723 2.17e-08 ***
## tax -1.346e-02 3.517e-03 -3.826 0.000153 ***
## I(crim^2) 3.365e-03 1.208e-03 2.785 0.005626 **
## I(ptratio^2) 1.463e-01 5.107e-02 2.865 0.004403 **
## black 2.520e-02 1.306e-02 1.929 0.054500 .
## I(black^2) -4.684e-05 2.905e-05 -1.612 0.107730
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.11 on 369 degrees of freedom
## Multiple R-squared: 0.8233, Adjusted R-squared: 0.8156
## F-statistic: 107.4 on 16 and 369 DF, p-value: < 2.2e-16
summary(sfit_b)
##
## Call:
## lm(formula = medv ~ lstat + rm + ptratio + dis + chas + nox +
## crim + rad + tax + black + zn + I(lstat^2) + I(rm^2) + I(ptratio^2) +
## I(dis^2) + I(crim^2) + I(rad^2) + I(tax^2) + I(black^2) +
## I(zn^2), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.0748 -2.1845 -0.3506 1.7204 24.1155
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.837e+02 2.021e+01 9.087 < 2e-16 ***
## lstat -1.493e+00 1.444e-01 -10.345 < 2e-16 ***
## rm -1.710e+01 3.125e+00 -5.473 8.24e-08 ***
## ptratio -7.608e+00 1.910e+00 -3.983 8.23e-05 ***
## dis -3.197e+00 6.041e-01 -5.293 2.09e-07 ***
## chas 2.990e+00 8.843e-01 3.382 0.000799 ***
## nox -2.695e+01 4.455e+00 -6.050 3.59e-09 ***
## crim -3.940e-01 9.663e-02 -4.078 5.58e-05 ***
## rad 7.248e-01 2.393e-01 3.029 0.002632 **
## tax -3.787e-02 1.448e-02 -2.614 0.009313 **
## black 2.415e-02 1.304e-02 1.852 0.064869 .
## zn -8.148e-02 3.645e-02 -2.235 0.026004 *
## I(lstat^2) 2.707e-02 4.042e-03 6.697 8.05e-11 ***
## I(rm^2) 1.546e+00 2.444e-01 6.327 7.32e-10 ***
## I(ptratio^2) 1.887e-01 5.359e-02 3.522 0.000483 ***
## I(dis^2) 1.916e-01 5.159e-02 3.715 0.000235 ***
## I(crim^2) 3.164e-03 1.227e-03 2.579 0.010306 *
## I(rad^2) -1.411e-02 9.817e-03 -1.437 0.151496
## I(tax^2) 3.180e-05 1.867e-05 1.704 0.089293 .
## I(black^2) -4.403e-05 2.900e-05 -1.518 0.129809
## I(zn^2) 9.024e-04 4.149e-04 2.175 0.030289 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.092 on 365 degrees of freedom
## Multiple R-squared: 0.8267, Adjusted R-squared: 0.8172
## F-statistic: 87.04 on 20 and 365 DF, p-value: < 2.2e-16
cat("AIC from Forward Selection:", AIC(sfit_f), "\n")
## AIC from Forward Selection: 2205.133
cat("AIC from Backward Elimination:", AIC(sfit_b), "\n")
## AIC from Backward Elimination: 2205.581
cat("AIC from Stepwise Regression:", AIC(sfit_stepwise), "\n")
## AIC from Stepwise Regression: 2205.133
So we can conclude the best model under AIC criteria is medv ~ lstat + rm^2 + rm + ptratio + lstat^2 + dis + crim + nox^2 + chas + + dis^2 + rad + tax + crim^2 + ptratio^2 + black + black^2. This is agreed by the forward and stepwise regression, we won’t take into consideration the backwards regression model, as the AIC value was slightly higher than for the other methods. Now we can test the root mean square prediction error and analyze the changes.
selected_model <- sfit_f
summary(selected_model)
##
## Call:
## lm(formula = medv ~ lstat + I(rm^2) + rm + ptratio + I(lstat^2) +
## dis + crim + I(nox^2) + chas + I(dis^2) + rad + tax + I(crim^2) +
## I(ptratio^2) + black + I(black^2), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.5396 -2.3272 -0.4128 1.7539 24.0970
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.599e+02 1.858e+01 8.610 < 2e-16 ***
## lstat -1.488e+00 1.427e-01 -10.431 < 2e-16 ***
## I(rm^2) 1.604e+00 2.441e-01 6.571 1.71e-10 ***
## rm -1.779e+01 3.122e+00 -5.699 2.47e-08 ***
## ptratio -6.061e+00 1.808e+00 -3.353 0.000883 ***
## I(lstat^2) 2.722e-02 4.006e-03 6.796 4.32e-11 ***
## dis -3.297e+00 5.798e-01 -5.686 2.65e-08 ***
## crim -4.127e-01 9.461e-02 -4.361 1.68e-05 ***
## I(nox^2) -2.030e+01 3.309e+00 -6.137 2.18e-09 ***
## chas 3.026e+00 8.868e-01 3.412 0.000716 ***
## I(dis^2) 1.972e-01 4.951e-02 3.983 8.21e-05 ***
## rad 4.265e-01 7.451e-02 5.723 2.17e-08 ***
## tax -1.346e-02 3.517e-03 -3.826 0.000153 ***
## I(crim^2) 3.365e-03 1.208e-03 2.785 0.005626 **
## I(ptratio^2) 1.463e-01 5.107e-02 2.865 0.004403 **
## black 2.520e-02 1.306e-02 1.929 0.054500 .
## I(black^2) -4.684e-05 2.905e-05 -1.612 0.107730
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.11 on 369 degrees of freedom
## Multiple R-squared: 0.8233, Adjusted R-squared: 0.8156
## F-statistic: 107.4 on 16 and 369 DF, p-value: < 2.2e-16
sample_indices <- sample(1:nrow(train), size = 0.8 * nrow(train))
train_set <- train[sample_indices, ]
test_set <- train[-sample_indices, ]
predictions <- predict(selected_model, newdata=test_set)
residuals <- test_set$medv - predictions
rmspe <- sqrt(mean(residuals^2))
rmspe
## [1] 4.241404
print(paste("Root Mean Square Prediction Error is", round(rmspe, 2)))
## [1] "Root Mean Square Prediction Error is 4.24"
selected_model <- sfit_b
summary(selected_model)
##
## Call:
## lm(formula = medv ~ lstat + rm + ptratio + dis + chas + nox +
## crim + rad + tax + black + zn + I(lstat^2) + I(rm^2) + I(ptratio^2) +
## I(dis^2) + I(crim^2) + I(rad^2) + I(tax^2) + I(black^2) +
## I(zn^2), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.0748 -2.1845 -0.3506 1.7204 24.1155
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.837e+02 2.021e+01 9.087 < 2e-16 ***
## lstat -1.493e+00 1.444e-01 -10.345 < 2e-16 ***
## rm -1.710e+01 3.125e+00 -5.473 8.24e-08 ***
## ptratio -7.608e+00 1.910e+00 -3.983 8.23e-05 ***
## dis -3.197e+00 6.041e-01 -5.293 2.09e-07 ***
## chas 2.990e+00 8.843e-01 3.382 0.000799 ***
## nox -2.695e+01 4.455e+00 -6.050 3.59e-09 ***
## crim -3.940e-01 9.663e-02 -4.078 5.58e-05 ***
## rad 7.248e-01 2.393e-01 3.029 0.002632 **
## tax -3.787e-02 1.448e-02 -2.614 0.009313 **
## black 2.415e-02 1.304e-02 1.852 0.064869 .
## zn -8.148e-02 3.645e-02 -2.235 0.026004 *
## I(lstat^2) 2.707e-02 4.042e-03 6.697 8.05e-11 ***
## I(rm^2) 1.546e+00 2.444e-01 6.327 7.32e-10 ***
## I(ptratio^2) 1.887e-01 5.359e-02 3.522 0.000483 ***
## I(dis^2) 1.916e-01 5.159e-02 3.715 0.000235 ***
## I(crim^2) 3.164e-03 1.227e-03 2.579 0.010306 *
## I(rad^2) -1.411e-02 9.817e-03 -1.437 0.151496
## I(tax^2) 3.180e-05 1.867e-05 1.704 0.089293 .
## I(black^2) -4.403e-05 2.900e-05 -1.518 0.129809
## I(zn^2) 9.024e-04 4.149e-04 2.175 0.030289 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.092 on 365 degrees of freedom
## Multiple R-squared: 0.8267, Adjusted R-squared: 0.8172
## F-statistic: 87.04 on 20 and 365 DF, p-value: < 2.2e-16
sample_indices <- sample(1:nrow(train), size = 0.8 * nrow(train))
train_set <- train[sample_indices, ]
test_set <- train[-sample_indices, ]
predictions <- predict(selected_model, newdata=test_set)
residuals <- test_set$medv - predictions
rmspe <- sqrt(mean(residuals^2))
rmspe
## [1] 3.341072
print(paste("Root Mean Square Prediction Error is", round(rmspe, 2)))
## [1] "Root Mean Square Prediction Error is 3.34"