library(MASS)
# Load the Boston housing data and split it 70/30 into training and test sets.
data(Boston)

# Fix the RNG seed so the random split, and every result derived from it, is
# reproducible. The `##` output recorded in this file came from an unseeded
# run, so the exact figures will differ.
set.seed(123)

# The data frame is deliberately not attach()ed: the code in this script
# reaches columns through `data =` or `$`, so no search-path entry is needed.

# Row indices of the training sample; floor() makes the 70% sample size an
# explicit whole number instead of relying on sample() to truncate it.
Boston_Data <- sample(nrow(Boston), floor(nrow(Boston) * 0.7))
Boston_Train <- Boston[Boston_Data, ]
Boston_Test <- Boston[-Boston_Data, ]

# Per-column summary statistics of the training split.
summary(Boston_Train)
## crim zn indus chas
## Min. : 0.00632 Min. : 0.000 Min. : 0.460 Min. :0.0000
## 1st Qu.: 0.08190 1st Qu.: 0.000 1st Qu.: 5.223 1st Qu.:0.0000
## Median : 0.25372 Median : 0.000 Median : 9.690 Median :0.0000
## Mean : 3.63560 Mean : 10.653 Mean :11.301 Mean :0.0678
## 3rd Qu.: 3.33671 3rd Qu.: 9.375 3rd Qu.:18.100 3rd Qu.:0.0000
## Max. :88.97620 Max. :100.000 Max. :27.740 Max. :1.0000
## nox rm age dis
## Min. :0.3890 Min. :3.561 Min. : 2.90 Min. : 1.130
## 1st Qu.:0.4530 1st Qu.:5.879 1st Qu.: 45.65 1st Qu.: 2.087
## Median :0.5380 Median :6.209 Median : 78.05 Median : 3.191
## Mean :0.5554 Mean :6.290 Mean : 68.93 Mean : 3.773
## 3rd Qu.:0.6240 3rd Qu.:6.635 3rd Qu.: 94.25 3rd Qu.: 5.188
## Max. :0.8710 Max. :8.780 Max. :100.00 Max. :12.127
## rad tax ptratio black
## Min. : 1.000 Min. :187.0 Min. :12.60 Min. : 0.32
## 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40 1st Qu.:375.24
## Median : 5.000 Median :330.0 Median :19.05 Median :391.34
## Mean : 9.525 Mean :407.5 Mean :18.49 Mean :358.14
## 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20 3rd Qu.:396.90
## Max. :24.000 Max. :711.0 Max. :22.00 Max. :396.90
## lstat medv
## Min. : 1.730 Min. : 5.00
## 1st Qu.: 6.923 1st Qu.:17.10
## Median :11.170 Median :21.60
## Mean :12.514 Mean :22.89
## 3rd Qu.:16.860 3rd Qu.:26.60
## Max. :37.970 Max. :50.00
library(psych)
# Pairwise panel plot of the listed training-set columns.
pairs.panels(
  Boston_Train[c(
    "crim", "zn", "indus", "chas", "nox", "rm", "age",
    "dis", "rad", "tax", "ptratio", "black", "lstat", "medv"
  )],
  gap = 0
)

# Linear regression of medv on every other column of the training set.
model2 <- lm(formula = medv ~ ., data = Boston_Train)

# R-squared of that fit.
summary(model2)[["r.squared"]]
## [1] 0.7109482
# Adjusted R-squared of the same fit.
summary(model2)[["adj.r.squared"]]
## [1] 0.6998962
# Bounds for stepwise selection: the intercept-only model and the model with
# every predictor, both fitted on the training set.
nullmodelBoston <- lm(formula = medv ~ 1, data = Boston_Train)
fullmodelBoston <- lm(formula = medv ~ ., data = Boston_Train)

# Backward elimination starting from the full model; step() prints its AIC
# trace as it runs.
modelstep <- step(object = fullmodelBoston, direction = "backward")
## Start: AIC=1171.86
## medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad +
## tax + ptratio + black + lstat
##
## Df Sum of Sq RSS AIC
## - indus 1 0.00 8960.0 1169.9
## - age 1 0.12 8960.1 1169.9
## <none> 8960.0 1171.9
## - zn 1 86.34 9046.3 1173.2
## - tax 1 139.34 9099.3 1175.3
## - crim 1 180.22 9140.2 1176.9
## - chas 1 189.39 9149.4 1177.3
## - black 1 193.71 9153.7 1177.4
## - rad 1 411.07 9371.1 1185.7
## - nox 1 426.67 9386.7 1186.3
## - dis 1 926.14 9886.1 1204.7
## - ptratio 1 949.18 9909.2 1205.5
## - rm 1 1138.97 10099.0 1212.2
## - lstat 1 2047.66 11007.7 1242.7
##
## Step: AIC=1169.86
## medv ~ crim + zn + chas + nox + rm + age + dis + rad + tax +
## ptratio + black + lstat
##
## Df Sum of Sq RSS AIC
## - age 1 0.12 8960.1 1167.9
## <none> 8960.0 1169.9
## - zn 1 87.70 9047.7 1171.3
## - tax 1 169.68 9129.7 1174.5
## - crim 1 180.40 9140.4 1174.9
## - chas 1 192.13 9152.1 1175.4
## - black 1 193.85 9153.9 1175.4
## - rad 1 446.42 9406.4 1185.1
## - nox 1 460.67 9420.7 1185.6
## - ptratio 1 956.65 9916.7 1203.8
## - dis 1 958.06 9918.1 1203.8
## - rm 1 1152.00 10112.0 1210.7
## - lstat 1 2073.89 11033.9 1241.6
##
## Step: AIC=1167.86
## medv ~ crim + zn + chas + nox + rm + dis + rad + tax + ptratio +
## black + lstat
##
## Df Sum of Sq RSS AIC
## <none> 8960.1 1167.9
## - zn 1 88.71 9048.8 1169.3
## - tax 1 169.95 9130.1 1172.5
## - crim 1 180.61 9140.7 1172.9
## - chas 1 193.00 9153.1 1173.4
## - black 1 193.74 9153.9 1173.4
## - rad 1 449.87 9410.0 1183.2
## - nox 1 501.09 9461.2 1185.1
## - ptratio 1 957.90 9918.0 1201.8
## - dis 1 1079.45 10039.6 1206.1
## - rm 1 1184.22 10144.3 1209.8
## - lstat 1 2353.87 11314.0 1248.4
# Forward selection starting from the intercept-only model, with candidate
# terms bounded above by the full model. Overwrites the previous `modelstep`.
modelstep <- step(
  object = nullmodelBoston,
  scope = list(lower = nullmodelBoston, upper = fullmodelBoston),
  direction = "forward"
)
## Start: AIC=1585.22
## medv ~ 1
##
## Df Sum of Sq RSS AIC
## + lstat 1 16545.5 14452 1317.1
## + rm 1 14210.7 16787 1370.1
## + ptratio 1 6975.5 24022 1497.0
## + indus 1 6479.3 24519 1504.2
## + tax 1 5889.4 25109 1512.6
## + nox 1 4675.3 26323 1529.3
## + crim 1 3870.5 27127 1540.0
## + age 1 3534.6 27463 1544.4
## + rad 1 3521.8 27476 1544.5
## + black 1 3171.9 27826 1549.0
## + zn 1 2368.7 28629 1559.1
## + dis 1 1437.4 29561 1570.4
## + chas 1 1260.6 29737 1572.5
## <none> 30998 1585.2
##
## Step: AIC=1317.1
## medv ~ lstat
##
## Df Sum of Sq RSS AIC
## + rm 1 2406.94 12046 1254.6
## + ptratio 1 1502.35 12950 1280.2
## + dis 1 761.57 13691 1299.9
## + chas 1 559.96 13892 1305.1
## + age 1 371.36 14081 1309.9
## + black 1 117.92 14334 1316.2
## + tax 1 101.18 14351 1316.6
## + crim 1 87.16 14365 1317.0
## <none> 14452 1317.1
## + indus 1 16.33 14436 1318.7
## + nox 1 12.90 14440 1318.8
## + zn 1 1.96 14450 1319.0
## + rad 1 0.00 14452 1319.1
##
## Step: AIC=1254.61
## medv ~ lstat + rm
##
## Df Sum of Sq RSS AIC
## + ptratio 1 985.48 11060 1226.4
## + chas 1 441.45 11604 1243.4
## + dis 1 426.62 11619 1243.8
## + black 1 266.72 11779 1248.7
## + crim 1 147.28 11898 1252.3
## + tax 1 124.51 11921 1252.9
## + age 1 89.83 11956 1254.0
## <none> 12046 1254.6
## + rad 1 20.70 12025 1256.0
## + indus 1 2.73 12043 1256.5
## + zn 1 2.24 12043 1256.5
## + nox 1 0.07 12046 1256.6
##
## Step: AIC=1226.4
## medv ~ lstat + rm + ptratio
##
## Df Sum of Sq RSS AIC
## + dis 1 532.44 10528 1210.9
## + chas 1 348.78 10711 1217.0
## + black 1 217.06 10843 1221.4
## + age 1 106.20 10954 1225.0
## + zn 1 77.67 10982 1225.9
## <none> 11060 1226.4
## + rad 1 57.73 11002 1226.5
## + crim 1 56.73 11003 1226.6
## + indus 1 12.37 11048 1228.0
## + nox 1 1.38 11059 1228.3
## + tax 1 0.18 11060 1228.4
##
## Step: AIC=1210.93
## medv ~ lstat + rm + ptratio + dis
##
## Df Sum of Sq RSS AIC
## + nox 1 543.81 9983.8 1194.2
## + black 1 310.03 10217.6 1202.3
## + chas 1 224.20 10303.4 1205.3
## + indus 1 165.34 10362.3 1207.3
## + crim 1 138.29 10389.3 1208.2
## + tax 1 97.09 10430.5 1209.7
## <none> 10527.6 1210.9
## + age 1 46.01 10481.6 1211.4
## + zn 1 42.09 10485.5 1211.5
## + rad 1 0.00 10527.6 1212.9
##
## Step: AIC=1194.16
## medv ~ lstat + rm + ptratio + dis + nox
##
## Df Sum of Sq RSS AIC
## + chas 1 260.221 9723.6 1186.8
## + black 1 194.352 9789.4 1189.2
## + rad 1 122.923 9860.9 1191.8
## + crim 1 81.562 9902.2 1193.2
## <none> 9983.8 1194.2
## + zn 1 51.007 9932.8 1194.3
## + indus 1 10.665 9973.1 1195.8
## + tax 1 1.846 9981.9 1196.1
## + age 1 0.330 9983.5 1196.1
##
## Step: AIC=1186.81
## medv ~ lstat + rm + ptratio + dis + nox + chas
##
## Df Sum of Sq RSS AIC
## + black 1 170.211 9553.4 1182.5
## + rad 1 117.547 9606.0 1184.5
## + crim 1 68.800 9654.8 1186.3
## + zn 1 61.264 9662.3 1186.6
## <none> 9723.6 1186.8
## + indus 1 23.071 9700.5 1188.0
## + age 1 4.605 9719.0 1188.6
## + tax 1 2.873 9720.7 1188.7
##
## Step: AIC=1182.55
## medv ~ lstat + rm + ptratio + dis + nox + chas + black
##
## Df Sum of Sq RSS AIC
## + rad 1 216.276 9337.1 1176.5
## + zn 1 73.414 9480.0 1181.8
## <none> 9553.4 1182.5
## + crim 1 29.671 9523.7 1183.5
## + tax 1 24.004 9529.4 1183.7
## + indus 1 16.424 9536.9 1184.0
## + age 1 7.709 9545.7 1184.3
##
## Step: AIC=1176.45
## medv ~ lstat + rm + ptratio + dis + nox + chas + black + rad
##
## Df Sum of Sq RSS AIC
## + crim 1 156.910 9180.2 1172.5
## + tax 1 126.118 9211.0 1173.6
## <none> 9337.1 1176.5
## + zn 1 37.540 9299.5 1177.0
## + indus 1 26.362 9310.7 1177.5
## + age 1 1.641 9335.4 1178.4
##
## Step: AIC=1172.45
## medv ~ lstat + rm + ptratio + dis + nox + chas + black + rad +
## crim
##
## Df Sum of Sq RSS AIC
## + tax 1 131.337 9048.8 1169.3
## <none> 9180.2 1172.5
## + zn 1 50.102 9130.1 1172.5
## + indus 1 32.887 9147.3 1173.2
## + age 1 1.317 9178.9 1174.4
##
## Step: AIC=1169.35
## medv ~ lstat + rm + ptratio + dis + nox + chas + black + rad +
## crim + tax
##
## Df Sum of Sq RSS AIC
## + zn 1 88.714 8960.1 1167.9
## <none> 9048.8 1169.3
## + indus 1 1.378 9047.5 1171.3
## + age 1 1.135 9047.7 1171.3
##
## Step: AIC=1167.86
## medv ~ lstat + rm + ptratio + dis + nox + chas + black + rad +
## crim + tax + zn
##
## Df Sum of Sq RSS AIC
## <none> 8960.1 1167.9
## + age 1 0.116260 8960.0 1169.9
## + indus 1 0.001047 8960.1 1169.9
# Stepwise selection in both directions, starting from the intercept-only
# model and bounded above by the full model. Overwrites the previous
# `modelstep`.
modelstep <- step(
  object = nullmodelBoston,
  scope = list(lower = nullmodelBoston, upper = fullmodelBoston),
  direction = "both"
)
## Start: AIC=1585.22
## medv ~ 1
##
## Df Sum of Sq RSS AIC
## + lstat 1 16545.5 14452 1317.1
## + rm 1 14210.7 16787 1370.1
## + ptratio 1 6975.5 24022 1497.0
## + indus 1 6479.3 24519 1504.2
## + tax 1 5889.4 25109 1512.6
## + nox 1 4675.3 26323 1529.3
## + crim 1 3870.5 27127 1540.0
## + age 1 3534.6 27463 1544.4
## + rad 1 3521.8 27476 1544.5
## + black 1 3171.9 27826 1549.0
## + zn 1 2368.7 28629 1559.1
## + dis 1 1437.4 29561 1570.4
## + chas 1 1260.6 29737 1572.5
## <none> 30998 1585.2
##
## Step: AIC=1317.1
## medv ~ lstat
##
## Df Sum of Sq RSS AIC
## + rm 1 2406.9 12046 1254.6
## + ptratio 1 1502.3 12950 1280.2
## + dis 1 761.6 13691 1299.9
## + chas 1 560.0 13892 1305.1
## + age 1 371.4 14081 1309.9
## + black 1 117.9 14334 1316.2
## + tax 1 101.2 14351 1316.6
## + crim 1 87.2 14365 1317.0
## <none> 14452 1317.1
## + indus 1 16.3 14436 1318.7
## + nox 1 12.9 14440 1318.8
## + zn 1 2.0 14450 1319.0
## + rad 1 0.0 14452 1319.1
## - lstat 1 16545.5 30998 1585.2
##
## Step: AIC=1254.61
## medv ~ lstat + rm
##
## Df Sum of Sq RSS AIC
## + ptratio 1 985.5 11060 1226.4
## + chas 1 441.4 11604 1243.4
## + dis 1 426.6 11619 1243.8
## + black 1 266.7 11779 1248.7
## + crim 1 147.3 11898 1252.3
## + tax 1 124.5 11921 1252.9
## + age 1 89.8 11956 1254.0
## <none> 12046 1254.6
## + rad 1 20.7 12025 1256.0
## + indus 1 2.7 12043 1256.5
## + zn 1 2.2 12043 1256.5
## + nox 1 0.1 12046 1256.6
## - rm 1 2406.9 14452 1317.1
## - lstat 1 4741.7 16787 1370.1
##
## Step: AIC=1226.4
## medv ~ lstat + rm + ptratio
##
## Df Sum of Sq RSS AIC
## + dis 1 532.4 10528 1210.9
## + chas 1 348.8 10711 1217.0
## + black 1 217.1 10843 1221.4
## + age 1 106.2 10954 1225.0
## + zn 1 77.7 10982 1225.9
## <none> 11060 1226.4
## + rad 1 57.7 11002 1226.5
## + crim 1 56.7 11003 1226.6
## + indus 1 12.4 11048 1228.0
## + nox 1 1.4 11059 1228.3
## + tax 1 0.2 11060 1228.4
## - ptratio 1 985.5 12046 1254.6
## - rm 1 1890.1 12950 1280.2
## - lstat 1 3707.4 14767 1326.7
##
## Step: AIC=1210.93
## medv ~ lstat + rm + ptratio + dis
##
## Df Sum of Sq RSS AIC
## + nox 1 543.8 9983.8 1194.2
## + black 1 310.0 10217.6 1202.3
## + chas 1 224.2 10303.4 1205.3
## + indus 1 165.3 10362.3 1207.3
## + crim 1 138.3 10389.3 1208.2
## + tax 1 97.1 10430.5 1209.7
## <none> 10527.6 1210.9
## + age 1 46.0 10481.6 1211.4
## + zn 1 42.1 10485.5 1211.5
## + rad 1 0.0 10527.6 1212.9
## - dis 1 532.4 11060.0 1226.4
## - ptratio 1 1091.3 11618.9 1243.8
## - rm 1 1547.6 12075.2 1257.5
## - lstat 1 4196.5 14724.1 1327.7
##
## Step: AIC=1194.16
## medv ~ lstat + rm + ptratio + dis + nox
##
## Df Sum of Sq RSS AIC
## + chas 1 260.2 9723.6 1186.8
## + black 1 194.4 9789.4 1189.2
## + rad 1 122.9 9860.9 1191.8
## + crim 1 81.6 9902.2 1193.2
## <none> 9983.8 1194.2
## + zn 1 51.0 9932.8 1194.3
## + indus 1 10.7 9973.1 1195.8
## + tax 1 1.8 9981.9 1196.1
## + age 1 0.3 9983.5 1196.1
## - nox 1 543.8 10527.6 1210.9
## - dis 1 1074.9 11058.7 1228.3
## - ptratio 1 1253.1 11236.9 1234.0
## - rm 1 1458.0 11441.7 1240.4
## - lstat 1 3186.5 13170.3 1290.2
##
## Step: AIC=1186.81
## medv ~ lstat + rm + ptratio + dis + nox + chas
##
## Df Sum of Sq RSS AIC
## + black 1 170.21 9553.4 1182.5
## + rad 1 117.55 9606.0 1184.5
## + crim 1 68.80 9654.8 1186.3
## + zn 1 61.26 9662.3 1186.6
## <none> 9723.6 1186.8
## + indus 1 23.07 9700.5 1188.0
## + age 1 4.60 9719.0 1188.6
## + tax 1 2.87 9720.7 1188.7
## - chas 1 260.22 9983.8 1194.2
## - nox 1 579.83 10303.4 1205.3
## - dis 1 971.28 10694.9 1218.5
## - ptratio 1 1150.43 10874.0 1224.4
## - rm 1 1435.77 11159.3 1233.6
## - lstat 1 2962.93 12686.5 1279.0
##
## Step: AIC=1182.55
## medv ~ lstat + rm + ptratio + dis + nox + chas + black
##
## Df Sum of Sq RSS AIC
## + rad 1 216.28 9337.1 1176.5
## + zn 1 73.41 9480.0 1181.8
## <none> 9553.4 1182.5
## + crim 1 29.67 9523.7 1183.5
## + tax 1 24.00 9529.4 1183.7
## + indus 1 16.42 9536.9 1184.0
## + age 1 7.71 9545.7 1184.3
## - black 1 170.21 9723.6 1186.8
## - chas 1 236.08 9789.4 1189.2
## - nox 1 464.06 10017.4 1197.3
## - dis 1 953.81 10507.2 1214.2
## - ptratio 1 1097.24 10650.6 1219.0
## - rm 1 1526.87 11080.2 1233.0
## - lstat 1 2566.38 12119.7 1264.8
##
## Step: AIC=1176.45
## medv ~ lstat + rm + ptratio + dis + nox + chas + black + rad
##
## Df Sum of Sq RSS AIC
## + crim 1 156.91 9180.2 1172.5
## + tax 1 126.12 9211.0 1173.6
## <none> 9337.1 1176.5
## + zn 1 37.54 9299.5 1177.0
## + indus 1 26.36 9310.7 1177.5
## + age 1 1.64 9335.4 1178.4
## - rad 1 216.28 9553.4 1182.5
## - chas 1 222.09 9559.2 1182.8
## - black 1 268.94 9606.0 1184.5
## - nox 1 656.57 9993.7 1198.5
## - dis 1 991.00 10328.1 1210.2
## - ptratio 1 1311.96 10649.1 1221.0
## - rm 1 1367.64 10704.7 1222.8
## - lstat 1 2657.85 11994.9 1263.1
##
## Step: AIC=1172.45
## medv ~ lstat + rm + ptratio + dis + nox + chas + black + rad +
## crim
##
## Df Sum of Sq RSS AIC
## + tax 1 131.34 9048.8 1169.3
## <none> 9180.2 1172.5
## + zn 1 50.10 9130.1 1172.5
## + indus 1 32.89 9147.3 1173.2
## + age 1 1.32 9178.9 1174.4
## - crim 1 156.91 9337.1 1176.5
## - chas 1 202.89 9383.1 1178.2
## - black 1 212.80 9393.0 1178.6
## - rad 1 343.51 9523.7 1183.5
## - nox 1 711.52 9891.7 1196.9
## - dis 1 1050.22 10230.4 1208.8
## - rm 1 1352.94 10533.1 1219.1
## - ptratio 1 1360.37 10540.6 1219.4
## - lstat 1 2435.66 11615.8 1253.8
##
## Step: AIC=1169.35
## medv ~ lstat + rm + ptratio + dis + nox + chas + black + rad +
## crim + tax
##
## Df Sum of Sq RSS AIC
## + zn 1 88.71 8960.1 1167.9
## <none> 9048.8 1169.3
## + indus 1 1.38 9047.5 1171.3
## + age 1 1.14 9047.7 1171.3
## - tax 1 131.34 9180.2 1172.5
## - crim 1 162.13 9211.0 1173.6
## - chas 1 185.06 9233.9 1174.5
## - black 1 196.31 9245.2 1175.0
## - rad 1 426.37 9475.2 1183.7
## - nox 1 543.02 9591.9 1188.0
## - dis 1 1060.88 10109.7 1206.6
## - rm 1 1240.01 10288.9 1212.8
## - ptratio 1 1250.51 10299.4 1213.2
## - lstat 1 2382.28 11431.1 1250.1
##
## Step: AIC=1167.86
## medv ~ lstat + rm + ptratio + dis + nox + chas + black + rad +
## crim + tax + zn
##
## Df Sum of Sq RSS AIC
## <none> 8960.1 1167.9
## - zn 1 88.71 9048.8 1169.3
## + age 1 0.12 8960.0 1169.9
## + indus 1 0.00 8960.1 1169.9
## - tax 1 169.95 9130.1 1172.5
## - crim 1 180.61 9140.7 1172.9
## - chas 1 193.00 9153.1 1173.4
## - black 1 193.74 9153.9 1173.4
## - rad 1 449.87 9410.0 1183.2
## - nox 1 501.09 9461.2 1185.1
## - ptratio 1 957.90 9918.0 1201.8
## - dis 1 1079.45 10039.6 1206.1
## - rm 1 1184.22 10144.3 1209.8
## - lstat 1 2353.87 11314.0 1248.4
# Fit the 11-predictor linear model (all predictors except indus and age) on
# the training set.
model_1 <- lm(
  formula = medv ~ lstat + ptratio + rm + dis + nox + black + zn + crim + rad + tax + chas,
  data = Boston_Train
)

# Keep the summary object for reuse, then display it.
model_summary <- summary(object = model_1)
model_summary
##
## Call:
## lm(formula = medv ~ lstat + ptratio + rm + dis + nox + black +
## zn + crim + rad + tax + chas, data = Boston_Train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.8269 -2.9134 -0.6731 1.9539 24.3957
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.856889 6.440289 6.499 2.86e-10 ***
## lstat -0.566224 0.059737 -9.479 < 2e-16 ***
## ptratio -1.010122 0.167054 -6.047 3.87e-09 ***
## rm 3.388593 0.504020 6.723 7.47e-11 ***
## dis -1.527156 0.237917 -6.419 4.59e-10 ***
## nox -19.897723 4.549774 -4.373 1.63e-05 ***
## black 0.009565 0.003517 2.719 0.00687 **
## zn 0.032502 0.017663 1.840 0.06661 .
## crim -0.098809 0.037633 -2.626 0.00904 **
## rad 0.331438 0.079984 4.144 4.31e-05 ***
## tax -0.010984 0.004313 -2.547 0.01131 *
## chas 3.026340 1.115007 2.714 0.00698 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.119 on 342 degrees of freedom
## Multiple R-squared: 0.7109, Adjusted R-squared: 0.7016
## F-statistic: 76.47 on 11 and 342 DF, p-value: < 2.2e-16
# Squared residual standard error of model_1 (its estimated residual variance).
model_summary[["sigma"]]^2
## [1] 26.1992
# Gaussian GLM of medv on all predictors of the training set, with its summary.
glmtrain <- glm(medv ~ ., data = Boston_Train)
summary(glmtrain)

# Test-set predictions from the selected linear model. They are stored under a
# descriptive name because the previous name, `pi`, masked R's built-in
# constant `pi` for the rest of the session.
test_pred_lm <- predict(object = model_1, newdata = Boston_Test)

# Squared observed responses of the test set.
(Boston_Test$medv)^2
## [1] 823.69 524.41 225.00 470.89 331.24 306.25 408.04 210.25
## [9] 275.56 338.56 174.24 171.61 610.09 640.09 449.44 400.00
## [17] 388.09 1253.16 349.69 492.84 302.76 585.64 519.84 457.96
## [25] 432.64 449.44 571.21 615.04 506.25 492.84 484.00 524.41
## [33] 702.25 380.25 376.36 519.84 368.64 457.96 324.00 384.16
## [41] 243.36 316.84 179.56 243.36 243.36 213.16 243.36 590.49
## [49] 2500.00 566.44 556.96 538.24 605.16 1056.25 696.96 876.16
## [57] 1024.00 888.04 1218.01 967.21 1197.16 1218.01 580.81 1789.29
## [65] 2352.25 2500.00 372.49 625.00 542.89 712.89 470.89 2500.00
## [73] 484.00 561.69 309.76 342.25 420.25 436.81 1142.44 2381.44
## [81] 519.84 942.49 595.36 1239.04 1049.76 1232.01 1253.16 2500.00
## [89] 404.01 778.41 841.00 615.04 1095.61 412.09 259.21 488.41
## [97] 392.04 416.16 342.25 529.00 372.49 376.36 424.36 349.69
## [105] 272.25 524.41 580.81 316.84 515.29 432.64 193.21 104.04
## [113] 77.44 51.84 171.61 25.00 31.36 51.84 146.41 295.84
## [121] 295.84 51.84 70.56 278.89 432.64 136.89 118.81 121.00
## [129] 204.49 136.89 70.56 110.25 292.41 116.64 222.01 259.21
## [137] 222.01 222.01 400.00 396.01 361.00 364.81 384.16 213.16
## [145] 457.96 529.00 561.69 364.81 231.04 388.09 424.36 484.00
# Mean squared error of the linear model on the test set.
mean((test_pred_lm - Boston_Test$medv)^2)
## [1] 14.90817
# Mean absolute error of the linear model on the test set.
mean(abs(test_pred_lm - Boston_Test$medv))
## [1] 3.047425
library(boot)
##
## Attaching package: 'boot'
## The following object is masked from 'package:psych':
##
## logit
# Fit the 11-predictor model on the full Boston data as a Gaussian GLM so that
# boot::cv.glm can cross-validate it.
model_2 <- glm(
  medv ~ lstat + ptratio + rm + dis + nox + black + zn + crim + rad + tax + chas,
  data = Boston
)

# cv.glm assigns observations to folds at random; seed the RNG so the
# cross-validation estimate is reproducible from run to run.
set.seed(123)

# 3-fold cross-validation. delta[2] is the adjusted (bias-corrected) estimate
# of prediction error; delta[1] would be the raw estimate.
cv_mse_k3 <- cv.glm(data = Boston, glmfit = model_2, K = 3)$delta[2]
cv_mse_k3
## [1] 22.73432
# Regression tree (CART) fitted on the training data.
library(rpart)
boston.rpart <- rpart(
  formula = medv ~ lstat + ptratio + rm + dis + nox + black + zn + crim + rad + tax + chas,
  data = Boston_Train
)

# Display the fitted tree's nodes and splits.
boston.rpart
## n= 354
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 354 30997.9400 22.89011
## 2) lstat>=9.63 204 4868.9040 17.47059
## 4) lstat>=15.065 110 2154.3690 14.69727
## 8) nox>=0.603 68 806.7206 12.63824 *
## 9) nox< 0.603 42 592.5898 18.03095 *
## 5) lstat< 15.065 94 878.4461 20.71596 *
## 3) lstat< 9.63 150 11988.5400 30.26067
## 6) rm< 7.437 128 5928.4870 27.82031
## 12) dis>=1.9704 117 2966.5370 26.66154
## 24) rm< 6.722 74 882.5059 23.92703 *
## 25) rm>=6.722 43 578.4344 31.36744 *
## 13) dis< 1.9704 11 1133.8470 40.14545 *
## 7) rm>=7.437 22 862.6732 44.45909 *
# Draw the fitted tree and label its nodes.
plot(boston.rpart)
text(boston.rpart)

# Tree predictions: first without new data (the training fit), then on the
# held-out test set.
boston.train.pred.tree <- predict(boston.rpart)
boston.test.pred.tree <- predict(boston.rpart, newdata = Boston_Test)

# Mean squared error of the tree on the test set.
mean((boston.test.pred.tree - Boston_Test$medv)^2)
## [1] 15.30359
# Linear model with the same 11 predictors as the tree, fitted on the training
# set for a like-for-like comparison.
boston.reg <- lm(
  formula = medv ~ lstat + ptratio + rm + dis + nox + black + zn + crim + rad + tax + chas,
  data = Boston_Train
)
boston.test.pred.reg <- predict(boston.reg, newdata = Boston_Test)

# Mean squared error of the linear model on the test set.
mean((boston.test.pred.reg - Boston_Test$medv)^2)
## [1] 14.90817