Question 1: Working with the Boston Dataset

Using the Boston dataset, fit classification models in order to predict whether a given suburb has a crime rate above or below the median. Explore logistic regression, LDA, naive Bayes, and KNN models using various subsets of the predictors. Describe your findings.
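
The chunks below assume a setup chunk (not shown in the original) that loads roughly the following packages; the exact list is an assumption inferred from the functions used:

# Assumed setup chunk -- packages inferred from the functions called below
library(MASS)        # Boston data, lda(); masks dplyr::select, hence the
                     # explicit dplyr::select() calls throughout
library(tidyverse)   # dplyr, tidyr, stringr, ggplot2, tibble
library(kableExtra)  # kbl(), row_spec(), kable_styling()
library(corrplot)    # corrplot(), cor.mtest()
library(rsample)     # initial_split(), training(), testing()
library(caret)       # confusionMatrix()
library(naivebayes)  # naive_bayes()
library(class)       # knn()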

boston <- Boston

Exploratory Data Analysis

boston <- boston %>%
    mutate(chas = factor(chas),
           # 'High' = crime rate above the median, 'Low' = at or below it.
           # The level order c('High', 'Low') matters later: glm() with
           # family = 'binomial' models the probability of the second level, P(Low).
           crime_factor = factor(ifelse(crim > median(crim), 
                                              'High', 'Low'), 
                                       levels = c('High', 'Low')))
kbl(head(boston, 10), caption = "First 10 rows of the Boston data with classification by crime rate factor") %>%
  row_spec(row = 0, bold = TRUE, color = "black", background = "#F9EBEA") %>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "center")
First 10 rows of the Boston data with classification by crime rate factor
crim zn indus chas nox rm age dis rad tax ptratio black lstat medv crime_factor
0.00632 18.0 2.31 0 0.5380 6.575 65.2 4.0900 1 296 15.3 396.90 4.98 24.0 Low
0.02731 0.0 7.07 0 0.4690 6.421 78.9 4.9671 2 242 17.8 396.90 9.14 21.6 Low
0.02729 0.0 7.07 0 0.4690 7.185 61.1 4.9671 2 242 17.8 392.83 4.03 34.7 Low
0.03237 0.0 2.18 0 0.4580 6.998 45.8 6.0622 3 222 18.7 394.63 2.94 33.4 Low
0.06905 0.0 2.18 0 0.4580 7.147 54.2 6.0622 3 222 18.7 396.90 5.33 36.2 Low
0.02985 0.0 2.18 0 0.4580 6.430 58.7 6.0622 3 222 18.7 394.12 5.21 28.7 Low
0.08829 12.5 7.87 0 0.5240 6.012 66.6 5.5605 5 311 15.2 395.60 12.43 22.9 Low
0.14455 12.5 7.87 0 0.5240 6.172 96.1 5.9505 5 311 15.2 396.90 19.15 27.1 Low
0.21124 12.5 7.87 0 0.5240 5.631 100.0 6.0821 5 311 15.2 386.63 29.93 16.5 Low
0.17004 12.5 7.87 0 0.5240 6.004 85.9 6.5921 5 311 15.2 386.71 17.10 18.9 Low

The table shows the Boston data (only the first 10 of the 506 suburbs are printed) with the last column, crime_factor, indicating whether a given suburb has a crime rate above or below the median: 'High' means the suburb's crime rate is above the median, while 'Low' means it is at or below the median.
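
Because the classes are defined by a median split, they are balanced by construction; a quick sanity check using the objects created above:

# Sanity check: the median split yields (essentially) balanced classes
table(boston$crime_factor)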

# Correlation plot
cor_test <- boston %>%
    dplyr::select(-chas, -crime_factor) %>%
    cor.mtest(conf.level = .95)

boston %>%
    dplyr::select(-chas, -crime_factor) %>%
    cor %>%
    corrplot(method = 'color', 
         order = 'hclust', addrect = 2,
         tl.col = 'black', addCoef.col = 'black', number.cex = 0.65,
         p.mat = cor_test$p, sig.level = .05)

The correlation matrix summarizes the pairwise relationships among the variables: blue cells indicate positive correlations, red cells negative ones.

  • In terms of crime rate, five variables are strongly correlated with crim: rad (0.63), tax (0.58), lstat (0.46), nox (0.42), and indus (0.41); the sketch after this list shows how to extract these values.

  • In addition, several of the predictors are strongly correlated with one another, which can lead to multicollinearity.
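
For reference, a minimal sketch of how the crim correlations above can be pulled out and ranked:

# Correlations of each numeric variable with crim, strongest first
cors <- boston %>%
    dplyr::select(-chas, -crime_factor) %>%
    cor()
sort(cors[, 'crim'], decreasing = TRUE)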

#boxplot
boston %>%
    dplyr::select(zn:crime_factor) %>%
    gather(value_type, value, -crime_factor, -chas) %>%
    ggplot(aes(value_type, value, fill = crime_factor)) +
    geom_boxplot(alpha = 0.5) +
    facet_wrap(~value_type, scales = 'free') +
    scale_fill_discrete(name = 'Crime Rate') +
    theme(legend.position = 'top')

The figures above help us distinguish the high-crime group from the low-crime group. Based on the figures, we can clearly see patterns such as:

  • age in high-crime suburbs is generally higher than in low-crime suburbs.

  • dis in high-crime suburbs is generally lower than in low-crime suburbs.

  • indus in high-crime suburbs is generally higher than in low-crime suburbs.

  • nox in high-crime suburbs is generally higher than in low-crime suburbs.

  • rad in high-crime suburbs is generally higher than in low-crime suburbs.

  • tax in high-crime suburbs is generally higher than in low-crime suburbs.

# Scatterplots
boston %>%
    dplyr::select(crim, crime_factor, rad, nox, tax, age, dis, indus) %>%
    gather(Variable, value, -crim, -crime_factor) %>%
    mutate(Variable = str_to_title(Variable)) %>%
    ggplot(aes(value, crim)) +
    geom_point(aes(col = crime_factor)) +
    facet_wrap(~ Variable, scales = 'free') +
    geom_smooth(method = 'lm', formula = y ~ poly(x, 3), se = FALSE) +
    guides(col = 'none') +
    labs(title = 'Scatterplots for each strong predictor')

Based on the figures above, I selected the strongest predictors (age, dis, indus, nox, rad, and tax) and plotted each of them against the response crim.

The relationships look clearly nonlinear, roughly polynomial, so the models below use cubic polynomial terms for these predictors.
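
As a quick, illustrative check of this choice (a sketch on the full data, with nox picked arbitrarily), a likelihood-ratio test can compare a linear logistic fit against a cubic one:

# Does a cubic term in nox improve on a linear one? (illustrative check)
fit_lin <- glm(crime_factor ~ nox, data = boston, family = 'binomial')
fit_cub <- glm(crime_factor ~ poly(nox, 3), data = boston, family = 'binomial')
anova(fit_lin, fit_cub, test = 'Chisq')  # a small p-value favours the cubic fit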

Constructing the models

Splitting the data

set.seed(123)
boston_split <- initial_split(boston, prop=0.8,strata= crime_factor)
boston_training <- training(boston_split)
boston_testing <- testing(boston_split)
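
Stratifying on crime_factor preserves the 50/50 class balance in both partitions; a quick check:

# Both partitions should retain the ~50/50 class balance
boston_training %>% count(crime_factor)
boston_testing %>% count(crime_factor)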

logistic regression

glm_model <- glm(crime_factor ~ poly(rad, 3) + poly(nox, 3) + 
                   poly(tax, 3) + poly(age, 3) + poly(dis, 3)+ poly(indus, 3), 
               data = boston_training, family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(glm_model)
## 
## Call:
## glm(formula = crime_factor ~ poly(rad, 3) + poly(nox, 3) + poly(tax, 
##     3) + poly(age, 3) + poly(dis, 3) + poly(indus, 3), family = "binomial", 
##     data = boston_training)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9398   0.0000   0.0000   0.0004   2.1725  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)   
## (Intercept)      -1448.667    637.382  -2.273  0.02304 * 
## poly(rad, 3)1   -55456.257  23688.738  -2.341  0.01923 * 
## poly(rad, 3)2    -8892.445   3783.981  -2.350  0.01877 * 
## poly(rad, 3)3    -1376.436    598.658  -2.299  0.02149 * 
## poly(nox, 3)1    -4332.733   1473.024  -2.941  0.00327 **
## poly(nox, 3)2    -3639.043   1268.403  -2.869  0.00412 **
## poly(nox, 3)3    -1201.098    436.083  -2.754  0.00588 **
## poly(tax, 3)1     8329.522   2969.219   2.805  0.00503 **
## poly(tax, 3)2     2935.135   1044.474   2.810  0.00495 **
## poly(tax, 3)3      536.127    228.136   2.350  0.01877 * 
## poly(age, 3)1       -1.168      9.836  -0.119  0.90545   
## poly(age, 3)2      -19.426      9.010  -2.156  0.03107 * 
## poly(age, 3)3       -8.199      8.119  -1.010  0.31256   
## poly(dis, 3)1       52.903     25.556   2.070  0.03845 * 
## poly(dis, 3)2      -23.914     20.294  -1.178  0.23865   
## poly(dis, 3)3      -39.021     13.779  -2.832  0.00463 **
## poly(indus, 3)1    -16.255     42.342  -0.384  0.70105   
## poly(indus, 3)2    -52.148     24.376  -2.139  0.03241 * 
## poly(indus, 3)3   -188.155     61.799  -3.045  0.00233 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 560.063  on 403  degrees of freedom
## Residual deviance:  77.071  on 385  degrees of freedom
## AIC: 115.07
## 
## Number of Fisher Scoring iterations: 19
glm_fit <- predict(glm_model, type = "response", newdata = boston_testing)

# glm() with family = "binomial" models P(crime_factor == "Low") because "Low"
# is the second factor level, so fitted probabilities above 0.5 map to "Low"
predict_binary_glm <- tibble(
    predicted_value = factor(ifelse(glm_fit > 0.5, "Low", "High"),
                             levels = c("High", "Low")),
    actual_value = boston_testing$crime_factor)
confusion_glm <- confusionMatrix(predict_binary_glm$predicted_value, predict_binary_glm$actual_value)
confusion_glm
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High Low
##       High   50   5
##       Low     1  46
##                                           
##                Accuracy : 0.9412          
##                  95% CI : (0.8764, 0.9781)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.8824          
##                                           
##  Mcnemar's Test P-Value : 0.2207          
##                                           
##             Sensitivity : 0.9804          
##             Specificity : 0.9020          
##          Pos Pred Value : 0.9091          
##          Neg Pred Value : 0.9787          
##              Prevalence : 0.5000          
##          Detection Rate : 0.4902          
##    Detection Prevalence : 0.5392          
##       Balanced Accuracy : 0.9412          
##                                           
##        'Positive' Class : High            
## 

The confusion matrix indicates an accuracy of 94.12%, i.e. 94.12% of the test observations are classified correctly. Sensitivity is the ability of the model to correctly identify true High suburbs: TP/(TP+FN) = 50/(50+1) = 98.04%. Specificity is the ability to correctly identify true Low suburbs: TN/(TN+FP) = 46/(46+5) = 90.20%. The earlier warning that fitted probabilities were numerically 0 or 1 signals near-perfect separation on the training data, which also explains the enormous coefficient standard errors in the summary. Overall, the model performs very well.
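
These figures can also be read directly off the caret object rather than computed by hand:

# Accuracy, sensitivity and specificity from the confusionMatrix object
confusion_glm$overall['Accuracy']
confusion_glm$byClass[c('Sensitivity', 'Specificity')]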

LDA

lda_model <- lda(crime_factor ~ poly(rad, 3) + poly(nox, 3) + 
                   poly(tax, 3) + poly(age, 3) + poly(dis, 3)+ poly(indus, 3), 
               data = boston_training)
lda_model
## Call:
## lda(crime_factor ~ poly(rad, 3) + poly(nox, 3) + poly(tax, 3) + 
##     poly(age, 3) + poly(dis, 3) + poly(indus, 3), data = boston_training)
## 
## Prior probabilities of groups:
## High  Low 
##  0.5  0.5 
## 
## Group means:
##      poly(rad, 3)1 poly(rad, 3)2 poly(rad, 3)3 poly(nox, 3)1 poly(nox, 3)2
## High    0.03053036  -0.007576335   0.001805575     0.0356466   -0.01258601
## Low    -0.03053036   0.007576335  -0.001805575    -0.0356466    0.01258601
##      poly(nox, 3)3 poly(tax, 3)1 poly(tax, 3)2 poly(tax, 3)3 poly(age, 3)1
## High  -0.002945454    0.02954138   -0.00362748 -0.0009944897    0.02948616
## Low    0.002945454   -0.02954138    0.00362748  0.0009944897   -0.02948616
##      poly(age, 3)2 poly(age, 3)3 poly(dis, 3)1 poly(dis, 3)2 poly(dis, 3)3
## High   0.008543963 -0.0006928698   -0.03135008    0.01015439    0.00142612
## Low   -0.008543963  0.0006928698    0.03135008   -0.01015439   -0.00142612
##      poly(indus, 3)1 poly(indus, 3)2 poly(indus, 3)3
## High      0.03004324     -0.01078947     -0.01153712
## Low      -0.03004324      0.01078947      0.01153712
## 
## Coefficients of linear discriminants:
##                         LD1
## poly(rad, 3)1   -52.7634533
## poly(rad, 3)2    -2.5849385
## poly(rad, 3)3     1.4250961
## poly(nox, 3)1   -24.7126359
## poly(nox, 3)2    10.1489586
## poly(nox, 3)3    -1.4780811
## poly(tax, 3)1    46.0327368
## poly(tax, 3)2    19.0327107
## poly(tax, 3)3    -9.7843183
## poly(age, 3)1    -0.9765566
## poly(age, 3)2    -4.9999505
## poly(age, 3)3    -1.7035940
## poly(dis, 3)1     0.6205349
## poly(dis, 3)2     4.2583329
## poly(dis, 3)3    -5.1157704
## poly(indus, 3)1  -8.4164621
## poly(indus, 3)2  -8.7011396
## poly(indus, 3)3   4.5357491
predict_lda <- predict(lda_model, type= "response", newdata=boston_testing)$class
predict_lda
##   [1] High High Low  High High High High High Low  Low  Low  Low  Low  Low  Low 
##  [16] Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  High
##  [31] High High High High High High High High High Low  Low  Low  Low  High High
##  [46] High High High Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  High
##  [61] High Low  Low  Low  Low  Low  Low  High High High High High High High High
##  [76] High High High High High High High High High High High High High High High
##  [91] High High High High High High High Low  High High High Low 
## Levels: High Low
predict_result_lda <- tibble(
    predicted_value = predict_lda,
    actual_value = boston_testing$crime_factor)
confusion_lda <- confusionMatrix(predict_result_lda$predicted_value, predict_result_lda$actual_value)
confusion_lda
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High Low
##       High   49   8
##       Low     2  43
##                                          
##                Accuracy : 0.902          
##                  95% CI : (0.8271, 0.952)
##     No Information Rate : 0.5            
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.8039         
##                                          
##  Mcnemar's Test P-Value : 0.1138         
##                                          
##             Sensitivity : 0.9608         
##             Specificity : 0.8431         
##          Pos Pred Value : 0.8596         
##          Neg Pred Value : 0.9556         
##              Prevalence : 0.5000         
##          Detection Rate : 0.4804         
##    Detection Prevalence : 0.5588         
##       Balanced Accuracy : 0.9020         
##                                          
##        'Positive' Class : High           
## 

The LDA model achieves an accuracy of 90.20%, meaning 90.20% of the test observations are classified correctly. Sensitivity (true High): TP/(TP+FN) = 49/(49+2) = 96.08%. Specificity (true Low): TN/(TN+FP) = 43/(43+8) = 84.31%. Compared to the logistic model, LDA is slightly worse on both sensitivity and specificity, so its overall accuracy is somewhat lower (90.20% vs 94.12%), though the two models behave broadly similarly.

naive Bayes

naivebayes_model <- naive_bayes(crime_factor ~ rad + nox+ tax + age + indus, 
               data = boston_training)
naivebayes_model
## 
## ================================== Naive Bayes ================================== 
##  
##  Call: 
## naive_bayes.formula(formula = crime_factor ~ rad + nox + tax + 
##     age + indus, data = boston_training)
## 
## --------------------------------------------------------------------------------- 
##  
## Laplace smoothing: 0
## 
## --------------------------------------------------------------------------------- 
##  
##  A priori probabilities: 
## 
## High  Low 
##  0.5  0.5 
## 
## --------------------------------------------------------------------------------- 
##  
##  Tables: 
## 
## --------------------------------------------------------------------------------- 
##  ::: rad (Gaussian) 
## --------------------------------------------------------------------------------- 
##       
## rad         High       Low
##   mean 14.658416  4.079208
##   sd    9.510254  1.631061
## 
## --------------------------------------------------------------------------------- 
##  ::: nox (Gaussian) 
## --------------------------------------------------------------------------------- 
##       
## nox          High        Low
##   mean 0.63711386 0.46869703
##   sd   0.10224700 0.05528707
## 
## --------------------------------------------------------------------------------- 
##  ::: tax (Gaussian) 
## --------------------------------------------------------------------------------- 
##       
## tax         High       Low
##   mean 504.81683 307.27228
##   sd   168.51777  87.23754
## 
## --------------------------------------------------------------------------------- 
##  ::: age (Gaussian) 
## --------------------------------------------------------------------------------- 
##       
## age        High      Low
##   mean 85.48119 51.60594
##   sd   18.49810 26.88528
## 
## --------------------------------------------------------------------------------- 
##  ::: indus (Gaussian) 
## --------------------------------------------------------------------------------- 
##       
## indus       High       Low
##   mean 15.143663  6.930396
##   sd    5.512455  5.354737
## 
## ---------------------------------------------------------------------------------
# Pass only the predictors the model was trained on, to avoid the
# predict.naive_bayes() warning about extra features in newdata
predict_naivebayes <- predict(naivebayes_model, type = "class",
                              newdata = boston_testing[, c('rad', 'nox', 'tax', 'age', 'indus')])
predict_naivebayes
##   [1] Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low 
##  [16] Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  High High High
##  [31] High High High High High High High Low  Low  Low  Low  Low  Low  Low  Low 
##  [46] Low  High Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low 
##  [61] Low  Low  Low  Low  Low  Low  Low  High High High High High High High High
##  [76] High High High High High High High High High High High High High High High
##  [91] High High High High High High High High Low  Low  Low  Low 
## Levels: High Low
predict_result_naivebayes <- tibble(
    predicted_value = predict_naivebayes,
    actual_value = boston_testing$crime_factor)
confusion_naivebayes <- confusionMatrix(predict_result_naivebayes$predicted_value, predict_result_naivebayes$actual_value)
confusion_naivebayes
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High Low
##       High   39   3
##       Low    12  48
##                                           
##                Accuracy : 0.8529          
##                  95% CI : (0.7691, 0.9153)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : 8.267e-14       
##                                           
##                   Kappa : 0.7059          
##                                           
##  Mcnemar's Test P-Value : 0.03887         
##                                           
##             Sensitivity : 0.7647          
##             Specificity : 0.9412          
##          Pos Pred Value : 0.9286          
##          Neg Pred Value : 0.8000          
##              Prevalence : 0.5000          
##          Detection Rate : 0.3824          
##    Detection Prevalence : 0.4118          
##       Balanced Accuracy : 0.8529          
##                                           
##        'Positive' Class : High            
## 

The naive Bayes model is the worst performer so far, with an accuracy of only 85.29%, a sensitivity of 76.47%, and a specificity of 94.12%. The loss of accuracy comes from the model's failure to identify the High class, i.e. its low sensitivity.
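
As a quick sanity check, these rates can be recomputed by hand from the confusion-matrix counts above (a minimal sketch; the TP/FP/FN/TN names are ours, with “High” as the positive class):

TP <- 39; FP <- 3; FN <- 12; TN <- 48   # counts read off the confusion matrix
c(sensitivity = TP / (TP + FN),              # 39/51 = 0.7647
  specificity = TN / (TN + FP),              # 48/51 = 0.9412
  accuracy = (TP + TN) / (TP + FP + FN + TN))  # 87/102 = 0.8529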

KNN

variables <- c('rad', 'nox', 'tax', 'age', 'dis', 'zn', 'indus')

x_training <- boston_training[, variables]
y_training <- boston_training$crime_factor
x_testing <- boston_testing[, variables]
acc <- list()

for (i in 1:20) {
    knn_pred <- knn(train = x_training, test = x_testing, cl = y_training, k = i)
    acc[[as.character(i)]] <- mean(knn_pred == boston_testing$crime_factor)
}

acc <- unlist(acc)

tibble(acc = acc) %>%
    mutate(k = row_number()) %>%
    ggplot(aes(k, acc)) +
    geom_col(aes(fill = k == which.max(acc))) +
    labs(x = 'K', y = 'Accuracy', title = 'KNN Accuracy for different values of K') +
    scale_x_continuous(breaks = 1:20) +
    scale_y_continuous(breaks = round(c(seq(0.90, 0.94, 0.01), max(acc)),
                                      digits = 3)) +
    geom_hline(yintercept = max(acc), lty = 2) +
    coord_cartesian(ylim = c(min(acc), max(acc))) +
    guides(fill = "none")

#final model
knn_final <- knn(train = x_training, test = x_testing, cl = y_training, k = 3)
knn_final
##   [1] Low  High High High High High High Low  Low  Low  Low  Low  Low  Low  Low 
##  [16] Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  High
##  [31] High High High High High High High High Low  Low  Low  Low  Low  Low  High
##  [46] High High High Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  High
##  [61] High Low  Low  Low  Low  Low  Low  High High High High High High High High
##  [76] High High High High High High High High High High High High High High High
##  [91] High High High High High High High Low  Low  Low  Low  Low 
## Levels: High Low
predict_result_knn <- knn_final %>% bind_cols(boston_testing %>% dplyr::select(crime_factor))
## New names:
## • `` -> `...1`
colnames(predict_result_knn) <- c("predicted_value", "actual_value")
confusion_knn <- confusionMatrix(predict_result_knn$predicted_value, predict_result_knn$actual_value)
confusion_knn
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High Low
##       High   49   2
##       Low     2  49
##                                           
##                Accuracy : 0.9608          
##                  95% CI : (0.9026, 0.9892)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9216          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9608          
##             Specificity : 0.9608          
##          Pos Pred Value : 0.9608          
##          Neg Pred Value : 0.9608          
##              Prevalence : 0.5000          
##          Detection Rate : 0.4804          
##    Detection Prevalence : 0.5000          
##       Balanced Accuracy : 0.9608          
##                                           
##        'Positive' Class : High            
## 

By running k from 1 to 20, we find that the best model uses k=3, with an accuracy of 96.08%. Looking at the confusion matrix, the sensitivity and specificity are both calculated at 96.08%. These numbers make KNN with k=3 the best model overall.
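
One caveat: KNN is distance-based, so predictors on large scales (such as tax) can dominate the distance calculation. A variant worth trying, sketched below assuming the class package’s knn() used above, standardizes the predictors before fitting:

# standardize training features, then apply the same centering/scaling to the test set
x_training_std <- scale(x_training)
x_testing_std <- scale(x_testing,
                       center = attr(x_training_std, "scaled:center"),
                       scale = attr(x_training_std, "scaled:scale"))
knn_std <- knn(train = x_training_std, test = x_testing_std, cl = y_training, k = 3)
mean(knn_std == boston_testing$crime_factor)   # accuracy on standardized features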

Question 2: Model selection:

We perform best subset, forward stepwise, and backward stepwise selection on a single data set. For each approach, we obtain p+1 models containing 0, 1, 2, ⋯, p predictors. Explain your answers:

2a: Which of the three models with k predictors has the smallest training RSS?

When performing best subset selection, the model with k predictors is the one with the smallest RSS among all C(p,k) models containing exactly k predictors.

When performing forward stepwise selection, the model with k predictors is the one with the smallest RSS among the p−k models that augment the predictors in M(k−1) with one additional predictor.

When performing backward stepwise selection, the model with k predictors is the one with the smallest RSS among the k+1 models that each contain all but one of the predictors in M(k+1).

So, the model with k predictors that has the smallest training RSS is the one obtained from best subset selection, since it is selected from among all possible k-predictor models rather than from a restricted sequence.
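
This can be checked empirically with leaps::regsubsets, sketched below on the Boston data from Question 1 (object names are ours; nvmax = 13 assumes the 13 predictors besides crim): for every size k, the exhaustive search attains a training RSS no larger than either stepwise method.

library(leaps)
best <- regsubsets(crim ~ ., data = Boston, nvmax = 13, method = "exhaustive")
fwd <- regsubsets(crim ~ ., data = Boston, nvmax = 13, method = "forward")
bwd <- regsubsets(crim ~ ., data = Boston, nvmax = 13, method = "backward")
# training RSS by model size; the 'best' column is never larger than the others
cbind(best = summary(best)$rss, fwd = summary(fwd)$rss, bwd = summary(bwd)$rss)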

2b: Which of the three models with k predictors has the smallest test RSS?

Best subset selection may have the smallest test RSS because it considers more models than the other methods.

However, the other methods might, by chance, select a model that fits the test data better, since searching fewer candidate models makes them less prone to overfitting.

The outcome will depend more heavily on the choice of test set / validation method than on the selection method.
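
A sketch of how this could be checked on a held-out split (the split and object names are ours; regsubsets has no predict method, so predictions are formed from the coefficients directly):

set.seed(1)
train_id <- sample(nrow(Boston), floor(0.8 * nrow(Boston)))
fit <- regsubsets(crim ~ ., data = Boston[train_id, ], nvmax = 13)
test_mat <- model.matrix(crim ~ ., data = Boston[-train_id, ])
beta <- coef(fit, id = 5)                 # the best-subset model with 5 predictors
pred <- test_mat[, names(beta)] %*% beta  # manual predictions on the test set
sum((Boston$crim[-train_id] - pred)^2)    # its test RSS; repeat per method and size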

2c: True or False:

  • The predictors in the k-variable model identified by forward stepwise are a subset of the predictors in the (k+1)-variable model identified by forward stepwise selection.

TRUE. The model with (k+1) predictors is obtained by augmenting the predictors in the model with k predictors with one additional predictor.

  • The predictors in the k-variable model identified by backward stepwise are a subset of the predictors in the (k+1)-variable model identified by backward stepwise selection.

TRUE. The model with k predictors is obtained by removing one predictor from the model with (k+1) predictors.

  • The predictors in the k-variable model identified by backward stepwise are a subset of the predictors in the (k+1)-variable model identified by forward stepwise selection.

FALSE. There is no direct link between the models obtained from forward and backward selection.

  • The predictors in the k-variable model identified by forward stepwise are a subset of the predictors in the (k+1)-variable model identified by backward stepwise selection.

FALSE. There is no direct link between the models obtained from forward and backward selection.

  • The predictors in the k-variable model identified by best subset are a subset of the predictors in the (k+1)-variable model identified by best subset selection.

FALSE. Best subset selection searches all models of each size independently, so the best k-variable model need not be nested within the best (k+1)-variable model; the two can contain entirely different predictors.
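
The nesting claims above can also be verified programmatically from the selection matrix that regsubsets reports (a sketch; the fwd object is ours):

fwd <- regsubsets(crim ~ ., data = Boston, nvmax = 13, method = "forward")
sel <- summary(fwd)$which   # logical matrix: one row per model size
# TRUE for forward stepwise: every size-k model is contained in the size-(k+1) model
all(sapply(1:12, function(k) all(which(sel[k, ]) %in% which(sel[k + 1, ]))))
# the same check on a method = "exhaustive" fit can return FALSE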

Question 3: Working with College Dataset:

3a: Split the data set into a training set and a test set:

First, we load the data. Then we split it into training data (college_train) and test data (college_test), 80% and 20% respectively. The split is stratified on the number of applications (Apps) so that both sets are representative of the population.

#import data
college <- College

#split data
set.seed(123)
college_split <- initial_split(college, prop = 0.8, strata = "Apps")
college_train <- training(college_split)
college_test <- testing(college_split)

3b: Fit a linear model using least squares on the training set, and report the test error obtained:

Step 1: We build the model by setting the engine to linear model (lm) and the mode to regression.

Step 2: We fit the model to the training data, defining “Apps” as the dependent variable and all other variables as predictors of the number of applications.

Step 3: We inspect the results with a model summary. There are 9 significant variables: Private, Accept, Top10perc, Top25perc, F.Undergrad, Outstate, Room.Board, Expend, and Grad.Rate. The adjusted R-squared on the training data is 91.69%, meaning that the predictors explain 91.69% of the variation in “Apps”.

Step 4: We use the model to predict on the test data, then put the predictions and the actual values side by side for comparison.

Step 5: The test error metrics are reported:

  • RMSE is reported at 1449.199, meaning that our model’s predictions deviate from the actual number of applications by roughly 1449 applications on average.

  • R-squared is reported at 93.61%, meaning that the predictors explain 93.61% of the variation in the dependent variable.

#build model
lm_model <- linear_reg() %>% set_engine('lm') %>% set_mode('regression')
lm_fit <- lm_model %>% fit(Apps~.,data=college_train)

#model result
summary(lm_fit$fit)
## 
## Call:
## stats::lm(formula = Apps ~ ., data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3257.7  -431.1   -57.5   318.8  6581.9 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.475e+02  4.238e+02  -1.056  0.29141    
## PrivateYes  -5.964e+02  1.471e+02  -4.055 5.67e-05 ***
## Accept       1.262e+00  5.474e-02  23.060  < 2e-16 ***
## Enroll      -2.867e-01  1.960e-01  -1.463  0.14402    
## Top10perc    4.485e+01  5.787e+00   7.749 3.93e-14 ***
## Top25perc   -1.362e+01  4.713e+00  -2.889  0.00400 ** 
## F.Undergrad  9.257e-02  3.473e-02   2.665  0.00790 ** 
## P.Undergrad  4.950e-03  3.319e-02   0.149  0.88150    
## Outstate    -5.318e-02  1.962e-02  -2.710  0.00692 ** 
## Room.Board   1.615e-01  4.929e-02   3.277  0.00111 ** 
## Books        5.242e-02  2.402e-01   0.218  0.82734    
## Personal    -8.572e-03  6.533e-02  -0.131  0.89565    
## PhD         -5.727e+00  4.779e+00  -1.199  0.23118    
## Terminal    -5.017e+00  5.205e+00  -0.964  0.33546    
## S.F.Ratio    3.827e+00  1.342e+01   0.285  0.77560    
## perc.alumni -6.235e+00  4.325e+00  -1.442  0.14991    
## Expend       7.915e-02  1.270e-02   6.233 8.58e-10 ***
## Grad.Rate    1.064e+01  3.063e+00   3.474  0.00055 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 971.6 on 603 degrees of freedom
## Multiple R-squared:  0.9192, Adjusted R-squared:  0.9169 
## F-statistic: 403.6 on 17 and 603 DF,  p-value: < 2.2e-16
#fit test data
predict(lm_fit, new_data = college_test)
## # A tibble: 156 × 1
##    .pred
##    <dbl>
##  1 1438.
##  2 1159.
##  3 1221.
##  4 3852.
##  5 6127.
##  6 2055.
##  7 9687.
##  8 1529.
##  9 1243.
## 10  446.
## # … with 146 more rows
college_test_results <- predict(lm_fit, new_data = college_test) %>% 
  bind_cols(college_test$Apps)
## New names:
## • `` -> `...2`
colnames(college_test_results) <- c("Prediction","Actual data")
college_test_results
## # A tibble: 156 × 2
##    Prediction `Actual data`
##         <dbl>         <dbl>
##  1      1438.          1660
##  2      1159.          1428
##  3      1221.          1038
##  4      3852.          4302
##  5      6127.          7313
##  6      2055.          2135
##  7      9687.          7548
##  8      1529.           948
##  9      1243.           807
## 10       446.           632
## # … with 146 more rows
#test error RMSE:
rmse <- rmse(college_test_results, 
     truth = "Actual data", 
     estimate = "Prediction")
rmse
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard       1449.
#test error Rsquare:
rsq <- rsq(college_test_results, 
    truth = "Actual data", 
    estimate = "Prediction")
rsq
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.936
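
For transparency, the same two metrics can be reproduced by hand from the prediction table (a sketch; yardstick’s rsq is the squared correlation between truth and estimate):

with(college_test_results,
     c(rmse = sqrt(mean((`Actual data` - Prediction)^2)),  # root mean squared error
       rsq = cor(`Actual data`, Prediction)^2))            # squared correlation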

3c: Fit a ridge regression model on the training set, with λ chosen by cross-validation. Report the test error obtained.

Step 1: We build the model matrix, defining “Apps” as the dependent variable and all other variables as predictors of the number of applications.

Step 2: We use cross-validation to choose the lambda for the model. Lambda is the tuning hyperparameter that controls the amount of shrinkage applied to the coefficient magnitudes.

Then we plot the cross-validation results to guide the choice. On the chart, the x-axis shows log(lambda) and the y-axis shows the mean squared error, so our objective is to select the lambda with the minimum mean squared error; the best values of log(lambda) lie around 6.

After that, we extract the best lambda, which is 313.5603.

Step 3: We build the model by the train data

Step 4: We fit the test data into the model and use the best lamda found before

Step 5: The test error metrics are reported:

  • RMSE is reported at 1986.326, meaning that our model’s predictions deviate from the actual number of applications by roughly 1986 applications on average.

  • R-squared is reported at 88.41%, meaning that the predictors explain 88.41% of the variation in the dependent variable.

set.seed(123)
#Set up matrices needed for the glmnet functions
train_matrix <-  model.matrix(Apps~., data = college_train)
test_matrix <- model.matrix(Apps~., data = college_test)

#Choose lambda using cross-validation
lamda <- cv.glmnet(train_matrix,college_train$Apps,alpha=0)
plot(lamda)

bestlam <- lamda$lambda.min
bestlam
## [1] 313.5603
#Build model 
ridge_model <- glmnet(train_matrix,college_train$Apps,alpha = 0)

#Fit test data
ridge_fit <- predict(ridge_model,s=bestlam,newx = test_matrix)

ridge_test_results <- ridge_fit %>% 
  bind_cols(college_test$Apps)
## New names:
## • `` -> `...2`
colnames(ridge_test_results) <- c("Prediction","Actual data")
ridge_test_results
## # A tibble: 156 × 2
##    Prediction `Actual data`
##         <dbl>         <dbl>
##  1      1620.          1660
##  2      1047.          1428
##  3      1506.          1038
##  4      3667.          4302
##  5      6163.          7313
##  6      2202.          2135
##  7      9817.          7548
##  8      1554.           948
##  9      1090.           807
## 10       431.           632
## # … with 146 more rows
#test error RMSE:
rmse_ridge <- rmse(ridge_test_results, 
     truth = "Actual data", 
     estimate = "Prediction")
rmse_ridge
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard       1986.
#test error Rsquare:
rsq_ridge <- rsq(ridge_test_results, 
    truth = "Actual data", 
    estimate = "Prediction")
rsq_ridge
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.884
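
As an aside, cv.glmnet also reports lambda.1se, the largest lambda whose cross-validated error is within one standard error of the minimum; a sketch (not run above) of the more heavily shrunk ridge fit it implies:

lamda$lambda.1se   # a more conservative choice than lambda.min
ridge_fit_1se <- predict(ridge_model, s = lamda$lambda.1se, newx = test_matrix)
sqrt(mean((college_test$Apps - ridge_fit_1se)^2))   # test RMSE under lambda.1se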

3d: Fit a lasso model on the training set, with λ chosen by crossvalidation. Report the test error obtained, along with the number of non-zero coefficient estimates.

For the lasso model, we follow the same steps as for ridge regression. The only difference is that alpha is set to 1 instead of 0.

The coefficient estimates:

  • There are 4 variables with non-zero coefficient estimates: Accept, Top10perc, F.Undergrad, and Expend.

  • The coefficients of Accept, Top10perc, F.Undergrad, and Expend are 1.213, 18.392, 0.031, and 0.019, respectively, meaning that all four variables are positively related to “Apps”.

The test error metrics are reported:

  • RMSE is reported at 1636.073, meaning that our model’s predictions deviate from the actual number of applications by roughly 1636 applications on average.

  • R-squared is reported at 93.60%, meaning that the predictors explain 93.60% of the variation in the dependent variable.

#Choose lambda using cross-validation
lamda_2 <- cv.glmnet(train_matrix,college_train$Apps,alpha=1)
plot(lamda_2)

bestlam_2 <- lamda_2$lambda.min
bestlam_2
## [1] 10.75659
#Build model 
lasso_model <- glmnet(train_matrix,college_train$Apps,alpha = 1)

#model result
lasso_coef <- predict(lasso_model, s = bestlam_2, type = "coefficients")  # use the lasso CV lambda (bestlam_2), not the ridge one
lasso_coef
## 19 x 1 sparse Matrix of class "dgCMatrix"
##                        s1
## (Intercept) -269.29591655
## (Intercept)    .         
## PrivateYes     .         
## Accept         1.21301871
## Enroll         .         
## Top10perc     18.39197894
## Top25perc      .         
## F.Undergrad    0.03146116
## P.Undergrad    .         
## Outstate       .         
## Room.Board     .         
## Books          .         
## Personal       .         
## PhD            .         
## Terminal       .         
## S.F.Ratio      .         
## perc.alumni    .         
## Expend         0.01897122
## Grad.Rate      .
#Fit test data
lasso_fit <- predict(lasso_model, s = bestlam_2, newx = test_matrix)



lasso_test_results <- lasso_fit %>% 
  bind_cols(college_test$Apps)
## New names:
## • `` -> `...2`
colnames(lasso_test_results) <- c("Prediction","Actual data")
lasso_test_results
## # A tibble: 156 × 2
##    Prediction `Actual data`
##         <dbl>         <dbl>
##  1      1873.          1660
##  2      1664.          1428
##  3      1552.          1038
##  4      2917.          4302
##  5      6180.          7313
##  6      2404.          2135
##  7      9066.          7548
##  8      1722.           948
##  9      1470.           807
## 10       808.           632
## # … with 146 more rows
#test error RMSE:
rmse_lasso <- rmse(lasso_test_results, 
     truth = "Actual data", 
     estimate = "Prediction")
rmse_lasso
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard       1636.
#test error Rsquare:
rsq_lasso <- rsq(lasso_test_results, 
    truth = "Actual data", 
    estimate = "Prediction")
rsq_lasso
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.936
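
Finally, the test-set metrics reported in this question can be collected into one table for comparison (a sketch using the metric objects computed above):

bind_rows(
  lm = bind_rows(rmse, rsq),
  ridge = bind_rows(rmse_ridge, rsq_ridge),
  lasso = bind_rows(rmse_lasso, rsq_lasso),
  .id = "model"
)

On this split, least squares and the lasso reach essentially the same test R-squared (about 0.936), while ridge trails on both metrics.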