## 'data.frame': 47 obs. of 16 variables:
## $ percent_m : int 151 143 142 136 141 121 127 131 157 140 ...
## $ is_south : int 1 0 1 0 0 0 1 1 1 0 ...
## $ mean_education : int 91 113 89 121 121 110 111 109 90 118 ...
## $ police_exp60 : int 58 103 45 149 109 118 82 115 65 71 ...
## $ police_exp59 : int 56 95 44 141 101 115 79 109 62 68 ...
## $ labour_participation: int 510 583 533 577 591 547 519 542 553 632 ...
## $ m_per1000f : int 950 1012 969 994 985 964 982 969 955 1029 ...
## $ state_pop : int 33 13 18 157 18 25 4 50 39 7 ...
## $ nonwhites_per1000 : int 301 102 219 80 30 44 139 179 286 15 ...
## $ unemploy_m24 : int 108 96 94 102 91 84 97 79 81 100 ...
## $ unemploy_m39 : int 41 36 33 39 20 29 38 35 28 24 ...
## $ gdp : int 394 557 318 673 578 689 620 472 421 526 ...
## $ inequality : int 261 194 250 167 174 126 168 206 239 174 ...
## $ prob_prison : num 0.0846 0.0296 0.0834 0.0158 0.0414 ...
## $ time_prison : num 26.2 25.3 24.3 29.9 21.3 ...
## $ crime_rate : int 791 1635 578 1969 1234 682 963 1555 856 705 ...
##
## Call:
## lm(formula = crime_rate ~ police_exp59, data = crime)
##
## Residuals:
## Min 1Q Median 3Q Max
## -595.58 -156.76 12.29 146.74 593.74
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 165.164 130.427 1.266 0.212
## police_exp59 9.222 1.537 6.001 3.11e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 291.4 on 45 degrees of freedom
## Multiple R-squared: 0.4445, Adjusted R-squared: 0.4322
## F-statistic: 36.01 on 1 and 45 DF, p-value: 3.114e-07
# Intercept-only model: used as the lower bound of the stepwise search.
lm.none <- lm(crime_rate ~ 1, data = crime)

# Full model: crime_rate regressed on every other column of `crime`.
lm.all <- lm(crime_rate ~ ., data = crime)

# Backward elimination starting from the full model; at each step the term
# whose removal gives the lowest AIC is dropped, and the search stops when
# no removal improves on the current model (<none>).
model_step <- step(
  lm.all,
  scope = list(lower = lm.none),
  direction = "backward"
)
## Start: AIC=514.65
## crime_rate ~ percent_m + is_south + mean_education + police_exp60 +
## police_exp59 + labour_participation + m_per1000f + state_pop +
## nonwhites_per1000 + unemploy_m24 + unemploy_m39 + gdp + inequality +
## prob_prison + time_prison
##
## Df Sum of Sq RSS AIC
## - is_south 1 29 1354974 512.65
## - labour_participation 1 8917 1363862 512.96
## - time_prison 1 10304 1365250 513.00
## - state_pop 1 14122 1369068 513.14
## - nonwhites_per1000 1 18395 1373341 513.28
## - m_per1000f 1 31967 1386913 513.74
## - gdp 1 37613 1392558 513.94
## - police_exp59 1 37919 1392865 513.95
## <none> 1354946 514.65
## - unemploy_m24 1 83722 1438668 515.47
## - police_exp60 1 144306 1499252 517.41
## - unemploy_m39 1 181536 1536482 518.56
## - percent_m 1 193770 1548716 518.93
## - prob_prison 1 199538 1554484 519.11
## - mean_education 1 402117 1757063 524.86
## - inequality 1 423031 1777977 525.42
##
## Step: AIC=512.65
## crime_rate ~ percent_m + mean_education + police_exp60 + police_exp59 +
## labour_participation + m_per1000f + state_pop + nonwhites_per1000 +
## unemploy_m24 + unemploy_m39 + gdp + inequality + prob_prison +
## time_prison
##
## Df Sum of Sq RSS AIC
## - time_prison 1 10341 1365315 511.01
## - labour_participation 1 10878 1365852 511.03
## - state_pop 1 14127 1369101 511.14
## - nonwhites_per1000 1 21626 1376600 511.39
## - m_per1000f 1 32449 1387423 511.76
## - police_exp59 1 37954 1392929 511.95
## - gdp 1 39223 1394197 511.99
## <none> 1354974 512.65
## - unemploy_m24 1 96420 1451395 513.88
## - police_exp60 1 144302 1499277 515.41
## - unemploy_m39 1 189859 1544834 516.81
## - percent_m 1 195084 1550059 516.97
## - prob_prison 1 204463 1559437 517.26
## - mean_education 1 403140 1758114 522.89
## - inequality 1 488834 1843808 525.13
##
## Step: AIC=511.01
## crime_rate ~ percent_m + mean_education + police_exp60 + police_exp59 +
## labour_participation + m_per1000f + state_pop + nonwhites_per1000 +
## unemploy_m24 + unemploy_m39 + gdp + inequality + prob_prison
##
## Df Sum of Sq RSS AIC
## - labour_participation 1 10533 1375848 509.37
## - nonwhites_per1000 1 15482 1380797 509.54
## - state_pop 1 21846 1387161 509.75
## - police_exp59 1 28932 1394247 509.99
## - gdp 1 36070 1401385 510.23
## - m_per1000f 1 41784 1407099 510.42
## <none> 1365315 511.01
## - unemploy_m24 1 91420 1456735 512.05
## - police_exp60 1 134137 1499452 513.41
## - unemploy_m39 1 184143 1549458 514.95
## - percent_m 1 186110 1551425 515.01
## - prob_prison 1 237493 1602808 516.54
## - mean_education 1 409448 1774763 521.33
## - inequality 1 502909 1868224 523.75
##
## Step: AIC=509.37
## crime_rate ~ percent_m + mean_education + police_exp60 + police_exp59 +
## m_per1000f + state_pop + nonwhites_per1000 + unemploy_m24 +
## unemploy_m39 + gdp + inequality + prob_prison
##
## Df Sum of Sq RSS AIC
## - nonwhites_per1000 1 11675 1387523 507.77
## - police_exp59 1 21418 1397266 508.09
## - state_pop 1 27803 1403651 508.31
## - m_per1000f 1 31252 1407100 508.42
## - gdp 1 35035 1410883 508.55
## <none> 1375848 509.37
## - unemploy_m24 1 80954 1456802 510.06
## - police_exp60 1 123896 1499744 511.42
## - unemploy_m39 1 190746 1566594 513.47
## - percent_m 1 217716 1593564 514.27
## - prob_prison 1 226971 1602819 514.54
## - mean_education 1 413254 1789103 519.71
## - inequality 1 500944 1876792 521.96
##
## Step: AIC=507.77
## crime_rate ~ percent_m + mean_education + police_exp60 + police_exp59 +
## m_per1000f + state_pop + unemploy_m24 + unemploy_m39 + gdp +
## inequality + prob_prison
##
## Df Sum of Sq RSS AIC
## - police_exp59 1 16706 1404229 506.33
## - state_pop 1 25793 1413315 506.63
## - m_per1000f 1 26785 1414308 506.66
## - gdp 1 31551 1419073 506.82
## <none> 1387523 507.77
## - unemploy_m24 1 83881 1471404 508.52
## - police_exp60 1 118348 1505871 509.61
## - unemploy_m39 1 201453 1588976 512.14
## - prob_prison 1 216760 1604282 512.59
## - percent_m 1 309214 1696737 515.22
## - mean_education 1 402754 1790276 517.74
## - inequality 1 589736 1977259 522.41
##
## Step: AIC=506.33
## crime_rate ~ percent_m + mean_education + police_exp60 + m_per1000f +
## state_pop + unemploy_m24 + unemploy_m39 + gdp + inequality +
## prob_prison
##
## Df Sum of Sq RSS AIC
## - state_pop 1 22345 1426575 505.07
## - gdp 1 32142 1436371 505.39
## - m_per1000f 1 36808 1441037 505.54
## <none> 1404229 506.33
## - unemploy_m24 1 86373 1490602 507.13
## - unemploy_m39 1 205814 1610043 510.76
## - prob_prison 1 218607 1622836 511.13
## - percent_m 1 307001 1711230 513.62
## - mean_education 1 389502 1793731 515.83
## - inequality 1 608627 2012856 521.25
## - police_exp60 1 1050202 2454432 530.57
##
## Step: AIC=505.07
## crime_rate ~ percent_m + mean_education + police_exp60 + m_per1000f +
## unemploy_m24 + unemploy_m39 + gdp + inequality + prob_prison
##
## Df Sum of Sq RSS AIC
## - gdp 1 26493 1453068 503.93
## <none> 1426575 505.07
## - m_per1000f 1 84491 1511065 505.77
## - unemploy_m24 1 99463 1526037 506.24
## - prob_prison 1 198571 1625145 509.20
## - unemploy_m39 1 208880 1635455 509.49
## - percent_m 1 320926 1747501 512.61
## - mean_education 1 386773 1813348 514.35
## - inequality 1 594779 2021354 519.45
## - police_exp60 1 1127277 2553852 530.44
##
## Step: AIC=503.93
## crime_rate ~ percent_m + mean_education + police_exp60 + m_per1000f +
## unemploy_m24 + unemploy_m39 + inequality + prob_prison
##
## Df Sum of Sq RSS AIC
## <none> 1453068 503.93
## - m_per1000f 1 103159 1556227 505.16
## - unemploy_m24 1 127044 1580112 505.87
## - prob_prison 1 247978 1701046 509.34
## - unemploy_m39 1 255443 1708511 509.55
## - percent_m 1 296790 1749858 510.67
## - mean_education 1 445788 1898855 514.51
## - inequality 1 738244 2191312 521.24
## - police_exp60 1 1672038 3125105 537.93
##
## Call:
## lm(formula = crime_rate ~ percent_m + mean_education + police_exp60 +
## m_per1000f + unemploy_m24 + unemploy_m39 + inequality + prob_prison,
## data = crime)
##
## Residuals:
## Min 1Q Median 3Q Max
## -444.70 -111.07 3.03 122.15 483.30
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6426.101 1194.611 -5.379 4.04e-06 ***
## percent_m 9.332 3.350 2.786 0.00828 **
## mean_education 18.012 5.275 3.414 0.00153 **
## police_exp60 10.265 1.552 6.613 8.26e-08 ***
## m_per1000f 2.234 1.360 1.642 0.10874
## unemploy_m24 -6.087 3.339 -1.823 0.07622 .
## unemploy_m39 18.735 7.248 2.585 0.01371 *
## inequality 6.133 1.396 4.394 8.63e-05 ***
## prob_prison -3796.032 1490.646 -2.547 0.01505 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 195.5 on 38 degrees of freedom
## Multiple R-squared: 0.7888, Adjusted R-squared: 0.7444
## F-statistic: 17.74 on 8 and 38 DF, p-value: 1.159e-10
# Shapiro-Wilk Test for Normality
## Hipotesis:
H0: Residual berdistribusi normal.
H1: Residual tidak berdistribusi normal.
Daerah keputusan: tolak H0 ketika p-value < 0.05.
##
## Shapiro-Wilk normality test
##
## data: model_step$residuals
## W = 0.98511, p-value = 0.8051
Karena p-value = 0.8051 > 0.05, H0 gagal ditolak; artinya residual berdistribusi normal.
Breusch-Pagan Test for Heteroskedasticity
H0: Varians residual homogen (residual tidak membentuk sebuah pola).
H1: Varians residual heterogen (residual membentuk sebuah pola).
Daerah keputusan: tolak H0 ketika p-value < 0.05.
##
## studentized Breusch-Pagan test
##
## data: model_step
## BP = 13.51, df = 8, p-value = 0.09546
Karena p-value = 0.09546 > 0.05, H0 gagal ditolak; artinya varians residual homogen (residual tidak membentuk sebuah pola).
Variance Inflation Factor
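Variabel prediktor dianggap saling berpengaruh (terjadi multikolinearitas) jika VIF > 10; jika semua VIF < 10, tidak terjadi multikolinearitas.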
## percent_m mean_education police_exp60 m_per1000f unemploy_m24
## 2.131963 4.189684 2.560496 1.932367 4.360038
## unemploy_m39 inequality prob_prison
## 4.508106 3.731074 1.381879
Predict Unseen Data
# Load the unseen observations used to evaluate the selected model.
crime_test <- read.csv(file = "crime_test.csv")

# Predict crime_rate for the test rows with the stepwise-selected model.
pred_value <- predict(model_step, newdata = crime_test)

# Mean squared error between the predictions and the observed crime_rate.
# NOTE(review): MSE() is not base R; presumably MLmetrics::MSE attached
# earlier in the file -- confirm.
mse_crime <- MSE(y_pred = pred_value, y_true = crime_test$crime_rate)
mse_crime
## [1] 35467.31