Stepwise Regression
# Stepwise selection in both directions, starting from the full model that
# regresses Crime on every other column of uscrime. No k is passed, so
# stepAIC uses its default penalty (k = 2, i.e. AIC).
stepwiseAIC <- stepAIC(
  object = lm(formula = Crime ~ ., data = uscrime),
  direction = "both"
)
## Start: AIC=514.65
## Crime ~ M + So + Ed + Po1 + Po2 + LF + M.F + Pop + NW + U1 +
## U2 + Wealth + Ineq + Prob + Time
##
## Df Sum of Sq RSS AIC
## - So 1 29 1354974 512.65
## - LF 1 8917 1363862 512.96
## - Time 1 10304 1365250 513.00
## - Pop 1 14122 1369068 513.14
## - NW 1 18395 1373341 513.28
## - M.F 1 31967 1386913 513.74
## - Wealth 1 37613 1392558 513.94
## - Po2 1 37919 1392865 513.95
## <none> 1354946 514.65
## - U1 1 83722 1438668 515.47
## - Po1 1 144306 1499252 517.41
## - U2 1 181536 1536482 518.56
## - M 1 193770 1548716 518.93
## - Prob 1 199538 1554484 519.11
## - Ed 1 402117 1757063 524.86
## - Ineq 1 423031 1777977 525.42
##
## Step: AIC=512.65
## Crime ~ M + Ed + Po1 + Po2 + LF + M.F + Pop + NW + U1 + U2 +
## Wealth + Ineq + Prob + Time
##
## Df Sum of Sq RSS AIC
## - Time 1 10341 1365315 511.01
## - LF 1 10878 1365852 511.03
## - Pop 1 14127 1369101 511.14
## - NW 1 21626 1376600 511.39
## - M.F 1 32449 1387423 511.76
## - Po2 1 37954 1392929 511.95
## - Wealth 1 39223 1394197 511.99
## <none> 1354974 512.65
## - U1 1 96420 1451395 513.88
## + So 1 29 1354946 514.65
## - Po1 1 144302 1499277 515.41
## - U2 1 189859 1544834 516.81
## - M 1 195084 1550059 516.97
## - Prob 1 204463 1559437 517.26
## - Ed 1 403140 1758114 522.89
## - Ineq 1 488834 1843808 525.13
##
## Step: AIC=511.01
## Crime ~ M + Ed + Po1 + Po2 + LF + M.F + Pop + NW + U1 + U2 +
## Wealth + Ineq + Prob
##
## Df Sum of Sq RSS AIC
## - LF 1 10533 1375848 509.37
## - NW 1 15482 1380797 509.54
## - Pop 1 21846 1387161 509.75
## - Po2 1 28932 1394247 509.99
## - Wealth 1 36070 1401385 510.23
## - M.F 1 41784 1407099 510.42
## <none> 1365315 511.01
## - U1 1 91420 1456735 512.05
## + Time 1 10341 1354974 512.65
## + So 1 65 1365250 513.00
## - Po1 1 134137 1499452 513.41
## - U2 1 184143 1549458 514.95
## - M 1 186110 1551425 515.01
## - Prob 1 237493 1602808 516.54
## - Ed 1 409448 1774763 521.33
## - Ineq 1 502909 1868224 523.75
##
## Step: AIC=509.37
## Crime ~ M + Ed + Po1 + Po2 + M.F + Pop + NW + U1 + U2 + Wealth +
## Ineq + Prob
##
## Df Sum of Sq RSS AIC
## - NW 1 11675 1387523 507.77
## - Po2 1 21418 1397266 508.09
## - Pop 1 27803 1403651 508.31
## - M.F 1 31252 1407100 508.42
## - Wealth 1 35035 1410883 508.55
## <none> 1375848 509.37
## - U1 1 80954 1456802 510.06
## + LF 1 10533 1365315 511.01
## + Time 1 9996 1365852 511.03
## + So 1 3046 1372802 511.26
## - Po1 1 123896 1499744 511.42
## - U2 1 190746 1566594 513.47
## - M 1 217716 1593564 514.27
## - Prob 1 226971 1602819 514.54
## - Ed 1 413254 1789103 519.71
## - Ineq 1 500944 1876792 521.96
##
## Step: AIC=507.77
## Crime ~ M + Ed + Po1 + Po2 + M.F + Pop + U1 + U2 + Wealth + Ineq +
## Prob
##
## Df Sum of Sq RSS AIC
## - Po2 1 16706 1404229 506.33
## - Pop 1 25793 1413315 506.63
## - M.F 1 26785 1414308 506.66
## - Wealth 1 31551 1419073 506.82
## <none> 1387523 507.77
## - U1 1 83881 1471404 508.52
## + NW 1 11675 1375848 509.37
## + So 1 7207 1380316 509.52
## + LF 1 6726 1380797 509.54
## + Time 1 4534 1382989 509.61
## - Po1 1 118348 1505871 509.61
## - U2 1 201453 1588976 512.14
## - Prob 1 216760 1604282 512.59
## - M 1 309214 1696737 515.22
## - Ed 1 402754 1790276 517.74
## - Ineq 1 589736 1977259 522.41
##
## Step: AIC=506.33
## Crime ~ M + Ed + Po1 + M.F + Pop + U1 + U2 + Wealth + Ineq +
## Prob
##
## Df Sum of Sq RSS AIC
## - Pop 1 22345 1426575 505.07
## - Wealth 1 32142 1436371 505.39
## - M.F 1 36808 1441037 505.54
## <none> 1404229 506.33
## - U1 1 86373 1490602 507.13
## + Po2 1 16706 1387523 507.77
## + NW 1 6963 1397266 508.09
## + So 1 3807 1400422 508.20
## + LF 1 1986 1402243 508.26
## + Time 1 575 1403654 508.31
## - U2 1 205814 1610043 510.76
## - Prob 1 218607 1622836 511.13
## - M 1 307001 1711230 513.62
## - Ed 1 389502 1793731 515.83
## - Ineq 1 608627 2012856 521.25
## - Po1 1 1050202 2454432 530.57
##
## Step: AIC=505.07
## Crime ~ M + Ed + Po1 + M.F + U1 + U2 + Wealth + Ineq + Prob
##
## Df Sum of Sq RSS AIC
## - Wealth 1 26493 1453068 503.93
## <none> 1426575 505.07
## - M.F 1 84491 1511065 505.77
## - U1 1 99463 1526037 506.24
## + Pop 1 22345 1404229 506.33
## + Po2 1 13259 1413315 506.63
## + NW 1 5927 1420648 506.87
## + So 1 5724 1420851 506.88
## + LF 1 5176 1421398 506.90
## + Time 1 3913 1422661 506.94
## - Prob 1 198571 1625145 509.20
## - U2 1 208880 1635455 509.49
## - M 1 320926 1747501 512.61
## - Ed 1 386773 1813348 514.35
## - Ineq 1 594779 2021354 519.45
## - Po1 1 1127277 2553852 530.44
##
## Step: AIC=503.93
## Crime ~ M + Ed + Po1 + M.F + U1 + U2 + Ineq + Prob
##
## Df Sum of Sq RSS AIC
## <none> 1453068 503.93
## + Wealth 1 26493 1426575 505.07
## - M.F 1 103159 1556227 505.16
## + Pop 1 16697 1436371 505.39
## + Po2 1 14148 1438919 505.47
## + So 1 9329 1443739 505.63
## + LF 1 4374 1448694 505.79
## + NW 1 3799 1449269 505.81
## + Time 1 2293 1450775 505.86
## - U1 1 127044 1580112 505.87
## - Prob 1 247978 1701046 509.34
## - U2 1 255443 1708511 509.55
## - M 1 296790 1749858 510.67
## - Ed 1 445788 1898855 514.51
## - Ineq 1 738244 2191312 521.24
## - Po1 1 1672038 3125105 537.93
# Show the coefficient table and overall fit statistics of the model that the
# AIC-based stepwise search ended on.
summary(stepwiseAIC)
##
## Call:
## lm(formula = Crime ~ M + Ed + Po1 + M.F + U1 + U2 + Ineq + Prob,
## data = uscrime)
##
## Residuals:
## Min 1Q Median 3Q Max
## -444.70 -111.07 3.03 122.15 483.30
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6426.10 1194.61 -5.379 4.04e-06 ***
## M 93.32 33.50 2.786 0.00828 **
## Ed 180.12 52.75 3.414 0.00153 **
## Po1 102.65 15.52 6.613 8.26e-08 ***
## M.F 22.34 13.60 1.642 0.10874
## U1 -6086.63 3339.27 -1.823 0.07622 .
## U2 187.35 72.48 2.585 0.01371 *
## Ineq 61.33 13.96 4.394 8.63e-05 ***
## Prob -3796.03 1490.65 -2.547 0.01505 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 195.5 on 38 degrees of freedom
## Multiple R-squared: 0.7888, Adjusted R-squared: 0.7444
## F-statistic: 17.74 on 8 and 38 DF, p-value: 1.159e-10
# Same both-direction search from the full model, but with the per-parameter
# penalty set to log(n), where n is the number of rows in uscrime. That
# penalty makes the criterion BIC; the printed trace still labels the column
# "AIC".
stepwiseBIC <- stepAIC(
  object = lm(formula = Crime ~ ., data = uscrime),
  direction = "both",
  k = log(nrow(uscrime))
)
## Start: AIC=544.25
## Crime ~ M + So + Ed + Po1 + Po2 + LF + M.F + Pop + NW + U1 +
## U2 + Wealth + Ineq + Prob + Time
##
## Df Sum of Sq RSS AIC
## - So 1 29 1354974 540.40
## - LF 1 8917 1363862 540.71
## - Time 1 10304 1365250 540.76
## - Pop 1 14122 1369068 540.89
## - NW 1 18395 1373341 541.03
## - M.F 1 31967 1386913 541.50
## - Wealth 1 37613 1392558 541.69
## - Po2 1 37919 1392865 541.70
## - U1 1 83722 1438668 543.22
## <none> 1354946 544.25
## - Po1 1 144306 1499252 545.16
## - U2 1 181536 1536482 546.31
## - M 1 193770 1548716 546.68
## - Prob 1 199538 1554484 546.86
## - Ed 1 402117 1757063 552.62
## - Ineq 1 423031 1777977 553.17
##
## Step: AIC=540.4
## Crime ~ M + Ed + Po1 + Po2 + LF + M.F + Pop + NW + U1 + U2 +
## Wealth + Ineq + Prob + Time
##
## Df Sum of Sq RSS AIC
## - Time 1 10341 1365315 536.91
## - LF 1 10878 1365852 536.93
## - Pop 1 14127 1369101 537.04
## - NW 1 21626 1376600 537.30
## - M.F 1 32449 1387423 537.66
## - Po2 1 37954 1392929 537.85
## - Wealth 1 39223 1394197 537.89
## - U1 1 96420 1451395 539.78
## <none> 1354974 540.40
## - Po1 1 144302 1499277 541.31
## - U2 1 189859 1544834 542.72
## - M 1 195084 1550059 542.87
## - Prob 1 204463 1559437 543.16
## + So 1 29 1354946 544.25
## - Ed 1 403140 1758114 548.79
## - Ineq 1 488834 1843808 551.03
##
## Step: AIC=536.91
## Crime ~ M + Ed + Po1 + Po2 + LF + M.F + Pop + NW + U1 + U2 +
## Wealth + Ineq + Prob
##
## Df Sum of Sq RSS AIC
## - LF 1 10533 1375848 533.42
## - NW 1 15482 1380797 533.59
## - Pop 1 21846 1387161 533.81
## - Po2 1 28932 1394247 534.04
## - Wealth 1 36070 1401385 534.28
## - M.F 1 41784 1407099 534.48
## - U1 1 91420 1456735 536.11
## <none> 1365315 536.91
## - Po1 1 134137 1499452 537.46
## - U2 1 184143 1549458 539.01
## - M 1 186110 1551425 539.07
## + Time 1 10341 1354974 540.40
## - Prob 1 237493 1602808 540.60
## + So 1 65 1365250 540.76
## - Ed 1 409448 1774763 545.39
## - Ineq 1 502909 1868224 547.80
##
## Step: AIC=533.42
## Crime ~ M + Ed + Po1 + Po2 + M.F + Pop + NW + U1 + U2 + Wealth +
## Ineq + Prob
##
## Df Sum of Sq RSS AIC
## - NW 1 11675 1387523 529.97
## - Po2 1 21418 1397266 530.30
## - Pop 1 27803 1403651 530.51
## - M.F 1 31252 1407100 530.63
## - Wealth 1 35035 1410883 530.75
## - U1 1 80954 1456802 532.26
## <none> 1375848 533.42
## - Po1 1 123896 1499744 533.62
## - U2 1 190746 1566594 535.67
## - M 1 217716 1593564 536.47
## - Prob 1 226971 1602819 536.75
## + LF 1 10533 1365315 536.91
## + Time 1 9996 1365852 536.93
## + So 1 3046 1372802 537.17
## - Ed 1 413254 1789103 541.91
## - Ineq 1 500944 1876792 544.16
##
## Step: AIC=529.97
## Crime ~ M + Ed + Po1 + Po2 + M.F + Pop + U1 + U2 + Wealth + Ineq +
## Prob
##
## Df Sum of Sq RSS AIC
## - Po2 1 16706 1404229 526.68
## - Pop 1 25793 1413315 526.98
## - M.F 1 26785 1414308 527.02
## - Wealth 1 31551 1419073 527.17
## - U1 1 83881 1471404 528.88
## - Po1 1 118348 1505871 529.96
## <none> 1387523 529.97
## - U2 1 201453 1588976 532.49
## - Prob 1 216760 1604282 532.94
## + NW 1 11675 1375848 533.42
## + So 1 7207 1380316 533.57
## + LF 1 6726 1380797 533.59
## + Time 1 4534 1382989 533.66
## - M 1 309214 1696737 535.57
## - Ed 1 402754 1790276 538.10
## - Ineq 1 589736 1977259 542.76
##
## Step: AIC=526.68
## Crime ~ M + Ed + Po1 + M.F + Pop + U1 + U2 + Wealth + Ineq +
## Prob
##
## Df Sum of Sq RSS AIC
## - Pop 1 22345 1426575 523.57
## - Wealth 1 32142 1436371 523.89
## - M.F 1 36808 1441037 524.05
## - U1 1 86373 1490602 525.64
## <none> 1404229 526.68
## - U2 1 205814 1610043 529.26
## - Prob 1 218607 1622836 529.63
## + Po2 1 16706 1387523 529.97
## + NW 1 6963 1397266 530.30
## + So 1 3807 1400422 530.40
## + LF 1 1986 1402243 530.46
## + Time 1 575 1403654 530.51
## - M 1 307001 1711230 532.12
## - Ed 1 389502 1793731 534.34
## - Ineq 1 608627 2012856 539.75
## - Po1 1 1050202 2454432 549.07
##
## Step: AIC=523.57
## Crime ~ M + Ed + Po1 + M.F + U1 + U2 + Wealth + Ineq + Prob
##
## Df Sum of Sq RSS AIC
## - Wealth 1 26493 1453068 520.59
## - M.F 1 84491 1511065 522.43
## - U1 1 99463 1526037 522.89
## <none> 1426575 523.57
## - Prob 1 198571 1625145 525.85
## - U2 1 208880 1635455 526.14
## + Pop 1 22345 1404229 526.68
## + Po2 1 13259 1413315 526.98
## + NW 1 5927 1420648 527.23
## + So 1 5724 1420851 527.23
## + LF 1 5176 1421398 527.25
## + Time 1 3913 1422661 527.29
## - M 1 320926 1747501 529.26
## - Ed 1 386773 1813348 531.00
## - Ineq 1 594779 2021354 536.10
## - Po1 1 1127277 2553852 547.09
##
## Step: AIC=520.59
## Crime ~ M + Ed + Po1 + M.F + U1 + U2 + Ineq + Prob
##
## Df Sum of Sq RSS AIC
## - M.F 1 103159 1556227 519.96
## <none> 1453068 520.59
## - U1 1 127044 1580112 520.68
## + Wealth 1 26493 1426575 523.57
## + Pop 1 16697 1436371 523.89
## + Po2 1 14148 1438919 523.98
## + So 1 9329 1443739 524.13
## - Prob 1 247978 1701046 524.14
## + LF 1 4374 1448694 524.29
## + NW 1 3799 1449269 524.31
## - U2 1 255443 1708511 524.35
## + Time 1 2293 1450775 524.36
## - M 1 296790 1749858 525.47
## - Ed 1 445788 1898855 529.31
## - Ineq 1 738244 2191312 536.04
## - Po1 1 1672038 3125105 552.73
##
## Step: AIC=519.96
## Crime ~ M + Ed + Po1 + U1 + U2 + Ineq + Prob
##
## Df Sum of Sq RSS AIC
## - U1 1 54830 1611057 517.74
## <none> 1556227 519.96
## + M.F 1 103159 1453068 520.59
## - U2 1 194750 1750977 521.65
## + Pop 1 66223 1490004 521.77
## + Wealth 1 45162 1511065 522.43
## - Prob 1 239705 1795931 522.84
## + Po2 1 29979 1526248 522.90
## + Time 1 22501 1533726 523.13
## + LF 1 10865 1545361 523.48
## + So 1 3867 1552360 523.69
## + NW 1 147 1556080 523.81
## - M 1 413318 1969545 527.18
## - Ed 1 815182 2371408 535.91
## - Ineq 1 906629 2462856 537.69
## - Po1 1 1811722 3367949 552.40
##
## Step: AIC=517.74
## Crime ~ M + Ed + Po1 + U2 + Ineq + Prob
##
## Df Sum of Sq RSS AIC
## <none> 1611057 517.74
## - U2 1 192233 1803290 519.18
## + Wealth 1 59910 1551147 519.81
## + U1 1 54830 1556227 519.96
## + Pop 1 51320 1559737 520.07
## - Prob 1 249308 1860365 520.65
## + M.F 1 30945 1580112 520.68
## + Po2 1 25017 1586040 520.85
## + So 1 17958 1593098 521.06
## + LF 1 13179 1597878 521.20
## + Time 1 7159 1603898 521.38
## + NW 1 359 1610698 521.58
## - M 1 400611 2011667 524.32
## - Ed 1 776207 2387264 532.37
## - Ineq 1 949221 2560278 535.66
## - Po1 1 2817067 4428124 561.41
# Show the coefficient table and overall fit statistics of the model that the
# BIC-penalised stepwise search ended on.
summary(stepwiseBIC)
##
## Call:
## lm(formula = Crime ~ M + Ed + Po1 + U2 + Ineq + Prob, data = uscrime)
##
## Residuals:
## Min 1Q Median 3Q Max
## -470.68 -78.41 -19.68 133.12 556.23
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5040.50 899.84 -5.602 1.72e-06 ***
## M 105.02 33.30 3.154 0.00305 **
## Ed 196.47 44.75 4.390 8.07e-05 ***
## Po1 115.02 13.75 8.363 2.56e-10 ***
## U2 89.37 40.91 2.185 0.03483 *
## Ineq 67.65 13.94 4.855 1.88e-05 ***
## Prob -3801.84 1528.10 -2.488 0.01711 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 200.7 on 40 degrees of freedom
## Multiple R-squared: 0.7659, Adjusted R-squared: 0.7307
## F-statistic: 21.81 on 6 and 40 DF, p-value: 3.418e-11
Lasso
Scaling the data
# Build the glmnet inputs: a centred-and-scaled predictor matrix and a
# centred-and-scaled response. Predictors are selected by name (every column
# except Crime) rather than by dropping the last column, so the split stays
# correct even if the column order of uscrime changes.
stopifnot("Crime" %in% names(uscrime))
predictor_cols <- names(uscrime) != "Crime"
uscrime_x <- scale(as.matrix(uscrime[, predictor_cols, drop = FALSE]))
uscrime_y <- scale(uscrime$Crime)

# alpha = 1 selects the pure lasso (L1) penalty.
lasso <- glmnet(uscrime_x, uscrime_y, alpha = 1)

# Lists the components stored in the fitted object (this is not a
# coefficient table).
summary(lasso)
## Length Class Mode
## a0 88 -none- numeric
## beta 1320 dgCMatrix S4
## df 88 -none- numeric
## dim 2 -none- numeric
## lambda 88 -none- numeric
## dev.ratio 88 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## call 4 -none- call
## nobs 1 -none- numeric
# Draw the default plot for the fitted lasso object; label = TRUE asks for
# the curves to be annotated.
plot(lasso, label = TRUE)

Lasso: cross-validation
# Fix the RNG so the random fold assignment inside cv.glmnet is repeatable.
set.seed(123)

# Cross-validate the lasso (alpha = 1) over its lambda path.
lasso_cv <- cv.glmnet(x = uscrime_x, y = uscrime_y, alpha = 1)

# Keep the lambda stored as lambda.min and the coefficients at that lambda.
lasso_best_lambda <- lasso_cv[["lambda.min"]]
lasso_coefs <- coef(lasso_cv, s = lasso_best_lambda)

lasso_best_lambda
## [1] 0.008583784
# Print the sparse coefficient vector at lambda.min; entries shown as "." in
# the output are coefficients that are exactly zero.
lasso_coefs
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) -3.353617e-16
## M 2.700905e-01
## So 3.917902e-02
## Ed 4.527764e-01
## Po1 7.661597e-01
## Po2 .
## LF .
## M.F 1.353429e-01
## Pop -4.898014e-02
## NW 3.702627e-02
## U1 -1.837816e-01
## U2 3.005588e-01
## Wealth 1.389017e-01
## Ineq 6.468310e-01
## Prob -2.306323e-01
## Time .
Elastic net
# Elastic net on the same scaled inputs: alpha = 0.5 sits halfway between
# ridge (alpha = 0) and lasso (alpha = 1).
elasticnet <- glmnet(x = uscrime_x, y = uscrime_y, alpha = 0.5)

# Lists the components stored in the fitted object.
summary(elasticnet)
## Length Class Mode
## a0 96 -none- numeric
## beta 1440 dgCMatrix S4
## df 96 -none- numeric
## dim 2 -none- numeric
## lambda 96 -none- numeric
## dev.ratio 96 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## call 4 -none- call
## nobs 1 -none- numeric
# Draw the default plot for the fitted elastic-net object; label = TRUE asks
# for the curves to be annotated.
plot(elasticnet, label = TRUE)

Elastic net: cross-validation
# Same seed as the lasso cross-validation, so the run is repeatable.
set.seed(123)

# Cross-validate the elastic net (alpha = 0.5) over its lambda path.
elasticnet_cv <- cv.glmnet(x = uscrime_x, y = uscrime_y, alpha = 0.5)

# Keep the lambda stored as lambda.min and the coefficients at that lambda.
elasticnet_best_lambda <- elasticnet_cv[["lambda.min"]]
elasticnet_coefs <- coef(elasticnet_cv, s = elasticnet_best_lambda)

elasticnet_best_lambda
## [1] 0.01183294
# Print the sparse coefficient vector at lambda.min; entries shown as "." in
# the output are coefficients that are exactly zero.
elasticnet_coefs
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) -3.384054e-16
## M 2.677695e-01
## So 4.370161e-02
## Ed 4.518333e-01
## Po1 7.464564e-01
## Po2 .
## LF .
## M.F 1.434832e-01
## Pop -4.966532e-02
## NW 4.920026e-02
## U1 -1.958952e-01
## U2 3.109392e-01
## Wealth 1.523322e-01
## Ineq 6.370044e-01
## Prob -2.356296e-01
## Time .
Different alphas
# Cross-validate glmnet for alpha = 0, 0.1, ..., 1 and keep every fit in a
# list named "alpha0", "alpha0.1", ..., "alpha1".
#
# cv.glmnet assigns observations to folds at random. To make the run
# reproducible, and to make the fits for different alphas comparable, the
# fold assignment is drawn once (10 folds, under a fixed seed) and the same
# foldid vector is passed to every call.
set.seed(123)
cv_foldid <- sample(rep(seq_len(10), length.out = nrow(uscrime_x)))

alpha_grid <- (0:10) / 10
list.of.fits <- lapply(alpha_grid, function(alpha_value) {
  cv.glmnet(uscrime_x, uscrime_y, alpha = alpha_value, foldid = cv_foldid)
})
names(list.of.fits) <- paste0("alpha", alpha_grid)
# Score every cross-validated fit in list.of.fits by its R-squared at
# lambda.1se. Predictions are made on uscrime_x, the same matrix the models
# were fitted on, so these are in-sample (training) R-squared values, not
# held-out test performance.
alpha_grid <- (0:10) / 10
fit_names <- paste0("alpha", alpha_grid)
stopifnot(all(fit_names %in% names(list.of.fits)))

# The total sum of squares depends only on the response, so compute it once.
ss_total <- sum((uscrime_y - mean(uscrime_y))^2)

r_squared <- vapply(
  fit_names,
  function(fit_name) {
    fit <- list.of.fits[[fit_name]]
    predicted <- predict(fit, s = fit$lambda.1se, newx = uscrime_x)
    ss_residual <- sum((uscrime_y - predicted)^2)
    1 - (ss_residual / ss_total)
  },
  numeric(1),
  USE.NAMES = FALSE
)

# One row per alpha, in increasing order of alpha.
results <- data.frame(
  alpha = alpha_grid,
  r_squared = r_squared,
  fit.name = fit_names
)
results
## alpha r_squared fit.name
## 1 0.0 0.5500243 alpha0
## 2 0.1 0.6080307 alpha0.1
## 3 0.2 0.6750612 alpha0.2
## 4 0.3 0.6079146 alpha0.3
## 5 0.4 0.6427385 alpha0.4
## 6 0.5 0.5847286 alpha0.5
## 7 0.6 0.5512065 alpha0.6
## 8 0.7 0.6678918 alpha0.7
## 9 0.8 0.6103203 alpha0.8
## 10 0.9 0.6322980 alpha0.9
## 11 1.0 0.6223538 alpha1
# Scatter plot of R-squared (y axis) against the mixing parameter alpha
# (x axis), one point per row of results.
plot(results$r_squared~results$alpha)
