library(MASS)
library(caret)
library(glmnet)
library(ggplot2)
library(leaps)

Question 11.1

Using the crime data set uscrime.txt from Questions 8.2, 9.1, and 10.1, build a regression model using:

1. Stepwise regression

2. Lasso

3. Elastic net

For Parts 2 and 3, remember to scale the data first – otherwise, the regression coefficients will be on different scales and the constraint won’t have the desired effect.

For Parts 2 and 3, use the glmnet function in R.

setwd("C:/Users/tarra/Documents/Intro to Analytics Modeling/Fall2020hw8/data 11.1")

uscrime_data <- read.table("uscrime.txt", header = TRUE)

# Let's start by making just a flat out model without preforming anything. 
uscrime_model <- lm(Crime~., data = uscrime_data)
summary(uscrime_model)
## 
## Call:
## lm(formula = Crime ~ ., data = uscrime_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -395.74  -98.09   -6.69  112.99  512.67 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.984e+03  1.628e+03  -3.675 0.000893 ***
## M            8.783e+01  4.171e+01   2.106 0.043443 *  
## So          -3.803e+00  1.488e+02  -0.026 0.979765    
## Ed           1.883e+02  6.209e+01   3.033 0.004861 ** 
## Po1          1.928e+02  1.061e+02   1.817 0.078892 .  
## Po2         -1.094e+02  1.175e+02  -0.931 0.358830    
## LF          -6.638e+02  1.470e+03  -0.452 0.654654    
## M.F          1.741e+01  2.035e+01   0.855 0.398995    
## Pop         -7.330e-01  1.290e+00  -0.568 0.573845    
## NW           4.204e+00  6.481e+00   0.649 0.521279    
## U1          -5.827e+03  4.210e+03  -1.384 0.176238    
## U2           1.678e+02  8.234e+01   2.038 0.050161 .  
## Wealth       9.617e-02  1.037e-01   0.928 0.360754    
## Ineq         7.067e+01  2.272e+01   3.111 0.003983 ** 
## Prob        -4.855e+03  2.272e+03  -2.137 0.040627 *  
## Time        -3.479e+00  7.165e+00  -0.486 0.630708    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 209.1 on 31 degrees of freedom
## Multiple R-squared:  0.8031, Adjusted R-squared:  0.7078 
## F-statistic: 8.429 on 15 and 31 DF,  p-value: 3.539e-07
# On its own the r squares and adjusted r squared is 0.8031 and 0.7078 respectively.

# Let's see if we can make this better using stepwise regression. 
stepwise_model <- train(Crime ~., data = uscrime_data,
                    method = "lmStepAIC", 
                    trControl = trainControl(),
                    trace = FALSE)

# Model accuracy
stepwise_model$results
##   parameter     RMSE  Rsquared      MAE   RMSESD RsquaredSD    MAESD
## 1      none 331.8706 0.4574266 256.7255 68.56171  0.1911635 57.23648
# Final model coefficients
stepwise_model$finalModel
## 
## Call:
## lm(formula = .outcome ~ M + Ed + Po1 + M.F + U1 + U2 + Ineq + 
##     Prob, data = dat)
## 
## Coefficients:
## (Intercept)            M           Ed          Po1          M.F           U1  
##    -6426.10        93.32       180.12       102.65        22.34     -6086.63  
##          U2         Ineq         Prob  
##      187.35        61.33     -3796.03
# Summary of the model
summary(stepwise_model$finalModel)
## 
## Call:
## lm(formula = .outcome ~ M + Ed + Po1 + M.F + U1 + U2 + Ineq + 
##     Prob, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -444.70 -111.07    3.03  122.15  483.30 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6426.10    1194.61  -5.379 4.04e-06 ***
## M              93.32      33.50   2.786  0.00828 ** 
## Ed            180.12      52.75   3.414  0.00153 ** 
## Po1           102.65      15.52   6.613 8.26e-08 ***
## M.F            22.34      13.60   1.642  0.10874    
## U1          -6086.63    3339.27  -1.823  0.07622 .  
## U2            187.35      72.48   2.585  0.01371 *  
## Ineq           61.33      13.96   4.394 8.63e-05 ***
## Prob        -3796.03    1490.65  -2.547  0.01505 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 195.5 on 38 degrees of freedom
## Multiple R-squared:  0.7888, Adjusted R-squared:  0.7444 
## F-statistic: 17.74 on 8 and 38 DF,  p-value: 1.159e-10
# With our new model after performing stepwise regression we get a slightly lower r squared value of 0.7888, but a slightly higher adjusted r squared value of 0.7444. 

# Let's see what we can do with Lasso now. 

# Scale the data; does not scaled col #2 as it is binary and col #16 as it is
uscrime_scale <- cbind(as.data.frame(scale(uscrime_data[,1])), 
                      as.data.frame(uscrime_data[,2]), 
                      as.data.frame(scale(uscrime_data[,c(3,4,5,6,7,8,9,10,11,12,13,14,15)])),
                      as.data.frame(uscrime_data[,16]))
 
# Fix the column names
colnames(uscrime_scale) <- colnames(uscrime_data)
# Display scaled data
head(uscrime_scale)
##            M So         Ed        Po1        Po2         LF         M.F
## 1  0.9886930  1 -1.3085099 -0.9085105 -0.8666988 -1.2667456 -1.12060499
## 2  0.3521372  0  0.6580587  0.6056737  0.5280852  0.5396568  0.98341752
## 3  0.2725678  1 -1.4872888 -1.3459415 -1.2958632 -0.6976051 -0.47582390
## 4 -0.2048491  0  1.3731746  2.1535064  2.1732150  0.3911854  0.37257228
## 5  0.1929983  0  1.3731746  0.8075649  0.7426673  0.7376187  0.06714965
## 6 -1.3983912  0  0.3898903  1.1104017  1.2433590 -0.3511718 -0.64550313
##           Pop           NW          U1         U2     Wealth       Ineq
## 1 -0.09500679  1.943738564  0.69510600  0.8313680 -1.3616094  1.6793638
## 2 -0.62033844  0.008483424  0.02950365  0.2393332  0.3276683  0.0000000
## 3 -0.48900552  1.146296747 -0.08143007 -0.1158877 -2.1492481  1.4036474
## 4  3.16204944 -0.205464381  0.36230482  0.5945541  1.5298536 -0.6767585
## 5 -0.48900552 -0.691709391 -0.24783066 -1.6551781  0.5453053 -0.5013026
## 6 -0.30513945 -0.555560788 -0.63609870 -0.5895155  1.6956723 -1.7044289
##         Prob        Time Crime
## 1  1.6497631 -0.05599367   791
## 2 -0.7693365 -0.18315796  1635
## 3  1.5969416 -0.32416470   578
## 4 -1.3761895  0.46611085  1969
## 5 -0.2503580 -0.74759413  1234
## 6 -0.5669349 -0.78996812   682
# Run lasso 
uscrime_lasso <- cv.glmnet(x=as.matrix(uscrime_scale[,-16]), y=as.matrix(uscrime_scale$Crime), 
                      alpha=1, nfolds = 5, type.measure="mse", family="gaussian")

# Find the lamba with the smallest cvm
x <- uscrime_lasso$cvm
which(x == min(x))
## [1] 35
# The lamba for the smallest cvm
uscrime_lasso$lambda[which(x == min(x))]
## [1] 11.12694
# or
uscrime_lasso$lambda.min
## [1] 11.12694
coefficients(uscrime_lasso, s=uscrime_lasso$lambda.min)
## 16 x 1 sparse Matrix of class "dgCMatrix"
##                      1
## (Intercept) 889.057299
## M            85.003217
## So           47.081683
## Ed          124.290064
## Po1         308.889820
## Po2           .       
## LF            1.116734
## M.F          52.037497
## Pop           .       
## NW            4.443519
## U1          -22.257315
## U2           55.981384
## Wealth        .       
## Ineq        180.588656
## Prob        -81.812835
## Time          .
# Now lets make a model with these coefficients 
uscrime_lasso_lm <- lm(Crime ~M+So+Ed+Po1+M.F+Pop+NW+U1+U2+Wealth+Ineq+Prob, 
                    data = uscrime_scale)
summary(uscrime_lasso_lm)
## 
## Call:
## lm(formula = Crime ~ M + So + Ed + Po1 + M.F + Pop + NW + U1 + 
##     U2 + Wealth + Ineq + Prob, data = uscrime_scale)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -434.18 -107.01   18.55  115.88  470.32 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   897.29      51.91  17.286  < 2e-16 ***
## M             112.71      49.35   2.284  0.02876 *  
## So             22.89     125.35   0.183  0.85621    
## Ed            195.70      62.94   3.109  0.00378 ** 
## Po1           293.18      64.99   4.511 7.32e-05 ***
## M.F            48.92      48.12   1.017  0.31656    
## Pop           -33.25      45.63  -0.729  0.47113    
## NW             19.16      57.71   0.332  0.74195    
## U1            -89.76      65.68  -1.367  0.18069    
## U2            140.78      66.77   2.108  0.04245 *  
## Wealth         83.30      95.53   0.872  0.38932    
## Ineq          285.77      85.19   3.355  0.00196 ** 
## Prob          -92.75      41.12  -2.255  0.03065 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 202.6 on 34 degrees of freedom
## Multiple R-squared:  0.7971, Adjusted R-squared:  0.7255 
## F-statistic: 11.13 on 12 and 34 DF,  p-value: 1.52e-08
# Let's remove factor which p>0.05 and show the new model. 
uscrime_lasso_lm2 <- lm(Crime ~M+ Ed+ Po1+ U2+ Ineq+Prob, data = uscrime_scale)
summary(uscrime_lasso_lm2)
## 
## Call:
## lm(formula = Crime ~ M + Ed + Po1 + U2 + Ineq + Prob, data = uscrime_scale)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -470.68  -78.41  -19.68  133.12  556.23 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   905.09      29.27  30.918  < 2e-16 ***
## M             131.98      41.85   3.154  0.00305 ** 
## Ed            219.79      50.07   4.390 8.07e-05 ***
## Po1           341.84      40.87   8.363 2.56e-10 ***
## U2             75.47      34.55   2.185  0.03483 *  
## Ineq          269.91      55.60   4.855 1.88e-05 ***
## Prob          -86.44      34.74  -2.488  0.01711 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 200.7 on 40 degrees of freedom
## Multiple R-squared:  0.7659, Adjusted R-squared:  0.7307 
## F-statistic: 21.81 on 6 and 40 DF,  p-value: 3.418e-11
# Let's move on to the final model, elastic net. 

r2=c()
for (i in 0:10) {
  mod_uscrime_elastic <- cv.glmnet(x=as.matrix(uscrime_scale[,-16]),y=as.matrix(uscrime_scale$Crime),
                            alpha=i/10, nfolds = 5, type.measure="mse",
                            family="gaussian")
  #dev.ratio is the percentage of deviance explained
  #min index for the dev.ratio of the model
  r2 = cbind(r2, mod_uscrime_elastic$glmnet.fit$dev.ratio[which(mod_uscrime_elastic$glmnet.fit$lambda == mod_uscrime_elastic$lambda.min)])
}

#get the best alpha
alpha <- (which.max(r2)-1)/10


#get model with alphas = 0.05
uscrime_elastic <- cv.glmnet(x=as.matrix(uscrime_scale[,-16]), 
                             y=as.matrix(uscrime_scale$Crime), alpha=0.05, 
                             nfolds = 5, type.measure="mse", family="gaussian")

uscrime_elastic_lm = lm(Crime ~M+So+Ed+Po1+Po2+LF+M.F+NW+U1+U2+Wealth+Ineq+Prob
+Time, data = uscrime_scale)
summary(uscrime_elastic_lm)
## 
## Call:
## lm(formula = Crime ~ M + So + Ed + Po1 + Po2 + LF + M.F + NW + 
##     U1 + U2 + Wealth + Ineq + Prob + Time, data = uscrime_scale)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -380.91 -101.89  -14.77  110.87  505.40 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  906.483     58.484  15.500  < 2e-16 ***
## M            112.837     51.691   2.183  0.03649 *  
## So            -4.105    147.172  -0.028  0.97792    
## Ed           211.246     68.713   3.074  0.00429 ** 
## Po1          563.337    311.541   1.808  0.07998 .  
## Po2         -313.824    324.701  -0.966  0.34104    
## LF           -31.702     58.147  -0.545  0.58939    
## M.F           64.479     54.722   1.178  0.24737    
## NW            44.572     65.892   0.676  0.50362    
## U1          -112.728     73.902  -1.525  0.13699    
## U2           143.186     68.749   2.083  0.04535 *  
## Wealth        87.836     98.588   0.891  0.37961    
## Ineq         269.086     86.824   3.099  0.00403 ** 
## Prob        -110.457     51.117  -2.161  0.03830 *  
## Time         -31.582     48.772  -0.648  0.52189    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 206.8 on 32 degrees of freedom
## Multiple R-squared:  0.801,  Adjusted R-squared:  0.714 
## F-statistic: 9.202 on 14 and 32 DF,  p-value: 1.301e-07
uscrime_elastic_lm2 = lm(Crime ~M+Ed+U2+Ineq+Prob, data = uscrime_scale)
summary(uscrime_elastic_lm2)
## 
## Call:
## lm(formula = Crime ~ M + Ed + U2 + Ineq + Prob, data = uscrime_scale)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -478.8 -233.6  -46.5  143.2  797.1 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   905.09      47.94  18.881  < 2e-16 ***
## M             107.25      68.36   1.569  0.12437    
## Ed            240.18      81.89   2.933  0.00547 ** 
## U2            135.14      55.35   2.441  0.01903 *  
## Ineq          117.68      86.03   1.368  0.17880    
## Prob         -156.82      55.20  -2.841  0.00697 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 328.6 on 41 degrees of freedom
## Multiple R-squared:  0.3565, Adjusted R-squared:  0.278 
## F-statistic: 4.542 on 5 and 41 DF,  p-value: 0.002186