Loading the data:
# Read the listings file; a literal "?" in the CSV is treated as a missing value.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 (where the
# default became FALSE), which the per-level counts printed by summary() rely on.
cars = read.csv("mycars.csv", header = TRUE, na.strings = "?",
                stringsAsFactors = TRUE)
#fix(cars)
dim(cars)
## [1] 29466 17
names(cars)
## [1] "X" "trim" "subTrim" "condition"
## [5] "isOneOwner" "mileage" "year" "color"
## [9] "displacement" "fuel" "state" "region"
## [13] "soundSystem" "wheelType" "wheelSize" "featureCount"
## [17] "price"
# attach() places the columns of cars on the search path so they can be used by
# bare name (e.g. cor(X, price)). NOTE(review): the modelling calls visible in
# this file pass cars explicitly, so this may be removable - confirm no later
# code depends on the attached names before dropping it.
attach(cars)
summary(cars)
## X trim subTrim condition isOneOwner
## Min. : 2 550 :21836 Hybrid: 190 CPO : 3586 f:25340
## 1st Qu.:13231 430 : 2071 unsp :29276 New :10317 t: 4126
## Median :26254 500 : 2002 Used:15563
## Mean :26269 63 AMG : 1413
## 3rd Qu.:39293 600 : 527
## Max. :52572 350 : 416
## (Other): 1201
## mileage year color displacement
## Min. : 1 Min. :1988 Black :12838 4.6 :13599
## 1st Qu.: 14 1st Qu.:2007 Silver : 6095 5.5 : 9154
## Median : 26120 Median :2012 White : 4418 4.3 : 2071
## Mean : 40387 Mean :2010 Gray : 2007 5 : 2002
## 3rd Qu.: 68234 3rd Qu.:2015 Blue : 1599 6 : 403
## Max. :488525 Max. :2015 unsp : 1467 6.3 : 391
## (Other): 1042 (Other): 1846
## fuel state region soundSystem
## Diesel : 312 CA : 5262 SoA :7805 Alpine : 2
## Gasoline:28628 FL : 3559 Pac :5844 Bang Olufsen : 177
## Hybrid : 189 NY : 2754 Mid :5824 Bose : 943
## unsp : 337 TX : 2458 WSC :2865 Boston Acoustic: 1
## NJ : 2266 ENC :2496 Harman Kardon : 4120
## GA : 1408 New :1421 Premium : 9694
## (Other):11759 (Other):3211 unsp :14529
## wheelType wheelSize featureCount price
## Alloy :14565 unsp :25293 Min. : 0.00 Min. : 599
## Chrome : 80 18 : 1774 1st Qu.: 18.00 1st Qu.: 28995
## Premium: 424 19 : 1297 Median : 53.00 Median : 56991
## Steel : 49 20 : 813 Mean : 46.48 Mean : 67001
## unsp :14348 17 : 149 3rd Qu.: 70.00 3rd Qu.:108815
## 16 : 107 Max. :132.00 Max. :299000
## (Other): 33
Continuous: X, Price, Feature Count, Mileage, Year. Factors: Color, Trim, Wheel Size, Displacement, SubTrim, Fuel, Condition, State, Region, isOneOwner, Sound System, Wheel Type.
cor(X, price) = 0.029 -> indicative of little to no linear relationship. X is representative of each car's serial number, which is meant to serve as a unique identifier for each car. Is this variable just producing noise?
# Baseline model: ordinary least squares of price on every other column of
# cars (the "." on the right-hand side expands to all remaining variables).
fit <- lm(formula = price ~ ., data = cars)
summary(fit)
##
## Call:
## lm(formula = price ~ ., data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -66157 -5077 -896 3490 272037
##
## Coefficients: (14 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.250e+06 9.878e+04 -83.520 < 2e-16 ***
## X -5.885e-03 4.125e-03 -1.427 0.15371
## trim350 2.721e+04 1.347e+04 2.021 0.04331 *
## trim400 -7.863e+03 1.786e+04 -0.440 0.65983
## trim420 1.474e+04 1.076e+04 1.370 0.17061
## trim430 -8.671e+03 1.074e+04 -0.808 0.41926
## trim450 -3.515e+04 1.610e+04 -2.183 0.02903 *
## trim500 -5.991e+03 1.073e+04 -0.558 0.57678
## trim55 AMG -6.929e+03 1.075e+04 -0.644 0.51936
## trim550 -4.689e+03 1.075e+04 -0.436 0.66283
## trim600 5.058e+03 1.076e+04 0.470 0.63837
## trim63 AMG 4.508e+04 1.077e+04 4.184 2.87e-05 ***
## trim65 AMG 6.245e+03 1.081e+04 0.578 0.56348
## trimunsp 2.801e+04 1.079e+04 2.595 0.00945 **
## subTrimunsp NA NA NA NA
## conditionNew 3.517e+04 2.644e+02 133.014 < 2e-16 ***
## conditionUsed -4.305e+03 2.309e+02 -18.641 < 2e-16 ***
## isOneOwnert -4.183e+02 1.983e+02 -2.110 0.03487 *
## mileage -1.319e-01 2.915e-03 -45.254 < 2e-16 ***
## year 4.140e+03 4.883e+01 84.794 < 2e-16 ***
## colorBlack -3.339e+02 7.477e+02 -0.447 0.65522
## colorBlue -5.846e+02 7.882e+02 -0.742 0.45825
## colorBronze 4.069e+03 3.857e+03 1.055 0.29139
## colorBrown -4.748e+01 1.524e+03 -0.031 0.97515
## colorGold 1.085e+03 1.007e+03 1.078 0.28099
## colorGray -1.314e+03 7.791e+02 -1.686 0.09175 .
## colorGreen 1.671e+02 1.101e+03 0.152 0.87930
## colorPurple 5.338e+03 3.864e+03 1.381 0.16718
## colorRed -9.732e+01 9.521e+02 -0.102 0.91858
## colorSilver -1.026e+03 7.525e+02 -1.363 0.17281
## colorTurquoise -2.427e+03 4.845e+03 -0.501 0.61648
## colorunsp 1.132e+02 7.932e+02 0.143 0.88655
## colorWhite 1.093e+03 7.596e+02 1.438 0.15036
## colorYellow -6.817e+03 7.597e+03 -0.897 0.36960
## displacement3 -4.112e+04 8.063e+03 -5.100 3.41e-07 ***
## displacement3.2 1.325e+04 1.075e+04 1.232 0.21807
## displacement3.5 -2.880e+03 9.446e+03 -0.305 0.76043
## displacement3.7 -4.729e+04 8.305e+03 -5.694 1.25e-08 ***
## displacement4.2 NA NA NA NA
## displacement4.3 NA NA NA NA
## displacement4.6 -4.678e+03 1.184e+03 -3.950 7.84e-05 ***
## displacement4.7 1.720e+04 1.224e+03 14.046 < 2e-16 ***
## displacement5 NA NA NA NA
## displacement5.4 NA NA NA NA
## displacement5.5 -8.743e+03 1.197e+03 -7.304 2.87e-13 ***
## displacement5.8 -8.922e+03 1.753e+03 -5.090 3.60e-07 ***
## displacement6 3.750e+04 1.438e+03 26.070 < 2e-16 ***
## displacement6.3 -4.729e+04 1.404e+03 -33.693 < 2e-16 ***
## displacement8 1.755e+04 1.074e+04 1.635 0.10216
## fuelGasoline -4.842e+02 5.151e+03 -0.094 0.92510
## fuelHybrid -8.495e+03 1.190e+04 -0.714 0.47520
## fuelunsp 1.224e+04 5.103e+03 2.399 0.01646 *
## stateAL 5.922e+03 7.585e+03 0.781 0.43500
## stateAR 6.749e+03 7.674e+03 0.879 0.37915
## stateAZ 7.198e+03 7.582e+03 0.949 0.34246
## stateCA 6.598e+03 7.565e+03 0.872 0.38313
## stateCO 6.992e+03 7.581e+03 0.922 0.35638
## stateCT 6.090e+03 7.584e+03 0.803 0.42200
## stateDC -8.371e+03 9.760e+03 -0.858 0.39109
## stateDE 7.107e+03 7.641e+03 0.930 0.35233
## stateFL 6.003e+03 7.565e+03 0.794 0.42748
## stateGA 5.832e+03 7.569e+03 0.771 0.44096
## stateHI 5.975e+03 7.635e+03 0.783 0.43386
## stateIA 8.094e+03 7.760e+03 1.043 0.29695
## stateID 1.050e+04 7.885e+03 1.331 0.18310
## stateIL 6.845e+03 7.569e+03 0.904 0.36583
## stateIN 5.639e+03 7.602e+03 0.742 0.45827
## stateKS 6.874e+03 7.652e+03 0.898 0.36904
## stateKY 8.806e+03 7.600e+03 1.159 0.24662
## stateLA 7.831e+03 7.613e+03 1.029 0.30364
## stateMA 6.399e+03 7.572e+03 0.845 0.39809
## stateMD 6.925e+03 7.575e+03 0.914 0.36060
## stateME 4.517e+03 7.872e+03 0.574 0.56608
## stateMI 6.147e+03 7.597e+03 0.809 0.41843
## stateMN 7.987e+03 7.596e+03 1.052 0.29303
## stateMO 8.143e+03 7.584e+03 1.074 0.28293
## stateMS 8.524e+03 7.621e+03 1.118 0.26340
## stateMT 9.339e+03 8.224e+03 1.136 0.25615
## stateNC 7.327e+03 7.572e+03 0.968 0.33324
## stateND 1.072e+04 8.951e+03 1.198 0.23098
## stateNE 7.991e+03 7.936e+03 1.007 0.31401
## stateNH 7.832e+03 7.616e+03 1.028 0.30376
## stateNJ 6.155e+03 7.567e+03 0.813 0.41599
## stateNM 6.980e+03 7.784e+03 0.897 0.36985
## stateNV 8.641e+03 7.583e+03 1.139 0.25451
## stateNY 5.379e+03 7.566e+03 0.711 0.47714
## stateOH 5.942e+03 7.575e+03 0.784 0.43283
## stateOK 7.082e+03 7.604e+03 0.931 0.35170
## stateON 8.128e+03 9.269e+03 0.877 0.38055
## stateOR 8.025e+03 7.611e+03 1.054 0.29168
## statePA 6.509e+03 7.573e+03 0.859 0.39009
## stateRI 5.343e+03 7.705e+03 0.693 0.48802
## stateSC 7.038e+03 7.593e+03 0.927 0.35400
## stateSD 2.494e+04 1.070e+04 2.332 0.01972 *
## stateTN 5.435e+03 7.581e+03 0.717 0.47347
## stateTX 7.599e+03 7.566e+03 1.004 0.31525
## stateunsp -2.384e+04 1.072e+04 -2.224 0.02614 *
## stateUT 8.662e+03 7.621e+03 1.137 0.25573
## stateVA 6.432e+03 7.571e+03 0.850 0.39557
## stateWA 8.023e+03 7.587e+03 1.057 0.29030
## stateWI 7.041e+03 7.613e+03 0.925 0.35505
## stateWV 8.344e+03 7.756e+03 1.076 0.28203
## stateWY 1.040e+03 1.069e+04 0.097 0.92251
## regionESC NA NA NA NA
## regionMid NA NA NA NA
## regionMtn NA NA NA NA
## regionNew NA NA NA NA
## regionPac NA NA NA NA
## regionSoA NA NA NA NA
## regionunsp NA NA NA NA
## regionWNC NA NA NA NA
## regionWSC NA NA NA NA
## soundSystemBang Olufsen -1.983e+03 7.615e+03 -0.260 0.79458
## soundSystemBose -6.271e+03 7.579e+03 -0.827 0.40801
## soundSystemBoston Acoustic -5.248e+03 1.314e+04 -0.400 0.68950
## soundSystemHarman Kardon -6.347e+03 7.572e+03 -0.838 0.40196
## soundSystemPremium -4.379e+03 7.571e+03 -0.578 0.56303
## soundSystemunsp -4.080e+03 7.570e+03 -0.539 0.58996
## wheelTypeChrome 2.928e+02 1.206e+03 0.243 0.80818
## wheelTypePremium 1.466e+02 5.406e+02 0.271 0.78626
## wheelTypeSteel 1.691e+04 1.548e+03 10.924 < 2e-16 ***
## wheelTypeunsp 4.881e+02 1.565e+02 3.118 0.00182 **
## wheelSize17 -1.023e+04 1.374e+03 -7.443 1.01e-13 ***
## wheelSize18 -6.942e+03 1.118e+03 -6.209 5.41e-10 ***
## wheelSize19 -6.550e+03 1.130e+03 -5.794 6.95e-09 ***
## wheelSize20 -1.604e+03 1.156e+03 -1.387 0.16541
## wheelSize21 -5.960e+03 7.643e+03 -0.780 0.43554
## wheelSize22 -1.159e+03 2.208e+03 -0.525 0.59967
## wheelSizeunsp -6.232e+03 1.085e+03 -5.745 9.27e-09 ***
## featureCount -1.452e+00 2.934e+00 -0.495 0.62068
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10690 on 29350 degrees of freedom
## Multiple R-squared: 0.9434, Adjusted R-squared: 0.9432
## F-statistic: 4255 on 115 and 29350 DF, p-value: < 2.2e-16
Most statistically significant IVs: Condition new/used, mileage, year, displacement, wheel type, wheel size
Adjusted R Squared: 0.9432
p-value: < 2.2 × 10^-16
Intuition: We are in danger of overfitting here. The adjusted R-squared is high and the overall p-value is very low, but the model is most likely “chasing” irrelevant patterns in the data to achieve these results (high variance); in other words, the model is too complex. We should benefit from trying ridge regression, which reduces variance by placing a constraint on the size of the coefficients.
library(MASS)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 1.9-8
# Build the model frame once so that any row containing a missing value is
# dropped from the predictors and the response together. Taking y straight
# from cars$price would keep those rows and leave x and y with different
# lengths, which glmnet rejects.
mf = model.frame(price ~ ., data = cars)
# Dummy-coded predictor matrix; [, -1] removes the intercept column.
x = model.matrix(attr(mf, "terms"), data = mf)[, -1]
# Response aligned row-for-row with x (names stripped to give a plain vector).
y = unname(model.response(mf))
# 100 candidate penalties, log-spaced from 1e10 down to 1e-2.
grid = 10^seq(10, -2, length = 100)
# alpha = 0 selects the ridge penalty; one fit per value in grid.
ridge.mod = glmnet(x, y, alpha = 0, lambda = grid)
# Pull the ridge coefficient matrix out once: one row per term (intercept
# included), one column per penalty value in the grid.
ridge.coefs <- coef(ridge.mod)
dim(ridge.coefs)
## [1] 130 100
# Look at the coefficients for a single penalty to see how shrinkage acts.
# Column 50 is an arbitrary choice made before the optimal lambda is known;
# it corresponds to ridge.mod$lambda[50] = 11497.57.
ridge.coefs[, 50]
## (Intercept) X
## -4.187464e+06 2.171904e-03
## trim350 trim400
## -5.697381e+03 -4.327408e+03
## trim420 trim430
## -1.622298e+03 -6.811058e+03
## trim450 trim500
## -2.430446e+04 -6.156337e+03
## trim55 AMG trim550
## -5.915860e+03 -4.506929e+03
## trim600 trim63 AMG
## 2.464800e+03 3.585338e+04
## trim65 AMG trimunsp
## 1.525361e+04 2.700037e+04
## subTrimunsp conditionNew
## 4.391017e+03 2.571685e+04
## conditionUsed isOneOwnert
## -1.138583e+04 -2.838472e+03
## mileage year
## -1.616421e-01 2.119020e+03
## colorBlack colorBlue
## 1.302219e+02 6.392502e+01
## colorBronze colorBrown
## 1.027561e+03 -1.112704e+03
## colorGold colorGray
## -7.765971e+02 -2.005419e+03
## colorGreen colorPurple
## 5.095113e+02 6.001340e+02
## colorRed colorSilver
## -4.812084e+02 -8.924312e+02
## colorTurquoise colorunsp
## -3.164189e+03 1.562057e+03
## colorWhite colorYellow
## 1.495393e+03 -8.788087e+03
## displacement3 displacement3.2
## -4.429357e+03 -3.601911e+03
## displacement3.5 displacement3.7
## -3.710781e+03 -1.271847e+04
## displacement4.2 displacement4.3
## -1.666608e+03 -6.883188e+03
## displacement4.6 displacement4.7
## 5.596517e+03 2.317732e+04
## displacement5 displacement5.4
## -6.209019e+03 -5.971740e+03
## displacement5.5 displacement5.8
## -3.439469e+03 -1.344496e+04
## displacement6 displacement6.3
## 2.873764e+04 -2.819547e+04
## displacement8 fuelGasoline
## 1.908290e+04 -4.683082e+03
## fuelHybrid fuelunsp
## -4.386904e+03 1.658524e+04
## stateAL stateAR
## -4.924960e+02 -9.658272e+02
## stateAZ stateCA
## -7.290783e+02 4.085062e+02
## stateCO stateCT
## 4.305919e+02 -1.506865e+03
## stateDC stateDE
## -1.542959e+04 1.781338e+03
## stateFL stateGA
## 9.427893e+01 -8.641308e+02
## stateHI stateIA
## 9.879481e+02 -2.597674e+02
## stateID stateIL
## 2.363679e+03 -1.147684e+02
## stateIN stateKS
## -1.880051e+03 -1.156904e+03
## stateKY stateLA
## 2.477693e+03 -5.770500e+02
## stateMA stateMD
## 6.339630e+02 -1.997155e+02
## stateME stateMI
## -3.515342e+03 -2.361939e+03
## stateMN stateMO
## 8.856934e+02 7.130271e+02
## stateMS stateMT
## 8.839222e+02 1.695196e+03
## stateNC stateND
## -4.892656e+02 7.598338e+02
## stateNE stateNH
## -8.261946e+02 1.059142e+03
## stateNJ stateNM
## 1.690258e+02 -1.172560e+03
## stateNV stateNY
## 1.034957e+03 2.052824e+02
## stateOH stateOK
## -1.411750e+03 5.874249e+02
## stateON stateOR
## 2.369046e+03 1.242062e+03
## statePA stateRI
## -1.293822e+02 -3.309127e+03
## stateSC stateSD
## -2.875836e+02 6.044659e+03
## stateTN stateTX
## -1.397012e+03 4.219907e+02
## stateunsp stateUT
## -2.084179e+04 1.983624e+03
## stateVA stateWA
## -1.512805e+03 2.856755e+01
## stateWI stateWV
## 1.166242e+03 3.622208e+03
## stateWY regionESC
## -7.447666e+03 -1.325403e+02
## regionMid regionMtn
## 1.595297e+02 4.222999e+02
## regionNew regionPac
## -8.285983e+01 4.250071e+02
## regionSoA regionunsp
## -4.487706e+02 -4.241926e+03
## regionWNC regionWSC
## 4.847331e+02 3.519714e+02
## soundSystemBang Olufsen soundSystemBose
## 4.936342e+03 -2.418754e+03
## soundSystemBoston Acoustic soundSystemHarman Kardon
## -1.884051e+03 -3.368728e+03
## soundSystemPremium soundSystemunsp
## 1.174354e+03 7.692923e+02
## wheelTypeChrome wheelTypePremium
## -1.864624e+03 -3.974058e+02
## wheelTypeSteel wheelTypeunsp
## 1.106633e+04 1.761156e+03
## wheelSize17 wheelSize18
## -3.409977e+03 -3.460134e+01
## wheelSize19 wheelSize20
## 1.307111e+03 8.314959e+03
## wheelSize21 wheelSize22
## -5.985421e+03 1.620568e+02
## wheelSizeunsp featureCount
## -2.165516e+03 7.405696e+00
set.seed(1)
# Random half of the rows is used for training; the remaining rows are held out.
train = sample(1:nrow(x), nrow(x)/2)
test = (-train)
y.test = y[test]
# Choose the ridge penalty by cross-validation on the training rows only.
cv.out = cv.glmnet(x[train, ], y[train], alpha = 0)
plot(cv.out)
#optimal value of lambda
bestlam = cv.out$lambda.min
bestlam
## [1] 3817.154
# Fit the ridge path on the training rows only. ridge.mod was fitted on every
# row, so scoring it on x[test, ] would evaluate the model on data it had
# already seen and understate the test error.
ridge.train = glmnet(x[train, ], y[train], alpha = 0, lambda = grid)
ridge.pred = predict(ridge.train, s = bestlam, newx = x[test, ])
MSE = mean((ridge.pred - y.test)^2)
# Test RMSE, in the units of price.
# NOTE: any RMSE value recorded after this chunk was produced with the earlier
# full-data fit; re-run the chunk to refresh it.
sqrt(MSE)
## [1] 11052.44
# Refit ridge on every row (glmnet's default lambda path) and report the
# coefficients at the cross-validated penalty.
out <- glmnet(x, y, alpha = 0)
coef(out, s = bestlam)
## 130 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -5.448606e+06
## X -1.829036e-03
## trim350 -5.154806e+03
## trim400 -5.353709e+03
## trim420 1.372899e+03
## trim430 -5.962492e+03
## trim450 -3.333708e+04
## trim500 -5.058562e+03
## trim55 AMG -5.058531e+03
## trim550 -6.433357e+03
## trim600 2.193250e+03
## trim63 AMG 4.099871e+04
## trim65 AMG 1.120004e+04
## trimunsp 2.713648e+04
## subTrimunsp 5.298128e+03
## conditionNew 3.100721e+04
## conditionUsed -8.610253e+03
## isOneOwnert -1.632963e+03
## mileage -1.601431e-01
## year 2.744727e+03
## colorBlack 1.866919e+01
## colorBlue -1.690011e+02
## colorBronze 2.875839e+03
## colorBrown -4.175555e+02
## colorGold 4.668352e+02
## colorGray -1.407169e+03
## colorGreen 4.189047e+02
## colorPurple 2.166144e+03
## colorRed -2.466370e+02
## colorSilver -8.410106e+02
## colorTurquoise -2.191760e+03
## colorunsp 9.537939e+02
## colorWhite 1.458109e+03
## colorYellow -7.853229e+03
## displacement3 -6.567223e+03
## displacement3.2 2.175513e+03
## displacement3.5 -1.734857e+03
## displacement3.7 -1.395098e+04
## displacement4.2 1.732428e+03
## displacement4.3 -5.757234e+03
## displacement4.6 4.209421e+03
## displacement4.7 2.425191e+04
## displacement5 -4.905049e+03
## displacement5.4 -4.942249e+03
## displacement5.5 -3.347471e+03
## displacement5.8 -1.145752e+04
## displacement6 3.478364e+04
## displacement6.3 -3.748274e+04
## displacement8 2.067077e+04
## fuelGasoline -4.049894e+03
## fuelHybrid -5.602763e+03
## fuelunsp 1.489956e+04
## stateAL -4.128948e+02
## stateAR -6.646589e+02
## stateAZ -3.300249e+02
## stateCA 1.456816e+02
## stateCO 1.255923e+02
## stateCT -9.585379e+02
## stateDC -1.588076e+04
## stateDE 1.144039e+03
## stateFL -1.703334e+02
## stateGA -5.725679e+02
## stateHI -5.311680e+01
## stateIA 1.880658e+02
## stateID 2.751314e+03
## stateIL 1.523913e+02
## stateIN -1.444323e+03
## stateKS -8.415395e+02
## stateKY 2.369055e+03
## stateLA 8.134368e+01
## stateMA 2.205330e+02
## stateMD 2.128295e+02
## stateME -3.122057e+03
## stateMI -1.406953e+03
## stateMN 7.951279e+02
## stateMO 8.035299e+02
## stateMS 1.590929e+03
## stateMT 1.891816e+03
## stateNC 1.802731e+02
## stateND 1.852136e+03
## stateNE 3.273926e+02
## stateNH 1.250212e+03
## stateNJ 7.583266e+01
## stateNM -8.721512e+02
## stateNV 1.226938e+03
## stateNY -3.449622e+02
## stateOH -1.125128e+03
## stateOK 3.511050e+02
## stateON 4.387107e+03
## stateOR 1.276423e+03
## statePA -1.721390e+01
## stateRI -2.357474e+03
## stateSC 3.755705e+02
## stateSD 1.094155e+04
## stateTN -1.294951e+03
## stateTX 5.620395e+02
## stateunsp -2.386748e+04
## stateUT 1.806272e+03
## stateVA -7.051583e+02
## stateWA 7.132715e+02
## stateWI 7.323751e+02
## stateWV 2.631780e+03
## stateWY -6.905077e+03
## regionESC -1.848979e+00
## regionMid -1.400872e+02
## regionMtn 4.899329e+02
## regionNew -1.295774e+02
## regionPac 1.896033e+02
## regionSoA -2.557924e+02
## regionunsp -3.696447e+03
## regionWNC 6.052869e+02
## regionWSC 4.990277e+02
## soundSystemBang Olufsen 3.972722e+03
## soundSystemBose -1.711871e+03
## soundSystemBoston Acoustic -1.214785e+03
## soundSystemHarman Kardon -2.509140e+03
## soundSystemPremium 7.368389e+02
## soundSystemunsp 6.792423e+02
## wheelTypeChrome -9.230573e+02
## wheelTypePremium 9.389081e+00
## wheelTypeSteel 1.435099e+04
## wheelTypeunsp 1.176588e+03
## wheelSize17 -3.679843e+03
## wheelSize18 -9.065363e+02
## wheelSize19 1.386767e+01
## wheelSize20 5.923600e+03
## wheelSize21 -3.719637e+03
## wheelSize22 2.165668e+03
## wheelSizeunsp -1.598158e+03
## featureCount 3.617678e+00
*None of the coefficients are zero: ridge regression does not perform variable selection, so the results are also hard to interpret. The RMSE is in the units of Y (price), i.e., a typical prediction error of about $11,052.44.
Lasso Model Intuition: This should outperform ridge regression because the lasso actually performs variable selection by setting the coefficients of insignificant IVs to zero. While the RMSE might be similar, the interpretability will be improved.
# Lasso path (alpha = 1) over the shared penalty grid, fitted on training rows.
lasso.mod <- glmnet(x[train, ], y[train], alpha = 1, lambda = grid)
plot(lasso.mod)
# Reset the seed so the cross-validation folds are reproducible.
set.seed(1)
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 1)
plot(cv.out)
# Penalty with the lowest cross-validated error.
bestlam2 <- cv.out$lambda.min
bestlam2
## [1] 14.04094
# Score the training-only lasso fit on the held-out rows.
lasso.pred <- predict(lasso.mod, s = bestlam2, newx = x[test, ])
MSE_L <- mean((lasso.pred - y.test)^2)
# Test RMSE, in the units of price.
sqrt(MSE_L)
## [1] 10830.66
*So, Lasso improves RMSE slightly to a $10,830.66 prediction error, as expected. Let's examine the coefficients to see if any of the 16 original IVs (129 columns after dummy coding) have been removed (i.e., set to zero).
'.' indicates that a coefficient has been set to zero. It looks like 16 coefficients have been removed; all of them are individual dummy-variable levels, so no original IV has been dropped entirely.
# Refit the lasso on every row over the penalty grid, then extract the
# coefficients at the cross-validated penalty; a "." entry is an exact zero.
out <- glmnet(x, y, alpha = 1, lambda = grid)
lasso.coef <- coef(out, s = bestlam2)
lasso.coef
## 130 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -8.131699e+06
## X -4.821942e-03
## trim350 3.085590e+03
## trim400 -1.523818e+04
## trim420 2.072193e+04
## trim430 -1.889169e+03
## trim450 -2.841988e+04
## trim500 5.876253e+02
## trim55 AMG -2.359635e+02
## trim550 -2.219278e+03
## trim600 7.361744e+03
## trim63 AMG 4.754579e+04
## trim65 AMG 9.233052e+03
## trimunsp 3.068020e+04
## subTrimunsp 2.309178e+02
## conditionNew 3.532908e+04
## conditionUsed -4.334268e+03
## isOneOwnert -3.628004e+02
## mileage -1.323275e-01
## year 4.076589e+03
## colorBlack .
## colorBlue -2.233050e+02
## colorBronze 3.468189e+03
## colorBrown 3.374730e+01
## colorGold 1.276436e+03
## colorGray -9.591050e+02
## colorGreen 2.978172e+02
## colorPurple 4.662013e+03
## colorRed 6.572447e+01
## colorSilver -6.837494e+02
## colorTurquoise -3.319463e+02
## colorunsp 4.258501e+02
## colorWhite 1.408210e+03
## colorYellow -4.859609e+03
## displacement3 -9.597757e+03
## displacement3.2 1.969139e+04
## displacement3.5 8.681521e+03
## displacement3.7 -1.619455e+04
## displacement4.2 2.083694e+02
## displacement4.3 -7.357509e+01
## displacement4.6 -1.072375e+01
## displacement4.7 2.130671e+04
## displacement5 .
## displacement5.4 -1.684905e+01
## displacement5.5 -4.225297e+03
## displacement5.8 -4.534230e+03
## displacement6 4.136017e+04
## displacement6.3 -4.267528e+04
## displacement8 1.752526e+04
## fuelGasoline -2.081200e+02
## fuelHybrid -4.959335e+03
## fuelunsp 1.479385e+04
## stateAL -4.898113e+02
## stateAR -2.278561e+01
## stateAZ 1.002143e+00
## stateCA .
## stateCO -5.273242e-01
## stateCT -2.546431e+02
## stateDC -1.364655e+04
## stateDE 2.878523e+02
## stateFL -5.350342e+02
## stateGA -7.070895e+02
## stateHI -3.950502e+02
## stateIA .
## stateID 2.793963e+03
## stateIL 1.630700e+02
## stateIN -7.673987e+02
## stateKS -6.998860e+02
## stateKY 2.043353e+03
## stateLA 5.625968e+02
## stateMA .
## stateMD 2.298010e+02
## stateME -1.451057e+03
## stateMI -3.043484e+02
## stateMN .
## stateMO 1.819836e+02
## stateMS 1.686052e+03
## stateMT 1.488020e+03
## stateNC 6.376842e+02
## stateND 1.619949e+03
## stateNE 4.420556e+02
## stateNH 1.186216e+03
## stateNJ -3.903212e+01
## stateNM .
## stateNV 1.418495e+03
## stateNY -8.280699e+02
## stateOH -5.685173e+02
## stateOK .
## stateON 9.490634e+01
## stateOR 1.230477e+03
## statePA 1.697102e+02
## stateRI -8.196256e+02
## stateSC 4.041829e+02
## stateSD 1.507159e+04
## stateTN -1.044794e+03
## stateTX 4.339797e+02
## stateunsp -2.765360e+04
## stateUT 1.379271e+03
## stateVA -1.060443e+02
## stateWA 1.357434e+03
## stateWI 2.432269e+02
## stateWV 1.318422e+03
## stateWY -4.387067e+03
## regionESC .
## regionMid -3.403400e+02
## regionMtn 5.286860e+02
## regionNew -1.510699e+02
## regionPac .
## regionSoA .
## regionunsp .
## regionWNC 1.252365e+03
## regionWSC 5.207036e+02
## soundSystemBang Olufsen 2.340632e+03
## soundSystemBose -1.436560e+03
## soundSystemBoston Acoustic .
## soundSystemHarman Kardon -1.921674e+03
## soundSystemPremium .
## soundSystemunsp 3.280134e+02
## wheelTypeChrome .
## wheelTypePremium .
## wheelTypeSteel 1.653498e+04
## wheelTypeunsp 4.573070e+02
## wheelSize17 -5.785355e+03
## wheelSize18 -2.349047e+03
## wheelSize19 -1.884003e+03
## wheelSize20 2.905789e+03
## wheelSize21 -3.152140e+01
## wheelSize22 2.839241e+03
## wheelSizeunsp -1.734152e+03
## featureCount -1.396352e+00