library(caret)
## Warning: package 'caret' was built under R version 4.0.4
## Loading required package: lattice
## Loading required package: ggplot2
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.0.4
## Loading required package: Matrix
## Loaded glmnet 4.1-1
library(psych)
## Warning: package 'psych' was built under R version 4.0.4
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:psych':
##
## logit
# Load the "Modified Data" sheet of the case-study workbook.
# The workbook location is kept in a variable and combined with file.path()
# instead of calling setwd(), so the session's working directory is left
# untouched and only this one value has to change on another machine.
dataDir <- "C:/_MyData_/IIMK/Assignment 3_IPL Players Pricing"
iplData <- read_excel(
  file.path(dataDir, "Pricing of players IMB381-XLS-ENG.xls"),
  sheet = "Modified Data"
)
# List the column names read from the sheet.
names(iplData)
## [1] "Sl.NO." "PLAYER NAME" "L25" "B25-35"
## [5] "A35" "Country" "Team" "PLAYING ROLE"
## [9] "BAT" "BOW" "ALL" "BAT*SR"
## [13] "BOW*ECO" "BOW*SR-BL" "BAT*RUN-S" "BOW*WK-I"
## [17] "BAT*T-RUNS" "BAT*ODI-RUNS" "BOW*WK-O" "T-RUNS"
## [21] "T-WKTS" "ODI-RUNS" "ODI-SR-B" "ODI-WKTS"
## [25] "ODI-SR-BL" "CAPTAINCY EXP" "INDIA" "AUSTRALIA"
## [29] "OTHERS" "MTS" "ALL*SR-B" "ALL*SR-BL"
## [33] "ALL*ECON" "RUNS-S" "HS" "AVE"
## [37] "SR -B" "SIXERS" "RUNS-C" "WKTS"
## [41] "AVE-BL" "ECON" "SR -BL" "Year"
## [45] "Base Price(US$)" "Sold Price(US$)" "S_B Price" "SQRT(S-B)"
# Create another data frame with only the required columns as mentioned in the
# case study's PDF.
# Columns are selected by name rather than through attach()/detach(): the search
# path is never modified, a same-named global variable cannot be picked up by
# mistake, and a missing column fails with an explicit message.
requiredColumns <- c(
  "L25", "B25-35", "A35", "RUNS-S", "RUNS-C", "HS", "AVE", "AVE-BL", "SR -B",
  "SR -BL", "SIXERS", "WKTS", "ECON", "CAPTAINCY EXP", "ODI-SR-B", "ODI-SR-BL",
  "ODI-RUNS", "ODI-WKTS", "T-RUNS", "T-WKTS", "BAT", "BOW", "ALL",
  "INDIA", "AUSTRALIA", "OTHERS", "Year", "SQRT(S-B)"
)
missingColumns <- requiredColumns[!(requiredColumns %in% names(iplData))]
if (length(missingColumns) > 0) {
  stop(
    "Columns missing from iplData: ",
    paste(missingColumns, collapse = ", "),
    call. = FALSE
  )
}
analysisData <- as.data.frame(iplData[, requiredColumns])
# Convert the spreadsheet headers to syntactic R names (e.g. "B25-35" becomes
# "B25.35" and "SQRT(S-B)" becomes "SQRT.S.B."), which the model formulas below
# rely on.
names(analysisData) <- make.names(names(analysisData), unique = TRUE)
names (analysisData)
## [1] "L25" "B25.35" "A35" "RUNS.S"
## [5] "RUNS.C" "HS" "AVE" "AVE.BL"
## [9] "SR..B" "SR..BL" "SIXERS" "WKTS"
## [13] "ECON" "CAPTAINCY.EXP" "ODI.SR.B" "ODI.SR.BL"
## [17] "ODI.RUNS" "ODI.WKTS" "T.RUNS" "T.WKTS"
## [21] "BAT" "BOW" "ALL" "INDIA"
## [25] "AUSTRALIA" "OTHERS" "Year" "SQRT.S.B."
# Fit an ordinary least-squares model of SQRT.S.B. on every other column of
# analysisData and print its coefficient table.
# NOTE(review): the recorded summary reports "3 not defined because of
# singularities" and NA coefficients for A35, ALL and OTHERS; presumably each
# of them completes a full set of indicator columns -- confirm and consider
# dropping one reference level per set.
lnModel <- lm (SQRT.S.B.~., data = analysisData)
summary(lnModel)
##
## Call:
## lm(formula = SQRT.S.B. ~ ., data = analysisData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -660.21 -180.79 -20.17 172.48 653.92
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.466e+04 3.939e+04 -1.388 0.16810
## L25 1.977e+02 1.300e+02 1.521 0.13124
## B25.35 5.671e+01 7.657e+01 0.741 0.46055
## A35 NA NA NA NA
## RUNS.S 1.689e-01 1.135e-01 1.488 0.13980
## RUNS.C 1.251e-01 7.861e-02 1.592 0.11443
## HS -1.267e+00 1.773e+00 -0.714 0.47658
## AVE 3.742e+00 5.089e+00 0.735 0.46373
## AVE.BL 6.326e+00 7.159e+00 0.884 0.37896
## SR..B -8.626e-01 8.834e-01 -0.976 0.33107
## SR..BL -8.107e+00 9.862e+00 -0.822 0.41290
## SIXERS 1.558e+00 2.279e+00 0.684 0.49573
## WKTS 1.097e+00 9.992e-01 1.098 0.27483
## ECON -1.547e+00 7.452e+00 -0.208 0.83597
## CAPTAINCY.EXP 1.481e+02 8.290e+01 1.787 0.07685 .
## ODI.SR.B 5.022e-01 1.159e+00 0.433 0.66562
## ODI.SR.BL -2.326e+00 1.130e+00 -2.058 0.04204 *
## ODI.RUNS 2.777e-02 2.097e-02 1.324 0.18842
## ODI.WKTS 8.926e-01 5.197e-01 1.718 0.08880 .
## T.RUNS -3.139e-02 2.004e-02 -1.566 0.12030
## T.WKTS -3.327e-01 3.969e-01 -0.838 0.40389
## BAT -3.860e+01 9.942e+01 -0.388 0.69864
## BOW -5.220e+01 8.093e+01 -0.645 0.52031
## ALL NA NA NA NA
## INDIA 2.092e+02 7.290e+01 2.869 0.00498 **
## AUSTRALIA 8.518e+01 7.869e+01 1.083 0.28148
## OTHERS NA NA NA NA
## Year 2.730e+01 1.960e+01 1.393 0.16658
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 265.1 on 105 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.518, Adjusted R-squared: 0.4078
## F-statistic: 4.702 on 24 and 105 DF, p-value: 1.375e-08
# Print the analysis-of-variance table for the full linear model.
anova(lnModel)
## Analysis of Variance Table
##
## Response: SQRT.S.B.
## Df Sum Sq Mean Sq F value Pr(>F)
## L25 1 591152 591152 8.4107 0.004544 **
## B25.35 1 145411 145411 2.0689 0.153307
## RUNS.S 1 3460455 3460455 49.2344 2.306e-10 ***
## RUNS.C 1 1319905 1319905 18.7792 3.372e-05 ***
## HS 1 68240 68240 0.9709 0.326720
## AVE 1 20144 20144 0.2866 0.593535
## AVE.BL 1 32983 32983 0.4693 0.494833
## SR..B 1 2604 2604 0.0371 0.847732
## SR..BL 1 121628 121628 1.7305 0.191214
## SIXERS 1 58337 58337 0.8300 0.364360
## WKTS 1 52152 52152 0.7420 0.390983
## ECON 1 10560 10560 0.1502 0.699091
## CAPTAINCY.EXP 1 155703 155703 2.2153 0.139646
## ODI.SR.B 1 21092 21092 0.3001 0.584984
## ODI.SR.BL 1 308208 308208 4.3851 0.038662 *
## ODI.RUNS 1 18984 18984 0.2701 0.604362
## ODI.WKTS 1 113317 113317 1.6122 0.206983
## T.RUNS 1 459410 459410 6.5364 0.011999 *
## T.WKTS 1 130339 130339 1.8544 0.176184
## BAT 1 19459 19459 0.2769 0.599878
## BOW 1 137 137 0.0019 0.964886
## INDIA 1 565628 565628 8.0476 0.005469 **
## AUSTRALIA 1 118686 118686 1.6886 0.196627
## Year 1 136373 136373 1.9403 0.166581
## Residuals 105 7379964 70285
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# vif (lnModel)
# vif() fails here with the error "there are aliased coefficients in the model", which stops the HTML from being generated, so the call is commented out. The error means that some predictors are perfectly collinear, i.e. exact linear combinations of other predictors.
The linear regression results show that no IV is highly significant (none reaches the 0.1% level), so few IVs clearly contribute to the pricing.
INDIA is the only predictor significant at the 1% level (p ≈ 0.005); ODI.SR.BL is significant at the 5% level, while CAPTAINCY.EXP and ODI.WKTS are significant only at the 10% level.
The VIF error indicates that some variables are perfectly correlated. So, check the correlation between all the numerical predictors.
As there are 28 variables, we could plot them all in one graph, but it would be unreadable. Hence, plot them in two graphs of 14 variables each.
# Scatter-plot matrices, drawn in two halves so each stays readable.
# First half: everything except columns 15 to 28.
firstHalf <- analysisData[-(15:28)]
pairs.panels(firstHalf)
# Second half: everything except columns 1 to 14.
secondHalf <- analysisData[-(1:14)]
pairs.panels(secondHalf)
# PARTITION THE DATA INTO TRAINING AND TEST
# Each row is independently labelled 1 (train) or 2 (test) with probabilities
# 0.70 / 0.30; the seed makes the assignment reproducible.
set.seed(1234)
index <- sample(
  2,
  nrow(analysisData),
  replace = TRUE,
  prob = c(0.70, 0.30)
)
isTrain <- index == 1
isTest <- index == 2
trainData <- analysisData[isTrain, ]
testData <- analysisData[isTest, ]
# Resampling scheme shared by all models below:
# 10-fold cross-validation, repeated 5 times.
customControl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 5
)

# Baseline: plain linear regression on all predictors, evaluated with the
# resampling scheme above. The seed is reset so every model sees the same folds.
set.seed(1234)
linearModel <- train(
  SQRT.S.B. ~ .,
  data = trainData,
  method = "lm",
  trControl = customControl
)
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient fit
## may be misleading
# Resampled performance of the linear model: RMSE, R-squared and MAE together
# with their standard deviations across resamples.
linearModel$results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 TRUE 336.4882 0.2917763 275.102 91.55101 0.2285392 70.13945
# Print the caret fit object (sample/predictor counts and resampling summary).
linearModel
## Linear Regression
##
## 99 samples
## 27 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 89, 90, 88, 88, 89, 90, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 336.4882 0.2917763 275.102
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Coefficient table of the final linear fit on the training data.
summary(linearModel)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -545.05 -151.59 -28.63 163.15 665.80
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.174e+04 4.704e+04 -0.887 0.37776
## L25 1.404e+02 1.626e+02 0.864 0.39057
## B25.35 -4.947e+01 9.922e+01 -0.499 0.61955
## A35 NA NA NA NA
## RUNS.S 1.363e-01 1.325e-01 1.029 0.30671
## RUNS.C 1.635e-01 8.985e-02 1.820 0.07288 .
## HS -1.747e+00 2.649e+00 -0.659 0.51163
## AVE 3.581e+00 6.381e+00 0.561 0.57632
## AVE.BL 7.864e+00 1.011e+01 0.778 0.43922
## SR..B -4.067e-01 1.037e+00 -0.392 0.69607
## SR..BL -1.016e+01 1.329e+01 -0.765 0.44697
## SIXERS 2.141e+00 2.641e+00 0.811 0.42001
## WKTS 5.883e-01 1.164e+00 0.505 0.61476
## ECON -3.427e+00 8.566e+00 -0.400 0.69024
## CAPTAINCY.EXP 2.005e+02 9.952e+01 2.014 0.04762 *
## ODI.SR.B 1.988e-01 1.426e+00 0.139 0.88945
## ODI.SR.BL -1.193e+00 1.521e+00 -0.784 0.43547
## ODI.RUNS 3.942e-02 2.503e-02 1.575 0.11953
## ODI.WKTS 5.995e-01 6.462e-01 0.928 0.35657
## T.RUNS -4.411e-02 2.377e-02 -1.856 0.06749 .
## T.WKTS -7.290e-02 5.186e-01 -0.141 0.88859
## BAT -3.827e+01 1.241e+02 -0.308 0.75869
## BOW -1.477e+01 9.679e+01 -0.153 0.87915
## ALL NA NA NA NA
## INDIA 2.504e+02 8.558e+01 2.926 0.00456 **
## AUSTRALIA 1.624e+02 9.338e+01 1.739 0.08616 .
## OTHERS NA NA NA NA
## Year 2.086e+01 2.341e+01 0.891 0.37571
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 277.3 on 74 degrees of freedom
## Multiple R-squared: 0.5413, Adjusted R-squared: 0.3925
## F-statistic: 3.638 on 24 and 74 DF, p-value: 1.038e-05
# RIDGE REGRESSION
# glmnet with alpha fixed at 0, tuned over five lambda values between
# 0.0001 and 1.
# NOTE(review): in the recorded output every lambda in this grid gives the same
# resampled RMSE and the chosen value sits on the grid's upper edge; a wider
# lambda range is probably needed -- confirm.
set.seed(1234)
ridgeGrid <- expand.grid(
  alpha = 0,
  lambda = seq(0.0001, 1, length.out = 5)
)
ridgeModel <- train(
  SQRT.S.B. ~ .,
  data = trainData,
  method = "glmnet",
  tuneGrid = ridgeGrid,
  trControl = customControl
)
ridgeModel
## glmnet
##
## 99 samples
## 27 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 89, 90, 88, 88, 89, 90, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.000100 316.3624 0.3192681 259.4783
## 0.250075 316.3624 0.3192681 259.4783
## 0.500050 316.3624 0.3192681 259.4783
## 0.750025 316.3624 0.3192681 259.4783
## 1.000000 316.3624 0.3192681 259.4783
##
## Tuning parameter 'alpha' was held constant at a value of 0
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 0 and lambda = 1.
# Resampling profile of the ridge model over the tuning grid.
plot(ridgeModel)
# Final glmnet fit, plotted against lambda and against deviance (xvar = "dev").
ridgeFit <- ridgeModel$finalModel
plot(ridgeFit, xvar = "lambda", label = TRUE)
plot(ridgeFit, xvar = "dev", label = TRUE)
# Scaled variable importance.
ridgeImportance <- varImp(ridgeModel, scale = TRUE)
plot(ridgeImportance)
# LASSO REGRESSION
# glmnet with alpha fixed at 1, tuned over the same five lambda values
# (0.0001 to 1) and the same resampling scheme as the other models.
set.seed(1234)
lassoGrid <- expand.grid(
  alpha = 1,
  lambda = seq(0.0001, 1, length.out = 5)
)
lassoModel <- train(
  SQRT.S.B. ~ .,
  data = trainData,
  method = "glmnet",
  tuneGrid = lassoGrid,
  trControl = customControl
)
lassoModel
## glmnet
##
## 99 samples
## 27 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 89, 90, 88, 88, 89, 90, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.000100 334.8978 0.2925710 273.9198
## 0.250075 333.1932 0.2933637 272.5206
## 0.500050 330.7415 0.2946883 270.4532
## 0.750025 328.5459 0.2956393 268.7194
## 1.000000 326.5725 0.2963811 267.4339
##
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 1.
# Resampling profile of the lasso model over the tuning grid.
plot(lassoModel)
# Final glmnet fit, plotted against lambda and against deviance (xvar = "dev").
lassoFit <- lassoModel$finalModel
plot(lassoFit, xvar = "lambda", label = TRUE)
plot(lassoFit, xvar = "dev", label = TRUE)
# Scaled variable importance.
lassoImportance <- varImp(lassoModel, scale = TRUE)
plot(lassoImportance)
# ELASTIC NET REGRESSION
# glmnet tuned jointly over 10 alpha values between 0 and 1 and five lambda
# values between 0.0001 and 1 (50 combinations in total).
set.seed(1234)
elasticGrid <- expand.grid(
  alpha = seq(0, 1, length.out = 10),
  lambda = seq(0.0001, 1, length.out = 5)
)
elasticModel <- train(
  SQRT.S.B. ~ .,
  data = trainData,
  method = "glmnet",
  tuneGrid = elasticGrid,
  trControl = customControl
)
elasticModel
## glmnet
##
## 99 samples
## 27 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 89, 90, 88, 88, 89, 90, ...
## Resampling results across tuning parameters:
##
## alpha lambda RMSE Rsquared MAE
## 0.0000000 0.000100 316.3624 0.3192681 259.4783
## 0.0000000 0.250075 316.3624 0.3192681 259.4783
## 0.0000000 0.500050 316.3624 0.3192681 259.4783
## 0.0000000 0.750025 316.3624 0.3192681 259.4783
## 0.0000000 1.000000 316.3624 0.3192681 259.4783
## 0.1111111 0.000100 334.9100 0.2925636 273.9604
## 0.1111111 0.250075 334.3818 0.2928674 273.5494
## 0.1111111 0.500050 333.2412 0.2936938 272.6696
## 0.1111111 0.750025 332.1910 0.2944880 271.8173
## 0.1111111 1.000000 331.2245 0.2952417 271.0123
## 0.2222222 0.000100 334.9710 0.2925712 274.0026
## 0.2222222 0.250075 334.2067 0.2929751 273.4094
## 0.2222222 0.500050 332.8832 0.2938572 272.3630
## 0.2222222 0.750025 331.6641 0.2947287 271.3450
## 0.2222222 1.000000 330.5676 0.2955208 270.3934
## 0.3333333 0.000100 335.0047 0.2925156 274.0316
## 0.3333333 0.250075 334.0872 0.2929957 273.3073
## 0.3333333 0.500050 332.5908 0.2939777 272.0950
## 0.3333333 0.750025 331.2130 0.2948665 270.9078
## 0.3333333 1.000000 329.9909 0.2957272 269.8724
## 0.4444444 0.000100 334.9045 0.2926822 273.9539
## 0.4444444 0.250075 333.8838 0.2931465 273.1414
## 0.4444444 0.500050 332.2628 0.2941103 271.8068
## 0.4444444 0.750025 330.7539 0.2950362 270.4597
## 0.4444444 1.000000 329.4005 0.2959617 269.4271
## 0.5555556 0.000100 334.9206 0.2926477 273.9453
## 0.5555556 0.250075 333.7673 0.2932004 273.0255
## 0.5555556 0.500050 331.9730 0.2942200 271.5281
## 0.5555556 0.750025 330.3179 0.2952342 270.1220
## 0.5555556 1.000000 328.8457 0.2961641 268.9796
## 0.6666667 0.000100 334.9076 0.2926138 273.9296
## 0.6666667 0.250075 333.6333 0.2932368 272.9061
## 0.6666667 0.500050 331.6687 0.2943346 271.2313
## 0.6666667 0.750025 329.8882 0.2954054 269.8003
## 0.6666667 1.000000 328.2660 0.2962625 268.4927
## 0.7777778 0.000100 334.8844 0.2925911 273.9061
## 0.7777778 0.250075 333.4956 0.2932748 272.7869
## 0.7777778 0.500050 331.3579 0.2944575 270.9290
## 0.7777778 0.750025 329.4522 0.2955357 269.4618
## 0.7777778 1.000000 327.6948 0.2963300 268.1568
## 0.8888889 0.000100 334.9018 0.2925972 273.9160
## 0.8888889 0.250075 333.3437 0.2933123 272.6548
## 0.8888889 0.500050 331.0437 0.2945838 270.6667
## 0.8888889 0.750025 328.9870 0.2955903 269.0731
## 0.8888889 1.000000 327.1217 0.2963792 267.7821
## 1.0000000 0.000100 334.8978 0.2925710 273.9198
## 1.0000000 0.250075 333.1932 0.2933637 272.5206
## 1.0000000 0.500050 330.7415 0.2946883 270.4532
## 1.0000000 0.750025 328.5459 0.2956393 268.7194
## 1.0000000 1.000000 326.5725 0.2963811 267.4339
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 0 and lambda = 1.
# Resampling profile of the elastic-net model over the alpha/lambda grid.
plot(elasticModel)
# Scaled variable importance.
elasticImportance <- varImp(elasticModel, scale = TRUE)
plot(elasticImportance)
# Compare the four fitted models on their resampling results (MAE, RMSE and
# R-squared).
modelList <- list(
  linear = linearModel,
  lasso = lassoModel,
  ridge = ridgeModel,
  elastic = elasticModel
)
modelComparison <- resamples(modelList)
summary(modelComparison)
##
## Call:
## summary.resamples(object = modelComparison)
##
## Models: linear, lasso, ridge, elastic
## Number of resamples: 50
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## linear 94.90724 232.0795 267.3368 275.1020 309.4318 445.5510 0
## lasso 99.61402 229.8958 259.2751 267.4339 303.2953 432.1984 0
## ridge 104.05325 223.4995 252.6031 259.4783 295.6658 425.4602 0
## elastic 104.05325 223.4995 252.6031 259.4783 295.6658 425.4602 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## linear 105.5140 271.2796 324.3533 336.4882 397.4152 577.1842 0
## lasso 113.3644 261.6002 315.3057 326.5725 386.4215 556.1973 0
## ridge 121.0110 258.4110 303.6545 316.3624 371.6800 546.0811 0
## elastic 121.0110 258.4110 303.6545 316.3624 371.6800 546.0811 0
##
## Rsquared
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## linear 2.511387e-05 0.1055304 0.2460802 0.2917763 0.4686587 0.9383782 0
## lasso 3.279814e-06 0.1050043 0.2593284 0.2963811 0.4651699 0.9350176 0
## ridge 5.674389e-05 0.1250421 0.2896259 0.3192681 0.4853880 0.9375848 0
## elastic 5.674389e-05 0.1250421 0.2896259 0.3192681 0.4853880 0.9375848 0
# Tuning parameters (alpha, lambda) selected for each penalised model.
# NOTE(review): all three select lambda = 1, the largest value in the tuning
# grid, so the optimum may lie beyond the grid -- confirm with a wider range.
ridgeModel$bestTune
## alpha lambda
## 5 0 1
lassoModel$bestTune
## alpha lambda
## 5 1 1
elasticModel$bestTune
## alpha lambda
## 5 0 1
# Use the ridge fit as the chosen model and print its coefficients at the
# selected lambda.
ridgeLambda <- ridgeModel$bestTune$lambda
bestModel <- ridgeModel$finalModel
coef(bestModel, s = ridgeLambda)
## 28 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -4.325920e+04
## L25 1.437221e+02
## B25.35 -3.825291e+01
## A35 -1.318471e+01
## RUNS.S 1.047020e-01
## RUNS.C 1.228217e-01
## HS -7.727037e-01
## AVE 1.890611e+00
## AVE.BL 1.288837e+00
## SR..B -3.599746e-01
## SR..BL -1.261645e+00
## SIXERS 2.366044e+00
## WKTS 9.222959e-01
## ECON -1.844224e+00
## CAPTAINCY.EXP 1.751146e+02
## ODI.SR.B 2.992131e-01
## ODI.SR.BL -1.046566e+00
## ODI.RUNS 2.546127e-02
## ODI.WKTS 6.195488e-01
## T.RUNS -2.562485e-02
## T.WKTS -1.071907e-01
## BAT -2.404488e+01
## BOW 4.258354e+00
## ALL 2.140721e+01
## INDIA 1.204468e+02
## AUSTRALIA 3.344905e+01
## OTHERS -1.347188e+02
## Year 2.166016e+01
# Training-set RMSE of the ridge model.
predictionOne <- predict(ridgeModel, trainData)
trainResiduals <- trainData$SQRT.S.B. - predictionOne
sqrt(mean(trainResiduals^2))
## [1] 241.6788
# Test-set RMSE of the ridge model.
# The response column is SQRT.S.B. (one trailing dot). The earlier spelling
# `SQRT.S.B..` named a column that does not exist, so the difference was empty
# and the result was NaN.
# Incomplete rows are removed first so that the observed values and the
# predictions have the same length and stay aligned row by row (predict() on a
# caret model omits rows with missing values by default).
testComplete <- testData[complete.cases(testData), ]
predictionTwo <- predict(ridgeModel, testComplete)
sqrt(mean((testComplete$SQRT.S.B. - predictionTwo)^2))
# (The NaN previously recorded here came from the misspelled column name;
# re-run this chunk to obtain the actual test RMSE.)