Ridge and Lasso Regression

Ridge regression penalizes the residual sum of squares with the squared (L2) norm of the coefficients:

\[RSS(\beta) + \lambda \sum_{j=1}^{p} \beta_j^2\]

Lasso regression instead penalizes with the absolute (L1) norm, which can shrink coefficients exactly to zero:

\[RSS(\beta) + \lambda \sum_{j=1}^{p} |\beta_j|\]
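
Both are special cases of the elastic net objective that glmnet actually optimizes (stated here for context; the mixing parameter \(\alpha\) reappears below): \(\alpha = 1\) recovers the lasso penalty and \(\alpha = 0\) the ridge penalty.

\[\min_{\beta_0, \beta} \; \frac{1}{2n} \sum_{i=1}^{n} \left(y_i - \beta_0 - x_i^T \beta\right)^2 + \lambda \left[ \frac{1-\alpha}{2} \sum_{j=1}^{p} \beta_j^2 + \alpha \sum_{j=1}^{p} |\beta_j| \right]\]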

# Clear the workspace
rm(list = ls(all = TRUE))

Read and Understand the Data

setwd("C://Users//brbhatta//Desktop//INSOFE//My_exercise//8_26May_27May//27May//20180527_Batch42_CSE7302c_Ridge_Lasso_ElasticNet_TuningHyperParams//ridge_lasso")
labour_data <- read.csv("labour_income.csv")
str(labour_data)
## 'data.frame':    3987 obs. of  5 variables:
##  $ wages    : num  10.6 11 17.8 14 8.2 ...
##  $ education: num  15 13.2 14 16 15 13.5 12 14 18 11 ...
##  $ age      : int  40 19 46 50 31 30 61 46 43 17 ...
##  $ sex      : Factor w/ 2 levels "Female","Male": 2 2 2 1 2 1 1 1 2 2 ...
##  $ language : Factor w/ 3 levels "English","French",..: 1 1 3 1 1 1 1 3 1 1 ...
summary(labour_data)
##      wages         education          age           sex      
##  Min.   : 2.30   Min.   : 0.00   Min.   :16.0   Female:2001  
##  1st Qu.: 9.25   1st Qu.:12.00   1st Qu.:28.0   Male  :1986  
##  Median :14.13   Median :13.00   Median :36.0                
##  Mean   :15.54   Mean   :13.34   Mean   :37.1                
##  3rd Qu.:19.72   3rd Qu.:15.10   3rd Qu.:46.0                
##  Max.   :49.92   Max.   :20.00   Max.   :69.0                
##     language   
##  English:3244  
##  French : 259  
##  Other  : 484  
##                
##                
## 
head(labour_data)
##   wages education age    sex language
## 1 10.56      15.0  40   Male  English
## 2 11.00      13.2  19   Male  English
## 3 17.76      14.0  46   Male    Other
## 4 14.00      16.0  50 Female  English
## 5  8.20      15.0  31   Male  English
## 6 16.97      13.5  30 Female  English
tail(labour_data)
##      wages education age    sex language
## 3982 16.66       8.0  61 Female  English
## 3983  6.80      13.1  20   Male  English
## 3984 30.49      16.0  52   Male    Other
## 3985 22.00      15.0  41   Male    Other
## 3986 11.85      11.0  47 Female  English
## 3987 23.00      14.0  30   Male  English
sum(is.na(labour_data))
## [1] 0

Data Pre-processing

Train-Test Split

  • Split the data into train (70%) and test (30%) sets
set.seed(007)

train_rows <- sample(x = seq_len(nrow(labour_data)), size = 0.7 * nrow(labour_data))

train_data <- labour_data[train_rows, ]

test_data <- labour_data[-train_rows, ]
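
A minimal sanity check (not part of the original script) confirms the 70/30 proportions; note that sample() truncates the non-integer size 0.7 * 3987 = 2790.9 down to 2790, which matches the "2790 samples" reported by preProcess below.

# Sanity check: sizes of the 70/30 split
nrow(train_data)  # 2790
nrow(test_data)   # 1197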

Standardize the Data

  • Standardize the continuous independent variables (education and age); the centering and scaling parameters are learned from the train data only and then applied to both the train and test sets
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
std_obj <- preProcess(x = train_data[, !colnames(train_data) %in% c("wages")],
                      method = c("center", "scale"))

train_std_data <- predict(std_obj, train_data)

test_std_data <- predict(std_obj, test_data)
std_obj
## Created from 2790 samples and 4 variables
## 
## Pre-processing:
##   - centered (2)
##   - ignored (2)
##   - scaled (2)
head(train_std_data)
##      wages   education         age    sex language
## 3943 12.38 -0.27202965 -0.41263640 Female  English
## 1586  9.97  0.70387715 -0.82111481   Male  English
## 462  13.73  1.84243509  0.32262474 Female  English
## 278  23.04  0.05327262  1.05788588 Female  English
## 971  23.96  0.21592375 -0.08585367   Male  English
## 3154  6.58 -0.43468078 -1.47468026 Female  English
head(test_std_data)
##    wages  education        age    sex language
## 1  10.56  0.5412260  0.2409291   Male  English
## 3  17.76  0.2159238  0.7311031   Male    Other
## 4  14.00  0.8665283  1.0578859 Female  English
## 7   6.70 -0.4346808  1.9565384 Female  English
## 11 16.00 -2.0611921  0.7311031   Male    Other
## 12 23.00  0.2159238 -0.4126364   Male  English
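
As a quick cross-check (an addition, not in the original), the standardized columns should have mean approximately 0 and standard deviation approximately 1 on the train set; on the test set this holds only approximately, since the parameters come from the train data.

# Verify the standardization on the train set
colMeans(train_std_data[, c("education", "age")])      # ~0
apply(train_std_data[, c("education", "age")], 2, sd)  # ~1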

Dummify the Data

  • Use the dummyVars() function from caret to convert sex and language into dummy variables
dummy_obj <- dummyVars( ~ . , train_std_data)

train_dummy_data <- as.data.frame(predict(dummy_obj, train_std_data))

test_dummy_data <- as.data.frame(predict(dummy_obj, test_std_data))
head(train_dummy_data)
##      wages   education         age sex.Female sex.Male language.English
## 3943 12.38 -0.27202965 -0.41263640          1        0                1
## 1586  9.97  0.70387715 -0.82111481          0        1                1
## 462  13.73  1.84243509  0.32262474          1        0                1
## 278  23.04  0.05327262  1.05788588          1        0                1
## 971  23.96  0.21592375 -0.08585367          0        1                1
## 3154  6.58 -0.43468078 -1.47468026          1        0                1
##      language.French language.Other
## 3943               0              0
## 1586               0              0
## 462                0              0
## 278                0              0
## 971                0              0
## 3154               0              0
head(test_dummy_data)
##    wages  education        age sex.Female sex.Male language.English
## 1  10.56  0.5412260  0.2409291          0        1                1
## 3  17.76  0.2159238  0.7311031          0        1                0
## 4  14.00  0.8665283  1.0578859          1        0                1
## 7   6.70 -0.4346808  1.9565384          1        0                1
## 11 16.00 -2.0611921  0.7311031          0        1                0
## 12 23.00  0.2159238 -0.4126364          0        1                1
##    language.French language.Other
## 1                0              0
## 3                0              1
## 4                0              0
## 7                0              0
## 11               0              1
## 12               0              0
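
One design note: by default, dummyVars() creates one indicator column per factor level (hence both sex.Female and sex.Male above). This encoding is perfectly collinear for ordinary least squares, but is unproblematic, and even common, for penalized regression. If a full-rank encoding were preferred, caret supports it; a minimal sketch:

# Full-rank alternative: drops one reference level per factor
dummy_obj_fr <- dummyVars( ~ . , train_std_data, fullRank = TRUE)
head(predict(dummy_obj_fr, train_std_data))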

Get the Data into a Compatible Format

  • The functions we will be using from the glmnet package expect a numeric matrix as input, not our familiar formula interface, so we need to convert our data frames into matrices, separating the predictors from the target (wages)
X_train <- as.matrix(train_dummy_data[, -1])
  
y_train <- as.matrix(train_dummy_data[, 1])
  
X_test <- as.matrix(test_dummy_data[, -1])
  
y_test <- as.matrix(test_dummy_data[, 1])
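
An equivalent shortcut worth knowing (a sketch, not part of the original flow) is model.matrix(), which dummifies factors and builds the design matrix in one step. It uses treatment contrasts, so it produces the full-rank encoding rather than the one-column-per-level encoding above; the leading intercept column is dropped because glmnet fits its own intercept.

# Alternative: build the design matrix directly from the formula
X_train_alt <- model.matrix(wages ~ . , data = train_std_data)[, -1]
y_train_alt <- train_std_data$wages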

Hyper-parameter Tuning

Choosing a lambda for Lasso Regression

  • The alpha value is 1 for lasso regression
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-16
cv_lasso <- cv.glmnet(X_train, y_train, alpha = 1, type.measure = "mse", nfolds = 4)

# CV error vs log(lambda); the vertical dotted lines mark lambda.min and lambda.1se
plot(cv_lasso)

  • The object returned from the call to cv.glmnet() contains the lambda values of interest: lambda.min (the lambda with the minimum cross-validated error) and lambda.1se (the largest lambda whose error is within one standard error of that minimum)

  • The coefficients are accessible by calling coef() on the cv_lasso object; coef() on a cv.glmnet object uses lambda.1se by default (see the snippet below), which is why the coefficients printed here differ from those of the final model refit at lambda.min later
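
A brief illustration of those accessors (hypothetical lines, consistent with the glmnet API):

# lambda.1se trades a little CV error for a sparser, more regularized model
print(cv_lasso$lambda.1se)

# coef() defaults to s = "lambda.1se"; request lambda.min explicitly if wanted
coef(cv_lasso, s = "lambda.min")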

# Coefficient paths: each curve traces one coefficient as lambda varies
plot(cv_lasso$glmnet.fit, xvar = "lambda", label = TRUE)

print(cv_lasso$lambda.min)
## [1] 0.07241715
coef(cv_lasso)
## 8 x 1 sparse Matrix of class "dgCMatrix"
##                          1
## (Intercept)      16.794336
## education         2.285889
## age               2.664529
## sex.Female       -2.279586
## sex.Male          .       
## language.English  .       
## language.French   .       
## language.Other    .

Choosing a lambda for Ridge Regression

  • The alpha value is 0 for ridge regression
cv_ridge <- cv.glmnet(X_train, y_train, alpha = 0, type.measure = "mse", nfolds = 4)

# CV error vs log(lambda) for the ridge fit
plot(cv_ridge)

# Ridge coefficient paths: shrinkage toward zero, but no exact zeros
plot(cv_ridge$glmnet.fit, xvar = "lambda", label = TRUE)

  • We can access the lambda and the coefficients as we did before; unlike the lasso, ridge keeps every coefficient nonzero, shrinking them toward zero rather than eliminating them
print(cv_ridge$lambda.min)
## [1] 0.328403
coef(cv_ridge)
## 8 x 1 sparse Matrix of class "dgCMatrix"
##                            1
## (Intercept)      15.73379156
## education         1.87998809
## age               2.13132411
## sex.Female       -1.34452432
## sex.Male          1.34171157
## language.English -0.12408093
## language.French   0.23497933
## language.Other    0.04254616

Building the Final Model

Building the Final Lasso Regression Model

# Refit on the full training set at the cross-validated lambda.min
lasso_model <- glmnet(X_train, y_train, lambda = cv_lasso$lambda.min, alpha = 1)

coef(lasso_model)
## 8 x 1 sparse Matrix of class "dgCMatrix"
##                           s0
## (Intercept)      17.21523733
## education         2.77341706
## age               3.13672030
## sex.Female       -3.14026120
## sex.Male          .         
## language.English  .         
## language.French   0.08227626
## language.Other    .
  • Use the model to predict on test data
preds_lasso <- predict(lasso_model, X_test)
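
predict() on a glmnet fit returns a matrix with one column of predictions per lambda in the fit; since this model was fit at a single lambda, preds_lasso has one column (a quick check, not in the original script):

# One column per lambda; a single lambda was supplied here
dim(preds_lasso)  # 1197 rows, 1 column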

Building the Final Ridge Regression Model

# Refit on the full training set at the cross-validated lambda.min
ridge_model <- glmnet(X_train, y_train, lambda = cv_ridge$lambda.min, alpha = 0)

coef(ridge_model)
## 8 x 1 sparse Matrix of class "dgCMatrix"
##                           s0
## (Intercept)      15.76844764
## education         2.73075409
## age               3.07192453
## sex.Female       -1.65759407
## sex.Male          1.55801018
## language.English -0.10449380
## language.French   0.26629251
## language.Other   -0.01122563
  • Use the model to predict on test data
preds_ridge <- predict(ridge_model, X_test)

Model Performance Evaluation

Lasso Regression Model Metrics

library(DMwR)
## Loading required package: grid
regr.eval(trues = y_test, preds = preds_lasso)
##        mae        mse       rmse       mape 
##  5.1939127 44.6079517  6.6789185  0.4327586

Ridge Regression Model Metrics

library(DMwR)

regr.eval(trues = y_test, preds = preds_ridge)
##        mae        mse       rmse       mape 
##  5.1943165 44.5931661  6.6778115  0.4330808
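
On this dataset the two models perform almost identically (test RMSE ≈ 6.68 for both, with the lasso using fewer predictors). As a final cross-check (an addition, not in the original), the RMSE reported by regr.eval() can be reproduced by hand:

# Hand-compute RMSE to verify the regr.eval() output
sqrt(mean((y_test - preds_lasso)^2))  # ~6.6789
sqrt(mean((y_test - preds_ridge)^2))  # ~6.6778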