REGULARIZATION
ALY6015: INTERMEDIATE ANALYTICS
NORTHEASTERN UNIVERSITY
Student - Jayakumar Moris Udayakumar
Instructor Name: Prof. Zhi (Richard) He
Date: 18th Mar 2024

LOADING DATASET

DATA PARTITION

# Fix the RNG state so the train/test partition is reproducible.
set.seed(20353)

# Sample 75% of the row indices (a single partition, returned as a matrix of
# indices rather than a list), using S.F.Ratio as the outcome; the rows that
# were not sampled form the test set.
train_col_ds <- createDataPartition(
  col_dataset$S.F.Ratio,
  p = 0.75,
  list = FALSE,
  times = 1
)
train_col_split <- col_dataset[train_col_ds, ]
test_col_split <- col_dataset[-train_col_ds, ]

# Predictor matrices for glmnet.
# model.matrix() never includes the response (S.F.Ratio) among its columns;
# column 1 is the constant "(Intercept)" term. The previous `[, -15]` therefore
# did not remove S.F.Ratio -- it silently discarded whichever predictor landed
# in column 15 and kept the intercept column. Dropping column 1 keeps every
# predictor; glmnet fits its own intercept by default.
# NOTE: rendered results further down this report were produced with the old
# matrices and must be regenerated after this change.
train_matrix_excP <- model.matrix(S.F.Ratio ~ ., train_col_split)[, -1]
test_matrix_excP <- model.matrix(S.F.Ratio ~ ., test_col_split)[, -1]

# Response vectors (S.F.Ratio coerced to numeric) for the train and test rows.
train_S.F.Ratio <- as.numeric(train_col_split$S.F.Ratio)
test_S.F.Ratio <- as.numeric(test_col_split$S.F.Ratio)

RIDGE REGRESSION

Cross Validation

# Re-seed before cross-validating so this chunk gives the same result on
# every run.
set.seed(20353)

# 10-fold cross-validation for ridge regression (alpha = 0) on the training
# predictors and response.
cv.ridge <- cv.glmnet(
  x = train_matrix_excP,
  y = train_S.F.Ratio,
  alpha = 0,
  nfolds = 10
)

# Cross-validation curve.
plot(cv.ridge)

# Report the two candidate penalties stored on the CV object, rounded to
# three decimals.
print(
  paste(
    "lambda Min:", round(cv.ridge$lambda.min, 3),
    "; lambda 1se:", round(cv.ridge$lambda.1se, 3)
  )
)
## [1] "lambda Min: 0.366 ; lambda 1se: 7.176"

Ridge Regression - Model fit

# Fit ridge (alpha = 0) on the training data at the cross-validated
# lambda.min penalty.
ridgemodel_fit <- glmnet(
  x = train_matrix_excP,
  y = train_S.F.Ratio,
  alpha = 0,
  lambda = cv.ridge$lambda.min
)

# Predictions from the fitted ridge model for the training rows and for the
# held-out test rows.
predict.ridge.train <- predict(ridgemodel_fit, newx = train_matrix_excP)
predict.ridge.test <- predict(ridgemodel_fit, newx = test_matrix_excP)

# Root-mean-squared error of those predictions against the observed response.
Ridge_RMSE_train <- RMSE(train_S.F.Ratio, predict.ridge.train)
Ridge_RMSE_test <- RMSE(test_S.F.Ratio, predict.ridge.test)

# One-row summary of the train and test RMSE, rounded to four decimals.
RSME_ridge <- data.frame(
  Model = "Ridge",
  Train = round(Ridge_RMSE_train, 4),
  Test = round(Ridge_RMSE_test, 4)
)

# Render the summary as a centred, bordered table.
kable_styling(
  kable(
    RSME_ridge,
    align = "c",
    caption = "Ridge RSME Values Train and Test"
  ),
  bootstrap_options = "bordered"
)
Ridge RSME Values Train and Test
Model Train Test
Ridge 2.9645 2.6233

LASSO REGRESSION

Cross Validation

# Re-seed before cross-validating so this chunk gives the same result on
# every run.
set.seed(20353)

# 10-fold cross-validation for lasso regression (alpha = 1) on the training
# predictors and response.
cv.lassoreg <- cv.glmnet(
  x = train_matrix_excP,
  y = train_S.F.Ratio,
  alpha = 1,
  nfolds = 10
)

# Cross-validation curve.
plot(cv.lassoreg)

# Report the two candidate penalties stored on the CV object, rounded to
# three decimals.
print(
  paste(
    "lambda Min:", round(cv.lassoreg$lambda.min, 3),
    "; lambda 1se:", round(cv.lassoreg$lambda.1se, 3)
  )
)
## [1] "lambda Min: 0.088 ; lambda 1se: 0.825"

Lasso Regression - Model fit

# Fit lasso (alpha = 1) on the training data at the cross-validated
# lambda.min penalty.
lassomodel_fit <- glmnet(
  x = train_matrix_excP,
  y = train_S.F.Ratio,
  alpha = 1,
  lambda = cv.lassoreg$lambda.min
)

# Predictions from the fitted lasso model for the training rows and for the
# held-out test rows.
predict.lasso.train <- predict(lassomodel_fit, newx = train_matrix_excP)
predict.lasso.test <- predict(lassomodel_fit, newx = test_matrix_excP)

# Root-mean-squared error of those predictions against the observed response.
lasso_RMSE_train <- RMSE(train_S.F.Ratio, predict.lasso.train)
lasso_RMSE_test <- RMSE(test_S.F.Ratio, predict.lasso.test)

# One-row summary of the train and test RMSE, rounded to four decimals.
RSME_lasso <- data.frame(
  Model = "Lasso",
  Train = round(lasso_RMSE_train, 4),
  Test = round(lasso_RMSE_test, 4)
)

# Render the summary as a centred, bordered table.
kable_styling(
  kable(
    RSME_lasso,
    align = "c",
    caption = "Lasso RSME Values Train and Test"
  ),
  bootstrap_options = "bordered"
)
Lasso RSME Values Train and Test
Model Train Test
Lasso 2.9693 2.6089

ELASTIC NET REGRESSION

Cross Validation

# Re-seed before cross-validating so this chunk gives the same result on
# every run.
set.seed(20353)

# 10-fold cross-validation for elastic net regression (alpha = 0.5, an even
# mix of the ridge and lasso penalties) on the training predictors and
# response.
cv.elastreg <- cv.glmnet(
  x = train_matrix_excP,
  y = train_S.F.Ratio,
  alpha = 0.5,
  nfolds = 10
)

# Cross-validation curve.
plot(cv.elastreg)

# Report the two candidate penalties stored on the CV object, rounded to
# three decimals.
print(
  paste(
    "lambda Min:", round(cv.elastreg$lambda.min, 3),
    "; lambda 1se:", round(cv.elastreg$lambda.1se, 3)
  )
)
## [1] "lambda Min: 0.177 ; lambda 1se: 1.37"

Elastic Net Regression - Model fit

# Fit elastic net (alpha = 0.5) on the training data at the cross-validated
# lambda.min penalty.
elastmodel_fit <- glmnet(
  x = train_matrix_excP,
  y = train_S.F.Ratio,
  alpha = 0.5,
  lambda = cv.elastreg$lambda.min
)

# Predictions from the fitted elastic net model for the held-out test rows
# and for the training rows.
predict.elast.test <- predict(elastmodel_fit, newx = test_matrix_excP)
predict.elast.train <- predict(elastmodel_fit, newx = train_matrix_excP)

# Root-mean-squared error of those predictions against the observed response.
Elast_RMSE_train <- RMSE(train_S.F.Ratio, predict.elast.train)
Elast_RMSE_test <- RMSE(test_S.F.Ratio, predict.elast.test)

# One-row summary of the train and test RMSE, rounded to four decimals.
RSME_Elastinet <- data.frame(
  Model = "Elastinet",
  Train = round(Elast_RMSE_train, 4),
  Test = round(Elast_RMSE_test, 4)
)

# Render the summary as a centred, bordered table.
kable_styling(
  kable(
    RSME_Elastinet,
    align = "c",
    caption = "Elastinet RSME Values Train and Test"
  ),
  bootstrap_options = "bordered"
)
Elastinet RSME Values Train and Test
Model Train Test
Elastinet 2.9714 2.6102

STEPWISE SELECTION

# Stepwise selection in both directions, starting from the GLM that regresses
# S.F.Ratio on every other column of the training split. No family is passed
# to glm(), so its default applies; trace = 0 silences the per-step log.
glm_step <- step(
  glm(S.F.Ratio ~ ., data = train_col_split),
  direction = "both",
  trace = 0
)

# Predictions from the selected model.
# predict() on a glm object takes the new observations through `newdata`.
# The earlier calls used `newx` (glmnet's argument name), which a glm
# prediction silently ignores, so both calls returned the fitted values for
# the training rows and the "test" RMSE compared test responses against
# training fits.
# NOTE: the rendered stepwise test RMSE shown later in this report predates
# this fix and must be regenerated.
predict.glmstep.test <- predict(glm_step, newdata = test_col_split)
predict.glmstep.train <- predict(glm_step, newdata = train_col_split)

# Root-mean-squared error of those predictions against the observed response.
glm_RMSE_train <- RMSE(train_S.F.Ratio, predict.glmstep.train)
glm_RMSE_test <- RMSE(test_S.F.Ratio, predict.glmstep.test)

# One-row summary of the train and test RMSE, rounded to four decimals.
RSME_GLM <- data.frame(
  Model = "Stepwise Selection - GLM",
  Train = round(glm_RMSE_train, 4),
  Test = round(glm_RMSE_test, 4)
)

# Render the summary as a centred, bordered table.
kable(RSME_GLM, align = "c", caption = "GLM RSME Values Train and Test") %>%
  kable_styling(bootstrap_options = "bordered")
GLM RSME Values Train and Test
Model Train Test
Stepwise Selection - GLM 2.9597 4.5233
# Stack the four single-row RMSE summaries (ridge, lasso, elastic net,
# stepwise GLM) into one comparison table.
combined_table <- bind_rows(RSME_ridge, RSME_lasso, RSME_Elastinet, RSME_GLM)

# Render the comparison as a centred, bordered table.
# NOTE(review): `cex` does not appear to be a documented kable() argument and
# is presumably ignored -- confirm before relying on it for sizing.
kableExtra::kable_styling(
  kable(
    combined_table,
    align = "c",
    caption = "Combined RMSE Values Train and Test",
    cex = 0.5
  ),
  bootstrap_options = "bordered"
)
Combined RMSE Values Train and Test
Model Train Test
Ridge 2.9645 2.6233
Lasso 2.9693 2.6089
Elastinet 2.9714 2.6102
Stepwise Selection - GLM 2.9597 4.5233