# Clear the workspace
  rm(list = ls()) # Remove all objects from the environment
  invisible(gc()) # Free unused memory
  cat("\f")       # Clear the console
  graphics.off()  # Close all open graphics devices
# Import the necessary libraries
library(MASS)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-8

Part 1

# Set seed for reproducibility
set.seed(42)

# Define parameters
n <- 20   # Number of observations
p <- 100  # Number of predictors

# Simulate predictors (X) and response variable (y)
X <- matrix(rnorm(n * p), nrow = n, ncol = p)
y <- rnorm(n)

# Attempt to fit an OLS model
ols_model <- lm(y ~ X)
summary(ols_model)
## 
## Call:
## lm(formula = y ~ X)
## 
## Residuals:
## ALL 20 residuals are 0: no residual degrees of freedom!
## 
## Coefficients: (81 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5796240        NaN     NaN      NaN
## X1          -0.1633698        NaN     NaN      NaN
## X2           0.0539168        NaN     NaN      NaN
## X3           0.2902229        NaN     NaN      NaN
## X4          -0.7841325        NaN     NaN      NaN
## X5          -0.8992692        NaN     NaN      NaN
## X6          -0.1560911        NaN     NaN      NaN
## X7          -0.8007147        NaN     NaN      NaN
## X8          -0.7305252        NaN     NaN      NaN
## X9           0.0213804        NaN     NaN      NaN
## X10         -0.7078046        NaN     NaN      NaN
## X11         -0.2856775        NaN     NaN      NaN
## X12         -0.4160416        NaN     NaN      NaN
## X13         -0.0032922        NaN     NaN      NaN
## X14         -0.0008478        NaN     NaN      NaN
## X15          0.4290647        NaN     NaN      NaN
## X16          0.0103589        NaN     NaN      NaN
## X17         -1.6703966        NaN     NaN      NaN
## X18          0.4150547        NaN     NaN      NaN
## X19          0.5231972        NaN     NaN      NaN
## X20                 NA         NA      NA       NA
## X21                 NA         NA      NA       NA
## X22                 NA         NA      NA       NA
## X23                 NA         NA      NA       NA
## X24                 NA         NA      NA       NA
## X25                 NA         NA      NA       NA
## X26                 NA         NA      NA       NA
## X27                 NA         NA      NA       NA
## X28                 NA         NA      NA       NA
## X29                 NA         NA      NA       NA
## X30                 NA         NA      NA       NA
## X31                 NA         NA      NA       NA
## X32                 NA         NA      NA       NA
## X33                 NA         NA      NA       NA
## X34                 NA         NA      NA       NA
## X35                 NA         NA      NA       NA
## X36                 NA         NA      NA       NA
## X37                 NA         NA      NA       NA
## X38                 NA         NA      NA       NA
## X39                 NA         NA      NA       NA
## X40                 NA         NA      NA       NA
## X41                 NA         NA      NA       NA
## X42                 NA         NA      NA       NA
## X43                 NA         NA      NA       NA
## X44                 NA         NA      NA       NA
## X45                 NA         NA      NA       NA
## X46                 NA         NA      NA       NA
## X47                 NA         NA      NA       NA
## X48                 NA         NA      NA       NA
## X49                 NA         NA      NA       NA
## X50                 NA         NA      NA       NA
## X51                 NA         NA      NA       NA
## X52                 NA         NA      NA       NA
## X53                 NA         NA      NA       NA
## X54                 NA         NA      NA       NA
## X55                 NA         NA      NA       NA
## X56                 NA         NA      NA       NA
## X57                 NA         NA      NA       NA
## X58                 NA         NA      NA       NA
## X59                 NA         NA      NA       NA
## X60                 NA         NA      NA       NA
## X61                 NA         NA      NA       NA
## X62                 NA         NA      NA       NA
## X63                 NA         NA      NA       NA
## X64                 NA         NA      NA       NA
## X65                 NA         NA      NA       NA
## X66                 NA         NA      NA       NA
## X67                 NA         NA      NA       NA
## X68                 NA         NA      NA       NA
## X69                 NA         NA      NA       NA
## X70                 NA         NA      NA       NA
## X71                 NA         NA      NA       NA
## X72                 NA         NA      NA       NA
## X73                 NA         NA      NA       NA
## X74                 NA         NA      NA       NA
## X75                 NA         NA      NA       NA
## X76                 NA         NA      NA       NA
## X77                 NA         NA      NA       NA
## X78                 NA         NA      NA       NA
## X79                 NA         NA      NA       NA
## X80                 NA         NA      NA       NA
## X81                 NA         NA      NA       NA
## X82                 NA         NA      NA       NA
## X83                 NA         NA      NA       NA
## X84                 NA         NA      NA       NA
## X85                 NA         NA      NA       NA
## X86                 NA         NA      NA       NA
## X87                 NA         NA      NA       NA
## X88                 NA         NA      NA       NA
## X89                 NA         NA      NA       NA
## X90                 NA         NA      NA       NA
## X91                 NA         NA      NA       NA
## X92                 NA         NA      NA       NA
## X93                 NA         NA      NA       NA
## X94                 NA         NA      NA       NA
## X95                 NA         NA      NA       NA
## X96                 NA         NA      NA       NA
## X97                 NA         NA      NA       NA
## X98                 NA         NA      NA       NA
## X99                 NA         NA      NA       NA
## X100                NA         NA      NA       NA
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 19 and 0 DF,  p-value: NA

In this simulated scenario with p ≫ n (100 predictors, 20 observations), OLS can estimate only as many coefficients as there are observations: the intercept plus the first 19 predictors. The remaining 81 coefficients are reported as NA ("not defined because of singularities"), and with zero residual degrees of freedom every standard error, t-value, and p-value is NaN while R-squared is trivially 1. The underlying problem is rank deficiency: when p ≥ n, the design matrix X cannot have full column rank, so X'X is singular and the normal equations have no unique solution.
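The rank deficiency is easy to verify directly. A quick check, reusing the X simulated above:

# The design matrix [1 | X] has 101 columns but only 20 rows, so its
# rank is at most 20 -- far short of the 101 coefficients to estimate
qr(cbind(1, X))$rank  # 20
ncol(X) + 1           # 101 parameters (intercept + 100 slopes)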

Part 2

A.

# Scale the predictors
X_scaled <- scale(X)
# 5-fold cross-validation for Lasso (fold assignment is random, so
# lambda.min can vary from run to run unless a seed is set just before)
cv_lasso <- cv.glmnet(X_scaled, y, alpha = 1, nfolds = 5)

# Get the optimal lambda value for Lasso
optimal_lambda_lasso <- cv_lasso$lambda.min
lasso_coefficients <- coef(cv_lasso, s = "lambda.min")

# Print the optimal lambda value
cat("Optimal Lambda for Lasso: ", optimal_lambda_lasso, "\n")
## Optimal Lambda for Lasso:  0.5640283
# Plot the cross-validation results
plot(cv_lasso)

# Perform cross-validation for Ridge
cv_ridge <- cv.glmnet(X_scaled, y, alpha = 0, nfolds = 5)

# Get the optimal lambda value for Ridge
optimal_lambda_ridge <- cv_ridge$lambda.min
ridge_coefficients <- coef(cv_ridge, s = "lambda.min")

# Output the optimal lambda value for Ridge
cat("Optimal Lambda for Ridge: ", optimal_lambda_ridge, "\n")
## Optimal Lambda for Ridge:  5.640283
plot(cv_ridge)

# Identify the non-zero coefficients from both models
# (the Ridge penalty shrinks but never produces exact zeros, so this
#  returns all 101 terms, intercept included; the Lasso vector is sparse)
ridge_non_zero <- which(ridge_coefficients != 0)
lasso_non_zero <- which(lasso_coefficients != 0)

print("Non-zero Ridge Coefficients:")
## [1] "Non-zero Ridge Coefficients:"
print(ridge_coefficients[ridge_non_zero])
##   [1] -0.1646529224 -0.0291058573 -0.0340641768  0.0063700972  0.0104727934
##   [6] -0.0301403744  0.0109108482  0.0037876294 -0.0154466119 -0.0081071299
##  [11]  0.0108057892  0.0050496517  0.0410907680 -0.0208989857  0.0245618743
##  [16]  0.0071337225 -0.0204436682 -0.0497435522  0.0265368598  0.0003906498
##  [21] -0.0029570052 -0.0232219225 -0.0213913811  0.0274462007  0.0182773639
##  [26]  0.0201994680  0.0024239584 -0.0207178020 -0.0043419211  0.0249667224
##  [31]  0.0312678204  0.0144799773 -0.0055915302 -0.0109840880 -0.0184609730
##  [36] -0.0371145949  0.0350459851  0.0120687100  0.0381936685 -0.0102040871
##  [41] -0.0064710388  0.0157622747  0.0008999130  0.0374362277  0.0019006785
##  [46]  0.0185591588 -0.0189871492  0.0247680834  0.0094764796  0.0240215675
##  [51] -0.0011946665 -0.0072310877 -0.0158727369  0.0459513456 -0.0124469993
##  [56]  0.0465814740  0.0015868517 -0.0138635145 -0.0045387662  0.0044873306
##  [61] -0.0011943441 -0.0105972635  0.0155693340  0.0210235450  0.0188281772
##  [66] -0.0100965273 -0.0174690971 -0.0229925051  0.0132609092 -0.0184545517
##  [71] -0.0131686014  0.0113219932  0.0484246040 -0.0253034570  0.0128428821
##  [76]  0.0217184052 -0.0021332859  0.0040309954  0.0058467156 -0.0112787133
##  [81] -0.0272319857 -0.0029281754  0.0154846301 -0.0154784457 -0.0142740965
##  [86]  0.0259856332  0.0119605323  0.0007207626 -0.0166601496 -0.0323446496
##  [91] -0.0257290933  0.0102783094  0.0095924415  0.0252797581 -0.0027808296
##  [96] -0.0017761066 -0.0381302354  0.0246577162 -0.0049615642  0.0000132025
## [101]  0.0103930173
print("Non-zero Lasso Coefficients:")
## [1] "Non-zero Lasso Coefficients:"
print(lasso_coefficients[lasso_non_zero])
## [1] -0.1646529
  • Which method do you prefer, and why?

    • The choice between Ridge and Lasso depends on the context. When feature selection matters, Lasso is preferable because it can shrink coefficients exactly to zero, yielding a simpler, more interpretable model. Ridge keeps every predictor but shrinks all coefficients toward zero, which is useful under multicollinearity or when many predictors each carry a small amount of signal.
  • Do they give similar coefficients?

    • Not here. Ridge retains all 100 predictors with small non-zero coefficients, while Lasso eliminates every predictor: its single "non-zero coefficient" above (-0.1646529) is just the intercept. That is a sensible outcome, since y was simulated independently of X. In general, how closely the two coefficient vectors agree depends on the data and the penalty strength; see the comparison sketch after this list.
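A minimal side-by-side check, reusing the ridge_coefficients and lasso_coefficients objects from above:

# Put both coefficient vectors (intercept + 100 predictors) in one table
comparison <- data.frame(
  ridge = as.matrix(ridge_coefficients)[, 1],
  lasso = as.matrix(lasso_coefficients)[, 1]
)
head(comparison)          # first few coefficients side by side
colSums(comparison != 0)  # ridge: 101 non-zero terms; lasso: 1 (the intercept)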

B.

# Transform one predictor: divide it by 1000 (e.g., dollars to thousands)
# Note: coef() rows include the intercept, so lasso_non_zero[1] is the
# intercept's row (index 1); used as a column index into X it selects V1
non_zero_index <- lasso_non_zero[1]
X_transformed <- X_scaled
X_transformed[, non_zero_index] <- X_transformed[, non_zero_index] / 1000
# Rerun Lasso without re-scaling the predictors ourselves
# (cv.glmnet still standardizes internally by default; see the note below)
cv_lasso_transformed <- cv.glmnet(X_transformed, y, alpha = 1, nfolds = 5)
lasso_coefficients_transformed <- coef(cv_lasso_transformed, s = "lambda.min")

# Output the coefficients after transformation
print(lasso_coefficients_transformed)
## 101 x 1 sparse Matrix of class "dgCMatrix"
##                      s1
## (Intercept) -0.16465292
## V1           .         
## V2           .         
## V3           .         
## V4           .         
## V5           .         
## V6           .         
## V7           .         
## V8           .         
## V9           .         
## V10          .         
## V11          .         
## V12          0.06223462
## V13          .         
## V14          .         
## V15          .         
## V16          .         
## V17         -0.15970959
## V18          .         
## V19          .         
## V20          .         
## V21          .         
## V22          .         
## V23          .         
## V24          .         
## V25          .         
## V26          .         
## V27          .         
## V28          .         
## V29          .         
## V30          .         
## V31          .         
## V32          .         
## V33          .         
## V34          .         
## V35          .         
## V36          .         
## V37          .         
## V38          .         
## V39          .         
## V40          .         
## V41          .         
## V42          .         
## V43          .         
## V44          .         
## V45          .         
## V46          .         
## V47          .         
## V48          .         
## V49          .         
## V50          .         
## V51          .         
## V52          .         
## V53          0.12920566
## V54          .         
## V55          0.02784337
## V56          .         
## V57          .         
## V58          .         
## V59          .         
## V60          .         
## V61          .         
## V62          .         
## V63          .         
## V64          .         
## V65          .         
## V66          .         
## V67          .         
## V68          .         
## V69          .         
## V70          .         
## V71          .         
## V72          0.13175282
## V73          .         
## V74          .         
## V75          .         
## V76          .         
## V77          .         
## V78          .         
## V79          .         
## V80          .         
## V81          .         
## V82          .         
## V83          .         
## V84          .         
## V85          .         
## V86          .         
## V87          .         
## V88          .         
## V89          .         
## V90          .         
## V91          .         
## V92          .         
## V93          .         
## V94          .         
## V95          .         
## V96          .         
## V97          .         
## V98          .         
## V99          .         
## V100         .
  • Impact of transformation: Rescaling an independent variable changes Lasso's regularization path and, with it, which variables are selected. Dividing a predictor by 1000 means its coefficient must be 1000 times larger to carry the same signal, so the L1 penalty weighs on that coefficient far more heavily and Lasso becomes more likely to drop the variable entirely.

  • A caveat for this run: cv.glmnet standardizes predictors internally by default (standardize = TRUE), which undoes the manual rescaling before fitting. The different variables selected above (V12, V17, V53, V55, V72) are therefore driven largely by the random cross-validation fold assignment (no seed was set before each cv.glmnet call) rather than by the transformation itself. To expose the scale effect directly, refit with standardize = FALSE, as in the sketch below.
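A minimal sketch of how to isolate the scaling effect, reusing X_scaled, X_transformed, and y from above. Fixing the fold assignment via foldid and turning off internal standardization ensures that any difference between the two fits comes from the rescaled column alone:

# Fix the CV folds so both fits see identical splits
set.seed(42)
foldid <- sample(rep(1:5, length.out = nrow(X_scaled)))

# Fit on the original and the rescaled data without internal standardization
cv_raw   <- cv.glmnet(X_scaled,      y, alpha = 1, foldid = foldid,
                      standardize = FALSE)
cv_trans <- cv.glmnet(X_transformed, y, alpha = 1, foldid = foldid,
                      standardize = FALSE)

# Compare which variables survive at lambda.min under each scaling
which(as.matrix(coef(cv_raw,   s = "lambda.min"))[, 1] != 0)
which(as.matrix(coef(cv_trans, s = "lambda.min"))[, 1] != 0)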