Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto, see https://quarto.org.
When you click the Render button, a document will be generated that includes both content and the output of embedded code. You can embed code like this:
1 + 1
[1] 2
You can add options to executable code like this:
[1] 4
The echo: false option disables the printing of code (only output is displayed).
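For reference, the cell that produces the [1] 4 output above looks like this in the .qmd source (this is the default Quarto template chunk):

```{r}
#| echo: false
2 * 2
```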
# Install and load glmnet if not already installed
if (!require(glmnet)) install.packages("glmnet", dependencies = TRUE)
Loading required package: glmnet
Loading required package: Matrix
Loaded glmnet 4.1-8
library(glmnet)
# Set random seed for reproducibility
set.seed(123)
# Define parameters
n <- 50 # Number of observations
p <- 100 # Number of predictors
# Generate predictors and response
X <- matrix(rnorm(n * p), nrow = n, ncol = p)
beta_true <- c(rep(3, 5), rep(0, p - 5)) # Only first 5 predictors are relevant
y <- X %*% beta_true + rnorm(n) # Response with some noise
X_scaled <- scale(X)
# Fit Lasso model with cross-validation
cv_lasso <- cv.glmnet(X_scaled, y, alpha = 1)
# Fit Ridge model with cross-validation
cv_ridge <- cv.glmnet(X_scaled, y, alpha = 0)
# Extract coefficients at lambda.min for Lasso and Ridge
lasso_coef <- coef(cv_lasso, s = "lambda.min")
ridge_coef <- coef(cv_ridge, s = "lambda.min")
# Print lambda values and coefficients
print(cv_lasso$lambda.min)
[1] 0.09452662
print(cv_ridge$lambda.min)
[1] 25.69789
print(lasso_coef)
101 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) -0.1651242192
V1 2.5477076041
V2 2.2925730581
V3 2.7130862191
V4 2.6208850655
V5 2.7178150625
V6 .
V7 .
V8 .
V9 .
V10 .
V11 .
V12 .
V13 .
V14 0.1584833307
V15 0.0334449569
V16 0.0328565314
V17 .
V18 .
V19 .
V20 .
V21 .
V22 .
V23 .
V24 -0.4050708509
V25 .
V26 .
V27 0.0298169690
V28 .
V29 .
V30 0.1779228105
V31 .
V32 -0.0559265535
V33 .
V34 .
V35 0.0005713771
V36 .
V37 0.0676201998
V38 .
V39 .
V40 .
V41 .
V42 .
V43 -0.1687769896
V44 0.0433658489
V45 .
V46 .
V47 0.0057535617
V48 .
V49 0.0334467601
V50 .
V51 .
V52 -0.1352569494
V53 .
V54 .
V55 .
V56 0.0961285013
V57 .
V58 0.2725440526
V59 .
V60 .
V61 .
V62 -0.1128952417
V63 .
V64 .
V65 .
V66 .
V67 .
V68 .
V69 .
V70 .
V71 .
V72 .
V73 .
V74 .
V75 .
V76 .
V77 .
V78 .
V79 .
V80 .
V81 .
V82 .
V83 .
V84 0.2025239617
V85 -0.0011540789
V86 0.1539477943
V87 .
V88 .
V89 .
V90 0.1500841754
V91 .
V92 .
V93 .
V94 .
V95 .
V96 .
V97 .
V98 .
V99 .
V100 .
print(ridge_coef)
101 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) -0.165124219
V1 0.270198062
V2 0.175022519
V3 0.358557975
V4 0.378498321
V5 0.362312729
V6 -0.090723393
V7 0.017646964
V8 -0.032848774
V9 -0.256295265
V10 -0.023319265
V11 0.028906700
V12 -0.015553172
V13 -0.105393986
V14 0.202801199
V15 0.103407542
V16 0.053330824
V17 -0.043843848
V18 -0.062625235
V19 0.062626355
V20 0.006955393
V21 0.015321958
V22 0.039046165
V23 0.007884025
V24 -0.227052129
V25 0.018611738
V26 -0.137353493
V27 0.122010637
V28 0.062160594
V29 -0.011518258
V30 0.161842465
V31 0.070907838
V32 -0.194426286
V33 -0.203756447
V34 -0.112529652
V35 -0.041891595
V36 -0.067582888
V37 -0.039020131
V38 -0.048758362
V39 0.002632836
V40 0.108257642
V41 0.133146291
V42 -0.045462779
V43 0.012086299
V44 0.193450622
V45 0.043495731
V46 -0.059896612
V47 0.104405002
V48 -0.024078868
V49 0.104225513
V50 0.153319380
V51 -0.001285483
V52 -0.198570490
V53 0.106443948
V54 -0.094758947
V55 -0.001092293
V56 0.078003709
V57 -0.141716444
V58 0.115984552
V59 -0.085108620
V60 0.095850639
V61 -0.001835678
V62 -0.243405123
V63 0.041876598
V64 -0.005510896
V65 0.021976500
V66 0.156457903
V67 0.088747778
V68 0.061734189
V69 0.029211334
V70 0.009395057
V71 0.138698089
V72 -0.017966024
V73 0.114402339
V74 -0.042764001
V75 -0.028560479
V76 -0.026030124
V77 -0.003303338
V78 -0.109149718
V79 0.011451851
V80 -0.091095442
V81 0.078261760
V82 0.048913995
V83 -0.105930607
V84 0.045947667
V85 -0.001480428
V86 -0.088553108
V87 -0.048724202
V88 -0.110578805
V89 0.031356145
V90 0.257278626
V91 -0.049662188
V92 -0.125837959
V93 0.154523440
V94 -0.234055208
V95 -0.080527708
V96 -0.145720236
V97 -0.032386733
V98 0.041318781
V99 0.148905165
V100 0.045832681
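A quick way to summarize the two long printouts above is to count the nonzero coefficients; a minimal sketch using the lasso_coef and ridge_coef objects already defined (intercept excluded):

sum(as.vector(lasso_coef)[-1] != 0)  # number of predictors Lasso retains at lambda.min
sum(as.vector(ridge_coef)[-1] != 0)  # Ridge retains essentially all 100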
# Clear the workspace
rm(list = ls()) # Clear environment - remove all files from your workspace
invisible(gc()) # Clear unused memory
cat("\f") # Clear the consolegraphics.off() # clear all graphs
# Prepare needed libraries
packages <- c("glmnet", # used for regression
"caret", # used for modeling
"xgboost", # used for building XGBoost model
"ISLR",
"dplyr", # used for data manipulation and joining
"tidyselect",
"stargazer", # presentation of data
"data.table",# used for reading and manipulation of data
"ggplot2", # used for ploting
"cowplot", # used for combining multiple plots
"e1071", # used for skewness
"psych",
"car"
)
suppressPackageStartupMessages({
  for (i in seq_along(packages)) {
    if (!packages[i] %in% rownames(installed.packages())) {
      install.packages(packages[i],
                       repos = "http://cran.rstudio.com/",
                       dependencies = TRUE)
    }
    library(packages[i], character.only = TRUE)
  }
})
rm(packages)
set.seed(7)
# Clear the workspace by removing all objects from the environment
remove(list=ls())
# Load the help page for distribution functions
?distribution
Help on topic 'distribution' was found in the following packages:
  Package  Library
  lava     /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library
  stats    /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library
Using the first match ...
# Generate response variable 'y' from a Student's t-distribution with 10 observations and 7 degrees of freedom
y <- rt(n = 10, df = 7)
# Generate predictor variables from different distributions:
# Uniform distributions for x1, x2, and x3:
x1 <- runif(n = 10, min = 1, max = 7) # 10 random values between 1 and 7
x2 <- runif(n = 10, min = 2, max = 8)
x3 <- runif(n = 10, min = 3, max = 9)
# Binomial distributions for x4, x5, and x6:
x4 <- rbinom(n = 10, size = 14, prob = 0.7) # 10 random values, each as the count of 14 trials with 0.7 probability
x5 <- rbinom(n = 10, size = 15, prob = 0.8)
x6 <- rbinom(n = 10, size = 16, prob = 0.9)
# Poisson distributions for x7 to x11:
x7 <- rpois(n = 10, lambda = 5) # 10 random values with an average rate of 5
x8 <- rpois(n = 10, lambda = 6)
x9 <- rpois(n = 10, lambda = 7)
x10 <- rpois(n = 10, lambda = 8)
x11 <- rpois(n = 10, lambda = 9)
reg <- lm(formula = y ~ x1+x2+x3+x4+x5+x6+x7+x8+x9+x10+x11)
stargazer(reg, type = "text")
========================================
                 Dependent variable:
                 -------------------
                          y
----------------------------------------
x1                      0.231
x2                      0.980
x3                     -0.453
x4                     -0.316
x5                     -0.117
x6                     -1.296
x7                      0.186
x8                      0.888
x9                      0.127
x10
x11
Constant               14.039
----------------------------------------
Observations             10
R2                      1.000
========================================
Note:        *p<0.1; **p<0.05; ***p<0.01

The blank rows for x10 and x11 and the perfect R2 of 1.000 are symptoms of a saturated model: with only 10 observations and 12 coefficients to estimate (intercept plus 11 predictors), the fit is rank-deficient, so lm() cannot estimate the last two terms and no standard errors can be reported.
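A minimal sketch to confirm the rank deficiency, assuming reg from above is still in the workspace:

length(coef(reg))      # 12 coefficients requested (intercept plus 11 predictors)
sum(is.na(coef(reg)))  # 2 of them (x10, x11) are not estimable with n = 10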
set.seed(123)
n <- 50 # Number of observations
p <- 100 # Number of predictors
# Generate predictor matrix (n x p) from a normal distribution
X <- matrix(rnorm(n * p), nrow = n, ncol = p)
# True coefficients: Set only a few to non-zero for sparsity
beta_true <- c(rep(3, 5), rep(0, p - 5)) # Only first 5 predictors are relevant
y <- X %*% beta_true + rnorm(n) # Response variable with added noise
# Convert to data frame for easier manipulation
data <- as.data.frame(cbind(y, X))
names(data) <- c("y", paste0("X", 1:p))
library(glmnet)
# Standardize predictors
X_scaled <- scale(X)
# Apply Lasso regression (alpha = 1)
lasso_model <- glmnet(X_scaled, y, alpha = 1)
# Apply Ridge regression (alpha = 0)
ridge_model <- glmnet(X_scaled, y, alpha = 0)
# Use the smallest lambda value directly
lasso_coef <- coef(lasso_model, s = min(lasso_model$lambda))
ridge_coef <- coef(ridge_model, s = min(ridge_model$lambda))
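Since min(lambda) is the least-penalized point on the path, the lasso fit there is much denser than at max(lambda); a quick sketch contrasting the two ends (intercept excluded):

sum(as.vector(coef(lasso_model, s = max(lasso_model$lambda)))[-1] != 0)  # heaviest penalty: nothing survives
sum(as.vector(coef(lasso_model, s = min(lasso_model$lambda)))[-1] != 0)  # lightest penalty: many predictors survive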
# View selected predictors
lasso_coef
101 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) -0.1651242192
V1 2.4749059876
V2 2.2267619038
V3 2.5977244758
V4 2.9214298396
V5 2.8569976105
V6 .
V7 .
V8 .
V9 -0.0782470782
V10 .
V11 .
V12 .
V13 .
V14 0.0893302872
V15 0.1265278373
V16 0.1341741687
V17 .
V18 -0.1089199970
V19 0.1850786792
V20 .
V21 -0.0262251929
V22 .
V23 .
V24 -0.3752921848
V25 0.0736202592
V26 .
V27 0.0394549083
V28 .
V29 0.0569289027
V30 0.3099569456
V31 .
V32 -0.1315316174
V33 .
V34 .
V35 0.0007631402
V36 -0.0429165773
V37 0.2021075477
V38 .
V39 .
V40 .
V41 .
V42 0.0098033985
V43 -0.2330907785
V44 0.0734845057
V45 0.0145287334
V46 .
V47 .
V48 0.0612690152
V49 0.2533904285
V50 .
V51 .
V52 -0.2081377069
V53 .
V54 .
V55 .
V56 .
V57 .
V58 0.3901696347
V59 .
V60 0.0286061300
V61 .
V62 -0.1229414048
V63 -0.2142368997
V64 .
V65 -0.0217359336
V66 .
V67 .
V68 .
V69 .
V70 .
V71 .
V72 .
V73 0.0017507610
V74 .
V75 .
V76 .
V77 -0.0459013720
V78 .
V79 0.1677940235
V80 .
V81 .
V82 .
V83 .
V84 0.2820615983
V85 .
V86 0.3955285974
V87 .
V88 .
V89 0.0330490189
V90 0.1690503542
V91 .
V92 .
V93 -0.0395307149
V94 .
V95 -0.0145776248
V96 .
V97 .
V98 .
V99 0.0266763106
V100 .
ridge_coef
101 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) -0.165124219
V1 0.270198062
V2 0.175022519
V3 0.358557975
V4 0.378498321
V5 0.362312729
V6 -0.090723393
V7 0.017646964
V8 -0.032848774
V9 -0.256295265
V10 -0.023319265
V11 0.028906700
V12 -0.015553172
V13 -0.105393986
V14 0.202801199
V15 0.103407542
V16 0.053330824
V17 -0.043843848
V18 -0.062625235
V19 0.062626355
V20 0.006955393
V21 0.015321958
V22 0.039046165
V23 0.007884025
V24 -0.227052129
V25 0.018611738
V26 -0.137353493
V27 0.122010637
V28 0.062160594
V29 -0.011518258
V30 0.161842465
V31 0.070907838
V32 -0.194426286
V33 -0.203756447
V34 -0.112529652
V35 -0.041891595
V36 -0.067582888
V37 -0.039020131
V38 -0.048758362
V39 0.002632836
V40 0.108257642
V41 0.133146291
V42 -0.045462779
V43 0.012086299
V44 0.193450622
V45 0.043495731
V46 -0.059896612
V47 0.104405002
V48 -0.024078868
V49 0.104225513
V50 0.153319380
V51 -0.001285483
V52 -0.198570490
V53 0.106443948
V54 -0.094758947
V55 -0.001092293
V56 0.078003709
V57 -0.141716444
V58 0.115984552
V59 -0.085108620
V60 0.095850639
V61 -0.001835678
V62 -0.243405123
V63 0.041876598
V64 -0.005510896
V65 0.021976500
V66 0.156457903
V67 0.088747778
V68 0.061734189
V69 0.029211334
V70 0.009395057
V71 0.138698089
V72 -0.017966024
V73 0.114402339
V74 -0.042764001
V75 -0.028560479
V76 -0.026030124
V77 -0.003303338
V78 -0.109149718
V79 0.011451851
V80 -0.091095442
V81 0.078261760
V82 0.048913995
V83 -0.105930607
V84 0.045947667
V85 -0.001480428
V86 -0.088553108
V87 -0.048724202
V88 -0.110578805
V89 0.031356145
V90 0.257278626
V91 -0.049662188
V92 -0.125837959
V93 0.154523440
V94 -0.234055208
V95 -0.080527708
V96 -0.145720236
V97 -0.032386733
V98 0.041318781
V99 0.148905165
V100 0.045832681
# Change units of X1
data$X1 <- data$X1 * 1000
# Re-run Lasso and Ridge without scaling
X_transformed <- as.matrix(data[, -1]) # Exclude response y
# Re-run Lasso
lasso_model_transformed <- glmnet(X_transformed, y, alpha = 1)
lasso_coef_transformed <- coef(lasso_model_transformed, s = min(lasso_model$lambda))  # s taken from the original lasso_model's lambda path, so both fits are evaluated at the same penalty
# Check if transformed X1 is eliminated in Lasso
lasso_coef_transformed
101 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) -0.1896155048
X1 0.0026730600
X2 2.4592951183
X3 2.6257307294
X4 3.1380324887
X5 3.0156435219
X6 .
X7 .
X8 .
X9 -0.0761815938
X10 .
X11 .
X12 .
X13 .
X14 0.0855906834
X15 0.1180652543
X16 0.1422712152
X17 .
X18 -0.1012884425
X19 0.1784462586
X20 .
X21 -0.0228776373
X22 .
X23 .
X24 -0.3872202750
X25 0.0732509628
X26 .
X27 0.0376878861
X28 .
X29 0.0583215048
X30 0.3129968644
X31 .
X32 -0.1412267005
X33 .
X34 .
X35 0.0006749303
X36 -0.0468869092
X37 0.2138020072
X38 .
X39 .
X40 .
X41 .
X42 0.0112860892
X43 -0.2691016441
X44 0.0840647934
X45 0.0149546757
X46 .
X47 .
X48 0.0565047998
X49 0.2483457210
X50 .
X51 .
X52 -0.2255254350
X53 .
X54 .
X55 .
X56 .
X57 .
X58 0.4019580855
X59 .
X60 0.0241930658
X61 .
X62 -0.1117239287
X63 -0.2609772176
X64 .
X65 -0.0199872938
X66 .
X67 .
X68 .
X69 .
X70 .
X71 .
X72 .
X73 0.0015832462
X74 .
X75 .
X76 .
X77 -0.0505693329
X78 .
X79 0.1705593630
X80 .
X81 .
X82 .
X83 .
X84 0.3734860359
X85 .
X86 0.3538781227
X87 .
X88 .
X89 0.0305425608
X90 0.1869285317
X91 .
X92 .
X93 -0.0371518829
X94 .
X95 -0.0144419875
X96 .
X97 .
X98 .
X99 0.0270868428
X100 .
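Note that glmnet standardizes predictors internally by default (standardize = TRUE), which is why the rescaled X1 is merely reported with a roughly 1,000-times smaller coefficient above rather than being dropped. To expose the raw effect of the unit change, one could refit with internal standardization turned off; a sketch:

# Refit without glmnet's internal standardization so the unit change in X1
# actually alters the penalty it faces
lasso_raw <- glmnet(X_transformed, y, alpha = 1, standardize = FALSE)
coef(lasso_raw, s = min(lasso_raw$lambda))[1:6, ]  # intercept and X1..X5 on the raw scale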
# Cross-validated Lasso and Ridge models
cv_lasso <- cv.glmnet(X_scaled, y, alpha = 1)
cv_ridge <- cv.glmnet(X_scaled, y, alpha = 0)
# Extract coefficients at lambda.min
lasso_coef <- coef(cv_lasso, s = "lambda.min")
ridge_coef <- coef(cv_ridge, s = "lambda.min")
# Display the lambda values used in the model
print(lasso_model$lambda)
  [1] 2.56978947 2.45298857 2.34149645 2.23507183 2.13348436 2.03651420
[7] 1.94395149 1.85559590 1.77125620 1.69074987 1.61390268 1.54054831
[13] 1.47052801 1.40369025 1.33989036 1.27899027 1.22085819 1.16536830
[19] 1.11240051 1.06184019 1.01357792 0.96750924 0.92353445 0.88155838
[25] 0.84149019 0.80324317 0.76673453 0.73188526 0.69861994 0.66686659
[31] 0.63655647 0.60762400 0.58000655 0.55364436 0.52848037 0.50446012
[37] 0.48153163 0.45964527 0.43875368 0.41881165 0.39977602 0.38160558
[43] 0.36426102 0.34770479 0.33190107 0.31681566 0.30241590 0.28867063
[49] 0.27555010 0.26302592 0.25107099 0.23965943 0.22876653 0.21836874
[55] 0.20844355 0.19896947 0.18992600 0.18129357 0.17305349 0.16518795
[61] 0.15767990 0.15051310 0.14367205 0.13714193 0.13090862 0.12495862
[67] 0.11927906 0.11385764 0.10868264 0.10374284 0.09902757 0.09452662
[73] 0.09023024 0.08612913 0.08221443 0.07847766 0.07491073 0.07150592
[79] 0.06825586 0.06515353 0.06219220 0.05936547 0.05666722 0.05409160
[85] 0.05163306 0.04928626 0.04704612 0.04490780 0.04286667 0.04091832
[91] 0.03905851 0.03728325 0.03558866 0.03397110 0.03242707 0.03095321
[97] 0.02954633 0.02820341 0.02692152 0.02569789
print(ridge_model$lambda)
   [1] 2569.78947 2452.98857 2341.49645 2235.07183 2133.48436 2036.51420
[7] 1943.95149 1855.59590 1771.25620 1690.74987 1613.90268 1540.54831
[13] 1470.52801 1403.69025 1339.89036 1278.99027 1220.85819 1165.36830
[19] 1112.40051 1061.84019 1013.57792 967.50924 923.53445 881.55838
[25] 841.49019 803.24317 766.73453 731.88526 698.61994 666.86659
[31] 636.55647 607.62400 580.00655 553.64436 528.48037 504.46012
[37] 481.53163 459.64527 438.75368 418.81165 399.77602 381.60558
[43] 364.26102 347.70479 331.90107 316.81566 302.41590 288.67063
[49] 275.55010 263.02592 251.07099 239.65943 228.76653 218.36874
[55] 208.44355 198.96947 189.92600 181.29357 173.05349 165.18795
[61] 157.67990 150.51310 143.67205 137.14193 130.90862 124.95862
[67] 119.27906 113.85764 108.68264 103.74284 99.02757 94.52662
[73] 90.23024 86.12913 82.21443 78.47766 74.91073 71.50592
[79] 68.25586 65.15353 62.19220 59.36547 56.66722 54.09160
[85] 51.63306 49.28626 47.04612 44.90780 42.86667 40.91832
[91] 39.05851 37.28325 35.58866 33.97110 32.42707 30.95321
[97] 29.54633 28.20341 26.92152 25.69789
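To see how these lambda sequences translate into shrinkage, one can plot the coefficient paths with glmnet's built-in plot method; a minimal sketch using the models fitted above:

par(mfrow = c(1, 2))
plot(lasso_model, xvar = "lambda")  # many paths hit exactly zero as lambda grows
plot(ridge_model, xvar = "lambda")  # paths shrink smoothly but never reach zero
par(mfrow = c(1, 1))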
Lasso vs. Ridge: Lasso is preferable when the goal is feature selection, since its L1 penalty sets the coefficients of less important variables exactly to zero. Ridge is better when all predictors should be retained but have their influence damped through shrinkage, especially when predictors are highly collinear.
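For reference, the two penalized objectives in glmnet's parametrization (lasso is the alpha = 1 case, ridge is alpha = 0):

$$\hat{\beta}^{\text{lasso}} = \arg\min_{\beta}\ \frac{1}{2n}\lVert y - X\beta \rVert_2^2 + \lambda \lVert \beta \rVert_1, \qquad \hat{\beta}^{\text{ridge}} = \arg\min_{\beta}\ \frac{1}{2n}\lVert y - X\beta \rVert_2^2 + \frac{\lambda}{2} \lVert \beta \rVert_2^2$$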
Similarity of Coefficients: For the most relevant predictors (those strongly correlated with the response), the Lasso and Ridge estimates may be similar; the key difference is that Lasso sets many coefficients exactly to zero, while Ridge shrinks all coefficients without eliminating any.
Effect of Scale on Lasso: Lasso's penalty applies to the absolute size of each coefficient, so it is not invariant to the units of the predictors. When a variable is re-expressed in units that force a larger coefficient (e.g., dollars to thousands of dollars, which divides the values by 1,000 and multiplies the coefficient by 1,000), Lasso penalizes that coefficient more heavily and may eliminate the variable from the model. This is why predictors are normally standardized before fitting.
Effect on Ridge: Ridge penalizes the sum of squared coefficients, so it is similarly sensitive to unscaled variables. However, because its penalty never forces coefficients exactly to zero, Ridge will not eliminate the rescaled predictor; it will only shrink its coefficient more aggressively as the penalty on it grows.
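A toy illustration of the unit effect described above (a sketch with hypothetical variables, separate from the data used earlier): re-expressing a predictor in larger units shrinks its values and inflates the coefficient needed for the same fit, and with it the L1 penalty Lasso would charge.

x_toy <- rnorm(100)
y_toy <- 2 * x_toy + rnorm(100, sd = 0.1)
coef(lm(y_toy ~ x_toy))[2]             # roughly 2
coef(lm(y_toy ~ I(x_toy / 1000)))[2]   # roughly 2000: same fit, 1000x the L1 penalty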