Regression Discussion 8

Author

Langley Burke

Set Up

# Clear the workspace
rm(list = ls()) # Clear environment - remove all objects from the workspace
invisible(gc()) # Clear unused memory
cat("\f")       # Clear the console

Loading Packages

graphics.off()  # clear all graphs


# Prepare needed libraries
packages <- c("glmnet",    # used for penalized regression (lasso/ridge)
              "caret",     # used for modeling
              "xgboost",   # used for building XGBoost models
              "ISLR",      # data sets from Introduction to Statistical Learning
              "dplyr",     # used for data manipulation and joining
              "tidyselect",
              "stargazer", # presentation of results
              "data.table",# used for reading and manipulating data
              "ggplot2",   # used for plotting
              "cowplot",   # used for combining multiple plots
              "e1071",     # used for skewness
              "psych",     # descriptive statistics
              "car"        # regression diagnostics
              )

suppressPackageStartupMessages({
  
  for (pkg in packages) {
    if (!pkg %in% rownames(installed.packages())) {
      install.packages(pkg,
                       repos = "http://cran.rstudio.com/",
                       dependencies = TRUE
                       )
    }
    library(pkg, character.only = TRUE)
  }
  
})  

rm(packages)

set.seed(7)

p >> n

set.seed(123)  # For reproducibility
N <- 10  # number of observations
p <- 50  # number of predictors

# Generate a dataset
X <- matrix(rnorm(N * p), nrow = N, ncol = p)
beta <- rnorm(p)  # Coefficients
y <- X %*% beta + rnorm(N)  # Linear combination with noise


model <- lm(y ~ X - 1) # remove the intercept

summary(model)

Call:
lm(formula = y ~ X - 1)

Residuals:
ALL 10 residuals are 0: no residual degrees of freedom!

Coefficients: (40 not defined because of singularities)
    Estimate Std. Error t value Pr(>|t|)
X1   -1.6890        NaN     NaN      NaN
X2    0.2981        NaN     NaN      NaN
X3    4.2035        NaN     NaN      NaN
X4   18.1412        NaN     NaN      NaN
X5   -0.1074        NaN     NaN      NaN
X6   -3.2134        NaN     NaN      NaN
X7    1.0257        NaN     NaN      NaN
X8   -5.1135        NaN     NaN      NaN
X9   -7.9502        NaN     NaN      NaN
X10  -3.4384        NaN     NaN      NaN
 [ rows X11-X50 omitted: all NA, not defined because of singularities ]

Residual standard error: NaN on 0 degrees of freedom
Multiple R-squared:      1, Adjusted R-squared:    NaN 
F-statistic:   NaN on 10 and 0 DF,  p-value: NA
library(stargazer)
stargazer(model, type="text")

========================================
                 Dependent variable:    
             ---------------------------
                          y             
----------------------------------------
X1                     -1.689           
                                        
                                        
X2                      0.298           
                                        
                                        
X3                      4.204           
                                        
                                        
X4                     18.141           
                                        
                                        
X5                     -0.107           
                                        
                                        
X6                     -3.213           
                                        
                                        
X7                      1.026           
                                        
                                        
X8                     -5.113           
                                        
                                        
X9                     -7.950           
                                        
                                        
X10                    -3.438           
                                        
                                        
 [ rows X11-X50 omitted: no estimates reported ]
----------------------------------------
Observations             10             
R2                      1.000           
========================================
Note:        *p<0.1; **p<0.05; ***p<0.01

No, you cannot uniquely estimate all of the coefficients (betas) when the number of predictors exceeds the number of observations. With p = 50 predictors and only N = 10 observations, the design matrix X has rank at most 10, so the cross-product matrix X′X is singular and cannot be inverted. The normal equations therefore have no unique solution: infinitely many coefficient vectors fit the data exactly (hence the zero residuals and R-squared of 1 above), and lm() can only estimate the first 10 coefficients, reporting the remaining 40 as NA.
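
The singularity can be checked directly from the rank of X (an illustrative sketch, not part of the original output):

# rank(X) is at most min(N, p) = 10, so the 50 x 50 matrix X'X cannot be full rank
qr(X)$rank             # expected: 10
qr(crossprod(X))$rank  # rank of X'X: also 10, not 50, so X'X is singular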

Standardizing

apply(X = X,
      MARGIN = 2,
      FUN = sd  # column standard deviations (use var here for variances)
      )
 [1] 0.9537841 1.0380734 0.9308092 0.5273024 1.0825182 0.8564451 0.9363923
 [8] 0.9955075 0.5475116 1.0688850 0.7055487 0.7326917 0.8105153 1.3484826
[15] 1.2738211 0.6515157 1.2807552 0.8700321 0.7992612 1.0360528 1.0570533
[22] 1.0098733 0.7749196 0.9038609 1.0812399 0.8857048 1.1890517 0.7235803
[29] 0.8323328 1.0848067 1.1884888 1.1214604 0.8085609 1.1522709 0.9717037
[36] 1.4446861 0.6507908 1.2486367 0.7525440 1.1072081 0.7606131 0.9713721
[43] 1.2332286 1.0824507 1.0285900 1.1913966 0.8689961 0.9905616 0.7139366
[50] 0.9381471
summary(y)
       V1        
 Min.   :-7.221  
 1st Qu.:-4.169  
 Median : 1.972  
 Mean   : 1.272  
 3rd Qu.: 5.655  
 Max.   :11.982  
scaled_y <- scale(y)
summary(scaled_y) # mean = 0
       V1         
 Min.   :-1.3230  
 1st Qu.:-0.8477  
 Median : 0.1089  
 Mean   : 0.0000  
 3rd Qu.: 0.6826  
 Max.   : 1.6683  

Checking Mean after Scaling

# scale(): center to mean 0 and rescale to sd = 1
scaled_X <- scale(x = X)
 
# after standardization
colMeans(x = scaled_X)    # mean ~ 0
 [1] -6.661338e-17  0.000000e+00 -2.220446e-17  2.220446e-17 -5.551115e-18
 [6]  2.428613e-18  0.000000e+00  3.608225e-17  4.440892e-17  0.000000e+00
[11]  6.661338e-17 -3.330669e-17 -1.942890e-17  2.220446e-17 -4.440892e-17
[16]  5.551115e-17 -9.020562e-18  2.220446e-17  2.220446e-17 -3.330669e-17
[21] -4.996004e-17 -2.220446e-17 -4.440892e-17 -1.110223e-17  2.220446e-17
[26] -2.220446e-17  2.359224e-17 -3.885781e-17  1.804112e-17  1.110223e-16
[31]  2.220446e-17  3.330669e-17  7.216450e-17  4.857226e-17 -1.110223e-17
[36]  2.220446e-17  4.440892e-17 -2.220446e-17 -2.220446e-17 -4.059253e-17
[41]  0.000000e+00 -1.110223e-17 -1.110223e-16  2.220446e-17  0.000000e+00
[46]  5.551115e-18 -3.330669e-17  0.000000e+00 -6.661338e-17 -2.220446e-17

Checking SD after Scaling

apply(X = scaled_X,
      MARGIN = 2,
      FUN = sd
      )  # standard deviation = 1
 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[39] 1 1 1 1 1 1 1 1 1 1 1 1
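
For reference, scale() is equivalent to subtracting each column's mean and dividing by its standard deviation, which can be reproduced by hand with sweep() (an illustrative sketch, not part of the original output):

# Manual standardization; should match scale(X) up to floating-point error
manual_X <- sweep(X, 2, colMeans(X), "-")             # center each column
manual_X <- sweep(manual_X, 2, apply(X, 2, sd), "/")  # rescale each column
all.equal(as.vector(manual_X), as.vector(scaled_X))   # expected: TRUE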

Summary Stats

library(stargazer)

df_scaled <- as.data.frame(scaled_X)
stargazer(data = df_scaled, type = "text")

=========================================
Statistic N   Mean  St. Dev.  Min    Max 
-----------------------------------------
V1        10 -0.000  1.000   -1.405 1.720
V2        10 0.000   1.000   -2.095 1.520
V3        10 -0.000  1.000   -1.356 1.803
V4        10 0.000   1.000   -1.332 1.087
V5        10 -0.000  1.000   -1.161 2.012
V6        10 0.000   1.000   -2.067 1.512
V7        10 0.000   1.000   -1.276 2.058
V8        10 0.000   1.000   -1.955 1.395
V9        10 0.000   1.000   -1.249 1.526
V10       10 0.000   1.000   -1.369 1.637
V11       10 0.000   1.000   -1.803 1.864
V12       10 0.000   1.000   -1.767 1.270
V13       10 -0.000  1.000   -1.050 2.411
V14       10 0.000   1.000   -1.548 1.391
V15       10 -0.000  1.000   -0.885 2.021
V16       10 0.000   1.000   -1.743 1.304
V17       10 -0.000  1.000   -1.211 2.304
V18       10 0.000   1.000   -1.310 2.397
V19       10 0.000   1.000   -1.330 1.580
V20       10 -0.000  1.000   -1.007 2.186
V21       10 -0.000  1.000   -1.040 1.786
V22       10 -0.000  1.000   -1.082 1.850
V23       10 -0.000  1.000   -1.311 1.432
V24       10 -0.000  1.000   -1.372 2.249
V25       10 0.000   1.000   -1.179 1.838
V26       10 -0.000  1.000   -1.774 1.761
V27       10 0.000   1.000   -1.644 1.761
V28       10 -0.000  1.000   -2.233 1.026
V29       10 0.000   1.000   -2.154 1.821
V30       10 0.000   1.000   -1.858 1.233
V31       10 0.000   1.000   -1.519 1.891
V32       10 0.000   1.000   -1.657 1.385
V33       10 0.000   1.000   -2.009 1.299
V34       10 0.000   1.000   -1.621 1.479
V35       10 -0.000  1.000   -1.792 1.732
V36       10 0.000   1.000   -1.612 1.874
V37       10 0.000   1.000   -1.788 1.239
V38       10 -0.000  1.000   -1.305 2.064
V39       10 0.000   1.000   -1.566 1.775
V40       10 -0.000  1.000   -1.645 1.662
V41       10 0.000   1.000   -1.803 1.361
V42       10 0.000   1.000   -2.572 0.822
V43       10 -0.000  1.000   -1.564 1.547
V44       10 0.000   1.000   -1.985 1.212
V45       10 0.000   1.000   -1.687 1.167
V46       10 0.000   1.000   -2.439 0.995
V47       10 -0.000  1.000   -1.767 1.475
V48       10 0.000   1.000   -1.497 1.833
V49       10 -0.000  1.000   -1.486 1.598
V50       10 -0.000  1.000   -1.505 1.440
-----------------------------------------

Finding Optimal Lambda

cv_model <- cv.glmnet(x = scaled_X,
                      y = scaled_y, 
                      alpha = 1
                      )

# find the lambda that minimizes the cross-validated MSE
best_lambda <- cv_model$lambda.min
best_lambda
[1] 0.0165685
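
Note that cv.glmnet() assigns observations to folds at random, so the selected lambda depends on the seed set earlier. The cross-validation error curve can be inspected by plotting the fitted object (an illustrative sketch, not part of the original output):

# CV error curve with one-standard-error bars across the lambda path
plot(cv_model)
abline(v = log(best_lambda), lty = 2)  # the lambda selected above (x-axis is log(lambda))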

Lasso

li.eq <- lm(y ~ X - 1) # OLS benchmark without an intercept (same fit as "model" above)
    
# Lasso
la.eq <- glmnet(x      = scaled_X, 
                y      = scaled_y, 
                lambda = best_lambda, 
                family = "gaussian", # specify the error distribution if you know it
                intercept = FALSE,   # since we are comparing with OLS without an intercept
                alpha  = 1           # alpha = 1 gives the Lasso (L1) penalty
                )     
summary(la.eq) 
          Length Class     Mode   
a0         1     -none-    numeric
beta      50     dgCMatrix S4     
df         1     -none-    numeric
dim        2     -none-    numeric
lambda     1     -none-    numeric
dev.ratio  1     -none-    numeric
nulldev    1     -none-    numeric
npasses    1     -none-    numeric
jerr       1     -none-    numeric
offset     1     -none-    logical
call       7     -none-    call   
nobs       1     -none-    numeric
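
summary() on a glmnet fit only lists the object's slots rather than the estimates. The fitted coefficients themselves can be read from the beta slot (as done below) or via coef(); for instance, this sketch pulls out the predictors the Lasso kept:

# Nonzero Lasso coefficients: the predictors that survived the L1 penalty
lasso_beta <- la.eq$beta[, 1]
lasso_beta[lasso_beta != 0]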

Ridge

# Ridge
ri.eq <- glmnet(x      = scaled_X, 
                y      = scaled_y, 
                lambda = best_lambda, 
                family = "gaussian", 
                intercept = FALSE, 
                alpha = 0            # alpha = 0 gives the Ridge (L2) penalty
                ) 
summary(ri.eq)
          Length Class     Mode   
a0         1     -none-    numeric
beta      50     dgCMatrix S4     
df         1     -none-    numeric
dim        2     -none-    numeric
lambda     1     -none-    numeric
dev.ratio  1     -none-    numeric
nulldev    1     -none-    numeric
npasses    1     -none-    numeric
jerr       1     -none-    numeric
offset     1     -none-    logical
call       7     -none-    call   
nobs       1     -none-    numeric
df_comp_optimal <- data.frame(
    beta    = beta,
    Linear  = li.eq$coefficients,
    Lasso   = la.eq$beta[,1],
    Ridge   = ri.eq$beta[,1]
    )    
df_comp_optimal
           beta     Linear       Lasso         Ridge
X1  -0.60189285 -1.6889821  0.00000000  0.0398268064
X2  -0.99369859  0.2980547  0.00000000  0.0062904435
X3   1.02678506  4.2035277  0.00000000 -0.0270920729
X4   0.75106130 18.1411861  0.20195827  0.0967842434
X5  -1.50916654 -0.1073880  0.00000000  0.0232611226
X6  -0.09514745 -3.2134442  0.00000000 -0.0119460361
X7  -0.89594782  1.0257187 -0.26524951 -0.0962405789
X8  -2.07075107 -5.1134818  0.00000000 -0.0307682852
X9   0.15012013 -7.9502453  0.00000000 -0.0300443123
X10 -0.07921171 -3.4384300  0.00000000  0.0223224090
X11 -0.09736927         NA  0.00000000  0.0009885722
X12  0.21615254         NA  0.00000000  0.0289230665
X13  0.88246516         NA  0.18575768  0.0726246621
X14  0.20559750         NA  0.00000000 -0.0680266291
X15 -0.61643584         NA -0.17524860 -0.1048754073
X16 -0.73479925         NA  0.00000000 -0.0334053199
X17 -0.13180279         NA  0.00000000 -0.0018732970
X18  0.31001699         NA  0.00000000  0.0034908548
X19 -1.03968035         NA  0.00000000 -0.0711812204
X20 -0.18430887         NA  0.00000000 -0.0313667104
X21  0.96726726         NA  0.00000000 -0.0131569556
X22 -0.10828009         NA  0.00000000  0.0192115083
X23 -0.69842067         NA  0.00000000  0.0044747492
X24 -0.27594517         NA  0.00000000  0.0088568786
X25  1.11464855         NA  0.00000000 -0.0183366260
X26  0.55004396         NA  0.00000000  0.0446926122
X27  1.23667580         NA  0.00000000  0.0643109064
X28  0.13909786         NA  0.00000000  0.0289534134
X29  0.41027510         NA  0.00000000 -0.0379114790
X30 -0.55845691         NA  0.00000000  0.0761433380
X31  0.60537067         NA  0.00000000 -0.1065163183
X32 -0.50633354         NA  0.00000000 -0.0752107231
X33 -1.42056550         NA -0.09659316 -0.0934958707
X34  0.12799297         NA  0.09214532  0.0652000480
X35  1.94585122         NA  0.08721843  0.1292659891
X36  0.80091434         NA  0.00000000  0.0037266557
X37  1.16525339         NA  0.00000000  0.0806354198
X38  0.35885572         NA  0.00000000  0.0808684635
X39 -0.60855718         NA  0.00000000 -0.0217095809
X40 -0.20224086         NA  0.00000000 -0.0575881866
X41 -0.27324811         NA  0.00000000  0.0534700811
X42 -0.46869978         NA  0.00000000  0.0500090862
X43  0.70416728         NA  0.00000000  0.0044364844
X44 -1.19736350         NA  0.00000000  0.0315123485
X45  0.86636613         NA  0.00000000  0.0315346927
X46  0.86415249         NA  0.33387459  0.1071827181
X47 -1.19862236         NA -0.09378029 -0.1026761085
X48  0.63949200         NA  0.00000000 -0.0165063471
X49  2.43022665         NA  0.00000000 -0.0183431933
X50 -0.55721548         NA  0.00000000 -0.0143651440

I prefer the Lasso because it shows which coefficients can be shrunk all the way to zero, which makes the model easier to interpret. The two sets of estimates are broadly similar, but the Lasso coefficients are sparser: the L1 penalty sets most of them exactly to zero, while Ridge only shrinks coefficients toward zero without eliminating any.
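
The difference in sparsity is easy to verify from the fitted objects (an illustrative check using the fits above):

# Lasso sets most coefficients exactly to zero; Ridge shrinks all 50 but keeps them nonzero
sum(la.eq$beta[, 1] == 0)  # exact zeros under the Lasso
sum(ri.eq$beta[, 1] == 0)  # expected: 0 under Ridge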

Transformed Data

X_transformed <- X  # Copy original data
X_transformed[, 4] <- X_transformed[, 4] * 1000  # inflate the scale of predictor 4 by 1000x
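
A quick check confirms the change of scale; column 4's original standard deviation was about 0.53 (see the sd output above), so the transformed column's should be about 530 (an illustrative check):

# Column 4's standard deviation should now be 1000x its original value
apply(X, 2, sd)[4]              # original scale (~0.53)
apply(X_transformed, 2, sd)[4]  # ~1000x larger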

Lasso (Non-Scaled)

# Fit Lasso model on transformed data without scaling
la_eq_transformed <- glmnet(
  x = X_transformed,
  y = y,                # unscaled response this time
  lambda = best_lambda, # note: this lambda was tuned on the standardized data
  family = "gaussian",
  intercept = FALSE,
  alpha = 1
)

summary(la_eq_transformed)
          Length Class     Mode   
a0         1     -none-    numeric
beta      50     dgCMatrix S4     
df         1     -none-    numeric
dim        2     -none-    numeric
lambda     1     -none-    numeric
dev.ratio  1     -none-    numeric
nulldev    1     -none-    numeric
npasses    1     -none-    numeric
jerr       1     -none-    numeric
offset     1     -none-    logical
call       7     -none-    call   
nobs       1     -none-    numeric

Ridge (Non-Scaled)

ri_eq_transformed <- glmnet(
  x = X_transformed,
  y = y,
  lambda = best_lambda, 
  family = "gaussian",
  intercept = FALSE,
  alpha = 0
)

summary(ri_eq_transformed)
          Length Class     Mode   
a0         1     -none-    numeric
beta      50     dgCMatrix S4     
df         1     -none-    numeric
dim        2     -none-    numeric
lambda     1     -none-    numeric
dev.ratio  1     -none-    numeric
nulldev    1     -none-    numeric
npasses    1     -none-    numeric
jerr       1     -none-    numeric
offset     1     -none-    logical
call       7     -none-    call   
nobs       1     -none-    numeric
df_comp_optimal3 <- data.frame(
    beta    = beta,
    Linear  = li.eq$coefficients,
    Lasso_transformed   = la_eq_transformed$beta[,1],
    Ridge_transformed   = ri_eq_transformed$beta[,1]
    )    
df_comp_optimal3
           beta     Linear Lasso_transformed Ridge_transformed
X1  -0.60189285 -1.6889821      0.0000000000       0.325503052
X2  -0.99369859  0.2980547      0.0000000000       0.032461867
X3   1.02678506  4.2035277      0.0000000000      -0.041941493
X4   0.75106130 18.1411861      0.0037384051       0.002069071
X5  -1.50916654 -0.1073880      0.8681548503       0.440081864
X6  -0.09514745 -3.2134442      0.0000000000      -0.128829779
X7  -0.89594782  1.0257187     -1.1042581295      -1.131154691
X8  -2.07075107 -5.1134818      0.0000000000      -0.354842604
X9   0.15012013 -7.9502453      0.0000000000      -0.143437052
X10 -0.07921171 -3.4384300      0.0000000000       0.659836376
X11 -0.09736927         NA      0.0000000000      -0.264499347
X12  0.21615254         NA      0.0000000000       0.270964515
X13  0.88246516         NA      2.4820410194       0.956571719
X14  0.20559750         NA      0.0000000000      -0.758556493
X15 -0.61643584         NA     -0.3085498238      -0.679389304
X16 -0.73479925         NA      0.0000000000       0.157405204
X17 -0.13180279         NA      0.0000000000       0.169289588
X18  0.31001699         NA      0.0000000000       0.608805382
X19 -1.03968035         NA     -0.0002138494      -0.374037163
X20 -0.18430887         NA     -0.0940985742      -0.386939346
X21  0.96726726         NA      0.0000000000       0.402639771
X22 -0.10828009         NA      0.0000000000       0.038019739
X23 -0.69842067         NA      0.0000000000       0.704792453
X24 -0.27594517         NA      0.0000000000       0.455851283
X25  1.11464855         NA      0.0000000000      -0.058778106
X26  0.55004396         NA      0.0000000000       0.108313981
X27  1.23667580         NA      0.0318403270       0.314779552
X28  0.13909786         NA      0.0000000000       0.524355495
X29  0.41027510         NA      0.0000000000      -0.726328013
X30 -0.55845691         NA      0.0000000000       0.385970888
X31  0.60537067         NA      0.0000000000      -0.346554025
X32 -0.50633354         NA      0.0000000000      -0.260392202
X33 -1.42056550         NA      0.0000000000      -0.673846406
X34  0.12799297         NA      0.0000000000       0.351682043
X35  1.94585122         NA      2.3260446785       0.694585610
X36  0.80091434         NA      0.0000000000      -0.015287506
X37  1.16525339         NA      0.6096615881       0.175307611
X38  0.35885572         NA      0.0000000000       0.069595637
X39 -0.60855718         NA      0.0000000000      -0.006341468
X40 -0.20224086         NA      0.0000000000      -0.037113372
X41 -0.27324811         NA      0.0000000000       0.070549084
X42 -0.46869978         NA      0.0000000000       0.235118885
X43  0.70416728         NA      0.0000000000      -0.170322549
X44 -1.19736350         NA      0.0000000000       0.111648515
X45  0.86636613         NA      0.0000000000      -0.008361660
X46  0.86415249         NA      0.9600126414       0.103726291
X47 -1.19862236         NA     -0.0851095675      -0.197990493
X48  0.63949200         NA      0.0000000000      -0.205191115
X49  2.43022665         NA      0.0000000000      -0.143033139
X50 -0.55721548         NA      0.0000000000       0.140352542

In the transformed run, X4 (which survived the initial Lasso) keeps a nonzero coefficient, but it falls from about 0.20 to 0.004: after multiplying the column by 1,000, the same contribution to y can be made with a coefficient roughly 1,000 times smaller, so the reported estimate sits much closer to zero. The rescaling, together with fitting on the unscaled response and reusing a lambda that was tuned on the standardized data, also changes which other variables are selected and how large their coefficients are. This sensitivity to the units of the predictors is exactly why the data are normally standardized before fitting Lasso or Ridge.
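
The transformed-run estimates are not directly comparable to the standardized run, since that fit used scaled X and y. Within the transformed fit itself, though, the pure scale effect on X4 is easy to see: one unit of the original column is 1/1000 of a transformed unit (an illustrative check):

# The per-unit effect of the original X4 implied by the transformed fit
la_eq_transformed$beta[4, 1]         # coefficient per transformed unit (~0.004)
la_eq_transformed$beta[4, 1] * 1000  # implied effect per original unit of X4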