Import libraries
library(tidyverse)
library(GGally)
library(glmnet)
data <- read_csv("Advertising.csv")
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
TV = col_double(),
Radio = col_double(),
Newspaper = col_double(),
Sales = col_double()
)
head(data, 10)
Simple Linear Regression
lm_fit2 <- lm(formula = Sales ~ TV, data = data)
summary(lm_fit2)
Call:
lm(formula = Sales ~ TV, data = data)
Residuals:
Min 1Q Median 3Q Max
-8.3860 -1.9545 -0.1913 2.0671 7.2124
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.032594 0.457843 15.36 <2e-16 ***
TV 0.047537 0.002691 17.67 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.259 on 198 degrees of freedom
Multiple R-squared: 0.6119, Adjusted R-squared: 0.6099
F-statistic: 312.1 on 1 and 198 DF, p-value: < 2.2e-16
Multiple Linear Regression
lm_fit3 <- lm(formula = Sales ~ TV + Radio + Newspaper, data = data)
summary(lm_fit3)
Call:
lm(formula = Sales ~ TV + Radio + Newspaper, data = data)
Residuals:
Min 1Q Median 3Q Max
-8.8277 -0.8908 0.2418 1.1893 2.8292
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.938889 0.311908 9.422 <2e-16 ***
TV 0.045765 0.001395 32.809 <2e-16 ***
Radio 0.188530 0.008611 21.893 <2e-16 ***
Newspaper -0.001037 0.005871 -0.177 0.86
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.686 on 196 degrees of freedom
Multiple R-squared: 0.8972, Adjusted R-squared: 0.8956
F-statistic: 570.3 on 3 and 196 DF, p-value: < 2.2e-16
ggp <- ggpairs(data)
print(ggp, progress = FALSE)

Regression with Dummy Variables
df <- read_csv('Credit.csv')
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
Income = col_double(),
Limit = col_double(),
Rating = col_double(),
Cards = col_double(),
Age = col_double(),
Education = col_double(),
Gender = col_character(),
Student = col_character(),
Married = col_character(),
Ethnicity = col_character(),
Balance = col_double()
)
head(df, 10)
df <- df %>% mutate_at(c("Gender", "Student", "Married", "Ethnicity"), as.factor)
lm_fit4 <- lm(formula = Balance ~ Student + Income, data = df)
summary(lm_fit4)
Call:
lm(formula = Balance ~ Student + Income, data = df)
Residuals:
Min 1Q Median 3Q Max
-762.37 -331.38 -45.04 323.60 818.28
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 211.1430 32.4572 6.505 2.34e-10 ***
StudentYes 382.6705 65.3108 5.859 9.78e-09 ***
Income 5.9843 0.5566 10.751 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 391.8 on 397 degrees of freedom
Multiple R-squared: 0.2775, Adjusted R-squared: 0.2738
F-statistic: 76.22 on 2 and 397 DF, p-value: < 2.2e-16
lm_fit5 <- lm(formula = Balance ~ Student * Income, data = df)
summary(lm_fit5)
Call:
lm(formula = Balance ~ Student * Income, data = df)
Residuals:
Min 1Q Median 3Q Max
-773.39 -325.70 -41.13 321.65 814.04
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 200.6232 33.6984 5.953 5.79e-09 ***
StudentYes 476.6758 104.3512 4.568 6.59e-06 ***
Income 6.2182 0.5921 10.502 < 2e-16 ***
StudentYes:Income -1.9992 1.7313 -1.155 0.249
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 391.6 on 396 degrees of freedom
Multiple R-squared: 0.2799, Adjusted R-squared: 0.2744
F-statistic: 51.3 on 3 and 396 DF, p-value: < 2.2e-16
lm_fit6 <- lm(formula = Balance ~ Student + Rating, data = df)
summary(lm_fit6)
Call:
lm(formula = Balance ~ Student + Rating, data = df)
Residuals:
Min 1Q Median 3Q Max
-672.81 -115.06 2.92 124.45 469.79
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -431.31776 25.13464 -17.16 <2e-16 ***
StudentYes 399.13749 33.14390 12.04 <2e-16 ***
Rating 2.56781 0.06434 39.91 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 198.9 on 397 degrees of freedom
Multiple R-squared: 0.8138, Adjusted R-squared: 0.8129
F-statistic: 867.8 on 2 and 397 DF, p-value: < 2.2e-16
lm_fit7 <- lm(formula = Balance ~ Student * Rating, data = df)
summary(lm_fit7)
Call:
lm(formula = Balance ~ Student * Rating, data = df)
Residuals:
Min 1Q Median 3Q Max
-695.0 -115.4 2.6 125.7 466.9
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -423.37117 26.14584 -16.193 < 2e-16 ***
StudentYes 311.99938 85.86752 3.633 0.000316 ***
Rating 2.54543 0.06747 37.728 < 2e-16 ***
StudentYes:Rating 0.24609 0.22372 1.100 0.272002
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 198.8 on 396 degrees of freedom
Multiple R-squared: 0.8144, Adjusted R-squared: 0.813
F-statistic: 579.3 on 3 and 396 DF, p-value: < 2.2e-16
Simulation Study
e <- rnorm(500, mean = 0, sd = 1)
x1 <- runif(500)
x2 <- runif(500)
x3 <- runif(500)
x4 <- runif(500)
x5 <- runif(500)
x6 <- runif(500)
x7 <- runif(500)
x8 <- runif(500)
x9 <- runif(500)
x10 <- runif(500)
x <- cbind(x1,x2,x3,x4,x5,x6,x7,x8,x9,x10)
b <- c(5,3,8,4,10,0,0,0,2,3)
y <- x %*% b + e
Scatter Plots
par(mfrow=c(2,2))
plot(x1, y)
plot(x2, y)
plot(x3, y)
plot(x4, y)

par(mfrow=c(2,2))
plot(x5, y)
plot(x6, y)
plot(x7, y)
plot(x8, y)

par(mfrow=c(1,2))
plot(x9, y)
plot(x10, y)

OLS Estimate
lm_fit8 <- lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10)
summary(lm_fit8)
Call:
lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 +
x10)
Residuals:
Min 1Q Median 3Q Max
-2.5319 -0.6571 -0.0082 0.6178 3.7872
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.20086 0.24513 0.819 0.413
x1 4.99041 0.15356 32.498 <2e-16 ***
x2 3.02512 0.15413 19.628 <2e-16 ***
x3 8.17961 0.15159 53.959 <2e-16 ***
x4 4.03556 0.14747 27.366 <2e-16 ***
x5 9.90680 0.14754 67.147 <2e-16 ***
x6 0.07624 0.15618 0.488 0.626
x7 -0.03803 0.15960 -0.238 0.812
x8 -0.13421 0.14897 -0.901 0.368
x9 1.73014 0.15551 11.126 <2e-16 ***
x10 2.84365 0.15718 18.091 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9786 on 489 degrees of freedom
Multiple R-squared: 0.955, Adjusted R-squared: 0.954
F-statistic: 1037 on 10 and 489 DF, p-value: < 2.2e-16
Ridge Regression and LASSO
lm_ridge <- cv.glmnet(x,y,alpha=0)
lm_lasso <- cv.glmnet(x,y,alpha=1)
par(mfrow=c(1,2))
plot(lm_ridge)
plot(lm_lasso)

lm_ridge$lambda.1se
[1] 0.4174587
lm_lasso$lambda.1se
[1] 0.0920551
coef(lm_ridge, s = "lambda.1se")
11 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) 1.68794896
x1 4.50974689
x2 2.83841484
x3 7.51406893
x4 3.68141552
x5 9.05144955
x6 0.08484578
x7 -0.10946227
x8 -0.14994479
x9 1.68070553
x10 2.57427006
coef(lm_lasso, s = "lambda.1se")
11 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) 1.206242
x1 4.669388
x2 2.746175
x3 7.925204
x4 3.736231
x5 9.565087
x6 .
x7 .
x8 .
x9 1.471957
x10 2.495505
lm_fit9 <- lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x9 + x10)
summary(lm_fit9)
Call:
lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x9 + x10)
Residuals:
Min 1Q Median 3Q Max
-2.5174 -0.6563 -0.0257 0.6344 3.8401
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.1506 0.1989 0.757 0.449
x1 4.9842 0.1518 32.835 <2e-16 ***
x2 3.0267 0.1537 19.698 <2e-16 ***
x3 8.1922 0.1502 54.549 <2e-16 ***
x4 4.0325 0.1469 27.456 <2e-16 ***
x5 9.9102 0.1471 67.365 <2e-16 ***
x9 1.7316 0.1551 11.163 <2e-16 ***
x10 2.8437 0.1560 18.225 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9768 on 492 degrees of freedom
Multiple R-squared: 0.9548, Adjusted R-squared: 0.9542
F-statistic: 1486 on 7 and 492 DF, p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(lm_fit9,1)
plot(lm_fit9,2)
plot(lm_fit9,3)
plot(lm_fit9,4)

shapiro.test(lm_fit9$residuals)
Shapiro-Wilk normality test
data: lm_fit9$residuals
W = 0.9967, p-value = 0.401
hist(lm_fit9$residuals, main = "", xlab = "residual")

LS0tCnRpdGxlOiAiUmVncmVzc2lvbiBBbmFseXNpcyIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDoKICAgICAgdG9jX2NvbGxhcHNlZDogdHJ1ZQotLS0KCiMjIEltcG9ydCBsaWJyYXJpZXMKYGBge3IgZWNobz1UUlVFLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShHR2FsbHkpCmxpYnJhcnkoZ2xtbmV0KQpgYGAKCgpgYGB7cn0KZGF0YSA8LSByZWFkX2NzdigiQWR2ZXJ0aXNpbmcuY3N2IikKYGBgCgpgYGB7cn0KaGVhZChkYXRhLCAxMCkKYGBgCgojIyBTaW1wbGUgTGluZWFyIFJlZ3Jlc3Npb24KYGBge3J9CmxtX2ZpdDIgPC0gbG0oZm9ybXVsYSA9IFNhbGVzIH4gVFYsIGRhdGEgPSBkYXRhKQpzdW1tYXJ5KGxtX2ZpdDIpCmBgYAoKIyMgTXVsdGlwbGUgTGluZWFyIFJlZ3Jlc3Npb24KYGBge3J9CmxtX2ZpdDMgPC0gbG0oZm9ybXVsYSA9IFNhbGVzIH4gVFYgKyBSYWRpbyArIE5ld3NwYXBlciwgZGF0YSA9IGRhdGEpCnN1bW1hcnkobG1fZml0MykKYGBgCgpgYGB7ciB3YXJuaW5nPUZBTFNFfQpnZ3AgPC0gZ2dwYWlycyhkYXRhKQpwcmludChnZ3AsIHByb2dyZXNzID0gRkFMU0UpCmBgYAoKIyMgUmVncmVzc2lvbiB3aXRoIER1bW15IFZhcmlhYmxlcwpgYGB7cn0KZGYgPC0gcmVhZF9jc3YoJ0NyZWRpdC5jc3YnKQpgYGAKCgpgYGB7cn0KaGVhZChkZiwgMTApCmBgYAoKYGBge3J9CmRmIDwtIGRmICU+JSBtdXRhdGVfYXQoYygiR2VuZGVyIiwgIlN0dWRlbnQiLCAiTWFycmllZCIsICJFdGhuaWNpdHkiKSwgYXMuZmFjdG9yKQpgYGAKCgoKYGBge3J9CmxtX2ZpdDQgPC0gbG0oZm9ybXVsYSA9IEJhbGFuY2UgfiBTdHVkZW50ICsgSW5jb21lLCBkYXRhID0gZGYpCnN1bW1hcnkobG1fZml0NCkKYGBgCgpgYGB7cn0KbG1fZml0NSA8LSBsbShmb3JtdWxhID0gQmFsYW5jZSB+IFN0dWRlbnQgKiBJbmNvbWUsIGRhdGEgPSBkZikKc3VtbWFyeShsbV9maXQ1KQpgYGAKCgpgYGB7cn0KbG1fZml0NiA8LSBsbShmb3JtdWxhID0gQmFsYW5jZSB+IFN0dWRlbnQgKyBSYXRpbmcsIGRhdGEgPSBkZikKc3VtbWFyeShsbV9maXQ2KQpgYGAKCgpgYGB7cn0KbG1fZml0NyA8LSBsbShmb3JtdWxhID0gQmFsYW5jZSB+IFN0dWRlbnQgKiBSYXRpbmcsIGRhdGEgPSBkZikKc3VtbWFyeShsbV9maXQ3KQpgYGAKCiMjIFNpbXVsYXRpb24gU3R1ZHkKYGBge3J9CnNldC5zZWVkKDEyMykKZSAgIDwtIHJub3JtKDUwMCwgbWVhbiA9IDAsIHNkID0gMSkKeDEgIDwtIHJ1bmlmKDUwMCkKeDIgIDwtIHJ1bmlmKDUwMCkKeDMgIDwtIHJ1bmlmKDUwMCkKeDQgIDwtIHJ1bmlmKDUwMCkKeDUgIDwtIHJ1bmlmKDUwMCkKeDYgIDwtIHJ1bmlmKDUwMCkKeDcgIDwtIHJ1bmlmKDUwMCkKeDggIDwtIHJ1bmlmKDUwMCkKeDkgIDwtIHJ1bmlmKDUwMCkKeDEwIDwtIHJ1bmlmKDUwMCkKeCAgIDwtIGNiaW5kKHgxLHgyLHgzLHg0LHg1LHg2LHg3LHg4LHg5LHgxMCkKYiAgIDwtIGMoNSwzLDgsNCwxMCwwLDAsMCwyLDMpCnkgICA8LSB4ICUqJSBiICsgZQpgYGAKCiMjIyBTY2F0dGVyIFBsb3RzCmBgYHtyfQpwYXIobWZyb3c9YygyLDIpKQpwbG90KHgxLCB5KQpwbG90KHgyLCB5KQpwbG90KHgzLCB5KQpwbG90KHg0LCB5KQpgYGAKCmBgYHtyfQpwYXIobWZyb3c9YygyLDIpKQpwbG90KHg1LCB5KQpwbG90KHg2LCB5KQpwbG90KHg3LCB5KQpwbG90KHg4LCB5KQpgYGAKCgpgYGB7cn0KcGFyKG1mcm93PWMoMSwyKSkKcGxvdCh4OSwgeSkKcGxvdCh4MTAsIHkpCmBgYAoKCiMjIyBPTFMgRXN0aW1hdGUKYGBge3J9CmxtX2ZpdDggPC0gbG0oZm9ybXVsYSA9IHkgfiB4MSArIHgyICsgeDMgKyB4NCArIHg1ICsgeDYgKyB4NyArIHg4ICsgeDkgKyB4MTApCnN1bW1hcnkobG1fZml0OCkKYGBgCgojIyMgUmlkZ2UgUmVncmVzc2lvbiBhbmQgTEFTU08KYGBge3J9CmxtX3JpZGdlIDwtIGN2LmdsbW5ldCh4LHksYWxwaGE9MCkKbG1fbGFzc28gPC0gY3YuZ2xtbmV0KHgseSxhbHBoYT0xKQpwYXIobWZyb3c9YygxLDIpKQpwbG90KGxtX3JpZGdlKQpwbG90KGxtX2xhc3NvKQpgYGAKCmBgYHtyfQpsbV9yaWRnZSRsYW1iZGEuMXNlCmxtX2xhc3NvJGxhbWJkYS4xc2UKYGBgCgoKYGBge3J9CmNvZWYobG1fcmlkZ2UsIHMgPSAibGFtYmRhLjFzZSIpCmBgYAoKCmBgYHtyfQpjb2VmKGxtX2xhc3NvLCBzID0gImxhbWJkYS4xc2UiKQpgYGAKCgpgYGB7cn0KbG1fZml0OSA8LSBsbShmb3JtdWxhID0geSB+IHgxICsgeDIgKyB4MyArIHg0ICsgeDUgKyB4OSArIHgxMCkKc3VtbWFyeShsbV9maXQ5KQpgYGAKCmBgYHtyfQpwYXIobWZyb3c9YygyLDIpKQpwbG90KGxtX2ZpdDksMSkKcGxvdChsbV9maXQ5LDIpCnBsb3QobG1fZml0OSwzKQpwbG90KGxtX2ZpdDksNCkKYGBgCgpgYGB7cn0Kc2hhcGlyby50ZXN0KGxtX2ZpdDkkcmVzaWR1YWxzKQpgYGAKCmBgYHtyfQpoaXN0KGxtX2ZpdDkkcmVzaWR1YWxzLCBtYWluID0gIiIsIHhsYWIgPSAicmVzaWR1YWwiKQpgYGAKCgoK