# Chapter 8 Tutorial: Multiple Linear Regression
# <March 28, 2026>
# <MacGarrigle>
# Pin the CRAN mirror so package installation works in non-interactive runs.
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Step 1: Install and Load Packages -----
# Install only the packages that are not already available. Reinstalling
# everything on each run is slow, requires network access, and can silently
# change package versions in the middle of an analysis.
required_pkgs <- c("openintro", "tidyverse", "broom", "knitr")
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
##
## The downloaded binary packages are in
## /var/folders/gb/5hrx65lj5hq_x5fyxwc2mzsc0000gn/T//Rtmp6y2VgE/downloaded_packages
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
library(knitr)
# Load the loans_full_schema dataset into the workspace.
data("loans_full_schema")
# Step 2: Prepare the Data -----
# Build the analysis data set by adding three derived columns:
#   credit_checks - copy of inquiries_last_12m under a shorter name
#   credit_util   - total_credit_utilized divided by total_credit_limit
#   bankruptcy    - factor that is 0 when public_record_bankrupt == 0, else 1
loans <- mutate(
  loans_full_schema,
  credit_checks = inquiries_last_12m,
  credit_util = total_credit_utilized / total_credit_limit,
  bankruptcy = as.factor(ifelse(public_record_bankrupt == 0, 0, 1))
)
# Step 3: Single Categorical Predictors (Table 8.5) -----
# Regress interest_rate on the single categorical predictor verified_income,
# then print the coefficient table as a tibble.
model_8.5 <- lm(interest_rate ~ verified_income, data = loans)
select(
  tidy(model_8.5),
  term, estimate, std.error, statistic, p.value
)
## # A tibble: 3 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 11.1 0.0809 137. 0
## 2 verified_incomeSource Verified 1.42 0.111 12.8 3.79e- 37
## 3 verified_incomeVerified 3.25 0.130 25.1 8.61e-135
# Step 4: The Full Model (Table 8.6) -----
# Fit interest_rate against all seven predictors at once and print the
# tidy coefficient summary (one row per model term).
model_8.6 <- lm(
  interest_rate ~ verified_income + debt_to_income +
    credit_util + bankruptcy + term +
    credit_checks + issue_month,
  data = loans
)
tidy(model_8.6)
## # A tibble: 10 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 1.89 0.210 9.01 2.49e- 19
## 2 verified_incomeSource Verified 0.997 0.0992 10.1 1.12e- 23
## 3 verified_incomeVerified 2.56 0.117 21.9 1.25e-103
## 4 debt_to_income 0.0218 0.00294 7.43 1.14e- 13
## 5 credit_util 4.90 0.162 30.2 2.21e-192
## 6 bankruptcy1 0.391 0.132 2.96 3.12e- 3
## 7 term 0.153 0.00394 38.9 3.93e-308
## 8 credit_checks 0.228 0.0182 12.5 1.13e- 35
## 9 issue_monthJan-2018 0.0455 0.108 0.421 6.74e- 1
## 10 issue_monthMar-2018 -0.0416 0.107 -0.391 6.96e- 1
# Step 5: Professional Table Formatting -----
# Render the full-model coefficients as a presentation-ready table.
# Estimates, standard errors, and t-statistics are fixed at 2 decimals.
# P-values are fixed at 4 decimals, except that anything below 0.0001 is
# reported as "<0.0001": a plain "%.4f" prints such values as "0.0000",
# which misleadingly reads as a p-value of exactly zero.
model_8.6 %>%
  tidy() %>%
  select(term, estimate, std.error, statistic, p.value) %>%
  mutate(
    # Force 2 decimal places for most columns
    estimate = sprintf("%.2f", estimate),
    std.error = sprintf("%.2f", std.error),
    statistic = sprintf("%.2f", statistic),
    # p.value is still numeric here, so the threshold comparison is valid
    p.value = ifelse(
      p.value < 0.0001,
      "<0.0001",
      sprintf("%.4f", p.value)
    )
  ) %>%
  kable(
    caption = "Table 8.6: Multiple Regression Results (Formatted)",
    col.names = c("Variable", "Estimate", "Std. Error", "T-Stat", "P-Value"),
    align = "lcccc"
  )
## Table: Table 8.6: Multiple Regression Results (Formatted)
##
## |Variable                       | Estimate | Std. Error | T-Stat | P-Value |
## |:------------------------------|:--------:|:----------:|:------:|:-------:|
## |(Intercept)                    |   1.89   |    0.21    |  9.01  | <0.0001 |
## |verified_incomeSource Verified |   1.00   |    0.10    | 10.06  | <0.0001 |
## |verified_incomeVerified        |   2.56   |    0.12    | 21.87  | <0.0001 |
## |debt_to_income                 |   0.02   |    0.00    |  7.43  | <0.0001 |
## |credit_util                    |   4.90   |    0.16    | 30.25  | <0.0001 |
## |bankruptcy1                    |   0.39   |    0.13    |  2.96  | 0.0031  |
## |term                           |   0.15   |    0.00    | 38.89  | <0.0001 |
## |credit_checks                  |   0.23   |    0.02    | 12.52  | <0.0001 |
## |issue_monthJan-2018            |   0.05   |    0.11    |  0.42  | 0.6736  |
## |issue_monthMar-2018            |  -0.04   |    0.11    | -0.39  | 0.6960  |