# Chapter 8 Tutorial: Multiple Linear Regression
# Date: March 28, 2026
# Author: MacGarrigle

options(repos = c(CRAN = "https://cloud.r-project.org"))

# Step 1: Install and Load Packages -----
# Install only the packages that are not already available. Calling
# install.packages() unconditionally re-downloads every package on each run
# and makes the script fail when there is no network connection.
required_packages <- c("openintro", "tidyverse", "broom", "knitr")
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Attach the packages used below. The `##` lines are the console messages
# recorded when this script was knitted.
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
library(knitr)

# Load the loan data set used throughout the tutorial into the workspace.
data("loans_full_schema")

# Step 2: Prepare the Data -----
# Add the derived predictors used by the models below:
#   credit_checks - inquiries_last_12m under a shorter name
#   credit_util   - total_credit_utilized divided by total_credit_limit
#   bankruptcy    - factor coded 0 when public_record_bankrupt is 0,
#                   and 1 otherwise
loans <- mutate(
  loans_full_schema,
  credit_checks = inquiries_last_12m,
  credit_util = total_credit_utilized / total_credit_limit,
  bankruptcy = as.factor(ifelse(public_record_bankrupt == 0, 0, 1))
)

# Step 3: Single Categorical Predictors (Table 8.5) -----
# Regress the interest rate on income-verification status alone and print the
# coefficient table. The output below shows an intercept plus one indicator
# term for each of the "Source Verified" and "Verified" levels.
model_8.5 <- lm(interest_rate ~ verified_income, data = loans)
select(tidy(model_8.5), term, estimate, std.error, statistic, p.value)
## # A tibble: 3 × 5
##   term                           estimate std.error statistic   p.value
##   <chr>                             <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)                       11.1     0.0809     137.  0        
## 2 verified_incomeSource Verified     1.42    0.111       12.8 3.79e- 37
## 3 verified_incomeVerified            3.25    0.130       25.1 8.61e-135
# Step 4: The Full Model (Table 8.6) -----
# Fit the multiple regression of interest rate on all seven predictors, then
# print one row per estimated coefficient. Note that `term` in the formula is
# a column of `loans`, unrelated to the `term` column that tidy() returns.
model_8.6 <- lm(
  interest_rate ~
    verified_income +
    debt_to_income +
    credit_util +
    bankruptcy +
    term +
    credit_checks +
    issue_month,
  data = loans
)
tidy(model_8.6)
## # A tibble: 10 × 5
##    term                           estimate std.error statistic   p.value
##    <chr>                             <dbl>     <dbl>     <dbl>     <dbl>
##  1 (Intercept)                      1.89     0.210       9.01  2.49e- 19
##  2 verified_incomeSource Verified   0.997    0.0992     10.1   1.12e- 23
##  3 verified_incomeVerified          2.56     0.117      21.9   1.25e-103
##  4 debt_to_income                   0.0218   0.00294     7.43  1.14e- 13
##  5 credit_util                      4.90     0.162      30.2   2.21e-192
##  6 bankruptcy1                      0.391    0.132       2.96  3.12e-  3
##  7 term                             0.153    0.00394    38.9   3.93e-308
##  8 credit_checks                    0.228    0.0182     12.5   1.13e- 35
##  9 issue_monthJan-2018              0.0455   0.108       0.421 6.74e-  1
## 10 issue_monthMar-2018             -0.0416   0.107      -0.391 6.96e-  1
# Step 5: Professional Table Formatting -----
# Render the Table 8.6 coefficients as a presentation table: numeric columns
# are converted to fixed-width strings and passed to kable() with readable
# column headers.
model_8.6 %>%
  tidy() %>%
  select(term, estimate, std.error, statistic, p.value) %>%
  mutate(
    # Fixed 2 decimal places for the estimate, standard error and t-statistic
    estimate = sprintf("%.2f", estimate),
    std.error = sprintf("%.2f", std.error),
    statistic = sprintf("%.2f", statistic),
    # A p-value is never exactly zero, so anything that would round to
    # "0.0000" is reported as "<0.0001"; larger values keep 4 decimal places.
    p.value = if_else(
      p.value < 0.0001,
      "<0.0001",
      sprintf("%.4f", p.value)
    )
  ) %>%
  kable(
    caption = "Table 8.6: Multiple Regression Results (Formatted)",
    col.names = c("Variable", "Estimate", "Std. Error", "T-Stat", "P-Value"),
    align = "lcccc"
  )
## Table 8.6: Multiple Regression Results (Formatted)
## Variable Estimate Std. Error T-Stat P-Value
## (Intercept) 1.89 0.21 9.01 <0.0001
## verified_incomeSource Verified 1.00 0.10 10.06 <0.0001
## verified_incomeVerified 2.56 0.12 21.87 <0.0001
## debt_to_income 0.02 0.00 7.43 <0.0001
## credit_util 4.90 0.16 30.25 <0.0001
## bankruptcy1 0.39 0.13 2.96 0.0031
## term 0.15 0.00 38.89 <0.0001
## credit_checks 0.23 0.02 12.52 <0.0001
## issue_monthJan-2018 0.05 0.11 0.42 0.6736
## issue_monthMar-2018 -0.04 0.11 -0.39 0.6960