Unpacking required packages for the Analysis

set.seed(1)  #
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)  # contains createDataPartition
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(glmnet)   # performed lasso regression
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loaded glmnet 4.1-8
library(pROC) 
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## 
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(caTools)
library(caret)
library(mltools)  # One-hot encoding
## Warning: package 'mltools' was built under R version 4.3.2
## 
## Attaching package: 'mltools'
## 
## The following object is masked from 'package:tidyr':
## 
##     replace_na
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose

Importing the dataset

travel2 <- read.table("C:/Users/DELL/Desktop/Lasso logistic regression/travel2.csv")
head(travel2, 5)
##    X Approved census_tract Female CoApplicant Race.Ethnicity Black White
## 1 63        1  42071010900      0           0       Hispanic     0     0
## 2 64        1  42029304201      0           0       Hispanic     0     0
## 3 65        1  42091207300      0           0       Hispanic     0     0
## 4 66        1  42059970400      0           1          White     0     1
## 5 67        1  42091203602      0           0       Hispanic     0     0
##   Hispanic Asian Indigenous tract_minority_population_percent
## 1        1     0          0                             18.34
## 2        1     0          0                             25.33
## 3        1     0          0                             32.20
## 4        0     0          0                              1.71
## 5        1     0          0                             58.31
##   tract_to_msa_income_percentage income loan_amount property_value
## 1                            104     74      155000         185000
## 2                             77     63      195000         245000
## 3                             80     51      185000         225000
## 4                            105     28       85000         125000
## 5                             74     44      135000         165000
##   loan_to_income_ratio loan_to_value_ratio log.income log.loan_amount
## 1             2.094595                  80   11.21182        11.95118
## 2             3.095238                  80   11.05089        12.18075
## 3             3.627451                  80   10.83958        12.12811
## 4             3.035714                  70   10.23996        11.35041
## 5             3.068182                  80   10.69194        11.81303
##   log.loan_to_income_ratio log.loan_to_value_ratio
## 1                 0.739360                4.382027
## 2                 1.129865                4.382027
## 3                 1.288530                4.382027
## 4                 1.110447                4.248495
## 5                 1.121085                4.382027

Preliminary Analysis

Coverting binary variables to factor

travel2$Approved <- as.factor(travel2$Approved)
travel2$Female <- as.factor(travel2$Female)
travel2$CoApplicant <- as.factor(travel2$CoApplicant)
travel2$Race.Ethnicity <- as.factor(travel2$Race.Ethnicity)
travel2$Black <- as.factor(travel2$Black)
travel2$White <- as.factor(travel2$White)
travel2$Hispanic <- as.factor(travel2$Hispanic)
travel2$Asian <- as.factor(travel2$Asian)
travel2$Indigenous <- as.factor(travel2$Indigenous)
str(travel2)

Exploratory Analysis: Graphs

# Calculate percentages
percentages <- travel2 %>%
  group_by(Race.Ethnicity) %>%
  summarise(percent = (n() / nrow(travel2)) * 100)

# Create a bar chart with data labels
ggplot(percentages, aes(x = Race.Ethnicity, y = percent)) +
  geom_bar(stat = "identity", fill = "red") +
  geom_text(aes(label = paste0(round(percent, 2), "%")), vjust = -0.5) +
  labs(title = "Percentage of Each Race/Ethnicity", x = "Race/Ethnicity", y = "Percentage") +
  theme_minimal()

# Approved 
percentages_A <- travel2 %>%
  group_by(Approved) %>%
  summarise(percent = (n() / nrow(travel2)) * 100)

# Create a bar chart with data labels
ggplot(percentages_A, aes(x = Approved, y = percent)) +
  geom_bar(stat = "identity", fill = "red") +
  geom_text(aes(label = paste0(round(percent, 2), "%")), vjust = -0.5) +
  labs(title = "Percentage of morgage approve or not", x = "Race/Ethnicity", y = "Percentage") +
  theme_minimal()

#Gender 
percentages_B <- travel2 %>%
  group_by(Female) %>%
  summarise(percent = (n() / nrow(travel2)) * 100)

# Create a bar chart with data labels
ggplot(percentages_B, aes(x = Female, y = percent)) +
  geom_bar(stat = "identity", fill = "red") +
  geom_text(aes(label = paste0(round(percent, 2), "%")), vjust = -0.5) +
  labs(title = "Percentage of Female vs Male", x = "Race/Ethnicity", y = "Percentage") +
  theme_minimal()

Filtering for bid that were approved

Approved <- travel2 %>% filter(Approved ==1)
head(Approved,5)
##    X Approved census_tract Female CoApplicant Race.Ethnicity Black White
## 1 63        1  42071010900      0           0       Hispanic     0     0
## 2 64        1  42029304201      0           0       Hispanic     0     0
## 3 65        1  42091207300      0           0       Hispanic     0     0
## 4 66        1  42059970400      0           1          White     0     1
## 5 67        1  42091203602      0           0       Hispanic     0     0
##   Hispanic Asian Indigenous tract_minority_population_percent
## 1        1     0          0                             18.34
## 2        1     0          0                             25.33
## 3        1     0          0                             32.20
## 4        0     0          0                              1.71
## 5        1     0          0                             58.31
##   tract_to_msa_income_percentage income loan_amount property_value
## 1                            104     74      155000         185000
## 2                             77     63      195000         245000
## 3                             80     51      185000         225000
## 4                            105     28       85000         125000
## 5                             74     44      135000         165000
##   loan_to_income_ratio loan_to_value_ratio log.income log.loan_amount
## 1             2.094595                  80   11.21182        11.95118
## 2             3.095238                  80   11.05089        12.18075
## 3             3.627451                  80   10.83958        12.12811
## 4             3.035714                  70   10.23996        11.35041
## 5             3.068182                  80   10.69194        11.81303
##   log.loan_to_income_ratio log.loan_to_value_ratio
## 1                 0.739360                4.382027
## 2                 1.129865                4.382027
## 3                 1.288530                4.382027
## 4                 1.110447                4.248495
## 5                 1.121085                4.382027

Calculate percentages for the approved

percent <- Approved %>% select(Race.Ethnicity)%>%
  group_by(Race.Ethnicity) %>%
  summarise(percent = (n() / nrow(Approved)) * 100)

# Create a bar chart with data labels
ggplot(percent, aes(x = Race.Ethnicity, y = percent)) +
  geom_bar(stat = "identity", fill = "red") +
  geom_text(aes(label = paste0(round(percent, 2), "%")), vjust = -0.5) +
  labs(title = "Percentage of Each Race/Ethnicity", x = "Race/Ethnicity", y = "Percentage") +
  theme_minimal()

### Filtering for bid that were approved Not Approved

Not_Approved <- travel2 %>% filter(Approved ==0)
head(Not_Approved,5)
##      X Approved census_tract Female CoApplicant Race.Ethnicity Black White
## 13  77        0  42003496200      1           0          White     0     1
## 20  91        0  42071010101      0           0          White     0     1
## 25 102        0  42131400500      0           0          White     0     1
## 26 104        0  42117950300      0           0          White     0     1
## 54 204        0  42063961500      1           0       Hispanic     0     0
##    Hispanic Asian Indigenous tract_minority_population_percent
## 13        0     0          0                              2.31
## 20        0     0          0                              4.74
## 25        0     0          0                              3.90
## 26        0     0          0                              2.80
## 54        1     0          0                              2.78
##    tract_to_msa_income_percentage income loan_amount property_value
## 13                            101     15       85000         115000
## 20                            116     26       95000         155000
## 25                            106     36      155000         155000
## 26                             89     72      155000         155000
## 54                             89     22       45000          55000
##    loan_to_income_ratio loan_to_value_ratio log.income log.loan_amount
## 13             5.666667               80.00   9.615805        11.35041
## 20             3.653846              101.75  10.165852        11.46163
## 25             4.305556              100.00  10.491274        11.95118
## 26             2.152778              101.75  11.184421        11.95118
## 54             2.045455               90.00   9.998798        10.71442
##    log.loan_to_income_ratio log.loan_to_value_ratio
## 13                 1.734601                4.382027
## 20                 1.295780                4.622519
## 25                 1.459906                4.605170
## 26                 0.766759                4.622519
## 54                 0.715620                4.499810

Calculate percentages for the approved

percent_Not <- Not_Approved %>% select(Race.Ethnicity)%>%
  group_by(Race.Ethnicity) %>%
  summarise(percent = (n() / nrow(Approved)) * 100)

# Create a bar chart with data labels
ggplot(percent_Not, aes(x = Race.Ethnicity, y = percent)) +
  geom_bar(stat = "identity", fill = "red") +
  geom_text(aes(label = paste0(round(percent, 2), "%")), vjust = -0.5) +
  labs(title = "Percentage of Each Race/Ethnicity", x = "Race/Ethnicity", y = "Percentage") +
  theme_minimal()

ggplot(percentages_A, aes(x = Approved, y = percent)) +
  geom_bar(stat = "identity", fill = "red") +
  geom_text(aes(label = paste0(round(percent, 2), "%")), vjust = -0.5) +
  labs(title = "Percentage of morgage approve or not", x = "Race/Ethnicity", y = "Percentage") +
  theme_minimal()

Filtering for countinous features

Num <- travel2%>% select(tract_minority_population_percent, 
                         tract_to_msa_income_percentage, 
                         income, loan_amount, property_value, 
                         loan_to_income_ratio, loan_to_value_ratio, 
                         log.income, log.loan_amount, log.loan_to_income_ratio, 
                         log.loan_to_value_ratio )

Relationships between the continous features

cor(Num)
##                                   tract_minority_population_percent
## tract_minority_population_percent                       1.000000000
## tract_to_msa_income_percentage                         -0.269879731
## income                                                 -0.004440587
## loan_amount                                            -0.039315496
## property_value                                         -0.046526651
## loan_to_income_ratio                                    0.080992509
## loan_to_value_ratio                                     0.013315789
## log.income                                             -0.127809429
## log.loan_amount                                        -0.045246032
## log.loan_to_income_ratio                                0.111179359
## log.loan_to_value_ratio                                 0.086989610
##                                   tract_to_msa_income_percentage        income
## tract_minority_population_percent                   -0.269879731 -0.0044405870
## tract_to_msa_income_percentage                       1.000000000  0.0230837034
## income                                               0.023083703  1.0000000000
## loan_amount                                          0.418466095  0.0492488570
## property_value                                       0.352599647  0.0409865682
## loan_to_income_ratio                                 0.003712528 -0.0221270842
## loan_to_value_ratio                                 -0.012458065 -0.0008199386
## log.income                                           0.374396010  0.1108370136
## log.loan_amount                                      0.407190447  0.0314390208
## log.loan_to_income_ratio                             0.020580398 -0.1062472897
## log.loan_to_value_ratio                             -0.110105520 -0.0089751520
##                                    loan_amount property_value
## tract_minority_population_percent -0.039315496    -0.04652665
## tract_to_msa_income_percentage     0.418466095     0.35259965
## income                             0.049248857     0.04098657
## loan_amount                        1.000000000     0.75921867
## property_value                     0.759218667     1.00000000
## loan_to_income_ratio               0.104237989     0.05304767
## loan_to_value_ratio               -0.001251133    -0.01373620
## log.income                         0.681657871     0.54028261
## log.loan_amount                    0.869942616     0.64047528
## log.loan_to_income_ratio           0.199571497     0.09635148
## log.loan_to_value_ratio            0.072136413    -0.15661686
##                                   loan_to_income_ratio loan_to_value_ratio
## tract_minority_population_percent          0.080992509        0.0133157893
## tract_to_msa_income_percentage             0.003712528       -0.0124580649
## income                                    -0.022127084       -0.0008199386
## loan_amount                                0.104237989       -0.0012511327
## property_value                             0.053047669       -0.0137362033
## loan_to_income_ratio                       1.000000000        0.0039016779
## loan_to_value_ratio                        0.003901678        1.0000000000
## log.income                                -0.342244606       -0.0068002486
## log.loan_amount                            0.167027432        0.0007860133
## log.loan_to_income_ratio                   0.661040026        0.0099414419
## log.loan_to_value_ratio                    0.102693824        0.1572979515
##                                     log.income log.loan_amount
## tract_minority_population_percent -0.127809429   -0.0452460324
## tract_to_msa_income_percentage     0.374396010    0.4071904471
## income                             0.110837014    0.0314390208
## loan_amount                        0.681657871    0.8699426158
## property_value                     0.540282610    0.6404752845
## loan_to_income_ratio              -0.342244606    0.1670274324
## loan_to_value_ratio               -0.006800249    0.0007860133
## log.income                         1.000000000    0.6996099634
## log.loan_amount                    0.699609963    1.0000000000
## log.loan_to_income_ratio          -0.434175110    0.3399111976
## log.loan_to_value_ratio           -0.039213133    0.1401361291
##                                   log.loan_to_income_ratio
## tract_minority_population_percent              0.111179359
## tract_to_msa_income_percentage                 0.020580398
## income                                        -0.106247290
## loan_amount                                    0.199571497
## property_value                                 0.096351476
## loan_to_income_ratio                           0.661040026
## loan_to_value_ratio                            0.009941442
## log.income                                    -0.434175110
## log.loan_amount                                0.339911198
## log.loan_to_income_ratio                       1.000000000
## log.loan_to_value_ratio                        0.228287207
##                                   log.loan_to_value_ratio
## tract_minority_population_percent             0.086989610
## tract_to_msa_income_percentage               -0.110105520
## income                                       -0.008975152
## loan_amount                                   0.072136413
## property_value                               -0.156616860
## loan_to_income_ratio                          0.102693824
## loan_to_value_ratio                           0.157297952
## log.income                                   -0.039213133
## log.loan_amount                               0.140136129
## log.loan_to_income_ratio                      0.228287207
## log.loan_to_value_ratio                       1.000000000
summary(Num)
##  tract_minority_population_percent tract_to_msa_income_percentage
##  Min.   :  0.00                    Min.   :  0.0                 
##  1st Qu.:  5.36                    1st Qu.: 90.0                 
##  Median : 10.29                    Median :109.0                 
##  Mean   : 16.98                    Mean   :115.1                 
##  3rd Qu.: 20.06                    3rd Qu.:132.0                 
##  Max.   :100.00                    Max.   :443.0                 
##      income          loan_amount      property_value     loan_to_income_ratio
##  Min.   :     1.0   Min.   :   5000   Min.   :    5000   Min.   :  0.00007   
##  1st Qu.:    55.0   1st Qu.: 135000   1st Qu.:  165000   1st Qu.:  1.73077   
##  Median :    85.0   Median : 205000   Median :  245000   Median :  2.37705   
##  Mean   :   115.1   Mean   : 237796   Mean   :  294765   Mean   :  2.56347   
##  3rd Qu.:   131.0   3rd Qu.: 305000   3rd Qu.:  365000   3rd Qu.:  3.16327   
##  Max.   :365001.0   Max.   :3845000   Max.   :38225000   Max.   :255.00000   
##  loan_to_value_ratio   log.income     log.loan_amount  log.loan_to_income_ratio
##  Min.   :    1.05    Min.   : 6.908   Min.   : 8.517   Min.   :-9.5888         
##  1st Qu.:   80.00    1st Qu.:10.915   1st Qu.:11.813   1st Qu.: 0.5486         
##  Median :   87.67    Median :11.350   Median :12.231   Median : 0.8659         
##  Mean   :   84.97    Mean   :11.368   Mean   :12.187   Mean   : 0.8188         
##  3rd Qu.:   95.00    3rd Qu.:11.783   3rd Qu.:12.628   3rd Qu.: 1.1516         
##  Max.   :77235.80    Max.   :19.715   Max.   :15.162   Max.   : 5.5413         
##  log.loan_to_value_ratio
##  Min.   : 0.04879       
##  1st Qu.: 4.38203       
##  Median : 4.47360       
##  Mean   : 4.40834       
##  3rd Qu.: 4.55388       
##  Max.   :11.25462

Feature Engineering: Scaling numerical variables and adding them to the dataset for use

travel2$tract_minority_population_percent <- scale(travel2$tract_minority_population_percent)
travel2$tract_to_msa_income_percentage <- scale(travel2$tract_to_msa_income_percentage)
travel2$income <- scale(travel2$income)
travel2$loan_amount <- scale(travel2$loan_amount)
travel2$property_value <- scale(travel2$property_value)
travel2$loan_to_income_ratio <- scale(travel2$loan_to_income_ratio)
travel2$loan_to_value_ratio <- scale(travel2$loan_to_value_ratio)
travel2$log.income <- scale(travel2$log.income)
travel2$log.loan_amount <- scale(travel2$log.loan_amount)
travel2$log.loan_amount<- scale(travel2$log.loan_amount)
travel2$log.loan_to_income_ratio <- scale(travel2$log.loan_to_income_ratio)
travel2$log.loan_to_value_ratio <- scale(travel2$log.loan_to_value_ratio)

Analysis: Step by step lasso logistic regression

Step 1. Split the data into training and testing sets. 80% of the data was used to train the model

sample <- sample.split(travel2$census_tract, SplitRatio = 0.8)
train <- subset(travel2, sample == T)
test <- subset(travel2, sample == F)

Step 2. Create a weight variable to correct for imbalance in the response variable (weights for the training set)

p.retain <- sum(train$Approved == 1)/length(train$Approved)
weights <- rep(NA, times=length(train$Approved))
weights[train$Approved == 0] <- p.retain
weights[train$Approved == 1] <- 1-p.retain

Step 3. Regression procedure for training, identify lambda and fit the model using cross-validation

MODEL 1: No interaction terms

Subset the training data to include only the selected predictors and the outcome variable

train_subset <- train%>%select("Black", "White", "Hispanic", "Asian",
                               "Indigenous", "CoApplicant", "Female", 
                               "log.income", "log.loan_amount", "property_value"
                               , "Approved")

Convert the outcome variable to a matrix

y_matrix <- as.matrix(train_subset$Approved)

Convert the predictor variables to a matrix

x_matrix <- as.matrix(train_subset%>%select(-Approved))

Perform cross-validated LASSO logistic regression

model_cv_lasso_NO <- cv.glmnet(y = y_matrix, x = x_matrix,
                               family = "binomial", 
                               weights = weights, 
                               type.measure = "auc")

MODEL 2: With interaction terms

predict_train <- train%>%select("Black", "White", "Hispanic", "Asian", "Indigenous", 
                                "CoApplicant", "Female", "log.income", "log.loan_amount", "property_value",
                                "log.loan_to_income_ratio", "log.loan_to_value_ratio", "Approved")

predict_test <-test%>%select("Black", "White", "Hispanic", "Asian", "Indigenous", 
                             "CoApplicant", "Female", "log.income", "log.loan_amount", "property_value",
                             "log.loan_to_income_ratio", "log.loan_to_value_ratio", "Approved")

Creating the interaction term

predict_test$interaction_term <- predict_test$log.loan_to_income_ratio * predict_test$log.loan_to_value_ratio
predict_train$interaction_term <- predict_train$log.loan_to_income_ratio * predict_train$log.loan_to_value_ratio

Convert the outcome variable to a matrix

Y_matrix <- as.matrix(predict_train$Approved)

Convert the predictor variables to a matrix

X_matrix <- as.matrix(predict_train%>%select(-Approved))

Perform cross-validated LASSO logistic regression with interaction term

model_cv_lasso_interaction <- cv.glmnet(y = Y_matrix, x = X_matrix,
                                        family = "binomial", na.action = NULL, 
                                        weights = weights, 
                                        type.measure = "auc")

### Print the cross-validated results
print(model_cv_lasso_interaction)
## 
## Call:  cv.glmnet(x = X_matrix, y = Y_matrix, weights = weights, type.measure = "auc",      family = "binomial", na.action = NULL) 
## 
## Measure: AUC 
## 
##       Lambda Index Measure       SE Nonzero
## min 0.000276    66  0.6856 0.005479      11
## 1se 0.005956    33  0.6803 0.005471       8

Step 4: Evaluate the model fit

One approach to evaluating model fit is Area-Under-the-Curve

We evaluate the model performance using the “Area Under the Curve (AUC) ## MODEL 1: No interaction terms

probs <- predict(model_cv_lasso_NO, as.matrix(test%>%select("Black", "White", "Hispanic", "Asian",
                                                            "Indigenous", "CoApplicant", "Female", "log.income", "log.loan_amount", "property_value") ), s=model_cv_lasso_NO$lambda.min, type = "response")

# Create ROC object
roc_obj <- roc(as.matrix(
  test$Approved) ~ as.vector(probs), smoothed = TRUE)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Plot ROC curve
plot(roc_obj, main = "AUC Lasso Regression: No Interactions")

# Arguments for confidence interval
ci(roc_obj, alpha = 0.9, stratified = FALSE)
## 95% CI: 0.6615-0.6961 (DeLong)
# Additional plot options
coords(roc_obj, "best", ret = c("threshold", "accuracy"))
##           threshold  accuracy
## threshold 0.4695458 0.7324072
# Display AUC
auc(roc_obj)
## Area under the curve: 0.6788
# Show the plot
legend("bottomright", legend = "95% CI", col = "gray", lty = 2, cex = 0.8)

roc_obj_NO <- roc(as.matrix(test$Approved) ~ probs, smoothed = TRUE,
                  # arguments for ci
                  ci=TRUE, ci.alpha=0.9, stratified=FALSE,
                  # arguments for plot
                  plot=TRUE, auc.polygon=TRUE, max.auc.polygon=TRUE, grid=TRUE, 
                  print.auc=TRUE, show.thres=TRUE, main="AUC Lasso Regression: No Interactions")
## Setting levels: control = 0, case = 1
## Warning in roc.default(response, predictors[, 1], ...): Deprecated use a matrix
## as predictor. Unexpected results may be produced, please pass a numeric vector.
## Setting direction: controls < cases
# Adding confidence intervals to the plot
sens.ci_NO <- ci.se(roc_obj_NO)
plot(sens.ci_NO, type="shape", col="lightblue")
## Warning in plot.ci.se(sens.ci_NO, type = "shape", col = "lightblue"): Low
## definition shape.
plot(sens.ci_NO, type="bars")

MODEL 2: With interaction terms

probs_interaction <- predict(model_cv_lasso_interaction , as.matrix(predict_test%>%select(-Approved)), s=model_cv_lasso_interaction$lambda.min, type = "response")
roc_obj_i <- roc(as.matrix(predict_test%>%select(Approved)) ~ probs_interaction , smoothed = TRUE,
                 # arguments for ci
                 ci=TRUE, ci.alpha=0.9, stratified=FALSE,
                 # arguments for plot
                 plot=TRUE, auc.polygon=TRUE, max.auc.polygon=TRUE, grid=TRUE,
                 print.auc=TRUE, show.thres=TRUE, main="AUC Lasso Regression: with Interactions")
## Setting levels: control = 0, case = 1
## Warning in roc.default(response, predictors[, 1], ...): Deprecated use a matrix
## as predictor. Unexpected results may be produced, please pass a numeric vector.
## Setting direction: controls < cases
## Adding confidence intervals to the plot
sens.ci_i <- ci.se(roc_obj_i)
plot(sens.ci_i, type="shape", col="lightblue")
## Warning in plot.ci.se(sens.ci_i, type = "shape", col = "lightblue"): Low
## definition shape.
plot(sens.ci_i, type="bars")

Step 5. Model Interpretation

Coefficient interpretation is possible for Model 1 (because no intereaction terms) Getting exponential of coefficients (odds ratios)

odds_ratios <- exp(coef(model_cv_lasso_NO, model_cv_lasso_NO$lambda.min))

Converting odds ratios to an array for interpretability

odds_ratios <- array(odds_ratios, dimnames = list(c("Intercept", colnames(train_subset%>%select(-Approved)))))
t(t(odds_ratios))
##                      [,1]
## Intercept       0.8110930
## Black           0.5751037
## White           1.5349017
## Hispanic        1.0000000
## Asian           0.8652843
## Indigenous      0.7744054
## CoApplicant     0.9919661
## Female          1.1609520
## log.income      1.4563389
## log.loan_amount 1.5852917
## property_value  0.5989867

Coefficient interpretation is difficult for Models 2 and 3 because of the numerous interaction terms.

Inspecting coefficients is sometimes still useful (specifically, for ensuring all expected terms are present)

Getting exponential of coefficients (odds ratios)

odds_ratios_i <- exp(coef(model_cv_lasso_interaction, model_cv_lasso_interaction$lambda.min))

Converting odds ratios to an array for interpretability

odds_ratios_i <- array(odds_ratios_i, dimnames = list(c("Intercept", colnames(X_matrix))))
t(t(odds_ratios))
##                      [,1]
## Intercept       0.8110930
## Black           0.5751037
## White           1.5349017
## Hispanic        1.0000000
## Asian           0.8652843
## Indigenous      0.7744054
## CoApplicant     0.9919661
## Female          1.1609520
## log.income      1.4563389
## log.loan_amount 1.5852917
## property_value  0.5989867