Load Libraries

library(readxl)
library(dplyr)
library(ggplot2)
library(tidyr)
library(scales)
library(knitr)

4.1 Data Loading, Inspection & Cleaning

Load Dataset

# The workbook's first row is a descriptive banner, so skip it and let row 2
# supply the column headers.
credit_card_info_default <- readxl::read_excel(
  path = "default_of_credit_card_clients.xls",
  skip = 1
)

Variable Names

# Print a heading, then list every column name in the dataset.
cat("Variable Names:", "\n", sep = "")

## Variable Names:

colnames(credit_card_info_default)

##  [1] "ID"                         "LIMIT_BAL"                 
##  [3] "SEX"                        "EDUCATION"                 
##  [5] "MARRIAGE"                   "AGE"                       
##  [7] "PAY_0"                      "PAY_2"                     
##  [9] "PAY_3"                      "PAY_4"                     
## [11] "PAY_5"                      "PAY_6"                     
## [13] "BILL_AMT1"                  "BILL_AMT2"                 
## [15] "BILL_AMT3"                  "BILL_AMT4"                 
## [17] "BILL_AMT5"                  "BILL_AMT6"                 
## [19] "PAY_AMT1"                   "PAY_AMT2"                  
## [21] "PAY_AMT3"                   "PAY_AMT4"                  
## [23] "PAY_AMT5"                   "PAY_AMT6"                  
## [25] "default payment next month"

Top 5 Rows

# Preview the first five records.
credit_card_info_default %>%
  head(n = 5) %>%
  print()

## # A tibble: 5 × 25
##      ID LIMIT_BAL   SEX EDUCATION MARRIAGE   AGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
##   <dbl>     <dbl> <dbl>     <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     1     20000     2         2        1    24     2     2    -1    -1    -2
## 2     2    120000     2         2        2    26    -1     2     0     0     0
## 3     3     90000     2         2        2    34     0     0     0     0     0
## 4     4     50000     2         2        1    37     0     0     0     0     0
## 5     5     50000     1         2        1    57    -1     0    -1     0     0
## # ℹ 14 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
## #   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
## #   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
## #   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, `default payment next month` <dbl>

Bottom 10 Rows

# Preview the last ten records. print() takes no `caption` argument for a
# tibble; the one previously passed here had no effect on the output (no
# caption was ever shown), so it is dropped.
print(tail(credit_card_info_default, n = 10))

## # A tibble: 10 × 25
##       ID LIMIT_BAL   SEX EDUCATION MARRIAGE   AGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
##    <dbl>     <dbl> <dbl>     <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 29991    140000     1         2        1    41     0     0     0     0     0
##  2 29992    210000     1         2        1    34     3     2     2     2     2
##  3 29993     10000     1         3        1    43     0     0     0    -2    -2
##  4 29994    100000     1         1        2    38     0    -1    -1     0     0
##  5 29995     80000     1         2        2    34     2     2     2     2     2
##  6 29996    220000     1         3        1    39     0     0     0     0     0
##  7 29997    150000     1         3        2    43    -1    -1    -1    -1     0
##  8 29998     30000     1         2        2    37     4     3     2    -1     0
##  9 29999     80000     1         3        1    41     1    -1     0     0     0
## 10 30000     50000     1         2        1    46     0     0     0     0     0
## # ℹ 14 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
## #   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
## #   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
## #   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, `default payment next month` <dbl>

Data Types

Check data types and shape in red

# Colour the heading red only on an interactive console. ANSI escape codes are
# not interpreted in knitted output, where they show up as stray "[31m" text.
heading_text <- "Data Types and Shape:"
use_colour <- interactive() && !isTRUE(getOption("knitr.in.progress"))
if (use_colour) {
  heading_text <- paste0("\033[31m", heading_text, "\033[0m")
}
cat(heading_text, "\n", sep = "")

## Data Types and Shape:

# Compact structure: dimensions plus each column's type and first values.
str(credit_card_info_default)

## tibble [30,000 × 25] (S3: tbl_df/tbl/data.frame)
##  $ ID                        : num [1:30000] 1 2 3 4 5 6 7 8 9 10 ...
##  $ LIMIT_BAL                 : num [1:30000] 20000 120000 90000 50000 50000 50000 500000 100000 140000 20000 ...
##  $ SEX                       : num [1:30000] 2 2 2 2 1 1 1 2 2 1 ...
##  $ EDUCATION                 : num [1:30000] 2 2 2 2 2 1 1 2 3 3 ...
##  $ MARRIAGE                  : num [1:30000] 1 2 2 1 1 2 2 2 1 2 ...
##  $ AGE                       : num [1:30000] 24 26 34 37 57 37 29 23 28 35 ...
##  $ PAY_0                     : num [1:30000] 2 -1 0 0 -1 0 0 0 0 -2 ...
##  $ PAY_2                     : num [1:30000] 2 2 0 0 0 0 0 -1 0 -2 ...
##  $ PAY_3                     : num [1:30000] -1 0 0 0 -1 0 0 -1 2 -2 ...
##  $ PAY_4                     : num [1:30000] -1 0 0 0 0 0 0 0 0 -2 ...
##  $ PAY_5                     : num [1:30000] -2 0 0 0 0 0 0 0 0 -1 ...
##  $ PAY_6                     : num [1:30000] -2 2 0 0 0 0 0 -1 0 -1 ...
##  $ BILL_AMT1                 : num [1:30000] 3913 2682 29239 46990 8617 ...
##  $ BILL_AMT2                 : num [1:30000] 3102 1725 14027 48233 5670 ...
##  $ BILL_AMT3                 : num [1:30000] 689 2682 13559 49291 35835 ...
##  $ BILL_AMT4                 : num [1:30000] 0 3272 14331 28314 20940 ...
##  $ BILL_AMT5                 : num [1:30000] 0 3455 14948 28959 19146 ...
##  $ BILL_AMT6                 : num [1:30000] 0 3261 15549 29547 19131 ...
##  $ PAY_AMT1                  : num [1:30000] 0 0 1518 2000 2000 ...
##  $ PAY_AMT2                  : num [1:30000] 689 1000 1500 2019 36681 ...
##  $ PAY_AMT3                  : num [1:30000] 0 1000 1000 1200 10000 657 38000 0 432 0 ...
##  $ PAY_AMT4                  : num [1:30000] 0 1000 1000 1100 9000 ...
##  $ PAY_AMT5                  : num [1:30000] 0 0 1000 1069 689 ...
##  $ PAY_AMT6                  : num [1:30000] 0 2000 5000 1000 679 ...
##  $ default payment next month: num [1:30000] 1 1 0 0 0 0 0 0 0 0 ...

Shape (Dimensions)

# Colour the heading red only on an interactive console. ANSI escape codes are
# not interpreted in knitted output, where they show up as stray "[31m" text.
heading_text <- "Shape (Rows, Columns):"
use_colour <- interactive() && !isTRUE(getOption("knitr.in.progress"))
if (use_colour) {
  heading_text <- paste0("\033[31m", heading_text, "\033[0m")
}
cat(heading_text, "\n", sep = "")

## Shape (Rows, Columns):

# Number of rows followed by number of columns.
print(dim(credit_card_info_default))

## [1] 30000    25

Duplicate Rows

# Count rows that are exact copies of an earlier row (every column compared).
# NOTE(review): the comparison includes ID, whose values look unique per row
# (1..30000 in the previews), so records that repeat in every other field
# would not be counted here -- confirm whether ID should be excluded.
credit_card_info_default_duplication <- credit_card_info_default %>%
  duplicated() %>%
  sum()
cat("Number of duplicate rows:", credit_card_info_default_duplication, "\n")

## Number of duplicate rows: 0

# Keep one copy of each distinct row (a no-op when nothing is duplicated).
credit_card_info_default <- distinct(credit_card_info_default)
cat("Rows after removing duplicates:", nrow(credit_card_info_default), "\n")

## Rows after removing duplicates: 30000

Result: No duplicate rows were found. Dataset remains at 30,000 rows.

Missing Values

# Tabulate the number of NA values in each column.
missing_summary <- data.frame(
  Variable = names(credit_card_info_default),
  Missing  = colSums(is.na(credit_card_info_default))
)
# print() for a data frame takes no `caption` argument; the one previously
# passed here had no effect on the output, so it is dropped. Row names are
# hidden because they only repeat the Variable column.
print(missing_summary, row.names = FALSE)

##                    Variable Missing
##                          ID       0
##                   LIMIT_BAL       0
##                         SEX       0
##                   EDUCATION       0
##                    MARRIAGE       0
##                         AGE       0
##                       PAY_0       0
##                       PAY_2       0
##                       PAY_3       0
##                       PAY_4       0
##                       PAY_5       0
##                       PAY_6       0
##                   BILL_AMT1       0
##                   BILL_AMT2       0
##                   BILL_AMT3       0
##                   BILL_AMT4       0
##                   BILL_AMT5       0
##                   BILL_AMT6       0
##                    PAY_AMT1       0
##                    PAY_AMT2       0
##                    PAY_AMT3       0
##                    PAY_AMT4       0
##                    PAY_AMT5       0
##                    PAY_AMT6       0
##  default payment next month       0

Result: There are no missing values in any column. No imputation is required.

Outlier Detection – Box Plots (Quantitative Variables)

# Select the quantitative variables by name rather than by position.
# The previous positional index c(2:10, 13:23) pulled in the coded categorical
# columns (SEX, EDUCATION, MARRIAGE) and only some of the PAY_* status codes,
# while leaving out PAY_AMT6.
numeric_cols <- credit_card_info_default %>%
  select(
    all_of(c("LIMIT_BAL", "AGE")),
    starts_with("BILL_AMT"),
    starts_with("PAY_AMT")
  )

# Enlarge the bottom margin for the rotated axis labels, and restore the
# previous graphics settings afterwards so later base plots are unaffected.
old_par <- par(mar = c(10, 4, 4, 2))
boxplot(
  scale(numeric_cols),  # standardise so all variables share one axis
  main = "Boxplots of Quantitative Variables",
  las = 2,
  col = rainbow(ncol(numeric_cols)),
  border = "darkblue",
  cex.axis = 0.7
)
par(old_par)

Handling Outliers

# Use Winsorising (capping) at 1st and 99th percentile for financial variables
# This preserves data volume while limiting extreme influence

#' Winsorise (cap) a vector at a lower and an upper quantile
#'
#' Values below the `low` quantile are raised to it and values above the
#' `high` quantile are lowered to it. Missing values are ignored when the
#' quantiles are computed and stay missing in the result.
#'
#' @param x Vector to cap (numeric in this analysis).
#' @param low Lower quantile probability, a single number in [0, 1].
#' @param high Upper quantile probability, a single number in [0, 1] that is
#'   not smaller than `low`.
#' @return A vector the same length as `x` with extreme values capped.
winsorise <- function(x, low = 0.01, high = 0.99) {
  # Reversed or out-of-range probabilities would otherwise fail obscurely or
  # silently collapse every value onto a single limit.
  stopifnot(
    is.numeric(low), length(low) == 1L, !is.na(low),
    is.numeric(high), length(high) == 1L, !is.na(high),
    low >= 0, high <= 1, low <= high
  )
  limits <- quantile(x, probs = c(low, high), na.rm = TRUE, names = FALSE)
  pmax(pmin(x, limits[2]), limits[1])
}

# Monetary columns to cap: the credit limit plus the six monthly bill amounts
# and the six monthly payment amounts.
financial_cols <- c(
  "LIMIT_BAL",
  paste0("BILL_AMT", 1:6),
  paste0("PAY_AMT", 1:6)
)

# Replace each monetary column with its winsorised version (default limits).
credit_card_info_default <- mutate(
  credit_card_info_default,
  across(.cols = all_of(financial_cols), .fns = winsorise)
)

cat("Outlier treatment complete: Winsorised financial variables at 1st–99th percentile.\n")

## Outlier treatment complete: Winsorised financial variables at 1st–99th percentile.

Boxplots After Handling Outliers

# Select the quantitative variables by name rather than by position.
# The previous positional index c(2:10, 13:23) pulled in the coded categorical
# columns (SEX, EDUCATION, MARRIAGE) and only some of the PAY_* status codes,
# while leaving out PAY_AMT6.
numeric_cols <- credit_card_info_default %>%
  select(
    all_of(c("LIMIT_BAL", "AGE")),
    starts_with("BILL_AMT"),
    starts_with("PAY_AMT")
  )

# Enlarge the bottom margin for the rotated axis labels, and restore the
# previous graphics settings afterwards so later base plots are unaffected.
old_par <- par(mar = c(10, 4, 4, 2))
boxplot(
  scale(numeric_cols),  # standardise so all variables share one axis
  # Title distinguishes this plot from the pre-treatment boxplots.
  main = "Boxplots of Quantitative Variables (After Winsorising)",
  las = 2,
  col = rainbow(ncol(numeric_cols)),
  border = "darkblue",
  cex.axis = 0.7
)
par(old_par)

4.2 Customer Information Summary (Labeled Graphs)

Recode Categorical Variables

# Give the target column a short name, then add labelled factor versions of
# the coded demographic fields plus 5-year age bands. The coded source columns
# are kept alongside the new ones.
credit_card_info_default <- credit_card_info_default %>%
  rename(DEFAULT = `default payment next month`) %>%
  mutate(
    GENDER = factor(SEX, levels = 1:2, labels = c("Male", "Female")),
    EDU_LABEL = factor(
      EDUCATION,
      levels = 0:6,
      labels = c(
        "Unknown", "Graduate School", "University", "High School",
        "Others", "Others2", "Others3"
      )
    ),
    MARITAL = factor(
      MARRIAGE,
      levels = 0:3,
      labels = c("Unknown", "Married", "Single", "Others")
    ),
    DEFAULT_LBL = factor(
      DEFAULT,
      levels = 0:1,
      labels = c("No Default", "Default")
    ),
    # Left-closed bins [20,25), [25,30), ...; include.lowest makes the final
    # bin closed on the right as well, so a value of 80 is still binned.
    AGE_GROUP = cut(
      AGE,
      breaks = seq(20, 80, by = 5),
      right = FALSE,
      include.lowest = TRUE
    )
  )

Gender Distribution

# Count clients per gender, derive each group's percentage share, and draw a
# labelled bar chart with a fixed two-colour palette and no legend.
credit_card_info_default %>%
  count(GENDER) %>%
  mutate(pct = n / sum(n) * 100) %>%
  ggplot(mapping = aes(x = GENDER, y = n, fill = GENDER)) +
  geom_col(width = 0.5) +
  geom_text(
    mapping = aes(label = paste0(scales::comma(n), "\n(", round(pct, 1), "%)")),
    vjust = -0.3,
    size = 4
  ) +
  scale_fill_manual(values = c(Male = "#2196F3", Female = "#E91E63")) +
  # clip = "off" lets the label above the tallest bar extend past the panel.
  coord_cartesian(clip = "off") +
  labs(
    title = "Credit Cards Issued by Gender",
    x = "Gender",
    y = "Number of Clients"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Education Level Distribution

# Count clients per education label and plot the bars from most to least
# common, each annotated with its count and percentage share.
credit_card_info_default %>%
  count(EDU_LABEL) %>%
  mutate(pct = n / sum(n) * 100) %>%
  ggplot(mapping = aes(x = reorder(EDU_LABEL, -n), y = n, fill = EDU_LABEL)) +
  geom_col(width = 0.6) +
  geom_text(
    mapping = aes(label = paste0(comma(n), "\n(", round(pct, 1), "%)")),
    vjust = -0.3,
    size = 3.5
  ) +
  # Add 15% headroom above the bars so the two-line labels are not cut off.
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "Distribution by Education Level",
    x = "Education Level",
    y = "Number of Clients"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Age Distribution

# Histogram of client age in 2-year bins, stacked by default status.
ggplot(
  data = credit_card_info_default,
  mapping = aes(x = AGE, fill = DEFAULT_LBL)
) +
  geom_histogram(binwidth = 2, position = "stack", colour = "white") +
  scale_fill_manual(
    values = c("No Default" = "#4CAF50", "Default" = "#F44336")
  ) +
  labs(
    title = "Age Distribution of Clients",
    x = "Age (years)",
    y = "Count",
    fill = "Default Status"
  ) +
  theme_minimal()

Marital Status Distribution

# Pie chart of marital status: a single stacked bar bent into polar
# coordinates, with each slice labelled by category and percentage share.
credit_card_info_default %>%
  count(MARITAL) %>%
  mutate(pct = n / sum(n) * 100) %>%
  ggplot(mapping = aes(x = "", y = n, fill = MARITAL)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  geom_text(
    mapping = aes(label = paste0(MARITAL, "\n", round(pct, 1), "%")),
    # Centre each label within its slice; overlapping labels are dropped.
    position = position_stack(vjust = 0.5),
    size = 3.5,
    check_overlap = TRUE
  ) +
  labs(title = "Distribution by Marital Status") +
  theme_void() +
  theme(legend.position = "none")

Default Payment Status

# Bar chart of defaulters versus non-defaulters with count and share labels.
credit_card_info_default %>%
  count(DEFAULT_LBL) %>%
  mutate(pct = n / sum(n) * 100) %>%
  ggplot(mapping = aes(x = DEFAULT_LBL, y = n, fill = DEFAULT_LBL)) +
  geom_col(width = 0.5) +
  geom_text(
    mapping = aes(label = paste0(comma(n), "\n(", round(pct, 1), "%)")),
    vjust = -0.3,
    size = 4.5
  ) +
  scale_fill_manual(
    values = c("No Default" = "#4CAF50", "Default" = "#F44336")
  ) +
  # clip = "off" lets the label above the tallest bar extend past the panel.
  coord_cartesian(clip = "off") +
  labs(
    title = "Default Payment Status",
    x = "Default Status",
    y = "Number of Clients"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Summary Statement

# Headline figures quoted in the summary sentence below.
pct_under40   <- round(mean(credit_card_info_default$AGE < 40) * 100, 1)
uni_count     <- sum(credit_card_info_default$EDUCATION == 2)
female_count  <- sum(credit_card_info_default$SEX == 2)
male_count    <- sum(credit_card_info_default$SEX == 1)
female_ratio  <- round(female_count / male_count, 2)
default_rate  <- round(mean(credit_card_info_default$DEFAULT) * 100, 1)

# The client total is taken from the data instead of a hard-coded "30,000",
# so the sentence stays correct if rows are ever removed; both counts use the
# same thousands separator.
cat(paste0(
  "The EDA shows that ", pct_under40, "% of the clients are less than 40 years.\n",
  "Moreover, ", comma(uni_count), " out of ",
  comma(nrow(credit_card_info_default)),
  " have university-level education.\n",
  "In addition, the number of credit cards issued to female is ", female_ratio,
  " times the number issued to Male.\n",
  "Generally, ", default_rate, "% of the loans were defaulted.\n"
))

## The EDA shows that 69.5% of the clients are less than 40 years.
## Moreover, 14,030 out of 30,000 have university-level education.
## In addition, the number of credit cards issued to female is 1.52 times the number issued to Male.
## Generally, 22.1% of the loans were defaulted.

4.3 Default Rates by Subgroups

By Age Group (5-Year Bins)

# Per age band: client count, number of defaults, and default rate (%).
# The rate drives both bar height and the green-to-red fill gradient.
credit_card_info_default %>%
  group_by(AGE_GROUP) %>%
  summarise(
    Clients = n(),
    Defaults = sum(DEFAULT),
    Default_Rate = round(mean(DEFAULT) * 100, 1)
  ) %>%
  ggplot(mapping = aes(x = AGE_GROUP, y = Default_Rate, fill = Default_Rate)) +
  geom_col() +
  geom_text(
    mapping = aes(label = paste0(Default_Rate, "%")),
    vjust = -0.3,
    size = 3.5
  ) +
  scale_fill_gradient(low = "#81C784", high = "#D32F2F") +
  labs(
    title = "Default Rate by Age Group (5-Year Bins)",
    x = "Age Group",
    y = "Default Rate (%)",
    fill = "Rate (%)"
  ) +
  theme_minimal() +
  # Slant the interval labels so neighbouring bins do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Insight: Default rates are highest at the two ends of the age range: the youngest group (20–24) defaults at 27.2%, and clients aged 60+ default at 29% or more. Clients aged 30–39 have relatively lower default rates of around 19–21%.

Default rate By Education Level

# Default rate (%) and client count per education label, with bars ordered
# from the highest default rate to the lowest.
credit_card_info_default %>%
  group_by(EDU_LABEL) %>%
  summarise(
    Default_Rate = round(mean(DEFAULT) * 100, 1),
    Clients = n()
  ) %>%
  # Optional (disabled): drop the small, unclassified categories.
  # filter(!EDU_LABEL %in% c("Unknown", "Others2", "Others3")) %>%
  ggplot(
    mapping = aes(
      x = reorder(EDU_LABEL, -Default_Rate),
      y = Default_Rate,
      fill = EDU_LABEL
    )
  ) +
  geom_col(width = 0.5) +
  geom_text(mapping = aes(label = paste0(Default_Rate, "%")), vjust = -0.3) +
  labs(
    title = "Default Rate by Education Level",
    x = "Education",
    y = "Default Rate (%)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Insight: High school graduates have the highest default rate (25.2%), followed by university graduates (23.7%). Graduate school clients default least (19.2%), suggesting higher education correlates with lower default risk.

By Gender

# Default rate (%) per gender, drawn with the same palette as the gender
# distribution chart.
credit_card_info_default %>%
  group_by(GENDER) %>%
  summarise(Default_Rate = round(mean(DEFAULT) * 100, 1)) %>%
  ggplot(mapping = aes(x = GENDER, y = Default_Rate, fill = GENDER)) +
  geom_col(width = 0.4) +
  geom_text(
    mapping = aes(label = paste0(Default_Rate, "%")),
    vjust = -0.3,
    size = 5
  ) +
  scale_fill_manual(values = c(Male = "#2196F3", Female = "#E91E63")) +
  labs(
    title = "Default Rate by Gender",
    x = NULL,
    y = "Default Rate (%)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Insight: Males default at a higher rate (24.2%) than females (20.8%), despite females receiving more credit cards overall.

By Marital Status

# Default rate (%) per marital status; the "Unknown" category is removed
# after the rates are computed, so it is simply not plotted.
credit_card_info_default %>%
  group_by(MARITAL) %>%
  summarise(Default_Rate = round(mean(DEFAULT) * 100, 1)) %>%
  filter(MARITAL != "Unknown") %>%
  ggplot(mapping = aes(x = MARITAL, y = Default_Rate, fill = MARITAL)) +
  geom_col(width = 0.4) +
  geom_text(mapping = aes(label = paste0(Default_Rate, "%")), vjust = -0.3) +
  labs(
    title = "Default Rate by Marital Status",
    x = NULL,
    y = "Default Rate (%)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Insight: Married clients have a slightly higher default rate (23.5%) compared to single clients (20.9%).

4.4 High-Risk Customer Segment

# Segment definition:
# Customers aged ≤ 30 with PAY_0 >= 2 (payment delayed ≥ 2 months in Sept 2005)
# whose education is recorded as high school or one of the "Others" codes
# (EDUCATION 3–6). The comment and printed definition previously omitted the
# education condition even though the filter applies it.

segment <- credit_card_info_default %>%
  filter(
    AGE <= 30,
    PAY_0 >= 2,
    EDU_LABEL %in% c("High School", "Others", "Others2", "Others3")
  )

# Rates are percentages rounded to one decimal; the relative risk below is
# computed from these rounded figures.
overall_rate  <- round(mean(credit_card_info_default$DEFAULT) * 100, 1)
segment_rate  <- round(mean(segment$DEFAULT) * 100, 1)
segment_size  <- nrow(segment)

cat("=== HIGH-RISK SEGMENT DEFINITION ===\n")

## === HIGH-RISK SEGMENT DEFINITION ===

cat("Segment: Clients aged ≤ 30 with a repayment delay ≥ 2 months (PAY_0 ≥ 2) and high school or other education (EDUCATION 3–6)\n\n")

## Segment: Clients aged ≤ 30 with a repayment delay ≥ 2 months (PAY_0 ≥ 2) and high school or other education (EDUCATION 3–6)

cat("Segment size:        ", segment_size, "clients\n")

## Segment size:         178 clients

cat("Segment default rate:", segment_rate, "%\n")

## Segment default rate: 62.4 %

cat("Overall default rate:", overall_rate, "%\n")

## Overall default rate: 22.1 %

cat("Relative risk:       ", round(segment_rate / overall_rate, 1), "x higher than average\n")

## Relative risk:        2.8 x higher than average

# Compare the overall default rate with the high-risk segment's rate.
# The segment filter also restricts education to high school / "Others", so
# the bar label states that condition too. The label is defined once because
# it must match exactly between the data and the colour mapping.
segment_label <- paste0(
  "High-Risk Segment\n",
  "(Age ≤ 30, PAY_0 ≥ 2,\n",
  "High School/Other education)"
)

data.frame(
  Group = c("Overall", segment_label),
  Rate  = c(overall_rate, segment_rate)
) %>%
  ggplot(aes(x = Group, y = Rate, fill = Group)) +
  geom_col(width = 0.45) +
  geom_text(aes(label = paste0(Rate, "%")), vjust = -0.4, size = 5) +
  scale_fill_manual(
    values = setNames(c("#64B5F6", "#D32F2F"), c("Overall", segment_label))
  ) +
  labs(title = "Default Rate: Overall vs. High-Risk Segment",
       x = NULL, y = "Default Rate (%)") +
  theme_minimal() +
  theme(legend.position = "none")

Finding: Young clients (≤ 30 years old) with high-school or "other" education who were already experiencing repayment delays of ≥ 2 months in September 2005 represent a critical high-risk segment. Their default rate of 62.4% is 2.8× the overall rate of 22.1%. This segment of 178 clients represents a concentrated, identifiable risk group.

4.5 Practical Recommendations for Managing Default Risk

Based on the EDA findings, the following five recommendations are proposed:

Recommendation 1 – Early Intervention for Young Clients (Age ≤ 30)

Young clients, especially the 20–24 age group, exhibit some of the highest default rates (27.2%). The bank should implement proactive financial counselling for first-time card holders under 30, including automated alerts when spending approaches 80% of the credit limit, and tailored repayment reminder systems.

Recommendation 2 – Immediate Escalation on Repayment Delays

Clients with even a 1-month delay (PAY_0 ≥ 1) are at significantly elevated risk. The bank should trigger an automated risk escalation workflow as soon as a delay is detected — including outbound calls, SMS reminders, and temporary credit limit reduction — to prevent the delay from compounding into default.

Recommendation 3 – Education-Tiered Credit Limits and Products

High school educated clients default at 25.2% vs. 19.2% for graduate school clients. The bank should offer education-adjusted credit products: lower initial limits with graduated increases tied to demonstrated repayment history for less-educated segments, while offering premium products to lower-risk, higher-education segments.

Recommendation 4 – Gender-Sensitive Risk Strategies

Male clients default at 24.2% vs. female clients at 20.8%. The bank can design targeted retention and monitoring programs for male clients, such as enhanced credit health dashboards or peer-comparison spending summaries, which research shows can reduce male overspending and late payments.

Recommendation 5 – Predictive Scoring Using PAY History Variables

The repayment history variables (PAY_0 through PAY_6) are the strongest observable indicators of imminent default, especially when delays accumulate across consecutive months. The bank should develop a real-time credit risk score using these variables updated monthly, flagging any client whose score crosses a defined threshold for manual review and proactive outreach.

Session Info

# Record the R version, platform, and package versions used for this report.
utils::sessionInfo()

## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United Kingdom.utf8 
## [2] LC_CTYPE=English_United Kingdom.utf8   
## [3] LC_MONETARY=English_United Kingdom.utf8
## [4] LC_NUMERIC=C                           
## [5] LC_TIME=English_United Kingdom.utf8    
## 
## time zone: Africa/Kigali
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] knitr_1.51    scales_1.4.0  tidyr_1.3.2   ggplot2_4.0.3 dplyr_1.2.1  
## [6] readxl_1.5.0 
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       jsonlite_2.0.0     compiler_4.5.1     tidyselect_1.2.1  
##  [5] jquerylib_0.1.4    yaml_2.3.12        fastmap_1.2.0      R6_2.6.1          
##  [9] labeling_0.4.3     generics_0.1.4     tibble_3.3.1       bslib_0.10.0      
## [13] pillar_1.11.1      RColorBrewer_1.1-3 rlang_1.2.0        cachem_1.1.0      
## [17] xfun_0.57          sass_0.4.10        S7_0.2.2           otel_0.2.0        
## [21] cli_3.6.6          withr_3.0.2        magrittr_2.0.5     digest_0.6.39     
## [25] grid_4.5.1         rstudioapi_0.18.0  lifecycle_1.0.5    vctrs_0.7.3       
## [29] evaluate_1.0.5     glue_1.8.1         farver_2.1.2       cellranger_1.1.0  
## [33] purrr_1.2.2        rmarkdown_2.31     tools_4.5.1        pkgconfig_2.0.3   
## [37] htmltools_0.5.9

Exploratory Analysis of Credit Card Defaults

MSDA 9113 – R For Data Science | Final Exam Section B

Student Name | Student Number

2026-06-09