LDA and Naive Bayes Analysis

Install Necessary Packages

install.packages(c("dplyr", "gt", "tidymodels", "stringr", "ISLR2", "klaR"))

Load Libraries

library(ISLR2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(gt)
library(tidymodels)

## ── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──

## ✔ broom        1.0.7     ✔ rsample      1.2.1
## ✔ dials        1.4.0     ✔ tibble       3.2.1
## ✔ ggplot2      3.5.1     ✔ tidyr        1.3.1
## ✔ infer        1.0.7     ✔ tune         1.3.0
## ✔ modeldata    1.4.0     ✔ workflows    1.2.0
## ✔ parsnip      1.3.1     ✔ workflowsets 1.1.0
## ✔ purrr        1.0.4     ✔ yardstick    1.3.2
## ✔ recipes      1.1.1

## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ recipes::step()  masks stats::step()

library(stringr)

## 
## Attaching package: 'stringr'

## The following object is masked from 'package:recipes':
## 
##     fixed

library(tidyr)
library(MASS)

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## The following object is masked from 'package:ISLR2':
## 
##     Boston

library(klaR)

Load Dataset

data("Default")

Train the LDA Model

lda_default_balance_student <- lda(default ~ balance + student, data = Default)
lda_posterior <- predict(lda_default_balance_student, newdata = Default)$posterior

LDA Predictions

lda_pred <- bind_cols(
  pred_default = predict(lda_default_balance_student, newdata = Default)$class,
  Default
)

Modify Threshold for 0.2 Probability

lda_pred_20 <- bind_cols(
  Default,
  posterior_prob_default = lda_posterior[, 2]
) %>%
  mutate(pred_default = ifelse(posterior_prob_default > 0.2, "Yes", "No"))

Train the Naive Bayes Model

nb_model <- NaiveBayes(default ~ balance + student, data = Default)
nb_prob_default <- predict(nb_model, newdata = Default)$posterior[, 2]

Naive Bayes Predictions

nb_pred <- Default %>%
  mutate(nb_prob_default = nb_prob_default)

Table 4.4: LDA Prediction Table

lda_pred %>%
  count(pred_default, default) %>%
  pivot_wider(names_from = default, values_from = n) %>%
  mutate(Total = No + Yes) %>%
  gt(rowname_col = "pred_default") %>%
  gt::tab_spanner(label = "True default status", columns = everything()) %>%
  gt::tab_stubhead("Predicted") %>%
  gt::grand_summary_rows(fns = list(Total = ~round(sum(.), 0))) %>%
  gt::tab_caption("Table 4.4: Confusion Matrix for LDA Model")

Table 4.4: Confusion Matrix for LDA Model
Predicted	True default status
Predicted	No	Yes	Total
No	9644	252	9896
Yes	23	81	104
Total	9667	333	10000

Table 4.5: LDA Threshold (0.2) Prediction Table

lda_pred_20 %>%
  count(pred_default, default) %>%
  pivot_wider(names_from = default, values_from = n) %>%
  mutate(Total = No + Yes) %>%
  gt(rowname_col = "pred_default") %>%
  gt::tab_spanner(label = "True default status", columns = everything()) %>%
  gt::tab_stubhead("Predicted") %>%
  gt::grand_summary_rows(fns = list(Total = ~round(sum(.), 0))) %>%
  gt::tab_caption("Table 4.5: Confusion Matrix for LDA Model with 0.2 Threshold")

Table 4.5: Confusion Matrix for LDA Model with 0.2 Threshold
Predicted	True default status
Predicted	No	Yes	Total
No	9432	138	9570
Yes	235	195	430
Total	9667	333	10000

Generate Naive Bayes Predictions for Different Probability Thresholds

nb_pred <- nb_pred %>%
  mutate(
    pred_default_0.5 = ifelse(nb_prob_default > 0.5, "Yes", "No"),
    pred_default_0.2 = ifelse(nb_prob_default > 0.2, "Yes", "No")
  )

Table 4.8: Naive Bayes Prediction Table

nb_pred %>%
  count(pred_default_0.5, default) %>%
  pivot_wider(names_from = default, values_from = n) %>%
  mutate(Total = No + Yes) %>%
  gt(rowname_col = "pred_default_0.5") %>%
  gt::tab_spanner(label = "True default status", columns = everything()) %>%
  gt::tab_stubhead("Predicted") %>%
  gt::grand_summary_rows(fns = list(Total = ~round(sum(.), 0))) %>%
  gt::tab_caption("Table 4.8: Confusion Matrix for Naive Bayes Model with 0.5 Threshold")

Table 4.8: Confusion Matrix for Naive Bayes Model with 0.5 Threshold
Predicted	True default status
Predicted	No	Yes	Total
No	9621	244	9865
Yes	46	89	135
Total	9667	333	10000

LDA and Naive Bayes Analysis

Msibi Sandziso_111021084

2025-03-30

Install Necessary Packages

Load Libraries

Load Dataset

Train the LDA Model

LDA Predictions

Modify Threshold for 0.2 Probability

Train the Naive Bayes Model

Naive Bayes Predictions

Table 4.4: LDA Prediction Table

Table 4.5: LDA Threshold (0.2) Prediction Table

Generate Naive Bayes Predictions for Different Probability Thresholds

Table 4.8: Naive Bayes Prediction Table