hw 3 Emmanuel

library(ISLR2)
library(class)
library(ggplot2)
library(gmodels)
library(scales)
library(caret)

## Loading required package: lattice

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.1.8
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::discard()    masks scales::discard()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ purrr::lift()       masks caret::lift()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(pROC)

## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## 
## The following object is masked from 'package:gmodels':
## 
##     ci
## 
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

library(tidymodels)

## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom        1.0.3     ✔ rsample      1.1.1
## ✔ dials        1.2.0     ✔ tune         1.1.0
## ✔ infer        1.0.4     ✔ workflows    1.1.3
## ✔ modeldata    1.1.0     ✔ workflowsets 1.0.0
## ✔ parsnip      1.0.4     ✔ yardstick    1.1.0
## ✔ recipes      1.0.5     
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard()         masks scales::discard()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ recipes::fixed()         masks stringr::fixed()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ purrr::lift()            masks caret::lift()
## ✖ yardstick::precision()   masks caret::precision()
## ✖ yardstick::recall()      masks caret::recall()
## ✖ yardstick::sensitivity() masks caret::sensitivity()
## ✖ yardstick::spec()        masks readr::spec()
## ✖ yardstick::specificity() masks caret::specificity()
## ✖ recipes::step()          masks stats::step()
## • Use tidymodels_prefer() to resolve common conflicts.

# Working copy of the `Default` data set (not defined in this file; presumably
# provided by the ISLR2 package attached at the top).
dataset <- Default

# Turn any ordered factor columns into plain (unordered) factors.
# across() + where() is the current replacement for the superseded mutate_if().
df <- dataset %>%
  mutate(across(where(is.ordered), function(x) factor(x, ordered = FALSE)))

# Seed the RNG so the train/test partition is reproducible.
set.seed(123)
# 70/30 split, stratified on the outcome column `default`.
churn_split <- initial_split(df, prop = 0.7, strata = default)
churn_train <- training(churn_split)
churn_test <- testing(churn_split)

# Model 1: logistic regression of `default` on `balance` only, assessed with
# 10-fold cross-validation.
# Re-seed right before train() so the CV fold assignment is reproducible and
# does not depend on how much randomness earlier code consumed. Models trained
# after the same seed are resampled on the same folds, which is what a
# resamples() comparison expects.
set.seed(123)
cv_mod1 <- caret::train(
  default ~ balance,
  data = churn_train,
  method = "glm",
  family = "binomial",
  trControl = trainControl(method = "cv", number = 10)
)

# Confusion matrix on the training data itself, i.e. in-sample performance
# rather than a hold-out estimate. Both factors are releveled so that "Yes"
# is the first level and is reported as the positive class.
pred_class <- predict(cv_mod1, churn_train)
confusionMatrix(
  data = relevel(pred_class, ref = "Yes"),
  reference = relevel(churn_train$default, ref = "Yes")
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  Yes   No
##        Yes   71   30
##        No   159 6740
##                                           
##                Accuracy : 0.973           
##                  95% CI : (0.9689, 0.9767)
##     No Information Rate : 0.9671          
##     P-Value [Acc > NIR] : 0.002652        
##                                           
##                   Kappa : 0.4173          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.30870         
##             Specificity : 0.99557         
##          Pos Pred Value : 0.70297         
##          Neg Pred Value : 0.97695         
##              Prevalence : 0.03286         
##          Detection Rate : 0.01014         
##    Detection Prevalence : 0.01443         
##       Balanced Accuracy : 0.65213         
##                                           
##        'Positive' Class : Yes             
##

# Model 2: logistic regression of `default` on `balance` and `student`,
# assessed with 10-fold cross-validation.
# Re-seed right before train() so the CV fold assignment is reproducible and
# does not depend on how much randomness earlier code consumed. Models trained
# after the same seed are resampled on the same folds, which is what a
# resamples() comparison expects.
set.seed(123)
cv_mod2 <- caret::train(
  default ~ balance + student,
  data = churn_train,
  method = "glm",
  family = "binomial",
  trControl = trainControl(method = "cv", number = 10)
)

# Confusion matrix on the training data itself, i.e. in-sample performance
# rather than a hold-out estimate. Both factors are releveled so that "Yes"
# is the first level and is reported as the positive class.
pred_class <- predict(cv_mod2, churn_train)
confusionMatrix(
  data = relevel(pred_class, ref = "Yes"),
  reference = relevel(churn_train$default, ref = "Yes")
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  Yes   No
##        Yes   76   30
##        No   154 6740
##                                           
##                Accuracy : 0.9737          
##                  95% CI : (0.9697, 0.9773)
##     No Information Rate : 0.9671          
##     P-Value [Acc > NIR] : 0.0008272       
##                                           
##                   Kappa : 0.4408          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.33043         
##             Specificity : 0.99557         
##          Pos Pred Value : 0.71698         
##          Neg Pred Value : 0.97766         
##              Prevalence : 0.03286         
##          Detection Rate : 0.01086         
##    Detection Prevalence : 0.01514         
##       Balanced Accuracy : 0.66300         
##                                           
##        'Positive' Class : Yes             
##

# Model 3: logistic regression of `default` on `balance`, `income` and
# `student`, assessed with 10-fold cross-validation.
# Re-seed right before train() so the CV fold assignment is reproducible and
# does not depend on how much randomness earlier code consumed. Models trained
# after the same seed are resampled on the same folds, which is what a
# resamples() comparison expects.
set.seed(123)
cv_mod3 <- caret::train(
  default ~ balance + income + student,
  data = churn_train,
  method = "glm",
  family = "binomial",
  trControl = trainControl(method = "cv", number = 10)
)

# Confusion matrix on the training data itself, i.e. in-sample performance
# rather than a hold-out estimate. Both factors are releveled so that "Yes"
# is the first level and is reported as the positive class.
pred_class <- predict(cv_mod3, churn_train)
confusionMatrix(
  data = relevel(pred_class, ref = "Yes"),
  reference = relevel(churn_train$default, ref = "Yes")
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  Yes   No
##        Yes   76   30
##        No   154 6740
##                                           
##                Accuracy : 0.9737          
##                  95% CI : (0.9697, 0.9773)
##     No Information Rate : 0.9671          
##     P-Value [Acc > NIR] : 0.0008272       
##                                           
##                   Kappa : 0.4408          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.33043         
##             Specificity : 0.99557         
##          Pos Pred Value : 0.71698         
##          Neg Pred Value : 0.97766         
##              Prevalence : 0.03286         
##          Detection Rate : 0.01086         
##    Detection Prevalence : 0.01514         
##       Balanced Accuracy : 0.66300         
##                                           
##        'Positive' Class : Yes             
##

# Compare the cross-validation results of the three logistic models side by
# side: resamples() gathers each model's per-fold metrics and summary() prints
# their distribution (Accuracy and Kappa) across the 10 folds.
# The list names (Table_1..Table_3) become the row labels in the printed output.
summary(
  resamples(
    list(
      Table_1 = cv_mod1,
      Table_2 = cv_mod2,
      Table_3 = cv_mod3
    )
  )
)

## 
## Call:
## summary.resamples(object = resamples(list(Table_1 = cv_mod1, Table_2 =
##  cv_mod2, Table_3 = cv_mod3)))
## 
## Models: Table_1, Table_2, Table_3 
## Number of resamples: 10 
## 
## Accuracy 
##              Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## Table_1 0.9642857 0.9717857 0.9735714 0.9731429 0.9753571 0.9800000    0
## Table_2 0.9671429 0.9714286 0.9735714 0.9737143 0.9757143 0.9800000    0
## Table_3 0.9685714 0.9717857 0.9728571 0.9735714 0.9764286 0.9785714    0
## 
## Kappa 
##              Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## Table_1 0.1796362 0.3665542 0.4385241 0.4177551 0.4844803 0.5542618    0
## Table_2 0.2452653 0.4023762 0.4305530 0.4367752 0.5071568 0.6016584    0
## Table_3 0.3108944 0.3482950 0.4176434 0.4326191 0.5323200 0.5677345    0

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Example chunk from the R Markdown template: per-column summary statistics
# of the `cars` data set.
summary(cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

hw 3 Emmanuel

Emmanuel Ferdinand Anggawirja

2023-04-27

R Markdown

Including Plots