knitr::opts_chunk$set(echo = TRUE)
library(C50)
## Warning: package 'C50' was built under R version 4.4.3
library(gmodels)
## Warning: package 'gmodels' was built under R version 4.4.3
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the credit data.
# NOTE(review): this absolute path only exists on one machine; point it at a
# project-relative location before sharing or re-knitting elsewhere.
credit <- read.csv("C:/Users/Olurebi1800/Downloads/credit.csv")

# Shuffle the rows so that a later positional train/test split is random.
# The seed fixes the permutation, making the run reproducible.
set.seed(12345)
# Draw one random sort key per row of the data actually loaded (not a
# hard-coded 1000), so every row is kept exactly once whatever the file size.
credit_rand <- credit[order(runif(nrow(credit))), ]

# Shuffling only reorders rows, so the five-number summary of `amount` has to
# be identical before and after the shuffle.
summary(credit[["amount"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     250    1366    2320    3271    3972   18424
summary(credit_rand[["amount"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     250    1366    2320    3271    3972   18424
# The leading values, on the other hand, should differ once the row order has
# changed.
head(credit[["amount"]], n = 10)
##  [1] 1169 5951 2096 7882 4870 9055 2835 6948 3059 5234
head(credit_rand[["amount"]], n = 10)
##  [1] 1199 2576 1103 4020 1501 1568 4281  918 2629 1845
# Split the shuffled data: the first 900 rows train the models and all
# remaining rows are held out for testing. The upper bound comes from the data
# rather than a hard-coded 1000, so no row is dropped or padded with NA if the
# file size changes.
n_train <- 900
stopifnot(nrow(credit_rand) > n_train)  # the test set must not be empty
credit_train <- credit_rand[seq_len(n_train), ]
credit_test <- credit_rand[(n_train + 1):nrow(credit_rand), ]

# Compare the share of each `default` class in the training and test
# partitions to see whether the random split kept the class balance.
proportions(table(credit_train$default))
## 
##         1         2 
## 0.7022222 0.2977778
proportions(table(credit_test$default))
## 
##    1    2 
## 0.68 0.32
# Convert the training labels to a factor before passing them to C5.0() as the
# outcome.
credit_train$default <- as.factor(credit_train$default)
# Fit a single C5.0 decision tree. Predictors are every column of the training
# data except `default` (selected by name); the outcome is the `default` factor.
credit_model <- C5.0(
  credit_train[, setdiff(names(credit_train), "default")],
  credit_train$default)
# Print the fitted model's overview (call, sample/predictor counts, tree size).
credit_model
## 
## Call:
## C5.0.default(x = credit_train[, setdiff(names(credit_train), "default")], y
##  = credit_train$default)
## 
## Classification Tree
## Number of samples: 900 
## Number of predictors: 20 
## 
## Tree size: 57 
## 
## Non-standard options: attempt to group attributes
# Print the detailed model report: the full decision tree, the evaluation on
# the training data (error count and confusion matrix) and attribute usage.
summary(credit_model)
## 
## Call:
## C5.0.default(x = credit_train[, setdiff(names(credit_train), "default")], y
##  = credit_train$default)
## 
## 
## C5.0 [Release 2.07 GPL Edition]      Thu Apr 10 13:58:45 2025
## -------------------------------
## 
## Class specified by attribute `outcome'
## 
## Read 900 cases (21 attributes) from undefined.data
## 
## Decision tree:
## 
## checking_balance = unknown: 1 (358/44)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...foreign_worker = no:
##     :...installment_plan in {none,stores}: 1 (17/1)
##     :   installment_plan = bank:
##     :   :...residence_history <= 3: 2 (2)
##     :       residence_history > 3: 1 (2)
##     foreign_worker = yes:
##     :...credit_history in {fully repaid,fully repaid this bank}: 2 (61/20)
##         credit_history in {critical,repaid,delayed}:
##         :...months_loan_duration <= 11: 1 (76/13)
##             months_loan_duration > 11:
##             :...savings_balance = > 1000 DM: 1 (13)
##                 savings_balance in {< 100 DM,101 - 500 DM,501 - 1000 DM,
##                 :                   unknown}:
##                 :...checking_balance = > 200 DM:
##                     :...dependents > 1: 2 (3)
##                     :   dependents <= 1:
##                     :   :...credit_history in {repaid,delayed}: 1 (23/3)
##                     :       credit_history = critical:
##                     :       :...amount <= 2337: 2 (3)
##                     :           amount > 2337: 1 (6)
##                     checking_balance = < 0 DM:
##                     :...other_debtors = guarantor:
##                     :   :...credit_history = critical: 2 (1)
##                     :   :   credit_history in {repaid,delayed}: 1 (11/1)
##                     :   other_debtors in {none,co-applicant}:
##                     :   :...job = mangement self-employed: 1 (26/6)
##                     :       job in {unskilled resident,skilled employee,
##                     :       :       unemployed non-resident}:
##                     :       :...purpose in {radio/tv,others,repairs,
##                     :           :           domestic appliances,
##                     :           :           retraining}: 2 (33/10)
##                     :           purpose = education: [S1]
##                     :           purpose = business:
##                     :           :...job in {unskilled resident,
##                     :           :   :       unemployed non-resident}: 1 (3)
##                     :           :   job = skilled employee: 2 (3)
##                     :           purpose = car (new): [S2]
##                     :           purpose = car (used):
##                     :           :...amount > 6229: 2 (5)
##                     :           :   amount <= 6229: [S3]
##                     :           purpose = furniture:
##                     :           :...months_loan_duration > 27: 2 (9/1)
##                     :               months_loan_duration <= 27: [S4]
##                     checking_balance = 1 - 200 DM:
##                     :...savings_balance = unknown: 1 (34/6)
##                         savings_balance in {< 100 DM,101 - 500 DM,
##                         :                   501 - 1000 DM}:
##                         :...months_loan_duration > 45: 2 (11/1)
##                             months_loan_duration <= 45:
##                             :...installment_plan = stores:
##                                 :...age <= 35: 2 (4)
##                                 :   age > 35: 1 (2)
##                                 installment_plan = bank:
##                                 :...residence_history <= 1: 1 (3)
##                                 :   residence_history > 1:
##                                 :   :...existing_credits <= 1: 2 (5)
##                                 :       existing_credits > 1:
##                                 :       :...installment_rate > 2: 2 (3)
##                                 :           installment_rate <= 2: [S5]
##                                 installment_plan = none:
##                                 :...other_debtors = guarantor: 1 (7/1)
##                                     other_debtors = co-applicant: 2 (3/1)
##                                     other_debtors = none:
##                                     :...employment_length = 4 - 7 yrs:
##                                         :...age <= 41: 1 (16)
##                                         :   age > 41: 2 (3/1)
##                                         employment_length in {> 7 yrs,
##                                         :                     1 - 4 yrs,
##                                         :                     0 - 1 yrs,
##                                         :                     unemployed}:
##                                         :...amount > 7980: 2 (7)
##                                             amount <= 7980:
##                                             :...amount > 4746: 1 (10)
##                                                 amount <= 4746: [S6]
## 
## SubTree [S1]
## 
## savings_balance in {< 100 DM,101 - 500 DM,501 - 1000 DM}: 2 (6)
## savings_balance = unknown: 1 (2)
## 
## SubTree [S2]
## 
## savings_balance = 101 - 500 DM: 1 (1)
## savings_balance in {501 - 1000 DM,unknown}: 2 (4)
## savings_balance = < 100 DM:
## :...personal_status in {single male,female,divorced male}: 2 (29/6)
##     personal_status = married male: 1 (2)
## 
## SubTree [S3]
## 
## job = unskilled resident: 2 (1)
## job in {skilled employee,unemployed non-resident}: 1 (8/1)
## 
## SubTree [S4]
## 
## employment_length in {> 7 yrs,4 - 7 yrs}: 1 (7/1)
## employment_length = unemployed: 2 (2)
## employment_length = 0 - 1 yrs:
## :...job = unskilled resident: 2 (1)
## :   job in {skilled employee,unemployed non-resident}: 1 (4)
## employment_length = 1 - 4 yrs:
## :...property in {building society savings,unknown/none}: 1 (5)
##     property in {other,real estate}:
##     :...residence_history <= 2: 1 (4/1)
##         residence_history > 2: 2 (5)
## 
## SubTree [S5]
## 
## other_debtors in {none,guarantor}: 1 (3)
## other_debtors = co-applicant: 2 (1)
## 
## SubTree [S6]
## 
## housing = for free: 1 (2)
## housing = rent:
## :...credit_history = critical: 1 (1)
## :   credit_history in {repaid,delayed}: 2 (10/2)
## housing = own:
## :...savings_balance = 101 - 500 DM: 1 (6)
##     savings_balance in {< 100 DM,501 - 1000 DM}:
##     :...residence_history <= 1: 1 (8/1)
##         residence_history > 1:
##         :...installment_rate <= 1: 1 (2)
##             installment_rate > 1:
##             :...employment_length in {> 7 yrs,unemployed}: 1 (13/6)
##                 employment_length in {1 - 4 yrs,0 - 1 yrs}: 2 (10)
## 
## 
## Evaluation on training data (900 cases):
## 
##      Decision Tree   
##    ----------------  
##    Size      Errors  
## 
##      57  127(14.1%)   <<
## 
## 
##     (a)   (b)    <-classified as
##    ----  ----
##     590    42    (a): class 1
##      85   183    (b): class 2
## 
## 
##  Attribute usage:
## 
##  100.00% checking_balance
##   60.22% foreign_worker
##   57.89% credit_history
##   51.11% months_loan_duration
##   42.67% savings_balance
##   30.44% other_debtors
##   17.78% job
##   15.56% installment_plan
##   14.89% purpose
##   12.89% employment_length
##   10.22% amount
##    6.78% residence_history
##    5.78% housing
##    3.89% dependents
##    3.56% installment_rate
##    3.44% personal_status
##    2.78% age
##    1.56% property
##    1.33% existing_credits
## 
## 
## Time: 0.0 secs
# Score the held-out rows with the single-tree model. The predictors are
# selected by name (every column except `default`), mirroring how the model
# was trained, instead of assuming `default` sits at column position 17.
credit_pred <- predict(
  credit_model,
  credit_test[, setdiff(names(credit_test), "default")]
)

# Cross-tabulate actual against predicted classes. Row, column and chi-square
# proportions are switched off, so each cell shows only the count and its
# share of the table total.
CrossTable(credit_test$default, credit_pred,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c("Actual Default", "Predicted Default"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  100 
## 
##  
##                | Predicted Default 
## Actual Default |         1 |         2 | Row Total | 
## ---------------|-----------|-----------|-----------|
##              1 |        54 |        14 |        68 | 
##                |     0.540 |     0.140 |           | 
## ---------------|-----------|-----------|-----------|
##              2 |        11 |        21 |        32 | 
##                |     0.110 |     0.210 |           | 
## ---------------|-----------|-----------|-----------|
##   Column Total |        65 |        35 |       100 | 
## ---------------|-----------|-----------|-----------|
## 
## 
# Fit a boosted C5.0 model with `trials = 10`. Predictors are chosen by name
# (all columns except `default`) rather than by dropping column position 17,
# so the call keeps working if the column order of the data changes.
boost_predictors <- setdiff(names(credit_train), "default")
credit_boost10 <- C5.0(credit_train[, boost_predictors],
                       credit_train$default,
                       trials = 10)

# Predict the held-out rows using the same named predictor columns.
credit_boost_pred10 <- predict(credit_boost10,
                               credit_test[, boost_predictors])

# Confusion table of actual vs. predicted classes: counts and share of the
# table total only.
CrossTable(credit_test$default, credit_boost_pred10,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c("Actual Default", "Predicted Default"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  100 
## 
##  
##                | Predicted Default 
## Actual Default |         1 |         2 | Row Total | 
## ---------------|-----------|-----------|-----------|
##              1 |        63 |         5 |        68 | 
##                |     0.630 |     0.050 |           | 
## ---------------|-----------|-----------|-----------|
##              2 |        16 |        16 |        32 | 
##                |     0.160 |     0.160 |           | 
## ---------------|-----------|-----------|-----------|
##   Column Total |        79 |        21 |       100 | 
## ---------------|-----------|-----------|-----------|
## 
## 
# Build the misclassification cost matrix for C5.0. Rows are the predicted
# class and columns the actual class. The dimnames are taken from the outcome's
# own factor levels so they always match the classes the model sees; without
# dimnames C5.0 emits a warning and falls back to the factor levels itself.
default_levels <- levels(credit_train$default)
matrix_dimensions <- list(predicted = default_levels, actual = default_levels)

# Values fill the matrix column by column: predicting the first class when the
# second is true costs 4, the opposite mistake costs 1, and correct
# predictions cost nothing.
error_cost <- matrix(c(0, 1, 4, 0), nrow = 2, dimnames = matrix_dimensions)

# Fit a cost-sensitive tree on all columns except `default` (selected by name
# rather than by column position).
credit_cost <- C5.0(credit_train[, setdiff(names(credit_train), "default")],
                    credit_train$default,
                    costs = error_cost)
# Score the held-out rows with the cost-sensitive model, selecting predictors
# by name (every column except `default`) instead of dropping column 17.
credit_cost_pred <- predict(
  credit_cost,
  credit_test[, setdiff(names(credit_test), "default")]
)

# Confusion table of actual vs. predicted classes: counts and share of the
# table total only.
CrossTable(credit_test$default, credit_cost_pred,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c("Actual Default", "Predicted Default"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  100 
## 
##  
##                | Predicted Default 
## Actual Default |         1 |         2 | Row Total | 
## ---------------|-----------|-----------|-----------|
##              1 |        38 |        30 |        68 | 
##                |     0.380 |     0.300 |           | 
## ---------------|-----------|-----------|-----------|
##              2 |         5 |        27 |        32 | 
##                |     0.050 |     0.270 |           | 
## ---------------|-----------|-----------|-----------|
##   Column Total |        43 |        57 |       100 | 
## ---------------|-----------|-----------|-----------|
## 
## 

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Template example chunk: print per-column summary statistics for the `cars`
# data frame.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.