# Step 1: Load libraries
library(C50)
## Warning: package 'C50' was built under R version 4.3.3
library(gmodels)
## Warning: package 'gmodels' was built under R version 4.3.2
library(partykit)
## Warning: package 'partykit' was built under R version 4.3.3
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 4.3.3
# Step 2: Read the credit data from CSV, converting text columns to factors
credit <- utils::read.csv(
  file = "~/Downloads/credit.csv",
  stringsAsFactors = TRUE
)
# Inspect the column types of the loaded data frame
utils::str(credit)
## 'data.frame': 1000 obs. of 21 variables:
## $ checking_balance : Factor w/ 4 levels "< 0 DM","> 200 DM",..: 1 3 4 1 1 4 4 3 4 3 ...
## $ months_loan_duration: int 6 48 12 42 24 36 24 36 12 30 ...
## $ credit_history : Factor w/ 5 levels "critical","delayed",..: 1 5 1 5 2 5 5 5 5 1 ...
## $ purpose : Factor w/ 10 levels "business","car (new)",..: 8 8 5 6 2 5 6 3 8 2 ...
## $ amount : int 1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
## $ savings_balance : Factor w/ 5 levels "< 100 DM","> 1000 DM",..: 5 1 1 1 1 5 4 1 2 1 ...
## $ employment_length : Factor w/ 5 levels "> 7 yrs","0 - 1 yrs",..: 1 3 4 4 3 3 1 3 4 5 ...
## $ installment_rate : int 4 2 2 2 3 2 3 2 2 4 ...
## $ personal_status : Factor w/ 4 levels "divorced male",..: 4 2 4 4 4 4 4 4 1 3 ...
## $ other_debtors : Factor w/ 3 levels "co-applicant",..: 3 3 3 2 3 3 3 3 3 3 ...
## $ residence_history : int 4 2 3 4 4 4 4 2 4 2 ...
## $ property : Factor w/ 4 levels "building society savings",..: 3 3 3 1 4 4 1 2 3 2 ...
## $ age : int 67 22 49 45 53 35 53 35 61 28 ...
## $ installment_plan : Factor w/ 3 levels "bank","none",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ housing : Factor w/ 3 levels "for free","own",..: 2 2 2 1 1 1 2 3 2 2 ...
## $ existing_credits : int 2 1 1 1 2 1 1 1 1 2 ...
## $ default : int 1 2 1 1 2 1 1 1 1 2 ...
## $ dependents : int 1 1 2 2 2 2 1 1 1 1 ...
## $ telephone : Factor w/ 2 levels "none","yes": 2 1 1 1 1 2 1 2 1 1 ...
## $ foreign_worker : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ job : Factor w/ 4 levels "mangement self-employed",..: 2 2 4 2 2 4 2 1 4 1 ...
# Step 3: Shuffle the rows so the later train/test split is random
# Fix the RNG seed so the shuffle is reproducible across runs
set.seed(12345)
# Draw one uniform random key per row and sort by it; sizing the draw from
# nrow(credit) instead of a literal 1000 keeps the shuffle correct if the
# dataset size changes (a shorter key vector would silently drop rows)
credit_rand <- credit[order(runif(nrow(credit))), ]
# Summarize the shuffled data; the distributions should match the original
summary(credit_rand)
## checking_balance months_loan_duration credit_history
## < 0 DM :274 Min. : 4.0 critical :293
## > 200 DM : 63 1st Qu.:12.0 delayed : 88
## 1 - 200 DM:269 Median :18.0 fully repaid : 40
## unknown :394 Mean :20.9 fully repaid this bank: 49
## 3rd Qu.:24.0 repaid :530
## Max. :72.0
##
## purpose amount savings_balance employment_length
## radio/tv :280 Min. : 250 < 100 DM :603 > 7 yrs :253
## car (new) :234 1st Qu.: 1366 > 1000 DM : 48 0 - 1 yrs :172
## furniture :181 Median : 2320 101 - 500 DM :103 1 - 4 yrs :339
## car (used):103 Mean : 3271 501 - 1000 DM: 63 4 - 7 yrs :174
## business : 97 3rd Qu.: 3972 unknown :183 unemployed: 62
## education : 50 Max. :18424
## (Other) : 55
## installment_rate personal_status other_debtors residence_history
## Min. :1.000 divorced male: 50 co-applicant: 41 Min. :1.000
## 1st Qu.:2.000 female :310 guarantor : 52 1st Qu.:2.000
## Median :3.000 married male : 92 none :907 Median :3.000
## Mean :2.973 single male :548 Mean :2.845
## 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :4.000 Max. :4.000
##
## property age installment_plan housing
## building society savings:232 Min. :19.00 bank :139 for free:108
## other :332 1st Qu.:27.00 none :814 own :713
## real estate :282 Median :33.00 stores: 47 rent :179
## unknown/none :154 Mean :35.55
## 3rd Qu.:42.00
## Max. :75.00
##
## existing_credits default dependents telephone foreign_worker
## Min. :1.000 Min. :1.0 Min. :1.000 none:596 no : 37
## 1st Qu.:1.000 1st Qu.:1.0 1st Qu.:1.000 yes :404 yes:963
## Median :1.000 Median :1.0 Median :1.000
## Mean :1.407 Mean :1.3 Mean :1.155
## 3rd Qu.:2.000 3rd Qu.:2.0 3rd Qu.:1.000
## Max. :4.000 Max. :2.0 Max. :2.000
##
## job
## mangement self-employed:148
## skilled employee :630
## unemployed non-resident: 22
## unskilled resident :200
##
##
##
# Show the first 10 rows of the first 10 columns of the original data
head(credit[, 1:10], n = 10)
## checking_balance months_loan_duration credit_history purpose amount
## 1 < 0 DM 6 critical radio/tv 1169
## 2 1 - 200 DM 48 repaid radio/tv 5951
## 3 unknown 12 critical education 2096
## 4 < 0 DM 42 repaid furniture 7882
## 5 < 0 DM 24 delayed car (new) 4870
## 6 unknown 36 repaid education 9055
## 7 unknown 24 repaid furniture 2835
## 8 1 - 200 DM 36 repaid car (used) 6948
## 9 unknown 12 repaid radio/tv 3059
## 10 1 - 200 DM 30 critical car (new) 5234
## savings_balance employment_length installment_rate personal_status
## 1 unknown > 7 yrs 4 single male
## 2 < 100 DM 1 - 4 yrs 2 female
## 3 < 100 DM 4 - 7 yrs 2 single male
## 4 < 100 DM 4 - 7 yrs 2 single male
## 5 < 100 DM 1 - 4 yrs 3 single male
## 6 unknown 1 - 4 yrs 2 single male
## 7 501 - 1000 DM > 7 yrs 3 single male
## 8 < 100 DM 1 - 4 yrs 2 single male
## 9 > 1000 DM 4 - 7 yrs 2 divorced male
## 10 < 100 DM unemployed 4 married male
## other_debtors
## 1 none
## 2 none
## 3 none
## 4 guarantor
## 5 none
## 6 none
## 7 none
## 8 none
## 9 none
## 10 none
# Show the first 10 rows of the first 10 columns of the shuffled data;
# the printed row names reveal each row's position in the original data
head(credit_rand[, 1:10], n = 10)
## checking_balance months_loan_duration credit_history purpose amount
## 14 < 0 DM 24 critical car (new) 1199
## 448 1 - 200 DM 7 repaid radio/tv 2576
## 697 1 - 200 DM 12 repaid radio/tv 1103
## 32 < 0 DM 24 repaid furniture 4020
## 196 1 - 200 DM 9 critical education 1501
## 83 unknown 18 repaid business 1568
## 119 < 0 DM 33 critical furniture 4281
## 602 1 - 200 DM 9 repaid furniture 918
## 443 1 - 200 DM 20 delayed others 2629
## 945 < 0 DM 15 repaid furniture 1845
## savings_balance employment_length installment_rate personal_status
## 14 < 100 DM > 7 yrs 4 single male
## 448 < 100 DM 1 - 4 yrs 2 single male
## 697 < 100 DM 4 - 7 yrs 4 single male
## 32 < 100 DM 1 - 4 yrs 2 single male
## 196 < 100 DM > 7 yrs 2 female
## 83 101 - 500 DM 1 - 4 yrs 3 female
## 119 501 - 1000 DM 1 - 4 yrs 1 female
## 602 < 100 DM 1 - 4 yrs 4 female
## 443 < 100 DM 1 - 4 yrs 2 single male
## 945 < 100 DM 0 - 1 yrs 4 female
## other_debtors
## 14 none
## 448 guarantor
## 697 guarantor
## 32 none
## 196 none
## 83 none
## 119 none
## 602 none
## 443 none
## 945 guarantor
# Step 4: Split the shuffled data into training (90%) and testing (10%) sets
# Derive the split point from the data size rather than hard-coding row
# numbers; for the 1000-row dataset this is still rows 1-900 / 901-1000
train_fraction <- 0.9
n_train <- round(train_fraction * nrow(credit_rand))
is_train <- seq_len(nrow(credit_rand)) <= n_train
credit_train <- credit_rand[is_train, ]
credit_test <- credit_rand[!is_train, ]
# Convert the target to a factor using one shared set of levels, so both
# sets have identical levels even if a class is missing from one of them
default_levels <- sort(unique(credit_rand$default))
credit_train$default <- factor(credit_train$default, levels = default_levels)
credit_test$default <- factor(credit_test$default, levels = default_levels)
# Step 5: Train the C5.0 model
# Select predictors by name instead of by column position (-17)
# NOTE(review): predictor_cols is kept as a top-level variable because later
# steps (e.g. plotting via as.party) may re-evaluate the model call -- confirm
predictor_cols <- setdiff(names(credit_train), "default")
credit_model <- C5.0(
  x = credit_train[predictor_cols],
  y = credit_train$default
)
# Print the fitted tree, training confusion matrix, and attribute usage
summary(credit_model)
##
## Call:
## C5.0.default(x = credit_train[-17], y = credit_train$default)
##
##
## C5.0 [Release 2.07 GPL Edition] Thu Apr 10 14:46:15 2025
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 900 cases (21 attributes) from undefined.data
##
## Decision tree:
##
## checking_balance = unknown: 1 (358/44)
## checking_balance in {< 0 DM,> 200 DM,1 - 200 DM}:
## :...foreign_worker = no:
## :...installment_plan in {none,stores}: 1 (17/1)
## : installment_plan = bank:
## : :...residence_history <= 3: 2 (2)
## : residence_history > 3: 1 (2)
## foreign_worker = yes:
## :...credit_history in {fully repaid,fully repaid this bank}: 2 (61/20)
## credit_history in {critical,delayed,repaid}:
## :...months_loan_duration <= 11: 1 (76/13)
## months_loan_duration > 11:
## :...savings_balance = > 1000 DM: 1 (13)
## savings_balance in {< 100 DM,101 - 500 DM,501 - 1000 DM,
## : unknown}:
## :...checking_balance = > 200 DM:
## :...dependents > 1: 2 (3)
## : dependents <= 1:
## : :...credit_history in {delayed,repaid}: 1 (23/3)
## : credit_history = critical:
## : :...amount <= 2337: 2 (3)
## : amount > 2337: 1 (6)
## checking_balance = < 0 DM:
## :...other_debtors = guarantor:
## : :...credit_history = critical: 2 (1)
## : : credit_history in {delayed,repaid}: 1 (11/1)
## : other_debtors in {co-applicant,none}:
## : :...job = mangement self-employed: 1 (26/6)
## : job in {skilled employee,unemployed non-resident,
## : : unskilled resident}:
## : :...purpose in {domestic appliances,others,
## : : radio/tv,repairs,
## : : retraining}: 2 (33/10)
## : purpose = business:
## : :...job = skilled employee: 2 (3)
## : : job in {unemployed non-resident,
## : : unskilled resident}: 1 (3)
## : purpose = education: [S1]
## : purpose = car (new): [S2]
## : purpose = car (used):
## : :...amount > 6229: 2 (5)
## : : amount <= 6229: [S3]
## : purpose = furniture:
## : :...months_loan_duration > 27: 2 (9/1)
## : months_loan_duration <= 27: [S4]
## checking_balance = 1 - 200 DM:
## :...savings_balance = unknown: 1 (34/6)
## savings_balance in {< 100 DM,101 - 500 DM,
## : 501 - 1000 DM}:
## :...months_loan_duration > 45: 2 (11/1)
## months_loan_duration <= 45:
## :...installment_plan = stores:
## :...age <= 35: 2 (4)
## : age > 35: 1 (2)
## installment_plan = bank:
## :...residence_history <= 1: 1 (3)
## : residence_history > 1:
## : :...existing_credits <= 1: 2 (5)
## : existing_credits > 1:
## : :...installment_rate > 2: 2 (3)
## : installment_rate <= 2: [S5]
## installment_plan = none:
## :...other_debtors = co-applicant: 2 (3/1)
## other_debtors = guarantor: 1 (7/1)
## other_debtors = none:
## :...employment_length = 4 - 7 yrs:
## :...age <= 41: 1 (16)
## : age > 41: 2 (3/1)
## employment_length in {> 7 yrs,
## : 0 - 1 yrs,
## : 1 - 4 yrs,
## : unemployed}:
## :...amount > 7980: 2 (7)
## amount <= 7980:
## :...amount > 4746: 1 (10)
## amount <= 4746: [S6]
##
## SubTree [S1]
##
## savings_balance in {< 100 DM,101 - 500 DM,501 - 1000 DM}: 2 (6)
## savings_balance = unknown: 1 (2)
##
## SubTree [S2]
##
## savings_balance = 101 - 500 DM: 1 (1)
## savings_balance in {501 - 1000 DM,unknown}: 2 (4)
## savings_balance = < 100 DM:
## :...personal_status in {divorced male,female,single male}: 2 (29/6)
## personal_status = married male: 1 (2)
##
## SubTree [S3]
##
## job in {skilled employee,unemployed non-resident}: 1 (8/1)
## job = unskilled resident: 2 (1)
##
## SubTree [S4]
##
## employment_length in {> 7 yrs,4 - 7 yrs}: 1 (7/1)
## employment_length = unemployed: 2 (2)
## employment_length = 0 - 1 yrs:
## :...job in {skilled employee,unemployed non-resident}: 1 (4)
## : job = unskilled resident: 2 (1)
## employment_length = 1 - 4 yrs:
## :...property in {building society savings,unknown/none}: 1 (5)
## property in {other,real estate}:
## :...residence_history <= 2: 1 (4/1)
## residence_history > 2: 2 (5)
##
## SubTree [S5]
##
## other_debtors = co-applicant: 2 (1)
## other_debtors in {guarantor,none}: 1 (3)
##
## SubTree [S6]
##
## housing = for free: 1 (2)
## housing = rent:
## :...credit_history = critical: 1 (1)
## : credit_history in {delayed,repaid}: 2 (10/2)
## housing = own:
## :...savings_balance = 101 - 500 DM: 1 (6)
## savings_balance in {< 100 DM,501 - 1000 DM}:
## :...residence_history <= 1: 1 (8/1)
## residence_history > 1:
## :...installment_rate <= 1: 1 (2)
## installment_rate > 1:
## :...employment_length in {> 7 yrs,unemployed}: 1 (13/6)
## employment_length in {0 - 1 yrs,1 - 4 yrs}: 2 (10)
##
##
## Evaluation on training data (900 cases):
##
## Decision Tree
## ----------------
## Size Errors
##
## 57 127(14.1%) <<
##
##
## (a) (b) <-classified as
## ---- ----
## 590 42 (a): class 1
## 85 183 (b): class 2
##
##
## Attribute usage:
##
## 100.00% checking_balance
## 60.22% foreign_worker
## 57.89% credit_history
## 51.11% months_loan_duration
## 42.67% savings_balance
## 30.44% other_debtors
## 17.78% job
## 15.56% installment_plan
## 14.89% purpose
## 12.89% employment_length
## 10.22% amount
## 6.78% residence_history
## 5.78% housing
## 3.89% dependents
## 3.56% installment_rate
## 3.44% personal_status
## 2.78% age
## 1.56% property
## 1.33% existing_credits
##
##
## Time: 0.0 secs
# Optional: convert the C5.0 model to a party object and draw the tree
credit_model |>
  as.party() |>
  plot()

# Step 6: Predict the class of each test case with the fitted model
# Drop the target column by name rather than by position (-17) so the
# call keeps working if columns are added, removed, or reordered
test_predictors <- credit_test[setdiff(names(credit_test), "default")]
credit_pred <- predict(credit_model, test_predictors)
# Cross-tabulate actual vs. predicted classes; only counts and
# table-total proportions are shown (row, column, and chi-square
# contributions are switched off)
CrossTable(
  credit_test$default, credit_pred,
  prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
  dnn = c("Actual Default", "Predicted Default")
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | Predicted Default
## Actual Default | 1 | 2 | Row Total |
## ---------------|-----------|-----------|-----------|
## 1 | 54 | 14 | 68 |
## | 0.540 | 0.140 | |
## ---------------|-----------|-----------|-----------|
## 2 | 11 | 21 | 32 |
## | 0.110 | 0.210 | |
## ---------------|-----------|-----------|-----------|
## Column Total | 65 | 35 | 100 |
## ---------------|-----------|-----------|-----------|
##
##