# Step 1: Load libraries
library(C50)
## Warning: package 'C50' was built under R version 4.3.3
library(gmodels)
## Warning: package 'gmodels' was built under R version 4.3.2
library(partykit)
## Warning: package 'partykit' was built under R version 4.3.3
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 4.3.3
# Step 2: Read the credit data from CSV; text columns are parsed as factors
credit <- read.csv(
  file = "~/Downloads/credit.csv",
  stringsAsFactors = TRUE
)

# Inspect the structure: column types and the first few values of each column
str(credit)
## 'data.frame':    1000 obs. of  21 variables:
##  $ checking_balance    : Factor w/ 4 levels "< 0 DM","> 200 DM",..: 1 3 4 1 1 4 4 3 4 3 ...
##  $ months_loan_duration: int  6 48 12 42 24 36 24 36 12 30 ...
##  $ credit_history      : Factor w/ 5 levels "critical","delayed",..: 1 5 1 5 2 5 5 5 5 1 ...
##  $ purpose             : Factor w/ 10 levels "business","car (new)",..: 8 8 5 6 2 5 6 3 8 2 ...
##  $ amount              : int  1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
##  $ savings_balance     : Factor w/ 5 levels "< 100 DM","> 1000 DM",..: 5 1 1 1 1 5 4 1 2 1 ...
##  $ employment_length   : Factor w/ 5 levels "> 7 yrs","0 - 1 yrs",..: 1 3 4 4 3 3 1 3 4 5 ...
##  $ installment_rate    : int  4 2 2 2 3 2 3 2 2 4 ...
##  $ personal_status     : Factor w/ 4 levels "divorced male",..: 4 2 4 4 4 4 4 4 1 3 ...
##  $ other_debtors       : Factor w/ 3 levels "co-applicant",..: 3 3 3 2 3 3 3 3 3 3 ...
##  $ residence_history   : int  4 2 3 4 4 4 4 2 4 2 ...
##  $ property            : Factor w/ 4 levels "building society savings",..: 3 3 3 1 4 4 1 2 3 2 ...
##  $ age                 : int  67 22 49 45 53 35 53 35 61 28 ...
##  $ installment_plan    : Factor w/ 3 levels "bank","none",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ housing             : Factor w/ 3 levels "for free","own",..: 2 2 2 1 1 1 2 3 2 2 ...
##  $ existing_credits    : int  2 1 1 1 2 1 1 1 1 2 ...
##  $ default             : int  1 2 1 1 2 1 1 1 1 2 ...
##  $ dependents          : int  1 1 2 2 2 2 1 1 1 1 ...
##  $ telephone           : Factor w/ 2 levels "none","yes": 2 1 1 1 1 2 1 2 1 1 ...
##  $ foreign_worker      : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ job                 : Factor w/ 4 levels "mangement self-employed",..: 2 2 4 2 2 4 2 1 4 1 ...
# Step 3: Shuffle the rows into a reproducible random order
# Fixing the seed makes the random draws (and so the shuffle) repeatable.
set.seed(12345)
# Draw one uniform random number per row and order the rows by those draws.
# The count comes from nrow(credit) rather than a literal 1000, so every row is
# kept exactly once even if the data set has a different number of rows.
credit_rand <- credit[order(runif(nrow(credit))), ]

# Check the randomized data briefly
summary(credit_rand)
##    checking_balance months_loan_duration                credit_history
##  < 0 DM    :274     Min.   : 4.0         critical              :293   
##  > 200 DM  : 63     1st Qu.:12.0         delayed               : 88   
##  1 - 200 DM:269     Median :18.0         fully repaid          : 40   
##  unknown   :394     Mean   :20.9         fully repaid this bank: 49   
##                     3rd Qu.:24.0         repaid                :530   
##                     Max.   :72.0                                      
##                                                                       
##        purpose        amount           savings_balance  employment_length
##  radio/tv  :280   Min.   :  250   < 100 DM     :603    > 7 yrs   :253    
##  car (new) :234   1st Qu.: 1366   > 1000 DM    : 48    0 - 1 yrs :172    
##  furniture :181   Median : 2320   101 - 500 DM :103    1 - 4 yrs :339    
##  car (used):103   Mean   : 3271   501 - 1000 DM: 63    4 - 7 yrs :174    
##  business  : 97   3rd Qu.: 3972   unknown      :183    unemployed: 62    
##  education : 50   Max.   :18424                                          
##  (Other)   : 55                                                          
##  installment_rate      personal_status      other_debtors residence_history
##  Min.   :1.000    divorced male: 50    co-applicant: 41   Min.   :1.000    
##  1st Qu.:2.000    female       :310    guarantor   : 52   1st Qu.:2.000    
##  Median :3.000    married male : 92    none        :907   Median :3.000    
##  Mean   :2.973    single male  :548                       Mean   :2.845    
##  3rd Qu.:4.000                                            3rd Qu.:4.000    
##  Max.   :4.000                                            Max.   :4.000    
##                                                                            
##                      property        age        installment_plan     housing   
##  building society savings:232   Min.   :19.00   bank  :139       for free:108  
##  other                   :332   1st Qu.:27.00   none  :814       own     :713  
##  real estate             :282   Median :33.00   stores: 47       rent    :179  
##  unknown/none            :154   Mean   :35.55                                  
##                                 3rd Qu.:42.00                                  
##                                 Max.   :75.00                                  
##                                                                                
##  existing_credits    default      dependents    telephone  foreign_worker
##  Min.   :1.000    Min.   :1.0   Min.   :1.000   none:596   no : 37       
##  1st Qu.:1.000    1st Qu.:1.0   1st Qu.:1.000   yes :404   yes:963       
##  Median :1.000    Median :1.0   Median :1.000                            
##  Mean   :1.407    Mean   :1.3   Mean   :1.155                            
##  3rd Qu.:2.000    3rd Qu.:2.0   3rd Qu.:1.000                            
##  Max.   :4.000    Max.   :2.0   Max.   :2.000                            
##                                                                          
##                       job     
##  mangement self-employed:148  
##  skilled employee       :630  
##  unemployed non-resident: 22  
##  unskilled resident     :200  
##                               
##                               
## 
# First 10 rows of the first 10 columns, in the original file order
head(credit[, 1:10], n = 10)
##    checking_balance months_loan_duration credit_history    purpose amount
## 1            < 0 DM                    6       critical   radio/tv   1169
## 2        1 - 200 DM                   48         repaid   radio/tv   5951
## 3           unknown                   12       critical  education   2096
## 4            < 0 DM                   42         repaid  furniture   7882
## 5            < 0 DM                   24        delayed  car (new)   4870
## 6           unknown                   36         repaid  education   9055
## 7           unknown                   24         repaid  furniture   2835
## 8        1 - 200 DM                   36         repaid car (used)   6948
## 9           unknown                   12         repaid   radio/tv   3059
## 10       1 - 200 DM                   30       critical  car (new)   5234
##    savings_balance employment_length installment_rate personal_status
## 1          unknown           > 7 yrs                4     single male
## 2         < 100 DM         1 - 4 yrs                2          female
## 3         < 100 DM         4 - 7 yrs                2     single male
## 4         < 100 DM         4 - 7 yrs                2     single male
## 5         < 100 DM         1 - 4 yrs                3     single male
## 6          unknown         1 - 4 yrs                2     single male
## 7    501 - 1000 DM           > 7 yrs                3     single male
## 8         < 100 DM         1 - 4 yrs                2     single male
## 9        > 1000 DM         4 - 7 yrs                2   divorced male
## 10        < 100 DM        unemployed                4    married male
##    other_debtors
## 1           none
## 2           none
## 3           none
## 4      guarantor
## 5           none
## 6           none
## 7           none
## 8           none
## 9           none
## 10          none
# Same 10 x 10 preview after shuffling; row names show the original positions
head(credit_rand[, 1:10], n = 10)
##     checking_balance months_loan_duration credit_history   purpose amount
## 14            < 0 DM                   24       critical car (new)   1199
## 448       1 - 200 DM                    7         repaid  radio/tv   2576
## 697       1 - 200 DM                   12         repaid  radio/tv   1103
## 32            < 0 DM                   24         repaid furniture   4020
## 196       1 - 200 DM                    9       critical education   1501
## 83           unknown                   18         repaid  business   1568
## 119           < 0 DM                   33       critical furniture   4281
## 602       1 - 200 DM                    9         repaid furniture    918
## 443       1 - 200 DM                   20        delayed    others   2629
## 945           < 0 DM                   15         repaid furniture   1845
##     savings_balance employment_length installment_rate personal_status
## 14         < 100 DM           > 7 yrs                4     single male
## 448        < 100 DM         1 - 4 yrs                2     single male
## 697        < 100 DM         4 - 7 yrs                4     single male
## 32         < 100 DM         1 - 4 yrs                2     single male
## 196        < 100 DM           > 7 yrs                2          female
## 83     101 - 500 DM         1 - 4 yrs                3          female
## 119   501 - 1000 DM         1 - 4 yrs                1          female
## 602        < 100 DM         1 - 4 yrs                4          female
## 443        < 100 DM         1 - 4 yrs                2     single male
## 945        < 100 DM         0 - 1 yrs                4          female
##     other_debtors
## 14           none
## 448     guarantor
## 697     guarantor
## 32           none
## 196          none
## 83           none
## 119          none
## 602          none
## 443          none
## 945     guarantor
# Step 4: Split into training and testing sets
# Hold out the last 10% of the shuffled rows for testing and train on the rest.
# Sizes are derived from the data (900 / 100 for 1000 rows) instead of being
# hard-coded row numbers.
n_test <- nrow(credit_rand) %/% 10L
n_train <- nrow(credit_rand) - n_test
credit_train <- credit_rand[seq_len(n_train), ]
credit_test <- credit_rand[n_train + seq_len(n_test), ]

# Convert the target to a factor using one shared set of levels, so both
# subsets carry every class even if one of them happens not to contain it.
default_levels <- sort(unique(credit_rand$default))
credit_train$default <- factor(credit_train$default, levels = default_levels)
credit_test$default <- factor(credit_test$default, levels = default_levels)

# Step 5: Train the C5.0 model
# The target column is excluded from the predictors by name rather than by
# position, so the call stays correct if columns are added or reordered.
credit_model <- C5.0(
  credit_train[setdiff(names(credit_train), "default")],
  credit_train$default
)

# Verify the model: prints the tree, training confusion matrix and
# attribute usage
summary(credit_model)
## 
## Call:
## C5.0.default(x = credit_train[-17], y = credit_train$default)
## 
## 
## C5.0 [Release 2.07 GPL Edition]      Thu Apr 10 14:46:15 2025
## -------------------------------
## 
## Class specified by attribute `outcome'
## 
## Read 900 cases (21 attributes) from undefined.data
## 
## Decision tree:
## 
## checking_balance = unknown: 1 (358/44)
## checking_balance in {< 0 DM,> 200 DM,1 - 200 DM}:
## :...foreign_worker = no:
##     :...installment_plan in {none,stores}: 1 (17/1)
##     :   installment_plan = bank:
##     :   :...residence_history <= 3: 2 (2)
##     :       residence_history > 3: 1 (2)
##     foreign_worker = yes:
##     :...credit_history in {fully repaid,fully repaid this bank}: 2 (61/20)
##         credit_history in {critical,delayed,repaid}:
##         :...months_loan_duration <= 11: 1 (76/13)
##             months_loan_duration > 11:
##             :...savings_balance = > 1000 DM: 1 (13)
##                 savings_balance in {< 100 DM,101 - 500 DM,501 - 1000 DM,
##                 :                   unknown}:
##                 :...checking_balance = > 200 DM:
##                     :...dependents > 1: 2 (3)
##                     :   dependents <= 1:
##                     :   :...credit_history in {delayed,repaid}: 1 (23/3)
##                     :       credit_history = critical:
##                     :       :...amount <= 2337: 2 (3)
##                     :           amount > 2337: 1 (6)
##                     checking_balance = < 0 DM:
##                     :...other_debtors = guarantor:
##                     :   :...credit_history = critical: 2 (1)
##                     :   :   credit_history in {delayed,repaid}: 1 (11/1)
##                     :   other_debtors in {co-applicant,none}:
##                     :   :...job = mangement self-employed: 1 (26/6)
##                     :       job in {skilled employee,unemployed non-resident,
##                     :       :       unskilled resident}:
##                     :       :...purpose in {domestic appliances,others,
##                     :           :           radio/tv,repairs,
##                     :           :           retraining}: 2 (33/10)
##                     :           purpose = business:
##                     :           :...job = skilled employee: 2 (3)
##                     :           :   job in {unemployed non-resident,
##                     :           :           unskilled resident}: 1 (3)
##                     :           purpose = education: [S1]
##                     :           purpose = car (new): [S2]
##                     :           purpose = car (used):
##                     :           :...amount > 6229: 2 (5)
##                     :           :   amount <= 6229: [S3]
##                     :           purpose = furniture:
##                     :           :...months_loan_duration > 27: 2 (9/1)
##                     :               months_loan_duration <= 27: [S4]
##                     checking_balance = 1 - 200 DM:
##                     :...savings_balance = unknown: 1 (34/6)
##                         savings_balance in {< 100 DM,101 - 500 DM,
##                         :                   501 - 1000 DM}:
##                         :...months_loan_duration > 45: 2 (11/1)
##                             months_loan_duration <= 45:
##                             :...installment_plan = stores:
##                                 :...age <= 35: 2 (4)
##                                 :   age > 35: 1 (2)
##                                 installment_plan = bank:
##                                 :...residence_history <= 1: 1 (3)
##                                 :   residence_history > 1:
##                                 :   :...existing_credits <= 1: 2 (5)
##                                 :       existing_credits > 1:
##                                 :       :...installment_rate > 2: 2 (3)
##                                 :           installment_rate <= 2: [S5]
##                                 installment_plan = none:
##                                 :...other_debtors = co-applicant: 2 (3/1)
##                                     other_debtors = guarantor: 1 (7/1)
##                                     other_debtors = none:
##                                     :...employment_length = 4 - 7 yrs:
##                                         :...age <= 41: 1 (16)
##                                         :   age > 41: 2 (3/1)
##                                         employment_length in {> 7 yrs,
##                                         :                     0 - 1 yrs,
##                                         :                     1 - 4 yrs,
##                                         :                     unemployed}:
##                                         :...amount > 7980: 2 (7)
##                                             amount <= 7980:
##                                             :...amount > 4746: 1 (10)
##                                                 amount <= 4746: [S6]
## 
## SubTree [S1]
## 
## savings_balance in {< 100 DM,101 - 500 DM,501 - 1000 DM}: 2 (6)
## savings_balance = unknown: 1 (2)
## 
## SubTree [S2]
## 
## savings_balance = 101 - 500 DM: 1 (1)
## savings_balance in {501 - 1000 DM,unknown}: 2 (4)
## savings_balance = < 100 DM:
## :...personal_status in {divorced male,female,single male}: 2 (29/6)
##     personal_status = married male: 1 (2)
## 
## SubTree [S3]
## 
## job in {skilled employee,unemployed non-resident}: 1 (8/1)
## job = unskilled resident: 2 (1)
## 
## SubTree [S4]
## 
## employment_length in {> 7 yrs,4 - 7 yrs}: 1 (7/1)
## employment_length = unemployed: 2 (2)
## employment_length = 0 - 1 yrs:
## :...job in {skilled employee,unemployed non-resident}: 1 (4)
## :   job = unskilled resident: 2 (1)
## employment_length = 1 - 4 yrs:
## :...property in {building society savings,unknown/none}: 1 (5)
##     property in {other,real estate}:
##     :...residence_history <= 2: 1 (4/1)
##         residence_history > 2: 2 (5)
## 
## SubTree [S5]
## 
## other_debtors = co-applicant: 2 (1)
## other_debtors in {guarantor,none}: 1 (3)
## 
## SubTree [S6]
## 
## housing = for free: 1 (2)
## housing = rent:
## :...credit_history = critical: 1 (1)
## :   credit_history in {delayed,repaid}: 2 (10/2)
## housing = own:
## :...savings_balance = 101 - 500 DM: 1 (6)
##     savings_balance in {< 100 DM,501 - 1000 DM}:
##     :...residence_history <= 1: 1 (8/1)
##         residence_history > 1:
##         :...installment_rate <= 1: 1 (2)
##             installment_rate > 1:
##             :...employment_length in {> 7 yrs,unemployed}: 1 (13/6)
##                 employment_length in {0 - 1 yrs,1 - 4 yrs}: 2 (10)
## 
## 
## Evaluation on training data (900 cases):
## 
##      Decision Tree   
##    ----------------  
##    Size      Errors  
## 
##      57  127(14.1%)   <<
## 
## 
##     (a)   (b)    <-classified as
##    ----  ----
##     590    42    (a): class 1
##      85   183    (b): class 2
## 
## 
##  Attribute usage:
## 
##  100.00% checking_balance
##   60.22% foreign_worker
##   57.89% credit_history
##   51.11% months_loan_duration
##   42.67% savings_balance
##   30.44% other_debtors
##   17.78% job
##   15.56% installment_plan
##   14.89% purpose
##   12.89% employment_length
##   10.22% amount
##    6.78% residence_history
##    5.78% housing
##    3.89% dependents
##    3.56% installment_rate
##    3.44% personal_status
##    2.78% age
##    1.56% property
##    1.33% existing_credits
## 
## 
## Time: 0.0 secs
# Optional visualization: convert the fitted tree to a party object and plot it
plot(as.party(credit_model))

# Step 6: Predict classes for the held-out rows
# The target column is removed by name rather than by position, so the right
# column is dropped even if the column order changes.
credit_pred <- predict(
  credit_model,
  credit_test[setdiff(names(credit_test), "default")]
)

# Cross-tabulate actual vs. predicted classes; only counts and table-total
# proportions are shown (chi-square, row and column proportions are disabled)
CrossTable(credit_test$default, credit_pred,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c("Actual Default", "Predicted Default"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  100 
## 
##  
##                | Predicted Default 
## Actual Default |         1 |         2 | Row Total | 
## ---------------|-----------|-----------|-----------|
##              1 |        54 |        14 |        68 | 
##                |     0.540 |     0.140 |           | 
## ---------------|-----------|-----------|-----------|
##              2 |        11 |        21 |        32 | 
##                |     0.110 |     0.210 |           | 
## ---------------|-----------|-----------|-----------|
##   Column Total |        65 |        35 |       100 | 
## ---------------|-----------|-----------|-----------|
## 
##