This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Attach readr, which provides read_csv().
library("readr")
# Load the credit data set from the local CSV file into `credit`.
credit <- read_csv(
  file = "C:/Users/otuata4438/Downloads/credit.csv"
)
## Rows: 1000 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): checking_balance, credit_history, purpose, savings_balance, employ...
## dbl (8): months_loan_duration, amount, installment_rate, residence_history,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet viewer only in an interactive session; it serves no
# purpose while the document is being knitted.
if (interactive()) {
  View(credit)
}
# The previous bare `stringsAsFactors=TRUE` only created an unused variable and
# converted nothing. Turn every character column into a factor explicitly so
# the categorical predictors are stored as factors from here on.
credit[] <- lapply(credit, function(column) {
  if (is.character(column)) factor(column) else column
})
# Point R at a CRAN mirror so a non-interactive install can proceed.
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Install C50 only when it is missing instead of on every run.
if (!requireNamespace("C50", quietly = TRUE)) {
  install.packages("C50")
}
## Installing package into 'C:/Users/otuata4438/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'C50' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\otuata4438\AppData\Local\Temp\Rtmpqsj7Hi\downloaded_packages
library(C50)
## Warning: package 'C50' was built under R version 4.4.3
# Point R at a CRAN mirror so a non-interactive install can proceed.
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Install gmodels (provides CrossTable) only when it is missing, rather than
# re-downloading it every time the document is run.
if (!requireNamespace("gmodels", quietly = TRUE)) {
  install.packages("gmodels")
}
## Installing package into 'C:/Users/otuata4438/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'gmodels' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\otuata4438\AppData\Local\Temp\Rtmpqsj7Hi\downloaded_packages
library(gmodels)
## Warning: package 'gmodels' was built under R version 4.4.3
# Fix the RNG state so the shuffle below is reproducible.
set.seed(12345)
# Shuffle the rows by ordering on one uniform random draw per row. The number
# of draws follows the data instead of a hard-coded 1000, so every row is
# shuffled exactly once whatever the file's row count is.
credit_rand <- credit[order(runif(nrow(credit))), ]
# Column-wise summary of the data in its original order.
summary(credit)
## checking_balance months_loan_duration credit_history purpose
## Length:1000 Min. : 4.0 Length:1000 Length:1000
## Class :character 1st Qu.:12.0 Class :character Class :character
## Mode :character Median :18.0 Mode :character Mode :character
## Mean :20.9
## 3rd Qu.:24.0
## Max. :72.0
## amount savings_balance employment_length installment_rate
## Min. : 250 Length:1000 Length:1000 Min. :1.000
## 1st Qu.: 1366 Class :character Class :character 1st Qu.:2.000
## Median : 2320 Mode :character Mode :character Median :3.000
## Mean : 3271 Mean :2.973
## 3rd Qu.: 3972 3rd Qu.:4.000
## Max. :18424 Max. :4.000
## personal_status other_debtors residence_history property
## Length:1000 Length:1000 Min. :1.000 Length:1000
## Class :character Class :character 1st Qu.:2.000 Class :character
## Mode :character Mode :character Median :3.000 Mode :character
## Mean :2.845
## 3rd Qu.:4.000
## Max. :4.000
## age installment_plan housing existing_credits
## Min. :19.00 Length:1000 Length:1000 Min. :1.000
## 1st Qu.:27.00 Class :character Class :character 1st Qu.:1.000
## Median :33.00 Mode :character Mode :character Median :1.000
## Mean :35.55 Mean :1.407
## 3rd Qu.:42.00 3rd Qu.:2.000
## Max. :75.00 Max. :4.000
## default dependents telephone foreign_worker
## Min. :1.0 Min. :1.000 Length:1000 Length:1000
## 1st Qu.:1.0 1st Qu.:1.000 Class :character Class :character
## Median :1.0 Median :1.000 Mode :character Mode :character
## Mean :1.3 Mean :1.155
## 3rd Qu.:2.0 3rd Qu.:1.000
## Max. :2.0 Max. :2.000
## job
## Length:1000
## Class :character
## Mode :character
##
##
##
# Column-wise summary of the shuffled copy, for comparison with the summary
# of the unshuffled data.
summary(object = credit_rand)
## checking_balance months_loan_duration credit_history purpose
## Length:1000 Min. : 4.0 Length:1000 Length:1000
## Class :character 1st Qu.:12.0 Class :character Class :character
## Mode :character Median :18.0 Mode :character Mode :character
## Mean :20.9
## 3rd Qu.:24.0
## Max. :72.0
## amount savings_balance employment_length installment_rate
## Min. : 250 Length:1000 Length:1000 Min. :1.000
## 1st Qu.: 1366 Class :character Class :character 1st Qu.:2.000
## Median : 2320 Mode :character Mode :character Median :3.000
## Mean : 3271 Mean :2.973
## 3rd Qu.: 3972 3rd Qu.:4.000
## Max. :18424 Max. :4.000
## personal_status other_debtors residence_history property
## Length:1000 Length:1000 Min. :1.000 Length:1000
## Class :character Class :character 1st Qu.:2.000 Class :character
## Mode :character Mode :character Median :3.000 Mode :character
## Mean :2.845
## 3rd Qu.:4.000
## Max. :4.000
## age installment_plan housing existing_credits
## Min. :19.00 Length:1000 Length:1000 Min. :1.000
## 1st Qu.:27.00 Class :character Class :character 1st Qu.:1.000
## Median :33.00 Mode :character Mode :character Median :1.000
## Mean :35.55 Mean :1.407
## 3rd Qu.:42.00 3rd Qu.:2.000
## Max. :75.00 Max. :4.000
## default dependents telephone foreign_worker
## Min. :1.0 Min. :1.000 Length:1000 Length:1000
## 1st Qu.:1.0 1st Qu.:1.000 Class :character Class :character
## Median :1.0 Median :1.000 Mode :character Mode :character
## Mean :1.3 Mean :1.155
## 3rd Qu.:2.0 3rd Qu.:1.000
## Max. :2.0 Max. :2.000
## job
## Length:1000
## Class :character
## Mode :character
##
##
##
# First ten loan amounts in the original file order.
head(credit[["amount"]], n = 10)
## [1] 1169 5951 2096 7882 4870 9055 2835 6948 3059 5234
# First ten loan amounts after shuffling.
head(credit_rand[["amount"]], n = 10)
## [1] 1199 2576 1103 4020 1501 1568 4281 918 2629 1845
# Split the shuffled data: rows 1-900 for training, rows 901-1000 for testing.
credit_train <- credit_rand[seq_len(900), ]
credit_test <- credit_rand[seq(901, 1000), ]
# Share of each `default` class in the training set.
proportions(table(credit_train[["default"]]))
##
## 1 2
## 0.7022222 0.2977778
# Share of each `default` class in the test set.
proportions(table(credit_test[["default"]]))
##
## 1 2
## 0.68 0.32
# C5.0 needs a factor outcome, so convert `default` in both partitions.
credit_train[["default"]] <- as.factor(credit_train[["default"]])
credit_test[["default"]] <- as.factor(credit_test[["default"]])
# Fit a single decision tree; column 17 is dropped from the predictors and
# `default` is supplied as the outcome.
credit_model <- C5.0(x = credit_train[-17], y = credit_train$default)
# Print the fitted tree and its evaluation on the training data.
summary(credit_model)
##
## Call:
## C5.0.default(x = credit_train[-17], y = credit_train$default)
##
##
## C5.0 [Release 2.07 GPL Edition] Thu Apr 10 14:16:24 2025
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 900 cases (21 attributes) from undefined.data
##
## Decision tree:
##
## checking_balance = unknown: 1 (358/44)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...foreign_worker = no:
## :...installment_plan in {none,stores}: 1 (17/1)
## : installment_plan = bank:
## : :...residence_history <= 3: 2 (2)
## : residence_history > 3: 1 (2)
## foreign_worker = yes:
## :...credit_history in {fully repaid,fully repaid this bank}: 2 (61/20)
## credit_history in {critical,repaid,delayed}:
## :...months_loan_duration <= 11: 1 (76/13)
## months_loan_duration > 11:
## :...savings_balance = > 1000 DM: 1 (13)
## savings_balance in {< 100 DM,101 - 500 DM,501 - 1000 DM,
## : unknown}:
## :...checking_balance = > 200 DM:
## :...dependents > 1: 2 (3)
## : dependents <= 1:
## : :...credit_history in {repaid,delayed}: 1 (23/3)
## : credit_history = critical:
## : :...amount <= 2337: 2 (3)
## : amount > 2337: 1 (6)
## checking_balance = < 0 DM:
## :...other_debtors = guarantor:
## : :...credit_history = critical: 2 (1)
## : : credit_history in {repaid,delayed}: 1 (11/1)
## : other_debtors in {none,co-applicant}:
## : :...job = mangement self-employed: 1 (26/6)
## : job in {unskilled resident,skilled employee,
## : : unemployed non-resident}:
## : :...purpose in {radio/tv,others,repairs,
## : : domestic appliances,
## : : retraining}: 2 (33/10)
## : purpose = education: [S1]
## : purpose = business:
## : :...job in {unskilled resident,
## : : : unemployed non-resident}: 1 (3)
## : : job = skilled employee: 2 (3)
## : purpose = car (new): [S2]
## : purpose = car (used):
## : :...amount > 6229: 2 (5)
## : : amount <= 6229: [S3]
## : purpose = furniture:
## : :...months_loan_duration > 27: 2 (9/1)
## : months_loan_duration <= 27: [S4]
## checking_balance = 1 - 200 DM:
## :...savings_balance = unknown: 1 (34/6)
## savings_balance in {< 100 DM,101 - 500 DM,
## : 501 - 1000 DM}:
## :...months_loan_duration > 45: 2 (11/1)
## months_loan_duration <= 45:
## :...installment_plan = stores:
## :...age <= 35: 2 (4)
## : age > 35: 1 (2)
## installment_plan = bank:
## :...residence_history <= 1: 1 (3)
## : residence_history > 1:
## : :...existing_credits <= 1: 2 (5)
## : existing_credits > 1:
## : :...installment_rate > 2: 2 (3)
## : installment_rate <= 2: [S5]
## installment_plan = none:
## :...other_debtors = guarantor: 1 (7/1)
## other_debtors = co-applicant: 2 (3/1)
## other_debtors = none:
## :...employment_length = 4 - 7 yrs:
## :...age <= 41: 1 (16)
## : age > 41: 2 (3/1)
## employment_length in {> 7 yrs,
## : 1 - 4 yrs,
## : 0 - 1 yrs,
## : unemployed}:
## :...amount > 7980: 2 (7)
## amount <= 7980:
## :...amount > 4746: 1 (10)
## amount <= 4746: [S6]
##
## SubTree [S1]
##
## savings_balance in {< 100 DM,101 - 500 DM,501 - 1000 DM}: 2 (6)
## savings_balance = unknown: 1 (2)
##
## SubTree [S2]
##
## savings_balance = 101 - 500 DM: 1 (1)
## savings_balance in {501 - 1000 DM,unknown}: 2 (4)
## savings_balance = < 100 DM:
## :...personal_status in {single male,female,divorced male}: 2 (29/6)
## personal_status = married male: 1 (2)
##
## SubTree [S3]
##
## job = unskilled resident: 2 (1)
## job in {skilled employee,unemployed non-resident}: 1 (8/1)
##
## SubTree [S4]
##
## employment_length in {> 7 yrs,4 - 7 yrs}: 1 (7/1)
## employment_length = unemployed: 2 (2)
## employment_length = 0 - 1 yrs:
## :...job = unskilled resident: 2 (1)
## : job in {skilled employee,unemployed non-resident}: 1 (4)
## employment_length = 1 - 4 yrs:
## :...property in {building society savings,unknown/none}: 1 (5)
## property in {other,real estate}:
## :...residence_history <= 2: 1 (4/1)
## residence_history > 2: 2 (5)
##
## SubTree [S5]
##
## other_debtors in {none,guarantor}: 1 (3)
## other_debtors = co-applicant: 2 (1)
##
## SubTree [S6]
##
## housing = for free: 1 (2)
## housing = rent:
## :...credit_history = critical: 1 (1)
## : credit_history in {repaid,delayed}: 2 (10/2)
## housing = own:
## :...savings_balance = 101 - 500 DM: 1 (6)
## savings_balance in {< 100 DM,501 - 1000 DM}:
## :...residence_history <= 1: 1 (8/1)
## residence_history > 1:
## :...installment_rate <= 1: 1 (2)
## installment_rate > 1:
## :...employment_length in {> 7 yrs,unemployed}: 1 (13/6)
## employment_length in {1 - 4 yrs,0 - 1 yrs}: 2 (10)
##
##
## Evaluation on training data (900 cases):
##
## Decision Tree
## ----------------
## Size Errors
##
## 57 127(14.1%) <<
##
##
## (a) (b) <-classified as
## ---- ----
## 590 42 (a): class 1
## 85 183 (b): class 2
##
##
## Attribute usage:
##
## 100.00% checking_balance
## 60.22% foreign_worker
## 57.89% credit_history
## 51.11% months_loan_duration
## 42.67% savings_balance
## 30.44% other_debtors
## 17.78% job
## 15.56% installment_plan
## 14.89% purpose
## 12.89% employment_length
## 10.22% amount
## 6.78% residence_history
## 5.78% housing
## 3.89% dependents
## 3.56% installment_rate
## 3.44% personal_status
## 2.78% age
## 1.56% property
## 1.33% existing_credits
##
##
## Time: 0.0 secs
# Predict the class of each test row with the single-tree model.
credit_pred <- predict(object = credit_model, newdata = credit_test[-17])
# Cross-tabulate actual against predicted classes, showing counts and
# table-total proportions only.
CrossTable(
  x = credit_test$default,
  y = credit_pred,
  prop.chisq = FALSE,
  prop.c = FALSE,
  prop.r = FALSE,
  dnn = c("Actual Default", "Predicted Default")
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | Predicted Default
## Actual Default | 1 | 2 | Row Total |
## ---------------|-----------|-----------|-----------|
## 1 | 54 | 14 | 68 |
## | 0.540 | 0.140 | |
## ---------------|-----------|-----------|-----------|
## 2 | 11 | 21 | 32 |
## | 0.110 | 0.210 | |
## ---------------|-----------|-----------|-----------|
## Column Total | 65 | 35 | 100 |
## ---------------|-----------|-----------|-----------|
##
##
# Refit with boosting over 10 trials.
credit_boost10 <- C5.0(
  x = credit_train[-17],
  y = credit_train$default,
  trials = 10
)
# Predict the test rows with the boosted model.
credit_boost_pred10 <- predict(
  object = credit_boost10,
  newdata = credit_test[-17]
)
# Cross-tabulate actual against predicted classes, showing counts and
# table-total proportions only.
CrossTable(
  x = credit_test$default,
  y = credit_boost_pred10,
  prop.chisq = FALSE,
  prop.c = FALSE,
  prop.r = FALSE,
  dnn = c("Actual Default", "Predicted Default")
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | Predicted Default
## Actual Default | 1 | 2 | Row Total |
## ---------------|-----------|-----------|-----------|
## 1 | 63 | 5 | 68 |
## | 0.630 | 0.050 | |
## ---------------|-----------|-----------|-----------|
## 2 | 16 | 16 | 32 |
## | 0.160 | 0.160 | |
## ---------------|-----------|-----------|-----------|
## Column Total | 79 | 21 | 100 |
## ---------------|-----------|-----------|-----------|
##
##
# Label the cost matrix with the outcome's actual factor levels. The earlier
# "no"/"yes" labels did not match the levels of `default` and were never
# attached to the matrix, so C5.0 warned and fell back to the factor levels.
default_levels <- levels(credit_train$default)
matrix_dimensions <- list(predicted = default_levels, actual = default_levels)
# Filled column by column: the off-diagonal cell in the second column
# (predicted first level, actual second level) carries a cost of 4, the
# opposite error a cost of 1, and correct predictions cost 0.
error_cost <- matrix(c(0, 1, 4, 0), nrow = 2, dimnames = matrix_dimensions)
# Fit a tree that takes the unequal error costs into account.
credit_cost <- C5.0(credit_train[-17], credit_train$default, costs = error_cost)
# Predict the test rows with the cost-sensitive model.
credit_cost_pred <- predict(credit_cost, credit_test[-17])
# Cross-tabulate actual against predicted classes, showing counts and
# table-total proportions only.
CrossTable(credit_test$default, credit_cost_pred,
  prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
  dnn = c("Actual Default", "Predicted Default"))
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | Predicted Default
## Actual Default | 1 | 2 | Row Total |
## ---------------|-----------|-----------|-----------|
## 1 | 38 | 30 | 68 |
## | 0.380 | 0.300 | |
## ---------------|-----------|-----------|-----------|
## 2 | 5 | 27 | 32 |
## | 0.050 | 0.270 | |
## ---------------|-----------|-----------|-----------|
## Column Total | 43 | 57 | 100 |
## ---------------|-----------|-----------|-----------|
##
##
# Label the cost matrix with the outcome's actual factor levels. The earlier
# "no"/"yes" labels did not match the levels of `default` and were never
# attached to the matrix, so C5.0 warned and fell back to the factor levels.
default_levels <- levels(credit_train$default)
matrix_dimensions <- list(predicted = default_levels, actual = default_levels)
# Filled column by column: the off-diagonal cell in the second column
# (predicted first level, actual second level) carries a cost of 4, the
# opposite error a cost of 1, and correct predictions cost 0.
error_cost <- matrix(c(0, 1, 4, 0), nrow = 2, dimnames = matrix_dimensions)
# Fit a tree that takes the unequal error costs into account.
credit_cost <- C5.0(credit_train[-17], credit_train$default, costs = error_cost)
# Predict the test rows with the cost-sensitive model.
credit_cost_pred <- predict(credit_cost, credit_test[-17])
# Cross-tabulate actual against predicted classes, showing counts and
# table-total proportions only.
CrossTable(credit_test$default, credit_cost_pred,
  prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
  dnn = c("Actual Default", "Predicted Default"))
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | Predicted Default
## Actual Default | 1 | 2 | Row Total |
## ---------------|-----------|-----------|-----------|
## 1 | 38 | 30 | 68 |
## | 0.380 | 0.300 | |
## ---------------|-----------|-----------|-----------|
## 2 | 5 | 27 | 32 |
## | 0.050 | 0.270 | |
## ---------------|-----------|-----------|-----------|
## Column Total | 43 | 57 | 100 |
## ---------------|-----------|-----------|-----------|
##
##
library(partykit )
## Warning: package 'partykit' was built under R version 4.4.3
## Loading required package: grid
## Loading required package: libcoin
## Warning: package 'libcoin' was built under R version 4.4.3
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 4.4.3
# Draw the single-tree model.
# NOTE(review): the "NAs introduced by coercion" warnings this call emits
# presumably come from categorical predictors being stored as character
# rather than factor columns - confirm.
plot(x = credit_model)
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : NAs
## introduced by coercion
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf, breaks_split(split),
## : NAs introduced by coercion
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf, breaks_split(split),
## : NAs introduced by coercion
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf, breaks_split(split),
## : NAs introduced by coercion
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf, breaks_split(split),
## : NAs introduced by coercion
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf, breaks_split(split),
## : NAs introduced by coercion
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf, breaks_split(split),
## : NAs introduced by coercion
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf, breaks_split(split),
## : NAs introduced by coercion
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf, breaks_split(split),
## : NAs introduced by coercion
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf, breaks_split(split),
## : NAs introduced by coercion
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf, breaks_split(split),
## : NAs introduced by coercion
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf, breaks_split(split),
## : NAs introduced by coercion
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.