Abstract
決定木と分類器: 融資データから, 融資が焦げつくかどうかを予測する
# データ収集
## Load the loan data.
## Text columns are read as factors (stringsAsFactors = TRUE): since R 4.0
## read.csv() leaves them as character by default, and the tree plot then tries
## to coerce the category labels to numbers ("NAs introduced by coercion").
## The explicit as.factor() on the outcome is kept as a guard so that `default`
## is a factor regardless of how the column was parsed.
credit <- read.csv("credit.csv", stringsAsFactors = TRUE) %>%
  mutate(default = as.factor(default))
## Show the column types and the first few values of every column.
glimpse(credit)
## Rows: 1,000
## Columns: 17
## $ checking_balance <chr> "< 0 DM", "1 - 200 DM", "unknown", "< 0 DM", "<…
## $ months_loan_duration <int> 6, 48, 12, 42, 24, 36, 24, 36, 12, 30, 12, 48, …
## $ credit_history <chr> "critical", "good", "critical", "good", "poor",…
## $ purpose <chr> "furniture/appliances", "furniture/appliances",…
## $ amount <int> 1169, 5951, 2096, 7882, 4870, 9055, 2835, 6948,…
## $ savings_balance <chr> "unknown", "< 100 DM", "< 100 DM", "< 100 DM", …
## $ employment_duration <chr> "> 7 years", "1 - 4 years", "4 - 7 years", "4 -…
## $ percent_of_income <int> 4, 2, 2, 2, 3, 2, 3, 2, 2, 4, 3, 3, 1, 4, 2, 4,…
## $ years_at_residence <int> 4, 2, 3, 4, 4, 4, 4, 2, 4, 2, 1, 4, 1, 4, 4, 2,…
## $ age <int> 67, 22, 49, 45, 53, 35, 53, 35, 61, 28, 25, 24,…
## $ other_credit <chr> "none", "none", "none", "none", "none", "none",…
## $ housing <chr> "own", "own", "own", "other", "other", "other",…
## $ existing_loans_count <int> 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1,…
## $ job <chr> "skilled", "skilled", "unskilled", "skilled", "…
## $ dependents <int> 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ phone <chr> "yes", "no", "no", "no", "no", "yes", "no", "ye…
## $ default <fct> no, yes, no, no, yes, no, no, no, no, yes, yes,…
## About 30% of the loans ended in default: count the loans in each outcome class.
table(credit[["default"]])
##
## no yes
## 700 300
# 前処理
## Random sampling: 900 rows for training, the remaining rows for testing.
## The seed is fixed so that the split is reproducible.
set.seed(1)
### Draw 900 row indices without replacement. The upper bound is taken from
### nrow(credit) instead of a literal 1000, so the indices always refer to
### rows that were actually loaded (same draw as before for the 1,000-row file).
train_sample <- sample(nrow(credit), 900)
### Training data: the sampled rows
credit_train <- credit[train_sample, ]
### Test data: every row that was not sampled
credit_test <- credit[-train_sample, ]
### The class proportions should be roughly the same in both subsets
prop.table(table(credit_train$default))
##
## no yes
## 0.7033333 0.2966667
## Class proportions in the held-out test set, for comparison with the training set.
prop.table(table(credit_test[["default"]]))
##
## no yes
## 0.67 0.33
# モデルを訓練する
library(C50)
## Fit a single C5.0 decision tree. Column 17 (the outcome, `default`) is dropped
## from the predictors and passed separately as the factor of class labels.
credit_model <- C5.0(
  x = credit_train[-17],
  y = credit_train$default,
  trials = 1,
  costs = NULL
)
## Print the model overview (sample count, predictor count, tree size).
print(credit_model)
##
## Call:
## C5.0.default(x = credit_train[-17], y = credit_train$default, trials = 1,
## costs = NULL)
##
## Classification Tree
## Number of samples: 900
## Number of predictors: 16
##
## Tree size: 51
##
## Non-standard options: attempt to group attributes
決定木のサイズ(葉ノードの数)が51であり, 特徴量(予測変数)は16個であることがわかる. なお, このサイズは木の深さではない.
## 決定木の可視化
library(partykit)
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm
## Draw the fitted decision tree.
plot(credit_model)
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました
## 結果の解釈
## Print the full tree, the confusion matrix on the training data and the
## attribute usage of the fitted model.
print(summary(credit_model))
##
## Call:
## C5.0.default(x = credit_train[-17], y = credit_train$default, trials = 1,
## costs = NULL)
##
##
## C5.0 [Release 2.07 GPL Edition] Tue Mar 23 00:54:23 2021
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 900 cases (17 attributes) from undefined.data
##
## Decision tree:
##
## checking_balance in {unknown,> 200 DM}: no (418/53)
## checking_balance in {< 0 DM,1 - 200 DM}:
## :...credit_history in {perfect,very good}:
## :...dependents > 1: yes (10)
## : dependents <= 1:
## : :...savings_balance in {< 100 DM,100 - 500 DM,
## : : > 1000 DM}: yes (43/12)
## : savings_balance in {unknown,500 - 1000 DM}: no (8/1)
## credit_history in {good,critical,poor}:
## :...months_loan_duration <= 22:
## :...employment_duration = 4 - 7 years: no (36/3)
## : employment_duration in {1 - 4 years,> 7 years,unemployed,< 1 year}:
## : :...purpose in {business,car0,renovations}: no (26/3)
## : purpose = education:
## : :...savings_balance in {< 100 DM,100 - 500 DM,500 - 1000 DM,
## : : : > 1000 DM}: yes (6)
## : : savings_balance = unknown: no (4)
## : purpose = furniture/appliances:
## : :...savings_balance = 100 - 500 DM: yes (3)
## : : savings_balance in {500 - 1000 DM,
## : : : > 1000 DM}: no (10/1)
## : : savings_balance = unknown:
## : : :...credit_history in {critical,poor}: no (3)
## : : : credit_history = good:
## : : : :...years_at_residence <= 3: yes (6/1)
## : : : years_at_residence > 3: no (2)
## : : savings_balance = < 100 DM:
## : : :...months_loan_duration <= 16: no (54/11)
## : : months_loan_duration > 16:
## : : :...checking_balance = 1 - 200 DM: no (9/2)
## : : checking_balance = < 0 DM: [S1]
## : purpose = car:
## : :...other_credit = store: no (0)
## : other_credit = bank: yes (11/3)
## : other_credit = none:
## : :...credit_history in {critical,poor}: no (20/2)
## : credit_history = good:
## : :...years_at_residence <= 1: yes (4)
## : years_at_residence > 1:
## : :...housing = rent: no (8/2)
## : housing = other: yes (5/1)
## : housing = own:
## : :...age <= 25: yes (4)
## : age > 25: no (15/4)
## months_loan_duration > 22:
## :...savings_balance = 500 - 1000 DM: yes (3/1)
## savings_balance = > 1000 DM: no (4/1)
## savings_balance = 100 - 500 DM:
## :...employment_duration in {1 - 4 years,< 1 year}: yes (13/2)
## : employment_duration in {> 7 years,unemployed,
## : 4 - 7 years}: no (12/3)
## savings_balance = unknown:
## :...checking_balance = < 0 DM: yes (12/4)
## : checking_balance = 1 - 200 DM: no (17/1)
## savings_balance = < 100 DM:
## :...months_loan_duration > 47: yes (19/2)
## months_loan_duration <= 47:
## :...housing = other:
## :...percent_of_income <= 2: no (5)
## : percent_of_income > 2: yes (9/3)
## housing = rent:
## :...other_credit = store: yes (0)
## : other_credit = bank: no (1)
## : other_credit = none:
## : :...percent_of_income > 2: yes (10/1)
## : percent_of_income <= 2:
## : :...years_at_residence <= 3: no (3)
## : years_at_residence > 3: yes (2)
## housing = own:
## :...employment_duration = > 7 years: no (14/5)
## employment_duration = 4 - 7 years: yes (9/1)
## employment_duration = unemployed:
## :...years_at_residence <= 2: yes (4)
## : years_at_residence > 2: no (3)
## employment_duration = 1 - 4 years:
## :...purpose in {furniture/appliances,
## : : renovations}: no (7)
## : purpose in {car,business,education,car0}:
## : :...years_at_residence <= 3: yes (10/1)
## : years_at_residence > 3: no (2)
## employment_duration = < 1 year:
## :...years_at_residence > 3: yes (7)
## years_at_residence <= 3:
## :...other_credit = bank: no (0)
## other_credit = store: yes (1)
## other_credit = none:
## :...checking_balance = < 0 DM: yes (6/2)
## checking_balance = 1 - 200 DM: no (8/2)
##
## SubTree [S1]
##
## employment_duration in {1 - 4 years,unemployed}: yes (10)
## employment_duration in {> 7 years,< 1 year}: no (4)
##
##
## Evaluation on training data (900 cases):
##
## Decision Tree
## ----------------
## Size Errors
##
## 48 128(14.2%) <<
##
##
## (a) (b) <-classified as
## ---- ----
## 599 34 (a): class no
## 94 173 (b): class yes
##
##
## Attribute usage:
##
## 100.00% checking_balance
## 53.56% credit_history
## 46.78% months_loan_duration
## 38.11% savings_balance
## 37.33% employment_duration
## 24.78% purpose
## 14.78% housing
## 10.89% other_credit
## 10.00% years_at_residence
## 6.78% dependents
## 3.22% percent_of_income
## 2.11% age
##
##
## Time: 0.0 secs
900件中, 772件的中している(約86%). 誤分類は128件で, このうち本当はyesなのにnoと誤分類(偽陰性)されているものが94件あり, これは融資リスクとして大きい. 逆に本当はnoなのにyesと誤分類(偽陽性)されているものは34件である. 理想は偽陰性を減らすこと.
# モデルの性能を評価する
## Classify the held-out loans with the single-tree model, then cross-tabulate
## actual against predicted classes (cell counts and row proportions only).
library(gmodels)
credit_pred <- predict(credit_model, newdata = credit_test)
CrossTable(
  x = credit_test$default,
  y = credit_pred,
  dnn = c("actual default", "predicted default"),
  prop.chisq = FALSE,
  prop.c = FALSE,
  prop.t = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | predicted default
## actual default | no | yes | Row Total |
## ---------------|-----------|-----------|-----------|
## no | 63 | 4 | 67 |
## | 0.940 | 0.060 | 0.670 |
## ---------------|-----------|-----------|-----------|
## yes | 17 | 16 | 33 |
## | 0.515 | 0.485 | 0.330 |
## ---------------|-----------|-----------|-----------|
## Column Total | 80 | 20 | 100 |
## ---------------|-----------|-----------|-----------|
##
##
テストデータでは79%の正解率で予測できている(訓練データでは約86%). ただし, 実際に焦げ付いた33件のうち的中したのは16件(約48%)にとどまる.
# モデルの性能を向上させる
## Boosting: allow up to 10 trials (trees) instead of a single tree.
## NOTE: `trials` sets the number of boosting iterations; it does not shrink
## the individual trees.
credit_model10 <- C5.0(
  x = credit_train[-17],
  y = credit_train$default,
  trials = 10
)
## Print every boosting trial's tree followed by the evaluation.
print(summary(credit_model10))
##
## Call:
## C5.0.default(x = credit_train[-17], y = credit_train$default, trials = 10)
##
##
## C5.0 [Release 2.07 GPL Edition] Tue Mar 23 00:54:28 2021
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 900 cases (17 attributes) from undefined.data
##
## ----- Trial 0: -----
##
## Decision tree:
##
## checking_balance in {unknown,> 200 DM}: no (418/53)
## checking_balance in {< 0 DM,1 - 200 DM}:
## :...credit_history in {perfect,very good}:
## :...dependents > 1: yes (10)
## : dependents <= 1:
## : :...savings_balance in {< 100 DM,100 - 500 DM,
## : : > 1000 DM}: yes (43/12)
## : savings_balance in {unknown,500 - 1000 DM}: no (8/1)
## credit_history in {good,critical,poor}:
## :...months_loan_duration <= 22:
## :...employment_duration = 4 - 7 years: no (36/3)
## : employment_duration in {1 - 4 years,> 7 years,unemployed,< 1 year}:
## : :...purpose in {business,car0,renovations}: no (26/3)
## : purpose = education:
## : :...savings_balance in {< 100 DM,100 - 500 DM,500 - 1000 DM,
## : : : > 1000 DM}: yes (6)
## : : savings_balance = unknown: no (4)
## : purpose = furniture/appliances:
## : :...savings_balance = 100 - 500 DM: yes (3)
## : : savings_balance in {500 - 1000 DM,
## : : : > 1000 DM}: no (10/1)
## : : savings_balance = unknown:
## : : :...credit_history in {critical,poor}: no (3)
## : : : credit_history = good:
## : : : :...years_at_residence <= 3: yes (6/1)
## : : : years_at_residence > 3: no (2)
## : : savings_balance = < 100 DM:
## : : :...months_loan_duration <= 16: no (54/11)
## : : months_loan_duration > 16:
## : : :...checking_balance = 1 - 200 DM: no (9/2)
## : : checking_balance = < 0 DM: [S1]
## : purpose = car:
## : :...other_credit = store: no (0)
## : other_credit = bank: yes (11/3)
## : other_credit = none:
## : :...credit_history in {critical,poor}: no (20/2)
## : credit_history = good:
## : :...years_at_residence <= 1: yes (4)
## : years_at_residence > 1:
## : :...housing = rent: no (8/2)
## : housing = other: yes (5/1)
## : housing = own:
## : :...age <= 25: yes (4)
## : age > 25: no (15/4)
## months_loan_duration > 22:
## :...savings_balance = 500 - 1000 DM: yes (3/1)
## savings_balance = > 1000 DM: no (4/1)
## savings_balance = 100 - 500 DM:
## :...employment_duration in {1 - 4 years,< 1 year}: yes (13/2)
## : employment_duration in {> 7 years,unemployed,
## : 4 - 7 years}: no (12/3)
## savings_balance = unknown:
## :...checking_balance = < 0 DM: yes (12/4)
## : checking_balance = 1 - 200 DM: no (17/1)
## savings_balance = < 100 DM:
## :...months_loan_duration > 47: yes (19/2)
## months_loan_duration <= 47:
## :...housing = other:
## :...percent_of_income <= 2: no (5)
## : percent_of_income > 2: yes (9/3)
## housing = rent:
## :...other_credit = store: yes (0)
## : other_credit = bank: no (1)
## : other_credit = none:
## : :...percent_of_income > 2: yes (10/1)
## : percent_of_income <= 2:
## : :...years_at_residence <= 3: no (3)
## : years_at_residence > 3: yes (2)
## housing = own:
## :...employment_duration = > 7 years: no (14/5)
## employment_duration = 4 - 7 years: yes (9/1)
## employment_duration = unemployed:
## :...years_at_residence <= 2: yes (4)
## : years_at_residence > 2: no (3)
## employment_duration = 1 - 4 years:
## :...purpose in {furniture/appliances,
## : : renovations}: no (7)
## : purpose in {car,business,education,car0}:
## : :...years_at_residence <= 3: yes (10/1)
## : years_at_residence > 3: no (2)
## employment_duration = < 1 year:
## :...years_at_residence > 3: yes (7)
## years_at_residence <= 3:
## :...other_credit = bank: no (0)
## other_credit = store: yes (1)
## other_credit = none:
## :...checking_balance = < 0 DM: yes (6/2)
## checking_balance = 1 - 200 DM: no (8/2)
##
## SubTree [S1]
##
## employment_duration in {1 - 4 years,unemployed}: yes (10)
## employment_duration in {> 7 years,< 1 year}: no (4)
##
## ----- Trial 1: -----
##
## Decision tree:
##
## months_loan_duration <= 7: no (66.2/8.5)
## months_loan_duration > 7:
## :...checking_balance = unknown:
## :...other_credit in {bank,store}:
## : :...employment_duration = 4 - 7 years: no (7.1)
## : : employment_duration in {1 - 4 years,> 7 years,unemployed,< 1 year}:
## : : :...purpose in {car,business,education,renovations}: yes (38.1/8.7)
## : : purpose in {furniture/appliances,car0}: no (21/6.8)
## : other_credit = none:
## : :...age > 31: no (131.7/9)
## : age <= 31:
## : :...amount > 6458: yes (15.2/4)
## : amount <= 6458:
## : :...credit_history in {perfect,critical,
## : : very good}: no (20.6)
## : credit_history in {good,poor}:
## : :...percent_of_income <= 1: no (5.5)
## : percent_of_income > 1:
## : :...age <= 23: yes (16.7/3.2)
## : age > 23: no (51.4/15.8)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...savings_balance in {unknown,500 - 1000 DM,> 1000 DM}:
## :...purpose in {business,education}: no (15/1.6)
## : purpose in {car0,renovations}: yes (5.3/0.8)
## : purpose = car:
## : :...amount <= 1424: yes (10.7/0.8)
## : : amount > 1424: no (21.8/5.4)
## : purpose = furniture/appliances:
## : :...housing in {own,rent}: no (50.7/11.6)
## : housing = other: yes (3.8/0.8)
## savings_balance in {< 100 DM,100 - 500 DM}:
## :...months_loan_duration > 42:
## :...years_at_residence <= 1: no (5.3/0.8)
## : years_at_residence > 1: yes (33.6/5.3)
## months_loan_duration <= 42:
## :...other_credit = store: no (20.1/10.1)
## other_credit = bank:
## :...years_at_residence <= 1: no (15.4/0.8)
## : years_at_residence > 1:
## : :...percent_of_income > 3: yes (18.8/3.8)
## : percent_of_income <= 3:
## : :...years_at_residence <= 2: yes (2.3)
## : years_at_residence > 2: no (27.7/6.3)
## other_credit = none:
## :...credit_history in {perfect,poor,very good}: yes (45/16.3)
## credit_history = critical:
## :...housing = other: no (7)
## : housing in {own,rent}:
## : :...percent_of_income <= 1: no (4.7)
## : percent_of_income > 1:
## : :...dependents > 1: no (6.3/0.8)
## : dependents <= 1:
## : :...percent_of_income <= 3: yes (20/5.5)
## : percent_of_income > 3: no (28.1/9.3)
## credit_history = good:
## :...purpose in {business,car0}: no (10.2/3)
## purpose in {education,renovations}: yes (14/5.5)
## purpose = car:
## :...employment_duration in {1 - 4 years,> 7 years,
## : : < 1 year,
## : : 4 - 7 years}: yes (49.6/13.5)
## : employment_duration = unemployed: no (6.1/0.8)
## purpose = furniture/appliances:
## :...employment_duration in {> 7 years,
## : 4 - 7 years}: no (23.4/7.7)
## employment_duration in {unemployed,
## : < 1 year}: yes (35.5/13.2)
## employment_duration = 1 - 4 years:
## :...savings_balance = 100 - 500 DM: yes (2.4)
## savings_balance = < 100 DM:
## :...phone = yes: no (6.3/1.6)
## phone = no:
## :...housing = rent: no (3.2/1.6)
## housing = other: yes (0.8)
## housing = own: [S1]
##
## SubTree [S1]
##
## checking_balance = < 0 DM: no (10.3/1.6)
## checking_balance in {1 - 200 DM,> 200 DM}: yes (22.9/7.1)
##
## ----- Trial 2: -----
##
## Decision tree:
##
## checking_balance = unknown:
## :...other_credit = bank:
## : :...existing_loans_count > 2: no (3.7)
## : : existing_loans_count <= 2:
## : : :...housing = rent: yes (9.5/2.8)
## : : housing = other: no (7.7/1.8)
## : : housing = own:
## : : :...phone = no: no (20.8/8.5)
## : : phone = yes: yes (12.3/1.9)
## : other_credit in {none,store}:
## : :...credit_history in {critical,very good}: no (86.2/5.5)
## : credit_history in {perfect,good,poor}:
## : :...employment_duration in {> 7 years,4 - 7 years}: no (64.5/9.7)
## : employment_duration in {1 - 4 years,unemployed,< 1 year}:
## : :...dependents > 1: no (10.9/1.8)
## : dependents <= 1:
## : :...amount > 4455: yes (26.5/6)
## : amount <= 4455:
## : :...percent_of_income <= 1: no (7.1)
## : percent_of_income > 1:
## : :...savings_balance in {100 - 500 DM,unknown,
## : : > 1000 DM}: no (18.3/3)
## : savings_balance = 500 - 1000 DM: yes (7.3/1.3)
## : savings_balance = < 100 DM:
## : :...age <= 23: yes (7.4)
## : age > 23: no (30/7.8)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...other_credit = store:
## :...months_loan_duration <= 11: no (2.5)
## : months_loan_duration > 11: yes (29.9/8.3)
## other_credit in {bank,none}:
## :...amount > 10722: yes (26.6/5.3)
## amount <= 10722:
## :...checking_balance in {1 - 200 DM,> 200 DM}:
## :...dependents <= 1: no (255.5/88.9)
## : dependents > 1:
## : :...housing = rent: no (3.7/1.5)
## : housing = other: yes (8/0.6)
## : housing = own:
## : :...months_loan_duration <= 13: no (10/0.6)
## : months_loan_duration > 13: yes (18.7/5.6)
## checking_balance = < 0 DM:
## :...savings_balance in {100 - 500 DM,> 1000 DM}: no (13.6/3.8)
## savings_balance = 500 - 1000 DM: yes (4.1/1.9)
## savings_balance = unknown:
## :...months_loan_duration <= 33: no (22.8/10.2)
## : months_loan_duration > 33: yes (4.5)
## savings_balance = < 100 DM:
## :...job in {unskilled,management}: no (67.7/25.4)
## job = unemployed: yes (2.6/0.6)
## job = skilled:
## :...amount > 5771: yes (11.7/0.6)
## amount <= 5771:
## :...employment_duration in {> 7 years,< 1 year,
## : 4 - 7 years}: yes (63/22.4)
## employment_duration = unemployed: no (3.8/1.3)
## employment_duration = 1 - 4 years:
## :...housing = other: yes (2.6)
## housing in {own,rent}:
## :...dependents > 1: no (3.2)
## dependents <= 1:
## :...amount > 4153: no (5.1)
## amount <= 4153: [S1]
##
## SubTree [S1]
##
## months_loan_duration <= 16: no (11.9/4.3)
## months_loan_duration > 16: yes (16.3/4)
##
## ----- Trial 3: -----
##
## Decision tree:
##
## checking_balance = unknown:
## :...other_credit in {bank,store}:
## : :...employment_duration = 4 - 7 years: no (5.5)
## : : employment_duration in {1 - 4 years,> 7 years,unemployed,< 1 year}:
## : : :...age > 44: no (11.8/1.5)
## : : age <= 44:
## : : :...years_at_residence <= 1: no (3.6)
## : : years_at_residence > 1: yes (49.5/16.5)
## : other_credit = none:
## : :...credit_history in {perfect,critical,very good}: no (71.2/4.8)
## : credit_history in {good,poor}:
## : :...existing_loans_count <= 1:
## : :...credit_history = good: no (98.7/24.1)
## : : credit_history = poor: yes (8.6/2.7)
## : existing_loans_count > 1:
## : :...employment_duration in {1 - 4 years,> 7 years,unemployed,
## : : < 1 year}: yes (32.5/11.8)
## : employment_duration = 4 - 7 years: no (6.2)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...employment_duration = 1 - 4 years:
## :...credit_history in {perfect,critical,poor,very good}: no (78.4/31.9)
## : credit_history = good: yes (127.5/51.8)
## employment_duration = unemployed:
## :...years_at_residence <= 2: yes (20.6/4.4)
## : years_at_residence > 2: no (23.9/4.3)
## employment_duration = < 1 year:
## :...housing = rent: yes (38.9/10.5)
## : housing = other: no (6.2/1.7)
## : housing = own:
## : :...savings_balance in {unknown,500 - 1000 DM,> 1000 DM}: no (9.1)
## : savings_balance in {< 100 DM,100 - 500 DM}:
## : :...years_at_residence <= 1: no (35.8/12.5)
## : years_at_residence > 1: yes (51.3/17.5)
## employment_duration = 4 - 7 years:
## :...savings_balance in {100 - 500 DM,unknown}: no (30.2/8.1)
## : savings_balance in {500 - 1000 DM,> 1000 DM}: yes (6/1.1)
## : savings_balance = < 100 DM:
## : :...months_loan_duration <= 22: no (25.8/5.8)
## : months_loan_duration > 22:
## : :...months_loan_duration <= 40: yes (17.7/3.4)
## : months_loan_duration > 40: no (11.2/3.4)
## employment_duration = > 7 years:
## :...months_loan_duration > 33: yes (21.5/2.9)
## months_loan_duration <= 33:
## :...purpose in {business,education,car0}: yes (23.8/9)
## purpose = renovations: no (1.8)
## purpose = furniture/appliances:
## :...years_at_residence <= 3: yes (16.8/6.3)
## : years_at_residence > 3: no (24.1/3.3)
## purpose = car:
## :...job in {unskilled,unemployed}: yes (6.8/0.5)
## job in {skilled,management}:
## :...checking_balance = > 200 DM: no (2.9)
## checking_balance in {< 0 DM,1 - 200 DM}:
## :...months_loan_duration <= 18: yes (17.6/2.3)
## months_loan_duration > 18: no (14.7/3.3)
##
## ----- Trial 4: -----
##
## Decision tree:
##
## checking_balance = unknown:
## :...employment_duration = > 7 years: no (75.9/17.1)
## : employment_duration = unemployed: yes (15.8/7.2)
## : employment_duration = 4 - 7 years:
## : :...age <= 22: yes (8/3.2)
## : : age > 22: no (30.7)
## : employment_duration = < 1 year:
## : :...purpose = car0: no (0)
## : : purpose in {business,renovations}: yes (7.9)
## : : purpose in {car,furniture/appliances,education}:
## : : :...months_loan_duration <= 30: no (28.1/6.3)
## : : months_loan_duration > 30: yes (2.8)
## : employment_duration = 1 - 4 years:
## : :...months_loan_duration <= 9: no (13.6)
## : months_loan_duration > 9:
## : :...housing = rent: yes (14.6/4.2)
## : housing in {own,other}:
## : :...job = unemployed: no (0)
## : job = management: yes (13.3/4.4)
## : job in {skilled,unskilled}:
## : :...purpose in {furniture/appliances,car0,
## : : renovations}: no (17.8)
## : purpose in {car,business,education}:
## : :...amount <= 2255: yes (20.3/6.5)
## : amount > 2255:
## : :...credit_history in {perfect,critical}: yes (8.8/3.5)
## : credit_history in {good,poor,
## : very good}: no (18.4)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...credit_history in {perfect,very good}:
## :...dependents > 1: yes (12.4)
## : dependents <= 1:
## : :...age <= 23: no (5.3)
## : age > 23:
## : :...housing = rent: yes (12.9/0.5)
## : housing in {own,other}:
## : :...other_credit = store: yes (3)
## : other_credit in {bank,none}:
## : :...months_loan_duration <= 10: yes (4.7)
## : months_loan_duration > 10:
## : :...percent_of_income <= 3: no (24.7/7.7)
## : percent_of_income > 3: yes (17/5.5)
## credit_history in {good,critical,poor}:
## :...amount > 7980: yes (37.5/9.3)
## amount <= 7980:
## :...months_loan_duration <= 8: no (39.1/7)
## months_loan_duration > 8:
## :...purpose = education: yes (24.2/10.1)
## purpose in {car0,renovations}: no (17.2/5)
## purpose = business:
## :...age <= 53: no (32.9/9.2)
## : age > 53: yes (2.5)
## purpose = car:
## :...percent_of_income <= 2:
## : :...amount <= 1386: yes (5.7/0.5)
## : : amount > 1386: no (40.3/6.3)
## : percent_of_income > 2:
## : :...years_at_residence <= 2:
## : :...percent_of_income <= 3: yes (14.3/1.5)
## : : percent_of_income > 3:
## : : :...phone = no: yes (28.1/5.5)
## : : phone = yes: no (6.9/2)
## : years_at_residence > 2:
## : :...months_loan_duration <= 15: no (19.8/4.8)
## : months_loan_duration > 15:
## : :...employment_duration in {unemployed,
## : : < 1 year}: yes (6.2)
## : employment_duration in {1 - 4 years,> 7 years,
## : : 4 - 7 years}:
## : :...amount <= 1680: yes (10.5/1)
## : amount > 1680: no (19.3/5.8)
## purpose = furniture/appliances:
## :...savings_balance = 100 - 500 DM: yes (18.2/5.1)
## savings_balance in {500 - 1000 DM,
## : > 1000 DM}: no (16.1/4.7)
## savings_balance = unknown:
## :...job in {skilled,management,unemployed}: yes (28.9/11.5)
## : job = unskilled: no (3.8)
## savings_balance = < 100 DM:
## :...phone = yes:
## :...other_credit in {bank,store}: no (1.9)
## : other_credit = none:
## : :...checking_balance in {< 0 DM,
## : : 1 - 200 DM}: yes (32/11.1)
## : checking_balance = > 200 DM: no (2.9)
## phone = no:
## :...other_credit = bank: no (11.7/4.6)
## other_credit = store: yes (10.8/3.6)
## other_credit = none:
## :...job in {skilled,management}: no (76.3/26.7)
## job = unemployed: yes (0.5)
## job = unskilled:
## :...months_loan_duration > 27: yes (3)
## months_loan_duration <= 27: [S1]
##
## SubTree [S1]
##
## checking_balance in {< 0 DM,1 - 200 DM}: no (26.4/5.7)
## checking_balance = > 200 DM: yes (7/1.7)
##
## ----- Trial 5: -----
##
## Decision tree:
##
## checking_balance in {unknown,> 200 DM}:
## :...other_credit = store: no (19.3/6.7)
## : other_credit = bank:
## : :...job in {skilled,management,unemployed}: no (38.8/13.9)
## : : job = unskilled: yes (18.4/6.1)
## : other_credit = none:
## : :...age > 34: no (108/15.5)
## : age <= 34:
## : :...savings_balance = 500 - 1000 DM: yes (10.5/3.1)
## : savings_balance = > 1000 DM: no (8.1)
## : savings_balance in {< 100 DM,100 - 500 DM,unknown}:
## : :...credit_history in {perfect,very good}: yes (5.6/1.6)
## : credit_history = poor: no (18.4/8)
## : credit_history = critical:
## : :...housing = own: no (20.2)
## : : housing in {rent,other}: yes (10.7/3.3)
## : credit_history = good:
## : :...purpose in {car,business,education,car0}: no (31.7/4.9)
## : purpose = renovations: yes (1.6)
## : purpose = furniture/appliances:
## : :...savings_balance = unknown: no (4.1)
## : savings_balance in {< 100 DM,100 - 500 DM}:
## : :...existing_loans_count <= 1: no (39.4/17.7)
## : existing_loans_count > 1: yes (7/0.4)
## checking_balance in {< 0 DM,1 - 200 DM}:
## :...savings_balance in {500 - 1000 DM,> 1000 DM}: no (33.3/14.8)
## savings_balance = unknown:
## :...credit_history in {perfect,critical,poor}: no (16.5)
## : credit_history in {good,very good}:
## : :...checking_balance = < 0 DM: yes (26.1/8.3)
## : checking_balance = 1 - 200 DM: no (33.5/10)
## savings_balance = 100 - 500 DM:
## :...job = unemployed: no (0)
## : job in {unskilled,management}: yes (14.2/3.9)
## : job = skilled:
## : :...credit_history in {critical,poor}: no (13.2/0.4)
## : credit_history in {perfect,good,very good}:
## : :...housing in {own,other}: no (23.7/10.3)
## : housing = rent: yes (7.2)
## savings_balance = < 100 DM:
## :...months_loan_duration > 47: yes (27.5/4.4)
## months_loan_duration <= 47:
## :...housing = other:
## :...existing_loans_count <= 2: yes (29.2/12.4)
## : existing_loans_count > 2: no (2.3)
## housing = rent:
## :...checking_balance = 1 - 200 DM: no (23.6/9.5)
## : checking_balance = < 0 DM:
## : :...phone = yes: yes (18.6/1.6)
## : phone = no:
## : :...years_at_residence <= 3: no (7/0.8)
## : years_at_residence > 3: yes (27.1/8.9)
## housing = own:
## :...job = unemployed: no (2.4/0.8)
## job = management:
## :...years_at_residence <= 1: no (4.4)
## : years_at_residence > 1: yes (25.1/9.6)
## job = unskilled:
## :...employment_duration in {> 7 years,unemployed,
## : : 4 - 7 years}: no (18/4.1)
## : employment_duration = < 1 year: yes (23.6/10.2)
## : employment_duration = 1 - 4 years:
## : :...existing_loans_count > 2: yes (2.2)
## : existing_loans_count <= 2:
## : :...amount <= 672: yes (3.9)
## : amount > 672: no (26.9/5.7)
## job = skilled:
## :...purpose in {business,education,
## : renovations}: yes (25.4/7.6)
## purpose = car0: no (0.4)
## purpose = car:
## :...age <= 48: yes (28.7/11.1)
## : age > 48: no (5.4)
## purpose = furniture/appliances:
## :...credit_history in {perfect,critical,
## : poor}: no (20.5/6.6)
## credit_history = very good: yes (3.5)
## credit_history = good:
## :...months_loan_duration <= 7: no (5.3)
## months_loan_duration > 7:
## :...age > 46: no (3.2)
## age <= 46: [S1]
##
## SubTree [S1]
##
## checking_balance = 1 - 200 DM: yes (25.5/7.1)
## checking_balance = < 0 DM:
## :...months_loan_duration <= 16: no (6)
## months_loan_duration > 16: yes (24.7/8.9)
##
## ----- Trial 6: -----
##
## Decision tree:
##
## checking_balance = unknown:
## :...credit_history in {perfect,critical,very good}: no (96.9/23.9)
## : credit_history in {good,poor}:
## : :...purpose in {car,car0}: no (51.9/10.3)
## : purpose in {business,education,renovations}: yes (39.3/15.5)
## : purpose = furniture/appliances:
## : :...amount > 3275: yes (27.2/9.8)
## : amount <= 3275:
## : :...age <= 22: yes (11/2.1)
## : age > 22: no (38.6/6.8)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...credit_history in {perfect,very good}: yes (73.4/27.7)
## credit_history = poor:
## :...percent_of_income <= 1: no (6.7)
## : percent_of_income > 1:
## : :...age <= 46: no (26.8/10.3)
## : age > 46: yes (11.2/0.7)
## credit_history = critical:
## :...dependents > 1: no (22.3/5.2)
## : dependents <= 1:
## : :...savings_balance in {100 - 500 DM,unknown,> 1000 DM}: no (20.7/2.9)
## : savings_balance = 500 - 1000 DM: yes (6/1)
## : savings_balance = < 100 DM:
## : :...amount <= 1050: no (8.2)
## : amount > 1050:
## : :...housing = rent: yes (16.6/2.4)
## : housing = other: no (6.7/1.8)
## : housing = own:
## : :...years_at_residence <= 1: no (3.4)
## : years_at_residence > 1:
## : :...age <= 32: yes (24.8/4.8)
## : age > 32: no (21.8/7.5)
## credit_history = good:
## :...amount > 8648: yes (21.6/4.6)
## amount <= 8648:
## :...job in {management,unemployed}: no (46.6/10.9)
## job in {skilled,unskilled}:
## :...months_loan_duration > 33: yes (41.8/13.2)
## months_loan_duration <= 33:
## :...purpose in {business,education,car0,
## : renovations}: no (34.2/9.4)
## purpose = car:
## :...percent_of_income > 2: yes (46.4/15.1)
## : percent_of_income <= 2:
## : :...age <= 42: no (23.3/5.2)
## : age > 42: yes (5)
## purpose = furniture/appliances:
## :...months_loan_duration <= 7: no (8.2)
## months_loan_duration > 7:
## :...employment_duration in {> 7 years,
## : 4 - 7 years}: no (35.3/8.6)
## employment_duration = unemployed: yes (5.4)
## employment_duration = < 1 year:
## :...phone = no: no (39.9/13.3)
## : phone = yes: yes (8.2/2.5)
## employment_duration = 1 - 4 years:
## :...housing in {rent,other}: yes (8.2/2)
## housing = own:
## :...months_loan_duration > 22: no (10.5/1.9)
## months_loan_duration <= 22:
## :...months_loan_duration > 15: yes (13.9/3.5)
## months_loan_duration <= 15:
## :...phone = yes: no (6.8/2)
## phone = no: [S1]
##
## SubTree [S1]
##
## checking_balance = < 0 DM: no (5.3)
## checking_balance in {1 - 200 DM,> 200 DM}: yes (26.2/10)
##
## ----- Trial 7: -----
##
## Decision tree:
##
## checking_balance in {unknown,> 200 DM}:
## :...savings_balance = > 1000 DM: no (13.6)
## : savings_balance in {< 100 DM,100 - 500 DM,unknown,500 - 1000 DM}:
## : :...other_credit = none: no (237.6/69.1)
## : other_credit in {bank,store}:
## : :...existing_loans_count > 2: no (4.3)
## : existing_loans_count <= 2:
## : :...employment_duration in {> 7 years,unemployed}: yes (30.9/11.6)
## : employment_duration in {< 1 year,4 - 7 years}: no (18.4/5.9)
## : employment_duration = 1 - 4 years:
## : :...years_at_residence <= 1: no (3)
## : years_at_residence > 1: yes (24/6.1)
## checking_balance in {< 0 DM,1 - 200 DM}:
## :...employment_duration = unemployed:
## :...years_at_residence <= 2: yes (20.9/5)
## : years_at_residence > 2: no (23.4/4.1)
## employment_duration = 4 - 7 years:
## :...months_loan_duration <= 22: no (39/7.5)
## : months_loan_duration > 22:
## : :...dependents > 1: no (11.4/2.2)
## : dependents <= 1:
## : :...housing in {rent,other}: no (12.1/4.9)
## : housing = own:
## : :...credit_history in {perfect,good,critical,
## : : poor}: yes (22.8/4.6)
## : credit_history = very good: no (4.3)
## employment_duration = > 7 years:
## :...months_loan_duration > 33: yes (18.8/1.5)
## : months_loan_duration <= 33:
## : :...savings_balance in {100 - 500 DM,500 - 1000 DM}: no (6.7)
## : savings_balance in {< 100 DM,unknown,> 1000 DM}:
## : :...amount <= 776: no (7.9)
## : amount > 776:
## : :...existing_loans_count > 2: no (6.2/1.2)
## : existing_loans_count <= 2:
## : :...years_at_residence <= 3:
## : :...months_loan_duration <= 24: yes (24.8/3.7)
## : : months_loan_duration > 24: no (2.8)
## : years_at_residence > 3:
## : :...savings_balance in {unknown,
## : : > 1000 DM}: no (11.9/3.9)
## : savings_balance = < 100 DM:
## : :...purpose in {car,business,
## : : education}: yes (21.7/2.5)
## : purpose in {furniture/appliances,car0,
## : renovations}: no (12.1/1)
## employment_duration = < 1 year:
## :...housing = rent: yes (34/9.6)
## : housing in {own,other}:
## : :...savings_balance in {unknown,> 1000 DM}: no (8.3)
## : savings_balance in {< 100 DM,100 - 500 DM,500 - 1000 DM}:
## : :...purpose in {business,car0,renovations}: no (4.4/0.6)
## : purpose = education: yes (4.6/1.4)
## : purpose = car:
## : :...housing = other: yes (1.7)
## : : housing = own:
## : : :...age <= 25: yes (4.3)
## : : age > 25: no (24.2/5.8)
## : purpose = furniture/appliances:
## : :...dependents > 1: yes (2.6)
## : dependents <= 1:
## : :...housing = other: no (4.1)
## : housing = own:
## : :...phone = yes: yes (7.1/1.2)
## : phone = no:
## : :...existing_loans_count <= 1: no (22.7/9)
## : existing_loans_count > 1: yes (5.2/0.8)
## employment_duration = 1 - 4 years:
## :...housing = other: yes (10.6/2.9)
## housing = rent:
## :...years_at_residence <= 3: no (14.7/4.2)
## : years_at_residence > 3:
## : :...other_credit = bank: no (5.8/1.2)
## : other_credit in {none,store}: yes (21.3/4.3)
## housing = own:
## :...purpose in {business,education}: no (24.6/10.7)
## purpose in {car0,renovations}: yes (12.8/3.9)
## purpose = car:
## :...amount <= 1123: yes (5.2)
## : amount > 1123: no (24/6.8)
## purpose = furniture/appliances:
## :...dependents > 1: no (5.9)
## dependents <= 1:
## :...other_credit = store: no (6.3)
## other_credit in {bank,none}:
## :...savings_balance in {100 - 500 DM,unknown,
## : 500 - 1000 DM}: yes (16.4/3.7)
## savings_balance = > 1000 DM: no (1.2)
## savings_balance = < 100 DM:
## :...existing_loans_count > 1: no (8.2/0.6)
## existing_loans_count <= 1:
## :...months_loan_duration > 30: yes (3.8)
## months_loan_duration <= 30:
## :...other_credit = bank: yes (2.8/0.6)
## other_credit = none: no (32.4/12.3)
##
## ----- Trial 8: -----
##
## Decision tree:
##
## months_loan_duration <= 7: no (47.6/7.8)
## months_loan_duration > 7:
## :...savings_balance in {unknown,> 1000 DM}:
## :...checking_balance in {unknown,> 200 DM}: no (63.9/12)
## : checking_balance = 1 - 200 DM:
## : :...other_credit in {bank,none}: no (51/15)
## : : other_credit = store: yes (2.2)
## : checking_balance = < 0 DM:
## : :...savings_balance = > 1000 DM: no (2.2)
## : savings_balance = unknown:
## : :...employment_duration = unemployed: yes (0)
## : employment_duration = < 1 year: no (5.2)
## : employment_duration in {1 - 4 years,> 7 years,4 - 7 years}:
## : :...phone = no: yes (15/2.5)
## : phone = yes: no (14/5.6)
## savings_balance in {< 100 DM,100 - 500 DM,500 - 1000 DM}:
## :...checking_balance = unknown:
## :...age > 44: no (18.8)
## : age <= 44:
## : :...age <= 23:
## : :...savings_balance in {< 100 DM,500 - 1000 DM}: yes (23.5/4.1)
## : : savings_balance = 100 - 500 DM: no (3.5)
## : age > 23:
## : :...employment_duration = 4 - 7 years: no (12.9)
## : employment_duration in {1 - 4 years,> 7 years,unemployed,
## : : < 1 year}:
## : :...other_credit = store: no (12.6/6.2)
## : other_credit = bank:
## : :...age <= 34: no (22.7/8.5)
## : : age > 34: yes (11.6)
## : other_credit = none:
## : :...age > 31: no (20.6)
## : age <= 31:
## : :...dependents > 1: no (2.1)
## : dependents <= 1:
## : :...credit_history = very good: yes (0)
## : credit_history in {perfect,
## : : critical}: no (5)
## : credit_history in {good,poor}:
## : :...amount <= 1107: no (4.6)
## : amount > 1107:
## : :...age <= 25: no (3.1)
## : age > 25: yes (26.8/7)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...years_at_residence <= 1:
## :...other_credit = bank: no (14.6/1.4)
## : other_credit in {none,store}:
## : :...job = management: no (6.6/0.5)
## : job = unemployed: yes (5.9)
## : job in {skilled,unskilled}:
## : :...housing in {rent,other}: yes (11.2/2.2)
## : housing = own:
## : :...checking_balance = > 200 DM: no (9.9/2.1)
## : checking_balance in {< 0 DM,1 - 200 DM}:
## : :...other_credit = store: yes (4.9/1.5)
## : other_credit = none:
## : :...checking_balance = < 0 DM: yes (19.7/7.9)
## : checking_balance = 1 - 200 DM: no (19.7/5.3)
## years_at_residence > 1:
## :...months_loan_duration > 26: yes (117/37.6)
## months_loan_duration <= 26:
## :...credit_history in {perfect,very good}: yes (39.8/12.9)
## credit_history = poor: no (17.6/7.7)
## credit_history = good:
## :...dependents > 1: yes (22.1/8)
## : dependents <= 1:
## : :...purpose in {car,car0}: yes (50.6/23.5)
## : purpose in {business,education,
## : : renovations}: no (20.3/5)
## : purpose = furniture/appliances: [S1]
## credit_history = critical:
## :...housing = other: no (8.1)
## housing in {own,rent}:
## :...other_credit = bank: yes (14.2/4)
## other_credit = store: no (1)
## other_credit = none:
## :...existing_loans_count <= 1: no (9.5)
## existing_loans_count > 1:
## :...percent_of_income <= 1: no (3.8)
## percent_of_income > 1: [S2]
##
## SubTree [S1]
##
## employment_duration in {1 - 4 years,unemployed,< 1 year}: yes (75.5/32.9)
## employment_duration in {> 7 years,4 - 7 years}: no (14.5/2.9)
##
## SubTree [S2]
##
## checking_balance = > 200 DM: yes (2.6)
## checking_balance in {< 0 DM,1 - 200 DM}:
## :...employment_duration in {unemployed,4 - 7 years}: no (5.8)
## employment_duration in {1 - 4 years,> 7 years,< 1 year}:
## :...percent_of_income <= 3: yes (12/1.8)
## percent_of_income > 3: no (15.4/6.5)
##
## ----- Trial 9: -----
##
## Decision tree:
##
## checking_balance = unknown:
## :...age > 44: no (31.6)
## : age <= 44:
## : :...months_loan_duration <= 8: no (12.7)
## : months_loan_duration > 8:
## : :...other_credit = bank:
## : :...percent_of_income <= 3: yes (29.5/8)
## : : percent_of_income > 3: no (12/3)
## : other_credit in {none,store}:
## : :...age > 37: no (13.5)
## : age <= 37:
## : :...credit_history = perfect: yes (4.2/1.1)
## : credit_history in {critical,very good}: no (22.2/2.8)
## : credit_history = poor:
## : :...percent_of_income <= 3: no (13/1.8)
## : : percent_of_income > 3: yes (12.9/2.7)
## : credit_history = good:
## : :...phone = yes: no (28/3.5)
## : phone = no:
## : :...other_credit = store: no (1.1)
## : other_credit = none:
## : :...months_loan_duration > 30: no (3.5)
## : months_loan_duration <= 30:
## : :...percent_of_income <= 1: no (3.2)
## : percent_of_income > 1: yes (33.7/13.8)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...credit_history = perfect:
## :...percent_of_income > 3: yes (8)
## : percent_of_income <= 3:
## : :...housing = own: no (24.3/8.1)
## : housing in {rent,other}: yes (4.3)
## credit_history = critical:
## :...age > 33: no (67/14.7)
## : age <= 33:
## : :...savings_balance in {< 100 DM,500 - 1000 DM,
## : : > 1000 DM}: yes (48.1/17)
## : savings_balance in {100 - 500 DM,unknown}: no (9.4)
## credit_history = poor:
## :...percent_of_income <= 1: no (5.5)
## : percent_of_income > 1:
## : :...savings_balance = 500 - 1000 DM: yes (0)
## : savings_balance in {100 - 500 DM,unknown}: no (16.2/2.3)
## : savings_balance in {< 100 DM,> 1000 DM}:
## : :...months_loan_duration <= 13: no (4.5)
## : months_loan_duration > 13: yes (28.4/4.2)
## credit_history = very good:
## :...dependents > 1: yes (6.8)
## : dependents <= 1:
## : :...age <= 23: no (8.4)
## : age > 23:
## : :...age <= 29: yes (9.4/0.6)
## : age > 29: no (23.8/10.4)
## credit_history = good:
## :...savings_balance = 500 - 1000 DM: no (13.4/1.2)
## savings_balance in {< 100 DM,100 - 500 DM,unknown,> 1000 DM}:
## :...job = unemployed: yes (6.1/2.3)
## job = management:
## :...savings_balance = > 1000 DM: no (0)
## : savings_balance = unknown: yes (8.7/1.9)
## : savings_balance in {< 100 DM,100 - 500 DM}:
## : :...amount <= 7596: no (34.6/5.2)
## : amount > 7596: yes (11/3)
## job = unskilled:
## :...checking_balance = 1 - 200 DM:
## : :...months_loan_duration <= 21: no (27.9/1.7)
## : : months_loan_duration > 21: yes (4.6/1.1)
## : checking_balance in {< 0 DM,> 200 DM}:
## : :...months_loan_duration > 36: no (3.3)
## : months_loan_duration <= 36:
## : :...months_loan_duration > 26: yes (4.5)
## : months_loan_duration <= 26:
## : :...dependents > 1: yes (10.2/2)
## : dependents <= 1:
## : :...months_loan_duration <= 9: yes (3.6)
## : months_loan_duration > 9: no (33.3/10.8)
## job = skilled:
## :...checking_balance = > 200 DM: no (23.1/6.1)
## checking_balance in {< 0 DM,1 - 200 DM}:
## :...purpose in {business,car0}: yes (9.3/3)
## purpose in {education,renovations}: no (10.7/3.3)
## purpose = car:
## :...other_credit = bank: no (5.6/0.7)
## : other_credit = store: yes (1.1)
## : other_credit = none:
## : :...age > 32: yes (18.5/3.4)
## : age <= 32:
## : :...percent_of_income <= 1: no (7)
## : percent_of_income > 1:
## : :...age <= 25: yes (5)
## : age > 25: no (27.9/9.7)
## purpose = furniture/appliances:
## :...housing = rent: yes (21.8/6.3)
## housing = other: no (5.7/1.6)
## housing = own:
## :...employment_duration in {> 7 years,
## : unemployed}: yes (19.5/5.5)
## employment_duration in {< 1 year,
## : 4 - 7 years}: no (32.6/15.6)
## employment_duration = 1 - 4 years:
## :...dependents > 1: no (2.3)
## dependents <= 1:
## :...other_credit in {bank,store}: no (4.3/0.5)
## other_credit = none:
## :...percent_of_income <= 3: yes (24.5/9.1)
## percent_of_income > 3: no (14.7/3.4)
##
##
## Evaluation on training data (900 cases):
##
## Trial Decision Tree
## ----- ----------------
## Size Errors
##
## 0 48 128(14.2%)
## 1 41 179(19.9%)
## 2 36 200(22.2%)
## 3 32 244(27.1%)
## 4 50 195(21.7%)
## 5 49 192(21.3%)
## 6 37 194(21.6%)
## 7 51 176(19.6%)
## 8 46 188(20.9%)
## 9 57 201(22.3%)
## boost 44( 4.9%) <<
##
##
## (a) (b) <-classified as
## ---- ----
## 626 7 (a): class no
## 37 230 (b): class yes
##
##
## Attribute usage:
##
## 100.00% checking_balance
## 100.00% months_loan_duration
## 100.00% credit_history
## 100.00% savings_balance
## 100.00% employment_duration
## 100.00% other_credit
## 85.33% purpose
## 82.89% amount
## 80.11% age
## 73.11% housing
## 67.67% job
## 67.00% dependents
## 57.44% years_at_residence
## 50.56% percent_of_income
## 49.89% existing_loans_count
## 39.33% phone
##
##
## Time: 0.1 secs
900件中856件当たっており, 予測精度は約95%とかなり上がった. (ただし偽陽性は4増えてしまった)
## テストデータでの評価
# Score the held-out cases with the boosted model, then cross-tabulate the
# true labels against the predictions, showing row proportions only.
credit_pred10 <- predict(object = credit_model10, newdata = credit_test)
CrossTable(
  x = credit_test$default,
  y = credit_pred10,
  dnn = c("actual default", "predicted default"),
  prop.c = FALSE,
  prop.t = FALSE,
  prop.chisq = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | predicted default
## actual default | no | yes | Row Total |
## ---------------|-----------|-----------|-----------|
## no | 57 | 10 | 67 |
## | 0.851 | 0.149 | 0.670 |
## ---------------|-----------|-----------|-----------|
## yes | 17 | 16 | 33 |
## | 0.515 | 0.485 | 0.330 |
## ---------------|-----------|-----------|-----------|
## Column Total | 74 | 26 | 100 |
## ---------------|-----------|-----------|-----------|
##
##
今回は精度が下がってしまった(79%→73%)
本当はyes(焦げ付く)なのにnoと分類してしまうのが一番リスキーなので, ペナルティを重くする
# モデルの性能を向上させる
## 誤分類のコストを指定する
### 行列を作成
# Dimension names for the 2 x 2 cost matrix handed to C5.0().
# C5.0 reads the ROWS of a cost matrix as the predicted class and the COLUMNS
# as the actual class, so the labels are declared in that order: first
# element = rows = "predicted", second element = columns = "actual".
matrix_dimensions <- list(c("no", "yes"), c("no", "yes"))
matrix_names <- c("predicted", "actual")
names(matrix_dimensions) <- matrix_names
matrix_dimensions
## $predicted
## [1] "no" "yes"
##
## $actual
## [1] "no" "yes"
### ペナルティを割り当てる
# The penalties fill the matrix column by column: the 4 lands in row "no" /
# column "yes", and the 1 lands in row "yes" / column "no".
error_costs <- matrix(
  data = c(0, 1, 4, 0),
  nrow = 2,
  dimnames = matrix_dimensions
)
## 予測
# Fit a C5.0 tree that uses the penalties in error_costs, predict the test
# set, and cross-tabulate true labels against predictions (row proportions
# only).
credit_cost <- C5.0(
  x = credit_train[-17],
  y = credit_train$default,
  costs = error_costs
)
credit_cost_pred <- predict(object = credit_cost, newdata = credit_test)
CrossTable(
  x = credit_test$default,
  y = credit_cost_pred,
  dnn = c("actual default", "predicted default"),
  prop.c = FALSE,
  prop.t = FALSE,
  prop.chisq = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | predicted default
## actual default | no | yes | Row Total |
## ---------------|-----------|-----------|-----------|
## no | 34 | 33 | 67 |
## | 0.507 | 0.493 | 0.670 |
## ---------------|-----------|-----------|-----------|
## yes | 8 | 25 | 33 |
## | 0.242 | 0.758 | 0.330 |
## ---------------|-----------|-----------|-----------|
## Column Total | 42 | 58 | 100 |
## ---------------|-----------|-----------|-----------|
##
##
全体の正解率は下がってしまったが, 債務不履行の予測精度は上がった(25/33).
毒キノコか食べられるキノコかを分類する
ここでは全種類がカバーされていると仮定するため, 訓練データとテストデータにわける必要がなく, このデータを正しく分類できればよい
# データ収集
# Read the mushroom data with character columns as factors, drop the
# veil_type column, then count the rows in each class.
mushrooms <- read.csv("mushrooms.csv", stringsAsFactors = TRUE)
mushrooms <- select(mushrooms, !veil_type)
table(mushrooms$type)
##
## edible poisonous
## 4208 3916
1R分類器では説明変数として最も重要度の高い特徴量を一つ選択し, ルールを作成する
# モデルを訓練する
library(OneR)
# Fit a one-rule (1R) classifier that predicts type from all other columns.
# The call is namespace-qualified because RWeka, which this script attaches
# later, also exports a function named OneR and masks this one; qualifying
# it keeps the chunk reproducible when re-run after RWeka is loaded.
mushrooms_1R <- OneR::OneR(formula = type ~ ., data = mushrooms)
mushrooms_1R
##
## Call:
## OneR.formula(formula = type ~ ., data = mushrooms)
##
## Rules:
## If odor = almond then type = edible
## If odor = anise then type = edible
## If odor = creosote then type = poisonous
## If odor = fishy then type = poisonous
## If odor = foul then type = poisonous
## If odor = musty then type = poisonous
## If odor = none then type = edible
## If odor = pungent then type = poisonous
## If odor = spicy then type = poisonous
##
## Accuracy:
## 8004 of 8124 instances classified correctly (98.52%)
# Apply the 1R rules to the same data they were learned from and compare
# the predictions with the true classes.
mushrooms_1R_pred <- predict(object = mushrooms_1R, newdata = mushrooms)
table(
  actual = mushrooms$type,
  predicted = mushrooms_1R_pred
)
## predicted
## actual edible poisonous
## edible 4208 0
## poisonous 120 3796
98.52%正しく分類できているが, それでも120種類は誤って分類されている. しかも実際は毒なのに食べられると予測されている.
より高性能なJRip関数を用いる
# モデルの性能を評価する
library(RWeka)
##
## Attaching package: 'RWeka'
## The following object is masked from 'package:OneR':
##
## OneR
# Fit a JRip rule learner predicting type from every other column, then
# display the learned rules.
mushrooms_JRip <- JRip(type ~ ., data = mushrooms)
print(mushrooms_JRip)
## JRIP rules:
## ===========
##
## (odor = foul) => type=poisonous (2160.0/0.0)
## (gill_size = narrow) and (gill_color = buff) => type=poisonous (1152.0/0.0)
## (gill_size = narrow) and (odor = pungent) => type=poisonous (256.0/0.0)
## (odor = creosote) => type=poisonous (192.0/0.0)
## (spore_print_color = green) => type=poisonous (72.0/0.0)
## (stalk_surface_below_ring = scaly) and (stalk_surface_above_ring = silky) => type=poisonous (68.0/0.0)
## (habitat = leaves) and (cap_color = white) => type=poisonous (8.0/0.0)
## (stalk_color_above_ring = yellow) => type=poisonous (8.0/0.0)
## => type=edible (4208.0/0.0)
##
## Number of Rules : 9
# Apply the JRip rules to the same data they were learned from and compare
# the predictions with the true classes.
mushrooms_JRip_pred <- predict(object = mushrooms_JRip, newdata = mushrooms)
table(
  actual = mushrooms$type,
  predicted = mushrooms_JRip_pred
)
## predicted
## actual edible poisonous
## edible 4208 0
## poisonous 0 3916
全て正しく分類できた