決定木

融資データから焦げつくかどうかを予測する

# データ収集
# Load the loan data with every text column read as a factor.
# With character predictors, plotting the fitted C5.0 tree through partykit
# produced a stream of "NAs introduced by coercion" warnings from
# partysplit() -- presumably because non-factor columns are handled as
# numeric there. Factor columns avoid that coercion.
# `default` is already a factor after this read; as.factor() is kept as a
# harmless safeguard in case the read options change later.
credit <- read.csv("credit.csv", stringsAsFactors = TRUE) %>%
  mutate(default = as.factor(default))
# Column overview: the text columns are expected to show up as <fct>.
glimpse(credit)
## Rows: 1,000
## Columns: 17
## $ checking_balance     <chr> "< 0 DM", "1 - 200 DM", "unknown", "< 0 DM", "<…
## $ months_loan_duration <int> 6, 48, 12, 42, 24, 36, 24, 36, 12, 30, 12, 48, …
## $ credit_history       <chr> "critical", "good", "critical", "good", "poor",…
## $ purpose              <chr> "furniture/appliances", "furniture/appliances",…
## $ amount               <int> 1169, 5951, 2096, 7882, 4870, 9055, 2835, 6948,…
## $ savings_balance      <chr> "unknown", "< 100 DM", "< 100 DM", "< 100 DM", …
## $ employment_duration  <chr> "> 7 years", "1 - 4 years", "4 - 7 years", "4 -…
## $ percent_of_income    <int> 4, 2, 2, 2, 3, 2, 3, 2, 2, 4, 3, 3, 1, 4, 2, 4,…
## $ years_at_residence   <int> 4, 2, 3, 4, 4, 4, 4, 2, 4, 2, 1, 4, 1, 4, 4, 2,…
## $ age                  <int> 67, 22, 49, 45, 53, 35, 53, 35, 61, 28, 25, 24,…
## $ other_credit         <chr> "none", "none", "none", "none", "none", "none",…
## $ housing              <chr> "own", "own", "own", "other", "other", "other",…
## $ existing_loans_count <int> 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1,…
## $ job                  <chr> "skilled", "skilled", "unskilled", "skilled", "…
## $ dependents           <int> 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ phone                <chr> "yes", "no", "no", "no", "no", "yes", "no", "ye…
## $ default              <fct> no, yes, no, no, yes, no, no, no, no, yes, yes,…
## About 30% of the loans ended in default
table(credit[["default"]])
## 
##  no yes 
## 700 300
# 前処理
## Random train/test split
# Fix the RNG seed so the split is reproducible.
set.seed(1)
### Draw 90% of the row indices for training. The sizes are derived from the
### data instead of hard-coding 1000 rows / 900 training rows; for the
### 1,000-row credit data this is the same call as sample(1000, 900).
train_sample <- sample(nrow(credit), round(0.9 * nrow(credit)))
### Training data: the sampled rows
credit_train <- credit[train_sample, ]
### Test data: every row that was not drawn for training
credit_test <- credit[-train_sample, ]
### Class proportions in the training split; they should roughly match the
### proportions in the test split
prop.table(table(credit_train[["default"]]))
## 
##        no       yes 
## 0.7033333 0.2966667
### Class proportions in the test split
prop.table(table(credit_test[["default"]]))
## 
##   no  yes 
## 0.67 0.33
# モデルを訓練する
library(C50)
## Fit a single C5.0 tree: every column except the 17th (`default`) is passed
## as a predictor, and the `default` factor is passed as the class label
credit_model <- C5.0(
  x = credit_train[-17],
  y = credit_train$default,
  trials = 1,
  costs = NULL
)
# Show the short model description (sample count, predictors, tree size)
print(credit_model)
## 
## Call:
## C5.0.default(x = credit_train[-17], y = credit_train$default, trials = 1,
##  costs = NULL)
## 
## Classification Tree
## Number of samples: 900 
## Number of predictors: 16 
## 
## Tree size: 51 
## 
## Non-standard options: attempt to group attributes

木のサイズ(葉ノードの数)が51であり, 特徴量は16個であることがわかる. なお「Tree size」は木の深さではなく葉の数を表す.

## 決定木の可視化
library(partykit)
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm
# Draw the fitted C5.0 tree as a diagram
plot(credit_model)
## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました

## Warning in partysplit(varid = as.integer(i), breaks = as.numeric(j[1]), : 強制変
## 換により NA が生成されました
## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました

## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました

## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました

## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました

## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました

## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました

## Warning in .bincode(as.numeric(x), breaks = unique(c(-Inf,
## breaks_split(split), : 強制変換により NA が生成されました

## Interpreting the result: print the full tree, the confusion matrix on the
## training data, and the attribute usage
summary(object = credit_model)
## 
## Call:
## C5.0.default(x = credit_train[-17], y = credit_train$default, trials = 1,
##  costs = NULL)
## 
## 
## C5.0 [Release 2.07 GPL Edition]      Tue Mar 23 00:54:23 2021
## -------------------------------
## 
## Class specified by attribute `outcome'
## 
## Read 900 cases (17 attributes) from undefined.data
## 
## Decision tree:
## 
## checking_balance in {unknown,> 200 DM}: no (418/53)
## checking_balance in {< 0 DM,1 - 200 DM}:
## :...credit_history in {perfect,very good}:
##     :...dependents > 1: yes (10)
##     :   dependents <= 1:
##     :   :...savings_balance in {< 100 DM,100 - 500 DM,
##     :       :                   > 1000 DM}: yes (43/12)
##     :       savings_balance in {unknown,500 - 1000 DM}: no (8/1)
##     credit_history in {good,critical,poor}:
##     :...months_loan_duration <= 22:
##         :...employment_duration = 4 - 7 years: no (36/3)
##         :   employment_duration in {1 - 4 years,> 7 years,unemployed,< 1 year}:
##         :   :...purpose in {business,car0,renovations}: no (26/3)
##         :       purpose = education:
##         :       :...savings_balance in {< 100 DM,100 - 500 DM,500 - 1000 DM,
##         :       :   :                   > 1000 DM}: yes (6)
##         :       :   savings_balance = unknown: no (4)
##         :       purpose = furniture/appliances:
##         :       :...savings_balance = 100 - 500 DM: yes (3)
##         :       :   savings_balance in {500 - 1000 DM,
##         :       :   :                   > 1000 DM}: no (10/1)
##         :       :   savings_balance = unknown:
##         :       :   :...credit_history in {critical,poor}: no (3)
##         :       :   :   credit_history = good:
##         :       :   :   :...years_at_residence <= 3: yes (6/1)
##         :       :   :       years_at_residence > 3: no (2)
##         :       :   savings_balance = < 100 DM:
##         :       :   :...months_loan_duration <= 16: no (54/11)
##         :       :       months_loan_duration > 16:
##         :       :       :...checking_balance = 1 - 200 DM: no (9/2)
##         :       :           checking_balance = < 0 DM: [S1]
##         :       purpose = car:
##         :       :...other_credit = store: no (0)
##         :           other_credit = bank: yes (11/3)
##         :           other_credit = none:
##         :           :...credit_history in {critical,poor}: no (20/2)
##         :               credit_history = good:
##         :               :...years_at_residence <= 1: yes (4)
##         :                   years_at_residence > 1:
##         :                   :...housing = rent: no (8/2)
##         :                       housing = other: yes (5/1)
##         :                       housing = own:
##         :                       :...age <= 25: yes (4)
##         :                           age > 25: no (15/4)
##         months_loan_duration > 22:
##         :...savings_balance = 500 - 1000 DM: yes (3/1)
##             savings_balance = > 1000 DM: no (4/1)
##             savings_balance = 100 - 500 DM:
##             :...employment_duration in {1 - 4 years,< 1 year}: yes (13/2)
##             :   employment_duration in {> 7 years,unemployed,
##             :                           4 - 7 years}: no (12/3)
##             savings_balance = unknown:
##             :...checking_balance = < 0 DM: yes (12/4)
##             :   checking_balance = 1 - 200 DM: no (17/1)
##             savings_balance = < 100 DM:
##             :...months_loan_duration > 47: yes (19/2)
##                 months_loan_duration <= 47:
##                 :...housing = other:
##                     :...percent_of_income <= 2: no (5)
##                     :   percent_of_income > 2: yes (9/3)
##                     housing = rent:
##                     :...other_credit = store: yes (0)
##                     :   other_credit = bank: no (1)
##                     :   other_credit = none:
##                     :   :...percent_of_income > 2: yes (10/1)
##                     :       percent_of_income <= 2:
##                     :       :...years_at_residence <= 3: no (3)
##                     :           years_at_residence > 3: yes (2)
##                     housing = own:
##                     :...employment_duration = > 7 years: no (14/5)
##                         employment_duration = 4 - 7 years: yes (9/1)
##                         employment_duration = unemployed:
##                         :...years_at_residence <= 2: yes (4)
##                         :   years_at_residence > 2: no (3)
##                         employment_duration = 1 - 4 years:
##                         :...purpose in {furniture/appliances,
##                         :   :           renovations}: no (7)
##                         :   purpose in {car,business,education,car0}:
##                         :   :...years_at_residence <= 3: yes (10/1)
##                         :       years_at_residence > 3: no (2)
##                         employment_duration = < 1 year:
##                         :...years_at_residence > 3: yes (7)
##                             years_at_residence <= 3:
##                             :...other_credit = bank: no (0)
##                                 other_credit = store: yes (1)
##                                 other_credit = none:
##                                 :...checking_balance = < 0 DM: yes (6/2)
##                                     checking_balance = 1 - 200 DM: no (8/2)
## 
## SubTree [S1]
## 
## employment_duration in {1 - 4 years,unemployed}: yes (10)
## employment_duration in {> 7 years,< 1 year}: no (4)
## 
## 
## Evaluation on training data (900 cases):
## 
##      Decision Tree   
##    ----------------  
##    Size      Errors  
## 
##      48  128(14.2%)   <<
## 
## 
##     (a)   (b)    <-classified as
##    ----  ----
##     599    34    (a): class no
##      94   173    (b): class yes
## 
## 
##  Attribute usage:
## 
##  100.00% checking_balance
##   53.56% credit_history
##   46.78% months_loan_duration
##   38.11% savings_balance
##   37.33% employment_duration
##   24.78% purpose
##   14.78% housing
##   10.89% other_credit
##   10.00% years_at_residence
##    6.78% dependents
##    3.22% percent_of_income
##    2.11% age
## 
## 
## Time: 0.0 secs

900件中, 772件的中している(約86%). 誤分類128件のうち, 本当はyesなのにnoと誤分類(偽陰性)されているものが94件あり, これは融資リスクとして大きい. 理想はこれを減らすこと. なお, 本当はnoなのにyesと誤分類(偽陽性)されているものは34件である.

# モデルの性能を評価する
# Score the held-out test rows with the single-tree model
credit_pred <- predict(credit_model, newdata = credit_test)
library(gmodels)
# Cross-tabulate actual vs. predicted labels. Only counts and row proportions
# are printed; chi-square, column and table proportions are switched off.
CrossTable(
  x = credit_test$default,
  y = credit_pred,
  prop.c = FALSE,
  prop.t = FALSE,
  prop.chisq = FALSE,
  dnn = c("actual default", "predicted default")
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  100 
## 
##  
##                | predicted default 
## actual default |        no |       yes | Row Total | 
## ---------------|-----------|-----------|-----------|
##             no |        63 |         4 |        67 | 
##                |     0.940 |     0.060 |     0.670 | 
## ---------------|-----------|-----------|-----------|
##            yes |        17 |        16 |        33 | 
##                |     0.515 |     0.485 |     0.330 | 
## ---------------|-----------|-----------|-----------|
##   Column Total |        80 |        20 |       100 | 
## ---------------|-----------|-----------|-----------|
## 
## 

テストデータ100件中79件(79%)を正しく予測できている. ただし, 実際に焦げ付いた33件のうち17件(約52%)をnoと誤分類しており, 焦げ付きの検出という点ではまだ不十分である.

# モデルの性能を向上させる
## Boosting: allow up to 10 trials (one tree per trial) instead of a single
## tree; this sets the number of trees, it does not shrink an individual tree
credit_model10 <- C5.0(
  x = credit_train[-17],
  y = credit_train$default,
  trials = 10
)
# Print every trial's tree followed by the evaluation of the boosted model
summary(object = credit_model10)
## 
## Call:
## C5.0.default(x = credit_train[-17], y = credit_train$default, trials = 10)
## 
## 
## C5.0 [Release 2.07 GPL Edition]      Tue Mar 23 00:54:28 2021
## -------------------------------
## 
## Class specified by attribute `outcome'
## 
## Read 900 cases (17 attributes) from undefined.data
## 
## -----  Trial 0:  -----
## 
## Decision tree:
## 
## checking_balance in {unknown,> 200 DM}: no (418/53)
## checking_balance in {< 0 DM,1 - 200 DM}:
## :...credit_history in {perfect,very good}:
##     :...dependents > 1: yes (10)
##     :   dependents <= 1:
##     :   :...savings_balance in {< 100 DM,100 - 500 DM,
##     :       :                   > 1000 DM}: yes (43/12)
##     :       savings_balance in {unknown,500 - 1000 DM}: no (8/1)
##     credit_history in {good,critical,poor}:
##     :...months_loan_duration <= 22:
##         :...employment_duration = 4 - 7 years: no (36/3)
##         :   employment_duration in {1 - 4 years,> 7 years,unemployed,< 1 year}:
##         :   :...purpose in {business,car0,renovations}: no (26/3)
##         :       purpose = education:
##         :       :...savings_balance in {< 100 DM,100 - 500 DM,500 - 1000 DM,
##         :       :   :                   > 1000 DM}: yes (6)
##         :       :   savings_balance = unknown: no (4)
##         :       purpose = furniture/appliances:
##         :       :...savings_balance = 100 - 500 DM: yes (3)
##         :       :   savings_balance in {500 - 1000 DM,
##         :       :   :                   > 1000 DM}: no (10/1)
##         :       :   savings_balance = unknown:
##         :       :   :...credit_history in {critical,poor}: no (3)
##         :       :   :   credit_history = good:
##         :       :   :   :...years_at_residence <= 3: yes (6/1)
##         :       :   :       years_at_residence > 3: no (2)
##         :       :   savings_balance = < 100 DM:
##         :       :   :...months_loan_duration <= 16: no (54/11)
##         :       :       months_loan_duration > 16:
##         :       :       :...checking_balance = 1 - 200 DM: no (9/2)
##         :       :           checking_balance = < 0 DM: [S1]
##         :       purpose = car:
##         :       :...other_credit = store: no (0)
##         :           other_credit = bank: yes (11/3)
##         :           other_credit = none:
##         :           :...credit_history in {critical,poor}: no (20/2)
##         :               credit_history = good:
##         :               :...years_at_residence <= 1: yes (4)
##         :                   years_at_residence > 1:
##         :                   :...housing = rent: no (8/2)
##         :                       housing = other: yes (5/1)
##         :                       housing = own:
##         :                       :...age <= 25: yes (4)
##         :                           age > 25: no (15/4)
##         months_loan_duration > 22:
##         :...savings_balance = 500 - 1000 DM: yes (3/1)
##             savings_balance = > 1000 DM: no (4/1)
##             savings_balance = 100 - 500 DM:
##             :...employment_duration in {1 - 4 years,< 1 year}: yes (13/2)
##             :   employment_duration in {> 7 years,unemployed,
##             :                           4 - 7 years}: no (12/3)
##             savings_balance = unknown:
##             :...checking_balance = < 0 DM: yes (12/4)
##             :   checking_balance = 1 - 200 DM: no (17/1)
##             savings_balance = < 100 DM:
##             :...months_loan_duration > 47: yes (19/2)
##                 months_loan_duration <= 47:
##                 :...housing = other:
##                     :...percent_of_income <= 2: no (5)
##                     :   percent_of_income > 2: yes (9/3)
##                     housing = rent:
##                     :...other_credit = store: yes (0)
##                     :   other_credit = bank: no (1)
##                     :   other_credit = none:
##                     :   :...percent_of_income > 2: yes (10/1)
##                     :       percent_of_income <= 2:
##                     :       :...years_at_residence <= 3: no (3)
##                     :           years_at_residence > 3: yes (2)
##                     housing = own:
##                     :...employment_duration = > 7 years: no (14/5)
##                         employment_duration = 4 - 7 years: yes (9/1)
##                         employment_duration = unemployed:
##                         :...years_at_residence <= 2: yes (4)
##                         :   years_at_residence > 2: no (3)
##                         employment_duration = 1 - 4 years:
##                         :...purpose in {furniture/appliances,
##                         :   :           renovations}: no (7)
##                         :   purpose in {car,business,education,car0}:
##                         :   :...years_at_residence <= 3: yes (10/1)
##                         :       years_at_residence > 3: no (2)
##                         employment_duration = < 1 year:
##                         :...years_at_residence > 3: yes (7)
##                             years_at_residence <= 3:
##                             :...other_credit = bank: no (0)
##                                 other_credit = store: yes (1)
##                                 other_credit = none:
##                                 :...checking_balance = < 0 DM: yes (6/2)
##                                     checking_balance = 1 - 200 DM: no (8/2)
## 
## SubTree [S1]
## 
## employment_duration in {1 - 4 years,unemployed}: yes (10)
## employment_duration in {> 7 years,< 1 year}: no (4)
## 
## -----  Trial 1:  -----
## 
## Decision tree:
## 
## months_loan_duration <= 7: no (66.2/8.5)
## months_loan_duration > 7:
## :...checking_balance = unknown:
##     :...other_credit in {bank,store}:
##     :   :...employment_duration = 4 - 7 years: no (7.1)
##     :   :   employment_duration in {1 - 4 years,> 7 years,unemployed,< 1 year}:
##     :   :   :...purpose in {car,business,education,renovations}: yes (38.1/8.7)
##     :   :       purpose in {furniture/appliances,car0}: no (21/6.8)
##     :   other_credit = none:
##     :   :...age > 31: no (131.7/9)
##     :       age <= 31:
##     :       :...amount > 6458: yes (15.2/4)
##     :           amount <= 6458:
##     :           :...credit_history in {perfect,critical,
##     :               :                  very good}: no (20.6)
##     :               credit_history in {good,poor}:
##     :               :...percent_of_income <= 1: no (5.5)
##     :                   percent_of_income > 1:
##     :                   :...age <= 23: yes (16.7/3.2)
##     :                       age > 23: no (51.4/15.8)
##     checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
##     :...savings_balance in {unknown,500 - 1000 DM,> 1000 DM}:
##         :...purpose in {business,education}: no (15/1.6)
##         :   purpose in {car0,renovations}: yes (5.3/0.8)
##         :   purpose = car:
##         :   :...amount <= 1424: yes (10.7/0.8)
##         :   :   amount > 1424: no (21.8/5.4)
##         :   purpose = furniture/appliances:
##         :   :...housing in {own,rent}: no (50.7/11.6)
##         :       housing = other: yes (3.8/0.8)
##         savings_balance in {< 100 DM,100 - 500 DM}:
##         :...months_loan_duration > 42:
##             :...years_at_residence <= 1: no (5.3/0.8)
##             :   years_at_residence > 1: yes (33.6/5.3)
##             months_loan_duration <= 42:
##             :...other_credit = store: no (20.1/10.1)
##                 other_credit = bank:
##                 :...years_at_residence <= 1: no (15.4/0.8)
##                 :   years_at_residence > 1:
##                 :   :...percent_of_income > 3: yes (18.8/3.8)
##                 :       percent_of_income <= 3:
##                 :       :...years_at_residence <= 2: yes (2.3)
##                 :           years_at_residence > 2: no (27.7/6.3)
##                 other_credit = none:
##                 :...credit_history in {perfect,poor,very good}: yes (45/16.3)
##                     credit_history = critical:
##                     :...housing = other: no (7)
##                     :   housing in {own,rent}:
##                     :   :...percent_of_income <= 1: no (4.7)
##                     :       percent_of_income > 1:
##                     :       :...dependents > 1: no (6.3/0.8)
##                     :           dependents <= 1:
##                     :           :...percent_of_income <= 3: yes (20/5.5)
##                     :               percent_of_income > 3: no (28.1/9.3)
##                     credit_history = good:
##                     :...purpose in {business,car0}: no (10.2/3)
##                         purpose in {education,renovations}: yes (14/5.5)
##                         purpose = car:
##                         :...employment_duration in {1 - 4 years,> 7 years,
##                         :   :                       < 1 year,
##                         :   :                       4 - 7 years}: yes (49.6/13.5)
##                         :   employment_duration = unemployed: no (6.1/0.8)
##                         purpose = furniture/appliances:
##                         :...employment_duration in {> 7 years,
##                             :                       4 - 7 years}: no (23.4/7.7)
##                             employment_duration in {unemployed,
##                             :                       < 1 year}: yes (35.5/13.2)
##                             employment_duration = 1 - 4 years:
##                             :...savings_balance = 100 - 500 DM: yes (2.4)
##                                 savings_balance = < 100 DM:
##                                 :...phone = yes: no (6.3/1.6)
##                                     phone = no:
##                                     :...housing = rent: no (3.2/1.6)
##                                         housing = other: yes (0.8)
##                                         housing = own: [S1]
## 
## SubTree [S1]
## 
## checking_balance = < 0 DM: no (10.3/1.6)
## checking_balance in {1 - 200 DM,> 200 DM}: yes (22.9/7.1)
## 
## -----  Trial 2:  -----
## 
## Decision tree:
## 
## checking_balance = unknown:
## :...other_credit = bank:
## :   :...existing_loans_count > 2: no (3.7)
## :   :   existing_loans_count <= 2:
## :   :   :...housing = rent: yes (9.5/2.8)
## :   :       housing = other: no (7.7/1.8)
## :   :       housing = own:
## :   :       :...phone = no: no (20.8/8.5)
## :   :           phone = yes: yes (12.3/1.9)
## :   other_credit in {none,store}:
## :   :...credit_history in {critical,very good}: no (86.2/5.5)
## :       credit_history in {perfect,good,poor}:
## :       :...employment_duration in {> 7 years,4 - 7 years}: no (64.5/9.7)
## :           employment_duration in {1 - 4 years,unemployed,< 1 year}:
## :           :...dependents > 1: no (10.9/1.8)
## :               dependents <= 1:
## :               :...amount > 4455: yes (26.5/6)
## :                   amount <= 4455:
## :                   :...percent_of_income <= 1: no (7.1)
## :                       percent_of_income > 1:
## :                       :...savings_balance in {100 - 500 DM,unknown,
## :                           :                   > 1000 DM}: no (18.3/3)
## :                           savings_balance = 500 - 1000 DM: yes (7.3/1.3)
## :                           savings_balance = < 100 DM:
## :                           :...age <= 23: yes (7.4)
## :                               age > 23: no (30/7.8)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...other_credit = store:
##     :...months_loan_duration <= 11: no (2.5)
##     :   months_loan_duration > 11: yes (29.9/8.3)
##     other_credit in {bank,none}:
##     :...amount > 10722: yes (26.6/5.3)
##         amount <= 10722:
##         :...checking_balance in {1 - 200 DM,> 200 DM}:
##             :...dependents <= 1: no (255.5/88.9)
##             :   dependents > 1:
##             :   :...housing = rent: no (3.7/1.5)
##             :       housing = other: yes (8/0.6)
##             :       housing = own:
##             :       :...months_loan_duration <= 13: no (10/0.6)
##             :           months_loan_duration > 13: yes (18.7/5.6)
##             checking_balance = < 0 DM:
##             :...savings_balance in {100 - 500 DM,> 1000 DM}: no (13.6/3.8)
##                 savings_balance = 500 - 1000 DM: yes (4.1/1.9)
##                 savings_balance = unknown:
##                 :...months_loan_duration <= 33: no (22.8/10.2)
##                 :   months_loan_duration > 33: yes (4.5)
##                 savings_balance = < 100 DM:
##                 :...job in {unskilled,management}: no (67.7/25.4)
##                     job = unemployed: yes (2.6/0.6)
##                     job = skilled:
##                     :...amount > 5771: yes (11.7/0.6)
##                         amount <= 5771:
##                         :...employment_duration in {> 7 years,< 1 year,
##                             :                       4 - 7 years}: yes (63/22.4)
##                             employment_duration = unemployed: no (3.8/1.3)
##                             employment_duration = 1 - 4 years:
##                             :...housing = other: yes (2.6)
##                                 housing in {own,rent}:
##                                 :...dependents > 1: no (3.2)
##                                     dependents <= 1:
##                                     :...amount > 4153: no (5.1)
##                                         amount <= 4153: [S1]
## 
## SubTree [S1]
## 
## months_loan_duration <= 16: no (11.9/4.3)
## months_loan_duration > 16: yes (16.3/4)
## 
## -----  Trial 3:  -----
## 
## Decision tree:
## 
## checking_balance = unknown:
## :...other_credit in {bank,store}:
## :   :...employment_duration = 4 - 7 years: no (5.5)
## :   :   employment_duration in {1 - 4 years,> 7 years,unemployed,< 1 year}:
## :   :   :...age > 44: no (11.8/1.5)
## :   :       age <= 44:
## :   :       :...years_at_residence <= 1: no (3.6)
## :   :           years_at_residence > 1: yes (49.5/16.5)
## :   other_credit = none:
## :   :...credit_history in {perfect,critical,very good}: no (71.2/4.8)
## :       credit_history in {good,poor}:
## :       :...existing_loans_count <= 1:
## :           :...credit_history = good: no (98.7/24.1)
## :           :   credit_history = poor: yes (8.6/2.7)
## :           existing_loans_count > 1:
## :           :...employment_duration in {1 - 4 years,> 7 years,unemployed,
## :               :                       < 1 year}: yes (32.5/11.8)
## :               employment_duration = 4 - 7 years: no (6.2)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...employment_duration = 1 - 4 years:
##     :...credit_history in {perfect,critical,poor,very good}: no (78.4/31.9)
##     :   credit_history = good: yes (127.5/51.8)
##     employment_duration = unemployed:
##     :...years_at_residence <= 2: yes (20.6/4.4)
##     :   years_at_residence > 2: no (23.9/4.3)
##     employment_duration = < 1 year:
##     :...housing = rent: yes (38.9/10.5)
##     :   housing = other: no (6.2/1.7)
##     :   housing = own:
##     :   :...savings_balance in {unknown,500 - 1000 DM,> 1000 DM}: no (9.1)
##     :       savings_balance in {< 100 DM,100 - 500 DM}:
##     :       :...years_at_residence <= 1: no (35.8/12.5)
##     :           years_at_residence > 1: yes (51.3/17.5)
##     employment_duration = 4 - 7 years:
##     :...savings_balance in {100 - 500 DM,unknown}: no (30.2/8.1)
##     :   savings_balance in {500 - 1000 DM,> 1000 DM}: yes (6/1.1)
##     :   savings_balance = < 100 DM:
##     :   :...months_loan_duration <= 22: no (25.8/5.8)
##     :       months_loan_duration > 22:
##     :       :...months_loan_duration <= 40: yes (17.7/3.4)
##     :           months_loan_duration > 40: no (11.2/3.4)
##     employment_duration = > 7 years:
##     :...months_loan_duration > 33: yes (21.5/2.9)
##         months_loan_duration <= 33:
##         :...purpose in {business,education,car0}: yes (23.8/9)
##             purpose = renovations: no (1.8)
##             purpose = furniture/appliances:
##             :...years_at_residence <= 3: yes (16.8/6.3)
##             :   years_at_residence > 3: no (24.1/3.3)
##             purpose = car:
##             :...job in {unskilled,unemployed}: yes (6.8/0.5)
##                 job in {skilled,management}:
##                 :...checking_balance = > 200 DM: no (2.9)
##                     checking_balance in {< 0 DM,1 - 200 DM}:
##                     :...months_loan_duration <= 18: yes (17.6/2.3)
##                         months_loan_duration > 18: no (14.7/3.3)
## 
## -----  Trial 4:  -----
## 
## Decision tree:
## 
## checking_balance = unknown:
## :...employment_duration = > 7 years: no (75.9/17.1)
## :   employment_duration = unemployed: yes (15.8/7.2)
## :   employment_duration = 4 - 7 years:
## :   :...age <= 22: yes (8/3.2)
## :   :   age > 22: no (30.7)
## :   employment_duration = < 1 year:
## :   :...purpose = car0: no (0)
## :   :   purpose in {business,renovations}: yes (7.9)
## :   :   purpose in {car,furniture/appliances,education}:
## :   :   :...months_loan_duration <= 30: no (28.1/6.3)
## :   :       months_loan_duration > 30: yes (2.8)
## :   employment_duration = 1 - 4 years:
## :   :...months_loan_duration <= 9: no (13.6)
## :       months_loan_duration > 9:
## :       :...housing = rent: yes (14.6/4.2)
## :           housing in {own,other}:
## :           :...job = unemployed: no (0)
## :               job = management: yes (13.3/4.4)
## :               job in {skilled,unskilled}:
## :               :...purpose in {furniture/appliances,car0,
## :                   :           renovations}: no (17.8)
## :                   purpose in {car,business,education}:
## :                   :...amount <= 2255: yes (20.3/6.5)
## :                       amount > 2255:
## :                       :...credit_history in {perfect,critical}: yes (8.8/3.5)
## :                           credit_history in {good,poor,
## :                                              very good}: no (18.4)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...credit_history in {perfect,very good}:
##     :...dependents > 1: yes (12.4)
##     :   dependents <= 1:
##     :   :...age <= 23: no (5.3)
##     :       age > 23:
##     :       :...housing = rent: yes (12.9/0.5)
##     :           housing in {own,other}:
##     :           :...other_credit = store: yes (3)
##     :               other_credit in {bank,none}:
##     :               :...months_loan_duration <= 10: yes (4.7)
##     :                   months_loan_duration > 10:
##     :                   :...percent_of_income <= 3: no (24.7/7.7)
##     :                       percent_of_income > 3: yes (17/5.5)
##     credit_history in {good,critical,poor}:
##     :...amount > 7980: yes (37.5/9.3)
##         amount <= 7980:
##         :...months_loan_duration <= 8: no (39.1/7)
##             months_loan_duration > 8:
##             :...purpose = education: yes (24.2/10.1)
##                 purpose in {car0,renovations}: no (17.2/5)
##                 purpose = business:
##                 :...age <= 53: no (32.9/9.2)
##                 :   age > 53: yes (2.5)
##                 purpose = car:
##                 :...percent_of_income <= 2:
##                 :   :...amount <= 1386: yes (5.7/0.5)
##                 :   :   amount > 1386: no (40.3/6.3)
##                 :   percent_of_income > 2:
##                 :   :...years_at_residence <= 2:
##                 :       :...percent_of_income <= 3: yes (14.3/1.5)
##                 :       :   percent_of_income > 3:
##                 :       :   :...phone = no: yes (28.1/5.5)
##                 :       :       phone = yes: no (6.9/2)
##                 :       years_at_residence > 2:
##                 :       :...months_loan_duration <= 15: no (19.8/4.8)
##                 :           months_loan_duration > 15:
##                 :           :...employment_duration in {unemployed,
##                 :               :                       < 1 year}: yes (6.2)
##                 :               employment_duration in {1 - 4 years,> 7 years,
##                 :               :                       4 - 7 years}:
##                 :               :...amount <= 1680: yes (10.5/1)
##                 :                   amount > 1680: no (19.3/5.8)
##                 purpose = furniture/appliances:
##                 :...savings_balance = 100 - 500 DM: yes (18.2/5.1)
##                     savings_balance in {500 - 1000 DM,
##                     :                   > 1000 DM}: no (16.1/4.7)
##                     savings_balance = unknown:
##                     :...job in {skilled,management,unemployed}: yes (28.9/11.5)
##                     :   job = unskilled: no (3.8)
##                     savings_balance = < 100 DM:
##                     :...phone = yes:
##                         :...other_credit in {bank,store}: no (1.9)
##                         :   other_credit = none:
##                         :   :...checking_balance in {< 0 DM,
##                         :       :                    1 - 200 DM}: yes (32/11.1)
##                         :       checking_balance = > 200 DM: no (2.9)
##                         phone = no:
##                         :...other_credit = bank: no (11.7/4.6)
##                             other_credit = store: yes (10.8/3.6)
##                             other_credit = none:
##                             :...job in {skilled,management}: no (76.3/26.7)
##                                 job = unemployed: yes (0.5)
##                                 job = unskilled:
##                                 :...months_loan_duration > 27: yes (3)
##                                     months_loan_duration <= 27: [S1]
## 
## SubTree [S1]
## 
## checking_balance in {< 0 DM,1 - 200 DM}: no (26.4/5.7)
## checking_balance = > 200 DM: yes (7/1.7)
## 
## -----  Trial 5:  -----
## 
## Decision tree:
## 
## checking_balance in {unknown,> 200 DM}:
## :...other_credit = store: no (19.3/6.7)
## :   other_credit = bank:
## :   :...job in {skilled,management,unemployed}: no (38.8/13.9)
## :   :   job = unskilled: yes (18.4/6.1)
## :   other_credit = none:
## :   :...age > 34: no (108/15.5)
## :       age <= 34:
## :       :...savings_balance = 500 - 1000 DM: yes (10.5/3.1)
## :           savings_balance = > 1000 DM: no (8.1)
## :           savings_balance in {< 100 DM,100 - 500 DM,unknown}:
## :           :...credit_history in {perfect,very good}: yes (5.6/1.6)
## :               credit_history = poor: no (18.4/8)
## :               credit_history = critical:
## :               :...housing = own: no (20.2)
## :               :   housing in {rent,other}: yes (10.7/3.3)
## :               credit_history = good:
## :               :...purpose in {car,business,education,car0}: no (31.7/4.9)
## :                   purpose = renovations: yes (1.6)
## :                   purpose = furniture/appliances:
## :                   :...savings_balance = unknown: no (4.1)
## :                       savings_balance in {< 100 DM,100 - 500 DM}:
## :                       :...existing_loans_count <= 1: no (39.4/17.7)
## :                           existing_loans_count > 1: yes (7/0.4)
## checking_balance in {< 0 DM,1 - 200 DM}:
## :...savings_balance in {500 - 1000 DM,> 1000 DM}: no (33.3/14.8)
##     savings_balance = unknown:
##     :...credit_history in {perfect,critical,poor}: no (16.5)
##     :   credit_history in {good,very good}:
##     :   :...checking_balance = < 0 DM: yes (26.1/8.3)
##     :       checking_balance = 1 - 200 DM: no (33.5/10)
##     savings_balance = 100 - 500 DM:
##     :...job = unemployed: no (0)
##     :   job in {unskilled,management}: yes (14.2/3.9)
##     :   job = skilled:
##     :   :...credit_history in {critical,poor}: no (13.2/0.4)
##     :       credit_history in {perfect,good,very good}:
##     :       :...housing in {own,other}: no (23.7/10.3)
##     :           housing = rent: yes (7.2)
##     savings_balance = < 100 DM:
##     :...months_loan_duration > 47: yes (27.5/4.4)
##         months_loan_duration <= 47:
##         :...housing = other:
##             :...existing_loans_count <= 2: yes (29.2/12.4)
##             :   existing_loans_count > 2: no (2.3)
##             housing = rent:
##             :...checking_balance = 1 - 200 DM: no (23.6/9.5)
##             :   checking_balance = < 0 DM:
##             :   :...phone = yes: yes (18.6/1.6)
##             :       phone = no:
##             :       :...years_at_residence <= 3: no (7/0.8)
##             :           years_at_residence > 3: yes (27.1/8.9)
##             housing = own:
##             :...job = unemployed: no (2.4/0.8)
##                 job = management:
##                 :...years_at_residence <= 1: no (4.4)
##                 :   years_at_residence > 1: yes (25.1/9.6)
##                 job = unskilled:
##                 :...employment_duration in {> 7 years,unemployed,
##                 :   :                       4 - 7 years}: no (18/4.1)
##                 :   employment_duration = < 1 year: yes (23.6/10.2)
##                 :   employment_duration = 1 - 4 years:
##                 :   :...existing_loans_count > 2: yes (2.2)
##                 :       existing_loans_count <= 2:
##                 :       :...amount <= 672: yes (3.9)
##                 :           amount > 672: no (26.9/5.7)
##                 job = skilled:
##                 :...purpose in {business,education,
##                     :           renovations}: yes (25.4/7.6)
##                     purpose = car0: no (0.4)
##                     purpose = car:
##                     :...age <= 48: yes (28.7/11.1)
##                     :   age > 48: no (5.4)
##                     purpose = furniture/appliances:
##                     :...credit_history in {perfect,critical,
##                         :                  poor}: no (20.5/6.6)
##                         credit_history = very good: yes (3.5)
##                         credit_history = good:
##                         :...months_loan_duration <= 7: no (5.3)
##                             months_loan_duration > 7:
##                             :...age > 46: no (3.2)
##                                 age <= 46: [S1]
## 
## SubTree [S1]
## 
## checking_balance = 1 - 200 DM: yes (25.5/7.1)
## checking_balance = < 0 DM:
## :...months_loan_duration <= 16: no (6)
##     months_loan_duration > 16: yes (24.7/8.9)
## 
## -----  Trial 6:  -----
## 
## Decision tree:
## 
## checking_balance = unknown:
## :...credit_history in {perfect,critical,very good}: no (96.9/23.9)
## :   credit_history in {good,poor}:
## :   :...purpose in {car,car0}: no (51.9/10.3)
## :       purpose in {business,education,renovations}: yes (39.3/15.5)
## :       purpose = furniture/appliances:
## :       :...amount > 3275: yes (27.2/9.8)
## :           amount <= 3275:
## :           :...age <= 22: yes (11/2.1)
## :               age > 22: no (38.6/6.8)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...credit_history in {perfect,very good}: yes (73.4/27.7)
##     credit_history = poor:
##     :...percent_of_income <= 1: no (6.7)
##     :   percent_of_income > 1:
##     :   :...age <= 46: no (26.8/10.3)
##     :       age > 46: yes (11.2/0.7)
##     credit_history = critical:
##     :...dependents > 1: no (22.3/5.2)
##     :   dependents <= 1:
##     :   :...savings_balance in {100 - 500 DM,unknown,> 1000 DM}: no (20.7/2.9)
##     :       savings_balance = 500 - 1000 DM: yes (6/1)
##     :       savings_balance = < 100 DM:
##     :       :...amount <= 1050: no (8.2)
##     :           amount > 1050:
##     :           :...housing = rent: yes (16.6/2.4)
##     :               housing = other: no (6.7/1.8)
##     :               housing = own:
##     :               :...years_at_residence <= 1: no (3.4)
##     :                   years_at_residence > 1:
##     :                   :...age <= 32: yes (24.8/4.8)
##     :                       age > 32: no (21.8/7.5)
##     credit_history = good:
##     :...amount > 8648: yes (21.6/4.6)
##         amount <= 8648:
##         :...job in {management,unemployed}: no (46.6/10.9)
##             job in {skilled,unskilled}:
##             :...months_loan_duration > 33: yes (41.8/13.2)
##                 months_loan_duration <= 33:
##                 :...purpose in {business,education,car0,
##                     :           renovations}: no (34.2/9.4)
##                     purpose = car:
##                     :...percent_of_income > 2: yes (46.4/15.1)
##                     :   percent_of_income <= 2:
##                     :   :...age <= 42: no (23.3/5.2)
##                     :       age > 42: yes (5)
##                     purpose = furniture/appliances:
##                     :...months_loan_duration <= 7: no (8.2)
##                         months_loan_duration > 7:
##                         :...employment_duration in {> 7 years,
##                             :                       4 - 7 years}: no (35.3/8.6)
##                             employment_duration = unemployed: yes (5.4)
##                             employment_duration = < 1 year:
##                             :...phone = no: no (39.9/13.3)
##                             :   phone = yes: yes (8.2/2.5)
##                             employment_duration = 1 - 4 years:
##                             :...housing in {rent,other}: yes (8.2/2)
##                                 housing = own:
##                                 :...months_loan_duration > 22: no (10.5/1.9)
##                                     months_loan_duration <= 22:
##                                     :...months_loan_duration > 15: yes (13.9/3.5)
##                                         months_loan_duration <= 15:
##                                         :...phone = yes: no (6.8/2)
##                                             phone = no: [S1]
## 
## SubTree [S1]
## 
## checking_balance = < 0 DM: no (5.3)
## checking_balance in {1 - 200 DM,> 200 DM}: yes (26.2/10)
## 
## -----  Trial 7:  -----
## 
## Decision tree:
## 
## checking_balance in {unknown,> 200 DM}:
## :...savings_balance = > 1000 DM: no (13.6)
## :   savings_balance in {< 100 DM,100 - 500 DM,unknown,500 - 1000 DM}:
## :   :...other_credit = none: no (237.6/69.1)
## :       other_credit in {bank,store}:
## :       :...existing_loans_count > 2: no (4.3)
## :           existing_loans_count <= 2:
## :           :...employment_duration in {> 7 years,unemployed}: yes (30.9/11.6)
## :               employment_duration in {< 1 year,4 - 7 years}: no (18.4/5.9)
## :               employment_duration = 1 - 4 years:
## :               :...years_at_residence <= 1: no (3)
## :                   years_at_residence > 1: yes (24/6.1)
## checking_balance in {< 0 DM,1 - 200 DM}:
## :...employment_duration = unemployed:
##     :...years_at_residence <= 2: yes (20.9/5)
##     :   years_at_residence > 2: no (23.4/4.1)
##     employment_duration = 4 - 7 years:
##     :...months_loan_duration <= 22: no (39/7.5)
##     :   months_loan_duration > 22:
##     :   :...dependents > 1: no (11.4/2.2)
##     :       dependents <= 1:
##     :       :...housing in {rent,other}: no (12.1/4.9)
##     :           housing = own:
##     :           :...credit_history in {perfect,good,critical,
##     :               :                  poor}: yes (22.8/4.6)
##     :               credit_history = very good: no (4.3)
##     employment_duration = > 7 years:
##     :...months_loan_duration > 33: yes (18.8/1.5)
##     :   months_loan_duration <= 33:
##     :   :...savings_balance in {100 - 500 DM,500 - 1000 DM}: no (6.7)
##     :       savings_balance in {< 100 DM,unknown,> 1000 DM}:
##     :       :...amount <= 776: no (7.9)
##     :           amount > 776:
##     :           :...existing_loans_count > 2: no (6.2/1.2)
##     :               existing_loans_count <= 2:
##     :               :...years_at_residence <= 3:
##     :                   :...months_loan_duration <= 24: yes (24.8/3.7)
##     :                   :   months_loan_duration > 24: no (2.8)
##     :                   years_at_residence > 3:
##     :                   :...savings_balance in {unknown,
##     :                       :                   > 1000 DM}: no (11.9/3.9)
##     :                       savings_balance = < 100 DM:
##     :                       :...purpose in {car,business,
##     :                           :           education}: yes (21.7/2.5)
##     :                           purpose in {furniture/appliances,car0,
##     :                                       renovations}: no (12.1/1)
##     employment_duration = < 1 year:
##     :...housing = rent: yes (34/9.6)
##     :   housing in {own,other}:
##     :   :...savings_balance in {unknown,> 1000 DM}: no (8.3)
##     :       savings_balance in {< 100 DM,100 - 500 DM,500 - 1000 DM}:
##     :       :...purpose in {business,car0,renovations}: no (4.4/0.6)
##     :           purpose = education: yes (4.6/1.4)
##     :           purpose = car:
##     :           :...housing = other: yes (1.7)
##     :           :   housing = own:
##     :           :   :...age <= 25: yes (4.3)
##     :           :       age > 25: no (24.2/5.8)
##     :           purpose = furniture/appliances:
##     :           :...dependents > 1: yes (2.6)
##     :               dependents <= 1:
##     :               :...housing = other: no (4.1)
##     :                   housing = own:
##     :                   :...phone = yes: yes (7.1/1.2)
##     :                       phone = no:
##     :                       :...existing_loans_count <= 1: no (22.7/9)
##     :                           existing_loans_count > 1: yes (5.2/0.8)
##     employment_duration = 1 - 4 years:
##     :...housing = other: yes (10.6/2.9)
##         housing = rent:
##         :...years_at_residence <= 3: no (14.7/4.2)
##         :   years_at_residence > 3:
##         :   :...other_credit = bank: no (5.8/1.2)
##         :       other_credit in {none,store}: yes (21.3/4.3)
##         housing = own:
##         :...purpose in {business,education}: no (24.6/10.7)
##             purpose in {car0,renovations}: yes (12.8/3.9)
##             purpose = car:
##             :...amount <= 1123: yes (5.2)
##             :   amount > 1123: no (24/6.8)
##             purpose = furniture/appliances:
##             :...dependents > 1: no (5.9)
##                 dependents <= 1:
##                 :...other_credit = store: no (6.3)
##                     other_credit in {bank,none}:
##                     :...savings_balance in {100 - 500 DM,unknown,
##                         :                   500 - 1000 DM}: yes (16.4/3.7)
##                         savings_balance = > 1000 DM: no (1.2)
##                         savings_balance = < 100 DM:
##                         :...existing_loans_count > 1: no (8.2/0.6)
##                             existing_loans_count <= 1:
##                             :...months_loan_duration > 30: yes (3.8)
##                                 months_loan_duration <= 30:
##                                 :...other_credit = bank: yes (2.8/0.6)
##                                     other_credit = none: no (32.4/12.3)
## 
## -----  Trial 8:  -----
## 
## Decision tree:
## 
## months_loan_duration <= 7: no (47.6/7.8)
## months_loan_duration > 7:
## :...savings_balance in {unknown,> 1000 DM}:
##     :...checking_balance in {unknown,> 200 DM}: no (63.9/12)
##     :   checking_balance = 1 - 200 DM:
##     :   :...other_credit in {bank,none}: no (51/15)
##     :   :   other_credit = store: yes (2.2)
##     :   checking_balance = < 0 DM:
##     :   :...savings_balance = > 1000 DM: no (2.2)
##     :       savings_balance = unknown:
##     :       :...employment_duration = unemployed: yes (0)
##     :           employment_duration = < 1 year: no (5.2)
##     :           employment_duration in {1 - 4 years,> 7 years,4 - 7 years}:
##     :           :...phone = no: yes (15/2.5)
##     :               phone = yes: no (14/5.6)
##     savings_balance in {< 100 DM,100 - 500 DM,500 - 1000 DM}:
##     :...checking_balance = unknown:
##         :...age > 44: no (18.8)
##         :   age <= 44:
##         :   :...age <= 23:
##         :       :...savings_balance in {< 100 DM,500 - 1000 DM}: yes (23.5/4.1)
##         :       :   savings_balance = 100 - 500 DM: no (3.5)
##         :       age > 23:
##         :       :...employment_duration = 4 - 7 years: no (12.9)
##         :           employment_duration in {1 - 4 years,> 7 years,unemployed,
##         :           :                       < 1 year}:
##         :           :...other_credit = store: no (12.6/6.2)
##         :               other_credit = bank:
##         :               :...age <= 34: no (22.7/8.5)
##         :               :   age > 34: yes (11.6)
##         :               other_credit = none:
##         :               :...age > 31: no (20.6)
##         :                   age <= 31:
##         :                   :...dependents > 1: no (2.1)
##         :                       dependents <= 1:
##         :                       :...credit_history = very good: yes (0)
##         :                           credit_history in {perfect,
##         :                           :                  critical}: no (5)
##         :                           credit_history in {good,poor}:
##         :                           :...amount <= 1107: no (4.6)
##         :                               amount > 1107:
##         :                               :...age <= 25: no (3.1)
##         :                                   age > 25: yes (26.8/7)
##         checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
##         :...years_at_residence <= 1:
##             :...other_credit = bank: no (14.6/1.4)
##             :   other_credit in {none,store}:
##             :   :...job = management: no (6.6/0.5)
##             :       job = unemployed: yes (5.9)
##             :       job in {skilled,unskilled}:
##             :       :...housing in {rent,other}: yes (11.2/2.2)
##             :           housing = own:
##             :           :...checking_balance = > 200 DM: no (9.9/2.1)
##             :               checking_balance in {< 0 DM,1 - 200 DM}:
##             :               :...other_credit = store: yes (4.9/1.5)
##             :                   other_credit = none:
##             :                   :...checking_balance = < 0 DM: yes (19.7/7.9)
##             :                       checking_balance = 1 - 200 DM: no (19.7/5.3)
##             years_at_residence > 1:
##             :...months_loan_duration > 26: yes (117/37.6)
##                 months_loan_duration <= 26:
##                 :...credit_history in {perfect,very good}: yes (39.8/12.9)
##                     credit_history = poor: no (17.6/7.7)
##                     credit_history = good:
##                     :...dependents > 1: yes (22.1/8)
##                     :   dependents <= 1:
##                     :   :...purpose in {car,car0}: yes (50.6/23.5)
##                     :       purpose in {business,education,
##                     :       :           renovations}: no (20.3/5)
##                     :       purpose = furniture/appliances: [S1]
##                     credit_history = critical:
##                     :...housing = other: no (8.1)
##                         housing in {own,rent}:
##                         :...other_credit = bank: yes (14.2/4)
##                             other_credit = store: no (1)
##                             other_credit = none:
##                             :...existing_loans_count <= 1: no (9.5)
##                                 existing_loans_count > 1:
##                                 :...percent_of_income <= 1: no (3.8)
##                                     percent_of_income > 1: [S2]
## 
## SubTree [S1]
## 
## employment_duration in {1 - 4 years,unemployed,< 1 year}: yes (75.5/32.9)
## employment_duration in {> 7 years,4 - 7 years}: no (14.5/2.9)
## 
## SubTree [S2]
## 
## checking_balance = > 200 DM: yes (2.6)
## checking_balance in {< 0 DM,1 - 200 DM}:
## :...employment_duration in {unemployed,4 - 7 years}: no (5.8)
##     employment_duration in {1 - 4 years,> 7 years,< 1 year}:
##     :...percent_of_income <= 3: yes (12/1.8)
##         percent_of_income > 3: no (15.4/6.5)
## 
## -----  Trial 9:  -----
## 
## Decision tree:
## 
## checking_balance = unknown:
## :...age > 44: no (31.6)
## :   age <= 44:
## :   :...months_loan_duration <= 8: no (12.7)
## :       months_loan_duration > 8:
## :       :...other_credit = bank:
## :           :...percent_of_income <= 3: yes (29.5/8)
## :           :   percent_of_income > 3: no (12/3)
## :           other_credit in {none,store}:
## :           :...age > 37: no (13.5)
## :               age <= 37:
## :               :...credit_history = perfect: yes (4.2/1.1)
## :                   credit_history in {critical,very good}: no (22.2/2.8)
## :                   credit_history = poor:
## :                   :...percent_of_income <= 3: no (13/1.8)
## :                   :   percent_of_income > 3: yes (12.9/2.7)
## :                   credit_history = good:
## :                   :...phone = yes: no (28/3.5)
## :                       phone = no:
## :                       :...other_credit = store: no (1.1)
## :                           other_credit = none:
## :                           :...months_loan_duration > 30: no (3.5)
## :                               months_loan_duration <= 30:
## :                               :...percent_of_income <= 1: no (3.2)
## :                                   percent_of_income > 1: yes (33.7/13.8)
## checking_balance in {< 0 DM,1 - 200 DM,> 200 DM}:
## :...credit_history = perfect:
##     :...percent_of_income > 3: yes (8)
##     :   percent_of_income <= 3:
##     :   :...housing = own: no (24.3/8.1)
##     :       housing in {rent,other}: yes (4.3)
##     credit_history = critical:
##     :...age > 33: no (67/14.7)
##     :   age <= 33:
##     :   :...savings_balance in {< 100 DM,500 - 1000 DM,
##     :       :                   > 1000 DM}: yes (48.1/17)
##     :       savings_balance in {100 - 500 DM,unknown}: no (9.4)
##     credit_history = poor:
##     :...percent_of_income <= 1: no (5.5)
##     :   percent_of_income > 1:
##     :   :...savings_balance = 500 - 1000 DM: yes (0)
##     :       savings_balance in {100 - 500 DM,unknown}: no (16.2/2.3)
##     :       savings_balance in {< 100 DM,> 1000 DM}:
##     :       :...months_loan_duration <= 13: no (4.5)
##     :           months_loan_duration > 13: yes (28.4/4.2)
##     credit_history = very good:
##     :...dependents > 1: yes (6.8)
##     :   dependents <= 1:
##     :   :...age <= 23: no (8.4)
##     :       age > 23:
##     :       :...age <= 29: yes (9.4/0.6)
##     :           age > 29: no (23.8/10.4)
##     credit_history = good:
##     :...savings_balance = 500 - 1000 DM: no (13.4/1.2)
##         savings_balance in {< 100 DM,100 - 500 DM,unknown,> 1000 DM}:
##         :...job = unemployed: yes (6.1/2.3)
##             job = management:
##             :...savings_balance = > 1000 DM: no (0)
##             :   savings_balance = unknown: yes (8.7/1.9)
##             :   savings_balance in {< 100 DM,100 - 500 DM}:
##             :   :...amount <= 7596: no (34.6/5.2)
##             :       amount > 7596: yes (11/3)
##             job = unskilled:
##             :...checking_balance = 1 - 200 DM:
##             :   :...months_loan_duration <= 21: no (27.9/1.7)
##             :   :   months_loan_duration > 21: yes (4.6/1.1)
##             :   checking_balance in {< 0 DM,> 200 DM}:
##             :   :...months_loan_duration > 36: no (3.3)
##             :       months_loan_duration <= 36:
##             :       :...months_loan_duration > 26: yes (4.5)
##             :           months_loan_duration <= 26:
##             :           :...dependents > 1: yes (10.2/2)
##             :               dependents <= 1:
##             :               :...months_loan_duration <= 9: yes (3.6)
##             :                   months_loan_duration > 9: no (33.3/10.8)
##             job = skilled:
##             :...checking_balance = > 200 DM: no (23.1/6.1)
##                 checking_balance in {< 0 DM,1 - 200 DM}:
##                 :...purpose in {business,car0}: yes (9.3/3)
##                     purpose in {education,renovations}: no (10.7/3.3)
##                     purpose = car:
##                     :...other_credit = bank: no (5.6/0.7)
##                     :   other_credit = store: yes (1.1)
##                     :   other_credit = none:
##                     :   :...age > 32: yes (18.5/3.4)
##                     :       age <= 32:
##                     :       :...percent_of_income <= 1: no (7)
##                     :           percent_of_income > 1:
##                     :           :...age <= 25: yes (5)
##                     :               age > 25: no (27.9/9.7)
##                     purpose = furniture/appliances:
##                     :...housing = rent: yes (21.8/6.3)
##                         housing = other: no (5.7/1.6)
##                         housing = own:
##                         :...employment_duration in {> 7 years,
##                             :                       unemployed}: yes (19.5/5.5)
##                             employment_duration in {< 1 year,
##                             :                       4 - 7 years}: no (32.6/15.6)
##                             employment_duration = 1 - 4 years:
##                             :...dependents > 1: no (2.3)
##                                 dependents <= 1:
##                                 :...other_credit in {bank,store}: no (4.3/0.5)
##                                     other_credit = none:
##                                     :...percent_of_income <= 3: yes (24.5/9.1)
##                                         percent_of_income > 3: no (14.7/3.4)
## 
## 
## Evaluation on training data (900 cases):
## 
## Trial        Decision Tree   
## -----      ----------------  
##    Size      Errors  
## 
##    0     48  128(14.2%)
##    1     41  179(19.9%)
##    2     36  200(22.2%)
##    3     32  244(27.1%)
##    4     50  195(21.7%)
##    5     49  192(21.3%)
##    6     37  194(21.6%)
##    7     51  176(19.6%)
##    8     46  188(20.9%)
##    9     57  201(22.3%)
## boost             44( 4.9%)   <<
## 
## 
##     (a)   (b)    <-classified as
##    ----  ----
##     626     7    (a): class no
##      37   230    (b): class yes
## 
## 
##  Attribute usage:
## 
##  100.00% checking_balance
##  100.00% months_loan_duration
##  100.00% credit_history
##  100.00% savings_balance
##  100.00% employment_duration
##  100.00% other_credit
##   85.33% purpose
##   82.89% amount
##   80.11% age
##   73.11% housing
##   67.67% job
##   67.00% dependents
##   57.44% years_at_residence
##   50.56% percent_of_income
##   49.89% existing_loans_count
##   39.33% phone
## 
## 
## Time: 0.1 secs

900件中856件当たっており, 予測精度は約95%とかなり上がった. (ただし偽陽性は4増えてしまった)

## Evaluate credit_model10 on the test data
credit_pred10 <- predict(object = credit_model10, newdata = credit_test)
### Cross-tabulate actual vs. predicted labels, showing row proportions only.
CrossTable(
  x = credit_test$default,
  y = credit_pred10,
  dnn = c("actual default", "predicted default"),
  prop.c = FALSE,
  prop.t = FALSE,
  prop.chisq = FALSE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  100 
## 
##  
##                | predicted default 
## actual default |        no |       yes | Row Total | 
## ---------------|-----------|-----------|-----------|
##             no |        57 |        10 |        67 | 
##                |     0.851 |     0.149 |     0.670 | 
## ---------------|-----------|-----------|-----------|
##            yes |        17 |        16 |        33 | 
##                |     0.515 |     0.485 |     0.330 | 
## ---------------|-----------|-----------|-----------|
##   Column Total |        74 |        26 |       100 | 
## ---------------|-----------|-----------|-----------|
## 
## 

今回は精度が下がってしまった(79%→73%)

本当はyes(焦げ付く)なのにnoと分類してしまうのが一番リスキーなので, ペナルティを重くする

# Improve model performance
## Specify misclassification costs
### Build the dimnames of the cost matrix.
### C5.0 reads the rows of the cost matrix as the predicted class and the
### columns as the actual class, so the dimensions are labelled in that order.
matrix_dimensions <- list(c("no", "yes"), c("no", "yes"))
matrix_names <- c("predicted", "actual")
names(matrix_dimensions) <- matrix_names
matrix_dimensions
## $predicted
## [1] "no"  "yes"
## 
## $actual
## [1] "no"  "yes"
### Assign the penalties. matrix() fills column by column, so:
###   predicted "no"  / actual "yes" (a missed default) costs 4,
###   predicted "yes" / actual "no"  (a false alarm)    costs 1,
###   correct predictions cost 0.
error_costs <- matrix(c(0, 1, 4, 0), nrow = 2, dimnames = matrix_dimensions)
## Train a cost-sensitive tree and predict on the test data
### Column 17 (default) is the target, so it is dropped from the predictors.
credit_cost <- C5.0(
  x = credit_train[-17],
  y = credit_train$default,
  costs = error_costs
)
credit_cost_pred <- predict(object = credit_cost, newdata = credit_test)
### Cross-tabulate actual vs. predicted labels, showing row proportions only.
CrossTable(
  x = credit_test$default,
  y = credit_cost_pred,
  dnn = c("actual default", "predicted default"),
  prop.c = FALSE,
  prop.t = FALSE,
  prop.chisq = FALSE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  100 
## 
##  
##                | predicted default 
## actual default |        no |       yes | Row Total | 
## ---------------|-----------|-----------|-----------|
##             no |        34 |        33 |        67 | 
##                |     0.507 |     0.493 |     0.670 | 
## ---------------|-----------|-----------|-----------|
##            yes |         8 |        25 |        33 | 
##                |     0.242 |     0.758 |     0.330 | 
## ---------------|-----------|-----------|-----------|
##   Column Total |        42 |        58 |       100 | 
## ---------------|-----------|-----------|-----------|
## 
## 

全体の正解率は59%((34+25)/100)に下がってしまったが, 実際に焦げ付いた融資のうち正しく検出できた割合は上がった(ブースティング時の16/33→25/33).

分類ルール学習器

毒キノコか食べられるキノコかを分類する
ここでは全種類がカバーされていると仮定するため, 訓練データとテストデータに分ける必要がなく, このデータを正しく分類できればよい

# Collect the data
## Read the CSV with string columns as factors, then drop veil_type.
mushrooms <- read.csv(file = "mushrooms.csv", stringsAsFactors = TRUE) %>%
  select(-veil_type)
## Class counts of the target variable
table(mushrooms[["type"]])
## 
##    edible poisonous 
##      4208      3916

1R分類器では説明変数として最も重要度の高い特徴量を一つ選択し, ルールを作成する

# Train the model
## NOTE: RWeka, attached later in this file, also exports OneR() and masks
## this one once loaded; run this step before library(RWeka), or call
## OneR::OneR() explicitly.
library(OneR)
## Fit a 1R classifier predicting type from all other columns.
mushrooms_1R <- OneR(formula = type ~ ., data = mushrooms)
## Print the learned rules and the accuracy on the data used for fitting.
mushrooms_1R
## 
## Call:
## OneR.formula(formula = type ~ ., data = mushrooms)
## 
## Rules:
## If odor = almond   then type = edible
## If odor = anise    then type = edible
## If odor = creosote then type = poisonous
## If odor = fishy    then type = poisonous
## If odor = foul     then type = poisonous
## If odor = musty    then type = poisonous
## If odor = none     then type = edible
## If odor = pungent  then type = poisonous
## If odor = spicy    then type = poisonous
## 
## Accuracy:
## 8004 of 8124 instances classified correctly (98.52%)
## Predict on the same data and cross-tabulate against the true labels
mushrooms_1R_pred <- predict(object = mushrooms_1R, newdata = mushrooms)
table(actual = mushrooms[["type"]], predicted = mushrooms_1R_pred)
##            predicted
## actual      edible poisonous
##   edible      4208         0
##   poisonous    120      3796

98.52%正しく分類できているが, それでも120種類は誤って分類されている. しかも実際は毒なのに食べられると予測されている.
より高性能なJRip関数を用いる

# Improve model performance
## Fit the JRip rule learner predicting type from all other columns.
library(RWeka)
## 
## Attaching package: 'RWeka'
## The following object is masked from 'package:OneR':
## 
##     OneR
mushrooms_JRip <- JRip(type ~ ., data = mushrooms)
mushrooms_JRip
## JRIP rules:
## ===========
## 
## (odor = foul) => type=poisonous (2160.0/0.0)
## (gill_size = narrow) and (gill_color = buff) => type=poisonous (1152.0/0.0)
## (gill_size = narrow) and (odor = pungent) => type=poisonous (256.0/0.0)
## (odor = creosote) => type=poisonous (192.0/0.0)
## (spore_print_color = green) => type=poisonous (72.0/0.0)
## (stalk_surface_below_ring = scaly) and (stalk_surface_above_ring = silky) => type=poisonous (68.0/0.0)
## (habitat = leaves) and (cap_color = white) => type=poisonous (8.0/0.0)
## (stalk_color_above_ring = yellow) => type=poisonous (8.0/0.0)
##  => type=edible (4208.0/0.0)
## 
## Number of Rules : 9
## Predict on the same data and cross-tabulate against the true labels
mushrooms_JRip_pred <- predict(object = mushrooms_JRip, newdata = mushrooms)
table(actual = mushrooms[["type"]], predicted = mushrooms_JRip_pred)
##            predicted
## actual      edible poisonous
##   edible      4208         0
##   poisonous      0      3916

全て正しく分類できた