## 'data.frame': 569 obs. of 32 variables:
## $ id : int 842302 842517 84300903 84348301 84358402 843786 844359 84458202 844981 84501001 ...
## $ diagnosis : chr "M" "M" "M" "M" ...
## $ radius_mean : num 18 20.6 19.7 11.4 20.3 ...
## $ texture_mean : num 10.4 17.8 21.2 20.4 14.3 ...
## $ perimeter_mean : num 122.8 132.9 130 77.6 135.1 ...
## $ area_mean : num 1001 1326 1203 386 1297 ...
## $ smoothness_mean : num 0.1184 0.0847 0.1096 0.1425 0.1003 ...
## $ compactness_mean : num 0.2776 0.0786 0.1599 0.2839 0.1328 ...
## $ concavity_mean : num 0.3001 0.0869 0.1974 0.2414 0.198 ...
## $ concave.points_mean : num 0.1471 0.0702 0.1279 0.1052 0.1043 ...
## $ symmetry_mean : num 0.242 0.181 0.207 0.26 0.181 ...
## $ fractal_dimension_mean : num 0.0787 0.0567 0.06 0.0974 0.0588 ...
## $ radius_se : num 1.095 0.543 0.746 0.496 0.757 ...
## $ texture_se : num 0.905 0.734 0.787 1.156 0.781 ...
## $ perimeter_se : num 8.59 3.4 4.58 3.44 5.44 ...
## $ area_se : num 153.4 74.1 94 27.2 94.4 ...
## $ smoothness_se : num 0.0064 0.00522 0.00615 0.00911 0.01149 ...
## $ compactness_se : num 0.049 0.0131 0.0401 0.0746 0.0246 ...
## $ concavity_se : num 0.0537 0.0186 0.0383 0.0566 0.0569 ...
## $ concave.points_se : num 0.0159 0.0134 0.0206 0.0187 0.0188 ...
## $ symmetry_se : num 0.03 0.0139 0.0225 0.0596 0.0176 ...
## $ fractal_dimension_se : num 0.00619 0.00353 0.00457 0.00921 0.00511 ...
## $ radius_worst : num 25.4 25 23.6 14.9 22.5 ...
## $ texture_worst : num 17.3 23.4 25.5 26.5 16.7 ...
## $ perimeter_worst : num 184.6 158.8 152.5 98.9 152.2 ...
## $ area_worst : num 2019 1956 1709 568 1575 ...
## $ smoothness_worst : num 0.162 0.124 0.144 0.21 0.137 ...
## $ compactness_worst : num 0.666 0.187 0.424 0.866 0.205 ...
## $ concavity_worst : num 0.712 0.242 0.45 0.687 0.4 ...
## $ concave.points_worst : num 0.265 0.186 0.243 0.258 0.163 ...
## $ symmetry_worst : num 0.46 0.275 0.361 0.664 0.236 ...
## $ fractal_dimension_worst: num 0.1189 0.089 0.0876 0.173 0.0768 ...
##
## B M
## 357 212
##
## Benign Malignant
## 357 212
##
## Benign Malignant
## 62.7 37.3
## radius_mean area_mean smoothness_mean
## Min. : 6.981 Min. : 143.5 Min. :0.05263
## 1st Qu.:11.700 1st Qu.: 420.3 1st Qu.:0.08637
## Median :13.370 Median : 551.1 Median :0.09587
## Mean :14.127 Mean : 654.9 Mean :0.09636
## 3rd Qu.:15.780 3rd Qu.: 782.7 3rd Qu.:0.10530
## Max. :28.110 Max. :2501.0 Max. :0.16340
## [1] 0.00 0.25 0.50 0.75 1.00
## [1] 0.00 0.25 0.50 0.75 1.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.1174 0.1729 0.2169 0.2711 1.0000
## 'data.frame': 100 obs. of 30 variables:
## $ radius_mean : num 0.22 0.127 0.239 0.376 0.25 ...
## $ texture_mean : num 0.286 0.297 0.623 0.177 0.685 ...
## $ perimeter_mean : num 0.225 0.122 0.228 0.364 0.232 ...
## $ area_mean : num 0.1125 0.0618 0.13 0.2305 0.1365 ...
## $ smoothness_mean : num 0.586 0.332 0.315 0.256 0.22 ...
## $ compactness_mean : num 0.3954 0.1325 0.1246 0.2028 0.0448 ...
## $ concavity_mean : num 0.239 0.0691 0.0555 0.1298 0 ...
## $ concave.points_mean : num 0.2765 0.0752 0.1181 0.1601 0 ...
## $ symmetry_mean : num 0.453 0.595 0.401 0.317 0.324 ...
## $ fractal_dimension_mean : num 0.476 0.298 0.148 0.142 0.203 ...
## $ radius_se : num 0.1081 0.0964 0.1791 0.0482 0.1209 ...
## $ texture_se : num 0.3049 0.2188 0.5045 0.0162 0.7264 ...
## $ perimeter_se : num 0.107 0.0854 0.1575 0.0504 0.1002 ...
## $ area_se : num 0.0393 0.0297 0.0713 0.0308 0.0529 ...
## $ smoothness_se : num 0.438 0.197 0.197 0.053 0.191 ...
## $ compactness_se : num 0.2239 0.1325 0.0999 0.116 0.045 ...
## $ concavity_se : num 0.1253 0.0685 0.037 0.0583 0 ...
## $ concave.points_se : num 0.349 0.187 0.224 0.159 0 ...
## $ symmetry_se : num 0.1434 0.1649 0.1771 0.0506 0.3311 ...
## $ fractal_dimension_se : num 0.1493 0.1043 0.1032 0.0513 0.0774 ...
## $ radius_worst : num 0.193 0.114 0.202 0.329 0.196 ...
## $ texture_worst : num 0.357 0.362 0.568 0.165 0.694 ...
## $ perimeter_worst : num 0.188 0.102 0.183 0.307 0.173 ...
## $ area_worst : num 0.0843 0.0492 0.094 0.1773 0.0918 ...
## $ smoothness_worst : num 0.705 0.345 0.217 0.233 0.152 ...
## $ compactness_worst : num 0.2527 0.1231 0.0679 0.2443 0.0241 ...
## $ concavity_worst : num 0.2545 0.102 0.0441 0.2517 0 ...
## $ concave.points_worst : num 0.487 0.225 0.191 0.394 0 ...
## $ symmetry_worst : num 0.216 0.317 0.165 0.221 0.166 ...
## $ fractal_dimension_worst: num 0.247 0.1981 0.0744 0.1816 0.0813 ...
## 'data.frame': 469 obs. of 30 variables:
## $ radius_mean : num 0.521 0.643 0.601 0.21 0.63 ...
## $ texture_mean : num 0.0227 0.2726 0.3903 0.3608 0.1566 ...
## $ perimeter_mean : num 0.546 0.616 0.596 0.234 0.631 ...
## $ area_mean : num 0.364 0.502 0.449 0.103 0.489 ...
## $ smoothness_mean : num 0.594 0.29 0.514 0.811 0.43 ...
## $ compactness_mean : num 0.792 0.182 0.431 0.811 0.348 ...
## $ concavity_mean : num 0.703 0.204 0.463 0.566 0.464 ...
## $ concave.points_mean : num 0.731 0.349 0.636 0.523 0.518 ...
## $ symmetry_mean : num 0.686 0.38 0.51 0.776 0.378 ...
## $ fractal_dimension_mean : num 0.606 0.141 0.211 1 0.187 ...
## $ radius_se : num 0.356 0.156 0.23 0.139 0.234 ...
## $ texture_se : num 0.1205 0.0826 0.0943 0.1759 0.0931 ...
## $ perimeter_se : num 0.369 0.124 0.18 0.127 0.221 ...
## $ area_se : num 0.2738 0.1257 0.1629 0.0382 0.1637 ...
## $ smoothness_se : num 0.159 0.119 0.151 0.251 0.332 ...
## $ compactness_se : num 0.3514 0.0813 0.284 0.5432 0.1679 ...
## $ concavity_se : num 0.1357 0.047 0.0968 0.143 0.1436 ...
## $ concave.points_se : num 0.301 0.254 0.39 0.354 0.357 ...
## $ symmetry_se : num 0.3116 0.0845 0.2057 0.7281 0.1362 ...
## $ fractal_dimension_se : num 0.183 0.0911 0.127 0.2872 0.1458 ...
## $ radius_worst : num 0.621 0.607 0.556 0.248 0.52 ...
## $ texture_worst : num 0.142 0.304 0.36 0.386 0.124 ...
## $ perimeter_worst : num 0.668 0.54 0.508 0.241 0.507 ...
## $ area_worst : num 0.451 0.435 0.375 0.094 0.342 ...
## $ smoothness_worst : num 0.601 0.348 0.484 0.915 0.437 ...
## $ compactness_worst : num 0.619 0.155 0.385 0.814 0.172 ...
## $ concavity_worst : num 0.569 0.193 0.36 0.549 0.319 ...
## $ concave.points_worst : num 0.912 0.639 0.835 0.885 0.558 ...
## $ symmetry_worst : num 0.598 0.234 0.404 1 0.158 ...
## $ fractal_dimension_worst: num 0.419 0.223 0.213 0.774 0.143 ...
## Step 3 - Training a model on the data
# install.packages("class")  # uncomment if the package is not installed yet
library(class)

# Build the classifier and make the predictions in a single step.
# knn() from the class package provides a standard, classic implementation of
# the k-NN algorithm. For each instance in the test data, the function
# identifies the k nearest neighbours using Euclidean distance.
#
# kNN classification syntax -- knn(train, test, cl, k):
#   train : data frame containing numeric training data
#   test  : data frame containing numeric test data
#   cl    : factor vector with the class for each row in the training data
#   k     : integer indicating the number of nearest neighbours
#
# The training data includes 469 instances, so k = 21 is tried here: an odd
# number roughly equal to the square root of 469.
wbcd_test_pred <- knn(
  train = wbcd_train,
  test  = wbcd_test,
  cl    = wbcd_train_labels,
  k     = 21
)

# knn() returns a factor vector of predicted labels, one for each example in
# the test dataset; summary() prints the count of predictions per class.
summary(wbcd_test_pred)
## Benign Malignant
## 79 21
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | wbcd_test_pred
## wbcd_test_labels | Benign | Malignant | Row Total |
## -----------------|-----------|-----------|-----------|
## Benign | 77 | 0 | 77 |
## | 1.000 | 0.000 | 0.770 |
## | 0.975 | 0.000 | |
## | 0.770 | 0.000 | |
## -----------------|-----------|-----------|-----------|
## Malignant | 2 | 21 | 23 |
## | 0.087 | 0.913 | 0.230 |
## | 0.025 | 1.000 | |
## | 0.020 | 0.210 | |
## -----------------|-----------|-----------|-----------|
## Column Total | 79 | 21 | 100 |
## | 0.790 | 0.210 | |
## -----------------|-----------|-----------|-----------|
##
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.4532 -0.6666 -0.2949 0.0000 0.3632 5.2459
## Benign Malignant
## 79 21
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | wbcd_test_pred
## wbcd_test_labels | Benign | Malignant | Row Total |
## -----------------|-----------|-----------|-----------|
## Benign | 77 | 0 | 77 |
## | 1.000 | 0.000 | 0.770 |
## | 0.975 | 0.000 | |
## | 0.770 | 0.000 | |
## -----------------|-----------|-----------|-----------|
## Malignant | 2 | 21 | 23 |
## | 0.087 | 0.913 | 0.230 |
## | 0.025 | 1.000 | |
## | 0.020 | 0.210 | |
## -----------------|-----------|-----------|-----------|
## Column Total | 79 | 21 | 100 |
## | 0.790 | 0.210 | |
## -----------------|-----------|-----------|-----------|
##
##
## 'data.frame': 5574 obs. of 2 variables:
## $ type: chr "ham" "ham" "spam" "ham" ...
## $ text: chr "Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat..." "Ok lar... Joking wif u oni..." "Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question("| __truncated__ "U dun say so early hor... U c already then say..." ...
## Factor w/ 2 levels "ham","spam": 1 1 2 1 1 2 1 1 2 2 ...
##
## ham spam
## 4827 747
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 5574
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 2
##
## [[1]]
## <<PlainTextDocument>>
## Metadata: 7
## Content: chars: 111
##
## [[2]]
## <<PlainTextDocument>>
## Metadata: 7
## Content: chars: 29
## [1] "Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat..."
## $`1`
## [1] "Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat..."
##
## $`2`
## [1] "Ok lar... Joking wif u oni..."
## [1] "Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat..."
## [1] "go until jurong point, crazy.. available only in bugis n great world la e buffet... cine there got amore wat..."
## <<PlainTextDocument>>
## Metadata: 7
## Content: chars: 111
## <<PlainTextDocument>>
## Metadata: 7
## Content: chars: 86
## Warning: package 'SnowballC' was built under R version 3.5.2
## [1] "learn" "learn" "learn" "learn"
## [1] "Hope you are having a good week. Just checking in"
## [1] "hope good week just check"
## <<DocumentTermMatrix (documents: 5574, terms: 6617)>>
## Non-/sparse entries: 43025/36840133
## Sparsity : 100%
## Maximal term length: 47
## Weighting : term frequency (tf)
## <<DocumentTermMatrix (documents: 5574, terms: 8371)>>
## Non-/sparse entries: 44243/46615711
## Sparsity : 100%
## Maximal term length: 40
## Weighting : term frequency (tf)
## sms_train_labels
## ham spam
## 0.8648325 0.1351675
## sms_test_labels
## ham spam
## 0.8694405 0.1305595
## Loading required package: RColorBrewer
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## [1] "<c2><a3>" "<c2><a3>wk" "<c3><9c>"
## [4] "<c3><bc>" "<e2><80><93>" "<e2><80><a6>"
## [7] "abiola" "abl" "abt"
## [10] "accept" "access" "account"
## [13] "across" "activ" "actual"
## [16] "add" "address" "admir"
## [19] "adult" "advanc" "aft"
## [22] "afternoon" "aftr" "age"
## [25] "ago" "ahead" "ahmad"
## [28] "aight" "aint" "air"
## [31] "aiyah" "alex" "almost"
## [34] "alon" "alreadi" "alright"
## [37] "alrit" "also" "alway"
## [40] "amp" "angri" "announc"
## [43] "anoth" "answer" "anybodi"
## [46] "anymor" "anyon" "anyth"
## [49] "anytim" "anyway" "apart"
## [52] "app" "appli" "appoint"
## [55] "appreci" "april" "ard"
## [58] "area" "argument" "arm"
## [61] "around" "arrang" "arrest"
## [64] "arriv" "asap" "ask"
## [67] "askd" "asleep" "ass"
## [70] "attempt" "auction" "avail"
## [73] "ave" "avoid" "await"
## [76] "award" "away" "awesom"
## [79] "babe" "babi" "back"
## [82] "bad" "bag" "bak"
## [85] "balanc" "bank" "bare"
## [88] "bath" "batteri" "bcoz"
## [91] "bcum" "bday" "beauti"
## [94] "becom" "bed" "bedroom"
## [97] "begin" "believ" "belli"
## [100] "best" "better" "bid"
## [103] "big" "bill" "bird"
## [106] "birthday" "bit" "black"
## [109] "blank" "bless" "blue"
## [112] "bluetooth" "bodi" "bold"
## [115] "bonus" "boo" "book"
## [118] "bore" "boss" "bother"
## [121] "bout" "bowl" "box"
## [124] "boy" "boytoy" "brand"
## [127] "break" "breath" "brilliant"
## [130] "bring" "brother" "bslvyl"
## [133] "btnationalr" "budget" "bugi"
## [136] "bus" "busi" "buy"
## [139] "buzz" "cabin" "cafe"
## [142] "cal" "call" "caller"
## [145] "callertun" "camcord" "came"
## [148] "camera" "can" "cancel"
## [151] "cant" "car" "card"
## [154] "care" "carlo" "case"
## [157] "cash" "cashbal" "catch"
## [160] "caus" "chanc" "chang"
## [163] "charact" "charg" "chariti"
## [166] "chat" "cheap" "check"
## [169] "cheer" "chennai" "chikku"
## [172] "childish" "children" "chines"
## [175] "choic" "choos" "christma"
## [178] "cine" "cinema" "claim"
## [181] "class" "clean" "clear"
## [184] "click" "clock" "close"
## [187] "club" "code" "coffe"
## [190] "coin" "cold" "colleagu"
## [193] "collect" "colleg" "colour"
## [196] "come" "comin" "comp"
## [199] "compani" "competit" "complet"
## [202] "complimentari" "comput" "concentr"
## [205] "condit" "confid" "confirm"
## [208] "congrat" "congratul" "connect"
## [211] "contact" "content" "convey"
## [214] "cook" "cool" "copi"
## [217] "correct" "cos" "cost"
## [220] "countri" "coupl" "cours"
## [223] "cover" "coz" "crave"
## [226] "crazi" "credit" "cri"
## [229] "croydon" "cuddl" "cum"
## [232] "cup" "current" "custcar"
## [235] "custom" "cut" "cute"
## [238] "cuz" "dad" "daddi"
## [241] "damn" "darl" "darlin"
## [244] "darren" "dat" "date"
## [247] "day" "dead" "deal"
## [250] "dear" "decid" "deep"
## [253] "definit" "del" "delet"
## [256] "deliv" "deliveri" "den"
## [259] "depend" "detail" "dey"
## [262] "didnt" "die" "differ"
## [265] "difficult" "digit" "din"
## [268] "dinner" "direct" "dis"
## [271] "discount" "discuss" "disturb"
## [274] "dnt" "doctor" "doesnt"
## [277] "dog" "doin" "dollar"
## [280] "don" "don<e2><80><98>t" "done"
## [283] "dont" "door" "doubl"
## [286] "download" "draw" "dream"
## [289] "drink" "drive" "drop"
## [292] "drug" "dude" "dun"
## [295] "dunno" "dvd" "earli"
## [298] "earlier" "easi" "eat"
## [301] "eatin" "either" "els"
## [304] "email" "embarass" "empti"
## [307] "end" "enemi" "energi"
## [310] "england" "enjoy" "enough"
## [313] "enter" "entri" "envelop"
## [316] "especi" "etc" "euro"
## [319] "eve" "even" "ever"
## [322] "everi" "everyon" "everyth"
## [325] "exact" "exam" "excel"
## [328] "excit" "excus" "expect"
## [331] "experi" "expir" "extra"
## [334] "eye" "face" "facebook"
## [337] "fact" "fall" "famili"
## [340] "fanci" "fantasi" "fantast"
## [343] "far" "fast" "fat"
## [346] "father" "fault" "feel"
## [349] "felt" "fetch" "fight"
## [352] "figur" "file" "fill"
## [355] "film" "final" "find"
## [358] "fine" "finger" "finish"
## [361] "first" "five" "fix"
## [364] "flight" "flirt" "flower"
## [367] "follow" "fone" "food"
## [370] "forev" "forget" "forgot"
## [373] "forward" "found" "free"
## [376] "freemsg" "freephon" "fren"
## [379] "fri" "friday" "friend"
## [382] "friendship" "frm" "frnd"
## [385] "frnds" "fuck" "full"
## [388] "fullonsmscom" "fun" "funni"
## [391] "futur" "gal" "game"
## [394] "gap" "gas" "gave"
## [397] "gay" "gentl" "get"
## [400] "gettin" "gift" "girl"
## [403] "give" "glad" "god"
## [406] "goe" "goin" "gone"
## [409] "gonna" "good" "goodmorn"
## [412] "goodnight" "got" "goto"
## [415] "gotta" "great" "green"
## [418] "greet" "grin" "group"
## [421] "guarante" "gud" "guess"
## [424] "guy" "gym" "haf"
## [427] "haha" "hai" "hair"
## [430] "half" "hand" "hang"
## [433] "happen" "happi" "hard"
## [436] "hav" "havent" "head"
## [439] "hear" "heard" "heart"
## [442] "heavi" "hee" "hell"
## [445] "hello" "help" "hey"
## [448] "hgsuiteland" "high" "hit"
## [451] "hiya" "hmm" "hmmm"
## [454] "hmv" "hol" "hold"
## [457] "holder" "holiday" "home"
## [460] "honey" "hook" "hop"
## [463] "hope" "horni" "hospit"
## [466] "hot" "hotel" "hour"
## [469] "hous" "housemaid" "how"
## [472] "howev" "howz" "hrs"
## [475] "hug" "huh" "hungri"
## [478] "hurri" "hurt" "iam"
## [481] "ice" "idea" "identifi"
## [484] "ignor" "ill" "imagin"
## [487] "imma" "immedi" "import"
## [490] "inc" "inch" "includ"
## [493] "india" "indian" "info"
## [496] "inform" "insid" "instead"
## [499] "interest" "interview" "invit"
## [502] "ipod" "irrit" "ish"
## [505] "issu" "ive" "izzit"
## [508] "januari" "jay" "job"
## [511] "john" "join" "joke"
## [514] "joy" "jus" "just"
## [517] "juz" "kalli" "kate"
## [520] "keep" "kept" "key"
## [523] "kick" "kid" "kill"
## [526] "kind" "kinda" "king"
## [529] "kiss" "knew" "know"
## [532] "knw" "ladi" "land"
## [535] "landlin" "laptop" "lar"
## [538] "last" "late" "later"
## [541] "latest" "laugh" "lazi"
## [544] "ldn" "lead" "learn"
## [547] "least" "leav" "lect"
## [550] "left" "leh" "lei"
## [553] "lemm" "less" "lesson"
## [556] "let" "letter" "liao"
## [559] "librari" "lick" "lie"
## [562] "life" "lift" "light"
## [565] "like" "line" "link"
## [568] "list" "listen" "littl"
## [571] "live" "load" "loan"
## [574] "local" "locat" "log"
## [577] "login" "lol" "long"
## [580] "longer" "look" "lor"
## [583] "lose" "lost" "lot"
## [586] "lovabl" "love" "lover"
## [589] "loverboy" "loyalti" "ltd"
## [592] "ltdecimalgt" "ltgt" "lttimegt"
## [595] "luck" "lucki" "lunch"
## [598] "luv" "made" "mah"
## [601] "mail" "make" "man"
## [604] "mani" "march" "mark"
## [607] "marri" "marriag" "match"
## [610] "mate" "matter" "maxim"
## [613] "may" "mayb" "mean"
## [616] "meant" "med" "medic"
## [619] "meet" "meh" "mell"
## [622] "member" "men" "menu"
## [625] "merri" "messag" "met"
## [628] "mid" "midnight" "might"
## [631] "min" "mind" "mine"
## [634] "minut" "miracl" "miss"
## [637] "mistak" "moan" "mob"
## [640] "mobil" "mobileupd" "mode"
## [643] "mom" "moment" "mon"
## [646] "monday" "money" "month"
## [649] "mood" "moon" "morn"
## [652] "motorola" "move" "movi"
## [655] "mrng" "mrt" "msg"
## [658] "msgs" "mths" "much"
## [661] "mum" "murder" "music"
## [664] "must" "muz" "nah"
## [667] "nake" "name" "nation"
## [670] "natur" "naughti" "near"
## [673] "need" "net" "network"
## [676] "neva" "never" "new"
## [679] "news" "next" "nice"
## [682] "nigeria" "night" "nite"
## [685] "nobodi" "noe" "nokia"
## [688] "none" "noon" "nope"
## [691] "normal" "noth" "notic"
## [694] "now" "ntt" "num"
## [697] "number" "nxt" "nyt"
## [700] "offer" "offic" "offici"
## [703] "okay" "oki" "old"
## [706] "omw" "one" "onlin"
## [709] "oop" "open" "oper"
## [712] "opinion" "opt" "optout"
## [715] "orang" "orchard" "order"
## [718] "oredi" "oso" "other"
## [721] "otherwis" "outsid" "pack"
## [724] "page" "paid" "pain"
## [727] "paper" "parent" "park"
## [730] "part" "parti" "partner"
## [733] "pass" "passion" "password"
## [736] "past" "pay" "peac"
## [739] "peopl" "per" "person"
## [742] "pete" "phone" "photo"
## [745] "pic" "pick" "pictur"
## [748] "piec" "pix" "pizza"
## [751] "place" "plan" "plane"
## [754] "play" "player" "pleas"
## [757] "pleasur" "plenti" "pls"
## [760] "plus" "plz" "pmin"
## [763] "pmsg" "pobox" "poboxwwq"
## [766] "point" "poli" "polic"
## [769] "poor" "pop" "possibl"
## [772] "post" "pound" "power"
## [775] "pple" "ppm" "practic"
## [778] "pray" "prefer" "prepar"
## [781] "press" "pretti" "price"
## [784] "princess" "privat" "prize"
## [787] "prob" "probabl" "problem"
## [790] "process" "project" "promis"
## [793] "pub" "put" "qualiti"
## [796] "question" "quick" "quit"
## [799] "quiz" "quot" "rain"
## [802] "rate" "rather" "rcvd"
## [805] "reach" "read" "readi"
## [808] "real" "realiz" "realli"
## [811] "reason" "receipt" "receiv"
## [814] "recent" "record" "refer"
## [817] "regard" "regist" "remain"
## [820] "rememb" "remind" "remov"
## [823] "rent" "rental" "repli"
## [826] "repres" "request" "respond"
## [829] "respons" "rest" "result"
## [832] "return" "reveal" "review"
## [835] "right" "ring" "rington"
## [838] "rite" "road" "rock"
## [841] "room" "roommat" "rose"
## [844] "round" "rowwjhl" "rpli"
## [847] "rreveal" "run" "sad"
## [850] "sae" "safe" "said"
## [853] "sale" "sam" "sat"
## [856] "saturday" "savamob" "save"
## [859] "saw" "say" "sch"
## [862] "school" "score" "scream"
## [865] "sea" "search" "season"
## [868] "sec" "second" "secret"
## [871] "see" "seem" "seen"
## [874] "select" "self" "sell"
## [877] "semest" "send" "sens"
## [880] "sent" "serious" "servic"
## [883] "set" "settl" "sex"
## [886] "sexi" "shall" "share"
## [889] "shd" "ship" "shirt"
## [892] "shit" "shop" "short"
## [895] "show" "shower" "shuhui"
## [898] "sick" "side" "sigh"
## [901] "sight" "sign" "silent"
## [904] "simpl" "sinc" "sing"
## [907] "singl" "sir" "sis"
## [910] "sister" "sit" "situat"
## [913] "sky" "skype" "slave"
## [916] "sleep" "slept" "slow"
## [919] "slowli" "small" "smile"
## [922] "smoke" "sms" "smth"
## [925] "snow" "sofa" "solv"
## [928] "somebodi" "someon" "someth"
## [931] "sometim" "somewher" "song"
## [934] "soni" "sonyericsson" "soon"
## [937] "sorri" "sort" "sound"
## [940] "space" "speak" "special"
## [943] "specialcal" "spend" "spent"
## [946] "spoke" "sport" "spree"
## [949] "stand" "star" "start"
## [952] "statement" "station" "stay"
## [955] "std" "still" "stock"
## [958] "stop" "store" "stori"
## [961] "str" "straight" "street"
## [964] "strong" "student" "studi"
## [967] "stuff" "stupid" "style"
## [970] "sub" "subscrib" "success"
## [973] "summer" "sun" "sunday"
## [976] "sunshin" "support" "suppos"
## [979] "sure" "surpris" "sweet"
## [982] "swing" "system" "take"
## [985] "talk" "tampa" "tcs"
## [988] "teach" "team" "tear"
## [991] "teas" "tel" "tell"
## [994] "ten" "tenerif" "term"
## [997] "test" "text" "thank"
## [1000] "thanx" "that" "thing"
## [1003] "think" "thinkin" "thk"
## [1006] "thnk" "tho" "though"
## [1009] "thought" "throw" "thru"
## [1012] "tht" "thur" "ticket"
## [1015] "til" "till" "time"
## [1018] "tire" "titl" "tmr"
## [1021] "tncs" "today" "togeth"
## [1024] "told" "tomo" "tomorrow"
## [1027] "tone" "tonight" "tonit"
## [1030] "took" "top" "tot"
## [1033] "total" "touch" "tough"
## [1036] "tour" "toward" "town"
## [1039] "track" "train" "transact"
## [1042] "treat" "tri" "trip"
## [1045] "troubl" "true" "trust"
## [1048] "truth" "tscs" "ttyl"
## [1051] "tuesday" "turn" "twice"
## [1054] "two" "txt" "txting"
## [1057] "txts" "type" "ufind"
## [1060] "ugh" "umma" "uncl"
## [1063] "understand" "unless" "unlimit"
## [1066] "unredeem" "unsub" "unsubscrib"
## [1069] "updat" "ure" "urgent"
## [1072] "urself" "use" "usf"
## [1075] "usual" "uve" "valentin"
## [1078] "valid" "valu" "vari"
## [1081] "verifi" "via" "video"
## [1084] "visit" "voic" "voucher"
## [1087] "wait" "wake" "walk"
## [1090] "wan" "wana" "wanna"
## [1093] "want" "wap" "warm"
## [1096] "wast" "wat" "watch"
## [1099] "water" "way" "weak"
## [1102] "wear" "weather" "wed"
## [1105] "wednesday" "weed" "week"
## [1108] "weekend" "weight" "welcom"
## [1111] "well" "wen" "went"
## [1114] "wer" "wet" "what"
## [1117] "whatev" "whenev" "whole"
## [1120] "wid" "wif" "wife"
## [1123] "wil" "will" "win"
## [1126] "wine" "winner" "wish"
## [1129] "wit" "within" "without"
## [1132] "wiv" "wkli" "wnt"
## [1135] "woke" "won" "wonder"
## [1138] "wont" "word" "work"
## [1141] "workin" "world" "worri"
## [1144] "worth" "wot" "wow"
## [1147] "write" "wrong" "wun"
## [1150] "wwwgetzedcouk" "xmas" "xxx"
## [1153] "yahoo" "yar" "yeah"
## [1156] "year" "yep" "yes"
## [1159] "yest" "yesterday" "yet"
## [1162] "yoga" "yogasana" "yrs"
## [1165] "yun" "yup"
## chr [1:1166] "<c2><a3>" "<c2><a3>wk" "<c3><9c>" "<c3><bc>" ...
## Warning: package 'e1071' was built under R version 3.5.2
## Length Class Mode
## apriori 2 table numeric
## tables 1166 -none- list
## levels 2 -none- character
## call 3 -none- call
## ham spam
## 1226 168
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## | N / Col Total |
## |-------------------------|
##
##
## Total Observations in Table: 1394
##
##
## | actual
## predicted | ham | spam | Row Total |
## -------------|-----------|-----------|-----------|
## ham | 1205 | 21 | 1226 |
## | 0.983 | 0.017 | 0.879 |
## | 0.994 | 0.115 | |
## -------------|-----------|-----------|-----------|
## spam | 7 | 161 | 168 |
## | 0.042 | 0.958 | 0.121 |
## | 0.006 | 0.885 | |
## -------------|-----------|-----------|-----------|
## Column Total | 1212 | 182 | 1394 |
## | 0.869 | 0.131 | |
## -------------|-----------|-----------|-----------|
##
##
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## | N / Col Total |
## |-------------------------|
##
##
## Total Observations in Table: 1394
##
##
## | actual
## predicted | ham | spam | Row Total |
## -------------|-----------|-----------|-----------|
## ham | 1206 | 24 | 1230 |
## | 0.980 | 0.020 | 0.882 |
## | 0.995 | 0.132 | |
## -------------|-----------|-----------|-----------|
## spam | 6 | 158 | 164 |
## | 0.037 | 0.963 | 0.118 |
## | 0.005 | 0.868 | |
## -------------|-----------|-----------|-----------|
## Column Total | 1212 | 182 | 1394 |
## | 0.869 | 0.131 | |
## -------------|-----------|-----------|-----------|
##
##
## [1] 0.9709506
## 'data.frame': 1000 obs. of 21 variables:
## $ checking_balance : Factor w/ 4 levels "1 - 200 DM","< 0 DM",..: 2 1 4 2 2 4 4 1 4 1 ...
## $ months_loan_duration: int 6 48 12 42 24 36 24 36 12 30 ...
## $ credit_history : Factor w/ 5 levels "critical","delayed",..: 1 5 1 5 2 5 5 5 5 1 ...
## $ purpose : Factor w/ 10 levels "business","car (new)",..: 8 8 5 6 2 5 6 3 8 2 ...
## $ amount : int 1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
## $ savings_balance : Factor w/ 5 levels "101 - 500 DM",..: 5 3 3 3 3 5 2 3 4 3 ...
## $ employment_length : Factor w/ 5 levels "0 - 1 yrs","1 - 4 yrs",..: 4 2 3 3 2 2 4 2 3 5 ...
## $ installment_rate : int 4 2 2 2 3 2 3 2 2 4 ...
## $ personal_status : Factor w/ 4 levels "divorced male",..: 4 2 4 4 4 4 4 4 1 3 ...
## $ other_debtors : Factor w/ 3 levels "co-applicant",..: 3 3 3 2 3 3 3 3 3 3 ...
## $ residence_history : int 4 2 3 4 4 4 4 2 4 2 ...
## $ property : Factor w/ 4 levels "building society savings",..: 3 3 3 1 4 4 1 2 3 2 ...
## $ age : int 67 22 49 45 53 35 53 35 61 28 ...
## $ installment_plan : Factor w/ 3 levels "bank","none",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ housing : Factor w/ 3 levels "for free","own",..: 2 2 2 1 1 1 2 3 2 2 ...
## $ existing_credits : int 2 1 1 1 2 1 1 1 1 2 ...
## $ default : int 1 2 1 1 2 1 1 1 1 2 ...
## $ dependents : int 1 1 2 2 2 2 1 1 1 1 ...
## $ telephone : Factor w/ 2 levels "none","yes": 2 1 1 1 1 2 1 2 1 1 ...
## $ foreign_worker : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ job : Factor w/ 4 levels "mangement self-employed",..: 2 2 4 2 2 4 2 1 4 1 ...
##
## 1 - 200 DM < 0 DM > 200 DM unknown
## 269 274 63 394
##
## 101 - 500 DM 501 - 1000 DM < 100 DM > 1000 DM unknown
## 103 63 603 48 183
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.0 12.0 18.0 20.9 24.0 72.0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 250 1366 2320 3271 3972 18424
##
## no yes
## 700 300
## int [1:900] 288 788 409 881 937 46 525 887 548 453 ...
##
## no yes
## 0.7033333 0.2966667
##
## no yes
## 0.67 0.33
##
## Call:
## C5.0.default(x = credit_train[-17], y = as.factor(credit_train$default))
##
##
## C5.0 [Release 2.07 GPL Edition] Fri Jan 25 22:19:22 2019
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 900 cases (21 attributes) from undefined.data
##
## Decision tree:
##
## checking_balance in {> 200 DM,unknown}: no (412/50)
## checking_balance in {1 - 200 DM,< 0 DM}:
## :...other_debtors = guarantor:
## :...months_loan_duration > 36: yes (4/1)
## : months_loan_duration <= 36:
## : :...installment_plan in {none,stores}: no (24)
## : installment_plan = bank:
## : :...purpose = car (new): yes (3)
## : purpose in {business,car (used),domestic appliances,education,
## : furniture,others,radio/tv,repairs,
## : retraining}: no (7/1)
## other_debtors in {co-applicant,none}:
## :...credit_history = critical: no (102/30)
## credit_history = fully repaid: yes (27/6)
## credit_history = fully repaid this bank:
## :...other_debtors = co-applicant: no (2)
## : other_debtors = none: yes (26/8)
## credit_history in {delayed,repaid}:
## :...savings_balance in {501 - 1000 DM,> 1000 DM}: no (19/3)
## savings_balance = 101 - 500 DM:
## :...other_debtors = co-applicant: yes (3)
## : other_debtors = none:
## : :...personal_status in {divorced male,
## : : married male}: yes (6/1)
## : personal_status = female:
## : :...installment_rate <= 3: no (4/1)
## : : installment_rate > 3: yes (4)
## : personal_status = single male:
## : :...age <= 41: no (15/2)
## : age > 41: yes (2)
## savings_balance = unknown:
## :...credit_history = delayed: no (8)
## : credit_history = repaid:
## : :...foreign_worker = no: no (2)
## : foreign_worker = yes:
## : :...checking_balance = < 0 DM:
## : :...telephone = none: yes (11/2)
## : : telephone = yes:
## : : :...amount <= 5045: no (5/1)
## : : amount > 5045: yes (2)
## : checking_balance = 1 - 200 DM:
## : :...residence_history > 3: no (9)
## : residence_history <= 3: [S1]
## savings_balance = < 100 DM:
## :...months_loan_duration > 39:
## :...residence_history <= 1: no (2)
## : residence_history > 1: yes (19/1)
## months_loan_duration <= 39:
## :...purpose in {car (new),retraining}: yes (47/16)
## purpose in {domestic appliances,others}: no (3)
## purpose = car (used):
## :...amount <= 8086: no (9/1)
## : amount > 8086: yes (5)
## purpose = education:
## :...checking_balance = 1 - 200 DM: no (2)
## : checking_balance = < 0 DM: yes (5)
## purpose = repairs:
## :...residence_history <= 3: yes (4/1)
## : residence_history > 3: no (3)
## purpose = business:
## :...credit_history = delayed: yes (2)
## : credit_history = repaid:
## : :...age <= 34: no (5)
## : age > 34: yes (2)
## purpose = radio/tv:
## :...employment_length in {0 - 1 yrs,
## : : unemployed}: yes (14/5)
## : employment_length = 4 - 7 yrs: no (3)
## : employment_length = > 7 yrs:
## : :...amount <= 932: yes (2)
## : : amount > 932: no (7)
## : employment_length = 1 - 4 yrs:
## : :...months_loan_duration <= 15: no (6)
## : months_loan_duration > 15:
## : :...amount <= 3275: yes (7)
## : amount > 3275: no (2)
## purpose = furniture:
## :...residence_history <= 1: no (8/1)
## residence_history > 1:
## :...installment_plan in {bank,stores}: no (3/1)
## installment_plan = none:
## :...telephone = yes: yes (7/1)
## telephone = none:
## :...months_loan_duration > 27: yes (3)
## months_loan_duration <= 27: [S2]
##
## SubTree [S1]
##
## property in {building society savings,unknown/none}: yes (4)
## property = other: no (6)
## property = real estate:
## :...job = skilled employee: yes (2)
## job in {mangement self-employed,unemployed non-resident,
## unskilled resident}: no (2)
##
## SubTree [S2]
##
## checking_balance = 1 - 200 DM: yes (5/2)
## checking_balance = < 0 DM:
## :...property in {building society savings,real estate,unknown/none}: no (8)
## property = other:
## :...installment_rate <= 1: no (2)
## installment_rate > 1: yes (4)
##
##
## Evaluation on training data (900 cases):
##
## Decision Tree
## ----------------
## Size Errors
##
## 54 135(15.0%) <<
##
##
## (a) (b) <-classified as
## ---- ----
## 589 44 (a): class no
## 91 176 (b): class yes
##
##
## Attribute usage:
##
## 100.00% checking_balance
## 54.22% other_debtors
## 50.00% credit_history
## 32.56% savings_balance
## 25.22% months_loan_duration
## 19.78% purpose
## 10.11% residence_history
## 7.33% installment_plan
## 5.22% telephone
## 4.78% foreign_worker
## 4.56% employment_length
## 4.33% amount
## 3.44% personal_status
## 3.11% property
## 2.67% age
## 1.56% installment_rate
## 0.44% job
##
##
## Time: 0.0 secs
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | predicted default
## actual default | no | yes | Row Total |
## ---------------|-----------|-----------|-----------|
## no | 60 | 7 | 67 |
## | 0.600 | 0.070 | |
## ---------------|-----------|-----------|-----------|
## yes | 19 | 14 | 33 |
## | 0.190 | 0.140 | |
## ---------------|-----------|-----------|-----------|
## Column Total | 79 | 21 | 100 |
## ---------------|-----------|-----------|-----------|
##
##
##
## Call:
## C5.0.default(x = credit_train[-17], y =
## as.factor(credit_train$default), trials = 10)
##
## Classification Tree
## Number of samples: 900
## Number of predictors: 20
##
## Number of boosting iterations: 10
## Average tree size: 49.7
##
## Non-standard options: attempt to group attributes
##
## Call:
## C5.0.default(x = credit_train[-17], y =
## as.factor(credit_train$default), trials = 10)
##
##
## C5.0 [Release 2.07 GPL Edition] Fri Jan 25 22:19:22 2019
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 900 cases (21 attributes) from undefined.data
##
## ----- Trial 0: -----
##
## Decision tree:
##
## checking_balance in {> 200 DM,unknown}: no (412/50)
## checking_balance in {1 - 200 DM,< 0 DM}:
## :...other_debtors = guarantor:
## :...months_loan_duration > 36: yes (4/1)
## : months_loan_duration <= 36:
## : :...installment_plan in {none,stores}: no (24)
## : installment_plan = bank:
## : :...purpose = car (new): yes (3)
## : purpose in {business,car (used),domestic appliances,education,
## : furniture,others,radio/tv,repairs,
## : retraining}: no (7/1)
## other_debtors in {co-applicant,none}:
## :...credit_history = critical: no (102/30)
## credit_history = fully repaid: yes (27/6)
## credit_history = fully repaid this bank:
## :...other_debtors = co-applicant: no (2)
## : other_debtors = none: yes (26/8)
## credit_history in {delayed,repaid}:
## :...savings_balance in {501 - 1000 DM,> 1000 DM}: no (19/3)
## savings_balance = 101 - 500 DM:
## :...other_debtors = co-applicant: yes (3)
## : other_debtors = none:
## : :...personal_status in {divorced male,
## : : married male}: yes (6/1)
## : personal_status = female:
## : :...installment_rate <= 3: no (4/1)
## : : installment_rate > 3: yes (4)
## : personal_status = single male:
## : :...age <= 41: no (15/2)
## : age > 41: yes (2)
## savings_balance = unknown:
## :...credit_history = delayed: no (8)
## : credit_history = repaid:
## : :...foreign_worker = no: no (2)
## : foreign_worker = yes:
## : :...checking_balance = < 0 DM:
## : :...telephone = none: yes (11/2)
## : : telephone = yes:
## : : :...amount <= 5045: no (5/1)
## : : amount > 5045: yes (2)
## : checking_balance = 1 - 200 DM:
## : :...residence_history > 3: no (9)
## : residence_history <= 3: [S1]
## savings_balance = < 100 DM:
## :...months_loan_duration > 39:
## :...residence_history <= 1: no (2)
## : residence_history > 1: yes (19/1)
## months_loan_duration <= 39:
## :...purpose in {car (new),retraining}: yes (47/16)
## purpose in {domestic appliances,others}: no (3)
## purpose = car (used):
## :...amount <= 8086: no (9/1)
## : amount > 8086: yes (5)
## purpose = education:
## :...checking_balance = 1 - 200 DM: no (2)
## : checking_balance = < 0 DM: yes (5)
## purpose = repairs:
## :...residence_history <= 3: yes (4/1)
## : residence_history > 3: no (3)
## purpose = business:
## :...credit_history = delayed: yes (2)
## : credit_history = repaid:
## : :...age <= 34: no (5)
## : age > 34: yes (2)
## purpose = radio/tv:
## :...employment_length in {0 - 1 yrs,
## : : unemployed}: yes (14/5)
## : employment_length = 4 - 7 yrs: no (3)
## : employment_length = > 7 yrs:
## : :...amount <= 932: yes (2)
## : : amount > 932: no (7)
## : employment_length = 1 - 4 yrs:
## : :...months_loan_duration <= 15: no (6)
## : months_loan_duration > 15:
## : :...amount <= 3275: yes (7)
## : amount > 3275: no (2)
## purpose = furniture:
## :...residence_history <= 1: no (8/1)
## residence_history > 1:
## :...installment_plan in {bank,stores}: no (3/1)
## installment_plan = none:
## :...telephone = yes: yes (7/1)
## telephone = none:
## :...months_loan_duration > 27: yes (3)
## months_loan_duration <= 27: [S2]
##
## SubTree [S1]
##
## property in {building society savings,unknown/none}: yes (4)
## property = other: no (6)
## property = real estate:
## :...job = skilled employee: yes (2)
## job in {mangement self-employed,unemployed non-resident,
## unskilled resident}: no (2)
##
## SubTree [S2]
##
## checking_balance = 1 - 200 DM: yes (5/2)
## checking_balance = < 0 DM:
## :...property in {building society savings,real estate,unknown/none}: no (8)
## property = other:
## :...installment_rate <= 1: no (2)
## installment_rate > 1: yes (4)
##
## ----- Trial 1: -----
##
## Decision tree:
##
## foreign_worker = no: no (28.4/2.4)
## foreign_worker = yes:
## :...checking_balance = unknown:
## :...installment_plan in {bank,stores}:
## : :...other_debtors in {co-applicant,guarantor}: no (2.4)
## : : other_debtors = none:
## : : :...employment_length in {0 - 1 yrs,4 - 7 yrs,
## : : : > 7 yrs}: no (32.3/10.8)
## : : employment_length in {1 - 4 yrs,unemployed}: yes (31/7.1)
## : installment_plan = none:
## : :...credit_history in {critical,fully repaid,fully repaid this bank,
## : : repaid}: no (224.7/32.5)
## : credit_history = delayed:
## : :...residence_history <= 1: yes (4.3)
## : residence_history > 1:
## : :...installment_rate <= 3: no (11.9)
## : installment_rate > 3: yes (14.2/5.6)
## checking_balance in {1 - 200 DM,< 0 DM,> 200 DM}:
## :...other_debtors = co-applicant: yes (24.3/7.9)
## other_debtors = guarantor:
## :...property in {building society savings,real estate,
## : : unknown/none}: no (27.6/4)
## : property = other: yes (3)
## other_debtors = none:
## :...installment_rate <= 2:
## :...purpose in {business,car (new),car (used),domestic appliances,
## : : others,radio/tv,retraining}: no (125.5/34.3)
## : purpose in {education,repairs}: yes (13.6/4.8)
## : purpose = furniture:
## : :...job in {mangement self-employed,
## : : unemployed non-resident}: yes (4.3)
## : job in {skilled employee,unskilled resident}:
## : :...dependents > 1: yes (2.2)
## : dependents <= 1:
## : :...checking_balance = > 200 DM: no (4)
## : checking_balance in {1 - 200 DM,< 0 DM}:
## : :...telephone = none: yes (24.9/10.1)
## : telephone = yes: no (10.1/2.4)
## installment_rate > 2:
## :...residence_history <= 1: no (39/8.5)
## residence_history > 1:
## :...credit_history = fully repaid: yes (11.7)
## credit_history in {critical,delayed,fully repaid this bank,
## : repaid}:
## :...months_loan_duration <= 11:
## :...purpose in {business,car (new),car (used),
## : : domestic appliances,furniture,others,
## : : radio/tv,repairs,
## : : retraining}: no (35.2/6.9)
## : purpose = education: yes (5.3/0.8)
## months_loan_duration > 11:
## :...savings_balance = 501 - 1000 DM: yes (15.4/5.9)
## savings_balance = > 1000 DM: no (9.1/2.2)
## savings_balance = 101 - 500 DM:
## :...installment_plan in {bank,
## : : stores}: yes (8.3/0.8)
## : installment_plan = none: no (16.2/4.5)
## savings_balance = unknown:
## :...checking_balance = 1 - 200 DM: no (12.7/1.6)
## : checking_balance in {< 0 DM,
## : > 200 DM}: yes (20.8/5.6)
## savings_balance = < 100 DM:
## :...installment_plan in {bank,
## : stores}: yes (25.3/3.2)
## installment_plan = none:
## :...dependents > 1: no (14.4/5.6)
## dependents <= 1:
## :...months_loan_duration > 42: yes (11.5)
## months_loan_duration <= 42: [S1]
##
## SubTree [S1]
##
## credit_history in {delayed,fully repaid this bank}: yes (5.3)
## credit_history = repaid:
## :...job in {mangement self-employed,unskilled resident}: no (23.2/8.7)
## : job in {skilled employee,unemployed non-resident}: yes (24.2/7.1)
## credit_history = critical:
## :...existing_credits <= 1: no (6.9/2.2)
## existing_credits > 1:
## :...purpose in {business,car (new),domestic appliances,education,furniture,
## : others,repairs,retraining}: yes (22.7/3.2)
## purpose in {car (used),radio/tv}: no (4)
##
## ----- Trial 2: -----
##
## Decision tree:
##
## checking_balance = unknown:
## :...installment_plan = bank:
## : :...other_debtors = guarantor: yes (0)
## : : other_debtors = co-applicant: no (1.3)
## : : other_debtors = none:
## : : :...months_loan_duration <= 8: no (3.4)
## : : months_loan_duration > 8: yes (44.9/16.4)
## : installment_plan in {none,stores}:
## : :...employment_length in {0 - 1 yrs,unemployed}:
## : :...other_debtors = guarantor: no (0)
## : : other_debtors = co-applicant: yes (8.6)
## : : other_debtors = none:
## : : :...months_loan_duration > 30: yes (7.5)
## : : months_loan_duration <= 30:
## : : :...housing in {for free,rent}: no (5.8)
## : : housing = own:
## : : :...amount > 4594: yes (5.8)
## : : amount <= 4594:
## : : :...purpose in {business,repairs}: yes (4.6)
## : : purpose in {car (new),car (used),
## : : domestic appliances,education,
## : : furniture,others,radio/tv,
## : : retraining}: no (20.7)
## : employment_length in {1 - 4 yrs,4 - 7 yrs,> 7 yrs}:
## : :...installment_rate <= 3: no (91.9/5.8)
## : installment_rate > 3:
## : :...age > 30: no (70.1/5.3)
## : age <= 30:
## : :...other_debtors = co-applicant: no (0.6)
## : other_debtors = guarantor: yes (3.5/0.6)
## : other_debtors = none:
## : :...housing = for free: no (0.6)
## : housing = rent: yes (4.8/1.9)
## : housing = own:
## : :...amount <= 1445: no (8)
## : amount > 1445: yes (23.7/8)
## checking_balance in {1 - 200 DM,< 0 DM,> 200 DM}:
## :...months_loan_duration > 42:
## :...savings_balance in {101 - 500 DM,< 100 DM,> 1000 DM}: yes (42.1/6.1)
## : savings_balance in {501 - 1000 DM,unknown}: no (7.2)
## months_loan_duration <= 42:
## :...foreign_worker = no: no (15.8/3)
## foreign_worker = yes:
## :...other_debtors = co-applicant: no (26.3/12.7)
## other_debtors = guarantor:
## :...installment_plan = bank: yes (9.5/3.2)
## : installment_plan in {none,stores}: no (17.5/1.5)
## other_debtors = none:
## :...purpose in {domestic appliances,others,
## : retraining}: no (10/1.9)
## purpose = repairs: yes (14.2/6.1)
## purpose = education:
## :...checking_balance in {1 - 200 DM,> 200 DM}: no (18.2/7.3)
## : checking_balance = < 0 DM: yes (10.1)
## purpose = business:
## :...months_loan_duration <= 18: no (11.3)
## : months_loan_duration > 18:
## : :...telephone = none: no (10.4/2.8)
## : telephone = yes: yes (19.9/6)
## purpose = car (used):
## :...credit_history in {critical,delayed,
## : : fully repaid}: no (7.8)
## : credit_history in {fully repaid this bank,repaid}:
## : :...amount <= 3161: no (6.5)
## : amount > 3161: yes (20.4/5.7)
## purpose = car (new):
## :...credit_history = delayed: no (14.6/6.7)
## : credit_history in {fully repaid,
## : : fully repaid this bank}: yes (11/1.8)
## : credit_history = critical:
## : :...installment_rate <= 3: no (9.3)
## : : installment_rate > 3: yes (21/6.9)
## : credit_history = repaid:
## : :...personal_status = divorced male: yes (3)
## : personal_status = married male: no (6.3/2.2)
## : personal_status = female:
## : :...job in {mangement self-employed,
## : : : unemployed non-resident}: no (2.6)
## : : job in {skilled employee,
## : : unskilled resident}: yes (27.2/3.5)
## : personal_status = single male:
## : :...amount <= 8229: no (29.5/9.1)
## : amount > 8229: yes (6)
## purpose = radio/tv:
## :...employment_length in {4 - 7 yrs,> 7 yrs}: no (34.3/5)
## : employment_length in {0 - 1 yrs,1 - 4 yrs,unemployed}:
## : :...existing_credits > 1: yes (13.6/2.2)
## : existing_credits <= 1:
## : :...savings_balance in {101 - 500 DM,> 1000 DM,
## : : unknown}: yes (7.3/1.3)
## : savings_balance = 501 - 1000 DM: no (6.5/1.8)
## : savings_balance = < 100 DM:
## : :...amount > 4473: no (4.2)
## : amount <= 4473:
## : :...months_loan_duration <= 7: no (2.4)
## : months_loan_duration > 7: yes (40.6/11.5)
## purpose = furniture:
## :...installment_plan = stores: no (11.2)
## installment_plan in {bank,none}:
## :...dependents > 1: yes (5.2/0.6)
## dependents <= 1:
## :...checking_balance = > 200 DM: no (6.9)
## checking_balance in {1 - 200 DM,< 0 DM}:
## :...savings_balance in {101 - 500 DM,
## : 501 - 1000 DM}: yes (3.7/0.6)
## savings_balance in {> 1000 DM,
## : unknown}: no (14/4.3)
## savings_balance = < 100 DM: [S1]
##
## SubTree [S1]
##
## job in {mangement self-employed,unemployed non-resident,
## : unskilled resident}: yes (24.6/9.1)
## job = skilled employee:
## :...credit_history in {critical,delayed,fully repaid,repaid}: no (38.6/13.8)
## credit_history = fully repaid this bank: yes (2.8)
##
## ----- Trial 3: -----
##
## Decision tree:
##
## checking_balance = unknown:
## :...employment_length in {1 - 4 yrs,4 - 7 yrs,> 7 yrs}: no (235.6/50.4)
## : employment_length in {0 - 1 yrs,unemployed}:
## : :...other_debtors = guarantor: no (0)
## : other_debtors = co-applicant: yes (7.5/0.5)
## : other_debtors = none:
## : :...purpose = others: no (0)
## : purpose in {business,repairs}: yes (9)
## : purpose in {car (new),car (used),domestic appliances,education,
## : : furniture,radio/tv,retraining}:
## : :...amount <= 4594: no (23.4)
## : amount > 4594: yes (11.8/1.1)
## checking_balance in {1 - 200 DM,< 0 DM,> 200 DM}:
## :...other_debtors = guarantor: no (31.5/9.1)
## other_debtors = co-applicant:
## :...savings_balance in {501 - 1000 DM,> 1000 DM}: yes (0)
## : savings_balance = unknown: no (3.5)
## : savings_balance in {101 - 500 DM,< 100 DM}:
## : :...amount <= 2022: no (5.4)
## : amount > 2022:
## : :...employment_length in {0 - 1 yrs,1 - 4 yrs,4 - 7 yrs,
## : : > 7 yrs}: yes (24.5/2.4)
## : employment_length = unemployed: no (2.4)
## other_debtors = none:
## :...purpose in {domestic appliances,others}: yes (9.8/4.6)
## purpose in {repairs,retraining}: no (22/8)
## purpose = car (used):
## :...personal_status in {divorced male,single male}: no (29.7/6.9)
## : personal_status in {female,married male}: yes (13/4.1)
## purpose = education:
## :...employment_length in {0 - 1 yrs,1 - 4 yrs,> 7 yrs,
## : : unemployed}: yes (25.7/5.9)
## : employment_length = 4 - 7 yrs: no (5.9/1.4)
## purpose = business:
## :...age > 46: yes (5.2)
## : age <= 46:
## : :...amount <= 10722: no (43.7/12.9)
## : amount > 10722: yes (3.7)
## purpose = car (new):
## :...credit_history = critical:
## : :...personal_status in {divorced male,female,
## : : : single male}: no (31.7/7.2)
## : : personal_status = married male: yes (4.3)
## : credit_history in {delayed,fully repaid,fully repaid this bank,
## : : repaid}:
## : :...installment_rate > 2: yes (63.2/15.8)
## : installment_rate <= 2:
## : :...employment_length = > 7 yrs: yes (9.4)
## : employment_length in {0 - 1 yrs,1 - 4 yrs,4 - 7 yrs,
## : : unemployed}:
## : :...amount <= 1386: yes (7.7/0.5)
## : amount > 1386: no (31.5/7.2)
## purpose = radio/tv:
## :...dependents > 1: yes (8.5/1.6)
## : dependents <= 1:
## : :...employment_length = > 7 yrs: no (15.9/1.4)
## : employment_length in {0 - 1 yrs,1 - 4 yrs,4 - 7 yrs,unemployed}:
## : :...housing = for free: yes (4.2/0.5)
## : housing = rent: no (15.2/5.8)
## : housing = own:
## : :...months_loan_duration <= 39: no (68/30)
## : months_loan_duration > 39: yes (7.4/0.5)
## purpose = furniture:
## :...installment_plan = stores: no (9.1)
## installment_plan in {bank,none}:
## :...amount > 4281: yes (15.8/2.8)
## amount <= 4281:
## :...housing = for free: no (6.6/0.5)
## housing in {own,rent}:
## :...amount > 3573: no (17/3.4)
## amount <= 3573:
## :...personal_status = divorced male: no (7.5/2)
## personal_status in {married male,
## : single male}: yes (25.6/10.2)
## personal_status = female:
## :...residence_history <= 1: no (4.1)
## residence_history > 1:
## :...age <= 37: yes (30/6.1)
## age > 37: no (4.1)
##
## ----- Trial 4: -----
##
## Decision tree:
##
## months_loan_duration <= 7:
## :...amount <= 3380: no (48.6/5)
## : amount > 3380: yes (9.2/2.2)
## months_loan_duration > 7:
## :...savings_balance in {> 1000 DM,unknown}:
## :...other_debtors = co-applicant: no (3.7)
## : other_debtors = guarantor: yes (4.7/1.6)
## : other_debtors = none:
## : :...property in {building society savings,unknown/none}:
## : :...foreign_worker = no: no (2.5)
## : : foreign_worker = yes:
## : : :...savings_balance = > 1000 DM: yes (15.8/3)
## : : savings_balance = unknown:
## : : :...installment_rate <= 1: yes (7.2/1.2)
## : : installment_rate > 1: no (42.5/12.1)
## : property in {other,real estate}:
## : :...savings_balance = > 1000 DM: no (19.3)
## : savings_balance = unknown:
## : :...residence_history > 3: no (25/1.6)
## : residence_history <= 3:
## : :...property = real estate: yes (14.8/5.5)
## : property = other:
## : :...checking_balance in {1 - 200 DM,> 200 DM,
## : : unknown}: no (20.8/1.9)
## : checking_balance = < 0 DM: yes (6.4/1.2)
## savings_balance in {101 - 500 DM,501 - 1000 DM,< 100 DM}:
## :...checking_balance in {> 200 DM,unknown}:
## :...other_debtors = co-applicant: yes (12.1/4.3)
## : other_debtors = guarantor: no (2.9)
## : other_debtors = none:
## : :...age > 48: no (17.2/1.2)
## : age <= 48:
## : :...purpose in {business,education,repairs}: yes (36.9/15.9)
## : purpose in {car (used),domestic appliances,others,
## : : retraining}: no (17.1/2.1)
## : purpose = car (new):
## : :...installment_plan in {bank,stores}: yes (12.5/0.9)
## : : installment_plan = none: no (21.1/6.4)
## : purpose = furniture:
## : :...months_loan_duration <= 30: no (31.8/8.5)
## : : months_loan_duration > 30: yes (7.7/0.9)
## : purpose = radio/tv:
## : :...months_loan_duration <= 9: yes (8.7/0.4)
## : months_loan_duration > 9:
## : :...amount <= 2323: no (24.6)
## : amount > 2323: [S1]
## checking_balance in {1 - 200 DM,< 0 DM}:
## :...months_loan_duration <= 22:
## :...job = mangement self-employed: no (22.6/9.3)
## : job = unemployed non-resident: yes (6.9/0.9)
## : job = unskilled resident:
## : :...age <= 54: no (58.5/14.7)
## : : age > 54: yes (7.5/0.9)
## : job = skilled employee:
## : :...credit_history = delayed: no (4.3/0.4)
## : credit_history = fully repaid this bank: yes (4.8)
## : credit_history in {critical,fully repaid,repaid}:
## : :...amount <= 1381:
## : :...property in {other,unknown/none}: yes (18.7/0.4)
## : : property in {building society savings,real estate}:
## : : :...foreign_worker = no: no (2)
## : : foreign_worker = yes:
## : : :...amount <= 662: no (5)
## : : amount > 662: yes (25.4/5.4)
## : amount > 1381:
## : :...employment_length in {4 - 7 yrs,
## : : unemployed}: no (13.3)
## : employment_length in {0 - 1 yrs,1 - 4 yrs,> 7 yrs}:
## : :...housing = for free: yes (2.6)
## : housing = own: no (37.8/12.6)
## : housing = rent:
## : :...amount <= 1480: no (4)
## : amount > 1480: yes (22.5/4.4)
## months_loan_duration > 22:
## :...job = unemployed non-resident: no (1.4)
## job = unskilled resident: yes (38.6/5.5)
## job in {mangement self-employed,skilled employee}:
## :...existing_credits > 1: yes (63.2/17.9)
## existing_credits <= 1:
## :...personal_status in {divorced male,
## : married male}: yes (17.1/4.4)
## personal_status = female:
## :...age <= 52: yes (25.8/5)
## : age > 52: no (2.2)
## personal_status = single male:
## :...other_debtors = co-applicant: yes (4)
## other_debtors = guarantor: no (3.2)
## other_debtors = none:
## :...amount > 7596: yes (14.2/3.1)
## amount <= 7596:
## :...installment_rate <= 2: no (11.6)
## installment_rate > 2:
## :...age <= 32: no (29.3/8.5)
## age > 32: yes (9.9/2.8)
##
## SubTree [S1]
##
## credit_history in {critical,fully repaid,fully repaid this bank}: no (6.7)
## credit_history in {delayed,repaid}:
## :...existing_credits <= 1: no (12.6/5.2)
## existing_credits > 1: yes (11/1.4)
##
## ----- Trial 5: -----
##
## Decision tree:
##
## checking_balance = unknown:
## :...installment_plan = stores: no (14.6/5.4)
## : installment_plan = bank:
## : :...other_debtors in {co-applicant,guarantor}: no (3.1)
## : : other_debtors = none:
## : : :...existing_credits > 2: no (3.8)
## : : existing_credits <= 2:
## : : :...housing = for free: no (8.2/1.7)
## : : housing = rent: yes (7/0.4)
## : : housing = own:
## : : :...telephone = yes: yes (8.7/1.9)
## : : telephone = none:
## : : :...age <= 30: no (6)
## : : age > 30: yes (19.2/7)
## : installment_plan = none:
## : :...credit_history in {critical,fully repaid,
## : : fully repaid this bank}: no (63.7/4)
## : credit_history in {delayed,repaid}:
## : :...existing_credits <= 1:
## : :...purpose in {business,car (new),car (used),domestic appliances,
## : : : education,others,radio/tv,
## : : : retraining}: no (62.4/8.2)
## : : purpose in {furniture,repairs}: yes (20/6.2)
## : existing_credits > 1:
## : :...employment_length = 4 - 7 yrs: no (7.6)
## : employment_length in {0 - 1 yrs,1 - 4 yrs,> 7 yrs,unemployed}:
## : :...job in {mangement self-employed,
## : : unemployed non-resident}: yes (6.9)
## : job in {skilled employee,unskilled resident}:
## : :...employment_length in {0 - 1 yrs,> 7 yrs}: yes (19.8/4.4)
## : employment_length in {1 - 4 yrs,
## : unemployed}: no (7.2)
## checking_balance in {1 - 200 DM,< 0 DM,> 200 DM}:
## :...property = unknown/none:
## :...job = unskilled resident: yes (10.7)
## : job in {mangement self-employed,skilled employee,
## : : unemployed non-resident}:
## : :...installment_rate <= 2: no (31.5/11)
## : installment_rate > 2:
## : :...job = skilled employee: yes (40.9/10.1)
## : job = unemployed non-resident: no (1)
## : job = mangement self-employed:
## : :...dependents > 1: no (2.2)
## : dependents <= 1:
## : :...residence_history <= 1: no (4.8/1)
## : residence_history > 1: yes (19.4/4.5)
## property in {building society savings,other,real estate}:
## :...purpose in {domestic appliances,others,repairs,
## : retraining}: no (28.8/11.1)
## purpose = education: yes (21.7/9.7)
## purpose = car (used):
## :...amount <= 7253: no (20.5/1)
## : amount > 7253: yes (6.7/1.9)
## purpose = business:
## :...months_loan_duration <= 18: no (10.1)
## : months_loan_duration > 18:
## : :...housing = for free: no (0)
## : housing = rent: yes (9.4/1.9)
## : housing = own:
## : :...savings_balance in {101 - 500 DM,501 - 1000 DM,> 1000 DM,
## : : unknown}: no (11.1)
## : savings_balance = < 100 DM:
## : :...amount <= 2292: yes (7.7)
## : amount > 2292: no (17.4/7.2)
## purpose = radio/tv:
## :...months_loan_duration <= 8: no (6.8)
## : months_loan_duration > 8:
## : :...savings_balance = > 1000 DM: yes (0)
## : savings_balance = unknown: no (15.1/2.5)
## : savings_balance in {101 - 500 DM,501 - 1000 DM,< 100 DM}:
## : :...months_loan_duration > 36: yes (8.6)
## : months_loan_duration <= 36:
## : :...other_debtors = co-applicant: yes (2.5/0.8)
## : other_debtors = guarantor: no (9.1/1.7)
## : other_debtors = none:
## : :...employment_length in {0 - 1 yrs,
## : : unemployed}: yes (25.9/5.8)
## : employment_length in {4 - 7 yrs,
## : : > 7 yrs}: no (22.2/5.7)
## : employment_length = 1 - 4 yrs:
## : :...months_loan_duration <= 15: no (21.4/8.1)
## : months_loan_duration > 15: yes (23.7/5)
## purpose = furniture:
## :...installment_plan = stores: no (6.1)
## : installment_plan in {bank,none}:
## : :...other_debtors = guarantor: no (4.3)
## : other_debtors in {co-applicant,none}:
## : :...savings_balance in {101 - 500 DM,
## : : 501 - 1000 DM}: yes (4.1)
## : savings_balance = > 1000 DM: no (5.1)
## : savings_balance in {< 100 DM,unknown}:
## : :...telephone = yes: no (30.4/9.6)
## : telephone = none:
## : :...personal_status = divorced male: no (4.3)
## : personal_status in {married male,
## : : single male}: yes (33.4/9.9)
## : personal_status = female:
## : :...installment_plan = bank: yes (2.7)
## : installment_plan = none:
## : :...months_loan_duration <= 9: yes (3.1)
## : months_loan_duration > 9: no (26.5/8.1)
## purpose = car (new):
## :...other_debtors in {co-applicant,guarantor}: yes (12.4/2.8)
## other_debtors = none:
## :...property = real estate:
## :...installment_plan in {bank,stores}: yes (2.7)
## : installment_plan = none:
## : :...amount > 4380: no (6)
## : amount <= 4380:
## : :...personal_status in {divorced male,
## : : female}: yes (7.3/0.4)
## : personal_status in {married male,
## : single male}: no (29.7/6.1)
## property in {building society savings,other}:
## :...checking_balance = > 200 DM: no (3.7)
## checking_balance in {1 - 200 DM,< 0 DM}:
## :...amount <= 1126: yes (19.7/0.4)
## amount > 1126:
## :...installment_plan = stores: yes (0)
## installment_plan = bank: no (3.2)
## installment_plan = none:
## :...dependents > 1: no (5.9/1.2)
## dependents <= 1: [S1]
##
## SubTree [S1]
##
## job in {mangement self-employed,unemployed non-resident,
## : unskilled resident}: yes (19/3)
## job = skilled employee:
## :...installment_rate <= 1: no (4.9)
## installment_rate > 1:
## :...age <= 36: yes (23.5/7.3)
## age > 36: no (4.8)
##
## ----- Trial 6: -----
##
## Decision tree:
##
## checking_balance in {> 200 DM,unknown}:
## :...foreign_worker = no: no (6.9)
## : foreign_worker = yes:
## : :...months_loan_duration <= 8: no (23.8/1.3)
## : months_loan_duration > 8:
## : :...job in {mangement self-employed,skilled employee,
## : : unemployed non-resident}:
## : :...employment_length = > 7 yrs: no (67.6/8.6)
## : : employment_length in {0 - 1 yrs,1 - 4 yrs,4 - 7 yrs,unemployed}:
## : : :...purpose in {car (used),domestic appliances,others,repairs,
## : : : retraining}: no (21.8/2)
## : : purpose = education: yes (16.3/8.1)
## : : purpose = business:
## : : :...existing_credits <= 2: no (23.5/8.6)
## : : : existing_credits > 2: yes (2.9)
## : : purpose = car (new):
## : : :...property in {building society savings,real estate,
## : : : : unknown/none}: yes (20.1/5.9)
## : : : property = other: no (4.1)
## : : purpose = furniture:
## : : :...months_loan_duration > 30: yes (7.5/1.9)
## : : : months_loan_duration <= 30:
## : : : :...age <= 22: yes (4.8/1.2)
## : : : age > 22: no (18.5)
## : : purpose = radio/tv:
## : : :...dependents > 1: no (4.3)
## : : dependents <= 1:
## : : :...months_loan_duration <= 9: yes (4.7)
## : : months_loan_duration > 9:
## : : :...installment_rate <= 1: yes (2.1)
## : : installment_rate > 1: no (38.2/9.1)
## : job = unskilled resident:
## : :...age > 48: no (6.3)
## : age <= 48:
## : :...purpose in {domestic appliances,others,
## : : repairs}: yes (0)
## : purpose in {business,retraining}: no (5.2)
## : purpose in {car (new),car (used),education,furniture,
## : : radio/tv}:
## : :...installment_plan = bank: yes (13.7/2.6)
## : installment_plan = stores: no (1.5)
## : installment_plan = none: [S1]
## checking_balance in {1 - 200 DM,< 0 DM}:
## :...credit_history in {fully repaid,fully repaid this bank}:
## :...other_debtors = co-applicant: no (3.3)
## : other_debtors in {guarantor,none}:
## : :...property in {building society savings,unknown/none}: yes (36/3.1)
## : property in {other,real estate}:
## : :...housing in {for free,rent}: yes (8/0.9)
## : housing = own:
## : :...age <= 35: no (23.4/8.2)
## : age > 35: yes (7.1/0.8)
## credit_history in {critical,delayed,repaid}:
## :...other_debtors = guarantor: no (24.3/7.1)
## other_debtors = co-applicant:
## :...foreign_worker = no: no (3.5)
## : foreign_worker = yes:
## : :...installment_plan = stores: yes (0)
## : installment_plan = bank: no (1.3)
## : installment_plan = none:
## : :...amount <= 1961: no (4.9)
## : amount > 1961: yes (18.9/4.5)
## other_debtors = none:
## :...credit_history = delayed:
## :...savings_balance in {101 - 500 DM,501 - 1000 DM,
## : : unknown}: no (22.9/2.7)
## : savings_balance in {< 100 DM,> 1000 DM}:
## : :...installment_rate <= 1: no (4.8)
## : installment_rate > 1:
## : :...job in {mangement self-employed,skilled employee,
## : : unemployed non-resident}: yes (21.6/1.9)
## : job = unskilled resident: no (3.5/0.8)
## credit_history = critical:
## :...residence_history <= 1: no (7.4)
## : residence_history > 1:
## : :...savings_balance in {101 - 500 DM,> 1000 DM,
## : : unknown}: no (16.4/2.2)
## : savings_balance = 501 - 1000 DM: yes (5.1/2.2)
## : savings_balance = < 100 DM:
## : :...months_loan_duration > 36: yes (6.3)
## : months_loan_duration <= 36:
## : :...personal_status in {divorced male,
## : : married male}: yes (13.5/4.5)
## : personal_status in {female,
## : single male}: no (54.8/18.5)
## credit_history = repaid:
## :...savings_balance = > 1000 DM: no (6.2)
## savings_balance in {101 - 500 DM,501 - 1000 DM,< 100 DM,
## : unknown}:
## :...amount > 8086: yes (22.1/1.8)
## amount <= 8086:
## :...purpose in {business,domestic appliances,
## : retraining}: yes (16.6/5)
## purpose in {car (used),education,others,
## : repairs}: no (43.7/12.1)
## purpose = car (new):
## :...employment_length in {0 - 1 yrs,1 - 4 yrs,
## : : 4 - 7 yrs,
## : : > 7 yrs}: yes (56.2/20.9)
## : employment_length = unemployed: no (5.7)
## purpose = furniture:
## :...residence_history <= 1: no (9.3/2.1)
## : residence_history > 1:
## : :...telephone = yes: yes (16.5/6.8)
## : telephone = none:
## : :...months_loan_duration > 27: yes (5.6)
## : months_loan_duration <= 27:
## : :...amount <= 2520: yes (20.1/6.9)
## : amount > 2520: no (11.4/1.6)
## purpose = radio/tv:
## :...amount > 5324: yes (6.9)
## amount <= 5324:
## :...amount > 3190: no (9.8/0.3)
## amount <= 3190: [S2]
##
## SubTree [S1]
##
## credit_history = fully repaid this bank: yes (0)
## credit_history in {critical,fully repaid}: no (3.1)
## credit_history in {delayed,repaid}:
## :...amount <= 3229: yes (25.1/4.1)
## amount > 3229: no (3.5)
##
## SubTree [S2]
##
## property in {building society savings,unknown/none}: yes (8.1/1.1)
## property = other:
## :...dependents <= 1: no (20.1/7.6)
## : dependents > 1: yes (4.1/0.8)
## property = real estate:
## :...months_loan_duration <= 11: no (4.7)
## months_loan_duration > 11: yes (20.4/4.3)
##
## ----- Trial 7: -----
##
## Decision tree:
##
## checking_balance in {1 - 200 DM,< 0 DM}:
## :...credit_history in {fully repaid,fully repaid this bank}:
## : :...other_debtors = co-applicant: no (2.7)
## : : other_debtors in {guarantor,none}:
## : : :...age <= 22: no (3.8)
## : : age > 22: yes (66.8/16.7)
## : credit_history in {critical,delayed,repaid}:
## : :...purpose in {car (used),others}: no (47.7/16.6)
## : purpose in {domestic appliances,repairs,retraining}: yes (26.3/10.1)
## : purpose = business:
## : :...personal_status = divorced male: yes (4.4/0.6)
## : : personal_status in {female,married male,single male}: no (34.1/7.1)
## : purpose = education:
## : :...employment_length in {0 - 1 yrs,1 - 4 yrs,> 7 yrs,
## : : : unemployed}: yes (25.4/5.2)
## : : employment_length = 4 - 7 yrs: no (5.4)
## : purpose = furniture:
## : :...dependents > 1: no (6.1/0.5)
## : : dependents <= 1:
## : : :...savings_balance in {101 - 500 DM,
## : : : 501 - 1000 DM}: yes (6.6/1.5)
## : : savings_balance in {> 1000 DM,unknown}: no (21.7/7.5)
## : : savings_balance = < 100 DM:
## : : :...personal_status = married male: no (5.1)
## : : personal_status in {divorced male,female,single male}:
## : : :...amount <= 1893: no (25.1/5)
## : : amount > 1893: yes (54.1/17.9)
## : purpose = car (new):
## : :...installment_plan in {bank,stores}: yes (19.7/4.3)
## : : installment_plan = none:
## : : :...job = mangement self-employed: yes (15.8/5.9)
## : : job in {skilled employee,unemployed non-resident,
## : : : unskilled resident}:
## : : :...checking_balance = 1 - 200 DM: no (40.4/8.8)
## : : checking_balance = < 0 DM:
## : : :...installment_rate <= 2: no (17.7/3.3)
## : : installment_rate > 2:
## : : :...telephone = none: yes (30.3/8)
## : : telephone = yes: no (10.1/2.1)
## : purpose = radio/tv:
## : :...foreign_worker = no: no (3.1)
## : foreign_worker = yes:
## : :...months_loan_duration <= 8: no (6.8)
## : months_loan_duration > 8:
## : :...employment_length in {4 - 7 yrs,
## : : unemployed}: yes (20.6/7)
## : employment_length = > 7 yrs: no (15/4.1)
## : employment_length = 1 - 4 yrs:
## : :...credit_history in {critical,repaid}: yes (33.8/13.6)
## : : credit_history = delayed: no (3.3)
## : employment_length = 0 - 1 yrs:
## : :...other_debtors = co-applicant: yes (0)
## : other_debtors = guarantor: no (1.6)
## : other_debtors = none:
## : :...amount <= 2214: yes (14.4)
## : amount > 2214: no (12.4/4.6)
## checking_balance in {> 200 DM,unknown}:
## :...foreign_worker = no: no (5.6)
## foreign_worker = yes:
## :...installment_plan = stores: yes (17.4/7.6)
## installment_plan = bank:
## :...housing in {for free,own}: no (55/21.3)
## : housing = rent: yes (5.4)
## installment_plan = none:
## :...credit_history in {critical,fully repaid,
## : fully repaid this bank}: no (69.3/11.6)
## credit_history = delayed:
## :...residence_history <= 1: yes (3.5)
## : residence_history > 1:
## : :...installment_rate <= 3: no (9.2)
## : installment_rate > 3: yes (21.3/7.6)
## credit_history = repaid:
## :...telephone = yes: no (49.7/6.8)
## telephone = none:
## :...other_debtors in {co-applicant,guarantor}: yes (11.3/3.3)
## other_debtors = none:
## :...savings_balance in {> 1000 DM,unknown}: no (11.2)
## savings_balance in {101 - 500 DM,501 - 1000 DM,
## : < 100 DM}:
## :...personal_status in {divorced male,
## : married male}: no (7.8)
## personal_status in {female,single male}:
## :...housing = for free: yes (2.2/0.5)
## housing = rent: no (10/2.5)
## housing = own:
## :...age <= 34: yes (32.8/12.5)
## age > 34: no (8)
##
## ----- Trial 8: -----
##
## Decision tree:
##
## checking_balance in {> 200 DM,unknown}:
## :...installment_plan = bank:
## : :...other_debtors = guarantor: yes (0)
## : : other_debtors = co-applicant: no (1.7)
## : : other_debtors = none:
## : : :...existing_credits > 2: no (3.1)
## : : existing_credits <= 2:
## : : :...savings_balance in {101 - 500 DM,> 1000 DM}: no (9/1.6)
## : : savings_balance in {501 - 1000 DM,< 100 DM,
## : : unknown}: yes (47.7/16.8)
## : installment_plan in {none,stores}:
## : :...purpose in {car (used),domestic appliances,education,others,
## : : retraining}: no (39.1/4.1)
## : purpose = repairs: yes (7.8/3.5)
## : purpose = business:
## : :...job = mangement self-employed: yes (7.9/0.7)
## : : job in {skilled employee,unemployed non-resident,
## : : unskilled resident}: no (18.7/4.2)
## : purpose = car (new):
## : :...existing_credits <= 2: no (50/7.7)
## : : existing_credits > 2: yes (3.4/0.6)
## : purpose = furniture:
## : :...job in {mangement self-employed,
## : : : unemployed non-resident}: yes (5.7/1.9)
## : : job in {skilled employee,unskilled resident}: no (49.3/11.7)
## : purpose = radio/tv:
## : :...checking_balance = > 200 DM:
## : :...age <= 41: yes (19.4/5.9)
## : : age > 41: no (4.8)
## : checking_balance = unknown:
## : :...age <= 23: yes (6.6/1.7)
## : age > 23: no (38.6/4.2)
## checking_balance in {1 - 200 DM,< 0 DM}:
## :...employment_length = unemployed:
## :...residence_history <= 1: yes (5.5)
## : residence_history > 1:
## : :...dependents <= 1: no (39.3/9.7)
## : dependents > 1: yes (6.6/1.5)
## employment_length = 4 - 7 yrs:
## :...age > 29: no (61.5/13.3)
## : age <= 29:
## : :...installment_rate <= 1: no (3.6)
## : installment_rate > 1:
## : :...savings_balance in {101 - 500 DM,501 - 1000 DM,< 100 DM,
## : : > 1000 DM}: yes (32.7/8.8)
## : savings_balance = unknown: no (2.5)
## employment_length = 0 - 1 yrs:
## :...foreign_worker = no: no (5.5)
## : foreign_worker = yes:
## : :...housing = for free: no (7.5/2.5)
## : housing = rent: yes (32.9/7.3)
## : housing = own:
## : :...savings_balance in {501 - 1000 DM,> 1000 DM,
## : : unknown}: no (7.9)
## : savings_balance in {101 - 500 DM,< 100 DM}:
## : :...residence_history <= 1: no (29/9.7)
## : residence_history > 1: yes (33.5/8.4)
## employment_length = 1 - 4 yrs:
## :...amount > 7721: yes (13.6/0.6)
## : amount <= 7721:
## : :...housing = for free: yes (6.7/2.9)
## : housing = rent:
## : :...residence_history <= 3: no (10.3/4)
## : : residence_history > 3: yes (26/7.9)
## : housing = own:
## : :...personal_status = divorced male: no (10.7/1.6)
## : personal_status = married male:
## : :...job = skilled employee: yes (16.5/6.7)
## : : job in {mangement self-employed,unemployed non-resident,
## : : unskilled resident}: no (7.3)
## : personal_status = single male:
## : :...amount <= 902: yes (7.5/1.4)
## : : amount > 902: no (59.1/13.3)
## : personal_status = female:
## : :...residence_history <= 1: no (7.4/0.9)
## : residence_history > 1:
## : :...age <= 37: yes (29.9/8.7)
## : age > 37: no (5.4)
## employment_length = > 7 yrs:
## :...personal_status = married male: no (4.8)
## personal_status in {divorced male,female,single male}:
## :...months_loan_duration > 40: yes (6)
## months_loan_duration <= 40:
## :...residence_history <= 3:
## :...savings_balance = 101 - 500 DM: no (3.9/0.5)
## : savings_balance in {501 - 1000 DM,< 100 DM,> 1000 DM,
## : unknown}: yes (27.3/3.9)
## residence_history > 3:
## :...age <= 30: no (13.7/0.6)
## age > 30:
## :...existing_credits <= 1: yes (36.3/9.5)
## existing_credits > 1: [S1]
##
## SubTree [S1]
##
## credit_history in {critical,fully repaid this bank,repaid}: no (20.9/4.5)
## credit_history in {delayed,fully repaid}: yes (3.9)
##
## ----- Trial 9: -----
##
## Decision tree:
##
## checking_balance in {> 200 DM,unknown}:
## :...checking_balance = > 200 DM:
## : :...dependents <= 1: no (60.2/17.5)
## : : dependents > 1: yes (9.4/2.7)
## : checking_balance = unknown:
## : :...amount <= 4455: no (163.6/30.7)
## : amount > 4455:
## : :...employment_length in {0 - 1 yrs,1 - 4 yrs,
## : : unemployed}: yes (44.6/13.8)
## : employment_length in {4 - 7 yrs,> 7 yrs}: no (20.2)
## checking_balance in {1 - 200 DM,< 0 DM}:
## :...foreign_worker = no: no (14.6/3.4)
## foreign_worker = yes:
## :...credit_history in {fully repaid,
## : fully repaid this bank}: yes (71.9/23.9)
## credit_history in {critical,delayed,repaid}:
## :...amount > 7966:
## :...credit_history in {critical,repaid}: yes (31.9/5.2)
## : credit_history = delayed: no (4.4/1.4)
## amount <= 7966:
## :...installment_plan = stores: yes (20.7/6.4)
## installment_plan in {bank,none}:
## :...months_loan_duration > 36:
## :...dependents > 1: no (6.3/1.6)
## : dependents <= 1:
## : :...employment_length in {0 - 1 yrs,1 - 4 yrs,
## : : 4 - 7 yrs,
## : : > 7 yrs}: yes (24/2.3)
## : employment_length = unemployed: no (3.4)
## months_loan_duration <= 36:
## :...other_debtors = co-applicant: yes (17.9/8.4)
## other_debtors = guarantor: no (22.1/4.4)
## other_debtors = none:
## :...employment_length = 4 - 7 yrs:
## :...personal_status in {divorced male,
## : : married male}: yes (13.8/5)
## : personal_status in {female,
## : single male}: no (41.6/4.7)
## employment_length = unemployed:
## :...residence_history <= 2: yes (14.9/2.1)
## : residence_history > 2: no (19.1/4.6)
## employment_length = 1 - 4 yrs:
## :...housing in {for free,own}: no (95.8/31.1)
## : housing = rent: [S1]
## employment_length = > 7 yrs:
## :...months_loan_duration <= 8: no (7.3)
## : months_loan_duration > 8:
## : :...residence_history <= 3:
## : :...amount <= 5129: yes (21.1/4.9)
## : : amount > 5129: no (3.3)
## : residence_history > 3:
## : :...amount <= 6948: no (46.9/14.4)
## : amount > 6948: yes (3.9/0.9)
## employment_length = 0 - 1 yrs:
## :...job in {mangement self-employed,
## : unemployed non-resident}: no (7.9/2.2)
## job = unskilled resident: yes (21.3/7.4)
## job = skilled employee:
## :...amount > 4870: no (6.5)
## amount <= 4870:
## :...existing_credits > 1: yes (4.6/0.5)
## existing_credits <= 1: [S2]
##
## SubTree [S1]
##
## purpose in {car (new),car (used)}: no (14.8/3.2)
## purpose in {business,domestic appliances,education,furniture,others,radio/tv,
## repairs,retraining}: yes (13.6/1.2)
##
## SubTree [S2]
##
## personal_status in {divorced male,single male}: no (10.5)
## personal_status in {female,married male}:
## :...credit_history = delayed: yes (0)
## credit_history = critical: no (1.8)
## credit_history = repaid:
## :...months_loan_duration <= 24: yes (25.9/8.1)
## months_loan_duration > 24: no (3.1)
##
##
## Evaluation on training data (900 cases):
##
## Trial Decision Tree
## ----- ----------------
## Size Errors
##
## 0 54 135(15.0%)
## 1 37 184(20.4%)
## 2 58 172(19.1%)
## 3 40 173(19.2%)
## 4 54 188(20.9%)
## 5 63 162(18.0%)
## 6 61 158(17.6%)
## 7 46 209(23.2%)
## 8 49 186(20.7%)
## 9 35 178(19.8%)
## boost 29( 3.2%) <<
##
##
## (a) (b) <-classified as
## ---- ----
## 630 3 (a): class no
## 26 241 (b): class yes
##
##
## Attribute usage:
##
## 100.00% checking_balance
## 100.00% months_loan_duration
## 100.00% foreign_worker
## 99.00% employment_length
## 98.67% purpose
## 98.00% other_debtors
## 96.67% amount
## 96.44% savings_balance
## 95.22% installment_plan
## 93.67% credit_history
## 90.00% job
## 87.11% installment_rate
## 74.44% age
## 74.33% property
## 59.33% existing_credits
## 58.56% residence_history
## 55.33% personal_status
## 54.89% housing
## 46.00% dependents
## 37.44% telephone
##
##
## Time: 0.0 secs
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | predicted default
## actual default | no | yes | Row Total |
## ---------------|-----------|-----------|-----------|
## no | 60 | 7 | 67 |
## | 0.600 | 0.070 | |
## ---------------|-----------|-----------|-----------|
## yes | 17 | 16 | 33 |
## | 0.170 | 0.160 | |
## ---------------|-----------|-----------|-----------|
## Column Total | 77 | 23 | 100 |
## ---------------|-----------|-----------|-----------|
##
##
## $predicted
## [1] "no" "yes"
##
## $actual
## [1] "no" "yes"
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | predicted default
## actual default | no | yes | Row Total |
## ---------------|-----------|-----------|-----------|
## no | 33 | 34 | 67 |
## | 0.330 | 0.340 | |
## ---------------|-----------|-----------|-----------|
## yes | 7 | 26 | 33 |
## | 0.070 | 0.260 | |
## ---------------|-----------|-----------|-----------|
## Column Total | 40 | 60 | 100 |
## ---------------|-----------|-----------|-----------|
##
##
## 'data.frame': 8124 obs. of 23 variables:
## $ type : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
## $ cap.shape : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
## $ cap.surface : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
## $ cap.color : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ bruises. : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
## $ odor : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ gill.attachment : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
## $ gill.spacing : Factor w/ 2 levels "c","w": 1 1 1 1 2 1 1 1 1 1 ...
## $ gill.size : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
## $ gill.color : Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
## $ stalk.shape : Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
## $ stalk.root : Factor w/ 5 levels "?","b","c","e",..: 4 3 3 4 4 3 3 3 4 3 ...
## $ stalk.surface.above.ring: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ stalk.surface.below.ring: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ stalk.color.above.ring : Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ stalk.color.below.ring : Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ veil.type : Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
## $ veil.color : Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ ring.number : Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
## $ ring.type : Factor w/ 5 levels "e","f","l","n",..: 5 5 5 5 1 5 5 5 5 5 ...
## $ spore.print.color : Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
## $ population : Factor w/ 6 levels "a","c","n","s",..: 4 3 3 4 1 3 3 4 5 4 ...
## $ habitat : Factor w/ 7 levels "d","g","l","m",..: 6 2 4 6 2 2 4 4 2 4 ...
##
## e p
## 4208 3916
##
## Call:
## OneR.formula(formula = type ~ ., data = mushrooms)
##
## Rules:
## If odor = a then type = e
## If odor = c then type = p
## If odor = f then type = p
## If odor = l then type = e
## If odor = m then type = p
## If odor = n then type = e
## If odor = p then type = p
## If odor = s then type = p
## If odor = y then type = p
##
## Accuracy:
## 8004 of 8124 instances classified correctly (98.52%)
##
## Call:
## OneR.formula(formula = type ~ ., data = mushrooms)
##
## Rules:
## If odor = a then type = e
## If odor = c then type = p
## If odor = f then type = p
## If odor = l then type = e
## If odor = m then type = p
## If odor = n then type = e
## If odor = p then type = p
## If odor = s then type = p
## If odor = y then type = p
##
## Accuracy:
## 8004 of 8124 instances classified correctly (98.52%)
##
## Contingency table:
## odor
## type a c f l m n p s y Sum
## e * 400 0 0 * 400 0 * 3408 0 0 0 4208
## p 0 * 192 * 2160 0 * 36 120 * 256 * 576 * 576 3916
## Sum 400 192 2160 400 36 3528 256 576 576 8124
## ---
## Maximum in each column: '*'
##
## Pearson's Chi-squared test:
## X-squared = 7659.7, df = 8, p-value < 2.2e-16
## -- Attaching packages ------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.8
## v tidyr 0.8.2 v stringr 1.3.1
## v readr 1.2.1 v forcats 0.3.0
## -- Conflicts ---------------------------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::annotate() masks NLP::annotate()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Parsed with column specification:
## cols(
## .default = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 8,125
## Variables: 23
## $ X1 <chr> "type", "p", "e", "e", "p", "e", "e", "e", "e", "p", "e", ...
## $ X2 <chr> "cap-shape", "x", "x", "b", "x", "x", "x", "b", "b", "x", ...
## $ X3 <chr> "cap-surface", "s", "s", "s", "y", "s", "y", "s", "y", "y"...
## $ X4 <chr> "cap-color", "n", "y", "w", "w", "g", "y", "w", "w", "w", ...
## $ X5 <chr> "bruises?", "t", "t", "t", "t", "f", "t", "t", "t", "t", "...
## $ X6 <chr> "odor", "p", "a", "l", "p", "n", "a", "a", "l", "p", "a", ...
## $ X7 <chr> "gill-attachment", "f", "f", "f", "f", "f", "f", "f", "f",...
## $ X8 <chr> "gill-spacing", "c", "c", "c", "c", "w", "c", "c", "c", "c...
## $ X9 <chr> "gill-size", "n", "b", "b", "n", "b", "b", "b", "b", "n", ...
## $ X10 <chr> "gill-color", "k", "k", "n", "n", "k", "n", "g", "n", "p",...
## $ X11 <chr> "stalk-shape", "e", "e", "e", "e", "t", "e", "e", "e", "e"...
## $ X12 <chr> "stalk-root", "e", "c", "c", "e", "e", "c", "c", "c", "e",...
## $ X13 <chr> "stalk-surface-above-ring", "s", "s", "s", "s", "s", "s", ...
## $ X14 <chr> "stalk-surface-below-ring", "s", "s", "s", "s", "s", "s", ...
## $ X15 <chr> "stalk-color-above-ring", "w", "w", "w", "w", "w", "w", "w...
## $ X16 <chr> "stalk-color-below-ring", "w", "w", "w", "w", "w", "w", "w...
## $ X17 <chr> "veil-type", "p", "p", "p", "p", "p", "p", "p", "p", "p", ...
## $ X18 <chr> "veil-color", "w", "w", "w", "w", "w", "w", "w", "w", "w",...
## $ X19 <chr> "ring-number", "o", "o", "o", "o", "o", "o", "o", "o", "o"...
## $ X20 <chr> "ring-type", "p", "p", "p", "p", "e", "p", "p", "p", "p", ...
## $ X21 <chr> "spore-print-color", "k", "n", "n", "k", "n", "k", "k", "n...
## $ X22 <chr> "population", "s", "n", "n", "s", "a", "n", "n", "s", "v",...
## $ X23 <chr> "habitat", "u", "g", "m", "u", "g", "g", "m", "m", "g", "m...
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18
## 2 6 4 10 2 9 2 2 2 12 2 5 4 4 9 9 1 4
## X19 X20 X21 X22 X23
## 3 5 9 6 7
## Classes 'tbl_df', 'tbl' and 'data.frame': 8124 obs. of 23 variables:
## $ X1 : chr "p" "e" "e" "p" ...
## $ X2 : chr "x" "x" "b" "x" ...
## $ X3 : chr "s" "s" "s" "y" ...
## $ X4 : chr "n" "y" "w" "w" ...
## $ X5 : chr "t" "t" "t" "t" ...
## $ X6 : chr "p" "a" "l" "p" ...
## $ X7 : chr "f" "f" "f" "f" ...
## $ X8 : chr "c" "c" "c" "c" ...
## $ X9 : chr "n" "b" "b" "n" ...
## $ X10: chr "k" "k" "n" "n" ...
## $ X11: chr "e" "e" "e" "e" ...
## $ X12: chr "e" "c" "c" "e" ...
## $ X13: chr "s" "s" "s" "s" ...
## $ X14: chr "s" "s" "s" "s" ...
## $ X15: chr "w" "w" "w" "w" ...
## $ X16: chr "w" "w" "w" "w" ...
## $ X17: chr "p" "p" "p" "p" ...
## $ X18: chr "w" "w" "w" "w" ...
## $ X19: chr "o" "o" "o" "o" ...
## $ X20: chr "p" "p" "p" "p" ...
## $ X21: chr "k" "n" "n" "k" ...
## $ X22: chr "s" "n" "n" "s" ...
## $ X23: chr "u" "g" "m" "u" ...
## Classes 'tbl_df', 'tbl' and 'data.frame': 8124 obs. of 23 variables:
## $ edibility : Factor w/ 2 levels "edible","poisonous": 2 1 1 2 1 1 1 1 2 1 ...
## $ cap_shape : Factor w/ 6 levels "bell","conical",..: 6 6 1 6 6 6 1 1 6 1 ...
## $ cap_surface : Factor w/ 4 levels "fibrous","grooves",..: 3 3 3 4 3 4 3 4 4 3 ...
## $ cap_color : Factor w/ 10 levels "buff","cinnamon",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ bruises : Factor w/ 2 levels "no","yes": 2 2 2 2 1 2 2 2 2 2 ...
## $ odor : Factor w/ 9 levels "almond","creosote",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ gill_attachement : Factor w/ 2 levels "attached","free": 2 2 2 2 2 2 2 2 2 2 ...
## $ gill_spacing : Factor w/ 2 levels "close","crowded": 1 1 1 1 2 1 1 1 1 1 ...
## $ gill_size : Factor w/ 2 levels "broad","narrow": 2 1 1 2 1 1 1 1 2 1 ...
## $ gill_color : Factor w/ 12 levels "buff","red","gray",..: 5 5 6 6 5 6 3 6 8 3 ...
## $ stalk_shape : Factor w/ 2 levels "enlarging","tapering": 1 1 1 1 2 1 1 1 1 1 ...
## $ stalk_root : Factor w/ 5 levels "missing","bulbous",..: 4 3 3 4 4 3 3 3 4 3 ...
## $ stalk_surface_above_ring: Factor w/ 4 levels "fibrous","silky",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ stalk_surface_below_ring: Factor w/ 4 levels "fibrous","silky",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ stalk_color_above_ring : Factor w/ 10 levels "buff","cinnamon",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ stalk_color_below_ring : Factor w/ 10 levels "buff","cinnamon",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ veil_type : Factor w/ 1 level "partial": 1 1 1 1 1 1 1 1 1 1 ...
## $ veil_color : Factor w/ 4 levels "brown","orange",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ ring_number : Factor w/ 3 levels "none","one","two": 2 2 2 2 2 2 2 2 2 2 ...
## $ ring_type : Factor w/ 5 levels "evanescent","flaring",..: 5 5 5 5 1 5 5 5 5 5 ...
## $ spore_print_color : Factor w/ 9 levels "buff","chocolate",..: 3 4 4 3 4 3 3 4 3 3 ...
## $ population : Factor w/ 6 levels "abundant","clustered",..: 4 3 3 4 1 3 3 4 5 4 ...
## $ habitat : Factor w/ 7 levels "wood","grasses",..: 6 2 4 6 2 2 4 4 2 4 ...
## Observations: 8,124
## Variables: 23
## $ edibility <fct> poisonous, edible, edible, poisonous,...
## $ cap_shape <fct> convex, convex, bell, convex, convex,...
## $ cap_surface <fct> scaly, scaly, scaly, smooth, scaly, s...
## $ cap_color <fct> brown, yellow, white, white, gray, ye...
## $ bruises <fct> yes, yes, yes, yes, no, yes, yes, yes...
## $ odor <fct> pungent, almond, anise, pungent, none...
## $ gill_attachement <fct> free, free, free, free, free, free, f...
## $ gill_spacing <fct> close, close, close, close, crowded, ...
## $ gill_size <fct> narrow, broad, broad, narrow, broad, ...
## $ gill_color <fct> black, black, brown, brown, black, br...
## $ stalk_shape <fct> enlarging, enlarging, enlarging, enla...
## $ stalk_root <fct> equal, club, club, equal, equal, club...
## $ stalk_surface_above_ring <fct> smooth, smooth, smooth, smooth, smoot...
## $ stalk_surface_below_ring <fct> smooth, smooth, smooth, smooth, smoot...
## $ stalk_color_above_ring <fct> purple, purple, purple, purple, purpl...
## $ stalk_color_below_ring <fct> purple, purple, purple, purple, purpl...
## $ veil_type <fct> partial, partial, partial, partial, p...
## $ veil_color <fct> white, white, white, white, white, wh...
## $ ring_number <fct> one, one, one, one, one, one, one, on...
## $ ring_type <fct> pendant, pendant, pendant, pendant, e...
## $ spore_print_color <fct> black, brown, brown, black, brown, bl...
## $ population <fct> scattered, numerous, numerous, scatte...
## $ habitat <fct> urban, grasses, meadows, urban, grass...
## # A tibble: 23 x 2
## `Variable name` `Number of levels`
## <chr> <dbl>
## 1 gill_color 12
## 2 cap_color 10
## 3 stalk_color_above_ring 10
## 4 stalk_color_below_ring 10
## 5 odor 9
## 6 spore_print_color 9
## 7 habitat 7
## 8 cap_shape 6
## 9 population 6
## 10 stalk_root 5
## # ... with 13 more rows
## edibility cap_shape cap_surface
## 0 0 0
## cap_color bruises odor
## 0 0 0
## gill_attachement gill_spacing gill_size
## 0 0 0
## gill_color stalk_shape stalk_root
## 0 0 0
## stalk_surface_above_ring stalk_surface_below_ring stalk_color_above_ring
## 0 0 0
## stalk_color_below_ring veil_color ring_number
## 0 0 0
## ring_type spore_print_color population
## 0 0 0
## habitat
## 0
##
## edible poisonous
## 0.52 0.48
##
## edible poisonous
## 0.52 0.48
##
## edible poisonous
## 0.52 0.48
## n= 6500
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 6500 3133 edible (0.51800000 0.48200000)
## 2) odor=almond,anise,none 3468 101 edible (0.97087659 0.02912341)
## 4) spore_print_color=buff,chocolate,black,brown,orange,purple,white,yellow 3408 41 edible (0.98796948 0.01203052) *
## 5) spore_print_color=green 60 0 poisonous (0.00000000 1.00000000) *
## 3) odor=creosote,foul,musty,pungent,spicy,fishy 3032 0 poisonous (0.00000000 1.00000000) *
## Confusion Matrix and Statistics
##
## Reference
## Prediction edible poisonous
## edible 3367 41
## poisonous 0 3092
##
## Accuracy : 0.9937
## 95% CI : (0.9915, 0.9955)
## No Information Rate : 0.518
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9874
## Mcnemar's Test P-Value : 4.185e-10
##
## Sensitivity : 1.0000
## Specificity : 0.9869
## Pos Pred Value : 0.9880
## Neg Pred Value : 1.0000
## Prevalence : 0.5180
## Detection Rate : 0.5180
## Detection Prevalence : 0.5243
## Balanced Accuracy : 0.9935
##
## 'Positive' Class : edible
##
## Confusion Matrix and Statistics
##
## Reference
## Prediction edible poisonous
## edible 3367 0
## poisonous 0 3133
##
## Accuracy : 1
## 95% CI : (0.9994, 1)
## No Information Rate : 0.518
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.000
## Specificity : 1.000
## Pos Pred Value : 1.000
## Neg Pred Value : 1.000
## Prevalence : 0.518
## Detection Rate : 0.518
## Detection Prevalence : 0.518
## Balanced Accuracy : 1.000
##
## 'Positive' Class : edible
##
##
## Classification tree:
## rpart(formula = edibility ~ ., data = train_mushroom, method = "class",
## cp = 1e-05)
##
## Variables actually used in tree construction:
## [1] cap_surface habitat odor
## [4] spore_print_color stalk_color_below_ring stalk_root
##
## Root node error: 3133/6500 = 0.482
##
## n= 6500
##
## CP nsplit rel error xerror xstd
## 1 0.9677625 0 1.0000000 1.0000000 0.01285833
## 2 0.0191510 1 0.0322375 0.0322375 0.00318273
## 3 0.0063837 2 0.0130865 0.0130865 0.00203731
## 4 0.0022343 3 0.0067028 0.0067028 0.00146032
## 5 0.0011171 5 0.0022343 0.0022343 0.00084402
## 6 0.0000100 7 0.0000000 0.0022343 0.00084402
## [1] 0.00111714
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
##
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
##
## boundary
## Warning: package 'psych' was built under R version 3.5.2
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## 'data.frame': 4898 obs. of 12 variables:
## $ fixed.acidity : num 6.7 5.7 5.9 5.3 6.4 7 7.9 6.6 7 6.5 ...
## $ volatile.acidity : num 0.62 0.22 0.19 0.47 0.29 0.14 0.12 0.38 0.16 0.37 ...
## $ citric.acid : num 0.24 0.2 0.26 0.1 0.21 0.41 0.49 0.28 0.3 0.33 ...
## $ residual.sugar : num 1.1 16 7.4 1.3 9.65 0.9 5.2 2.8 2.6 3.9 ...
## $ chlorides : num 0.039 0.044 0.034 0.036 0.041 0.037 0.049 0.043 0.043 0.027 ...
## $ free.sulfur.dioxide : num 6 41 33 11 36 22 33 17 34 40 ...
## $ total.sulfur.dioxide: num 62 113 123 74 119 95 152 67 90 130 ...
## $ density : num 0.993 0.999 0.995 0.991 0.993 ...
## $ pH : num 3.41 3.22 3.49 3.48 2.99 3.25 3.18 3.21 2.88 3.28 ...
## $ sulphates : num 0.32 0.46 0.42 0.54 0.34 0.43 0.47 0.47 0.47 0.39 ...
## $ alcohol : num 10.4 8.9 10.1 11.2 10.9 ...
## $ quality : int 5 6 6 4 6 6 6 6 6 7 ...
## 'data.frame': 3750 obs. of 12 variables:
## $ fixed.acidity : num 6.7 5.7 5.9 5.3 6.4 7 7.9 6.6 7 6.5 ...
## $ volatile.acidity : num 0.62 0.22 0.19 0.47 0.29 0.14 0.12 0.38 0.16 0.37 ...
## $ citric.acid : num 0.24 0.2 0.26 0.1 0.21 0.41 0.49 0.28 0.3 0.33 ...
## $ residual.sugar : num 1.1 16 7.4 1.3 9.65 0.9 5.2 2.8 2.6 3.9 ...
## $ chlorides : num 0.039 0.044 0.034 0.036 0.041 0.037 0.049 0.043 0.043 0.027 ...
## $ free.sulfur.dioxide : num 6 41 33 11 36 22 33 17 34 40 ...
## $ total.sulfur.dioxide: num 62 113 123 74 119 95 152 67 90 130 ...
## $ density : num 0.993 0.999 0.995 0.991 0.993 ...
## $ pH : num 3.41 3.22 3.49 3.48 2.99 3.25 3.18 3.21 2.88 3.28 ...
## $ sulphates : num 0.32 0.46 0.42 0.54 0.34 0.43 0.47 0.47 0.47 0.39 ...
## $ alcohol : num 10.4 8.9 10.1 11.2 10.9 ...
## $ quality : int 5 6 6 4 6 6 6 6 6 7 ...
## 'data.frame': 1148 obs. of 12 variables:
## $ fixed.acidity : num 7 7.4 6.9 6.5 6.8 7.3 6.1 6.8 6 6.3 ...
## $ volatile.acidity : num 0.33 0.39 0.14 0.18 0.28 0.4 0.32 0.35 0.28 0.24 ...
## $ citric.acid : num 0.28 0.23 0.38 0.29 0.44 0.28 0.33 0.44 0.27 0.29 ...
## $ residual.sugar : num 5.7 7 1 1.7 11.5 6.5 10.7 6.5 15.5 13.7 ...
## $ chlorides : num 0.033 0.033 0.041 0.035 0.04 0.037 0.036 0.056 0.036 0.035 ...
## $ free.sulfur.dioxide : num 39 29 22 39 58 26 27 31 31 53 ...
## $ total.sulfur.dioxide: num 204 126 81 144 223 97 98 161 134 134 ...
## $ density : num 0.992 0.994 0.99 0.993 0.997 ...
## $ pH : num 3.17 3.14 3.03 3.49 3.22 3.16 3.34 3.14 3.19 3.17 ...
## $ sulphates : num 0.64 0.42 0.54 0.5 0.56 0.58 0.52 0.44 0.44 0.38 ...
## $ alcohol : num 12.5 10.5 11.4 10.5 9.5 12.6 10.2 9.5 13 10.6 ...
## $ quality : int 6 5 6 6 5 7 6 5 7 6 ...
## n= 3750
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 3750 2945.53200 5.870933
## 2) alcohol< 10.85 2372 1418.86100 5.604975
## 4) volatile.acidity>=0.2275 1611 821.30730 5.432030
## 8) volatile.acidity>=0.3025 688 278.97670 5.255814 *
## 9) volatile.acidity< 0.3025 923 505.04230 5.563380 *
## 5) volatile.acidity< 0.2275 761 447.36400 5.971091 *
## 3) alcohol>=10.85 1378 1070.08200 6.328737
## 6) free.sulfur.dioxide< 10.5 84 95.55952 5.369048 *
## 7) free.sulfur.dioxide>=10.5 1294 892.13600 6.391036
## 14) alcohol< 11.76667 629 430.11130 6.173291
## 28) volatile.acidity>=0.465 11 10.72727 4.545455 *
## 29) volatile.acidity< 0.465 618 389.71680 6.202265 *
## 15) alcohol>=11.76667 665 403.99400 6.596992 *
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.545 5.563 5.971 5.893 6.202 6.597
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 5.000 6.000 5.901 6.000 9.000
## 'data.frame': 1338 obs. of 7 variables:
## $ age : int 19 18 28 33 32 31 46 37 37 60 ...
## $ sex : Factor w/ 2 levels "female","male": 1 2 2 2 2 1 1 1 2 1 ...
## $ bmi : num 27.9 33.8 33 22.7 28.9 25.7 33.4 27.7 29.8 25.8 ...
## $ children: int 0 1 3 0 0 0 1 3 2 0 ...
## $ smoker : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
## $ region : Factor w/ 4 levels "northeast","northwest",..: 4 3 3 2 2 3 3 2 1 2 ...
## $ expenses: num 16885 1726 4449 21984 3867 ...
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1122 4740 9382 13270 16640 63770
##
## northeast northwest southeast southwest
## 324 325 364 325
## age bmi children charges
## age 1.0000000 0.10934101 0.04246900 0.29900819
## bmi 0.1093410 1.00000000 0.01264471 0.19857626
## children 0.0424690 0.01264471 1.00000000 0.06799823
## charges 0.2990082 0.19857626 0.06799823 1.00000000
##
## Call:
## lm(formula = charges ~ ., data = insurance)
##
## Coefficients:
## (Intercept) age sexmale bmi
## -11941.6 256.8 -131.4 339.3
## children smokeryes regionnorthwest regionsoutheast
## 475.7 23847.5 -352.8 -1035.6
## regionsouthwest
## -959.3
##
## Call:
## lm(formula = charges ~ ., data = insurance)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11302.7 -2850.9 -979.6 1383.9 29981.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11941.6 987.8 -12.089 < 2e-16 ***
## age 256.8 11.9 21.586 < 2e-16 ***
## sexmale -131.3 332.9 -0.395 0.693255
## bmi 339.3 28.6 11.864 < 2e-16 ***
## children 475.7 137.8 3.452 0.000574 ***
## smokeryes 23847.5 413.1 57.723 < 2e-16 ***
## regionnorthwest -352.8 476.3 -0.741 0.458976
## regionsoutheast -1035.6 478.7 -2.163 0.030685 *
## regionsouthwest -959.3 477.9 -2.007 0.044921 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6062 on 1329 degrees of freedom
## Multiple R-squared: 0.7509, Adjusted R-squared: 0.7494
## F-statistic: 500.9 on 8 and 1329 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = charges ~ age + age2 + children + bmi + sex + bmi30 *
## smoker + region, data = insurance)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17297.1 -1656.0 -1262.7 -727.8 24161.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 139.0053 1363.1359 0.102 0.918792
## age -32.6181 59.8250 -0.545 0.585690
## age2 3.7307 0.7463 4.999 6.54e-07 ***
## children 678.6017 105.8855 6.409 2.03e-10 ***
## bmi 119.7715 34.2796 3.494 0.000492 ***
## sexmale -496.7690 244.3713 -2.033 0.042267 *
## bmi30 -997.9355 422.9607 -2.359 0.018449 *
## smokeryes 13404.5952 439.9591 30.468 < 2e-16 ***
## regionnorthwest -279.1661 349.2826 -0.799 0.424285
## regionsoutheast -828.0345 351.6484 -2.355 0.018682 *
## regionsouthwest -1222.1619 350.5314 -3.487 0.000505 ***
## bmi30:smokeryes 19810.1534 604.6769 32.762 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4445 on 1326 degrees of freedom
## Multiple R-squared: 0.8664, Adjusted R-squared: 0.8653
## F-statistic: 781.7 on 11 and 1326 DF, p-value: < 2.2e-16