load the data
# NOTE(review): hard-coded, machine-specific working directory. The relative
# file names passed to read.csv() later in the script resolve against it.
setwd("C:/Users/Maxwell/Desktop/Learn R")
# Attach the tidyverse packages (the %>% pipe used later in the script is
# available once they are attached).
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
# Read the credit data from the working directory.
# stringsAsFactors = TRUE is given explicitly: since R 4.0.0 read.csv() leaves
# text columns as character, but the str()/summary() output recorded in this
# document (and the factor-level counts it shows) relies on factor columns.
credit <- read.csv("credit.csv", stringsAsFactors = TRUE)
# Show the type and first few values of every column.
str(credit)
## 'data.frame': 1000 obs. of 21 variables:
## $ checking_balance : Factor w/ 4 levels "< 0 DM","> 200 DM",..: 1 3 4 1 1 4 4 3 4 3 ...
## $ months_loan_duration: int 6 48 12 42 24 36 24 36 12 30 ...
## $ credit_history : Factor w/ 5 levels "critical","delayed",..: 1 5 1 5 2 5 5 5 5 1 ...
## $ purpose : Factor w/ 10 levels "business","car (new)",..: 8 8 5 6 2 5 6 3 8 2 ...
## $ amount : int 1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
## $ savings_balance : Factor w/ 5 levels "< 100 DM","> 1000 DM",..: 5 1 1 1 1 5 4 1 2 1 ...
## $ employment_length : Factor w/ 5 levels "> 7 yrs","0 - 1 yrs",..: 1 3 4 4 3 3 1 3 4 5 ...
## $ installment_rate : int 4 2 2 2 3 2 3 2 2 4 ...
## $ personal_status : Factor w/ 4 levels "divorced male",..: 4 2 4 4 4 4 4 4 1 3 ...
## $ other_debtors : Factor w/ 3 levels "co-applicant",..: 3 3 3 2 3 3 3 3 3 3 ...
## $ residence_history : int 4 2 3 4 4 4 4 2 4 2 ...
## $ property : Factor w/ 4 levels "building society savings",..: 3 3 3 1 4 4 1 2 3 2 ...
## $ age : int 67 22 49 45 53 35 53 35 61 28 ...
## $ installment_plan : Factor w/ 3 levels "bank","none",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ housing : Factor w/ 3 levels "for free","own",..: 2 2 2 1 1 1 2 3 2 2 ...
## $ existing_credits : int 2 1 1 1 2 1 1 1 1 2 ...
## $ default : int 1 2 1 1 2 1 1 1 1 2 ...
## $ dependents : int 1 1 2 2 2 2 1 1 1 1 ...
## $ telephone : Factor w/ 2 levels "none","yes": 2 1 1 1 1 2 1 2 1 1 ...
## $ foreign_worker : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ job : Factor w/ 4 levels "mangement self-employed",..: 2 2 4 2 2 4 2 1 4 1 ...
# Frequency of each checking-balance category.
table(credit[["checking_balance"]])
##
## < 0 DM > 200 DM 1 - 200 DM unknown
## 274 63 269 394
# Frequency of each savings-balance category.
table(credit[["savings_balance"]])
##
## < 100 DM > 1000 DM 101 - 500 DM 501 - 1000 DM unknown
## 603 48 103 63 183
Convert the default variable to a factor (required by the C5.0 decision tree).
# Recode the outcome: a value of 1 becomes "no", any other non-missing value
# becomes "yes", and the result is stored back as a factor.
credit$default <- factor(ifelse(credit$default == 1, "no", "yes"))
# Per-column overview of the data frame after recoding.
summary(credit)
## checking_balance months_loan_duration credit_history
## < 0 DM :274 Min. : 4.0 critical :293
## > 200 DM : 63 1st Qu.:12.0 delayed : 88
## 1 - 200 DM:269 Median :18.0 fully repaid : 40
## unknown :394 Mean :20.9 fully repaid this bank: 49
## 3rd Qu.:24.0 repaid :530
## Max. :72.0
##
## purpose amount savings_balance employment_length
## radio/tv :280 Min. : 250 < 100 DM :603 > 7 yrs :253
## car (new) :234 1st Qu.: 1366 > 1000 DM : 48 0 - 1 yrs :172
## furniture :181 Median : 2320 101 - 500 DM :103 1 - 4 yrs :339
## car (used):103 Mean : 3271 501 - 1000 DM: 63 4 - 7 yrs :174
## business : 97 3rd Qu.: 3972 unknown :183 unemployed: 62
## education : 50 Max. :18424
## (Other) : 55
## installment_rate personal_status other_debtors
## Min. :1.000 divorced male: 50 co-applicant: 41
## 1st Qu.:2.000 female :310 guarantor : 52
## Median :3.000 married male : 92 none :907
## Mean :2.973 single male :548
## 3rd Qu.:4.000
## Max. :4.000
##
## residence_history property age
## Min. :1.000 building society savings:232 Min. :19.00
## 1st Qu.:2.000 other :332 1st Qu.:27.00
## Median :3.000 real estate :282 Median :33.00
## Mean :2.845 unknown/none :154 Mean :35.55
## 3rd Qu.:4.000 3rd Qu.:42.00
## Max. :4.000 Max. :75.00
##
## installment_plan housing existing_credits default
## bank :139 for free:108 Min. :1.000 no :700
## none :814 own :713 1st Qu.:1.000 yes:300
## stores: 47 rent :179 Median :1.000
## Mean :1.407
## 3rd Qu.:2.000
## Max. :4.000
##
## dependents telephone foreign_worker job
## Min. :1.000 none:596 no : 37 mangement self-employed:148
## 1st Qu.:1.000 yes :404 yes:963 skilled employee :630
## Median :1.000 unemployed non-resident: 22
## Mean :1.155 unskilled resident :200
## 3rd Qu.:1.000
## Max. :2.000
##
use caret package for splitting the data
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
# Fix the RNG seed so the random train/test split is identical on every run.
set.seed(123)
# Row indices of a 90% training sample.
# NOTE(review): the partition is driven by checking_balance, whereas the
# models fitted below predict `default` and the mushroom split later in this
# script partitions on its outcome (`type`) -- confirm this is intended.
index <- createDataPartition(credit$checking_balance, p = 0.9, list = FALSE)
# Rows selected by the partition form the training set; the rest are held out.
train_credit <- credit[index, ]
test_credit <- credit[-index, ]
load the C50 decision tree package
library(C50)
library(tidyverse)
# Fit a single C5.0 classification tree predicting `default` from all other
# columns of the training set, then display the fitted object.
credit_dtree <- C5.0(default ~ ., data = train_credit)
credit_dtree
##
## Call:
## C5.0.formula(formula = default ~ ., data = train_credit)
##
## Classification Tree
## Number of samples: 902
## Number of predictors: 20
##
## Tree size: 48
##
## Non-standard options: attempt to group attributes
# Print the full tree, its confusion matrix on the training data, and the
# attribute-usage table.
summary(credit_dtree)
##
## Call:
## C5.0.formula(formula = default ~ ., data = train_credit)
##
##
## C5.0 [Release 2.07 GPL Edition] Fri Feb 17 20:58:16 2017
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 902 cases (21 attributes) from undefined.data
##
## Decision tree:
##
## checking_balance in {> 200 DM,unknown}: no (412/57)
## checking_balance in {< 0 DM,1 - 200 DM}:
## :...credit_history in {fully repaid,fully repaid this bank}:
## :...dependents > 1: yes (11)
## : dependents <= 1:
## : :...savings_balance = > 1000 DM: yes (0)
## : savings_balance in {501 - 1000 DM,unknown}: no (7)
## : savings_balance in {< 100 DM,101 - 500 DM}:
## : :...other_debtors = co-applicant: no (1)
## : other_debtors = none: yes (40/9)
## : other_debtors = guarantor:
## : :...employment_length in {> 7 yrs,0 - 1 yrs,1 - 4 yrs,
## : : unemployed}: no (2)
## : employment_length = 4 - 7 yrs: yes (2)
## credit_history in {critical,delayed,repaid}:
## :...other_debtors = co-applicant:
## :...foreign_worker = no: no (2)
## : foreign_worker = yes: yes (17/5)
## other_debtors = guarantor:
## :...purpose in {business,car (used),domestic appliances,education,
## : : furniture,others,repairs,retraining}: no (10)
## : purpose = car (new):
## : :...installment_plan in {bank,stores}: yes (4)
## : : installment_plan = none: no (1)
## : purpose = radio/tv:
## : :...months_loan_duration <= 33: no (20)
## : months_loan_duration > 33: yes (3)
## other_debtors = none:
## :...savings_balance = > 1000 DM: no (16/1)
## savings_balance = 501 - 1000 DM:
## :...months_loan_duration <= 21: no (10/1)
## : months_loan_duration > 21: yes (2)
## savings_balance = 101 - 500 DM:
## :...personal_status = divorced male:
## : :...credit_history in {critical,delayed}: no (2)
## : : credit_history = repaid: yes (2)
## : personal_status = female:
## : :...credit_history = delayed: no (1)
## : : credit_history in {critical,repaid}: yes (6/1)
## : personal_status = married male:
## : :...credit_history = critical: no (1)
## : : credit_history in {delayed,repaid}: yes (3)
## : personal_status = single male:
## : :...existing_credits <= 1: no (15/1)
## : existing_credits > 1:
## : :...residence_history <= 3: no (3)
## : residence_history > 3: yes (2)
## savings_balance = unknown:
## :...existing_credits > 1: no (12)
## : existing_credits <= 1:
## : :...credit_history in {critical,delayed}: no (5)
## : credit_history = repaid:
## : :...checking_balance = < 0 DM:
## : :...telephone = none: yes (9/1)
## : : telephone = yes:
## : : :...months_loan_duration <= 30: no (7/1)
## : : months_loan_duration > 30: yes (2)
## : checking_balance = 1 - 200 DM:
## : :...property = building society savings: yes (4/1)
## : property in {other,real estate}: no (14/2)
## : property = unknown/none:
## : :...amount <= 5848: no (3)
## : amount > 5848: yes (2)
## savings_balance = < 100 DM:
## :...months_loan_duration > 27:
## :...residence_history > 1: yes (49/11)
## : residence_history <= 1:
## : :...checking_balance = 1 - 200 DM: no (3)
## : checking_balance = < 0 DM:
## : :...months_loan_duration <= 40: yes (3)
## : months_loan_duration > 40: no (2)
## months_loan_duration <= 27:
## :...credit_history = critical: no (56/12)
## credit_history in {delayed,repaid}:
## :...installment_rate > 2:
## :...months_loan_duration <= 11: no (23/7)
## : months_loan_duration > 11: yes (64/21)
## installment_rate <= 2:
## :...foreign_worker = no: yes (2)
## foreign_worker = yes:
## :...amount <= 1372:
## :...telephone = none: yes (7)
## : telephone = yes: no (2)
## amount > 1372:
## :...amount <= 9629: no (36/5)
## amount > 9629: yes (2)
##
##
## Evaluation on training data (902 cases):
##
## Decision Tree
## ----------------
## Size Errors
##
## 47 136(15.1%) <<
##
##
## (a) (b) <-classified as
## ---- ----
## 579 49 (a): class no
## 87 187 (b): class yes
##
##
## Attribute usage:
##
## 100.00% checking_balance
## 54.32% credit_history
## 52.33% other_debtors
## 46.78% savings_balance
## 32.48% months_loan_duration
## 15.08% installment_rate
## 8.65% existing_credits
## 7.54% foreign_worker
## 6.98% dependents
## 6.87% residence_history
## 5.76% amount
## 4.21% purpose
## 3.88% personal_status
## 2.99% telephone
## 2.55% property
## 0.55% installment_plan
## 0.44% employment_length
##
##
## Time: 0.0 secs
predict using the model
# Predict a class for every held-out row using the single tree.
credit_dtree_pred <- predict(credit_dtree, newdata = test_credit)
library(gmodels)
# Cross-tabulate predictions (rows) against the true labels (columns).
CrossTable(credit_dtree_pred, test_credit$default)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 98
##
##
## | test_credit$default
## credit_dtree_pred | no | yes | Row Total |
## ------------------|-----------|-----------|-----------|
## no | 59 | 13 | 72 |
## | 0.704 | 1.949 | |
## | 0.819 | 0.181 | 0.735 |
## | 0.819 | 0.500 | |
## | 0.602 | 0.133 | |
## ------------------|-----------|-----------|-----------|
## yes | 13 | 13 | 26 |
## | 1.949 | 5.398 | |
## | 0.500 | 0.500 | 0.265 |
## | 0.181 | 0.500 | |
## | 0.133 | 0.133 | |
## ------------------|-----------|-----------|-----------|
## Column Total | 72 | 26 | 98 |
## | 0.735 | 0.265 | |
## ------------------|-----------|-----------|-----------|
##
##
improved decision tree
# Same model formula as the single tree, but boosted over 10 trials.
credit_bdtree <- C5.0(default ~ ., data = train_credit, trials = 10)
credit_bdtree
##
## Call:
## C5.0.formula(formula = default ~ ., data = train_credit, trials = 10)
##
## Classification Tree
## Number of samples: 902
## Number of predictors: 20
##
## Number of boosting iterations: 10
## Average tree size: 41.2
##
## Non-standard options: attempt to group attributes
# Predict the held-out rows with the boosted model.
credit_bdtree_pred <- predict(credit_bdtree, newdata = test_credit)
library(gmodels)
# Cross-tabulate predictions (rows) against the true labels (columns).
CrossTable(credit_bdtree_pred, test_credit$default)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 98
##
##
## | test_credit$default
## credit_bdtree_pred | no | yes | Row Total |
## -------------------|-----------|-----------|-----------|
## no | 65 | 12 | 77 |
## | 1.256 | 3.478 | |
## | 0.844 | 0.156 | 0.786 |
## | 0.903 | 0.462 | |
## | 0.663 | 0.122 | |
## -------------------|-----------|-----------|-----------|
## yes | 7 | 14 | 21 |
## | 4.604 | 12.751 | |
## | 0.333 | 0.667 | 0.214 |
## | 0.097 | 0.538 | |
## | 0.071 | 0.143 | |
## -------------------|-----------|-----------|-----------|
## Column Total | 72 | 26 | 98 |
## | 0.735 | 0.265 | |
## -------------------|-----------|-----------|-----------|
##
##
Add a cost matrix, matrix(c(0, 1, 25, 0), nrow = 2), so that one kind of misclassification costs 25 times as much as the other.
# 2 x 2 misclassification-cost matrix, filled column by column and labelled
# with the two class names on both dimensions. The off-diagonal costs are 25
# (row "no", column "yes") and 1 (row "yes", column "no").
# NOTE(review): C5.0 is documented to read rows as predicted and columns as
# actual classes -- confirm that matches the intended penalty direction.
costM <- matrix(
  c(0, 1, 25, 0),
  nrow = 2,
  dimnames = list(c("no", "yes"), c("no", "yes"))
)
costM
## no yes
## no 0 25
## yes 1 0
# Boosted C5.0 model (10 trials) trained with the cost matrix defined above.
credit_cbdtree <- C5.0(
  formula = default ~ .,
  data = train_credit,
  trials = 10,
  costs = costM
)
# Predict the held-out rows with the cost-sensitive model.
credit_cbdtree_pred <- predict(credit_cbdtree, newdata = test_credit)
library(gmodels)
# Cross-tabulate predictions (rows) against the true labels (columns).
CrossTable(credit_cbdtree_pred, test_credit$default)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 98
##
##
## | test_credit$default
## credit_cbdtree_pred | no | yes | Row Total |
## --------------------|-----------|-----------|-----------|
## no | 64 | 13 | 77 |
## | 0.975 | 2.701 | |
## | 0.831 | 0.169 | 0.786 |
## | 0.889 | 0.500 | |
## | 0.653 | 0.133 | |
## --------------------|-----------|-----------|-----------|
## yes | 8 | 13 | 21 |
## | 3.577 | 9.905 | |
## | 0.381 | 0.619 | 0.214 |
## | 0.111 | 0.500 | |
## | 0.082 | 0.133 | |
## --------------------|-----------|-----------|-----------|
## Column Total | 72 | 26 | 98 |
## | 0.735 | 0.265 | |
## --------------------|-----------|-----------|-----------|
##
##
continue for the mushroom data
# Show the current working directory, against which the relative path in the
# following read.csv() call resolves.
getwd()
## [1] "C:/Users/Maxwell/Desktop/Learn R"
# Read the mushroom data from the working directory.
# stringsAsFactors = TRUE is given explicitly: since R 4.0.0 read.csv() leaves
# text columns as character, but the output recorded in this document shows
# every column as a factor, and the rule learners fitted below are given
# those factor columns.
mushroom <- read.csv("mushrooms.csv", stringsAsFactors = TRUE)
# Show the type and first few values of every column.
str(mushroom)
## 'data.frame': 8124 obs. of 23 variables:
## $ type : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
## $ cap_shape : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
## $ cap_surface : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
## $ cap_color : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ bruises : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
## $ odor : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ gill_attachment : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
## $ gill_spacing : Factor w/ 2 levels "c","w": 1 1 1 1 2 1 1 1 1 1 ...
## $ gill_size : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
## $ gill_color : Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
## $ stalk_shape : Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
## $ stalk_root : Factor w/ 5 levels "?","b","c","e",..: 4 3 3 4 4 3 3 3 4 3 ...
## $ stalk_surface_above_ring: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ stalk_surface_below_ring: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ stalk_color_above_ring : Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ stalk_color_below_ring : Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ veil_type : Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
## $ veil_color : Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ ring_number : Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
## $ ring_type : Factor w/ 5 levels "e","f","l","n",..: 5 5 5 5 1 5 5 5 5 5 ...
## $ spore_print_color : Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
## $ population : Factor w/ 6 levels "a","c","n","s",..: 4 3 3 4 1 3 3 4 5 4 ...
## $ habitat : Factor w/ 7 levels "d","g","l","m",..: 6 2 4 6 2 2 4 4 2 4 ...
# Level counts for every column of the mushroom data.
summary(mushroom)
## type cap_shape cap_surface cap_color bruises odor
## e:4208 b: 452 f:2320 n :2284 f:4748 n :3528
## p:3916 c: 4 g: 4 g :1840 t:3376 f :2160
## f:3152 s:2556 e :1500 s : 576
## k: 828 y:3244 y :1072 y : 576
## s: 32 w :1040 a : 400
## x:3656 b : 168 l : 400
## (Other): 220 (Other): 484
## gill_attachment gill_spacing gill_size gill_color stalk_shape
## a: 210 c:6812 b:5612 b :1728 e:3516
## f:7914 w:1312 n:2512 p :1492 t:4608
## w :1202
## n :1048
## g : 752
## h : 732
## (Other):1170
## stalk_root stalk_surface_above_ring stalk_surface_below_ring
## ?:2480 f: 552 f: 600
## b:3776 k:2372 k:2304
## c: 556 s:5176 s:4936
## e:1120 y: 24 y: 284
## r: 192
##
##
## stalk_color_above_ring stalk_color_below_ring veil_type veil_color
## w :4464 w :4384 p:8124 n: 96
## p :1872 p :1872 o: 96
## g : 576 g : 576 w:7924
## n : 448 n : 512 y: 8
## b : 432 b : 432
## o : 192 o : 192
## (Other): 140 (Other): 156
## ring_number ring_type spore_print_color population habitat
## n: 36 e:2776 w :2388 a: 384 d:3148
## o:7488 f: 48 n :1968 c: 340 g:2148
## t: 600 l:1296 k :1872 n: 400 l: 832
## n: 36 h :1632 s:1248 m: 292
## p:3968 r : 72 v:4040 p:1144
## b : 48 y:1712 u: 368
## (Other): 144 w: 192
Notice the veil_type column: it has only one level, so it carries no information and needs to be removed.
# Delete the veil_type column from the data frame.
mushroom[["veil_type"]] <- NULL
# Re-check the per-column level counts without that column.
summary(mushroom)
## type cap_shape cap_surface cap_color bruises odor
## e:4208 b: 452 f:2320 n :2284 f:4748 n :3528
## p:3916 c: 4 g: 4 g :1840 t:3376 f :2160
## f:3152 s:2556 e :1500 s : 576
## k: 828 y:3244 y :1072 y : 576
## s: 32 w :1040 a : 400
## x:3656 b : 168 l : 400
## (Other): 220 (Other): 484
## gill_attachment gill_spacing gill_size gill_color stalk_shape
## a: 210 c:6812 b:5612 b :1728 e:3516
## f:7914 w:1312 n:2512 p :1492 t:4608
## w :1202
## n :1048
## g : 752
## h : 732
## (Other):1170
## stalk_root stalk_surface_above_ring stalk_surface_below_ring
## ?:2480 f: 552 f: 600
## b:3776 k:2372 k:2304
## c: 556 s:5176 s:4936
## e:1120 y: 24 y: 284
## r: 192
##
##
## stalk_color_above_ring stalk_color_below_ring veil_color ring_number
## w :4464 w :4384 n: 96 n: 36
## p :1872 p :1872 o: 96 o:7488
## g : 576 g : 576 w:7924 t: 600
## n : 448 n : 512 y: 8
## b : 432 b : 432
## o : 192 o : 192
## (Other): 140 (Other): 156
## ring_type spore_print_color population habitat
## e:2776 w :2388 a: 384 d:3148
## f: 48 n :1968 c: 340 g:2148
## l:1296 k :1872 n: 400 l: 832
## n: 36 h :1632 s:1248 m: 292
## p:3968 r : 72 v:4040 p:1144
## b : 48 y:1712 u: 368
## (Other): 144 w: 192
use RWeka package for rule learning.
library(RWeka)
# Fit a OneR rule learner predicting `type` from all other columns, then
# display the learned rule.
mush_rule <- OneR(type ~ ., data = mushroom)
mush_rule
## odor:
## a -> e
## c -> p
## f -> p
## l -> e
## m -> p
## n -> e
## p -> p
## s -> p
## y -> p
## (8004/8124 instances correct)
# Print accuracy statistics and the confusion matrix for the OneR model.
summary(mush_rule)
##
## === Summary ===
##
## Correctly Classified Instances 8004 98.5229 %
## Incorrectly Classified Instances 120 1.4771 %
## Kappa statistic 0.9704
## Mean absolute error 0.0148
## Root mean squared error 0.1215
## Relative absolute error 2.958 %
## Root relative squared error 24.323 %
## Total Number of Instances 8124
##
## === Confusion Matrix ===
##
## a b <-- classified as
## 4208 0 | a = e
## 120 3796 | b = p
Use another algorithm from RWeka (JRip).
library(RWeka)
# Fit a JRip rule learner predicting `type` from all other columns, then
# display the learned rule set.
mush_rip <- JRip(type ~ ., data = mushroom)
mush_rip
## JRIP rules:
## ===========
##
## (odor = f) => type=p (2160.0/0.0)
## (gill_size = n) and (gill_color = b) => type=p (1152.0/0.0)
## (gill_size = n) and (odor = p) => type=p (256.0/0.0)
## (odor = c) => type=p (192.0/0.0)
## (spore_print_color = r) => type=p (72.0/0.0)
## (stalk_surface_below_ring = y) and (stalk_surface_above_ring = k) => type=p (68.0/0.0)
## (habitat = l) and (cap_color = w) => type=p (8.0/0.0)
## (stalk_color_above_ring = y) => type=p (8.0/0.0)
## => type=e (4208.0/0.0)
##
## Number of Rules : 9
# Print accuracy statistics and the confusion matrix for the JRip model.
summary(mush_rip)
##
## === Summary ===
##
## Correctly Classified Instances 8124 100 %
## Incorrectly Classified Instances 0 0 %
## Kappa statistic 1
## Mean absolute error 0
## Root mean squared error 0
## Relative absolute error 0 %
## Root relative squared error 0 %
## Total Number of Instances 8124
##
## === Confusion Matrix ===
##
## a b <-- classified as
## 4208 0 | a = e
## 0 3916 | b = p
How well does the rule learner predict when it is trained on only 80% of the data? As with the credit data above, we hold out the remainder for testing.
split training and testing data
library(caret)
# List the column names that remain in the mushroom data frame.
colnames(mushroom)
## [1] "type" "cap_shape"
## [3] "cap_surface" "cap_color"
## [5] "bruises" "odor"
## [7] "gill_attachment" "gill_spacing"
## [9] "gill_size" "gill_color"
## [11] "stalk_shape" "stalk_root"
## [13] "stalk_surface_above_ring" "stalk_surface_below_ring"
## [15] "stalk_color_above_ring" "stalk_color_below_ring"
## [17] "veil_color" "ring_number"
## [19] "ring_type" "spore_print_color"
## [21] "population" "habitat"
# Fix the RNG seed so the random train/test split is identical on every run.
set.seed(123)
# Row indices of an 80% training sample, partitioned on the outcome `type`.
index <- createDataPartition(mushroom$type, p = 0.8, list = FALSE)
# Rows selected by the partition form the training set; the rest are held out.
train_mush <- mushroom[index, ]
test_mush <- mushroom[-index, ]
train the JRip on training data
# Fit JRip on the training rows only.
mush_ript <- JRip(type ~ ., data = train_mush)
# Predict the class of every held-out mushroom.
mush_ript_pred <- predict(mush_ript, newdata = test_mush)
library(gmodels)
# Cross-tabulate predictions (rows) against the true labels (columns).
CrossTable(mush_ript_pred, test_mush$type)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 1624
##
##
## | test_mush$type
## mush_ript_pred | e | p | Row Total |
## ---------------|-----------|-----------|-----------|
## e | 841 | 0 | 841 |
## | 377.518 | 405.482 | |
## | 1.000 | 0.000 | 0.518 |
## | 1.000 | 0.000 | |
## | 0.518 | 0.000 | |
## ---------------|-----------|-----------|-----------|
## p | 0 | 783 | 783 |
## | 405.482 | 435.518 | |
## | 0.000 | 1.000 | 0.482 |
## | 0.000 | 1.000 | |
## | 0.000 | 0.482 | |
## ---------------|-----------|-----------|-----------|
## Column Total | 841 | 783 | 1624 |
## | 0.518 | 0.482 | |
## ---------------|-----------|-----------|-----------|
##
##
Even without tuning, the JRip model classifies all 1,624 held-out mushrooms correctly, so the method is pretty powerful on this data.