Contoh CHAID dalam R
Cek versi R yang dipergunakan
## R version 3.1.2 (2014-10-31)
## Platform: i386-w64-mingw32/i386 (32-bit)
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.9 evaluate_0.8.3 formatR_1.3 htmltools_0.3.5
## [5] knitr_1.12.3 magrittr_1.5 Rcpp_0.12.4 rmarkdown_0.9.5
## [9] stringi_1.0-1 stringr_1.0.0 tools_3.1.2 yaml_2.1.13
Install Packages CHAID
#install.packages("CHAID",repos="http://R-Forge.R-project.org")
library("CHAID")
## Loading required package: partykit
## Loading required package: grid
# Load the Titanic data, draw a random 80% training sample, and fit a CHAID
# tree for survival on class, age and gender.
# NOTE(review): the hard-coded working directory is machine-specific; it is
# kept so that relative paths keep resolving exactly as before.
setwd("D:/BARU/Damin2016/Tree Model")

# chaid() expects categorical (factor) variables. Request factors explicitly:
# read.csv() stopped converting strings to factors by default in R 4.0.0.
data1 <- read.csv("Titanic.csv", header = TRUE, sep = ",",
                  stringsAsFactors = TRUE)

# Fix the RNG seed so the train/holdout split (and therefore the fitted tree)
# is reproducible from run to run.
set.seed(2016)

# Row indices of the training units: 80% of the rows, rounded up, drawn
# without replacement. seq_len() is safe even when the data frame is empty.
train.ind <- sample(seq_len(nrow(data1)), ceiling(nrow(data1) * 0.8),
                    replace = FALSE)

mytree <- chaid(survival ~ class + age + gender, data = data1[train.ind, ])
print(mytree)
##
## Model formula:
## survival ~ class + age + gender
##
## Fitted party:
## [1] root
## | [2] gender in Female
## | | [3] class in Crew, Second Class: Survival (n = 102, err = 14.7%)
## | | [4] class in First Class: Survival (n = 117, err = 2.6%)
## | | [5] class in Third Class: Missing (n = 157, err = 47.1%)
## | [6] gender in Male
## | | [7] age in Adult
## | | | [8] class in Crew: Missing (n = 684, err = 22.1%)
## | | | [9] class in First Class: Missing (n = 134, err = 34.3%)
## | | | [10] class in Second Class: Missing (n = 138, err = 9.4%)
## | | | [11] class in Third Class: Missing (n = 374, err = 16.6%)
## | | [12] age in Children
## | | | [13] class in Crew, First Class, Second Class: Survival (n = 13, err = 0.0%)
## | | | [14] class in Third Class: Missing (n = 42, err = 28.6%)
##
## Number of inner nodes: 5
## Number of terminal nodes: 9
# Draw the fitted tree.
plot(mytree)

# Predict on the holdout units (every row not used for training).
holdout <- data1[-train.ind, ]
nb.pred <- predict(mytree, newdata = holdout)

# Compare predictions with the response column selected by NAME. The model
# formula uses `survival` as the response, so that is the column to score
# against; a positional index silently picks whatever happens to sit there.
# NOTE(review): the accuracy of about 0.095 recorded with positional column 4
# is inconsistent with the leaf error rates printed for the training data,
# which suggests column 4 is not the response - confirm against the CSV header.
# Re-level the actual labels to the predicted levels so both table margins
# share the same ordering and diag() really counts agreements.
actual <- factor(holdout[["survival"]], levels = levels(nb.pred))
confusion.mat <- table(nb.pred, actual)

# Raw accuracy: share of holdout units on the diagonal of the confusion matrix.
sum(diag(confusion.mat)) / sum(confusion.mat)
## [1] 0.09545455