Contoh CHAID dalam R

Cek versi R yang digunakan (keluaran `sessionInfo()`)

## R version 3.1.2 (2014-10-31)
## Platform: i386-w64-mingw32/i386 (32-bit)
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.9    evaluate_0.8.3  formatR_1.3     htmltools_0.3.5
##  [5] knitr_1.12.3    magrittr_1.5    Rcpp_0.12.4     rmarkdown_0.9.5
##  [9] stringi_1.0-1   stringr_1.0.0   tools_3.1.2     yaml_2.1.13

Instalasi dan pemuatan paket CHAID

# One-time installation of CHAID from the R-Forge repository (uncomment to run).
# install.packages("CHAID", repos = "http://R-Forge.R-project.org")
library("CHAID")
## Loading required package: partykit
## Loading required package: grid

# NOTE(review): hard-coded, machine-specific working directory. It is kept so
# that any relative paths used later keep resolving exactly as before.
setwd("D:/BARU/Damin2016/Tree Model")

# chaid() works on categorical (factor) variables. read.csv() no longer
# converts strings to factors by default since R 4.0, so request it explicitly
# to get the same data frame on old and new R versions.
data1 <- read.csv("Titanic.csv", header = TRUE, sep = ",",
                  stringsAsFactors = TRUE)

# Draw 80% of the row indices (rounded up) without replacement for training.
# The seed is fixed so the train/holdout split can be reproduced.
set.seed(2016)
train.ind <- sample(seq_len(nrow(data1)), ceiling(nrow(data1) * 0.8),
                    replace = FALSE)

# Fit the CHAID tree for `survival` on the training rows only.
mytree <- chaid(survival ~ class + age + gender, data = data1[train.ind, ])

# The tree listing recorded below in this document came from an earlier,
# unseeded run; re-run the script to refresh it.
print(mytree)
## 
## Model formula:
## survival ~ class + age + gender
## 
## Fitted party:
## [1] root
## |   [2] gender in Female
## |   |   [3] class in Crew, Second Class: Survival (n = 102, err = 14.7%)
## |   |   [4] class in First Class: Survival (n = 117, err = 2.6%)
## |   |   [5] class in Third Class: Missing (n = 157, err = 47.1%)
## |   [6] gender in Male
## |   |   [7] age in Adult
## |   |   |   [8] class in Crew: Missing (n = 684, err = 22.1%)
## |   |   |   [9] class in First Class: Missing (n = 134, err = 34.3%)
## |   |   |   [10] class in Second Class: Missing (n = 138, err = 9.4%)
## |   |   |   [11] class in Third Class: Missing (n = 374, err = 16.6%)
## |   |   [12] age in Children
## |   |   |   [13] class in Crew, First Class, Second Class: Survival (n = 13, err = 0.0%)
## |   |   |   [14] class in Third Class: Missing (n = 42, err = 28.6%)
## 
## Number of inner nodes:    5
## Number of terminal nodes: 9
 # Draw the fitted CHAID tree (`mytree`) on the active graphics device.
 plot(mytree)

# Predict the response for the holdout rows (those not drawn into train.ind).
nb.pred <- predict(mytree, data1[-train.ind, ])

# Observed response for the same holdout rows. Select it by NAME: the model
# formula's response is `survival`, and a positional index (previously 4)
# silently compares against whatever column happens to sit there.
actual <- data1$survival[-train.ind]

# Cross-tabulate predicted vs. observed classes. The observed values are
# re-coded onto the prediction's levels so the table is square and its
# diagonal really holds the correctly classified cases.
confusion.mat <- table(
  predicted = nb.pred,
  actual = factor(actual, levels = levels(nb.pred))
)

# raw accuracy: share of holdout rows on the diagonal of the confusion matrix
sum(diag(confusion.mat)) / sum(confusion.mat)
## NOTE: the previously recorded output, [1] 0.09545455, was computed against
## positional column 4, which does not appear to be the response (it is far
## below what the training error rates above imply); re-run to refresh.