#SOURCE https://sites.google.com/site/kittipat/rtechniques/usingchaiddecisiontreeinr
#install.packages("CHAID", repos="http://R-Forge.R-project.org")
library("CHAID")
## Loading required package: partykit
## Loading required package: grid
### fit tree to sub-sample
# Fix the random-number seed so the sub-sample drawn later is reproducible.
set.seed(290875L)
# Load the USvote data frame into the workspace.
data(USvote)
#
# A data frame with 10645 observations on the following 6 variables.
#
# vote3
# candidate voted for Gore or Bush
#
# gender
# gender, a factor with levels male and female
#
# ager
# age group, an ordered factor with levels 18-24 < 25-34 < 35-44 < 45-54 < 55-64 < 65+
#
# empstat
# status of employment, a factor with levels yes, no or retired
#
# educr
# status of education, an ordered factor with levels <HS < HS < >HS < College < Post Coll
#
# marstat
# status of living situation, a factor with levels married, widowed, divorced or never married
# Draw a random sub-sample of 1000 rows (without replacement) from USvote.
# seq_len(nrow(.)) is used instead of 1:nrow(.), which would produce c(1, 0)
# for a zero-row data frame; for a non-empty frame the index vector, and
# therefore the rows drawn for a given seed, are the same.
USvoteS <- USvote[sample(seq_len(nrow(USvote)), 1000), ]
# Tree-growing settings handed to chaid() below.
ctrl <- chaid_control(minsplit = 20, minbucket = 5, minprob = 0)
# Fit vote3 against all remaining columns of the sub-sample.
chaidUS <- chaid(vote3 ~ ., data = USvoteS, control = ctrl)
# Print the fitted tree as text.
print(chaidUS)
##
## Model formula:
## vote3 ~ gender + ager + empstat + educr + marstat
##
## Fitted party:
## [1] root
## | [2] marstat in married
## | | [3] educr in <HS, HS, >HS: Gore (n = 311, err = 49.5%)
## | | [4] educr in College, Post Coll: Bush (n = 249, err = 35.3%)
## | [5] marstat in widowed, divorced, never married
## | | [6] gender in male: Gore (n = 159, err = 47.8%)
## | | [7] gender in female
## | | | [8] ager in 18-24, 25-34, 35-44, 45-54: Gore (n = 127, err = 22.0%)
## | | | [9] ager in 55-64, 65+: Gore (n = 115, err = 40.9%)
##
## Number of inner nodes: 4
## Number of terminal nodes: 5
## Plot the decision tree: draw the fitted chaidUS object (the same tree
## printed as text by print(chaidUS)).
plot(chaidUS)
