# LOAD PACKAGES REQUIRED FOR DECISION TREES
library(CHAID)
## Loading required package: partykit
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm
# LOAD DATA
# Switch to the project folder, then read the employee churn data set from it
# into the data frame `empdata`.
# NOTE(review): the working directory is hard-coded to one user's home folder;
# consider a project-relative path so the script runs on other machines.
setwd(dir = "~/RProjects/DecisionTree")
empdata <- read.csv(file = "Employee Churn.csv")
# PREPARE DATA
# Prepare `empdata` for CHAID, which works on categorical (factor) variables.

# The response must be a factor so the tree is fitted as a classifier.
empdata$status <- as.factor(empdata$status)

# Since R 4.0.0 read.csv() keeps text columns as character vectors
# (stringsAsFactors = FALSE by default), which chaid() cannot use as
# predictors. Convert any such columns to factors. On older R versions the
# columns are already factors and this step does nothing.
is_text <- vapply(empdata, is.character, logical(1))
if (any(is_text)) {
  empdata[is_text] <- lapply(empdata[is_text], as.factor)
}

# Drop the `sn` column so that `status ~ .` does not use it as a predictor
# (presumably a serial number / row id -- confirm against the data file).
empdata$sn <- NULL
# RUN CHAID TO FORM DECISION TREE
# Fit a CHAID tree with `status` as the response and every other column of
# `empdata` as a candidate predictor, then auto-print the fitted tree.
tree <- chaid(status ~ ., data = empdata)
tree
##
## Model formula:
## status ~ function. + exp + gender + source
##
## Fitted party:
## [1] root
## | [2] exp <3: 1 (n = 35, err = 28.6%)
## | [3] exp >=3 and <=5, >5
## | | [4] function. in CS: 1 (n = 11, err = 45.5%)
## | | [5] function. in FINANCE, MARKETING: 0 (n = 37, err = 5.4%)
##
## Number of inner nodes: 2
## Number of terminal nodes: 3
# PLOT DECISION TREE
# Draw the fitted CHAID tree using the "simple" plot type.
plot(x = tree, type = "simple")
# ROC CURVE
# Class-membership probabilities from the fitted tree, predicted on the same
# data frame the tree was fitted on (in-sample predictions).
predtree <- predict(tree, newdata = empdata, type = "prob")
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Build the ROC curve from the predicted probabilities.
# Column 2 of `predtree` is used as the score for the positive class
# (presumably the second level of `status`, i.e. "1" -- confirm with
# levels(empdata$status)).
pred <- prediction(predtree[, 2], empdata$status)

# True-positive rate against false-positive rate at every cutoff.
perf <- performance(pred, "tpr", "fpr")
plot(perf)

# Diagonal reference line (intercept 0, slope 1): the ROC of a random guess.
abline(a = 0, b = 1)
# AREA UNDER CURVE - GOODNESS OF FIT FOR DECISION TREE
# Compute the area under the ROC curve for the tree's predictions and
# auto-print the value stored in the `y.values` slot of the result.
auc <- performance(pred, measure = "auc")
slot(auc, "y.values")
## [[1]]
## [1] 0.8393939
# DECISION TREE USING partykit
# Fit a conditional inference tree with partykit on the same response and
# predictors, then draw it using the "simple" plot type.
# The result is stored in a variable named `ctree`; the function is always
# called with its namespace prefix, so the shared name is not ambiguous.
ctree <- partykit::ctree(status ~ ., data = empdata)
plot(x = ctree, type = "simple")