LOAD PACKAGES REQUIRED FOR DECISION TREES

library(CHAID) 
## Loading required package: partykit
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm

LOAD DATA

# Read the employee churn data set from the project directory.
# The path is built with file.path() rather than changing the session's
# working directory with setwd(), so the script leaves global state untouched.
data_dir <- "~/RProjects/DecisionTree"
empdata <- read.csv(file.path(data_dir, "Employee Churn.csv"))

PREPARE DATA

# Treat the response as categorical so the tree is fitted as a classifier.
empdata$status <- as.factor(empdata$status)
# Drop the `sn` column so it is not picked up as a predictor by `status ~ .`
# (presumably a serial-number / row-id column -- confirm against the CSV).
empdata$sn <- NULL
# chaid() only works with factor variables, and since R 4.0 read.csv() leaves
# text columns as character by default. Convert every character column to a
# factor; columns of any other type are returned unchanged.
empdata[] <- lapply(empdata, function(column) {
  if (is.character(column)) factor(column) else column
})

RUN CHAID TO FORM DECISION TREE

# Fit a CHAID tree with `status` as the response and every other column of
# `empdata` as a candidate predictor, then print the fitted tree.
tree <- chaid(status ~ ., data = empdata)
print(tree)
## 
## Model formula:
## status ~ function. + exp + gender + source
## 
## Fitted party:
## [1] root
## |   [2] exp <3: 1 (n = 35, err = 28.6%)
## |   [3] exp >=3 and <=5, >5
## |   |   [4] function. in CS: 1 (n = 11, err = 45.5%)
## |   |   [5] function. in FINANCE, MARKETING: 0 (n = 37, err = 5.4%)
## 
## Number of inner nodes:    2
## Number of terminal nodes: 3

PLOT DECISION TREE

# Draw the fitted CHAID tree using the "simple" plot type.
plot(x = tree, type = "simple")

ROC CURVE

# Class-probability predictions from the fitted tree on the training data
# itself (type = "prob" yields one column per class).
predtree <- predict(tree, newdata = empdata, type = "prob")
library(ROCR)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# Build the ROCR prediction object from the second probability column and the
# observed labels.
# NOTE(review): column 2 is presumably the probability of class `1` -- confirm
# against levels(empdata$status).
pred <- prediction(predtree[, 2], empdata$status)
# ROC curve: true-positive rate plotted against false-positive rate.
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf)
# Diagonal reference line (intercept 0, slope 1) to compare the curve against.
abline(a = 0, b = 1)

AREA UNDER CURVE - GOODNESS OF FIT FOR DECISION TREE

# Compute the area under the ROC curve from the ROCR prediction object and
# display the stored value(s), which are held in the `y.values` slot.
auc <- performance(pred, measure = "auc")
slot(auc, "y.values")
## [[1]]
## [1] 0.8393939

DECISION TREE USING partykit

# Fit a second tree with partykit's ctree() on the same response and
# predictors, then draw it using the "simple" plot type.
# The result is stored in a variable that is also called `ctree`; the function
# itself is always reached through the `partykit::` prefix.
ctree <- partykit::ctree(status ~ ., data = empdata)
plot(x = ctree, type = "simple")