## DataMiningGAGrp2_CART_OriginalData.R

##DataMining Group Assignment 

##Loading Required Libraries
## NOTE(review): the working directory is hard-coded to one machine's absolute
## path; the relative CSV file name passed to read.csv() further down is
## resolved against it, so the script only runs as-is on that machine.
setwd("C:/Users/acer/Documents/PGPBABI/Data Mining/GrpAssignment")
library(rpart)       # rpart(), rpart.control(), prune(), printcp()
library(rpart.plot)
library(neuralnet)   # NOTE(review): its prediction() is masked once ROCR is loaded
library(rattle)      # fancyRpartPlot()

## Rattle: A free graphical interface for data science with R.
## Version 5.1.0 Copyright (c) 2006-2017 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.

library(RColorBrewer)
library(ggplot2)
library(grid)
library(gridExtra)
library(corrplot)

## corrplot 0.84 loaded

##Reading Data into a DataFrame
## stringsAsFactors is set explicitly. The structure recorded below shows the
## text columns (Attrition, BusinessTravel, ...) as factors, which was the
## read.csv() default before R 4.0.0. From R 4.0.0 the default is FALSE, which
## would load them as character and break the later C5.0() fit (it requires a
## factor outcome). Being explicit gives the same result on every R version.
HRData <- read.csv(
  file = "HR_Employee_Attrition_Data.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)

##View the data structure
str(HRData)

## 'data.frame':    2940 obs. of  35 variables:
##  $ Age                     : int  41 49 37 33 27 32 59 30 38 36 ...
##  $ Attrition               : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 1 1 1 1 1 ...
##  $ BusinessTravel          : Factor w/ 3 levels "Non-Travel","Travel_Frequently",..: 3 2 3 2 3 2 3 3 2 3 ...
##  $ DailyRate               : int  1102 279 1373 1392 591 1005 1324 1358 216 1299 ...
##  $ Department              : Factor w/ 3 levels "Human Resources",..: 3 2 2 2 2 2 2 2 2 2 ...
##  $ DistanceFromHome        : int  1 8 2 3 2 2 3 24 23 27 ...
##  $ Education               : int  2 1 2 4 1 2 3 1 3 3 ...
##  $ EducationField          : Factor w/ 6 levels "Human Resources",..: 2 2 5 2 4 2 4 2 2 4 ...
##  $ EmployeeCount           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ EmployeeNumber          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ EnvironmentSatisfaction : int  2 3 4 4 1 4 3 4 4 3 ...
##  $ Gender                  : Factor w/ 2 levels "Female","Male": 1 2 2 1 2 2 1 2 2 2 ...
##  $ HourlyRate              : int  94 61 92 56 40 79 81 67 44 94 ...
##  $ JobInvolvement          : int  3 2 2 3 3 3 4 3 2 3 ...
##  $ JobLevel                : int  2 2 1 1 1 1 1 1 3 2 ...
##  $ JobRole                 : Factor w/ 9 levels "Healthcare Representative",..: 8 7 3 7 3 3 3 3 5 1 ...
##  $ JobSatisfaction         : int  4 2 3 3 2 4 1 3 3 3 ...
##  $ MaritalStatus           : Factor w/ 3 levels "Divorced","Married",..: 3 2 3 2 2 3 2 1 3 2 ...
##  $ MonthlyIncome           : int  5993 5130 2090 2909 3468 3068 2670 2693 9526 5237 ...
##  $ MonthlyRate             : int  19479 24907 2396 23159 16632 11864 9964 13335 8787 16577 ...
##  $ NumCompaniesWorked      : int  8 1 6 1 9 0 4 1 0 6 ...
##  $ Over18                  : Factor w/ 1 level "Y": 1 1 1 1 1 1 1 1 1 1 ...
##  $ OverTime                : Factor w/ 2 levels "No","Yes": 2 1 2 2 1 1 2 1 1 1 ...
##  $ PercentSalaryHike       : int  11 23 15 11 12 13 20 22 21 13 ...
##  $ PerformanceRating       : int  3 4 3 3 3 3 4 4 4 3 ...
##  $ RelationshipSatisfaction: int  1 4 2 3 4 3 1 2 2 2 ...
##  $ StandardHours           : int  80 80 80 80 80 80 80 80 80 80 ...
##  $ StockOptionLevel        : int  0 1 0 0 1 0 3 1 0 2 ...
##  $ TotalWorkingYears       : int  8 10 7 8 6 8 12 1 10 17 ...
##  $ TrainingTimesLastYear   : int  0 3 3 3 3 2 3 2 2 3 ...
##  $ WorkLifeBalance         : int  1 3 3 3 3 2 2 3 3 2 ...
##  $ YearsAtCompany          : int  6 10 0 8 2 7 1 1 9 7 ...
##  $ YearsInCurrentRole      : int  4 7 0 7 2 7 0 0 7 7 ...
##  $ YearsSinceLastPromotion : int  0 1 0 3 2 3 0 0 1 7 ...
##  $ YearsWithCurrManager    : int  5 7 0 0 2 6 0 0 8 7 ...

## Per-column overview of the raw data: quartiles/mean for the integer columns
## and level counts for the factor columns (see the recorded output below).
summary(HRData)

##       Age        Attrition            BusinessTravel   DailyRate     
##  Min.   :18.00   No :2466   Non-Travel       : 300   Min.   : 102.0  
##  1st Qu.:30.00   Yes: 474   Travel_Frequently: 554   1st Qu.: 465.0  
##  Median :36.00              Travel_Rarely    :2086   Median : 802.0  
##  Mean   :36.92                                       Mean   : 802.5  
##  3rd Qu.:43.00                                       3rd Qu.:1157.0  
##  Max.   :60.00                                       Max.   :1499.0  
##                                                                      
##                   Department   DistanceFromHome   Education    
##  Human Resources       : 126   Min.   : 1.000   Min.   :1.000  
##  Research & Development:1922   1st Qu.: 2.000   1st Qu.:2.000  
##  Sales                 : 892   Median : 7.000   Median :3.000  
##                                Mean   : 9.193   Mean   :2.913  
##                                3rd Qu.:14.000   3rd Qu.:4.000  
##                                Max.   :29.000   Max.   :5.000  
##                                                                
##           EducationField EmployeeCount EmployeeNumber  
##  Human Resources :  54   Min.   :1     Min.   :   1.0  
##  Life Sciences   :1212   1st Qu.:1     1st Qu.: 735.8  
##  Marketing       : 318   Median :1     Median :1470.5  
##  Medical         : 928   Mean   :1     Mean   :1470.5  
##  Other           : 164   3rd Qu.:1     3rd Qu.:2205.2  
##  Technical Degree: 264   Max.   :1     Max.   :2940.0  
##                                                        
##  EnvironmentSatisfaction    Gender       HourlyRate     JobInvolvement
##  Min.   :1.000           Female:1176   Min.   : 30.00   Min.   :1.00  
##  1st Qu.:2.000           Male  :1764   1st Qu.: 48.00   1st Qu.:2.00  
##  Median :3.000                         Median : 66.00   Median :3.00  
##  Mean   :2.722                         Mean   : 65.89   Mean   :2.73  
##  3rd Qu.:4.000                         3rd Qu.: 84.00   3rd Qu.:3.00  
##  Max.   :4.000                         Max.   :100.00   Max.   :4.00  
##                                                                       
##     JobLevel                          JobRole    JobSatisfaction
##  Min.   :1.000   Sales Executive          :652   Min.   :1.000  
##  1st Qu.:1.000   Research Scientist       :584   1st Qu.:2.000  
##  Median :2.000   Laboratory Technician    :518   Median :3.000  
##  Mean   :2.064   Manufacturing Director   :290   Mean   :2.729  
##  3rd Qu.:3.000   Healthcare Representative:262   3rd Qu.:4.000  
##  Max.   :5.000   Manager                  :204   Max.   :4.000  
##                  (Other)                  :430                  
##   MaritalStatus  MonthlyIncome    MonthlyRate    NumCompaniesWorked
##  Divorced: 654   Min.   : 1009   Min.   : 2094   Min.   :0.000     
##  Married :1346   1st Qu.: 2911   1st Qu.: 8045   1st Qu.:1.000     
##  Single  : 940   Median : 4919   Median :14236   Median :2.000     
##                  Mean   : 6503   Mean   :14313   Mean   :2.693     
##                  3rd Qu.: 8380   3rd Qu.:20462   3rd Qu.:4.000     
##                  Max.   :19999   Max.   :26999   Max.   :9.000     
##                                                                    
##  Over18   OverTime   PercentSalaryHike PerformanceRating
##  Y:2940   No :2108   Min.   :11.00     Min.   :3.000    
##           Yes: 832   1st Qu.:12.00     1st Qu.:3.000    
##                      Median :14.00     Median :3.000    
##                      Mean   :15.21     Mean   :3.154    
##                      3rd Qu.:18.00     3rd Qu.:3.000    
##                      Max.   :25.00     Max.   :4.000    
##                                                         
##  RelationshipSatisfaction StandardHours StockOptionLevel TotalWorkingYears
##  Min.   :1.000            Min.   :80    Min.   :0.0000   Min.   : 0.00    
##  1st Qu.:2.000            1st Qu.:80    1st Qu.:0.0000   1st Qu.: 6.00    
##  Median :3.000            Median :80    Median :1.0000   Median :10.00    
##  Mean   :2.712            Mean   :80    Mean   :0.7939   Mean   :11.28    
##  3rd Qu.:4.000            3rd Qu.:80    3rd Qu.:1.0000   3rd Qu.:15.00    
##  Max.   :4.000            Max.   :80    Max.   :3.0000   Max.   :40.00    
##                                                                           
##  TrainingTimesLastYear WorkLifeBalance YearsAtCompany   YearsInCurrentRole
##  Min.   :0.000         Min.   :1.000   Min.   : 0.000   Min.   : 0.000    
##  1st Qu.:2.000         1st Qu.:2.000   1st Qu.: 3.000   1st Qu.: 2.000    
##  Median :3.000         Median :3.000   Median : 5.000   Median : 3.000    
##  Mean   :2.799         Mean   :2.761   Mean   : 7.008   Mean   : 4.229    
##  3rd Qu.:3.000         3rd Qu.:3.000   3rd Qu.: 9.000   3rd Qu.: 7.000    
##  Max.   :6.000         Max.   :4.000   Max.   :40.000   Max.   :18.000    
##                                                                           
##  YearsSinceLastPromotion YearsWithCurrManager
##  Min.   : 0.000          Min.   : 0.000      
##  1st Qu.: 0.000          1st Qu.: 2.000      
##  Median : 1.000          Median : 3.000      
##  Mean   : 2.188          Mean   : 4.123      
##  3rd Qu.: 3.000          3rd Qu.: 7.000      
##  Max.   :15.000          Max.   :17.000      
##

##Standard Hours, Over 18, Employee Count & Employee number attributes are not useful for analysis
## The four columns are dropped by name instead of by position, so the
## selection stays correct even if the column order of the CSV changes.
## With the 35-column layout shown by str(HRData) this keeps the same 31
## columns, in the same order, as the positional selection did.
drop_cols <- c("EmployeeCount", "EmployeeNumber", "Over18", "StandardHours")
HRData1 <- HRData[, !(names(HRData) %in% drop_cols), drop = FALSE]

##Separating Data into Dev & Holdout (Train & Test)
## One uniform(0, 1) draw per row under a fixed seed; rows with a draw
## <= 0.70 go to Train and the remainder to Test. The draw is stored on HRData
## (not HRData1) so that it never becomes a predictor in the modelling frames.
set.seed(200)
HRData$Random <- runif(nrow(HRData1), 0, 1)
in_train <- HRData$Random <= 0.70
HRData.Train <- HRData1[in_train, ]
HRData.Test <- HRData1[!in_train, ]

##Exploratory Data Analysis
##We will compare each of variables in data set to the Target Variable (Attrition)
## attach(HRData.Train) has been removed: every plot receives the data frame
## through `data =`, and the model/prediction calls pass it explicitly, so
## nothing relied on the attached copy. Leaving it attached only put a frozen
## snapshot of the columns on the search path, which would silently go stale
## once prediction columns are appended to HRData.Train.
##We shall consider the variables in group of 4 to observe the impact on Attrition
AgeAttrition <- ggplot(data = HRData.Train, aes(Age, fill = Attrition)) +
  geom_density() +
  facet_grid(~Attrition)
TravelAttrition <- ggplot(data = HRData.Train, aes(BusinessTravel, fill = Attrition)) +
  geom_bar()
TotWorkAttrition <- ggplot(data = HRData.Train, aes(TotalWorkingYears, fill = Attrition)) +
  geom_bar()
DepartAttrition <- ggplot(data = HRData.Train, aes(Department, fill = Attrition)) +
  geom_bar()
grid.arrange(
  AgeAttrition, TravelAttrition, TotWorkAttrition, DepartAttrition,
  ncol = 2,
  top = "Attrition Analysis - Age, Travel, Total Experience & Department"
)

##Second Group of Variables
## Stacked bar counts per value, coloured by Attrition, shown on a 2-column grid.
DistanceAttrition <- ggplot(HRData.Train, aes(x = DistanceFromHome, fill = Attrition)) +
  geom_bar()
EduAttrition <- ggplot(HRData.Train, aes(x = Education, fill = Attrition)) +
  geom_bar()
EduFieldAttrition <- ggplot(HRData.Train, aes(x = EducationField, fill = Attrition)) +
  geom_bar()
EnvSatisAttrition <- ggplot(HRData.Train, aes(x = EnvironmentSatisfaction, fill = Attrition)) +
  geom_bar()
grid.arrange(
  DistanceAttrition, EduAttrition, EduFieldAttrition, EnvSatisAttrition,
  ncol = 2,
  top = "Attrition Analysis - Distance from Home, Education & Environment satisfaction "
)

##Gender, Marital Status View
## Two bar charts side by side, each split by Attrition.
GenderAttrition <- ggplot(HRData.Train, aes(x = Gender, fill = Attrition)) +
  geom_bar()
MaritalAttrition <- ggplot(HRData.Train, aes(x = MaritalStatus, fill = Attrition)) +
  geom_bar()
grid.arrange(
  GenderAttrition, MaritalAttrition,
  ncol = 2,
  top = "Attrition Analysis - Gender & Marital Status"
)

##Daily Hourly & Monthly Rate View
## Point plots with the rate on the x axis and the Attrition class on the y axis.
DRateAttrition <- ggplot(HRData.Train, aes(x = DailyRate, y = Attrition)) +
  geom_point()
HRateAttrition <- ggplot(HRData.Train, aes(x = HourlyRate, y = Attrition)) +
  geom_point()
MRateAttriion <- ggplot(HRData.Train, aes(x = MonthlyRate, y = Attrition)) +
  geom_point()
grid.arrange(
  DRateAttrition, HRateAttrition, MRateAttriion,
  ncol = 2,
  top = "Attrition Analysis - Daily, Hourly & MOnthly Rate"
)

##Job Parameters View
## Bar counts for the four job-related columns, each split by Attrition.
JobInvolAttrition <- ggplot(HRData.Train, aes(x = JobInvolvement, fill = Attrition)) +
  geom_bar()
JobLevelAttrition <- ggplot(HRData.Train, aes(x = JobLevel, fill = Attrition)) +
  geom_bar()
JobRoleAttrition <- ggplot(HRData.Train, aes(x = JobRole, fill = Attrition)) +
  geom_bar()
JobSatisAttrition <- ggplot(HRData.Train, aes(x = JobSatisfaction, fill = Attrition)) +
  geom_bar()
grid.arrange(
  JobInvolAttrition, JobLevelAttrition, JobRoleAttrition, JobSatisAttrition,
  ncol = 2,
  top = "Attrition Analysis - Job Role, Level, Satisfaction & Involvement "
)

##Income & Related Parameters View
## Monthly income is drawn as a density faceted by Attrition; the other three
## columns are drawn as bar counts split by Attrition.
MonthlyIncAttrition <- ggplot(HRData.Train, aes(x = MonthlyIncome, fill = Attrition)) +
  geom_density() +
  facet_grid(~Attrition)
OverTAttrition <- ggplot(HRData.Train, aes(x = OverTime, fill = Attrition)) +
  geom_bar()
PerfRateAttrition <- ggplot(HRData.Train, aes(x = PerformanceRating, fill = Attrition)) +
  geom_bar()
PercentHikeAttrition <- ggplot(HRData.Train, aes(x = PercentSalaryHike, fill = Attrition)) +
  geom_bar()
grid.arrange(
  MonthlyIncAttrition, OverTAttrition, PerfRateAttrition, PercentHikeAttrition,
  ncol = 2,
  top = "Attrition Analysis - Monthly Income, Over Time Perf, Rating & Percent Hike"
)

##Training, Relationship Satisfaction & Work Life Balance
## Five bar charts split by Attrition; they are laid out on the grid in the
## order listed in the grid.arrange() call.
WorkLifBalAttrition <- ggplot(HRData.Train, aes(x = WorkLifeBalance, fill = Attrition)) +
  geom_bar()
NumCompAttrition <- ggplot(HRData.Train, aes(x = NumCompaniesWorked, fill = Attrition)) +
  geom_bar()
TrainTimeAttrition <- ggplot(HRData.Train, aes(x = TrainingTimesLastYear, fill = Attrition)) +
  geom_bar()
StockOptAttrition <- ggplot(HRData.Train, aes(x = StockOptionLevel, fill = Attrition)) +
  geom_bar()
RelSatisAttrition <- ggplot(HRData.Train, aes(x = RelationshipSatisfaction, fill = Attrition)) +
  geom_bar()
grid.arrange(
  WorkLifBalAttrition, NumCompAttrition, RelSatisAttrition, StockOptAttrition, TrainTimeAttrition,
  ncol = 2,
  top = "Attrition Analysis - WorkLife Balance, No. Companies Worked, Training Time, Relationship Satisfaction & Stock Options"
)

##Promotion, Role, Current Manager & Years at Company
## Bar counts for the four tenure-related columns, each split by Attrition.

YearsCompAttrition <- ggplot(HRData.Train, aes(x = YearsAtCompany, fill = Attrition)) +
  geom_bar()
YearsRoleAttrition <- ggplot(HRData.Train, aes(x = YearsInCurrentRole, fill = Attrition)) +
  geom_bar()
YearsPromoAttrition <- ggplot(HRData.Train, aes(x = YearsSinceLastPromotion, fill = Attrition)) +
  geom_bar()
YearsMgrAttrition <- ggplot(HRData.Train, aes(x = YearsWithCurrManager, fill = Attrition)) +
  geom_bar()
grid.arrange(
  YearsCompAttrition, YearsRoleAttrition, YearsPromoAttrition, YearsMgrAttrition,
  ncol = 2,
  top = "Attrition Analysis - Years in Role, Promotion, Manager and Company"
)

##Construct the Classification & Regression Tree
## Control parameters for rpart: a node needs at least 60 observations before
## a split is attempted, cp = 0 lets the tree grow without a complexity
## cut-off, and 10 cross-validations feed the xerror column of the CP table.
r.ctrl <- rpart.control(minsplit = 60, cp = 0, xval = 10)

## Classification tree for Attrition on all remaining columns of the train set.
Tree1 <- rpart(
  formula = Attrition ~ .,
  data = HRData.Train,
  method = "class",
  control = r.ctrl
)

## Complexity-parameter table, used below to choose the pruning threshold.
printcp(Tree1)

## 
## Classification tree:
## rpart(formula = Attrition ~ ., data = HRData.Train, method = "class", 
##     control = r.ctrl)
## 
## Variables actually used in tree construction:
##  [1] Age                     BusinessTravel         
##  [3] DailyRate               EnvironmentSatisfaction
##  [5] HourlyRate              JobRole                
##  [7] MaritalStatus           MonthlyIncome          
##  [9] NumCompaniesWorked      OverTime               
## [11] TotalWorkingYears      
## 
## Root node error: 354/2120 = 0.16698
## 
## n= 2120 
## 
##          CP nsplit rel error  xerror     xstd
## 1 0.0579096      0   1.00000 1.00000 0.048509
## 2 0.0353107      2   0.88418 0.95763 0.047672
## 3 0.0254237      4   0.81356 0.89266 0.046322
## 4 0.0183616      5   0.78814 0.87571 0.045957
## 5 0.0141243      7   0.75141 0.85028 0.045397
## 6 0.0047081      8   0.73729 0.83616 0.045080
## 7 0.0000000     11   0.72316 0.84463 0.045271

## Plot the fully grown tree.
fancyRpartPlot(Tree1)

##Prune the Tree
## cp = 0.005 sits just above the last non-zero CP in the table printed for
## Tree1 (0.0047081, at 8 splits), so pruning keeps the 8-split tree and drops
## the later splits. The stray positional "CP" argument that used to follow
## `cp` was removed: prune() only takes the tree and cp, so the extra string
## was swallowed by `...` and ignored.
Tree1Pruned <- prune(Tree1, cp = 0.005)
printcp(Tree1Pruned)

## 
## Classification tree:
## rpart(formula = Attrition ~ ., data = HRData.Train, method = "class", 
##     control = r.ctrl)
## 
## Variables actually used in tree construction:
## [1] BusinessTravel    DailyRate         HourlyRate        JobRole          
## [5] MaritalStatus     MonthlyIncome     OverTime          TotalWorkingYears
## 
## Root node error: 354/2120 = 0.16698
## 
## n= 2120 
## 
##         CP nsplit rel error  xerror     xstd
## 1 0.057910      0   1.00000 1.00000 0.048509
## 2 0.035311      2   0.88418 0.95763 0.047672
## 3 0.025424      4   0.81356 0.89266 0.046322
## 4 0.018362      5   0.78814 0.87571 0.045957
## 5 0.014124      7   0.75141 0.85028 0.045397
## 6 0.005000      8   0.73729 0.83616 0.045080

## Plot the pruned tree.
fancyRpartPlot(Tree1Pruned)

##Scoring Syntax on Train Data
## predictclass holds the predicted label; predictscore holds the per-class
## scores returned by predict() with no `type` (indexed later as `[, 2]`).
HRData.Train$predictclass <- predict(Tree1Pruned, newdata = HRData.Train, type = "class")
HRData.Train$predictscore <- predict(Tree1Pruned, newdata = HRData.Train)

##Scoring Syntax on Test Data
## Same two columns, scored on the hold-out rows.
HRData.Test$predictclass <- predict(Tree1Pruned, newdata = HRData.Test, type = "class")
HRData.Test$predictscore <- predict(Tree1Pruned, newdata = HRData.Test)

##KS Statistic for Train Data
library(ROCR)

## Loading required package: gplots

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:stats':
## 
##     lowess

## 
## Attaching package: 'ROCR'

## The following object is masked from 'package:neuralnet':
## 
##     prediction

## KS on the train set: the largest gap between true-positive rate and
## false-positive rate over all score cut-offs, using the second score column.
TrainCart.p.scores <- prediction(
  HRData.Train$predictscore[, 2],
  HRData.Train$Attrition
)
TrainCart.p.perf <- performance(TrainCart.p.scores, measure = "tpr", x.measure = "fpr")
ks.TrainCart <- max(TrainCart.p.perf@y.values[[1]] - TrainCart.p.perf@x.values[[1]])
ks.TrainCart

## [1] 0.4414842

##KS Statistic for Test Data
library(ROCR)
TestCart.p.scores <- prediction(
  HRData.Test$predictscore[, 2],
  HRData.Test$Attrition
)
TestCart.p.perf <- performance(TestCart.p.scores, measure = "tpr", x.measure = "fpr")
ks.TestCart <- max(TestCart.p.perf@y.values[[1]] - TestCart.p.perf@x.values[[1]])
ks.TestCart

## [1] 0.3388095

##AUC for Train & Test Data
## Area under the ROC curve, taken from the ROCR prediction objects built for
## the KS statistic and reduced to a plain number.
AUCTrain <- as.numeric(performance(TrainCart.p.scores, measure = "auc")@y.values)
AUCTrain

## [1] 0.7580763

AUCTest <- as.numeric(performance(TestCart.p.scores, measure = "auc")@y.values)
AUCTest

## [1] 0.6966429

##Accuracy performance of CART
## postResample() compares predicted labels with the observed Attrition labels
## and reports Accuracy and Kappa (see the recorded output).
library(caret)

## Loading required package: lattice

AccuracyTrainCart <- postResample(
  pred = HRData.Train$predictclass,
  obs = HRData.Train$Attrition
)
AccuracyTrainCart

##  Accuracy     Kappa 
## 0.8768868 0.4693718

AccuracyTestCart <- postResample(
  pred = HRData.Test$predictclass,
  obs = HRData.Test$Attrition
)
AccuracyTestCart

##  Accuracy     Kappa 
## 0.8548780 0.3233948

##Using Ensemble methods

##Load Libraries
library(mlbench)
library(caret)
library(caretEnsemble)

## 
## Attaching package: 'caretEnsemble'

## The following object is masked from 'package:ggplot2':
## 
##     autoplot

library(C50)
library(rattle)

##Refreshing the dataset for ensemble purpose
## Keep only the first 31 columns of each frame, i.e. the target plus the
## original predictors; this leaves out the predictclass / predictscore
## columns that the CART scoring step appended to HRData.Train / HRData.Test.
keep_idx <- seq_len(31)
HRData.TrainE <- HRData.Train[, keep_idx]
HRData.TestE <- HRData.Test[, keep_idx]

##Boosting using C5.0
## NOTE(review): C5.0() is called with its default settings (no `trials`
## argument); the printed summary below reports a single "Classification Tree".
C50Tree <- C5.0(formula = Attrition ~ ., data = HRData.TrainE)
C50Tree

## 
## Call:
## C5.0.formula(formula = Attrition ~ ., data = HRData.TrainE)
## 
## Classification Tree
## Number of samples: 2120 
## Number of predictors: 30 
## 
## Tree size: 94 
## 
## Non-standard options: attempt to group attributes

##Assigning Predicted class and Predict score
## Train set: predicted label and per-class probabilities from the C5.0 model.
HRData.TrainE$predictclass <- predict(C50Tree, newdata = HRData.TrainE, type = "class")
HRData.TrainE$predictscore <- predict(C50Tree, newdata = HRData.TrainE, type = "prob")

##Assigning Predicted class and Predict score in Test Data
## Test set: the same two columns, scored on the hold-out rows.
HRData.TestE$predictclass <- predict(C50Tree, newdata = HRData.TestE, type = "class")
HRData.TestE$predictscore <- predict(C50Tree, newdata = HRData.TestE, type = "prob")

##Model performance of C5.0
## Accuracy and Kappa of predicted vs. observed Attrition, train then test.
AccuracyTrain <- postResample(
  pred = HRData.TrainE$predictclass,
  obs = HRData.TrainE$Attrition
)
AccuracyTrain

##  Accuracy     Kappa 
## 0.9721698 0.8968160

AccuracyTest <- postResample(
  pred = HRData.TestE$predictclass,
  obs = HRData.TestE$Attrition
)
AccuracyTest

##  Accuracy     Kappa 
## 0.9060976 0.6007335

##KS Statistic for Train & Test C5.0
## KS = largest gap between true-positive rate and false-positive rate over
## all cut-offs of the second probability column.
library(ROCR)
TrainC50.p.scores <- prediction(
  HRData.TrainE$predictscore[, 2],
  HRData.TrainE$Attrition
)
TrainC50.p.perf <- performance(TrainC50.p.scores, measure = "tpr", x.measure = "fpr")
ks.TrainC50 <- max(TrainC50.p.perf@y.values[[1]] - TrainC50.p.perf@x.values[[1]])
ks.TrainC50

## [1] 0.8694711

TestC50.p.scores <- prediction(
  HRData.TestE$predictscore[, 2],
  HRData.TestE$Attrition
)
TestC50.p.perf <- performance(TestC50.p.scores, measure = "tpr", x.measure = "fpr")
ks.TestC50 <- max(TestC50.p.perf@y.values[[1]] - TestC50.p.perf@x.values[[1]])
ks.TestC50

## [1] 0.645

##Trying Bagged Cart

## Start again from the first 31 columns (target + original predictors) so the
## prediction columns appended during CART scoring are not used as inputs.
HRData.TrainE2 <- subset(HRData.Train, select = c(1:31))
HRData.TestE2 <- subset(HRData.Test, select = c(1:31))

## Resampling scheme for caret: 10-fold cross-validation repeated 3 times.
bcartcontrol <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
# Bagged CART
set.seed(7)
fit.treebag <- train(
  Attrition ~ .,
  data = HRData.TrainE2,
  method = "treebag",
  metric = "Accuracy",
  trControl = bcartcontrol
)

##Assigning Predicted class and Predict score
## FIX: every prediction in this section now comes from fit.treebag.
## Previously the train scores and BOTH test predictions were taken from
## C50Tree (copy-paste from the C5.0 section), so the "bagged CART" test
## accuracy and KS figures were really C5.0 results.
## as.matrix() keeps predictscore a two-column matrix that is indexed with
## `[, 2]`, the same way the rpart and C5.0 scores are indexed.
HRData.TrainE2$predictclass <- predict(fit.treebag, HRData.TrainE2)
HRData.TrainE2$predictscore <- as.matrix(predict(fit.treebag, HRData.TrainE2, type = "prob"))

##Assigning Predicted class and Predict score in Test Data
HRData.TestE2$predictclass <- predict(fit.treebag, HRData.TestE2)
HRData.TestE2$predictscore <- as.matrix(predict(fit.treebag, HRData.TestE2, type = "prob"))

##Model performance of bcart
AccuracyTrainBC <- postResample(HRData.TrainE2$predictclass, HRData.TrainE2$Attrition)
AccuracyTrainBC

##  Accuracy     Kappa 
## 0.9990566 0.9966012

AccuracyTestBC <- postResample(HRData.TestE2$predictclass, HRData.TestE2$Attrition)
AccuracyTestBC

## STALE OUTPUT - recorded before the fix, when the test labels came from
## C50Tree (identical to the C5.0 test result). Re-run to refresh.
##  Accuracy     Kappa 
## 0.9060976 0.6007335

##KS Statistic for Train & Test bcart
library(ROCR)
Trainbcart.p.scores <- prediction(HRData.TrainE2$predictscore[, 2], HRData.TrainE2$Attrition)
Trainbcart.p.perf <- performance(Trainbcart.p.scores, "tpr", "fpr")
ks.Trainbcart <- max(Trainbcart.p.perf@y.values[[1]] - Trainbcart.p.perf@x.values[[1]])
ks.Trainbcart

## STALE OUTPUT - recorded before the fix, from C50Tree scores. Re-run to refresh.
## [1] 0.8694711

Testbcart.p.scores <- prediction(HRData.TestE2$predictscore[, 2], HRData.TestE2$Attrition)
Testbcart.p.perf <- performance(Testbcart.p.scores, "tpr", "fpr")
ks.Testbcart <- max(Testbcart.p.perf@y.values[[1]] - Testbcart.p.perf@x.values[[1]])
ks.Testbcart

## STALE OUTPUT - recorded before the fix, from C50Tree scores. Re-run to refresh.
## [1] 0.645

## DataMiningGAGrp2_CART_OriginalData.R

## krisg

## Mon Jan 01 20:05:17 2018