library(C50)
## Warning: package 'C50' was built under R version 3.3.3
# Load the churn data sets shipped with C50 (provides churnTrain and
# churnTest) and inspect the structure of the test portion.
data("churn", package = "C50")
str(churnTest)
## 'data.frame': 1667 obs. of 20 variables:
## $ state : Factor w/ 51 levels "AK","AL","AR",..: 12 27 36 33 41 13 29 19 25 44 ...
## $ account_length : int 101 137 103 99 108 117 63 94 138 128 ...
## $ area_code : Factor w/ 3 levels "area_code_408",..: 3 3 1 2 2 2 2 1 3 2 ...
## $ international_plan : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ voice_mail_plan : Factor w/ 2 levels "no","yes": 1 1 2 1 1 1 2 1 1 2 ...
## $ number_vmail_messages : int 0 0 29 0 0 0 32 0 0 43 ...
## $ total_day_minutes : num 70.9 223.6 294.7 216.8 197.4 ...
## $ total_day_calls : int 123 86 95 123 78 85 124 97 117 100 ...
## $ total_day_charge : num 12.1 38 50.1 36.9 33.6 ...
## $ total_eve_minutes : num 212 245 237 126 124 ...
## $ total_eve_calls : int 73 139 105 88 101 68 125 112 46 89 ...
## $ total_eve_charge : num 18 20.8 20.2 10.7 10.5 ...
## $ total_night_minutes : num 236 94.2 300.3 220.6 204.5 ...
## $ total_night_calls : int 73 81 127 82 107 90 120 106 71 92 ...
## $ total_night_charge : num 10.62 4.24 13.51 9.93 9.2 ...
## $ total_intl_minutes : num 10.6 9.5 13.7 15.7 7.7 6.9 12.9 11.1 9.9 11.9 ...
## $ total_intl_calls : int 3 7 6 2 4 5 3 6 4 1 ...
## $ total_intl_charge : num 2.86 2.57 3.7 4.24 2.08 1.86 3.48 3 2.67 3.21 ...
## $ number_customer_service_calls: int 3 0 1 1 2 1 1 0 2 0 ...
## $ churn : Factor w/ 2 levels "yes","no": 2 2 2 2 2 2 2 2 2 2 ...
# Inspect the structure (column names, types, first values) of churnTrain.
str(churnTrain)
## 'data.frame': 3333 obs. of 20 variables:
## $ state : Factor w/ 51 levels "AK","AL","AR",..: 17 36 32 36 37 2 20 25 19 50 ...
## $ account_length : int 128 107 137 84 75 118 121 147 117 141 ...
## $ area_code : Factor w/ 3 levels "area_code_408",..: 2 2 2 1 2 3 3 2 1 2 ...
## $ international_plan : Factor w/ 2 levels "no","yes": 1 1 1 2 2 2 1 2 1 2 ...
## $ voice_mail_plan : Factor w/ 2 levels "no","yes": 2 2 1 1 1 1 2 1 1 2 ...
## $ number_vmail_messages : int 25 26 0 0 0 0 24 0 0 37 ...
## $ total_day_minutes : num 265 162 243 299 167 ...
## $ total_day_calls : int 110 123 114 71 113 98 88 79 97 84 ...
## $ total_day_charge : num 45.1 27.5 41.4 50.9 28.3 ...
## $ total_eve_minutes : num 197.4 195.5 121.2 61.9 148.3 ...
## $ total_eve_calls : int 99 103 110 88 122 101 108 94 80 111 ...
## $ total_eve_charge : num 16.78 16.62 10.3 5.26 12.61 ...
## $ total_night_minutes : num 245 254 163 197 187 ...
## $ total_night_calls : int 91 103 104 89 121 118 118 96 90 97 ...
## $ total_night_charge : num 11.01 11.45 7.32 8.86 8.41 ...
## $ total_intl_minutes : num 10 13.7 12.2 6.6 10.1 6.3 7.5 7.1 8.7 11.2 ...
## $ total_intl_calls : int 3 3 5 7 3 6 7 6 4 5 ...
## $ total_intl_charge : num 2.7 3.7 3.29 1.78 2.73 1.7 2.03 1.92 2.35 3.02 ...
## $ number_customer_service_calls: int 1 1 0 2 3 0 3 0 1 0 ...
## $ churn : Factor w/ 2 levels "yes","no": 2 2 2 2 2 2 2 2 2 2 ...
# Stack both churn sets into a single data frame (test rows first, then
# train rows) so the analysis can re-partition it later.
Data <- rbind(churnTest, churnTrain)
# Number of missing values in each column.
vapply(Data, function(column) sum(is.na(column)), integer(1))
## state account_length
## 0 0
## area_code international_plan
## 0 0
## voice_mail_plan number_vmail_messages
## 0 0
## total_day_minutes total_day_calls
## 0 0
## total_day_charge total_eve_minutes
## 0 0
## total_eve_calls total_eve_charge
## 0 0
## total_night_minutes total_night_calls
## 0 0
## total_night_charge total_intl_minutes
## 0 0
## total_intl_calls total_intl_charge
## 0 0
## number_customer_service_calls churn
## 0 0
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.3
# Report per-column near-zero-variance metrics (freqRatio, percentUnique,
# zeroVar, nzv) for Data instead of just the indices of flagged columns.
nearZeroVar(Data, saveMetrics= TRUE)
## freqRatio percentUnique zeroVar nzv
## state 1.264000 1.02 FALSE FALSE
## account_length 1.101695 4.36 FALSE FALSE
## area_code 1.981732 0.06 FALSE FALSE
## international_plan 9.570825 0.04 FALSE FALSE
## voice_mail_plan 2.779289 0.04 FALSE FALSE
## number_vmail_messages 44.313253 0.96 FALSE TRUE
## total_day_minutes 1.000000 39.22 FALSE FALSE
## total_day_calls 1.035398 2.46 FALSE FALSE
## total_day_charge 1.000000 39.22 FALSE FALSE
## total_eve_minutes 1.000000 37.58 FALSE FALSE
## total_eve_calls 1.045455 2.52 FALSE FALSE
## total_eve_charge 1.000000 33.18 FALSE FALSE
## total_night_minutes 1.000000 37.06 FALSE FALSE
## total_night_calls 1.110092 2.62 FALSE FALSE
## total_night_charge 1.000000 20.56 FALSE FALSE
## total_intl_minutes 1.022727 3.40 FALSE FALSE
## total_intl_calls 1.040923 0.42 FALSE FALSE
## total_intl_charge 1.022727 3.40 FALSE FALSE
## number_customer_service_calls 1.584738 0.20 FALSE FALSE
## churn 6.072136 0.04 FALSE FALSE
# Drop the second column (account_length), selected by name so the step does
# not depend on column order.
# NOTE(review): the nearZeroVar() report flags number_vmail_messages, not
# account_length -- confirm that account_length is the column meant to go.
Data <- Data[, setdiff(names(Data), "account_length")]

# Split the remaining columns into categorical and continuous views; both
# views keep the churn outcome.
cat_cols <- c("state", "area_code", "international_plan", "voice_mail_plan")
Cont_Data <- Data[, !(names(Data) %in% cat_cols)]
Cat_Data <- Data[, c(cat_cols, "churn")]

library(AppliedPredictiveModeling)
transparentTheme(trans = 0.4)
library(caret)

# Box plots of the 14 continuous predictors, grouped by churn outcome.
featurePlot(
  x = Cont_Data[, setdiff(names(Cont_Data), "churn")],
  y = Cont_Data[["churn"]],
  plot = "box",
  # Options passed through to bwplot(): independent y axes per panel and
  # rotated x-axis labels.
  scales = list(
    y = list(relation = "free"),
    x = list(rot = 90)
  ),
  layout = c(7, 2)
)
Steps include: 1. Partitioning the data set into training and test sets; 2. Running the model (first result); 3. Hyperparameter tuning using a grid approach.
library(rpart)
## Warning: package 'rpart' was built under R version 3.3.3
library(rpart.plot)
library(rattle)
## Warning: package 'rattle' was built under R version 3.3.3
## Rattle: A free graphical interface for data mining with R.
## Versión 4.1.0 Copyright (c) 2006-2015 Togaware Pty Ltd.
## Escriba 'rattle()' para agitar, sacudir y rotar sus datos.
library(caret)
# Fix the RNG seed so the partition below is reproducible.
set.seed(3456)

# Stratified 60/40 split on the churn outcome; list = FALSE returns the
# selected row indices as a matrix rather than a list.
trainIndex <- createDataPartition(
  Data[["churn"]],
  p = 0.6,
  list = FALSE,
  times = 1
)
Train <- Data[trainIndex, ]
Test <- Data[-trainIndex, ]

# Fit a classification tree on every remaining predictor and print the
# complexity table, variable importance and per-node details.
Tree_model <- rpart(churn ~ ., data = Train)
summary(Tree_model)
## Call:
## rpart(formula = churn ~ ., data = Train)
## n= 3001
##
## CP nsplit rel error xerror xstd
## 1 0.07058824 0 1.0000000 1.0000000 0.04494128
## 2 0.06000000 3 0.7717647 0.8423529 0.04177997
## 3 0.05176471 5 0.6517647 0.6588235 0.03749051
## 4 0.04000000 8 0.4564706 0.4941176 0.03288271
## 5 0.01882353 9 0.4164706 0.4658824 0.03199795
## 6 0.01647059 11 0.3788235 0.4635294 0.03192274
## 7 0.01529412 12 0.3623529 0.4635294 0.03192274
## 8 0.01411765 14 0.3317647 0.4682353 0.03207293
## 9 0.01176471 16 0.3035294 0.4635294 0.03192274
## 10 0.01000000 18 0.2800000 0.4611765 0.03184729
##
## Variable importance
## total_day_minutes total_day_charge
## 16 16
## total_eve_charge total_eve_minutes
## 8 8
## state total_intl_minutes
## 8 7
## total_intl_charge number_customer_service_calls
## 7 7
## total_intl_calls international_plan
## 5 5
## number_vmail_messages voice_mail_plan
## 5 4
## total_night_minutes total_night_charge
## 2 2
## total_day_calls
## 1
##
## Node number 1: 3001 observations, complexity param=0.07058824
## predicted class=no expected loss=0.1416195 P(node) =1
## class counts: 425 2576
## probabilities: 0.142 0.858
## left son=2 (301 obs) right son=3 (2700 obs)
## Primary splits:
## total_day_minutes < 248.65 to the right, improve=85.14364, (0 missing)
## total_day_charge < 42.27 to the right, improve=85.14364, (0 missing)
## number_customer_service_calls < 3.5 to the right, improve=60.91735, (0 missing)
## international_plan splits as RL, improve=51.33479, (0 missing)
## state splits as RRLRLRLRRRRRRRRLRRRLLLRRRLLLRRRLRLLRLLRRLRLLRRRLRRR, improve=12.70748, (0 missing)
## Surrogate splits:
## total_day_charge < 42.27 to the right, agree=1.0, adj=1.000, (0 split)
## total_eve_minutes < 41.95 to the left, agree=0.9, adj=0.003, (0 split)
## total_eve_charge < 3.565 to the left, agree=0.9, adj=0.003, (0 split)
##
## Node number 2: 301 observations, complexity param=0.07058824
## predicted class=no expected loss=0.4983389 P(node) =0.1002999
## class counts: 150 151
## probabilities: 0.498 0.502
## left son=4 (234 obs) right son=5 (67 obs)
## Primary splits:
## voice_mail_plan splits as LR, improve=35.45930, (0 missing)
## number_vmail_messages < 6.5 to the left, improve=35.45930, (0 missing)
## total_eve_minutes < 201.15 to the right, improve=28.62253, (0 missing)
## total_eve_charge < 17.1 to the right, improve=28.62253, (0 missing)
## total_day_minutes < 285.5 to the right, improve=17.58056, (0 missing)
## Surrogate splits:
## number_vmail_messages < 6.5 to the left, agree=1.000, adj=1.000, (0 split)
## state splits as LRLLLLLLLLLLLLLLLLRLLLLLLLLRLLLLLLLLLLLLLLLLLLLLLLR, agree=0.791, adj=0.060, (0 split)
## total_night_minutes < 298.75 to the left, agree=0.781, adj=0.015, (0 split)
## total_night_charge < 13.445 to the left, agree=0.781, adj=0.015, (0 split)
##
## Node number 3: 2700 observations, complexity param=0.06
## predicted class=no expected loss=0.1018519 P(node) =0.8997001
## class counts: 275 2425
## probabilities: 0.102 0.898
## left son=6 (207 obs) right son=7 (2493 obs)
## Primary splits:
## number_customer_service_calls < 3.5 to the right, improve=65.168630, (0 missing)
## international_plan splits as RL, improve=46.007020, (0 missing)
## state splits as RRLRLRLRLRRRRRLLRRLLLLLLRLLLLRRLRLLRLLRRLLLLRRRLRLR, improve= 7.779143, (0 missing)
## total_intl_minutes < 13.45 to the right, improve= 6.884975, (0 missing)
## total_intl_charge < 3.635 to the right, improve= 6.884975, (0 missing)
##
## Node number 4: 234 observations, complexity param=0.07058824
## predicted class=yes expected loss=0.3717949 P(node) =0.07797401
## class counts: 147 87
## probabilities: 0.628 0.372
## left son=8 (125 obs) right son=9 (109 obs)
## Primary splits:
## total_eve_minutes < 195.6 to the right, improve=36.22351, (0 missing)
## total_eve_charge < 16.625 to the right, improve=36.22351, (0 missing)
## total_day_minutes < 285.5 to the right, improve=17.80445, (0 missing)
## total_day_charge < 48.535 to the right, improve=17.80445, (0 missing)
## state splits as LLRLRRLRRLLRLLRLLL-LLLLLRLLRRRLLRLLLLLLRLLLRRRLLRLR, improve=14.52764, (0 missing)
## Surrogate splits:
## total_eve_charge < 16.625 to the right, agree=1.000, adj=1.000, (0 split)
## state splits as RLRLLLLRRLLLLLRLRR-RRLLLLRLLLLRLLLRLLLRRLLLRRRLLRLL, agree=0.697, adj=0.349, (0 split)
## total_day_calls < 85.5 to the right, agree=0.568, adj=0.073, (0 split)
## total_intl_minutes < 6.65 to the right, agree=0.564, adj=0.064, (0 split)
## total_intl_calls < 1.5 to the right, agree=0.564, adj=0.064, (0 split)
##
## Node number 5: 67 observations
## predicted class=no expected loss=0.04477612 P(node) =0.02232589
## class counts: 3 64
## probabilities: 0.045 0.955
##
## Node number 6: 207 observations, complexity param=0.06
## predicted class=no expected loss=0.4830918 P(node) =0.06897701
## class counts: 100 107
## probabilities: 0.483 0.517
## left son=12 (59 obs) right son=13 (148 obs)
## Primary splits:
## total_day_minutes < 144.65 to the left, improve=33.288880, (0 missing)
## total_day_charge < 24.595 to the left, improve=33.288880, (0 missing)
## state splits as LRLRLRRRRRLRRLRRRRLLRLRRRLLR-RLRRRLRRRLRRLRRLRRLLRR, improve=17.409420, (0 missing)
## total_eve_minutes < 190.8 to the left, improve= 8.907583, (0 missing)
## total_eve_charge < 16.22 to the left, improve= 8.907583, (0 missing)
## Surrogate splits:
## total_day_charge < 24.595 to the left, agree=1.000, adj=1.000, (0 split)
## state splits as RRLRRRRRRRRRLRRRRRLRRRRRRRLR-RRRRRRRRRLRRLRRLRRRLRL, agree=0.783, adj=0.237, (0 split)
## total_eve_minutes < 98.85 to the left, agree=0.725, adj=0.034, (0 split)
## total_eve_charge < 8.405 to the left, agree=0.725, adj=0.034, (0 split)
## number_vmail_messages < 43.5 to the right, agree=0.720, adj=0.017, (0 split)
##
## Node number 7: 2493 observations, complexity param=0.05176471
## predicted class=no expected loss=0.07019655 P(node) =0.8307231
## class counts: 175 2318
## probabilities: 0.070 0.930
## left son=14 (238 obs) right son=15 (2255 obs)
## Primary splits:
## international_plan splits as RL, improve=44.607750, (0 missing)
## total_intl_minutes < 13.45 to the right, improve= 6.570178, (0 missing)
## total_intl_charge < 3.635 to the right, improve= 6.570178, (0 missing)
## total_day_minutes < 217.75 to the right, improve= 6.152075, (0 missing)
## total_day_charge < 37.02 to the right, improve= 6.152075, (0 missing)
##
## Node number 8: 125 observations, complexity param=0.01176471
## predicted class=yes expected loss=0.112 P(node) =0.04165278
## class counts: 111 14
## probabilities: 0.888 0.112
## left son=16 (105 obs) right son=17 (20 obs)
## Primary splits:
## state splits as -LLLRRL-LLLRLL-LLL-LLLLLLLLLRRLLLLLLLL--LLRLRRLLLLL, improve=9.135429, (0 missing)
## total_night_minutes < 157.9 to the right, improve=6.724000, (0 missing)
## total_night_charge < 7.105 to the right, improve=6.724000, (0 missing)
## total_day_minutes < 264.65 to the right, improve=2.586222, (0 missing)
## total_day_charge < 44.99 to the right, improve=2.586222, (0 missing)
## Surrogate splits:
## total_night_minutes < 110.15 to the right, agree=0.856, adj=0.1, (0 split)
## total_night_charge < 4.955 to the right, agree=0.856, adj=0.1, (0 split)
##
## Node number 9: 109 observations, complexity param=0.04
## predicted class=no expected loss=0.3302752 P(node) =0.03632123
## class counts: 36 73
## probabilities: 0.330 0.670
## left son=18 (23 obs) right son=19 (86 obs)
## Primary splits:
## total_day_minutes < 286.9 to the right, improve=16.956280, (0 missing)
## total_day_charge < 48.775 to the right, improve=16.956280, (0 missing)
## state splits as L-LL-RRLR---RLRLLL-LLR-RRLRRRRLLRLLRLLLL-R-LLLLRLRR, improve= 9.030310, (0 missing)
## total_night_minutes < 186.45 to the right, improve= 6.784356, (0 missing)
## total_night_charge < 8.39 to the right, improve= 6.784356, (0 missing)
## Surrogate splits:
## total_day_charge < 48.775 to the right, agree=1.000, adj=1.00, (0 split)
## state splits as R-RR-RRRR---RRRLRL-RRR-RRRRRLRRRRRRRRRRR-R-RRRRRRRR, agree=0.817, adj=0.13, (0 split)
##
## Node number 12: 59 observations
## predicted class=yes expected loss=0.06779661 P(node) =0.01966011
## class counts: 55 4
## probabilities: 0.932 0.068
##
## Node number 13: 148 observations, complexity param=0.01529412
## predicted class=no expected loss=0.3040541 P(node) =0.04931689
## class counts: 45 103
## probabilities: 0.304 0.696
## left son=26 (77 obs) right son=27 (71 obs)
## Primary splits:
## state splits as LRLRLRRLRRLRRLRRLRLLRLLLRL-L-RLLRLLRRR-RLRLLRRRLLLR, improve=11.521910, (0 missing)
## total_eve_minutes < 191.6 to the left, improve= 8.649079, (0 missing)
## total_eve_charge < 16.29 to the left, improve= 8.649079, (0 missing)
## total_day_minutes < 196.95 to the left, improve= 4.095429, (0 missing)
## total_day_charge < 33.48 to the left, improve= 4.095429, (0 missing)
## Surrogate splits:
## total_eve_minutes < 189.55 to the left, agree=0.649, adj=0.268, (0 split)
## total_eve_charge < 16.11 to the left, agree=0.649, adj=0.268, (0 split)
## total_intl_minutes < 10.2 to the left, agree=0.622, adj=0.211, (0 split)
## total_intl_charge < 2.755 to the left, agree=0.622, adj=0.211, (0 split)
## total_night_minutes < 240.75 to the left, agree=0.595, adj=0.155, (0 split)
##
## Node number 14: 238 observations, complexity param=0.05176471
## predicted class=no expected loss=0.3613445 P(node) =0.0793069
## class counts: 86 152
## probabilities: 0.361 0.639
## left son=28 (44 obs) right son=29 (194 obs)
## Primary splits:
## total_intl_calls < 2.5 to the left, improve=44.034310, (0 missing)
## total_intl_minutes < 13.05 to the right, improve=41.603840, (0 missing)
## total_intl_charge < 3.525 to the right, improve=41.603840, (0 missing)
## state splits as RLLRLRRRLLRRLRL-LLRLLLRLRRRLRRRLRLRLLRRRLRLLRRRRRRR, improve=14.736300, (0 missing)
## total_eve_calls < 103.5 to the left, improve= 2.067076, (0 missing)
## Surrogate splits:
## state splits as RRRRLRRRRRRRLRR-RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR, agree=0.824, adj=0.045, (0 split)
## total_night_minutes < 303.15 to the right, agree=0.819, adj=0.023, (0 split)
## total_night_charge < 13.64 to the right, agree=0.819, adj=0.023, (0 split)
## total_intl_minutes < 4.35 to the left, agree=0.819, adj=0.023, (0 split)
## total_intl_charge < 1.175 to the left, agree=0.819, adj=0.023, (0 split)
##
## Node number 15: 2255 observations, complexity param=0.01882353
## predicted class=no expected loss=0.03946785 P(node) =0.7514162
## class counts: 89 2166
## probabilities: 0.039 0.961
## left son=30 (273 obs) right son=31 (1982 obs)
## Primary splits:
## total_day_minutes < 221.95 to the right, improve=6.640284, (0 missing)
## total_day_charge < 37.73 to the right, improve=6.640284, (0 missing)
## total_eve_minutes < 257.95 to the right, improve=5.995820, (0 missing)
## total_eve_charge < 21.925 to the right, improve=5.995820, (0 missing)
## state splits as RRRRLRRLLRRRRRRLRRLRLRLLRLLLLRRLRRRRRLRRLLLRRRRLRLR, improve=2.648549, (0 missing)
## Surrogate splits:
## total_day_charge < 37.73 to the right, agree=1, adj=1, (0 split)
##
## Node number 16: 105 observations
## predicted class=yes expected loss=0.02857143 P(node) =0.03498834
## class counts: 102 3
## probabilities: 0.971 0.029
##
## Node number 17: 20 observations, complexity param=0.01176471
## predicted class=no expected loss=0.45 P(node) =0.006664445
## class counts: 9 11
## probabilities: 0.450 0.550
## left son=34 (8 obs) right son=35 (12 obs)
## Primary splits:
## total_night_minutes < 200.15 to the right, improve=8.066667, (0 missing)
## total_night_charge < 9.01 to the right, improve=8.066667, (0 missing)
## total_eve_calls < 102 to the left, improve=3.570330, (0 missing)
## total_day_minutes < 263.85 to the right, improve=2.500000, (0 missing)
## total_day_charge < 44.855 to the right, improve=2.500000, (0 missing)
## Surrogate splits:
## total_night_charge < 9.01 to the right, agree=1.00, adj=1.000, (0 split)
## total_eve_calls < 102 to the left, agree=0.75, adj=0.375, (0 split)
## total_day_minutes < 263.85 to the right, agree=0.70, adj=0.250, (0 split)
## total_day_charge < 44.855 to the right, agree=0.70, adj=0.250, (0 split)
## total_intl_minutes < 11.85 to the right, agree=0.70, adj=0.250, (0 split)
##
## Node number 18: 23 observations
## predicted class=yes expected loss=0.1304348 P(node) =0.007664112
## class counts: 20 3
## probabilities: 0.870 0.130
##
## Node number 19: 86 observations
## predicted class=no expected loss=0.1860465 P(node) =0.02865711
## class counts: 16 70
## probabilities: 0.186 0.814
##
## Node number 26: 77 observations, complexity param=0.01529412
## predicted class=no expected loss=0.4935065 P(node) =0.02565811
## class counts: 38 39
## probabilities: 0.494 0.506
## left son=52 (53 obs) right son=53 (24 obs)
## Primary splits:
## total_eve_minutes < 211.2 to the left, improve=5.671179, (0 missing)
## total_eve_charge < 17.95 to the left, improve=5.671179, (0 missing)
## total_day_minutes < 200.1 to the left, improve=4.407300, (0 missing)
## total_day_charge < 34.02 to the left, improve=4.407300, (0 missing)
## state splits as L-R-R--R--L--R--R-LL-RRR-L-R--RR-RR-----R-RR---LLR-, improve=4.110310, (0 missing)
## Surrogate splits:
## total_eve_charge < 17.95 to the left, agree=1.000, adj=1.000, (0 split)
## state splits as L-L-L--L--R--L--L-LL-LLR-L-L--LL-LL-----R-LL---LLL-, agree=0.727, adj=0.125, (0 split)
## total_night_minutes < 146.75 to the right, agree=0.714, adj=0.083, (0 split)
## total_night_charge < 6.605 to the right, agree=0.714, adj=0.083, (0 split)
## total_intl_minutes < 5.75 to the right, agree=0.714, adj=0.083, (0 split)
##
## Node number 27: 71 observations
## predicted class=no expected loss=0.09859155 P(node) =0.02365878
## class counts: 7 64
## probabilities: 0.099 0.901
##
## Node number 28: 44 observations
## predicted class=yes expected loss=0 P(node) =0.01466178
## class counts: 44 0
## probabilities: 1.000 0.000
##
## Node number 29: 194 observations, complexity param=0.05176471
## predicted class=no expected loss=0.2164948 P(node) =0.06464512
## class counts: 42 152
## probabilities: 0.216 0.784
## left son=58 (39 obs) right son=59 (155 obs)
## Primary splits:
## total_intl_minutes < 13.05 to the right, improve=59.930560, (0 missing)
## total_intl_charge < 3.525 to the right, improve=59.930560, (0 missing)
## state splits as RLLRRRRRLLRRRRL-LLRLLLRRRRRLRRRLLLLLLRRRLRLLRRLRRRR, improve=10.501880, (0 missing)
## total_eve_calls < 73 to the left, improve= 2.056191, (0 missing)
## total_day_calls < 100.5 to the left, improve= 1.947298, (0 missing)
## Surrogate splits:
## total_intl_charge < 3.525 to the right, agree=1.000, adj=1.000, (0 split)
## state splits as RRRRRRRRRRRRRRR-RRRRRLRRRRRRRRRRRRRRRRRRRRRRRRRRRRR, agree=0.809, adj=0.051, (0 split)
## number_vmail_messages < 40.5 to the right, agree=0.804, adj=0.026, (0 split)
## total_day_minutes < 52.45 to the left, agree=0.804, adj=0.026, (0 split)
## total_day_calls < 143.5 to the right, agree=0.804, adj=0.026, (0 split)
##
## Node number 30: 273 observations, complexity param=0.01882353
## predicted class=no expected loss=0.1428571 P(node) =0.09096968
## class counts: 39 234
## probabilities: 0.143 0.857
## left son=60 (30 obs) right son=61 (243 obs)
## Primary splits:
## total_eve_minutes < 267.35 to the right, improve=26.230810, (0 missing)
## total_eve_charge < 22.725 to the right, improve=26.230810, (0 missing)
## state splits as RRLRLRLRLLRRRRRLRLRLRRLLLLLRLRRLLRRLRLRRRLRLRRRRRLR, improve= 9.714764, (0 missing)
## total_night_minutes < 191.85 to the right, improve= 3.223335, (0 missing)
## total_night_charge < 8.635 to the right, improve= 3.223335, (0 missing)
## Surrogate splits:
## total_eve_charge < 22.725 to the right, agree=1.000, adj=1.000, (0 split)
## total_intl_minutes < 2.05 to the left, agree=0.894, adj=0.033, (0 split)
## total_intl_charge < 0.555 to the left, agree=0.894, adj=0.033, (0 split)
##
## Node number 31: 1982 observations
## predicted class=no expected loss=0.02522704 P(node) =0.6604465
## class counts: 50 1932
## probabilities: 0.025 0.975
##
## Node number 34: 8 observations
## predicted class=yes expected loss=0 P(node) =0.002665778
## class counts: 8 0
## probabilities: 1.000 0.000
##
## Node number 35: 12 observations
## predicted class=no expected loss=0.08333333 P(node) =0.003998667
## class counts: 1 11
## probabilities: 0.083 0.917
##
## Node number 52: 53 observations, complexity param=0.01411765
## predicted class=yes expected loss=0.3773585 P(node) =0.01766078
## class counts: 33 20
## probabilities: 0.623 0.377
## left son=104 (18 obs) right son=105 (35 obs)
## Primary splits:
## state splits as L-L-L--R--L--R--R-LL-LLR-L-R--RR-RR-----L-RR---LLR-, improve=7.762803, (0 missing)
## total_day_minutes < 182.75 to the left, improve=7.563398, (0 missing)
## total_day_charge < 31.065 to the left, improve=7.563398, (0 missing)
## total_eve_minutes < 138.55 to the left, improve=2.092474, (0 missing)
## total_eve_charge < 11.78 to the left, improve=2.092474, (0 missing)
## Surrogate splits:
## total_day_minutes < 174.45 to the left, agree=0.736, adj=0.222, (0 split)
## total_day_calls < 66 to the left, agree=0.736, adj=0.222, (0 split)
## total_day_charge < 29.66 to the left, agree=0.736, adj=0.222, (0 split)
## total_night_minutes < 178.75 to the left, agree=0.736, adj=0.222, (0 split)
## total_night_charge < 8.045 to the left, agree=0.736, adj=0.222, (0 split)
##
## Node number 53: 24 observations
## predicted class=no expected loss=0.2083333 P(node) =0.007997334
## class counts: 5 19
## probabilities: 0.208 0.792
##
## Node number 58: 39 observations
## predicted class=yes expected loss=0 P(node) =0.01299567
## class counts: 39 0
## probabilities: 1.000 0.000
##
## Node number 59: 155 observations
## predicted class=no expected loss=0.01935484 P(node) =0.05164945
## class counts: 3 152
## probabilities: 0.019 0.981
##
## Node number 60: 30 observations, complexity param=0.01647059
## predicted class=yes expected loss=0.2333333 P(node) =0.009996668
## class counts: 23 7
## probabilities: 0.767 0.233
## left son=120 (23 obs) right son=121 (7 obs)
## Primary splits:
## state splits as -RL-L-L-L-RR---LRL----LLLLL-LR--L--L-L---L------RL-, improve=10.7333300, (0 missing)
## total_day_calls < 102.5 to the left, improve= 2.1777780, (0 missing)
## total_night_calls < 113.5 to the right, improve= 1.1878790, (0 missing)
## total_day_minutes < 239.75 to the left, improve= 0.6960663, (0 missing)
## total_day_charge < 40.76 to the left, improve= 0.6960663, (0 missing)
## Surrogate splits:
## voice_mail_plan splits as LR, agree=0.9, adj=0.571, (0 split)
## number_vmail_messages < 12 to the left, agree=0.9, adj=0.571, (0 split)
## total_night_minutes < 179.15 to the right, agree=0.8, adj=0.143, (0 split)
## total_night_charge < 8.06 to the right, agree=0.8, adj=0.143, (0 split)
##
## Node number 61: 243 observations
## predicted class=no expected loss=0.06584362 P(node) =0.08097301
## class counts: 16 227
## probabilities: 0.066 0.934
##
## Node number 104: 18 observations
## predicted class=yes expected loss=0 P(node) =0.005998001
## class counts: 18 0
## probabilities: 1.000 0.000
##
## Node number 105: 35 observations, complexity param=0.01411765
## predicted class=no expected loss=0.4285714 P(node) =0.01166278
## class counts: 15 20
## probabilities: 0.429 0.571
## left son=210 (9 obs) right son=211 (26 obs)
## Primary splits:
## total_day_minutes < 182.5 to the left, improve=5.134310, (0 missing)
## total_day_charge < 31.025 to the left, improve=5.134310, (0 missing)
## total_eve_minutes < 138.55 to the left, improve=3.862857, (0 missing)
## total_eve_charge < 11.78 to the left, improve=3.862857, (0 missing)
## total_eve_calls < 87.5 to the left, improve=2.142857, (0 missing)
## Surrogate splits:
## total_day_charge < 31.025 to the left, agree=1.0, adj=1.000, (0 split)
## state splits as -------R-----R--R------R---R--RR-LL-------RR-----R-, agree=0.8, adj=0.222, (0 split)
## total_eve_calls < 67.5 to the left, agree=0.8, adj=0.222, (0 split)
## total_intl_minutes < 14.9 to the right, agree=0.8, adj=0.222, (0 split)
## total_intl_charge < 4.025 to the right, agree=0.8, adj=0.222, (0 split)
##
## Node number 120: 23 observations
## predicted class=yes expected loss=0 P(node) =0.007664112
## class counts: 23 0
## probabilities: 1.000 0.000
##
## Node number 121: 7 observations
## predicted class=no expected loss=0 P(node) =0.002332556
## class counts: 0 7
## probabilities: 0.000 1.000
##
## Node number 210: 9 observations
## predicted class=yes expected loss=0.1111111 P(node) =0.002999
## class counts: 8 1
## probabilities: 0.889 0.111
##
## Node number 211: 26 observations
## predicted class=no expected loss=0.2692308 P(node) =0.008663779
## class counts: 7 19
## probabilities: 0.269 0.731
# Draw the fitted tree, then score the hold-out rows twice: once as predicted
# class labels and once as per-class probabilities.
fancyRpartPlot(Tree_model)
Tree_pre <- predict(Tree_model, newdata = Test, type = "class")
Tree_pre2 <- predict(Tree_model, newdata = Test, type = "prob")
library(randomForest)
## Warning: package 'randomForest' was built under R version 3.3.3
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
# Fit a small random forest: 50 trees, each limited to at most 4 terminal
# nodes, with importance measures computed during training.
rf <- randomForest(
  churn ~ .,
  data = Train,
  maxnodes = 4,
  ntree = 50,
  importance = TRUE
)
# Per-class variable importance scores.
varImp(rf)
## yes no
## state 0.3297402 0.3297402
## area_code 0.0000000 0.0000000
## international_plan 2.5833397 2.5833397
## voice_mail_plan 2.0002049 2.0002049
## number_vmail_messages 1.8125712 1.8125712
## total_day_minutes 4.3514624 4.3514624
## total_day_calls 0.0000000 0.0000000
## total_day_charge 4.9337185 4.9337185
## total_eve_minutes 2.0165740 2.0165740
## total_eve_calls 0.0000000 0.0000000
## total_eve_charge 1.2499870 1.2499870
## total_night_minutes 1.3547760 1.3547760
## total_night_calls 1.1197686 1.1197686
## total_night_charge -1.0101525 -1.0101525
## total_intl_minutes 1.4228727 1.4228727
## total_intl_calls 1.7747736 1.7747736
## total_intl_charge -0.4899153 -0.4899153
## number_customer_service_calls 1.9553956 1.9553956
# Plot the forest's variable importance, then score the hold-out rows as
# predicted classes and as per-class probabilities.
varImpPlot(rf)
rf_pre <- predict(rf, newdata = Test, type = "class")
rf_pre2 <- predict(rf, newdata = Test, type = "prob")
library(ggplot2)
library(plotROC)
## Warning: package 'plotROC' was built under R version 3.3.3
# ROC curves for the decision tree and the random forest on the hold-out set.
#
# A ROC curve must compare the model's score against the TRUE outcome. The
# reference label `d` is therefore the observed Test$churn, not the model's
# own predicted class, and the marker `m` is the predicted probability of the
# positive class ("yes"). Coding the label as 0/1 (1 = churned) makes the
# positive class explicit, so plotROC does not have to guess the coding.
actual_churn <- as.integer(Test$churn == "yes")

# Keep the predicted class and both probability columns alongside the truth;
# rows are in Test order for predictions and labels alike.
Ds_df <- cbind.data.frame(Tree_pre, Tree_pre2, actual = actual_churn)
Rf_df <- cbind.data.frame(rf_pre, rf_pre2, actual = actual_churn)

ggplot(Ds_df, aes(d = actual, m = yes)) + geom_roc() + style_roc()
ggplot(Rf_df, aes(d = actual, m = yes)) + geom_roc() + style_roc()