http://www.r-bloggers.com/whats-the-difference-between-machine-learning-statistics-and-data-mining/
https://hk.saowen.com/a/2d78153a4263c35e9889ebb0cd07e731d79ed1c1ab0e712c68dba24ffd4367f4
https://www.youtube.com/watch?v=mRro1Ge_OCg
#install.packages("C50")
library(C50)
data(churn)
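# note (assumption about newer package versions): recent C50 releases no longer
# bundle the churn data; it ships instead as mlc_churn in the modeldata package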
str(churnTrain)
## 'data.frame': 3333 obs. of 20 variables:
## $ state : Factor w/ 51 levels "AK","AL","AR",..: 17 36 32 36 37 2 20 25 19 50 ...
## $ account_length : int 128 107 137 84 75 118 121 147 117 141 ...
## $ area_code : Factor w/ 3 levels "area_code_408",..: 2 2 2 1 2 3 3 2 1 2 ...
## $ international_plan : Factor w/ 2 levels "no","yes": 1 1 1 2 2 2 1 2 1 2 ...
## $ voice_mail_plan : Factor w/ 2 levels "no","yes": 2 2 1 1 1 1 2 1 1 2 ...
## $ number_vmail_messages : int 25 26 0 0 0 0 24 0 0 37 ...
## $ total_day_minutes : num 265 162 243 299 167 ...
## $ total_day_calls : int 110 123 114 71 113 98 88 79 97 84 ...
## $ total_day_charge : num 45.1 27.5 41.4 50.9 28.3 ...
## $ total_eve_minutes : num 197.4 195.5 121.2 61.9 148.3 ...
## $ total_eve_calls : int 99 103 110 88 122 101 108 94 80 111 ...
## $ total_eve_charge : num 16.78 16.62 10.3 5.26 12.61 ...
## $ total_night_minutes : num 245 254 163 197 187 ...
## $ total_night_calls : int 91 103 104 89 121 118 118 96 90 97 ...
## $ total_night_charge : num 11.01 11.45 7.32 8.86 8.41 ...
## $ total_intl_minutes : num 10 13.7 12.2 6.6 10.1 6.3 7.5 7.1 8.7 11.2 ...
## $ total_intl_calls : int 3 3 5 7 3 6 7 6 4 5 ...
## $ total_intl_charge : num 2.7 3.7 3.29 1.78 2.73 1.7 2.03 1.92 2.35 3.02 ...
## $ number_customer_service_calls: int 1 1 0 2 3 0 3 0 1 0 ...
## $ churn : Factor w/ 2 levels "yes","no": 2 2 2 2 2 2 2 2 2 2 ...
names(churnTrain) %in% c("state", "area_code", "account_length")
## [1] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
!names(churnTrain) %in% c("state", "area_code", "account_length")
## [1] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [12] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# select the modeling variables
variable.list = !names(churnTrain) %in% c('state','area_code','account_length')
churnTrain=churnTrain[,variable.list]
churnTest=churnTest[,variable.list]
str(churnTrain)
## 'data.frame': 3333 obs. of 17 variables:
## $ international_plan : Factor w/ 2 levels "no","yes": 1 1 1 2 2 2 1 2 1 2 ...
## $ voice_mail_plan : Factor w/ 2 levels "no","yes": 2 2 1 1 1 1 2 1 1 2 ...
## $ number_vmail_messages : int 25 26 0 0 0 0 24 0 0 37 ...
## $ total_day_minutes : num 265 162 243 299 167 ...
## $ total_day_calls : int 110 123 114 71 113 98 88 79 97 84 ...
## $ total_day_charge : num 45.1 27.5 41.4 50.9 28.3 ...
## $ total_eve_minutes : num 197.4 195.5 121.2 61.9 148.3 ...
## $ total_eve_calls : int 99 103 110 88 122 101 108 94 80 111 ...
## $ total_eve_charge : num 16.78 16.62 10.3 5.26 12.61 ...
## $ total_night_minutes : num 245 254 163 197 187 ...
## $ total_night_calls : int 91 103 104 89 121 118 118 96 90 97 ...
## $ total_night_charge : num 11.01 11.45 7.32 8.86 8.41 ...
## $ total_intl_minutes : num 10 13.7 12.2 6.6 10.1 6.3 7.5 7.1 8.7 11.2 ...
## $ total_intl_calls : int 3 3 5 7 3 6 7 6 4 5 ...
## $ total_intl_charge : num 2.7 3.7 3.29 1.78 2.73 1.7 2.03 1.92 2.35 3.02 ...
## $ number_customer_service_calls: int 1 1 0 2 3 0 3 0 1 0 ...
## $ churn : Factor w/ 2 levels "yes","no": 2 2 2 2 2 2 2 2 2 2 ...
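# A small sketch of the same selection written by name rather than with a
# logical mask: setdiff() returns every column except the three dropped above,
# so churnTrain[, keep] reproduces churnTrain[, variable.list] (keep is an
# illustrative name, not used later in this script).
keep = setdiff(names(churnTrain), c('state','area_code','account_length'))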
#sample
?sample
sample(1:10)
## [1] 8 2 3 4 9 1 7 6 10 5
sample(1:10, size = 5)
## [1] 5 6 10 2 1
sample(c(0,1), size= 10, replace = T)
## [1] 1 1 0 0 0 0 0 1 1 0
sample.int(20, 12) # both arguments must be integers; here 12 values are drawn from 1:20 without replacement
## [1] 2 9 15 17 18 20 6 12 10 4 7 3
set.seed(2)
# split the data into training data and validation data
ind<-sample(1:2, size=nrow(churnTrain), replace=T, prob=c(0.7, 0.3))
trainset=churnTrain[ind==1,]
testset=churnTrain[ind==2,]
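# A hedged alternative to the sample()-based split above: caret's
# createDataPartition() draws a stratified split that preserves the churn
# class ratio in both subsets (idx, trainset2 and testset2 are illustrative
# names; the rest of the script keeps using trainset/testset from above).
idx = caret::createDataPartition(churnTrain$churn, p = 0.7, list = FALSE)
trainset2 = churnTrain[idx, ]
testset2 = churnTrain[-idx, ]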
#install.packages('rpart')
library('rpart')
# build a decision tree model with rpart (CART)
?rpart
con = rpart.control(minsplit = 20, cp = 0.01) # minsplit: smallest node rpart will try to split; cp: complexity penalty per split
?rpart.control
churn.rp<-rpart(churn ~., data=trainset,control = con)
#churn.rp<-rpart(churn ~ total_day_charge + international_plan, data=trainset)
churn.rp
## n= 2315
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 2315 342 no (0.14773218 0.85226782)
## 2) total_day_minutes>=265.45 144 59 yes (0.59027778 0.40972222)
## 4) voice_mail_plan=no 110 29 yes (0.73636364 0.26363636)
## 8) total_eve_minutes>=188.5 67 3 yes (0.95522388 0.04477612) *
## 9) total_eve_minutes< 188.5 43 17 no (0.39534884 0.60465116)
## 18) total_day_minutes>=282.7 19 6 yes (0.68421053 0.31578947) *
## 19) total_day_minutes< 282.7 24 4 no (0.16666667 0.83333333) *
## 5) voice_mail_plan=yes 34 4 no (0.11764706 0.88235294) *
## 3) total_day_minutes< 265.45 2171 257 no (0.11837863 0.88162137)
## 6) number_customer_service_calls>=3.5 168 82 yes (0.51190476 0.48809524)
## 12) total_day_minutes< 160.2 71 10 yes (0.85915493 0.14084507) *
## 13) total_day_minutes>=160.2 97 25 no (0.25773196 0.74226804)
## 26) total_eve_minutes< 155.5 20 7 yes (0.65000000 0.35000000) *
## 27) total_eve_minutes>=155.5 77 12 no (0.15584416 0.84415584) *
## 7) number_customer_service_calls< 3.5 2003 171 no (0.08537194 0.91462806)
## 14) international_plan=yes 188 76 no (0.40425532 0.59574468)
## 28) total_intl_calls< 2.5 38 0 yes (1.00000000 0.00000000) *
## 29) total_intl_calls>=2.5 150 38 no (0.25333333 0.74666667)
## 58) total_intl_minutes>=13.1 32 0 yes (1.00000000 0.00000000) *
## 59) total_intl_minutes< 13.1 118 6 no (0.05084746 0.94915254) *
## 15) international_plan=no 1815 95 no (0.05234160 0.94765840)
## 30) total_day_minutes>=224.15 251 50 no (0.19920319 0.80079681)
## 60) total_eve_minutes>=259.8 36 10 yes (0.72222222 0.27777778) *
## 61) total_eve_minutes< 259.8 215 24 no (0.11162791 0.88837209) *
## 31) total_day_minutes< 224.15 1564 45 no (0.02877238 0.97122762) *
summary(churn.rp)
## Call:
## rpart(formula = churn ~ ., data = trainset, control = con)
## n= 2315
##
## CP nsplit rel error xerror xstd
## 1 0.07602339 0 1.0000000 1.0000000 0.04992005
## 2 0.07456140 2 0.8479532 0.9970760 0.04985964
## 3 0.05555556 4 0.6988304 0.7602339 0.04442127
## 4 0.02631579 7 0.4941520 0.5263158 0.03767329
## 5 0.02339181 8 0.4678363 0.5204678 0.03748096
## 6 0.02046784 10 0.4210526 0.5087719 0.03709209
## 7 0.01754386 11 0.4005848 0.4707602 0.03578773
## 8 0.01000000 12 0.3830409 0.4766082 0.03599261
##
## Variable importance
## total_day_minutes total_day_charge
## 18 18
## number_customer_service_calls total_intl_minutes
## 10 8
## total_intl_charge total_eve_charge
## 8 8
## total_eve_minutes international_plan
## 8 7
## total_intl_calls number_vmail_messages
## 6 3
## voice_mail_plan total_night_calls
## 3 1
## total_eve_calls
## 1
##
## Node number 1: 2315 observations, complexity param=0.07602339
## predicted class=no expected loss=0.1477322 P(node) =1
## class counts: 342 1973
## probabilities: 0.148 0.852
## left son=2 (144 obs) right son=3 (2171 obs)
## Primary splits:
## total_day_minutes < 265.45 to the right, improve=60.145020, (0 missing)
## total_day_charge < 45.125 to the right, improve=60.145020, (0 missing)
## number_customer_service_calls < 3.5 to the right, improve=53.641430, (0 missing)
## international_plan splits as RL, improve=43.729370, (0 missing)
## voice_mail_plan splits as LR, improve= 6.089388, (0 missing)
## Surrogate splits:
## total_day_charge < 45.125 to the right, agree=1, adj=1, (0 split)
##
## Node number 2: 144 observations, complexity param=0.07602339
## predicted class=yes expected loss=0.4097222 P(node) =0.06220302
## class counts: 85 59
## probabilities: 0.590 0.410
## left son=4 (110 obs) right son=5 (34 obs)
## Primary splits:
## voice_mail_plan splits as LR, improve=19.884860, (0 missing)
## number_vmail_messages < 9.5 to the left, improve=19.884860, (0 missing)
## total_eve_minutes < 167.05 to the right, improve=14.540020, (0 missing)
## total_eve_charge < 14.2 to the right, improve=14.540020, (0 missing)
## total_day_minutes < 283.9 to the right, improve= 6.339827, (0 missing)
## Surrogate splits:
## number_vmail_messages < 9.5 to the left, agree=1.000, adj=1.000, (0 split)
## total_night_minutes < 110.3 to the right, agree=0.785, adj=0.088, (0 split)
## total_night_charge < 4.965 to the right, agree=0.785, adj=0.088, (0 split)
## total_night_calls < 50 to the right, agree=0.778, adj=0.059, (0 split)
## total_intl_minutes < 15.3 to the left, agree=0.771, adj=0.029, (0 split)
##
## Node number 3: 2171 observations, complexity param=0.0745614
## predicted class=no expected loss=0.1183786 P(node) =0.937797
## class counts: 257 1914
## probabilities: 0.118 0.882
## left son=6 (168 obs) right son=7 (2003 obs)
## Primary splits:
## number_customer_service_calls < 3.5 to the right, improve=56.398210, (0 missing)
## international_plan splits as RL, improve=43.059160, (0 missing)
## total_day_minutes < 224.15 to the right, improve=10.847440, (0 missing)
## total_day_charge < 38.105 to the right, improve=10.847440, (0 missing)
## total_intl_minutes < 13.15 to the right, improve= 6.347319, (0 missing)
##
## Node number 4: 110 observations, complexity param=0.02631579
## predicted class=yes expected loss=0.2636364 P(node) =0.0475162
## class counts: 81 29
## probabilities: 0.736 0.264
## left son=8 (67 obs) right son=9 (43 obs)
## Primary splits:
## total_eve_minutes < 188.5 to the right, improve=16.419610, (0 missing)
## total_eve_charge < 16.025 to the right, improve=16.419610, (0 missing)
## total_night_minutes < 206.85 to the right, improve= 5.350500, (0 missing)
## total_night_charge < 9.305 to the right, improve= 5.350500, (0 missing)
## total_day_minutes < 281.15 to the right, improve= 5.254545, (0 missing)
## Surrogate splits:
## total_eve_charge < 16.025 to the right, agree=1.000, adj=1.000, (0 split)
## total_night_calls < 82 to the right, agree=0.655, adj=0.116, (0 split)
## total_intl_minutes < 3.35 to the right, agree=0.636, adj=0.070, (0 split)
## total_intl_charge < 0.905 to the right, agree=0.636, adj=0.070, (0 split)
## total_day_minutes < 268.55 to the right, agree=0.627, adj=0.047, (0 split)
##
## Node number 5: 34 observations
## predicted class=no expected loss=0.1176471 P(node) =0.01468683
## class counts: 4 30
## probabilities: 0.118 0.882
##
## Node number 6: 168 observations, complexity param=0.0745614
## predicted class=yes expected loss=0.4880952 P(node) =0.07257019
## class counts: 86 82
## probabilities: 0.512 0.488
## left son=12 (71 obs) right son=13 (97 obs)
## Primary splits:
## total_day_minutes < 160.2 to the left, improve=29.655880, (0 missing)
## total_day_charge < 27.235 to the left, improve=29.655880, (0 missing)
## total_eve_minutes < 180.65 to the left, improve= 8.556953, (0 missing)
## total_eve_charge < 15.355 to the left, improve= 8.556953, (0 missing)
## number_customer_service_calls < 4.5 to the right, improve= 5.975362, (0 missing)
## Surrogate splits:
## total_day_charge < 27.235 to the left, agree=1.000, adj=1.000, (0 split)
## total_night_calls < 79 to the left, agree=0.625, adj=0.113, (0 split)
## total_intl_calls < 2.5 to the left, agree=0.619, adj=0.099, (0 split)
## number_customer_service_calls < 4.5 to the right, agree=0.607, adj=0.070, (0 split)
## total_eve_calls < 89.5 to the left, agree=0.601, adj=0.056, (0 split)
##
## Node number 7: 2003 observations, complexity param=0.05555556
## predicted class=no expected loss=0.08537194 P(node) =0.8652268
## class counts: 171 1832
## probabilities: 0.085 0.915
## left son=14 (188 obs) right son=15 (1815 obs)
## Primary splits:
## international_plan splits as RL, improve=42.194510, (0 missing)
## total_day_minutes < 224.15 to the right, improve=16.838410, (0 missing)
## total_day_charge < 38.105 to the right, improve=16.838410, (0 missing)
## total_intl_minutes < 13.15 to the right, improve= 6.210678, (0 missing)
## total_intl_charge < 3.55 to the right, improve= 6.210678, (0 missing)
##
## Node number 8: 67 observations
## predicted class=yes expected loss=0.04477612 P(node) =0.02894168
## class counts: 64 3
## probabilities: 0.955 0.045
##
## Node number 9: 43 observations, complexity param=0.02046784
## predicted class=no expected loss=0.3953488 P(node) =0.01857451
## class counts: 17 26
## probabilities: 0.395 0.605
## left son=18 (19 obs) right son=19 (24 obs)
## Primary splits:
## total_day_minutes < 282.7 to the right, improve=5.680947, (0 missing)
## total_day_charge < 48.06 to the right, improve=5.680947, (0 missing)
## total_night_minutes < 212.65 to the right, improve=4.558140, (0 missing)
## total_night_charge < 9.57 to the right, improve=4.558140, (0 missing)
## total_eve_minutes < 145.4 to the right, improve=4.356169, (0 missing)
## Surrogate splits:
## total_day_charge < 48.06 to the right, agree=1.000, adj=1.000, (0 split)
## total_day_calls < 103 to the left, agree=0.674, adj=0.263, (0 split)
## total_eve_calls < 104.5 to the left, agree=0.674, adj=0.263, (0 split)
## total_intl_minutes < 11.55 to the left, agree=0.651, adj=0.211, (0 split)
## total_intl_charge < 3.12 to the left, agree=0.651, adj=0.211, (0 split)
##
## Node number 12: 71 observations
## predicted class=yes expected loss=0.1408451 P(node) =0.03066955
## class counts: 61 10
## probabilities: 0.859 0.141
##
## Node number 13: 97 observations, complexity param=0.01754386
## predicted class=no expected loss=0.257732 P(node) =0.04190065
## class counts: 25 72
## probabilities: 0.258 0.742
## left son=26 (20 obs) right son=27 (77 obs)
## Primary splits:
## total_eve_minutes < 155.5 to the left, improve=7.753662, (0 missing)
## total_eve_charge < 13.22 to the left, improve=7.753662, (0 missing)
## total_intl_minutes < 13.55 to the right, improve=2.366149, (0 missing)
## total_intl_charge < 3.66 to the right, improve=2.366149, (0 missing)
## number_customer_service_calls < 4.5 to the right, improve=2.297667, (0 missing)
## Surrogate splits:
## total_eve_charge < 13.22 to the left, agree=1.000, adj=1.00, (0 split)
## total_night_calls < 143.5 to the right, agree=0.814, adj=0.10, (0 split)
## total_eve_calls < 62 to the left, agree=0.804, adj=0.05, (0 split)
##
## Node number 14: 188 observations, complexity param=0.05555556
## predicted class=no expected loss=0.4042553 P(node) =0.0812095
## class counts: 76 112
## probabilities: 0.404 0.596
## left son=28 (38 obs) right son=29 (150 obs)
## Primary splits:
## total_intl_calls < 2.5 to the left, improve=33.806520, (0 missing)
## total_intl_minutes < 13.1 to the right, improve=30.527050, (0 missing)
## total_intl_charge < 3.535 to the right, improve=30.527050, (0 missing)
## total_day_minutes < 221.95 to the right, improve= 3.386095, (0 missing)
## total_day_charge < 37.735 to the right, improve= 3.386095, (0 missing)
##
## Node number 15: 1815 observations, complexity param=0.02339181
## predicted class=no expected loss=0.0523416 P(node) =0.7840173
## class counts: 95 1720
## probabilities: 0.052 0.948
## left son=30 (251 obs) right son=31 (1564 obs)
## Primary splits:
## total_day_minutes < 224.15 to the right, improve=12.5649300, (0 missing)
## total_day_charge < 38.105 to the right, improve=12.5649300, (0 missing)
## total_eve_minutes < 244.95 to the right, improve= 4.7875890, (0 missing)
## total_eve_charge < 20.825 to the right, improve= 4.7875890, (0 missing)
## total_night_minutes < 163.85 to the right, improve= 0.9074391, (0 missing)
## Surrogate splits:
## total_day_charge < 38.105 to the right, agree=1, adj=1, (0 split)
##
## Node number 18: 19 observations
## predicted class=yes expected loss=0.3157895 P(node) =0.008207343
## class counts: 13 6
## probabilities: 0.684 0.316
##
## Node number 19: 24 observations
## predicted class=no expected loss=0.1666667 P(node) =0.01036717
## class counts: 4 20
## probabilities: 0.167 0.833
##
## Node number 26: 20 observations
## predicted class=yes expected loss=0.35 P(node) =0.008639309
## class counts: 13 7
## probabilities: 0.650 0.350
##
## Node number 27: 77 observations
## predicted class=no expected loss=0.1558442 P(node) =0.03326134
## class counts: 12 65
## probabilities: 0.156 0.844
##
## Node number 28: 38 observations
## predicted class=yes expected loss=0 P(node) =0.01641469
## class counts: 38 0
## probabilities: 1.000 0.000
##
## Node number 29: 150 observations, complexity param=0.05555556
## predicted class=no expected loss=0.2533333 P(node) =0.06479482
## class counts: 38 112
## probabilities: 0.253 0.747
## left son=58 (32 obs) right son=59 (118 obs)
## Primary splits:
## total_intl_minutes < 13.1 to the right, improve=45.356840, (0 missing)
## total_intl_charge < 3.535 to the right, improve=45.356840, (0 missing)
## total_day_calls < 95.5 to the left, improve= 4.036407, (0 missing)
## total_day_minutes < 237.75 to the right, improve= 1.879020, (0 missing)
## total_day_charge < 40.42 to the right, improve= 1.879020, (0 missing)
## Surrogate splits:
## total_intl_charge < 3.535 to the right, agree=1.0, adj=1.000, (0 split)
## total_day_minutes < 52.45 to the left, agree=0.8, adj=0.063, (0 split)
## total_day_charge < 8.92 to the left, agree=0.8, adj=0.063, (0 split)
##
## Node number 30: 251 observations, complexity param=0.02339181
## predicted class=no expected loss=0.1992032 P(node) =0.1084233
## class counts: 50 201
## probabilities: 0.199 0.801
## left son=60 (36 obs) right son=61 (215 obs)
## Primary splits:
## total_eve_minutes < 259.8 to the right, improve=22.993380, (0 missing)
## total_eve_charge < 22.08 to the right, improve=22.993380, (0 missing)
## voice_mail_plan splits as LR, improve= 4.745664, (0 missing)
## number_vmail_messages < 7.5 to the left, improve= 4.745664, (0 missing)
## total_night_minutes < 181.15 to the right, improve= 3.509731, (0 missing)
## Surrogate splits:
## total_eve_charge < 22.08 to the right, agree=1, adj=1, (0 split)
##
## Node number 31: 1564 observations
## predicted class=no expected loss=0.02877238 P(node) =0.675594
## class counts: 45 1519
## probabilities: 0.029 0.971
##
## Node number 58: 32 observations
## predicted class=yes expected loss=0 P(node) =0.01382289
## class counts: 32 0
## probabilities: 1.000 0.000
##
## Node number 59: 118 observations
## predicted class=no expected loss=0.05084746 P(node) =0.05097192
## class counts: 6 112
## probabilities: 0.051 0.949
##
## Node number 60: 36 observations
## predicted class=yes expected loss=0.2777778 P(node) =0.01555076
## class counts: 26 10
## probabilities: 0.722 0.278
##
## Node number 61: 215 observations
## predicted class=no expected loss=0.1116279 P(node) =0.09287257
## class counts: 24 191
## probabilities: 0.112 0.888
# plot the decision tree
par(mfrow=c(1,1))
?plot.rpart
plot(churn.rp, uniform=TRUE,branch = 0.6, margin=0.1)
text(churn.rp, all=TRUE, use.n=TRUE, cex=0.7)
library('rpart.plot')
rpart.plot(churn.rp)
printcp(churn.rp)
##
## Classification tree:
## rpart(formula = churn ~ ., data = trainset, control = con)
##
## Variables actually used in tree construction:
## [1] international_plan number_customer_service_calls
## [3] total_day_minutes total_eve_minutes
## [5] total_intl_calls total_intl_minutes
## [7] voice_mail_plan
##
## Root node error: 342/2315 = 0.14773
##
## n= 2315
##
## CP nsplit rel error xerror xstd
## 1 0.076023 0 1.00000 1.00000 0.049920
## 2 0.074561 2 0.84795 0.99708 0.049860
## 3 0.055556 4 0.69883 0.76023 0.044421
## 4 0.026316 7 0.49415 0.52632 0.037673
## 5 0.023392 8 0.46784 0.52047 0.037481
## 6 0.020468 10 0.42105 0.50877 0.037092
## 7 0.017544 11 0.40058 0.47076 0.035788
## 8 0.010000 12 0.38304 0.47661 0.035993
plotcp(churn.rp)
# find the CP value with the minimum cross-validation error
min_row = which.min(churn.rp$cptable[,"xerror"])
churn.cp = churn.rp$cptable[min_row, "CP"]
# prune the tree with churn.cp as the complexity threshold
prune.tree=prune(churn.rp, cp=churn.cp)
plot(prune.tree, margin=0.1)
text(prune.tree, all=TRUE, use.n=TRUE, cex=0.7)
test_tree = prune(churn.rp,cp=0.06)
plot(test_tree, margin=0.1)
text(test_tree, all=TRUE, use.n=TRUE, cex=0.7)
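# A sketch of the common 1-SE pruning rule, as an alternative to taking the
# raw minimum xerror above: keep the simplest tree whose cross-validation
# error is within one standard error of the minimum (xerr, se_cut, cp_1se and
# prune_1se are illustrative names).
xerr = churn.rp$cptable[, "xerror"]
se_cut = xerr[min_row] + churn.rp$cptable[min_row, "xstd"]
cp_1se = churn.rp$cptable[which(xerr <= se_cut)[1], "CP"]
prune_1se = prune(churn.rp, cp = cp_1se)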
predictions <-predict(prune.tree, testset, type='class')
table(predictions,testset$churn)
##
## predictions yes no
## yes 95 14
## no 46 863
#install.packages('caret')
#install.packages('e1071')
library('caret')
## Loading required package: lattice
## Loading required package: ggplot2
library('e1071')
confusionMatrix(table(predictions, testset$churn))
## Confusion Matrix and Statistics
##
##
## predictions yes no
## yes 95 14
## no 46 863
##
## Accuracy : 0.9411
## 95% CI : (0.9248, 0.9547)
## No Information Rate : 0.8615
## P-Value [Acc > NIR] : 2.786e-16
##
## Kappa : 0.727
## Mcnemar's Test P-Value : 6.279e-05
##
## Sensitivity : 0.67376
## Specificity : 0.98404
## Pos Pred Value : 0.87156
## Neg Pred Value : 0.94939
## Prevalence : 0.13851
## Detection Rate : 0.09332
## Detection Prevalence : 0.10707
## Balanced Accuracy : 0.82890
##
## 'Positive' Class : yes
##
?confusionMatrix
## Help on topic 'confusionMatrix' was found in the following
## packages:
##
## Package Library
## caret /Library/Frameworks/R.framework/Versions/3.5/Resources/library
## ModelMetrics /Library/Frameworks/R.framework/Versions/3.5/Resources/library
##
##
## Using the first match ...
test_predict = predict(prune.tree,churnTest,type='class')
confusionMatrix(table(test_predict,churnTest$churn))
## Confusion Matrix and Statistics
##
##
## test_predict yes no
## yes 147 17
## no 77 1426
##
## Accuracy : 0.9436
## 95% CI : (0.9314, 0.9542)
## No Information Rate : 0.8656
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7267
## Mcnemar's Test P-Value : 1.162e-09
##
## Sensitivity : 0.65625
## Specificity : 0.98822
## Pos Pred Value : 0.89634
## Neg Pred Value : 0.94877
## Prevalence : 0.13437
## Detection Rate : 0.08818
## Detection Prevalence : 0.09838
## Balanced Accuracy : 0.82223
##
## 'Positive' Class : yes
##
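# The headline numbers above can be recomputed by hand from the confusion
# table, tying the confusionMatrix() output back to its definitions (tab is
# an illustrative name):
tab = table(test_predict, churnTest$churn)
sum(diag(tab)) / sum(tab)              # accuracy: (147 + 1426) / 1667
tab["yes", "yes"] / sum(tab[, "yes"])  # sensitivity: recall of the 'yes' class
tab["no", "no"] / sum(tab[, "no"])     # specificity: recall of the 'no' class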
#install.packages("party")
library('party')
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
ctree.model = ctree(churn ~ . , data = trainset)
plot(ctree.model, margin=0.1)
daycharge.model = ctree(churn ~ total_day_charge + international_plan, data = trainset)
plot(daycharge.model)
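# ctree() has no cp parameter; tree size is controlled through the test
# criterion instead. A hedged sketch that raises the significance bar, which
# usually yields a smaller tree (ctree.small is an illustrative name):
ctree.small = ctree(churn ~ ., data = trainset,
                    controls = ctree_control(mincriterion = 0.99))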
ctree.predict = predict(ctree.model ,testset)
table(ctree.predict, testset$churn)
##
## ctree.predict yes no
## yes 99 15
## no 42 862
confusionMatrix(table(ctree.predict, testset$churn))
## Confusion Matrix and Statistics
##
##
## ctree.predict yes no
## yes 99 15
## no 42 862
##
## Accuracy : 0.944
## 95% CI : (0.9281, 0.9573)
## No Information Rate : 0.8615
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7449
## Mcnemar's Test P-Value : 0.0005736
##
## Sensitivity : 0.70213
## Specificity : 0.98290
## Pos Pred Value : 0.86842
## Neg Pred Value : 0.95354
## Prevalence : 0.13851
## Detection Rate : 0.09725
## Detection Prevalence : 0.11198
## Balanced Accuracy : 0.84251
##
## 'Positive' Class : yes
##
#install.packages("C50")
library(C50)
c50.model = C5.0(churn ~., data=trainset)
?C5.0Control
c = C5.0Control(minCases = 20) # minCases: smallest number of samples that must fall into at least two of the splits
c50.model = C5.0(churn ~., data=trainset,control = c)
summary(c50.model)
##
## Call:
## C5.0.formula(formula = churn ~ ., data = trainset, control = c)
##
##
## C5.0 [Release 2.07 GPL Edition] Tue Nov 6 16:25:58 2018
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 2315 cases (17 attributes) from undefined.data
##
## Decision tree:
##
## number_customer_service_calls > 3:
## :...total_day_minutes <= 160.1: yes (71/10)
## : total_day_minutes > 160.1: no (108/32)
## number_customer_service_calls <= 3:
## :...international_plan = yes:
## :...total_intl_calls <= 2: yes (41)
## : total_intl_calls > 2:
## : :...total_intl_minutes <= 13.1: no (134/13)
## : total_intl_minutes > 13.1: yes (34)
## international_plan = no:
## :...total_day_minutes <= 224.1: no (1564/45)
## total_day_minutes > 224.1:
## :...voice_mail_plan = yes: no (97/4)
## voice_mail_plan = no:
## :...total_eve_charge <= 17.47:
## :...total_day_minutes <= 278.4: no (124/10)
## : total_day_minutes > 278.4: yes (20/5)
## total_eve_charge > 17.47:
## :...total_day_minutes > 264: yes (46)
## total_day_minutes <= 264:
## :...total_eve_charge > 22.04: yes (29/4)
## total_eve_charge <= 22.04:
## :...total_night_charge <= 9.04: no (23/1)
## total_night_charge > 9.04: yes (24/9)
##
##
## Evaluation on training data (2315 cases):
##
## Decision Tree
## ----------------
## Size Errors
##
## 13 133( 5.7%) <<
##
##
## (a) (b) <-classified as
## ---- ----
## 237 105 (a): class yes
## 28 1945 (b): class no
##
##
## Attribute usage:
##
## 100.00% number_customer_service_calls
## 92.27% international_plan
## 90.97% total_day_minutes
## 15.68% voice_mail_plan
## 11.49% total_eve_charge
## 9.03% total_intl_calls
## 7.26% total_intl_minutes
## 2.03% total_night_charge
##
##
## Time: 0.0 secs
plot(c50.model)
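# C5.0 can also fit a ruleset instead of a tree: passing rules = TRUE makes
# summary() print if/then rules rather than the tree shown above (c50.rules is
# an illustrative name; plot() does not apply to rule models).
c50.rules = C5.0(churn ~ ., data = trainset, rules = TRUE, control = c)
summary(c50.rules)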
c50.predict = predict(c50.model,testset,type='class')
table(c50.predict, testset$churn)
##
## c50.predict yes no
## yes 97 15
## no 44 862
confusionMatrix(table(c50.predict, testset$churn))
## Confusion Matrix and Statistics
##
##
## c50.predict yes no
## yes 97 15
## no 44 862
##
## Accuracy : 0.942
## 95% CI : (0.9259, 0.9556)
## No Information Rate : 0.8615
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7342
## Mcnemar's Test P-Value : 0.0002671
##
## Sensitivity : 0.68794
## Specificity : 0.98290
## Pos Pred Value : 0.86607
## Neg Pred Value : 0.95143
## Prevalence : 0.13851
## Detection Rate : 0.09528
## Detection Prevalence : 0.11002
## Balanced Accuracy : 0.83542
##
## 'Positive' Class : yes
##
#install.packages("caret")
library(caret)
control = trainControl(method="repeatedcv", number=10, repeats=3) # 10-fold cross-validation, repeated 3 times
model = train(churn~., data=churnTrain, method="rpart", trControl=control)
control = trainControl(method="repeatedcv", number=10, repeats=3, classProbs=TRUE, summaryFunction=multiClassSummary) # also keep class probabilities and report the extended metric set
tune_funs = expand.grid(cp=seq(0.01,0.1,0.01)) # candidate cp grid: 0.01, 0.02, ..., 0.1
model = train(churn~., data=churnTrain, method="rpart", trControl=control, tuneGrid=tune_funs)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
model
## CART
##
## 3333 samples
## 16 predictor
## 2 classes: 'yes', 'no'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 2999, 3000, 3000, 2999, 3000, 3000, ...
## Resampling results across tuning parameters:
##
## cp logLoss AUC prAUC Accuracy Kappa
## 0.01 0.2183740 0.8993844 0.748816542 0.9410941 0.74180070
## 0.02 0.2412734 0.8541321 0.671743381 0.9267945 0.66829253
## 0.03 0.2362443 0.8425755 0.732258969 0.9286940 0.67811511
## 0.04 0.2362443 0.8425755 0.732258969 0.9286940 0.67811511
## 0.05 0.2474592 0.8276578 0.686167398 0.9245938 0.65323752
## 0.06 0.3134127 0.7350591 0.258464836 0.9004909 0.50678549
## 0.07 0.3134127 0.7350591 0.258464836 0.9004909 0.50678549
## 0.08 0.3545264 0.6493653 0.281334959 0.8822898 0.36579912
## 0.09 0.4033295 0.5386085 0.057210892 0.8561885 0.10038175
## 0.10 0.4130831 0.5045595 0.009106077 0.8545878 0.01213431
## F1 Sensitivity Specificity Pos_Pred_Value Neg_Pred_Value
## 0.7751921 0.70522959 0.9810526 0.8643794 0.9516527
## 0.7088791 0.61848073 0.9790643 0.8385450 0.9381492
## 0.7178292 0.62872732 0.9795322 0.8408383 0.9397181
## 0.7178292 0.62872732 0.9795322 0.8408383 0.9397181
## 0.6945537 0.60043934 0.9795322 0.8323579 0.9355206
## 0.5575400 0.43408447 0.9795322 0.7836547 0.9108595
## 0.5575400 0.43408447 0.9795322 0.7836547 0.9108595
## 0.4305091 0.30308957 0.9804678 0.7261865 0.8927735
## 0.3164827 0.09136905 0.9858480 0.5194401 0.8652928
## 0.2502704 0.01169218 0.9974269 0.4368421 0.8562896
## Precision Recall Detection_Rate Balanced_Accuracy
## 0.8643794 0.70522959 0.102207297 0.8431411
## 0.8385450 0.61848073 0.089610569 0.7987725
## 0.8408383 0.62872732 0.091108474 0.8041297
## 0.8408383 0.62872732 0.091108474 0.8041297
## 0.8323579 0.60043934 0.087008266 0.7899858
## 0.7836547 0.43408447 0.062905420 0.7068083
## 0.7836547 0.43408447 0.062905420 0.7068083
## 0.7261865 0.30308957 0.043906481 0.6417787
## 0.5194401 0.09136905 0.013206919 0.5386085
## 0.4368421 0.01169218 0.001699304 0.5045595
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.01.
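# The selected tuning value and the full resampling grid can be read straight
# off the train object (bestTune and results are standard caret fields):
model$bestTune                                   # cp = 0.01, as reported above
head(model$results[, c("cp", "Accuracy", "Kappa")])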
predictions = predict(model, churnTest)
confusionMatrix(table(predictions,churnTest$churn))
## Confusion Matrix and Statistics
##
##
## predictions yes no
## yes 145 15
## no 79 1428
##
## Accuracy : 0.9436
## 95% CI : (0.9314, 0.9542)
## No Information Rate : 0.8656
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7243
## Mcnemar's Test P-Value : 8.142e-11
##
## Sensitivity : 0.64732
## Specificity : 0.98960
## Pos Pred Value : 0.90625
## Neg Pred Value : 0.94758
## Prevalence : 0.13437
## Detection Rate : 0.08698
## Detection Prevalence : 0.09598
## Balanced Accuracy : 0.81846
##
## 'Positive' Class : yes
##
# list all algorithms implemented in the caret package
names(getModelInfo())
## [1] "ada" "AdaBag" "AdaBoost.M1"
## [4] "adaboost" "amdai" "ANFIS"
## [7] "avNNet" "awnb" "awtan"
## [10] "bag" "bagEarth" "bagEarthGCV"
## [13] "bagFDA" "bagFDAGCV" "bam"
## [16] "bartMachine" "bayesglm" "binda"
## [19] "blackboost" "blasso" "blassoAveraged"
## [22] "bridge" "brnn" "BstLm"
## [25] "bstSm" "bstTree" "C5.0"
## [28] "C5.0Cost" "C5.0Rules" "C5.0Tree"
## [31] "cforest" "chaid" "CSimca"
## [34] "ctree" "ctree2" "cubist"
## [37] "dda" "deepboost" "DENFIS"
## [40] "dnn" "dwdLinear" "dwdPoly"
## [43] "dwdRadial" "earth" "elm"
## [46] "enet" "evtree" "extraTrees"
## [49] "fda" "FH.GBML" "FIR.DM"
## [52] "foba" "FRBCS.CHI" "FRBCS.W"
## [55] "FS.HGD" "gam" "gamboost"
## [58] "gamLoess" "gamSpline" "gaussprLinear"
## [61] "gaussprPoly" "gaussprRadial" "gbm_h2o"
## [64] "gbm" "gcvEarth" "GFS.FR.MOGUL"
## [67] "GFS.LT.RS" "GFS.THRIFT" "glm.nb"
## [70] "glm" "glmboost" "glmnet_h2o"
## [73] "glmnet" "glmStepAIC" "gpls"
## [76] "hda" "hdda" "hdrda"
## [79] "HYFIS" "icr" "J48"
## [82] "JRip" "kernelpls" "kknn"
## [85] "knn" "krlsPoly" "krlsRadial"
## [88] "lars" "lars2" "lasso"
## [91] "lda" "lda2" "leapBackward"
## [94] "leapForward" "leapSeq" "Linda"
## [97] "lm" "lmStepAIC" "LMT"
## [100] "loclda" "logicBag" "LogitBoost"
## [103] "logreg" "lssvmLinear" "lssvmPoly"
## [106] "lssvmRadial" "lvq" "M5"
## [109] "M5Rules" "manb" "mda"
## [112] "Mlda" "mlp" "mlpKerasDecay"
## [115] "mlpKerasDecayCost" "mlpKerasDropout" "mlpKerasDropoutCost"
## [118] "mlpML" "mlpSGD" "mlpWeightDecay"
## [121] "mlpWeightDecayML" "monmlp" "msaenet"
## [124] "multinom" "mxnet" "mxnetAdam"
## [127] "naive_bayes" "nb" "nbDiscrete"
## [130] "nbSearch" "neuralnet" "nnet"
## [133] "nnls" "nodeHarvest" "null"
## [136] "OneR" "ordinalNet" "ORFlog"
## [139] "ORFpls" "ORFridge" "ORFsvm"
## [142] "ownn" "pam" "parRF"
## [145] "PART" "partDSA" "pcaNNet"
## [148] "pcr" "pda" "pda2"
## [151] "penalized" "PenalizedLDA" "plr"
## [154] "pls" "plsRglm" "polr"
## [157] "ppr" "PRIM" "protoclass"
## [160] "qda" "QdaCov" "qrf"
## [163] "qrnn" "randomGLM" "ranger"
## [166] "rbf" "rbfDDA" "Rborist"
## [169] "rda" "regLogistic" "relaxo"
## [172] "rf" "rFerns" "RFlda"
## [175] "rfRules" "ridge" "rlda"
## [178] "rlm" "rmda" "rocc"
## [181] "rotationForest" "rotationForestCp" "rpart"
## [184] "rpart1SE" "rpart2" "rpartCost"
## [187] "rpartScore" "rqlasso" "rqnc"
## [190] "RRF" "RRFglobal" "rrlda"
## [193] "RSimca" "rvmLinear" "rvmPoly"
## [196] "rvmRadial" "SBC" "sda"
## [199] "sdwd" "simpls" "SLAVE"
## [202] "slda" "smda" "snn"
## [205] "sparseLDA" "spikeslab" "spls"
## [208] "stepLDA" "stepQDA" "superpc"
## [211] "svmBoundrangeString" "svmExpoString" "svmLinear"
## [214] "svmLinear2" "svmLinear3" "svmLinearWeights"
## [217] "svmLinearWeights2" "svmPoly" "svmRadial"
## [220] "svmRadialCost" "svmRadialSigma" "svmRadialWeights"
## [223] "svmSpectrumString" "tan" "tanSearch"
## [226] "treebag" "vbmpRadial" "vglmAdjCat"
## [229] "vglmContRatio" "vglmCumulative" "widekernelpls"
## [232] "WM" "wsrf" "xgbDART"
## [235] "xgbLinear" "xgbTree" "xyf"
# check whether the caret package implements the rpart algorithm
names(getModelInfo())[grep('rpart',names(getModelInfo()))]
## [1] "rpart" "rpart1SE" "rpart2" "rpartCost" "rpartScore"
# look up the rpart model information
getModelInfo('rpart')
## $rpart
## $rpart$label
## [1] "CART"
##
## $rpart$library
## [1] "rpart"
##
## $rpart$type
## [1] "Regression" "Classification"
##
## $rpart$parameters
## parameter class label
## 1 cp numeric Complexity Parameter
##
## $rpart$grid
## function (x, y, len = NULL, search = "grid")
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
## initialFit <- initialFit[order(-initialFit[, "CP"]), , drop = FALSE]
## if (search == "grid") {
## if (nrow(initialFit) < len) {
## tuneSeq <- data.frame(cp = seq(min(initialFit[, "CP"]),
## max(initialFit[, "CP"]), length = len))
## }
## else tuneSeq <- data.frame(cp = initialFit[1:len, "CP"])
## colnames(tuneSeq) <- "cp"
## }
## else {
## tuneSeq <- data.frame(cp = unique(sample(initialFit[,
## "CP"], size = len, replace = TRUE)))
## }
## tuneSeq
## }
##
## $rpart$loop
## function (grid)
## {
## grid <- grid[order(grid$cp, decreasing = FALSE), , drop = FALSE]
## loop <- grid[1, , drop = FALSE]
## submodels <- list(grid[-1, , drop = FALSE])
## list(loop = loop, submodels = submodels)
## }
##
## $rpart$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## cpValue <- if (!last)
## param$cp
## else 0
## theDots <- list(...)
## if (any(names(theDots) == "control")) {
## theDots$control$cp <- cpValue
## theDots$control$xval <- 0
## ctl <- theDots$control
## theDots$control <- NULL
## }
## else ctl <- rpart::rpart.control(cp = cpValue, xval = 0)
## if (!is.null(wts))
## theDots$weights <- wts
## modelArgs <- c(list(formula = as.formula(".outcome ~ ."),
## data = if (is.data.frame(x)) x else as.data.frame(x),
## control = ctl), theDots)
## modelArgs$data$.outcome <- y
## out <- do.call(rpart::rpart, modelArgs)
## if (last)
## out <- rpart::prune.rpart(out, cp = param$cp)
## out
## }
##
## $rpart$predict
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## pType <- if (modelFit$problemType == "Classification")
## "class"
## else "vector"
## out <- predict(modelFit, newdata, type = pType)
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## for (j in seq(along = submodels$cp)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
## tmp[[j + 1]] <- predict(prunedFit, newdata, type = pType)
## }
## out <- tmp
## }
## out
## }
##
## $rpart$prob
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## out <- predict(modelFit, newdata, type = "prob")
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## for (j in seq(along = submodels$cp)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
## tmpProb <- predict(prunedFit, newdata, type = "prob")
## tmp[[j + 1]] <- as.data.frame(tmpProb[, modelFit$obsLevels,
## drop = FALSE])
## }
## out <- tmp
## }
## out
## }
##
## $rpart$predictors
## function (x, surrogate = TRUE, ...)
## {
## out <- as.character(x$frame$var)
## out <- out[!(out %in% c("<leaf>"))]
## if (surrogate) {
## splits <- x$splits
## splits <- splits[splits[, "adj"] > 0, ]
## out <- c(out, rownames(splits))
## }
## unique(out)
## }
##
## $rpart$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...)
## {
## if (nrow(object$splits) > 0) {
## tmp <- rownames(object$splits)
## rownames(object$splits) <- 1:nrow(object$splits)
## splits <- data.frame(object$splits)
## splits$var <- tmp
## splits$type <- ""
## frame <- as.data.frame(object$frame)
## index <- 0
## for (i in 1:nrow(frame)) {
## if (frame$var[i] != "<leaf>") {
## index <- index + 1
## splits$type[index] <- "primary"
## if (frame$ncompete[i] > 0) {
## for (j in 1:frame$ncompete[i]) {
## index <- index + 1
## splits$type[index] <- "competing"
## }
## }
## if (frame$nsurrogate[i] > 0) {
## for (j in 1:frame$nsurrogate[i]) {
## index <- index + 1
## splits$type[index] <- "surrogate"
## }
## }
## }
## }
## splits$var <- factor(as.character(splits$var))
## if (!surrogates)
## splits <- subset(splits, type != "surrogate")
## if (!competes)
## splits <- subset(splits, type != "competing")
## out <- aggregate(splits$improve, list(Variable = splits$var),
## sum, na.rm = TRUE)
## }
## else {
## out <- data.frame(x = numeric(), Vaiable = character())
## }
## allVars <- colnames(attributes(object$terms)$factors)
## if (!all(allVars %in% out$Variable)) {
## missingVars <- allVars[!(allVars %in% out$Variable)]
## zeros <- data.frame(x = rep(0, length(missingVars)),
## Variable = missingVars)
## out <- rbind(out, zeros)
## }
## out2 <- data.frame(Overall = out$x)
## rownames(out2) <- out$Variable
## out2
## }
##
## $rpart$levels
## function (x)
## x$obsLevels
##
## $rpart$trim
## function (x)
## {
## x$call <- list(na.action = (x$call)$na.action)
## x$x <- NULL
## x$y <- NULL
## x$where <- NULL
## x
## }
##
## $rpart$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"
##
## $rpart$sort
## function (x)
## x[order(x[, 1], decreasing = TRUE), ]
##
##
## $rpart1SE
## $rpart1SE$label
## [1] "CART"
##
## $rpart1SE$library
## [1] "rpart"
##
## $rpart1SE$type
## [1] "Regression" "Classification"
##
## $rpart1SE$parameters
## parameter class label
## 1 parameter character parameter
##
## $rpart1SE$grid
## function (x, y, len = NULL, search = "grid")
## data.frame(parameter = "none")
##
## $rpart1SE$loop
## NULL
##
## $rpart1SE$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## if (!is.null(wts)) {
## out <- rpart::rpart(.outcome ~ ., data = dat, ...)
## }
## else {
## out <- rpart::rpart(.outcome ~ ., data = dat, weights = wts,
## ...)
## }
## out
## }
##
## $rpart1SE$predict
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## out <- if (modelFit$problemType == "Classification")
## predict(modelFit, newdata, type = "class")
## else predict(modelFit, newdata)
## out
## }
##
## $rpart1SE$prob
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## predict(modelFit, newdata, type = "prob")
## }
##
## $rpart1SE$predictors
## function (x, surrogate = TRUE, ...)
## {
## out <- as.character(x$frame$var)
## out <- out[!(out %in% c("<leaf>"))]
## if (surrogate) {
## splits <- x$splits
## splits <- splits[splits[, "adj"] > 0, ]
## out <- c(out, rownames(splits))
## }
## unique(out)
## }
##
## $rpart1SE$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...)
## {
## tmp <- rownames(object$splits)
## rownames(object$splits) <- 1:nrow(object$splits)
## splits <- data.frame(object$splits)
## splits$var <- tmp
## splits$type <- ""
## frame <- as.data.frame(object$frame)
## index <- 0
## for (i in 1:nrow(frame)) {
## if (frame$var[i] != "<leaf>") {
## index <- index + 1
## splits$type[index] <- "primary"
## if (frame$ncompete[i] > 0) {
## for (j in 1:frame$ncompete[i]) {
## index <- index + 1
## splits$type[index] <- "competing"
## }
## }
## if (frame$nsurrogate[i] > 0) {
## for (j in 1:frame$nsurrogate[i]) {
## index <- index + 1
## splits$type[index] <- "surrogate"
## }
## }
## }
## }
## splits$var <- factor(as.character(splits$var))
## if (!surrogates)
## splits <- subset(splits, type != "surrogate")
## if (!competes)
## splits <- subset(splits, type != "competing")
## out <- aggregate(splits$improve, list(Variable = splits$var),
## sum, na.rm = TRUE)
## allVars <- colnames(attributes(object$terms)$factors)
## if (!all(allVars %in% out$Variable)) {
## missingVars <- allVars[!(allVars %in% out$Variable)]
## zeros <- data.frame(x = rep(0, length(missingVars)),
## Variable = missingVars)
## out <- rbind(out, zeros)
## }
## out2 <- data.frame(Overall = out$x)
## rownames(out2) <- out$Variable
## out2
## }
##
## $rpart1SE$levels
## function (x)
## x$obsLevels
##
## $rpart1SE$trim
## function (x)
## {
## x$call <- list(na.action = (x$call)$na.action)
## x$x <- NULL
## x$y <- NULL
## x$where <- NULL
## x
## }
##
## $rpart1SE$notes
## [1] "This CART model replicates the same process used by the `rpart` function where the model complexity is determined using the one-standard error method. This procedure is replicated inside of the resampling done by `train` so that an external resampling estimate can be obtained."
##
## $rpart1SE$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"
##
## $rpart1SE$sort
## function (x)
## x[order(x[, 1], decreasing = TRUE), ]
##
##
## $rpart2
## $rpart2$label
## [1] "CART"
##
## $rpart2$library
## [1] "rpart"
##
## $rpart2$type
## [1] "Regression" "Classification"
##
## $rpart2$parameters
## parameter class label
## 1 maxdepth numeric Max Tree Depth
##
## $rpart2$grid
## function (x, y, len = NULL, search = "grid")
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
## initialFit <- initialFit[order(-initialFit[, "CP"]), "nsplit",
## drop = FALSE]
## initialFit <- initialFit[initialFit[, "nsplit"] > 0 & initialFit[,
## "nsplit"] <= 30, , drop = FALSE]
## if (search == "grid") {
## if (dim(initialFit)[1] < len) {
## cat("note: only", nrow(initialFit), "possible values of the max tree depth from the initial fit.\n",
## "Truncating the grid to", nrow(initialFit), ".\n\n")
## tuneSeq <- as.data.frame(initialFit)
## }
## else tuneSeq <- as.data.frame(initialFit[1:len, ])
## colnames(tuneSeq) <- "maxdepth"
## }
## else {
## tuneSeq <- data.frame(maxdepth = unique(sample(as.vector(initialFit[,
## 1]), size = len, replace = TRUE)))
## }
## tuneSeq
## }
##
## $rpart2$loop
## function (grid)
## {
## grid <- grid[order(grid$maxdepth, decreasing = TRUE), , drop = FALSE]
## loop <- grid[1, , drop = FALSE]
## submodels <- list(grid[-1, , drop = FALSE])
## list(loop = loop, submodels = submodels)
## }
##
## $rpart2$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## theDots <- list(...)
## if (any(names(theDots) == "control")) {
## theDots$control$maxdepth <- param$maxdepth
## theDots$control$xval <- 0
## ctl <- theDots$control
## theDots$control <- NULL
## }
## else ctl <- rpart::rpart.control(maxdepth = param$maxdepth,
## xval = 0)
## if (!is.null(wts))
## theDots$weights <- wts
## modelArgs <- c(list(formula = as.formula(".outcome ~ ."),
## data = if (is.data.frame(x)) x else as.data.frame(x),
## control = ctl), theDots)
## modelArgs$data$.outcome <- y
## out <- do.call(rpart::rpart, modelArgs)
## out
## }
##
## $rpart2$predict
## function (modelFit, newdata, submodels = NULL)
## {
## depth2cp <- function(x, depth) {
## out <- approx(x[, "nsplit"], x[, "CP"], depth)$y
## out[depth > max(x[, "nsplit"])] <- min(x[, "CP"]) * 0.99
## out
## }
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## pType <- if (modelFit$problemType == "Classification")
## "class"
## else "vector"
## out <- predict(modelFit, newdata, type = pType)
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## cpValues <- depth2cp(modelFit$cptable, submodels$maxdepth)
## for (j in seq(along = cpValues)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = cpValues[j])
## tmp[[j + 1]] <- predict(prunedFit, newdata, type = pType)
## }
## out <- tmp
## }
## out
## }
##
## $rpart2$prob
## function (modelFit, newdata, submodels = NULL)
## {
## depth2cp <- function(x, depth) {
## out <- approx(x[, "nsplit"], x[, "CP"], depth)$y
## out[depth > max(x[, "nsplit"])] <- min(x[, "CP"]) * 0.99
## out
## }
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## out <- predict(modelFit, newdata, type = "prob")
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## cpValues <- depth2cp(modelFit$cptable, submodels$maxdepth)
## for (j in seq(along = cpValues)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = cpValues[j])
## tmpProb <- predict(prunedFit, newdata, type = "prob")
## tmp[[j + 1]] <- as.data.frame(tmpProb[, modelFit$obsLevels,
## drop = FALSE])
## }
## out <- tmp
## }
## out
## }
##
## $rpart2$predictors
## function (x, surrogate = TRUE, ...)
## {
## out <- as.character(x$frame$var)
## out <- out[!(out %in% c("<leaf>"))]
## if (surrogate) {
## splits <- x$splits
## splits <- splits[splits[, "adj"] > 0, ]
## out <- c(out, rownames(splits))
## }
## unique(out)
## }
##
## $rpart2$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...)
## {
## tmp <- rownames(object$splits)
## rownames(object$splits) <- 1:nrow(object$splits)
## splits <- data.frame(object$splits)
## splits$var <- tmp
## splits$type <- ""
## frame <- as.data.frame(object$frame)
## index <- 0
## for (i in 1:nrow(frame)) {
## if (frame$var[i] != "<leaf>") {
## index <- index + 1
## splits$type[index] <- "primary"
## if (frame$ncompete[i] > 0) {
## for (j in 1:frame$ncompete[i]) {
## index <- index + 1
## splits$type[index] <- "competing"
## }
## }
## if (frame$nsurrogate[i] > 0) {
## for (j in 1:frame$nsurrogate[i]) {
## index <- index + 1
## splits$type[index] <- "surrogate"
## }
## }
## }
## }
## splits$var <- factor(as.character(splits$var))
## if (!surrogates)
## splits <- subset(splits, type != "surrogate")
## if (!competes)
## splits <- subset(splits, type != "competing")
## out <- aggregate(splits$improve, list(Variable = splits$var),
## sum, na.rm = TRUE)
## allVars <- colnames(attributes(object$terms)$factors)
## if (!all(allVars %in% out$Variable)) {
## missingVars <- allVars[!(allVars %in% out$Variable)]
## zeros <- data.frame(x = rep(0, length(missingVars)),
## Variable = missingVars)
## out <- rbind(out, zeros)
## }
## out2 <- data.frame(Overall = out$x)
## rownames(out2) <- out$Variable
## out2
## }
##
## $rpart2$levels
## function (x)
## x$obsLevels
##
## $rpart2$trim
## function (x)
## {
## x$call <- list(na.action = (x$call)$na.action)
## x$x <- NULL
## x$y <- NULL
## x$where <- NULL
## x
## }
##
## $rpart2$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"
##
## $rpart2$sort
## function (x)
## x[order(x[, 1]), ]
##
##
## $rpartCost
## $rpartCost$label
## [1] "Cost-Sensitive CART"
##
## $rpartCost$library
## [1] "rpart" "plyr"
##
## $rpartCost$type
## [1] "Classification"
##
## $rpartCost$parameters
## parameter class label
## 1 cp numeric Complexity Parameter
## 2 Cost numeric Cost
##
## $rpartCost$grid
## function (x, y, len = NULL, search = "grid")
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
## initialFit <- initialFit[order(-initialFit[, "CP"]), , drop = FALSE]
## if (search == "grid") {
## if (nrow(initialFit) < len) {
## tuneSeq <- expand.grid(cp = seq(min(initialFit[,
## "CP"]), max(initialFit[, "CP"]), length = len),
## Cost = 1:len)
## }
## else tuneSeq <- data.frame(cp = initialFit[1:len, "CP"],
## Cost = 1:len)
## colnames(tuneSeq) <- c("cp", "Cost")
## }
## else {
## tuneSeq <- data.frame(cp = 10^runif(len, min = -8, max = -1),
## Cost = runif(len, min = 1, max = 30))
## }
## tuneSeq
## }
##
## $rpartCost$loop
## function (grid)
## {
## loop <- plyr::ddply(grid, plyr::.(Cost), function(x) c(cp = min(x$cp)))
## submodels <- vector(mode = "list", length = nrow(loop))
## for (i in seq(along = submodels)) {
## larger_cp <- subset(grid, subset = Cost == loop$Cost[i] &
## cp > loop$cp[i])
## submodels[[i]] <- data.frame(cp = sort(larger_cp$cp))
## }
## list(loop = loop, submodels = submodels)
## }
##
## $rpartCost$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## theDots <- list(...)
## if (any(names(theDots) == "control")) {
## theDots$control$cp <- param$cp
## theDots$control$xval <- 0
## ctl <- theDots$control
## theDots$control <- NULL
## }
## else ctl <- rpart::rpart.control(cp = param$cp, xval = 0)
## lmat <- matrix(c(0, 1, param$Cost, 0), ncol = 2)
## rownames(lmat) <- colnames(lmat) <- levels(y)
## if (any(names(theDots) == "parms")) {
## theDots$parms$loss <- lmat
## }
## else parms <- list(loss = lmat)
## if (!is.null(wts))
## theDots$weights <- wts
## modelArgs <- c(list(formula = as.formula(".outcome ~ ."),
## data = if (is.data.frame(x)) x else as.data.frame(x),
## parms = parms, control = ctl), theDots)
## modelArgs$data$.outcome <- y
## out <- do.call(rpart::rpart, modelArgs)
## out
## }
##
## $rpartCost$predict
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## pType <- if (modelFit$problemType == "Classification")
## "class"
## else "vector"
## out <- predict(modelFit, newdata, type = pType)
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## for (j in seq(along = submodels$cp)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
## tmp[[j + 1]] <- predict(prunedFit, newdata, type = pType)
## }
## out <- tmp
## }
## out
## }
##
## $rpartCost$levels
## function (x)
## x$obsLevels
##
## $rpartCost$prob
## NULL
##
## $rpartCost$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Cost Sensitive Learning" "Two Class Only"
## [5] "Handle Missing Predictor Data" "Accepts Case Weights"
##
## $rpartCost$sort
## function (x)
## x[order(-x$cp, -x$Cost), ]
##
##
## $rpartScore
## $rpartScore$label
## [1] "CART or Ordinal Responses"
##
## $rpartScore$library
## [1] "rpartScore" "plyr"
##
## $rpartScore$type
## [1] "Classification"
##
## $rpartScore$parameters
## parameter class label
## 1 cp numeric Complexity Parameter
## 2 split character Split Function
## 3 prune character Pruning Measure
##
## $rpartScore$grid
## function (x, y, len = NULL, search = "grid")
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
## initialFit <- initialFit[order(-initialFit[, "CP"]), , drop = FALSE]
## if (search == "grid") {
## if (nrow(initialFit) < len) {
## tuneSeq <- expand.grid(cp = seq(min(initialFit[,
## "CP"]), max(initialFit[, "CP"]), length = len),
## split = c("abs", "quad"), prune = c("mr", "mc"))
## }
## else tuneSeq <- expand.grid(cp = initialFit[1:len, "CP"],
## split = c("abs", "quad"), prune = c("mr", "mc"))
## colnames(tuneSeq)[1] <- "cp"
## }
## else {
## tuneSeq <- expand.grid(cp = unique(sample(initialFit[,
## "CP"], size = len, replace = TRUE)), split = c("abs",
## "quad"), prune = c("mr", "mc"))
## }
## tuneSeq
## }
##
## $rpartScore$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## cpValue <- if (!last)
## param$cp
## else 0
## theDots <- list(...)
## if (any(names(theDots) == "control")) {
## theDots$control$cp <- cpValue
## theDots$control$xval <- 0
## ctl <- theDots$control
## theDots$control <- NULL
## }
## else ctl <- rpart::rpart.control(cp = cpValue, xval = 0)
## if (!is.null(wts))
## theDots$weights <- wts
## modelArgs <- c(list(formula = as.formula(".outcome ~ ."),
## data = if (is.data.frame(x)) x else as.data.frame(x),
## split = as.character(param$split), prune = as.character(param$prune),
## control = ctl), theDots)
## modelArgs$data$.outcome <- as.numeric(y)
## out <- do.call(rpartScore::rpartScore, modelArgs)
## if (last)
## out <- rpart::prune.rpart(out, cp = param$cp)
## out
## }
##
## $rpartScore$predict
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## out <- modelFit$obsLevels[predict(modelFit, newdata)]
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## for (j in seq(along = submodels$cp)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
## tmp[[j + 1]] <- modelFit$obsLevels[predict(prunedFit,
## newdata)]
## }
## out <- tmp
## }
## out
## }
##
## $rpartScore$prob
## NULL
##
## $rpartScore$predictors
## function (x, surrogate = TRUE, ...)
## {
## out <- as.character(x$frame$var)
## out <- out[!(out %in% c("<leaf>"))]
## if (surrogate) {
## splits <- x$splits
## splits <- splits[splits[, "adj"] > 0, ]
## out <- c(out, rownames(splits))
## }
## unique(out)
## }
##
## $rpartScore$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...)
## {
## allVars <- all.vars(object$terms)
## allVars <- allVars[allVars != ".outcome"]
## out <- data.frame(Overall = object$variable.importance, Variable = names(object$variable.importance))
## rownames(out) <- names(object$variable.importance)
## if (!all(allVars %in% out$Variable)) {
## missingVars <- allVars[!(allVars %in% out$Variable)]
## zeros <- data.frame(Overall = rep(0, length(missingVars)),
## Variable = missingVars)
## out <- rbind(out, zeros)
## }
## rownames(out) <- out$Variable
## out$Variable <- NULL
## out
## }
##
## $rpartScore$levels
## function (x)
## x$obsLevels
##
## $rpartScore$trim
## function (x)
## {
## x$call <- list(na.action = (x$call)$na.action)
## x$x <- NULL
## x$y <- NULL
## x$where <- NULL
## x
## }
##
## $rpartScore$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"
## [5] "Ordinal Outcomes"
##
## $rpartScore$sort
## function (x)
## x[order(x[, 1], decreasing = TRUE), ]
# list the parameters of the rpart model that can be tuned
getModelInfo('rpart')$rpart$parameters
## parameter class label
## 1 cp numeric Complexity Parameter
library('caret')
importance = varImp(model, scale=FALSE)
importance
## rpart variable importance
##
## Overall
## total_day_minutes 219.693
## total_day_charge 206.025
## number_customer_service_calls 168.529
## international_planyes 163.107
## total_intl_minutes 135.324
## total_eve_minutes 117.225
## total_intl_charge 116.860
## total_eve_charge 111.593
## number_vmail_messages 52.586
## voice_mail_planyes 52.586
## total_intl_calls 52.444
## total_night_minutes 24.705
## total_night_charge 18.159
## total_night_calls 11.200
## total_day_calls 2.214
## total_eve_calls 0.000
plot(importance)
#install.packages("ROCR")
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
predictions <-predict(model, testset, type="prob")
head(predictions)
## yes no
## 2 0.02701486 0.97298514
## 5 0.04046243 0.95953757
## 6 0.04046243 0.95953757
## 8 0.04046243 0.95953757
## 13 0.02701486 0.97298514
## 16 0.95049505 0.04950495
pred.to.roc <- predictions[, 1] # first column: predicted probability of churn = "yes"
head(pred.to.roc)
## [1] 0.02701486 0.04046243 0.04046243 0.04046243 0.02701486 0.95049505
pred.rocr<-prediction(pred.to.roc, testset$churn)
pred.rocr
## An object of class "prediction"
## Slot "predictions":
## [[1]]
## [1] 0.02701486 0.04046243 0.04046243 0.04046243 0.02701486 0.95049505
## ... (output truncated; 1018 predicted probabilities in total)
##
##
## Slot "labels":
## [[1]]
## [1] no no no no no yes no no no no yes no no no no no no
## ... (output truncated; 1018 labels in total)
## Levels: no < yes
##
##
## Slot "cutoffs":
## [[1]]
## [1] Inf 1.00000000 0.95049505 0.87500000 0.87254902 0.85000000
## [7] 0.83333333 0.73684211 0.16000000 0.11320755 0.10416667 0.10240964
## [13] 0.04046243 0.02701486 0.00000000
##
##
## Slot "fp":
## [[1]]
## [1] 0 0 2 2 5 7 9 11 14 29 61 164 218 867 877
##
##
## Slot "tp":
## [[1]]
## [1] 0 24 55 61 89 98 105 110 111 113 115 125 126 141 141
##
##
## Slot "tn":
## [[1]]
## [1] 877 877 875 875 872 870 868 866 863 848 816 713 659 10 0
##
##
## Slot "fn":
## [[1]]
## [1] 141 117 86 80 52 43 36 31 30 28 26 16 15 0 0
##
##
## Slot "n.pos":
## [[1]]
## [1] 141
##
##
## Slot "n.neg":
## [[1]]
## [1] 877
##
##
## Slot "n.pos.pred":
## [[1]]
## [1] 0 24 57 63 94 105 114 121 125 142 176 289 344 1008
## [15] 1018
##
##
## Slot "n.neg.pred":
## [[1]]
## [1] 1018 994 961 955 924 913 904 897 893 876 842 729 674 10
## [15] 0
perf.rocr <- performance(pred.rocr, measure = "auc", x.measure = "cutoff")
perf.tpr.rocr <- performance(pred.rocr, measure = "tpr", x.measure = "fpr")
plot(perf.tpr.rocr, main = paste("AUC:", (perf.rocr@y.values)))
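# A cosmetic variant (not from the original): unwrap the y.values list and round the AUC
# before pasting it into the plot title
auc.value <- perf.rocr@y.values[[1]]
plot(perf.tpr.rocr, main = paste("AUC:", round(auc.value, 4)))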
# Fit three tree-based classifiers on the same training data and overlay their ROC curves
# rpart
library('rpart')
churn.rp <- rpart(churn ~ ., data = trainset)
# ctree
#install.packages("party")
library('party')
ctree.model = ctree(churn ~ ., data = trainset)
# C5.0
library(C50)
c50.model = C5.0(churn ~ ., data = trainset)
rp.predict.prob = predict(churn.rp, testset, type = 'prob')
c50.predict.prob = predict(c50.model, testset, type = 'prob')
# ctree returns a list of per-observation probability vectors; keep the first entry,
# i.e. the probability of the first factor level ("yes")
ctree.predict.prob = sapply(predict(ctree.model, testset, type = 'prob'), function(e){ unlist(e)[1] })
rp.prediction = prediction(rp.predict.prob[, 1], testset$churn)
c50.prediction = prediction(c50.predict.prob[, 1], testset$churn)
ctree.prediction = prediction(ctree.predict.prob, testset$churn)
rp.performance = performance(rp.prediction, "tpr", "fpr")
c50.performance = performance(c50.prediction, "tpr", "fpr")
ctree.performance = performance(ctree.prediction, "tpr", "fpr")
plot(rp.performance, col = 'red')
plot(c50.performance, add = T, col = 'green')
plot(ctree.performance, add = T, col = 'blue')
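# Optional finishing touches (not from the original): a diagonal reference line for a random
# classifier and a legend; the colors match the three plot() calls above
abline(a = 0, b = 1, lty = 2)
legend("bottomright", legend = c("rpart", "C5.0", "ctree"),
       col = c("red", "green", "blue"), lty = 1)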
rp.per.obj = performance(rp.prediction, measure = 'auc')
rp.per.obj
## An object of class "performance"
## Slot "x.name":
## [1] "None"
##
## Slot "y.name":
## [1] "Area under the ROC curve"
##
## Slot "alpha.name":
## [1] "none"
##
## Slot "x.values":
## list()
##
## Slot "y.values":
## [[1]]
## [1] 0.9090751
##
##
## Slot "alpha.values":
## list()
c50.per.obj = performance(c50.prediction, measure = 'auc')
c50.per.obj
## An object of class "performance"
## Slot "x.name":
## [1] "None"
##
## Slot "y.name":
## [1] "Area under the ROC curve"
##
## Slot "alpha.name":
## [1] "none"
##
## Slot "x.values":
## list()
##
## Slot "y.values":
## [[1]]
## [1] 0.8849438
##
##
## Slot "alpha.values":
## list()
ctree.per.obj = performance(ctree.prediction, measure = 'auc')
ctree.per.obj
## An object of class "performance"
## Slot "x.name":
## [1] "None"
##
## Slot "y.name":
## [1] "Area under the ROC curve"
##
## Slot "alpha.name":
## [1] "none"
##
## Slot "x.values":
## list()
##
## Slot "y.values":
## [[1]]
## [1] 0.9106197
##
##
## Slot "alpha.values":
## list()
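# A compact comparison sketch (not from the original): each AUC sits in the y.values slot of
# its performance object, so the three can be collected into one named vector
sapply(list(rpart = rp.per.obj, C5.0 = c50.per.obj, ctree = ctree.per.obj),
       function(p) p@y.values[[1]])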