Classification
Decision Tree - using the churn data from the C50 package
#install.packages("C50")
library(C50)
data(churn) # loads churnTrain and churnTest
str(churnTrain)
## 'data.frame': 3333 obs. of 20 variables:
## $ state : Factor w/ 51 levels "AK","AL","AR",..: 17 36 32 36 37 2 20 25 19 50 ...
## $ account_length : int 128 107 137 84 75 118 121 147 117 141 ...
## $ area_code : Factor w/ 3 levels "area_code_408",..: 2 2 2 1 2 3 3 2 1 2 ...
## $ international_plan : Factor w/ 2 levels "no","yes": 1 1 1 2 2 2 1 2 1 2 ...
## $ voice_mail_plan : Factor w/ 2 levels "no","yes": 2 2 1 1 1 1 2 1 1 2 ...
## $ number_vmail_messages : int 25 26 0 0 0 0 24 0 0 37 ...
## $ total_day_minutes : num 265 162 243 299 167 ...
## $ total_day_calls : int 110 123 114 71 113 98 88 79 97 84 ...
## $ total_day_charge : num 45.1 27.5 41.4 50.9 28.3 ...
## $ total_eve_minutes : num 197.4 195.5 121.2 61.9 148.3 ...
## $ total_eve_calls : int 99 103 110 88 122 101 108 94 80 111 ...
## $ total_eve_charge : num 16.78 16.62 10.3 5.26 12.61 ...
## $ total_night_minutes : num 245 254 163 197 187 ...
## $ total_night_calls : int 91 103 104 89 121 118 118 96 90 97 ...
## $ total_night_charge : num 11.01 11.45 7.32 8.86 8.41 ...
## $ total_intl_minutes : num 10 13.7 12.2 6.6 10.1 6.3 7.5 7.1 8.7 11.2 ...
## $ total_intl_calls : int 3 3 5 7 3 6 7 6 4 5 ...
## $ total_intl_charge : num 2.7 3.7 3.29 1.78 2.73 1.7 2.03 1.92 2.35 3.02 ...
## $ number_customer_service_calls: int 1 1 0 2 3 0 3 0 1 0 ...
## $ churn : Factor w/ 2 levels "yes","no": 2 2 2 2 2 2 2 2 2 2 ...
names(churnTrain) %in% c("state", "area_code", "account_length")
## [1] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
!names(churnTrain) %in% c("state", "area_code", "account_length")
## [1] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [12] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# select the modeling variables (drop state, area_code, account_length)
variable.list = !names(churnTrain) %in% c('state','area_code','account_length')
churnTrain=churnTrain[,variable.list]
churnTest=churnTest[,variable.list]
str(churnTrain)
## 'data.frame': 3333 obs. of 17 variables:
## $ international_plan : Factor w/ 2 levels "no","yes": 1 1 1 2 2 2 1 2 1 2 ...
## $ voice_mail_plan : Factor w/ 2 levels "no","yes": 2 2 1 1 1 1 2 1 1 2 ...
## $ number_vmail_messages : int 25 26 0 0 0 0 24 0 0 37 ...
## $ total_day_minutes : num 265 162 243 299 167 ...
## $ total_day_calls : int 110 123 114 71 113 98 88 79 97 84 ...
## $ total_day_charge : num 45.1 27.5 41.4 50.9 28.3 ...
## $ total_eve_minutes : num 197.4 195.5 121.2 61.9 148.3 ...
## $ total_eve_calls : int 99 103 110 88 122 101 108 94 80 111 ...
## $ total_eve_charge : num 16.78 16.62 10.3 5.26 12.61 ...
## $ total_night_minutes : num 245 254 163 197 187 ...
## $ total_night_calls : int 91 103 104 89 121 118 118 96 90 97 ...
## $ total_night_charge : num 11.01 11.45 7.32 8.86 8.41 ...
## $ total_intl_minutes : num 10 13.7 12.2 6.6 10.1 6.3 7.5 7.1 8.7 11.2 ...
## $ total_intl_calls : int 3 3 5 7 3 6 7 6 4 5 ...
## $ total_intl_charge : num 2.7 3.7 3.29 1.78 2.73 1.7 2.03 1.92 2.35 3.02 ...
## $ number_customer_service_calls: int 1 1 0 2 3 0 3 0 1 0 ...
## $ churn : Factor w/ 2 levels "yes","no": 2 2 2 2 2 2 2 2 2 2 ...
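The same drop can be written with setdiff(), which some find more direct than negating %in%; a minimal equivalent sketch (keep is an illustrative name, and re-running it here is a harmless no-op since the columns are already gone):
# equivalent sketch: keep every column except the three identifiers
keep = setdiff(names(churnTrain), c('state', 'area_code', 'account_length'))
churnTrain = churnTrain[, keep]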
#sample
?sample
## Help on topic 'sample' was found in the following packages:
##
## Package Library
## dplyr /Library/Frameworks/R.framework/Versions/3.5/Resources/library
## base /Library/Frameworks/R.framework/Resources/library
##
##
## Using the first match ...
sample(1:10)
## [1] 8 4 2 10 1 5 7 9 6 3
sample(1:10, size = 5)
## [1] 2 8 5 1 7
sample(c(0,1), size= 10, replace = T)
## [1] 0 1 0 0 1 1 1 1 0 1
sample.int(20, 12) # both arguments must be integers; here, draw 12 values from 1:20 without replacement
## [1] 16 6 11 10 14 20 13 7 12 9 1 18
set.seed(2)
# split the data into training and testing sets
ind<-sample(1:2, size=nrow(churnTrain), replace=T, prob=c(0.7, 0.3))
trainset=churnTrain[ind==1,]
testset=churnTrain[ind==2,]
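Before modeling, it is worth confirming that the realized split is close to the 70/30 target that prob=c(0.7, 0.3) asks for; a quick sanity-check sketch on the objects just created:
# sanity check: realized split proportions and sizes
prop.table(table(ind))
nrow(trainset)
nrow(testset)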
rpart
#install.packages('rpart')
library('rpart')
# build a decision tree model with rpart (CART)
?rpart
con = rpart.control(minsplit=20,cp=0.01)
?rpart.control
churn.rp<-rpart(churn ~., data=trainset,control = con)
#churn.rp<-rpart(churn ~ total_day_charge + international_plan, data=trainset)
churn.rp
## n= 2315
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 2315 342 no (0.14773218 0.85226782)
## 2) total_day_minutes>=265.45 144 59 yes (0.59027778 0.40972222)
## 4) voice_mail_plan=no 110 29 yes (0.73636364 0.26363636)
## 8) total_eve_minutes>=188.5 67 3 yes (0.95522388 0.04477612) *
## 9) total_eve_minutes< 188.5 43 17 no (0.39534884 0.60465116)
## 18) total_day_minutes>=282.7 19 6 yes (0.68421053 0.31578947) *
## 19) total_day_minutes< 282.7 24 4 no (0.16666667 0.83333333) *
## 5) voice_mail_plan=yes 34 4 no (0.11764706 0.88235294) *
## 3) total_day_minutes< 265.45 2171 257 no (0.11837863 0.88162137)
## 6) number_customer_service_calls>=3.5 168 82 yes (0.51190476 0.48809524)
## 12) total_day_minutes< 160.2 71 10 yes (0.85915493 0.14084507) *
## 13) total_day_minutes>=160.2 97 25 no (0.25773196 0.74226804)
## 26) total_eve_minutes< 155.5 20 7 yes (0.65000000 0.35000000) *
## 27) total_eve_minutes>=155.5 77 12 no (0.15584416 0.84415584) *
## 7) number_customer_service_calls< 3.5 2003 171 no (0.08537194 0.91462806)
## 14) international_plan=yes 188 76 no (0.40425532 0.59574468)
## 28) total_intl_calls< 2.5 38 0 yes (1.00000000 0.00000000) *
## 29) total_intl_calls>=2.5 150 38 no (0.25333333 0.74666667)
## 58) total_intl_minutes>=13.1 32 0 yes (1.00000000 0.00000000) *
## 59) total_intl_minutes< 13.1 118 6 no (0.05084746 0.94915254) *
## 15) international_plan=no 1815 95 no (0.05234160 0.94765840)
## 30) total_day_minutes>=224.15 251 50 no (0.19920319 0.80079681)
## 60) total_eve_minutes>=259.8 36 10 yes (0.72222222 0.27777778) *
## 61) total_eve_minutes< 259.8 215 24 no (0.11162791 0.88837209) *
## 31) total_day_minutes< 224.15 1564 45 no (0.02877238 0.97122762) *
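In the control above, minsplit=20 blocks splits on nodes holding fewer than 20 observations, and cp=0.01 discards any split improving the overall fit by less than 1%. Loosening both grows a noticeably deeper tree, a quick way to see how these knobs trade size against overfitting; a sketch with illustrative looser values:
# sketch: looser controls grow a larger, likely overfit tree
con.loose = rpart.control(minsplit=5, cp=0.001)
churn.deep = rpart(churn ~ ., data=trainset, control=con.loose)
nrow(churn.deep$frame)  # number of nodes; compare with churn.rp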
s = summary(churn.rp)
## Call:
## rpart(formula = churn ~ ., data = trainset, control = con)
## n= 2315
##
## CP nsplit rel error xerror xstd
## 1 0.07602339 0 1.0000000 1.0000000 0.04992005
## 2 0.07456140 2 0.8479532 0.9970760 0.04985964
## 3 0.05555556 4 0.6988304 0.7602339 0.04442127
## 4 0.02631579 7 0.4941520 0.5263158 0.03767329
## 5 0.02339181 8 0.4678363 0.5204678 0.03748096
## 6 0.02046784 10 0.4210526 0.5087719 0.03709209
## 7 0.01754386 11 0.4005848 0.4707602 0.03578773
## 8 0.01000000 12 0.3830409 0.4766082 0.03599261
##
## Variable importance
## total_day_minutes total_day_charge
## 18 18
## number_customer_service_calls total_intl_minutes
## 10 8
## total_intl_charge total_eve_charge
## 8 8
## total_eve_minutes international_plan
## 8 7
## total_intl_calls number_vmail_messages
## 6 3
## voice_mail_plan total_night_calls
## 3 1
## total_eve_calls
## 1
##
## Node number 1: 2315 observations, complexity param=0.07602339
## predicted class=no expected loss=0.1477322 P(node) =1
## class counts: 342 1973
## probabilities: 0.148 0.852
## left son=2 (144 obs) right son=3 (2171 obs)
## Primary splits:
## total_day_minutes < 265.45 to the right, improve=60.145020, (0 missing)
## total_day_charge < 45.125 to the right, improve=60.145020, (0 missing)
## number_customer_service_calls < 3.5 to the right, improve=53.641430, (0 missing)
## international_plan splits as RL, improve=43.729370, (0 missing)
## voice_mail_plan splits as LR, improve= 6.089388, (0 missing)
## Surrogate splits:
## total_day_charge < 45.125 to the right, agree=1, adj=1, (0 split)
##
## Node number 2: 144 observations, complexity param=0.07602339
## predicted class=yes expected loss=0.4097222 P(node) =0.06220302
## class counts: 85 59
## probabilities: 0.590 0.410
## left son=4 (110 obs) right son=5 (34 obs)
## Primary splits:
## voice_mail_plan splits as LR, improve=19.884860, (0 missing)
## number_vmail_messages < 9.5 to the left, improve=19.884860, (0 missing)
## total_eve_minutes < 167.05 to the right, improve=14.540020, (0 missing)
## total_eve_charge < 14.2 to the right, improve=14.540020, (0 missing)
## total_day_minutes < 283.9 to the right, improve= 6.339827, (0 missing)
## Surrogate splits:
## number_vmail_messages < 9.5 to the left, agree=1.000, adj=1.000, (0 split)
## total_night_minutes < 110.3 to the right, agree=0.785, adj=0.088, (0 split)
## total_night_charge < 4.965 to the right, agree=0.785, adj=0.088, (0 split)
## total_night_calls < 50 to the right, agree=0.778, adj=0.059, (0 split)
## total_intl_minutes < 15.3 to the left, agree=0.771, adj=0.029, (0 split)
##
## Node number 3: 2171 observations, complexity param=0.0745614
## predicted class=no expected loss=0.1183786 P(node) =0.937797
## class counts: 257 1914
## probabilities: 0.118 0.882
## left son=6 (168 obs) right son=7 (2003 obs)
## Primary splits:
## number_customer_service_calls < 3.5 to the right, improve=56.398210, (0 missing)
## international_plan splits as RL, improve=43.059160, (0 missing)
## total_day_minutes < 224.15 to the right, improve=10.847440, (0 missing)
## total_day_charge < 38.105 to the right, improve=10.847440, (0 missing)
## total_intl_minutes < 13.15 to the right, improve= 6.347319, (0 missing)
##
## Node number 4: 110 observations, complexity param=0.02631579
## predicted class=yes expected loss=0.2636364 P(node) =0.0475162
## class counts: 81 29
## probabilities: 0.736 0.264
## left son=8 (67 obs) right son=9 (43 obs)
## Primary splits:
## total_eve_minutes < 188.5 to the right, improve=16.419610, (0 missing)
## total_eve_charge < 16.025 to the right, improve=16.419610, (0 missing)
## total_night_minutes < 206.85 to the right, improve= 5.350500, (0 missing)
## total_night_charge < 9.305 to the right, improve= 5.350500, (0 missing)
## total_day_minutes < 281.15 to the right, improve= 5.254545, (0 missing)
## Surrogate splits:
## total_eve_charge < 16.025 to the right, agree=1.000, adj=1.000, (0 split)
## total_night_calls < 82 to the right, agree=0.655, adj=0.116, (0 split)
## total_intl_minutes < 3.35 to the right, agree=0.636, adj=0.070, (0 split)
## total_intl_charge < 0.905 to the right, agree=0.636, adj=0.070, (0 split)
## total_day_minutes < 268.55 to the right, agree=0.627, adj=0.047, (0 split)
##
## Node number 5: 34 observations
## predicted class=no expected loss=0.1176471 P(node) =0.01468683
## class counts: 4 30
## probabilities: 0.118 0.882
##
## Node number 6: 168 observations, complexity param=0.0745614
## predicted class=yes expected loss=0.4880952 P(node) =0.07257019
## class counts: 86 82
## probabilities: 0.512 0.488
## left son=12 (71 obs) right son=13 (97 obs)
## Primary splits:
## total_day_minutes < 160.2 to the left, improve=29.655880, (0 missing)
## total_day_charge < 27.235 to the left, improve=29.655880, (0 missing)
## total_eve_minutes < 180.65 to the left, improve= 8.556953, (0 missing)
## total_eve_charge < 15.355 to the left, improve= 8.556953, (0 missing)
## number_customer_service_calls < 4.5 to the right, improve= 5.975362, (0 missing)
## Surrogate splits:
## total_day_charge < 27.235 to the left, agree=1.000, adj=1.000, (0 split)
## total_night_calls < 79 to the left, agree=0.625, adj=0.113, (0 split)
## total_intl_calls < 2.5 to the left, agree=0.619, adj=0.099, (0 split)
## number_customer_service_calls < 4.5 to the right, agree=0.607, adj=0.070, (0 split)
## total_eve_calls < 89.5 to the left, agree=0.601, adj=0.056, (0 split)
##
## Node number 7: 2003 observations, complexity param=0.05555556
## predicted class=no expected loss=0.08537194 P(node) =0.8652268
## class counts: 171 1832
## probabilities: 0.085 0.915
## left son=14 (188 obs) right son=15 (1815 obs)
## Primary splits:
## international_plan splits as RL, improve=42.194510, (0 missing)
## total_day_minutes < 224.15 to the right, improve=16.838410, (0 missing)
## total_day_charge < 38.105 to the right, improve=16.838410, (0 missing)
## total_intl_minutes < 13.15 to the right, improve= 6.210678, (0 missing)
## total_intl_charge < 3.55 to the right, improve= 6.210678, (0 missing)
##
## Node number 8: 67 observations
## predicted class=yes expected loss=0.04477612 P(node) =0.02894168
## class counts: 64 3
## probabilities: 0.955 0.045
##
## Node number 9: 43 observations, complexity param=0.02046784
## predicted class=no expected loss=0.3953488 P(node) =0.01857451
## class counts: 17 26
## probabilities: 0.395 0.605
## left son=18 (19 obs) right son=19 (24 obs)
## Primary splits:
## total_day_minutes < 282.7 to the right, improve=5.680947, (0 missing)
## total_day_charge < 48.06 to the right, improve=5.680947, (0 missing)
## total_night_minutes < 212.65 to the right, improve=4.558140, (0 missing)
## total_night_charge < 9.57 to the right, improve=4.558140, (0 missing)
## total_eve_minutes < 145.4 to the right, improve=4.356169, (0 missing)
## Surrogate splits:
## total_day_charge < 48.06 to the right, agree=1.000, adj=1.000, (0 split)
## total_day_calls < 103 to the left, agree=0.674, adj=0.263, (0 split)
## total_eve_calls < 104.5 to the left, agree=0.674, adj=0.263, (0 split)
## total_intl_minutes < 11.55 to the left, agree=0.651, adj=0.211, (0 split)
## total_intl_charge < 3.12 to the left, agree=0.651, adj=0.211, (0 split)
##
## Node number 12: 71 observations
## predicted class=yes expected loss=0.1408451 P(node) =0.03066955
## class counts: 61 10
## probabilities: 0.859 0.141
##
## Node number 13: 97 observations, complexity param=0.01754386
## predicted class=no expected loss=0.257732 P(node) =0.04190065
## class counts: 25 72
## probabilities: 0.258 0.742
## left son=26 (20 obs) right son=27 (77 obs)
## Primary splits:
## total_eve_minutes < 155.5 to the left, improve=7.753662, (0 missing)
## total_eve_charge < 13.22 to the left, improve=7.753662, (0 missing)
## total_intl_minutes < 13.55 to the right, improve=2.366149, (0 missing)
## total_intl_charge < 3.66 to the right, improve=2.366149, (0 missing)
## number_customer_service_calls < 4.5 to the right, improve=2.297667, (0 missing)
## Surrogate splits:
## total_eve_charge < 13.22 to the left, agree=1.000, adj=1.00, (0 split)
## total_night_calls < 143.5 to the right, agree=0.814, adj=0.10, (0 split)
## total_eve_calls < 62 to the left, agree=0.804, adj=0.05, (0 split)
##
## Node number 14: 188 observations, complexity param=0.05555556
## predicted class=no expected loss=0.4042553 P(node) =0.0812095
## class counts: 76 112
## probabilities: 0.404 0.596
## left son=28 (38 obs) right son=29 (150 obs)
## Primary splits:
## total_intl_calls < 2.5 to the left, improve=33.806520, (0 missing)
## total_intl_minutes < 13.1 to the right, improve=30.527050, (0 missing)
## total_intl_charge < 3.535 to the right, improve=30.527050, (0 missing)
## total_day_minutes < 221.95 to the right, improve= 3.386095, (0 missing)
## total_day_charge < 37.735 to the right, improve= 3.386095, (0 missing)
##
## Node number 15: 1815 observations, complexity param=0.02339181
## predicted class=no expected loss=0.0523416 P(node) =0.7840173
## class counts: 95 1720
## probabilities: 0.052 0.948
## left son=30 (251 obs) right son=31 (1564 obs)
## Primary splits:
## total_day_minutes < 224.15 to the right, improve=12.5649300, (0 missing)
## total_day_charge < 38.105 to the right, improve=12.5649300, (0 missing)
## total_eve_minutes < 244.95 to the right, improve= 4.7875890, (0 missing)
## total_eve_charge < 20.825 to the right, improve= 4.7875890, (0 missing)
## total_night_minutes < 163.85 to the right, improve= 0.9074391, (0 missing)
## Surrogate splits:
## total_day_charge < 38.105 to the right, agree=1, adj=1, (0 split)
##
## Node number 18: 19 observations
## predicted class=yes expected loss=0.3157895 P(node) =0.008207343
## class counts: 13 6
## probabilities: 0.684 0.316
##
## Node number 19: 24 observations
## predicted class=no expected loss=0.1666667 P(node) =0.01036717
## class counts: 4 20
## probabilities: 0.167 0.833
##
## Node number 26: 20 observations
## predicted class=yes expected loss=0.35 P(node) =0.008639309
## class counts: 13 7
## probabilities: 0.650 0.350
##
## Node number 27: 77 observations
## predicted class=no expected loss=0.1558442 P(node) =0.03326134
## class counts: 12 65
## probabilities: 0.156 0.844
##
## Node number 28: 38 observations
## predicted class=yes expected loss=0 P(node) =0.01641469
## class counts: 38 0
## probabilities: 1.000 0.000
##
## Node number 29: 150 observations, complexity param=0.05555556
## predicted class=no expected loss=0.2533333 P(node) =0.06479482
## class counts: 38 112
## probabilities: 0.253 0.747
## left son=58 (32 obs) right son=59 (118 obs)
## Primary splits:
## total_intl_minutes < 13.1 to the right, improve=45.356840, (0 missing)
## total_intl_charge < 3.535 to the right, improve=45.356840, (0 missing)
## total_day_calls < 95.5 to the left, improve= 4.036407, (0 missing)
## total_day_minutes < 237.75 to the right, improve= 1.879020, (0 missing)
## total_day_charge < 40.42 to the right, improve= 1.879020, (0 missing)
## Surrogate splits:
## total_intl_charge < 3.535 to the right, agree=1.0, adj=1.000, (0 split)
## total_day_minutes < 52.45 to the left, agree=0.8, adj=0.063, (0 split)
## total_day_charge < 8.92 to the left, agree=0.8, adj=0.063, (0 split)
##
## Node number 30: 251 observations, complexity param=0.02339181
## predicted class=no expected loss=0.1992032 P(node) =0.1084233
## class counts: 50 201
## probabilities: 0.199 0.801
## left son=60 (36 obs) right son=61 (215 obs)
## Primary splits:
## total_eve_minutes < 259.8 to the right, improve=22.993380, (0 missing)
## total_eve_charge < 22.08 to the right, improve=22.993380, (0 missing)
## voice_mail_plan splits as LR, improve= 4.745664, (0 missing)
## number_vmail_messages < 7.5 to the left, improve= 4.745664, (0 missing)
## total_night_minutes < 181.15 to the right, improve= 3.509731, (0 missing)
## Surrogate splits:
## total_eve_charge < 22.08 to the right, agree=1, adj=1, (0 split)
##
## Node number 31: 1564 observations
## predicted class=no expected loss=0.02877238 P(node) =0.675594
## class counts: 45 1519
## probabilities: 0.029 0.971
##
## Node number 58: 32 observations
## predicted class=yes expected loss=0 P(node) =0.01382289
## class counts: 32 0
## probabilities: 1.000 0.000
##
## Node number 59: 118 observations
## predicted class=no expected loss=0.05084746 P(node) =0.05097192
## class counts: 6 112
## probabilities: 0.051 0.949
##
## Node number 60: 36 observations
## predicted class=yes expected loss=0.2777778 P(node) =0.01555076
## class counts: 26 10
## probabilities: 0.722 0.278
##
## Node number 61: 215 observations
## predicted class=no expected loss=0.1116279 P(node) =0.09287257
## class counts: 24 191
## probabilities: 0.112 0.888
s$cptable
## CP nsplit rel error xerror xstd
## 1 0.07602339 0 1.0000000 1.0000000 0.04992005
## 2 0.07456140 2 0.8479532 0.9970760 0.04985964
## 3 0.05555556 4 0.6988304 0.7602339 0.04442127
## 4 0.02631579 7 0.4941520 0.5263158 0.03767329
## 5 0.02339181 8 0.4678363 0.5204678 0.03748096
## 6 0.02046784 10 0.4210526 0.5087719 0.03709209
## 7 0.01754386 11 0.4005848 0.4707602 0.03578773
## 8 0.01000000 12 0.3830409 0.4766082 0.03599261
# plot the decision tree
par(mfrow=c(1,1))
?plot.rpart
plot(churn.rp, uniform=TRUE,branch = 0.6, margin=0.1)
text(churn.rp, all=TRUE, use.n=TRUE, cex=0.7)

library('rpart.plot')
rpart.plot(churn.rp)

Prune
printcp(churn.rp)
##
## Classification tree:
## rpart(formula = churn ~ ., data = trainset, control = con)
##
## Variables actually used in tree construction:
## [1] international_plan number_customer_service_calls
## [3] total_day_minutes total_eve_minutes
## [5] total_intl_calls total_intl_minutes
## [7] voice_mail_plan
##
## Root node error: 342/2315 = 0.14773
##
## n= 2315
##
## CP nsplit rel error xerror xstd
## 1 0.076023 0 1.00000 1.00000 0.049920
## 2 0.074561 2 0.84795 0.99708 0.049860
## 3 0.055556 4 0.69883 0.76023 0.044421
## 4 0.026316 7 0.49415 0.52632 0.037673
## 5 0.023392 8 0.46784 0.52047 0.037481
## 6 0.020468 10 0.42105 0.50877 0.037092
## 7 0.017544 11 0.40058 0.47076 0.035788
## 8 0.010000 12 0.38304 0.47661 0.035993
plotcp(churn.rp)

# find the cp with the minimum cross-validation error
min_row = which.min(churn.rp$cptable[,"xerror"])
churn.cp = churn.rp$cptable[min_row, "CP"]
# prune the tree with churn.cp as the threshold
prune.tree=prune(churn.rp, cp=churn.cp)
plot(prune.tree, uniform=TRUE,branch = 0.6, margin=0.1)
text(prune.tree, all=TRUE, use.n=TRUE, cex=0.7)

test_tree = prune(churn.rp,cp=0.06)
plot(test_tree, margin=0.1)
text(test_tree, all=TRUE, use.n=TRUE, cex=0.7)
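Besides pruning at the minimum-xerror cp (or at an arbitrary value like 0.06 above), a common alternative is the one-standard-error rule: keep the simplest tree whose xerror is within one xstd of the minimum. A sketch against the same cptable (names like prune.1se are illustrative):
# sketch: 1-SE rule for choosing cp
cpt = churn.rp$cptable
best = which.min(cpt[, "xerror"])
bar = cpt[best, "xerror"] + cpt[best, "xstd"]
se_row = which(cpt[, "xerror"] <= bar)[1]  # first row under the bar = simplest tree
prune.1se = prune(churn.rp, cp=cpt[se_row, "CP"])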

predictions <-predict(prune.tree, testset, type='class')
table(predictions,testset$churn)
##
## predictions yes no
## yes 95 14
## no 46 863
#install.packages('caret')
#install.packages('e1071')
library('caret')
## Loading required package: lattice
## Loading required package: ggplot2
library('e1071')
confusionMatrix(table(predictions, testset$churn))
## Confusion Matrix and Statistics
##
##
## predictions yes no
## yes 95 14
## no 46 863
##
## Accuracy : 0.9411
## 95% CI : (0.9248, 0.9547)
## No Information Rate : 0.8615
## P-Value [Acc > NIR] : 2.786e-16
##
## Kappa : 0.727
## Mcnemar's Test P-Value : 6.279e-05
##
## Sensitivity : 0.67376
## Specificity : 0.98404
## Pos Pred Value : 0.87156
## Neg Pred Value : 0.94939
## Prevalence : 0.13851
## Detection Rate : 0.09332
## Detection Prevalence : 0.10707
## Balanced Accuracy : 0.82890
##
## 'Positive' Class : yes
##
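The headline statistics are easy to recompute by hand from the 2x2 table, which makes explicit what caret is reporting with 'yes' as the positive class:
# recompute the main statistics from the confusion table
tab = table(predictions, testset$churn)
sum(diag(tab)) / sum(tab)               # accuracy: (95 + 863) / 1018
tab["yes", "yes"] / sum(tab[, "yes"])   # sensitivity (recall): 95 / 141
tab["yes", "yes"] / sum(tab["yes", ])   # pos pred value (precision): 95 / 109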
?confusionMatrix
## Help on topic 'confusionMatrix' was found in the following
## packages:
##
## Package Library
## caret /Library/Frameworks/R.framework/Versions/3.5/Resources/library
## ModelMetrics /Library/Frameworks/R.framework/Versions/3.5/Resources/library
##
##
## Using the first match ...
Use the caret package
#install.packages("caret")
library(caret)
control=trainControl(method="repeatedcv", number=10, repeats=3)
model =train(churn~., data=churnTrain, method="rpart", trControl=control)
predictions = predict(model,churnTest)
table(predictions,churnTest$churn)
##
## predictions yes no
## yes 60 24
## no 164 1419
confusionMatrix(table(predictions,churnTest$churn))
## Confusion Matrix and Statistics
##
##
## predictions yes no
## yes 60 24
## no 164 1419
##
## Accuracy : 0.8872
## 95% CI : (0.8711, 0.902)
## No Information Rate : 0.8656
## P-Value [Acc > NIR] : 0.00464
##
## Kappa : 0.3413
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.26786
## Specificity : 0.98337
## Pos Pred Value : 0.71429
## Neg Pred Value : 0.89640
## Prevalence : 0.13437
## Detection Rate : 0.03599
## Detection Prevalence : 0.05039
## Balanced Accuracy : 0.62561
##
## 'Positive' Class : yes
##
control=trainControl(method="repeatedcv", number=10, repeats=3,summaryFunction = prSummary,classProbs=T)
model =train(churn~., data=churnTrain, method="rpart", trControl=control)
## Warning in train.default(x, y, weights = w, ...): The metric "Accuracy" was
## not in the result set. AUC will be used instead.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
tune_funs = expand.grid(cp=seq(0,0.1,0.01))
model =train(churn~., data=churnTrain, method="rpart", trControl=control,tuneGrid=tune_funs)
## Warning in train.default(x, y, weights = w, ...): The metric "Accuracy" was
## not in the result set. AUC will be used instead.
## Warning in train.default(x, y, weights = w, ...): There were missing values
## in resampled performance measures.
model
## CART
##
## 3333 samples
## 16 predictor
## 2 classes: 'yes', 'no'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 2999, 3000, 2999, 3000, 2999, 3000, ...
## Resampling results across tuning parameters:
##
## cp AUC Precision Recall F
## 0.00 0.55518791 0.8660239 0.72465986 0.7876608
## 0.01 0.60792747 0.8674528 0.69494048 0.7702717
## 0.02 0.56696210 0.8283395 0.62057823 0.7063709
## 0.03 0.56261800 0.8350761 0.62542517 0.7134099
## 0.04 0.56261800 0.8350761 0.62542517 0.7134099
## 0.05 0.55430418 0.8306712 0.60667517 0.6988897
## 0.06 0.41535571 0.7775450 0.42042234 0.5419381
## 0.07 0.41430786 0.7697129 0.42111678 0.5396070
## 0.08 0.33324352 0.7122440 0.29803005 0.4187069
## 0.09 0.11029715 0.5151736 0.09322562 0.3007531
## 0.10 0.01418902 0.4017857 0.01458333 0.2828947
##
## AUC was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.01.
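The fitted train object keeps both the winning parameter and the full resampling profile, which is handy for seeing how sharply AUC falls off as cp grows:
# inspect the selected cp and the resampling profile
model$bestTune  # cp chosen by the largest AUC (0.01 here)
plot(model)     # resampled AUC across the cp grid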
predictions = predict(model, churnTest)
confusionMatrix(table(predictions,churnTest$churn))
## Confusion Matrix and Statistics
##
##
## predictions yes no
## yes 145 15
## no 79 1428
##
## Accuracy : 0.9436
## 95% CI : (0.9314, 0.9542)
## No Information Rate : 0.8656
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7243
## Mcnemar's Test P-Value : 8.142e-11
##
## Sensitivity : 0.64732
## Specificity : 0.98960
## Pos Pred Value : 0.90625
## Neg Pred Value : 0.94758
## Prevalence : 0.13437
## Detection Rate : 0.08698
## Detection Prevalence : 0.09598
## Balanced Accuracy : 0.81846
##
## 'Positive' Class : yes
##
Notes on using the caret package
# list all algorithms implemented in the caret package
names(getModelInfo())
## [1] "ada" "AdaBag" "AdaBoost.M1"
## [4] "adaboost" "amdai" "ANFIS"
## [7] "avNNet" "awnb" "awtan"
## [10] "bag" "bagEarth" "bagEarthGCV"
## [13] "bagFDA" "bagFDAGCV" "bam"
## [16] "bartMachine" "bayesglm" "binda"
## [19] "blackboost" "blasso" "blassoAveraged"
## [22] "bridge" "brnn" "BstLm"
## [25] "bstSm" "bstTree" "C5.0"
## [28] "C5.0Cost" "C5.0Rules" "C5.0Tree"
## [31] "cforest" "chaid" "CSimca"
## [34] "ctree" "ctree2" "cubist"
## [37] "dda" "deepboost" "DENFIS"
## [40] "dnn" "dwdLinear" "dwdPoly"
## [43] "dwdRadial" "earth" "elm"
## [46] "enet" "evtree" "extraTrees"
## [49] "fda" "FH.GBML" "FIR.DM"
## [52] "foba" "FRBCS.CHI" "FRBCS.W"
## [55] "FS.HGD" "gam" "gamboost"
## [58] "gamLoess" "gamSpline" "gaussprLinear"
## [61] "gaussprPoly" "gaussprRadial" "gbm_h2o"
## [64] "gbm" "gcvEarth" "GFS.FR.MOGUL"
## [67] "GFS.LT.RS" "GFS.THRIFT" "glm.nb"
## [70] "glm" "glmboost" "glmnet_h2o"
## [73] "glmnet" "glmStepAIC" "gpls"
## [76] "hda" "hdda" "hdrda"
## [79] "HYFIS" "icr" "J48"
## [82] "JRip" "kernelpls" "kknn"
## [85] "knn" "krlsPoly" "krlsRadial"
## [88] "lars" "lars2" "lasso"
## [91] "lda" "lda2" "leapBackward"
## [94] "leapForward" "leapSeq" "Linda"
## [97] "lm" "lmStepAIC" "LMT"
## [100] "loclda" "logicBag" "LogitBoost"
## [103] "logreg" "lssvmLinear" "lssvmPoly"
## [106] "lssvmRadial" "lvq" "M5"
## [109] "M5Rules" "manb" "mda"
## [112] "Mlda" "mlp" "mlpKerasDecay"
## [115] "mlpKerasDecayCost" "mlpKerasDropout" "mlpKerasDropoutCost"
## [118] "mlpML" "mlpSGD" "mlpWeightDecay"
## [121] "mlpWeightDecayML" "monmlp" "msaenet"
## [124] "multinom" "mxnet" "mxnetAdam"
## [127] "naive_bayes" "nb" "nbDiscrete"
## [130] "nbSearch" "neuralnet" "nnet"
## [133] "nnls" "nodeHarvest" "null"
## [136] "OneR" "ordinalNet" "ORFlog"
## [139] "ORFpls" "ORFridge" "ORFsvm"
## [142] "ownn" "pam" "parRF"
## [145] "PART" "partDSA" "pcaNNet"
## [148] "pcr" "pda" "pda2"
## [151] "penalized" "PenalizedLDA" "plr"
## [154] "pls" "plsRglm" "polr"
## [157] "ppr" "PRIM" "protoclass"
## [160] "qda" "QdaCov" "qrf"
## [163] "qrnn" "randomGLM" "ranger"
## [166] "rbf" "rbfDDA" "Rborist"
## [169] "rda" "regLogistic" "relaxo"
## [172] "rf" "rFerns" "RFlda"
## [175] "rfRules" "ridge" "rlda"
## [178] "rlm" "rmda" "rocc"
## [181] "rotationForest" "rotationForestCp" "rpart"
## [184] "rpart1SE" "rpart2" "rpartCost"
## [187] "rpartScore" "rqlasso" "rqnc"
## [190] "RRF" "RRFglobal" "rrlda"
## [193] "RSimca" "rvmLinear" "rvmPoly"
## [196] "rvmRadial" "SBC" "sda"
## [199] "sdwd" "simpls" "SLAVE"
## [202] "slda" "smda" "snn"
## [205] "sparseLDA" "spikeslab" "spls"
## [208] "stepLDA" "stepQDA" "superpc"
## [211] "svmBoundrangeString" "svmExpoString" "svmLinear"
## [214] "svmLinear2" "svmLinear3" "svmLinearWeights"
## [217] "svmLinearWeights2" "svmPoly" "svmRadial"
## [220] "svmRadialCost" "svmRadialSigma" "svmRadialWeights"
## [223] "svmSpectrumString" "tan" "tanSearch"
## [226] "treebag" "vbmpRadial" "vglmAdjCat"
## [229] "vglmContRatio" "vglmCumulative" "widekernelpls"
## [232] "WM" "wsrf" "xgbDART"
## [235] "xgbLinear" "xgbTree" "xyf"
# check whether the caret package implements the rpart algorithm
names(getModelInfo())[grep('rpart',names(getModelInfo()))]
## [1] "rpart" "rpart1SE" "rpart2" "rpartCost" "rpartScore"
# query the rpart model's information
getModelInfo('rpart')
## $rpart
## $rpart$label
## [1] "CART"
##
## $rpart$library
## [1] "rpart"
##
## $rpart$type
## [1] "Regression" "Classification"
##
## $rpart$parameters
## parameter class label
## 1 cp numeric Complexity Parameter
##
## $rpart$grid
## function (x, y, len = NULL, search = "grid")
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
## initialFit <- initialFit[order(-initialFit[, "CP"]), , drop = FALSE]
## if (search == "grid") {
## if (nrow(initialFit) < len) {
## tuneSeq <- data.frame(cp = seq(min(initialFit[, "CP"]),
## max(initialFit[, "CP"]), length = len))
## }
## else tuneSeq <- data.frame(cp = initialFit[1:len, "CP"])
## colnames(tuneSeq) <- "cp"
## }
## else {
## tuneSeq <- data.frame(cp = unique(sample(initialFit[,
## "CP"], size = len, replace = TRUE)))
## }
## tuneSeq
## }
##
## $rpart$loop
## function (grid)
## {
## grid <- grid[order(grid$cp, decreasing = FALSE), , drop = FALSE]
## loop <- grid[1, , drop = FALSE]
## submodels <- list(grid[-1, , drop = FALSE])
## list(loop = loop, submodels = submodels)
## }
##
## $rpart$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## cpValue <- if (!last)
## param$cp
## else 0
## theDots <- list(...)
## if (any(names(theDots) == "control")) {
## theDots$control$cp <- cpValue
## theDots$control$xval <- 0
## ctl <- theDots$control
## theDots$control <- NULL
## }
## else ctl <- rpart::rpart.control(cp = cpValue, xval = 0)
## if (!is.null(wts))
## theDots$weights <- wts
## modelArgs <- c(list(formula = as.formula(".outcome ~ ."),
## data = if (is.data.frame(x)) x else as.data.frame(x),
## control = ctl), theDots)
## modelArgs$data$.outcome <- y
## out <- do.call(rpart::rpart, modelArgs)
## if (last)
## out <- rpart::prune.rpart(out, cp = param$cp)
## out
## }
##
## $rpart$predict
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## pType <- if (modelFit$problemType == "Classification")
## "class"
## else "vector"
## out <- predict(modelFit, newdata, type = pType)
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## for (j in seq(along = submodels$cp)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
## tmp[[j + 1]] <- predict(prunedFit, newdata, type = pType)
## }
## out <- tmp
## }
## out
## }
##
## $rpart$prob
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## out <- predict(modelFit, newdata, type = "prob")
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## for (j in seq(along = submodels$cp)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
## tmpProb <- predict(prunedFit, newdata, type = "prob")
## tmp[[j + 1]] <- as.data.frame(tmpProb[, modelFit$obsLevels,
## drop = FALSE])
## }
## out <- tmp
## }
## out
## }
##
## $rpart$predictors
## function (x, surrogate = TRUE, ...)
## {
## out <- as.character(x$frame$var)
## out <- out[!(out %in% c("<leaf>"))]
## if (surrogate) {
## splits <- x$splits
## splits <- splits[splits[, "adj"] > 0, ]
## out <- c(out, rownames(splits))
## }
## unique(out)
## }
##
## $rpart$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...)
## {
## if (nrow(object$splits) > 0) {
## tmp <- rownames(object$splits)
## rownames(object$splits) <- 1:nrow(object$splits)
## splits <- data.frame(object$splits)
## splits$var <- tmp
## splits$type <- ""
## frame <- as.data.frame(object$frame)
## index <- 0
## for (i in 1:nrow(frame)) {
## if (frame$var[i] != "<leaf>") {
## index <- index + 1
## splits$type[index] <- "primary"
## if (frame$ncompete[i] > 0) {
## for (j in 1:frame$ncompete[i]) {
## index <- index + 1
## splits$type[index] <- "competing"
## }
## }
## if (frame$nsurrogate[i] > 0) {
## for (j in 1:frame$nsurrogate[i]) {
## index <- index + 1
## splits$type[index] <- "surrogate"
## }
## }
## }
## }
## splits$var <- factor(as.character(splits$var))
## if (!surrogates)
## splits <- subset(splits, type != "surrogate")
## if (!competes)
## splits <- subset(splits, type != "competing")
## out <- aggregate(splits$improve, list(Variable = splits$var),
## sum, na.rm = TRUE)
## }
## else {
## out <- data.frame(x = numeric(), Vaiable = character())
## }
## allVars <- colnames(attributes(object$terms)$factors)
## if (!all(allVars %in% out$Variable)) {
## missingVars <- allVars[!(allVars %in% out$Variable)]
## zeros <- data.frame(x = rep(0, length(missingVars)),
## Variable = missingVars)
## out <- rbind(out, zeros)
## }
## out2 <- data.frame(Overall = out$x)
## rownames(out2) <- out$Variable
## out2
## }
##
## $rpart$levels
## function (x)
## x$obsLevels
##
## $rpart$trim
## function (x)
## {
## x$call <- list(na.action = (x$call)$na.action)
## x$x <- NULL
## x$y <- NULL
## x$where <- NULL
## x
## }
##
## $rpart$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"
##
## $rpart$sort
## function (x)
## x[order(x[, 1], decreasing = TRUE), ]
##
##
## $rpart1SE
## $rpart1SE$label
## [1] "CART"
##
## $rpart1SE$library
## [1] "rpart"
##
## $rpart1SE$type
## [1] "Regression" "Classification"
##
## $rpart1SE$parameters
## parameter class label
## 1 parameter character parameter
##
## $rpart1SE$grid
## function (x, y, len = NULL, search = "grid")
## data.frame(parameter = "none")
##
## $rpart1SE$loop
## NULL
##
## $rpart1SE$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## if (!is.null(wts)) {
## out <- rpart::rpart(.outcome ~ ., data = dat, ...)
## }
## else {
## out <- rpart::rpart(.outcome ~ ., data = dat, weights = wts,
## ...)
## }
## out
## }
##
## $rpart1SE$predict
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## out <- if (modelFit$problemType == "Classification")
## predict(modelFit, newdata, type = "class")
## else predict(modelFit, newdata)
## out
## }
##
## $rpart1SE$prob
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## predict(modelFit, newdata, type = "prob")
## }
##
## $rpart1SE$predictors
## function (x, surrogate = TRUE, ...)
## {
## out <- as.character(x$frame$var)
## out <- out[!(out %in% c("<leaf>"))]
## if (surrogate) {
## splits <- x$splits
## splits <- splits[splits[, "adj"] > 0, ]
## out <- c(out, rownames(splits))
## }
## unique(out)
## }
##
## $rpart1SE$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...)
## {
## tmp <- rownames(object$splits)
## rownames(object$splits) <- 1:nrow(object$splits)
## splits <- data.frame(object$splits)
## splits$var <- tmp
## splits$type <- ""
## frame <- as.data.frame(object$frame)
## index <- 0
## for (i in 1:nrow(frame)) {
## if (frame$var[i] != "<leaf>") {
## index <- index + 1
## splits$type[index] <- "primary"
## if (frame$ncompete[i] > 0) {
## for (j in 1:frame$ncompete[i]) {
## index <- index + 1
## splits$type[index] <- "competing"
## }
## }
## if (frame$nsurrogate[i] > 0) {
## for (j in 1:frame$nsurrogate[i]) {
## index <- index + 1
## splits$type[index] <- "surrogate"
## }
## }
## }
## }
## splits$var <- factor(as.character(splits$var))
## if (!surrogates)
## splits <- subset(splits, type != "surrogate")
## if (!competes)
## splits <- subset(splits, type != "competing")
## out <- aggregate(splits$improve, list(Variable = splits$var),
## sum, na.rm = TRUE)
## allVars <- colnames(attributes(object$terms)$factors)
## if (!all(allVars %in% out$Variable)) {
## missingVars <- allVars[!(allVars %in% out$Variable)]
## zeros <- data.frame(x = rep(0, length(missingVars)),
## Variable = missingVars)
## out <- rbind(out, zeros)
## }
## out2 <- data.frame(Overall = out$x)
## rownames(out2) <- out$Variable
## out2
## }
##
## $rpart1SE$levels
## function (x)
## x$obsLevels
##
## $rpart1SE$trim
## function (x)
## {
## x$call <- list(na.action = (x$call)$na.action)
## x$x <- NULL
## x$y <- NULL
## x$where <- NULL
## x
## }
##
## $rpart1SE$notes
## [1] "This CART model replicates the same process used by the `rpart` function where the model complexity is determined using the one-standard error method. This procedure is replicated inside of the resampling done by `train` so that an external resampling estimate can be obtained."
##
## $rpart1SE$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"
##
## $rpart1SE$sort
## function (x)
## x[order(x[, 1], decreasing = TRUE), ]
##
##
## $rpart2
## $rpart2$label
## [1] "CART"
##
## $rpart2$library
## [1] "rpart"
##
## $rpart2$type
## [1] "Regression" "Classification"
##
## $rpart2$parameters
## parameter class label
## 1 maxdepth numeric Max Tree Depth
##
## $rpart2$grid
## function (x, y, len = NULL, search = "grid")
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
## initialFit <- initialFit[order(-initialFit[, "CP"]), "nsplit",
## drop = FALSE]
## initialFit <- initialFit[initialFit[, "nsplit"] > 0 & initialFit[,
## "nsplit"] <= 30, , drop = FALSE]
## if (search == "grid") {
## if (dim(initialFit)[1] < len) {
## cat("note: only", nrow(initialFit), "possible values of the max tree depth from the initial fit.\n",
## "Truncating the grid to", nrow(initialFit), ".\n\n")
## tuneSeq <- as.data.frame(initialFit)
## }
## else tuneSeq <- as.data.frame(initialFit[1:len, ])
## colnames(tuneSeq) <- "maxdepth"
## }
## else {
## tuneSeq <- data.frame(maxdepth = unique(sample(as.vector(initialFit[,
## 1]), size = len, replace = TRUE)))
## }
## tuneSeq
## }
##
## $rpart2$loop
## function (grid)
## {
## grid <- grid[order(grid$maxdepth, decreasing = TRUE), , drop = FALSE]
## loop <- grid[1, , drop = FALSE]
## submodels <- list(grid[-1, , drop = FALSE])
## list(loop = loop, submodels = submodels)
## }
##
## $rpart2$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## theDots <- list(...)
## if (any(names(theDots) == "control")) {
## theDots$control$maxdepth <- param$maxdepth
## theDots$control$xval <- 0
## ctl <- theDots$control
## theDots$control <- NULL
## }
## else ctl <- rpart::rpart.control(maxdepth = param$maxdepth,
## xval = 0)
## if (!is.null(wts))
## theDots$weights <- wts
## modelArgs <- c(list(formula = as.formula(".outcome ~ ."),
## data = if (is.data.frame(x)) x else as.data.frame(x),
## control = ctl), theDots)
## modelArgs$data$.outcome <- y
## out <- do.call(rpart::rpart, modelArgs)
## out
## }
##
## $rpart2$predict
## function (modelFit, newdata, submodels = NULL)
## {
## depth2cp <- function(x, depth) {
## out <- approx(x[, "nsplit"], x[, "CP"], depth)$y
## out[depth > max(x[, "nsplit"])] <- min(x[, "CP"]) * 0.99
## out
## }
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## pType <- if (modelFit$problemType == "Classification")
## "class"
## else "vector"
## out <- predict(modelFit, newdata, type = pType)
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## cpValues <- depth2cp(modelFit$cptable, submodels$maxdepth)
## for (j in seq(along = cpValues)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = cpValues[j])
## tmp[[j + 1]] <- predict(prunedFit, newdata, type = pType)
## }
## out <- tmp
## }
## out
## }
##
## $rpart2$prob
## function (modelFit, newdata, submodels = NULL)
## {
## depth2cp <- function(x, depth) {
## out <- approx(x[, "nsplit"], x[, "CP"], depth)$y
## out[depth > max(x[, "nsplit"])] <- min(x[, "CP"]) * 0.99
## out
## }
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## out <- predict(modelFit, newdata, type = "prob")
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## cpValues <- depth2cp(modelFit$cptable, submodels$maxdepth)
## for (j in seq(along = cpValues)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = cpValues[j])
## tmpProb <- predict(prunedFit, newdata, type = "prob")
## tmp[[j + 1]] <- as.data.frame(tmpProb[, modelFit$obsLevels,
## drop = FALSE])
## }
## out <- tmp
## }
## out
## }
##
## $rpart2$predictors
## function (x, surrogate = TRUE, ...)
## {
## out <- as.character(x$frame$var)
## out <- out[!(out %in% c("<leaf>"))]
## if (surrogate) {
## splits <- x$splits
## splits <- splits[splits[, "adj"] > 0, ]
## out <- c(out, rownames(splits))
## }
## unique(out)
## }
##
## $rpart2$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...)
## {
## tmp <- rownames(object$splits)
## rownames(object$splits) <- 1:nrow(object$splits)
## splits <- data.frame(object$splits)
## splits$var <- tmp
## splits$type <- ""
## frame <- as.data.frame(object$frame)
## index <- 0
## for (i in 1:nrow(frame)) {
## if (frame$var[i] != "<leaf>") {
## index <- index + 1
## splits$type[index] <- "primary"
## if (frame$ncompete[i] > 0) {
## for (j in 1:frame$ncompete[i]) {
## index <- index + 1
## splits$type[index] <- "competing"
## }
## }
## if (frame$nsurrogate[i] > 0) {
## for (j in 1:frame$nsurrogate[i]) {
## index <- index + 1
## splits$type[index] <- "surrogate"
## }
## }
## }
## }
## splits$var <- factor(as.character(splits$var))
## if (!surrogates)
## splits <- subset(splits, type != "surrogate")
## if (!competes)
## splits <- subset(splits, type != "competing")
## out <- aggregate(splits$improve, list(Variable = splits$var),
## sum, na.rm = TRUE)
## allVars <- colnames(attributes(object$terms)$factors)
## if (!all(allVars %in% out$Variable)) {
## missingVars <- allVars[!(allVars %in% out$Variable)]
## zeros <- data.frame(x = rep(0, length(missingVars)),
## Variable = missingVars)
## out <- rbind(out, zeros)
## }
## out2 <- data.frame(Overall = out$x)
## rownames(out2) <- out$Variable
## out2
## }
##
## $rpart2$levels
## function (x)
## x$obsLevels
##
## $rpart2$trim
## function (x)
## {
## x$call <- list(na.action = (x$call)$na.action)
## x$x <- NULL
## x$y <- NULL
## x$where <- NULL
## x
## }
##
## $rpart2$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"
##
## $rpart2$sort
## function (x)
## x[order(x[, 1]), ]
##
##
## $rpartCost
## $rpartCost$label
## [1] "Cost-Sensitive CART"
##
## $rpartCost$library
## [1] "rpart" "plyr"
##
## $rpartCost$type
## [1] "Classification"
##
## $rpartCost$parameters
## parameter class label
## 1 cp numeric Complexity Parameter
## 2 Cost numeric Cost
##
## $rpartCost$grid
## function (x, y, len = NULL, search = "grid")
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
## initialFit <- initialFit[order(-initialFit[, "CP"]), , drop = FALSE]
## if (search == "grid") {
## if (nrow(initialFit) < len) {
## tuneSeq <- expand.grid(cp = seq(min(initialFit[,
## "CP"]), max(initialFit[, "CP"]), length = len),
## Cost = 1:len)
## }
## else tuneSeq <- data.frame(cp = initialFit[1:len, "CP"],
## Cost = 1:len)
## colnames(tuneSeq) <- c("cp", "Cost")
## }
## else {
## tuneSeq <- data.frame(cp = 10^runif(len, min = -8, max = -1),
## Cost = runif(len, min = 1, max = 30))
## }
## tuneSeq
## }
##
## $rpartCost$loop
## function (grid)
## {
## loop <- plyr::ddply(grid, plyr::.(Cost), function(x) c(cp = min(x$cp)))
## submodels <- vector(mode = "list", length = nrow(loop))
## for (i in seq(along = submodels)) {
## larger_cp <- subset(grid, subset = Cost == loop$Cost[i] &
## cp > loop$cp[i])
## submodels[[i]] <- data.frame(cp = sort(larger_cp$cp))
## }
## list(loop = loop, submodels = submodels)
## }
##
## $rpartCost$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## theDots <- list(...)
## if (any(names(theDots) == "control")) {
## theDots$control$cp <- param$cp
## theDots$control$xval <- 0
## ctl <- theDots$control
## theDots$control <- NULL
## }
## else ctl <- rpart::rpart.control(cp = param$cp, xval = 0)
## lmat <- matrix(c(0, 1, param$Cost, 0), ncol = 2)
## rownames(lmat) <- colnames(lmat) <- levels(y)
## if (any(names(theDots) == "parms")) {
## theDots$parms$loss <- lmat
## }
## else parms <- list(loss = lmat)
## if (!is.null(wts))
## theDots$weights <- wts
## modelArgs <- c(list(formula = as.formula(".outcome ~ ."),
## data = if (is.data.frame(x)) x else as.data.frame(x),
## parms = parms, control = ctl), theDots)
## modelArgs$data$.outcome <- y
## out <- do.call(rpart::rpart, modelArgs)
## out
## }
##
## $rpartCost$predict
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## pType <- if (modelFit$problemType == "Classification")
## "class"
## else "vector"
## out <- predict(modelFit, newdata, type = pType)
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## for (j in seq(along = submodels$cp)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
## tmp[[j + 1]] <- predict(prunedFit, newdata, type = pType)
## }
## out <- tmp
## }
## out
## }
##
## $rpartCost$levels
## function (x)
## x$obsLevels
##
## $rpartCost$prob
## NULL
##
## $rpartCost$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Cost Sensitive Learning" "Two Class Only"
## [5] "Handle Missing Predictor Data" "Accepts Case Weights"
##
## $rpartCost$sort
## function (x)
## x[order(-x$cp, -x$Cost), ]
##
##
## $rpartScore
## $rpartScore$label
## [1] "CART or Ordinal Responses"
##
## $rpartScore$library
## [1] "rpartScore" "plyr"
##
## $rpartScore$type
## [1] "Classification"
##
## $rpartScore$parameters
## parameter class label
## 1 cp numeric Complexity Parameter
## 2 split character Split Function
## 3 prune character Pruning Measure
##
## $rpartScore$grid
## function (x, y, len = NULL, search = "grid")
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
## initialFit <- initialFit[order(-initialFit[, "CP"]), , drop = FALSE]
## if (search == "grid") {
## if (nrow(initialFit) < len) {
## tuneSeq <- expand.grid(cp = seq(min(initialFit[,
## "CP"]), max(initialFit[, "CP"]), length = len),
## split = c("abs", "quad"), prune = c("mr", "mc"))
## }
## else tuneSeq <- expand.grid(cp = initialFit[1:len, "CP"],
## split = c("abs", "quad"), prune = c("mr", "mc"))
## colnames(tuneSeq)[1] <- "cp"
## }
## else {
## tuneSeq <- expand.grid(cp = unique(sample(initialFit[,
## "CP"], size = len, replace = TRUE)), split = c("abs",
## "quad"), prune = c("mr", "mc"))
## }
## tuneSeq
## }
##
## $rpartScore$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## cpValue <- if (!last)
## param$cp
## else 0
## theDots <- list(...)
## if (any(names(theDots) == "control")) {
## theDots$control$cp <- cpValue
## theDots$control$xval <- 0
## ctl <- theDots$control
## theDots$control <- NULL
## }
## else ctl <- rpart::rpart.control(cp = cpValue, xval = 0)
## if (!is.null(wts))
## theDots$weights <- wts
## modelArgs <- c(list(formula = as.formula(".outcome ~ ."),
## data = if (is.data.frame(x)) x else as.data.frame(x),
## split = as.character(param$split), prune = as.character(param$prune),
## control = ctl), theDots)
## modelArgs$data$.outcome <- as.numeric(y)
## out <- do.call(rpartScore::rpartScore, modelArgs)
## if (last)
## out <- rpart::prune.rpart(out, cp = param$cp)
## out
## }
##
## $rpartScore$predict
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## out <- modelFit$obsLevels[predict(modelFit, newdata)]
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## for (j in seq(along = submodels$cp)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
## tmp[[j + 1]] <- modelFit$obsLevels[predict(prunedFit,
## newdata)]
## }
## out <- tmp
## }
## out
## }
##
## $rpartScore$prob
## NULL
##
## $rpartScore$predictors
## function (x, surrogate = TRUE, ...)
## {
## out <- as.character(x$frame$var)
## out <- out[!(out %in% c("<leaf>"))]
## if (surrogate) {
## splits <- x$splits
## splits <- splits[splits[, "adj"] > 0, ]
## out <- c(out, rownames(splits))
## }
## unique(out)
## }
##
## $rpartScore$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...)
## {
## allVars <- all.vars(object$terms)
## allVars <- allVars[allVars != ".outcome"]
## out <- data.frame(Overall = object$variable.importance, Variable = names(object$variable.importance))
## rownames(out) <- names(object$variable.importance)
## if (!all(allVars %in% out$Variable)) {
## missingVars <- allVars[!(allVars %in% out$Variable)]
## zeros <- data.frame(Overall = rep(0, length(missingVars)),
## Variable = missingVars)
## out <- rbind(out, zeros)
## }
## rownames(out) <- out$Variable
## out$Variable <- NULL
## out
## }
##
## $rpartScore$levels
## function (x)
## x$obsLevels
##
## $rpartScore$trim
## function (x)
## {
## x$call <- list(na.action = (x$call)$na.action)
## x$x <- NULL
## x$y <- NULL
## x$where <- NULL
## x
## }
##
## $rpartScore$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"
## [5] "Ordinal Outcomes"
##
## $rpartScore$sort
## function (x)
## x[order(x[, 1], decreasing = TRUE), ]
# list the parameters that can be tuned for the rpart model
getModelInfo('rpart')$rpart$parameters
## parameter class label
## 1 cp numeric Complexity Parameter
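modelLookup() answers the same question in one call: for any registered method it returns the tunable parameters along with flags for regression, classification, and class-probability support:
# same parameter table via caret's modelLookup()
modelLookup('rpart')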
Find important variables
library('caret')
importance = varImp(model, scale=T)
importance
## rpart variable importance
##
## Overall
## total_day_minutes 100.000
## total_day_charge 93.779
## number_customer_service_calls 76.711
## international_planyes 74.243
## total_intl_minutes 61.597
## total_eve_minutes 53.359
## total_intl_charge 53.193
## total_eve_charge 50.795
## number_vmail_messages 23.936
## voice_mail_planyes 23.936
## total_intl_calls 23.872
## total_night_minutes 11.245
## total_night_charge 8.266
## total_night_calls 5.098
## total_day_calls 1.008
## total_eve_calls 0.000
plot(importance)
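varImp() is rescaling the importance that rpart itself stores on the fitted object (sums of split improvements, including surrogates); the raw scores can be read directly, either off a plain rpart fit or off the finalModel slot of a caret fit:
# raw, unscaled importance as stored by rpart
churn.rp$variable.importance
model$finalModel$variable.importance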

ROC
#install.packages("ROCR")
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
predictions <-predict(model, churnTest, type="prob")
head(predictions)
## yes no
## 1 0.02701486 0.9729851
## 2 0.10240964 0.8975904
## 3 0.11320755 0.8867925
## 4 0.02701486 0.9729851
## 5 0.02701486 0.9729851
## 6 0.10240964 0.8975904
pred.to.roc<-predictions[, "yes"]
head(pred.to.roc)
## [1] 0.02701486 0.10240964 0.11320755 0.02701486 0.02701486 0.10240964
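The prediction object created on the next line is what ROCR's performance() consumes; a sketch of the usual follow-up, an ROC curve and its AUC. Note that prediction() by default takes the first factor level as the negative class, so with levels c('yes', 'no') it helps to pass label.ordering to make 'yes' the positive class:
# sketch: ROC curve and AUC, forcing 'yes' to be the positive class
pred.rocr2 <- prediction(pred.to.roc, churnTest$churn, label.ordering=c('no', 'yes'))
perf.tpr <- performance(pred.rocr2, 'tpr', 'fpr')
plot(perf.tpr, main='ROC curve')
performance(pred.rocr2, 'auc')@y.values[[1]]  # area under the curve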
pred.rocr<-prediction(pred.to.roc, churnTest$churn)
pred.rocr
## An object of class "prediction"
## Slot "predictions":
## [[1]]
## [1] 0.02701486 0.10240964 0.11320755 0.02701486 0.02701486 0.10240964
## [7] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [13] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [19] 0.00000000 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [25] 0.10240964 0.10240964 0.02701486 0.04046243 0.04046243 0.02701486
## [31] 0.04046243 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [37] 0.02701486 0.10240964 0.02701486 1.00000000 0.85000000 0.02701486
## [43] 0.02701486 0.12500000 0.02701486 0.02701486 0.02701486 0.10240964
## [49] 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486
## [55] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [61] 0.02701486 0.02701486 0.83333333 0.12500000 0.02701486 0.02701486
## [67] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667
## [73] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [79] 0.02701486 0.04046243 0.87254902 0.02701486 0.16000000 0.02701486
## [85] 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667 0.02701486
## [91] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000
## [97] 0.02701486 0.02701486 0.02701486 0.02701486 0.11320755 0.04046243
## [103] 0.10240964 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486
## [109] 0.10416667 0.95049505 0.02701486 0.02701486 0.02701486 0.10240964
## [115] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000
## [121] 0.02701486 0.10240964 0.10240964 0.02701486 0.95049505 0.10240964
## [127] 0.02701486 0.02701486 1.00000000 0.10240964 0.02701486 0.02701486
## [133] 0.02701486 0.02701486 0.02701486 0.10416667 0.10240964 0.02701486
## [139] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [145] 0.04046243 0.10240964 0.02701486 0.02701486 0.02701486 0.87254902
## ... [remaining output truncated: the predictions slot holds all 1667 fitted churn probabilities, one per test observation]
##
##
## Slot "labels":
## [[1]]
## ... [output truncated: 1667 true test-set labels (no/yes), one per test observation]
## Levels: no < yes
##
##
## Slot "cutoffs":
## [[1]]
## [1] Inf 1.00000000 0.95049505 0.87500000 0.87254902 0.85000000
## [7] 0.83333333 0.73684211 0.16000000 0.12500000 0.11320755 0.10416667
## [13] 0.10240964 0.04046243 0.02701486 0.00000000
##
##
## Slot "fp":
## [[1]]
## [1] 0 0 2 4 8 9 11 15 28 35 56 107 273 357
## [15] 1435 1443
##
##
## Slot "tp":
## [[1]]
## [1] 0 42 83 84 123 133 143 145 149 154 155 170 190 192 224 224
##
##
## Slot "tn":
## [[1]]
## [1] 1443 1443 1441 1439 1435 1434 1432 1428 1415 1408 1387 1336 1170 1086
## [15] 8 0
##
##
## Slot "fn":
## [[1]]
## [1] 224 182 141 140 101 91 81 79 75 70 69 54 34 32 0 0
##
##
## Slot "n.pos":
## [[1]]
## [1] 224
##
##
## Slot "n.neg":
## [[1]]
## [1] 1443
##
##
## Slot "n.pos.pred":
## [[1]]
## [1] 0 42 85 88 131 142 154 160 177 189 211 277 463 549
## [15] 1659 1667
##
##
## Slot "n.neg.pred":
## [[1]]
## [1] 1667 1625 1582 1579 1536 1525 1513 1507 1490 1478 1456 1390 1204 1118
## [15] 8 0
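The fp, tp, tn, and fn slots printed above hold the confusion counts at every cutoff, so the ROC curve and AUC can be reproduced by hand. Below is a minimal sketch, assuming the pred.rocr object created earlier; auc.manual is an illustrative name, not part of ROCR.
#(illustration) rebuild FPR/TPR from the confusion counts stored in pred.rocr
fp <- pred.rocr@fp[[1]]; tp <- pred.rocr@tp[[1]]
tn <- pred.rocr@tn[[1]]; fn <- pred.rocr@fn[[1]]
fpr <- fp / (fp + tn)  #false positive rate at each cutoff
tpr <- tp / (tp + fn)  #true positive rate at each cutoff
#AUC via the trapezoidal rule; should agree with performance(..., "auc") below
auc.manual <- sum(diff(fpr) * (head(tpr, -1) + tail(tpr, -1)) / 2)
auc.manual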
#Compute the AUC and plot the ROC curve (true positive rate vs. false positive rate)
perf.rocr <- performance(pred.rocr, measure = "auc")
perf.tpr.rocr <- performance(pred.rocr, measure = "tpr", x.measure = "fpr")
plot(perf.tpr.rocr, main = paste("AUC:", unlist(perf.rocr@y.values)))
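To turn the curve into a working classifier you still need to pick an operating cutoff. One common heuristic (not shown in ROCR's own examples) is Youden's J statistic, TPR - FPR; here is a sketch using the perf.tpr.rocr object above, where best.idx is an illustrative name.
#(illustration) choose the cutoff that maximizes Youden's J = TPR - FPR
fpr <- perf.tpr.rocr@x.values[[1]]
tpr <- perf.tpr.rocr@y.values[[1]]
cutoffs <- perf.tpr.rocr@alpha.values[[1]]
best.idx <- which.max(tpr - fpr)
c(cutoff = cutoffs[best.idx], tpr = tpr[best.idx], fpr = fpr[best.idx])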
