Classification
review
library(modeldata)
## Warning: package 'modeldata' was built under R version 3.5.2
data(mlc_churn)
str(mlc_churn)
## Classes 'tbl_df', 'tbl' and 'data.frame': 5000 obs. of 20 variables:
## $ state : Factor w/ 51 levels "AK","AL","AR",..: 17 36 32 36 37 2 20 25 19 50 ...
## $ account_length : int 128 107 137 84 75 118 121 147 117 141 ...
## $ area_code : Factor w/ 3 levels "area_code_408",..: 2 2 2 1 2 3 3 2 1 2 ...
## $ international_plan : Factor w/ 2 levels "no","yes": 1 1 1 2 2 2 1 2 1 2 ...
## $ voice_mail_plan : Factor w/ 2 levels "no","yes": 2 2 1 1 1 1 2 1 1 2 ...
## $ number_vmail_messages : int 25 26 0 0 0 0 24 0 0 37 ...
## $ total_day_minutes : num 265 162 243 299 167 ...
## $ total_day_calls : int 110 123 114 71 113 98 88 79 97 84 ...
## $ total_day_charge : num 45.1 27.5 41.4 50.9 28.3 ...
## $ total_eve_minutes : num 197.4 195.5 121.2 61.9 148.3 ...
## $ total_eve_calls : int 99 103 110 88 122 101 108 94 80 111 ...
## $ total_eve_charge : num 16.78 16.62 10.3 5.26 12.61 ...
## $ total_night_minutes : num 245 254 163 197 187 ...
## $ total_night_calls : int 91 103 104 89 121 118 118 96 90 97 ...
## $ total_night_charge : num 11.01 11.45 7.32 8.86 8.41 ...
## $ total_intl_minutes : num 10 13.7 12.2 6.6 10.1 6.3 7.5 7.1 8.7 11.2 ...
## $ total_intl_calls : int 3 3 5 7 3 6 7 6 4 5 ...
## $ total_intl_charge : num 2.7 3.7 3.29 1.78 2.73 1.7 2.03 1.92 2.35 3.02 ...
## $ number_customer_service_calls: int 1 1 0 2 3 0 3 0 1 0 ...
## $ churn : Factor w/ 2 levels "yes","no": 2 2 2 2 2 2 2 2 2 2 ...
# Select the modeling variables (drop state, area_code, account_length)
variable.list = !names(mlc_churn) %in% c('state', 'area_code', 'account_length')
mlc_churn = mlc_churn[, variable.list]
str(mlc_churn)
## Classes 'tbl_df', 'tbl' and 'data.frame': 5000 obs. of 17 variables:
## $ international_plan : Factor w/ 2 levels "no","yes": 1 1 1 2 2 2 1 2 1 2 ...
## $ voice_mail_plan : Factor w/ 2 levels "no","yes": 2 2 1 1 1 1 2 1 1 2 ...
## $ number_vmail_messages : int 25 26 0 0 0 0 24 0 0 37 ...
## $ total_day_minutes : num 265 162 243 299 167 ...
## $ total_day_calls : int 110 123 114 71 113 98 88 79 97 84 ...
## $ total_day_charge : num 45.1 27.5 41.4 50.9 28.3 ...
## $ total_eve_minutes : num 197.4 195.5 121.2 61.9 148.3 ...
## $ total_eve_calls : int 99 103 110 88 122 101 108 94 80 111 ...
## $ total_eve_charge : num 16.78 16.62 10.3 5.26 12.61 ...
## $ total_night_minutes : num 245 254 163 197 187 ...
## $ total_night_calls : int 91 103 104 89 121 118 118 96 90 97 ...
## $ total_night_charge : num 11.01 11.45 7.32 8.86 8.41 ...
## $ total_intl_minutes : num 10 13.7 12.2 6.6 10.1 6.3 7.5 7.1 8.7 11.2 ...
## $ total_intl_calls : int 3 3 5 7 3 6 7 6 4 5 ...
## $ total_intl_charge : num 2.7 3.7 3.29 1.78 2.73 1.7 2.03 1.92 2.35 3.02 ...
## $ number_customer_service_calls: int 1 1 0 2 3 0 3 0 1 0 ...
## $ churn : Factor w/ 2 levels "yes","no": 2 2 2 2 2 2 2 2 2 2 ...
set.seed(222)
# Split the data into training and testing sets
ind <- sample(1:2, size = nrow(mlc_churn), replace = TRUE, prob = c(0.7, 0.3))
trainset = mlc_churn[ind == 1, ]
testset = mlc_churn[ind == 2, ]
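sample() gives roughly a 70/30 split, but it does not guarantee the same churn ratio in both sets. A sketch of an alternative (not run here, so the results below stay reproducible) is a stratified split with caret's createDataPartition(); trainset2/testset2 are names introduced only for this sketch:

# Sketch (not run): stratified 70/30 split that preserves the churn ratio
library(caret)
idx <- createDataPartition(mlc_churn$churn, p = 0.7, list = FALSE)
trainset2 <- mlc_churn[idx, ]
testset2 <- mlc_churn[-idx, ]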
library('rpart')
library('rpart.plot')
churn.rp <- rpart(churn ~ ., data = trainset)
s = summary(churn.rp)
## Call:
## rpart(formula = churn ~ ., data = trainset)
## n= 3500
##
## CP nsplit rel error xerror xstd
## 1 0.11264822 0 1.0000000 1.0000000 0.04111655
## 2 0.08498024 1 0.8873518 0.9189723 0.03968453
## 3 0.07114625 2 0.8023715 0.8458498 0.03830434
## 4 0.04940711 4 0.6600791 0.6857708 0.03494155
## 5 0.03162055 7 0.4743083 0.4901186 0.02999968
## 6 0.01976285 8 0.4426877 0.4723320 0.02949102
## 7 0.01679842 9 0.4229249 0.4703557 0.02943377
## 8 0.01054018 11 0.3893281 0.4367589 0.02843690
## 9 0.01000000 14 0.3577075 0.4387352 0.02849682
##
## Variable importance
## total_day_charge total_day_minutes
## 21 21
## number_customer_service_calls total_intl_calls
## 9 7
## total_eve_charge total_eve_minutes
## 7 7
## international_plan total_intl_charge
## 6 5
## total_intl_minutes number_vmail_messages
## 5 5
## voice_mail_plan
## 5
##
## Node number 1: 3500 observations, complexity param=0.1126482
## predicted class=no expected loss=0.1445714 P(node) =1
## class counts: 506 2994
## probabilities: 0.145 0.855
## left son=2 (237 obs) right son=3 (3263 obs)
## Primary splits:
## total_day_minutes < 264.45 to the right, improve=115.04350, (0 missing)
## total_day_charge < 44.96 to the right, improve=115.04350, (0 missing)
## number_customer_service_calls < 3.5 to the right, improve= 78.71580, (0 missing)
## international_plan splits as RL, improve= 58.13553, (0 missing)
## total_intl_minutes < 13.15 to the right, improve= 10.57256, (0 missing)
## Surrogate splits:
## total_day_charge < 44.96 to the right, agree=1, adj=1, (0 split)
##
## Node number 2: 237 observations, complexity param=0.08498024
## predicted class=yes expected loss=0.3797468 P(node) =0.06771429
## class counts: 147 90
## probabilities: 0.620 0.380
## left son=4 (180 obs) right son=5 (57 obs)
## Primary splits:
## voice_mail_plan splits as LR, improve=37.14265, (0 missing)
## number_vmail_messages < 6.5 to the left, improve=37.14265, (0 missing)
## total_eve_minutes < 150.35 to the right, improve=13.63137, (0 missing)
## total_eve_charge < 12.78 to the right, improve=13.63137, (0 missing)
## total_day_minutes < 285.5 to the right, improve=11.37306, (0 missing)
## Surrogate splits:
## number_vmail_messages < 6.5 to the left, agree=1.000, adj=1.000, (0 split)
## total_night_minutes < 110.3 to the right, agree=0.764, adj=0.018, (0 split)
## total_night_charge < 4.965 to the right, agree=0.764, adj=0.018, (0 split)
##
## Node number 3: 3263 observations, complexity param=0.07114625
## predicted class=no expected loss=0.1100215 P(node) =0.9322857
## class counts: 359 2904
## probabilities: 0.110 0.890
## left son=6 (254 obs) right son=7 (3009 obs)
## Primary splits:
## number_customer_service_calls < 3.5 to the right, improve=82.096880, (0 missing)
## international_plan splits as RL, improve=51.144010, (0 missing)
## total_day_minutes < 244.65 to the right, improve=13.914910, (0 missing)
## total_day_charge < 41.59 to the right, improve=13.914910, (0 missing)
## total_intl_minutes < 13.15 to the right, improve= 8.656326, (0 missing)
## Surrogate splits:
## total_day_calls < 159 to the right, agree=0.922, adj=0.004, (0 split)
##
## Node number 4: 180 observations, complexity param=0.03162055
## predicted class=yes expected loss=0.2222222 P(node) =0.05142857
## class counts: 140 40
## probabilities: 0.778 0.222
## left son=8 (150 obs) right son=9 (30 obs)
## Primary splits:
## total_eve_minutes < 150.35 to the right, improve=21.342220, (0 missing)
## total_eve_charge < 12.78 to the right, improve=21.342220, (0 missing)
## total_day_minutes < 285.5 to the right, improve= 6.400000, (0 missing)
## total_day_charge < 48.535 to the right, improve= 6.400000, (0 missing)
## total_night_minutes < 198.05 to the right, improve= 5.203805, (0 missing)
## Surrogate splits:
## total_eve_charge < 12.78 to the right, agree=1, adj=1, (0 split)
##
## Node number 5: 57 observations
## predicted class=no expected loss=0.122807 P(node) =0.01628571
## class counts: 7 50
## probabilities: 0.123 0.877
##
## Node number 6: 254 observations, complexity param=0.07114625
## predicted class=no expected loss=0.496063 P(node) =0.07257143
## class counts: 126 128
## probabilities: 0.496 0.504
## left son=12 (100 obs) right son=13 (154 obs)
## Primary splits:
## total_day_minutes < 162.7 to the left, improve=43.691350, (0 missing)
## total_day_charge < 27.66 to the left, improve=43.691350, (0 missing)
## total_eve_minutes < 141.45 to the left, improve= 9.909319, (0 missing)
## total_eve_charge < 12.025 to the left, improve= 9.909319, (0 missing)
## number_customer_service_calls < 4.5 to the right, improve= 5.993972, (0 missing)
## Surrogate splits:
## total_day_charge < 27.66 to the left, agree=1.000, adj=1.00, (0 split)
## total_eve_minutes < 118.6 to the left, agree=0.634, adj=0.07, (0 split)
## total_eve_charge < 10.08 to the left, agree=0.634, adj=0.07, (0 split)
## total_night_minutes < 84.05 to the left, agree=0.622, adj=0.04, (0 split)
## total_night_calls < 79.5 to the left, agree=0.622, adj=0.04, (0 split)
##
## Node number 7: 3009 observations, complexity param=0.04940711
## predicted class=no expected loss=0.07743436 P(node) =0.8597143
## class counts: 233 2776
## probabilities: 0.077 0.923
## left son=14 (278 obs) right son=15 (2731 obs)
## Primary splits:
## international_plan splits as RL, improve=52.61575, (0 missing)
## total_day_minutes < 244.65 to the right, improve=18.19842, (0 missing)
## total_day_charge < 41.59 to the right, improve=18.19842, (0 missing)
## total_eve_minutes < 249.95 to the right, improve=10.63770, (0 missing)
## total_eve_charge < 21.245 to the right, improve=10.63770, (0 missing)
##
## Node number 8: 150 observations
## predicted class=yes expected loss=0.1133333 P(node) =0.04285714
## class counts: 133 17
## probabilities: 0.887 0.113
##
## Node number 9: 30 observations
## predicted class=no expected loss=0.2333333 P(node) =0.008571429
## class counts: 7 23
## probabilities: 0.233 0.767
##
## Node number 12: 100 observations
## predicted class=yes expected loss=0.14 P(node) =0.02857143
## class counts: 86 14
## probabilities: 0.860 0.140
##
## Node number 13: 154 observations, complexity param=0.01976285
## predicted class=no expected loss=0.2597403 P(node) =0.044
## class counts: 40 114
## probabilities: 0.260 0.740
## left son=26 (20 obs) right son=27 (134 obs)
## Primary splits:
## total_eve_minutes < 146.8 to the left, improve=11.049140, (0 missing)
## total_eve_charge < 12.48 to the left, improve=11.049140, (0 missing)
## total_night_calls < 118.5 to the right, improve= 5.577768, (0 missing)
## total_day_minutes < 180.8 to the left, improve= 3.187721, (0 missing)
## total_day_charge < 30.735 to the left, improve= 3.187721, (0 missing)
## Surrogate splits:
## total_eve_charge < 12.48 to the left, agree=1, adj=1, (0 split)
##
## Node number 14: 278 observations, complexity param=0.04940711
## predicted class=no expected loss=0.3705036 P(node) =0.07942857
## class counts: 103 175
## probabilities: 0.371 0.629
## left son=28 (50 obs) right son=29 (228 obs)
## Primary splits:
## total_intl_minutes < 13.05 to the right, improve=48.316610, (0 missing)
## total_intl_calls < 2.5 to the left, improve=48.316610, (0 missing)
## total_intl_charge < 3.525 to the right, improve=48.316610, (0 missing)
## total_day_minutes < 235.9 to the right, improve= 3.542388, (0 missing)
## total_day_charge < 40.1 to the right, improve= 3.542388, (0 missing)
## Surrogate splits:
## total_intl_charge < 3.525 to the right, agree=1.000, adj=1.00, (0 split)
## total_night_calls < 141.5 to the right, agree=0.824, adj=0.02, (0 split)
## total_intl_calls < 11.5 to the right, agree=0.824, adj=0.02, (0 split)
##
## Node number 15: 2731 observations, complexity param=0.01679842
## predicted class=no expected loss=0.04760161 P(node) =0.7802857
## class counts: 130 2601
## probabilities: 0.048 0.952
## left son=30 (148 obs) right son=31 (2583 obs)
## Primary splits:
## total_day_minutes < 244.65 to the right, improve=16.472980, (0 missing)
## total_day_charge < 41.59 to the right, improve=16.472980, (0 missing)
## total_eve_minutes < 249.15 to the right, improve= 9.194760, (0 missing)
## total_eve_charge < 21.175 to the right, improve= 9.194760, (0 missing)
## total_night_minutes < 233.15 to the right, improve= 2.900066, (0 missing)
## Surrogate splits:
## total_day_charge < 41.59 to the right, agree=1, adj=1, (0 split)
##
## Node number 26: 20 observations
## predicted class=yes expected loss=0.25 P(node) =0.005714286
## class counts: 15 5
## probabilities: 0.750 0.250
##
## Node number 27: 134 observations
## predicted class=no expected loss=0.1865672 P(node) =0.03828571
## class counts: 25 109
## probabilities: 0.187 0.813
##
## Node number 28: 50 observations
## predicted class=yes expected loss=0 P(node) =0.01428571
## class counts: 50 0
## probabilities: 1.000 0.000
##
## Node number 29: 228 observations, complexity param=0.04940711
## predicted class=no expected loss=0.2324561 P(node) =0.06514286
## class counts: 53 175
## probabilities: 0.232 0.768
## left son=58 (44 obs) right son=59 (184 obs)
## Primary splits:
## total_intl_calls < 2.5 to the left, improve=64.240080, (0 missing)
## total_day_minutes < 236.2 to the right, improve= 3.091144, (0 missing)
## total_day_charge < 40.155 to the right, improve= 3.091144, (0 missing)
## total_eve_minutes < 272.9 to the right, improve= 2.752684, (0 missing)
## total_eve_charge < 23.195 to the right, improve= 2.752684, (0 missing)
## Surrogate splits:
## total_day_calls < 48 to the left, agree=0.816, adj=0.045, (0 split)
##
## Node number 30: 148 observations, complexity param=0.01679842
## predicted class=no expected loss=0.277027 P(node) =0.04228571
## class counts: 41 107
## probabilities: 0.277 0.723
## left son=60 (33 obs) right son=61 (115 obs)
## Primary splits:
## total_eve_minutes < 243.5 to the right, improve=19.614750, (0 missing)
## total_eve_charge < 20.695 to the right, improve=19.614750, (0 missing)
## voice_mail_plan splits as LR, improve= 7.572072, (0 missing)
## number_vmail_messages < 5.5 to the left, improve= 7.572072, (0 missing)
## total_night_minutes < 224 to the right, improve= 7.071938, (0 missing)
## Surrogate splits:
## total_eve_charge < 20.695 to the right, agree=1.000, adj=1.000, (0 split)
## total_intl_calls < 8.5 to the right, agree=0.797, adj=0.091, (0 split)
##
## Node number 31: 2583 observations, complexity param=0.01054018
## predicted class=no expected loss=0.03445606 P(node) =0.738
## class counts: 89 2494
## probabilities: 0.034 0.966
## left son=62 (249 obs) right son=63 (2334 obs)
## Primary splits:
## total_eve_minutes < 266.05 to the right, improve=4.468302, (0 missing)
## total_eve_charge < 22.615 to the right, improve=4.468302, (0 missing)
## total_day_minutes < 220.85 to the right, improve=3.194997, (0 missing)
## total_day_charge < 37.545 to the right, improve=3.194997, (0 missing)
## total_night_minutes < 237.65 to the right, improve=1.415708, (0 missing)
## Surrogate splits:
## total_eve_charge < 22.615 to the right, agree=1, adj=1, (0 split)
##
## Node number 58: 44 observations
## predicted class=yes expected loss=0 P(node) =0.01257143
## class counts: 44 0
## probabilities: 1.000 0.000
##
## Node number 59: 184 observations
## predicted class=no expected loss=0.04891304 P(node) =0.05257143
## class counts: 9 175
## probabilities: 0.049 0.951
##
## Node number 60: 33 observations
## predicted class=yes expected loss=0.2424242 P(node) =0.009428571
## class counts: 25 8
## probabilities: 0.758 0.242
##
## Node number 61: 115 observations
## predicted class=no expected loss=0.1391304 P(node) =0.03285714
## class counts: 16 99
## probabilities: 0.139 0.861
##
## Node number 62: 249 observations, complexity param=0.01054018
## predicted class=no expected loss=0.124498 P(node) =0.07114286
## class counts: 31 218
## probabilities: 0.124 0.876
## left son=124 (38 obs) right son=125 (211 obs)
## Primary splits:
## total_day_minutes < 220.25 to the right, improve=18.522580, (0 missing)
## total_day_charge < 37.44 to the right, improve=18.522580, (0 missing)
## total_night_minutes < 231.8 to the right, improve= 6.237254, (0 missing)
## total_night_charge < 10.43 to the right, improve= 6.237254, (0 missing)
## voice_mail_plan splits as LR, improve= 2.147493, (0 missing)
## Surrogate splits:
## total_day_charge < 37.44 to the right, agree=1.000, adj=1.000, (0 split)
## total_intl_minutes < 2.75 to the left, agree=0.851, adj=0.026, (0 split)
## total_intl_charge < 0.745 to the left, agree=0.851, adj=0.026, (0 split)
##
## Node number 63: 2334 observations
## predicted class=no expected loss=0.02485004 P(node) =0.6668571
## class counts: 58 2276
## probabilities: 0.025 0.975
##
## Node number 124: 38 observations, complexity param=0.01054018
## predicted class=yes expected loss=0.4210526 P(node) =0.01085714
## class counts: 22 16
## probabilities: 0.579 0.421
## left son=248 (28 obs) right son=249 (10 obs)
## Primary splits:
## voice_mail_plan splits as LR, improve=9.097744, (0 missing)
## number_vmail_messages < 12 to the left, improve=9.097744, (0 missing)
## total_night_minutes < 216.4 to the right, improve=3.924043, (0 missing)
## total_night_charge < 9.74 to the right, improve=3.924043, (0 missing)
## total_day_calls < 102 to the left, improve=2.821700, (0 missing)
## Surrogate splits:
## number_vmail_messages < 12 to the left, agree=1.000, adj=1.0, (0 split)
## total_day_minutes < 239.4 to the left, agree=0.816, adj=0.3, (0 split)
## total_day_charge < 40.7 to the left, agree=0.816, adj=0.3, (0 split)
## total_eve_minutes < 310.8 to the left, agree=0.816, adj=0.3, (0 split)
## total_eve_charge < 26.42 to the left, agree=0.816, adj=0.3, (0 split)
##
## Node number 125: 211 observations
## predicted class=no expected loss=0.04265403 P(node) =0.06028571
## class counts: 9 202
## probabilities: 0.043 0.957
##
## Node number 248: 28 observations
## predicted class=yes expected loss=0.2142857 P(node) =0.008
## class counts: 22 6
## probabilities: 0.786 0.214
##
## Node number 249: 10 observations
## predicted class=no expected loss=0 P(node) =0.002857143
## class counts: 0 10
## probabilities: 0.000 1.000
rpart.plot(churn.rp)
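The tree above was grown with rpart's defaults (Gini splitting, minsplit = 20, cp = 0.01, 10-fold internal cross-validation). If you want a larger tree to prune back later, the control argument is the knob; a minimal sketch, not run here (churn.rp.full is a name introduced just for this sketch):

# Sketch (not run): grow a deeper tree by relaxing the complexity penalty
churn.rp.full <- rpart(churn ~ ., data = trainset,
                       control = rpart.control(cp = 0, minsplit = 20, xval = 10))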

printcp(churn.rp)
##
## Classification tree:
## rpart(formula = churn ~ ., data = trainset)
##
## Variables actually used in tree construction:
## [1] international_plan number_customer_service_calls
## [3] total_day_minutes total_eve_minutes
## [5] total_intl_calls total_intl_minutes
## [7] voice_mail_plan
##
## Root node error: 506/3500 = 0.14457
##
## n= 3500
##
## CP nsplit rel error xerror xstd
## 1 0.112648 0 1.00000 1.00000 0.041117
## 2 0.084980 1 0.88735 0.91897 0.039685
## 3 0.071146 2 0.80237 0.84585 0.038304
## 4 0.049407 4 0.66008 0.68577 0.034942
## 5 0.031621 7 0.47431 0.49012 0.030000
## 6 0.019763 8 0.44269 0.47233 0.029491
## 7 0.016798 9 0.42292 0.47036 0.029434
## 8 0.010540 11 0.38933 0.43676 0.028437
## 9 0.010000 14 0.35771 0.43874 0.028497
min_row = which.min(churn.rp$cptable[,"xerror"])
churn.cp = churn.rp$cptable[min_row, "CP"]
# Prune the tree using churn.cp as the complexity threshold
prune.tree = prune(churn.rp, cp = churn.cp)
rpart.plot(prune.tree)
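which.min() takes the cp with the smallest cross-validated error (xerror). A common, slightly more conservative alternative is the one-standard-error rule: pick the simplest tree whose xerror is within one xstd of that minimum. A sketch using the same cptable:

# Sketch: one-SE rule. cptable rows run from simplest to most complex,
# so the first row meeting the threshold is the simplest qualifying tree.
threshold = churn.rp$cptable[min_row, "xerror"] + churn.rp$cptable[min_row, "xstd"]
cp.1se = churn.rp$cptable[which(churn.rp$cptable[, "xerror"] <= threshold)[1], "CP"]
prune.tree.1se = prune(churn.rp, cp = cp.1se)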
library('caret')
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.2

library('e1071')
predictions <- predict(prune.tree, testset, type = 'class')
table(predictions, testset$churn)
##
## predictions yes no
## yes 136 31
## no 65 1268
confusionMatrix(table(predictions, testset$churn))
## Confusion Matrix and Statistics
##
##
## predictions yes no
## yes 136 31
## no 65 1268
##
## Accuracy : 0.936
## 95% CI : (0.9224, 0.9479)
## No Information Rate : 0.866
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.703
## Mcnemar's Test P-Value : 0.000757
##
## Sensitivity : 0.67662
## Specificity : 0.97614
## Pos Pred Value : 0.81437
## Neg Pred Value : 0.95124
## Prevalence : 0.13400
## Detection Rate : 0.09067
## Detection Prevalence : 0.11133
## Balanced Accuracy : 0.82638
##
## 'Positive' Class : yes
##
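As a sanity check, the headline metrics can be recomputed by hand from the 2x2 table (with 'yes' as the positive class):

# Recompute the key metrics from the confusion table
tb = table(predictions, testset$churn)
sum(diag(tb)) / sum(tb)              # accuracy: (136 + 1268) / 1500 = 0.936
tb["yes", "yes"] / sum(tb[, "yes"])  # sensitivity (recall on churners): 136 / 201
tb["no", "no"]   / sum(tb[, "no"])   # specificity: 1268 / 1299
tb["yes", "yes"] / sum(tb["yes", ])  # positive predictive value: 136 / 167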
use the caret package
#install.packages("caret")
library(caret)
control = trainControl(method = "repeatedcv", number = 10, repeats = 3)
model = train(churn ~ ., data = trainset, method = "rpart", trControl = control)
predictions = predict(model, testset, type = 'raw')
table(predictions,testset$churn)
##
## predictions yes no
## yes 41 21
## no 160 1278
confusionMatrix(table(predictions,testset$churn))
## Confusion Matrix and Statistics
##
##
## predictions yes no
## yes 41 21
## no 160 1278
##
## Accuracy : 0.8793
## 95% CI : (0.8618, 0.8954)
## No Information Rate : 0.866
## P-Value [Acc > NIR] : 0.06816
##
## Kappa : 0.2654
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.20398
## Specificity : 0.98383
## Pos Pred Value : 0.66129
## Neg Pred Value : 0.88873
## Prevalence : 0.13400
## Detection Rate : 0.02733
## Detection Prevalence : 0.04133
## Balanced Accuracy : 0.59391
##
## 'Positive' Class : yes
##
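Sensitivity collapses here because train() tries only tuneLength = 3 candidate cp values by default, and (as the $rpart$grid function dumped in the next section shows) those candidates are the largest CP entries from an initial fit, i.e. heavily pruned trees. A sketch of simply widening the default search (not run here; model.wide is a name introduced for this sketch):

# Sketch (not run): let caret try 10 cp candidates instead of 3
model.wide = train(churn ~ ., data = trainset, method = "rpart",
                   trControl = control, tuneLength = 10)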
caret package usage
# List all the algorithms implemented in the caret package
names(getModelInfo())
## [1] "ada" "AdaBag" "AdaBoost.M1"
## [4] "adaboost" "amdai" "ANFIS"
## [7] "avNNet" "awnb" "awtan"
## [10] "bag" "bagEarth" "bagEarthGCV"
## [13] "bagFDA" "bagFDAGCV" "bam"
## [16] "bartMachine" "bayesglm" "binda"
## [19] "blackboost" "blasso" "blassoAveraged"
## [22] "bridge" "brnn" "BstLm"
## [25] "bstSm" "bstTree" "C5.0"
## [28] "C5.0Cost" "C5.0Rules" "C5.0Tree"
## [31] "cforest" "chaid" "CSimca"
## [34] "ctree" "ctree2" "cubist"
## [37] "dda" "deepboost" "DENFIS"
## [40] "dnn" "dwdLinear" "dwdPoly"
## [43] "dwdRadial" "earth" "elm"
## [46] "enet" "evtree" "extraTrees"
## [49] "fda" "FH.GBML" "FIR.DM"
## [52] "foba" "FRBCS.CHI" "FRBCS.W"
## [55] "FS.HGD" "gam" "gamboost"
## [58] "gamLoess" "gamSpline" "gaussprLinear"
## [61] "gaussprPoly" "gaussprRadial" "gbm_h2o"
## [64] "gbm" "gcvEarth" "GFS.FR.MOGUL"
## [67] "GFS.LT.RS" "GFS.THRIFT" "glm.nb"
## [70] "glm" "glmboost" "glmnet_h2o"
## [73] "glmnet" "glmStepAIC" "gpls"
## [76] "hda" "hdda" "hdrda"
## [79] "HYFIS" "icr" "J48"
## [82] "JRip" "kernelpls" "kknn"
## [85] "knn" "krlsPoly" "krlsRadial"
## [88] "lars" "lars2" "lasso"
## [91] "lda" "lda2" "leapBackward"
## [94] "leapForward" "leapSeq" "Linda"
## [97] "lm" "lmStepAIC" "LMT"
## [100] "loclda" "logicBag" "LogitBoost"
## [103] "logreg" "lssvmLinear" "lssvmPoly"
## [106] "lssvmRadial" "lvq" "M5"
## [109] "M5Rules" "manb" "mda"
## [112] "Mlda" "mlp" "mlpKerasDecay"
## [115] "mlpKerasDecayCost" "mlpKerasDropout" "mlpKerasDropoutCost"
## [118] "mlpML" "mlpSGD" "mlpWeightDecay"
## [121] "mlpWeightDecayML" "monmlp" "msaenet"
## [124] "multinom" "mxnet" "mxnetAdam"
## [127] "naive_bayes" "nb" "nbDiscrete"
## [130] "nbSearch" "neuralnet" "nnet"
## [133] "nnls" "nodeHarvest" "null"
## [136] "OneR" "ordinalNet" "ORFlog"
## [139] "ORFpls" "ORFridge" "ORFsvm"
## [142] "ownn" "pam" "parRF"
## [145] "PART" "partDSA" "pcaNNet"
## [148] "pcr" "pda" "pda2"
## [151] "penalized" "PenalizedLDA" "plr"
## [154] "pls" "plsRglm" "polr"
## [157] "ppr" "PRIM" "protoclass"
## [160] "qda" "QdaCov" "qrf"
## [163] "qrnn" "randomGLM" "ranger"
## [166] "rbf" "rbfDDA" "Rborist"
## [169] "rda" "regLogistic" "relaxo"
## [172] "rf" "rFerns" "RFlda"
## [175] "rfRules" "ridge" "rlda"
## [178] "rlm" "rmda" "rocc"
## [181] "rotationForest" "rotationForestCp" "rpart"
## [184] "rpart1SE" "rpart2" "rpartCost"
## [187] "rpartScore" "rqlasso" "rqnc"
## [190] "RRF" "RRFglobal" "rrlda"
## [193] "RSimca" "rvmLinear" "rvmPoly"
## [196] "rvmRadial" "SBC" "sda"
## [199] "sdwd" "simpls" "SLAVE"
## [202] "slda" "smda" "snn"
## [205] "sparseLDA" "spikeslab" "spls"
## [208] "stepLDA" "stepQDA" "superpc"
## [211] "svmBoundrangeString" "svmExpoString" "svmLinear"
## [214] "svmLinear2" "svmLinear3" "svmLinearWeights"
## [217] "svmLinearWeights2" "svmPoly" "svmRadial"
## [220] "svmRadialCost" "svmRadialSigma" "svmRadialWeights"
## [223] "svmSpectrumString" "tan" "tanSearch"
## [226] "treebag" "vbmpRadial" "vglmAdjCat"
## [229] "vglmContRatio" "vglmCumulative" "widekernelpls"
## [232] "WM" "wsrf" "xgbDART"
## [235] "xgbLinear" "xgbTree" "xyf"
# Check whether the caret package implements the rpart algorithm
names(getModelInfo())[grep('rpart',names(getModelInfo()))]
## [1] "rpart" "rpart1SE" "rpart2" "rpartCost" "rpartScore"
# Look up the details of the rpart models
getModelInfo('rpart')
## $rpart
## $rpart$label
## [1] "CART"
##
## $rpart$library
## [1] "rpart"
##
## $rpart$type
## [1] "Regression" "Classification"
##
## $rpart$parameters
## parameter class label
## 1 cp numeric Complexity Parameter
##
## $rpart$grid
## function (x, y, len = NULL, search = "grid")
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
## initialFit <- initialFit[order(-initialFit[, "CP"]), , drop = FALSE]
## if (search == "grid") {
## if (nrow(initialFit) < len) {
## tuneSeq <- data.frame(cp = seq(min(initialFit[, "CP"]),
## max(initialFit[, "CP"]), length = len))
## }
## else tuneSeq <- data.frame(cp = initialFit[1:len, "CP"])
## colnames(tuneSeq) <- "cp"
## }
## else {
## tuneSeq <- data.frame(cp = unique(sample(initialFit[,
## "CP"], size = len, replace = TRUE)))
## }
## tuneSeq
## }
##
## $rpart$loop
## function (grid)
## {
## grid <- grid[order(grid$cp, decreasing = FALSE), , drop = FALSE]
## loop <- grid[1, , drop = FALSE]
## submodels <- list(grid[-1, , drop = FALSE])
## list(loop = loop, submodels = submodels)
## }
##
## $rpart$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## cpValue <- if (!last)
## param$cp
## else 0
## theDots <- list(...)
## if (any(names(theDots) == "control")) {
## theDots$control$cp <- cpValue
## theDots$control$xval <- 0
## ctl <- theDots$control
## theDots$control <- NULL
## }
## else ctl <- rpart::rpart.control(cp = cpValue, xval = 0)
## if (!is.null(wts))
## theDots$weights <- wts
## modelArgs <- c(list(formula = as.formula(".outcome ~ ."),
## data = if (is.data.frame(x)) x else as.data.frame(x),
## control = ctl), theDots)
## modelArgs$data$.outcome <- y
## out <- do.call(rpart::rpart, modelArgs)
## if (last)
## out <- rpart::prune.rpart(out, cp = param$cp)
## out
## }
##
## $rpart$predict
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## pType <- if (modelFit$problemType == "Classification")
## "class"
## else "vector"
## out <- predict(modelFit, newdata, type = pType)
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## for (j in seq(along = submodels$cp)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
## tmp[[j + 1]] <- predict(prunedFit, newdata, type = pType)
## }
## out <- tmp
## }
## out
## }
##
## $rpart$prob
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## out <- predict(modelFit, newdata, type = "prob")
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## for (j in seq(along = submodels$cp)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
## tmpProb <- predict(prunedFit, newdata, type = "prob")
## tmp[[j + 1]] <- as.data.frame(tmpProb[, modelFit$obsLevels,
## drop = FALSE])
## }
## out <- tmp
## }
## out
## }
##
## $rpart$predictors
## function (x, surrogate = TRUE, ...)
## {
## out <- as.character(x$frame$var)
## out <- out[!(out %in% c("<leaf>"))]
## if (surrogate) {
## splits <- x$splits
## splits <- splits[splits[, "adj"] > 0, ]
## out <- c(out, rownames(splits))
## }
## unique(out)
## }
##
## $rpart$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...)
## {
## if (nrow(object$splits) > 0) {
## tmp <- rownames(object$splits)
## rownames(object$splits) <- 1:nrow(object$splits)
## splits <- data.frame(object$splits)
## splits$var <- tmp
## splits$type <- ""
## frame <- as.data.frame(object$frame)
## index <- 0
## for (i in 1:nrow(frame)) {
## if (frame$var[i] != "<leaf>") {
## index <- index + 1
## splits$type[index] <- "primary"
## if (frame$ncompete[i] > 0) {
## for (j in 1:frame$ncompete[i]) {
## index <- index + 1
## splits$type[index] <- "competing"
## }
## }
## if (frame$nsurrogate[i] > 0) {
## for (j in 1:frame$nsurrogate[i]) {
## index <- index + 1
## splits$type[index] <- "surrogate"
## }
## }
## }
## }
## splits$var <- factor(as.character(splits$var))
## if (!surrogates)
## splits <- subset(splits, type != "surrogate")
## if (!competes)
## splits <- subset(splits, type != "competing")
## out <- aggregate(splits$improve, list(Variable = splits$var),
## sum, na.rm = TRUE)
## }
## else {
## out <- data.frame(x = numeric(), Vaiable = character())
## }
## allVars <- colnames(attributes(object$terms)$factors)
## if (!all(allVars %in% out$Variable)) {
## missingVars <- allVars[!(allVars %in% out$Variable)]
## zeros <- data.frame(x = rep(0, length(missingVars)),
## Variable = missingVars)
## out <- rbind(out, zeros)
## }
## out2 <- data.frame(Overall = out$x)
## rownames(out2) <- out$Variable
## out2
## }
##
## $rpart$levels
## function (x)
## x$obsLevels
##
## $rpart$trim
## function (x)
## {
## x$call <- list(na.action = (x$call)$na.action)
## x$x <- NULL
## x$y <- NULL
## x$where <- NULL
## x
## }
##
## $rpart$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"
##
## $rpart$sort
## function (x)
## x[order(x[, 1], decreasing = TRUE), ]
##
##
## $rpart1SE
## $rpart1SE$label
## [1] "CART"
##
## $rpart1SE$library
## [1] "rpart"
##
## $rpart1SE$type
## [1] "Regression" "Classification"
##
## $rpart1SE$parameters
## parameter class label
## 1 parameter character parameter
##
## $rpart1SE$grid
## function (x, y, len = NULL, search = "grid")
## data.frame(parameter = "none")
##
## $rpart1SE$loop
## NULL
##
## $rpart1SE$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## if (!is.null(wts)) {
## out <- rpart::rpart(.outcome ~ ., data = dat, ...)
## }
## else {
## out <- rpart::rpart(.outcome ~ ., data = dat, weights = wts,
## ...)
## }
## out
## }
##
## $rpart1SE$predict
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## out <- if (modelFit$problemType == "Classification")
## predict(modelFit, newdata, type = "class")
## else predict(modelFit, newdata)
## out
## }
##
## $rpart1SE$prob
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## predict(modelFit, newdata, type = "prob")
## }
##
## $rpart1SE$predictors
## function (x, surrogate = TRUE, ...)
## {
## out <- as.character(x$frame$var)
## out <- out[!(out %in% c("<leaf>"))]
## if (surrogate) {
## splits <- x$splits
## splits <- splits[splits[, "adj"] > 0, ]
## out <- c(out, rownames(splits))
## }
## unique(out)
## }
##
## $rpart1SE$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...)
## {
## tmp <- rownames(object$splits)
## rownames(object$splits) <- 1:nrow(object$splits)
## splits <- data.frame(object$splits)
## splits$var <- tmp
## splits$type <- ""
## frame <- as.data.frame(object$frame)
## index <- 0
## for (i in 1:nrow(frame)) {
## if (frame$var[i] != "<leaf>") {
## index <- index + 1
## splits$type[index] <- "primary"
## if (frame$ncompete[i] > 0) {
## for (j in 1:frame$ncompete[i]) {
## index <- index + 1
## splits$type[index] <- "competing"
## }
## }
## if (frame$nsurrogate[i] > 0) {
## for (j in 1:frame$nsurrogate[i]) {
## index <- index + 1
## splits$type[index] <- "surrogate"
## }
## }
## }
## }
## splits$var <- factor(as.character(splits$var))
## if (!surrogates)
## splits <- subset(splits, type != "surrogate")
## if (!competes)
## splits <- subset(splits, type != "competing")
## out <- aggregate(splits$improve, list(Variable = splits$var),
## sum, na.rm = TRUE)
## allVars <- colnames(attributes(object$terms)$factors)
## if (!all(allVars %in% out$Variable)) {
## missingVars <- allVars[!(allVars %in% out$Variable)]
## zeros <- data.frame(x = rep(0, length(missingVars)),
## Variable = missingVars)
## out <- rbind(out, zeros)
## }
## out2 <- data.frame(Overall = out$x)
## rownames(out2) <- out$Variable
## out2
## }
##
## $rpart1SE$levels
## function (x)
## x$obsLevels
##
## $rpart1SE$trim
## function (x)
## {
## x$call <- list(na.action = (x$call)$na.action)
## x$x <- NULL
## x$y <- NULL
## x$where <- NULL
## x
## }
##
## $rpart1SE$notes
## [1] "This CART model replicates the same process used by the `rpart` function where the model complexity is determined using the one-standard error method. This procedure is replicated inside of the resampling done by `train` so that an external resampling estimate can be obtained."
##
## $rpart1SE$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"
##
## $rpart1SE$sort
## function (x)
## x[order(x[, 1], decreasing = TRUE), ]
##
##
## $rpart2
## $rpart2$label
## [1] "CART"
##
## $rpart2$library
## [1] "rpart"
##
## $rpart2$type
## [1] "Regression" "Classification"
##
## $rpart2$parameters
## parameter class label
## 1 maxdepth numeric Max Tree Depth
##
## $rpart2$grid
## function (x, y, len = NULL, search = "grid")
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
## initialFit <- initialFit[order(-initialFit[, "CP"]), "nsplit",
## drop = FALSE]
## initialFit <- initialFit[initialFit[, "nsplit"] > 0 & initialFit[,
## "nsplit"] <= 30, , drop = FALSE]
## if (search == "grid") {
## if (dim(initialFit)[1] < len) {
## cat("note: only", nrow(initialFit), "possible values of the max tree depth from the initial fit.\n",
## "Truncating the grid to", nrow(initialFit), ".\n\n")
## tuneSeq <- as.data.frame(initialFit)
## }
## else tuneSeq <- as.data.frame(initialFit[1:len, ])
## colnames(tuneSeq) <- "maxdepth"
## }
## else {
## tuneSeq <- data.frame(maxdepth = unique(sample(as.vector(initialFit[,
## 1]), size = len, replace = TRUE)))
## }
## tuneSeq
## }
##
## $rpart2$loop
## function (grid)
## {
## grid <- grid[order(grid$maxdepth, decreasing = TRUE), , drop = FALSE]
## loop <- grid[1, , drop = FALSE]
## submodels <- list(grid[-1, , drop = FALSE])
## list(loop = loop, submodels = submodels)
## }
##
## $rpart2$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## theDots <- list(...)
## if (any(names(theDots) == "control")) {
## theDots$control$maxdepth <- param$maxdepth
## theDots$control$xval <- 0
## ctl <- theDots$control
## theDots$control <- NULL
## }
## else ctl <- rpart::rpart.control(maxdepth = param$maxdepth,
## xval = 0)
## if (!is.null(wts))
## theDots$weights <- wts
## modelArgs <- c(list(formula = as.formula(".outcome ~ ."),
## data = if (is.data.frame(x)) x else as.data.frame(x),
## control = ctl), theDots)
## modelArgs$data$.outcome <- y
## out <- do.call(rpart::rpart, modelArgs)
## out
## }
##
## $rpart2$predict
## function (modelFit, newdata, submodels = NULL)
## {
## depth2cp <- function(x, depth) {
## out <- approx(x[, "nsplit"], x[, "CP"], depth)$y
## out[depth > max(x[, "nsplit"])] <- min(x[, "CP"]) * 0.99
## out
## }
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## pType <- if (modelFit$problemType == "Classification")
## "class"
## else "vector"
## out <- predict(modelFit, newdata, type = pType)
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## cpValues <- depth2cp(modelFit$cptable, submodels$maxdepth)
## for (j in seq(along = cpValues)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = cpValues[j])
## tmp[[j + 1]] <- predict(prunedFit, newdata, type = pType)
## }
## out <- tmp
## }
## out
## }
##
## $rpart2$prob
## function (modelFit, newdata, submodels = NULL)
## {
## depth2cp <- function(x, depth) {
## out <- approx(x[, "nsplit"], x[, "CP"], depth)$y
## out[depth > max(x[, "nsplit"])] <- min(x[, "CP"]) * 0.99
## out
## }
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## out <- predict(modelFit, newdata, type = "prob")
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## cpValues <- depth2cp(modelFit$cptable, submodels$maxdepth)
## for (j in seq(along = cpValues)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = cpValues[j])
## tmpProb <- predict(prunedFit, newdata, type = "prob")
## tmp[[j + 1]] <- as.data.frame(tmpProb[, modelFit$obsLevels,
## drop = FALSE])
## }
## out <- tmp
## }
## out
## }
##
## $rpart2$predictors
## function (x, surrogate = TRUE, ...)
## {
## out <- as.character(x$frame$var)
## out <- out[!(out %in% c("<leaf>"))]
## if (surrogate) {
## splits <- x$splits
## splits <- splits[splits[, "adj"] > 0, ]
## out <- c(out, rownames(splits))
## }
## unique(out)
## }
##
## $rpart2$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...)
## {
## tmp <- rownames(object$splits)
## rownames(object$splits) <- 1:nrow(object$splits)
## splits <- data.frame(object$splits)
## splits$var <- tmp
## splits$type <- ""
## frame <- as.data.frame(object$frame)
## index <- 0
## for (i in 1:nrow(frame)) {
## if (frame$var[i] != "<leaf>") {
## index <- index + 1
## splits$type[index] <- "primary"
## if (frame$ncompete[i] > 0) {
## for (j in 1:frame$ncompete[i]) {
## index <- index + 1
## splits$type[index] <- "competing"
## }
## }
## if (frame$nsurrogate[i] > 0) {
## for (j in 1:frame$nsurrogate[i]) {
## index <- index + 1
## splits$type[index] <- "surrogate"
## }
## }
## }
## }
## splits$var <- factor(as.character(splits$var))
## if (!surrogates)
## splits <- subset(splits, type != "surrogate")
## if (!competes)
## splits <- subset(splits, type != "competing")
## out <- aggregate(splits$improve, list(Variable = splits$var),
## sum, na.rm = TRUE)
## allVars <- colnames(attributes(object$terms)$factors)
## if (!all(allVars %in% out$Variable)) {
## missingVars <- allVars[!(allVars %in% out$Variable)]
## zeros <- data.frame(x = rep(0, length(missingVars)),
## Variable = missingVars)
## out <- rbind(out, zeros)
## }
## out2 <- data.frame(Overall = out$x)
## rownames(out2) <- out$Variable
## out2
## }
##
## $rpart2$levels
## function (x)
## x$obsLevels
##
## $rpart2$trim
## function (x)
## {
## x$call <- list(na.action = (x$call)$na.action)
## x$x <- NULL
## x$y <- NULL
## x$where <- NULL
## x
## }
##
## $rpart2$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"
##
## $rpart2$sort
## function (x)
## x[order(x[, 1]), ]
##
##
## $rpartCost
## $rpartCost$label
## [1] "Cost-Sensitive CART"
##
## $rpartCost$library
## [1] "rpart" "plyr"
##
## $rpartCost$type
## [1] "Classification"
##
## $rpartCost$parameters
## parameter class label
## 1 cp numeric Complexity Parameter
## 2 Cost numeric Cost
##
## $rpartCost$grid
## function (x, y, len = NULL, search = "grid")
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
## initialFit <- initialFit[order(-initialFit[, "CP"]), , drop = FALSE]
## if (search == "grid") {
## if (nrow(initialFit) < len) {
## tuneSeq <- expand.grid(cp = seq(min(initialFit[,
## "CP"]), max(initialFit[, "CP"]), length = len),
## Cost = 1:len)
## }
## else tuneSeq <- data.frame(cp = initialFit[1:len, "CP"],
## Cost = 1:len)
## colnames(tuneSeq) <- c("cp", "Cost")
## }
## else {
## tuneSeq <- data.frame(cp = 10^runif(len, min = -8, max = -1),
## Cost = runif(len, min = 1, max = 30))
## }
## tuneSeq
## }
##
## $rpartCost$loop
## function (grid)
## {
## loop <- plyr::ddply(grid, plyr::.(Cost), function(x) c(cp = min(x$cp)))
## submodels <- vector(mode = "list", length = nrow(loop))
## for (i in seq(along = submodels)) {
## larger_cp <- subset(grid, subset = Cost == loop$Cost[i] &
## cp > loop$cp[i])
## submodels[[i]] <- data.frame(cp = sort(larger_cp$cp))
## }
## list(loop = loop, submodels = submodels)
## }
##
## $rpartCost$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## theDots <- list(...)
## if (any(names(theDots) == "control")) {
## theDots$control$cp <- param$cp
## theDots$control$xval <- 0
## ctl <- theDots$control
## theDots$control <- NULL
## }
## else ctl <- rpart::rpart.control(cp = param$cp, xval = 0)
## lmat <- matrix(c(0, 1, param$Cost, 0), ncol = 2)
## rownames(lmat) <- colnames(lmat) <- levels(y)
## if (any(names(theDots) == "parms")) {
## theDots$parms$loss <- lmat
## }
## else parms <- list(loss = lmat)
## if (!is.null(wts))
## theDots$weights <- wts
## modelArgs <- c(list(formula = as.formula(".outcome ~ ."),
## data = if (is.data.frame(x)) x else as.data.frame(x),
## parms = parms, control = ctl), theDots)
## modelArgs$data$.outcome <- y
## out <- do.call(rpart::rpart, modelArgs)
## out
## }
##
## $rpartCost$predict
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## pType <- if (modelFit$problemType == "Classification")
## "class"
## else "vector"
## out <- predict(modelFit, newdata, type = pType)
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## for (j in seq(along = submodels$cp)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
## tmp[[j + 1]] <- predict(prunedFit, newdata, type = pType)
## }
## out <- tmp
## }
## out
## }
##
## $rpartCost$levels
## function (x)
## x$obsLevels
##
## $rpartCost$prob
## NULL
##
## $rpartCost$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Cost Sensitive Learning" "Two Class Only"
## [5] "Handle Missing Predictor Data" "Accepts Case Weights"
##
## $rpartCost$sort
## function (x)
## x[order(-x$cp, -x$Cost), ]
##
##
## $rpartScore
## $rpartScore$label
## [1] "CART or Ordinal Responses"
##
## $rpartScore$library
## [1] "rpartScore" "plyr"
##
## $rpartScore$type
## [1] "Classification"
##
## $rpartScore$parameters
## parameter class label
## 1 cp numeric Complexity Parameter
## 2 split character Split Function
## 3 prune character Pruning Measure
##
## $rpartScore$grid
## function (x, y, len = NULL, search = "grid")
## {
## dat <- if (is.data.frame(x))
## x
## else as.data.frame(x)
## dat$.outcome <- y
## initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
## initialFit <- initialFit[order(-initialFit[, "CP"]), , drop = FALSE]
## if (search == "grid") {
## if (nrow(initialFit) < len) {
## tuneSeq <- expand.grid(cp = seq(min(initialFit[,
## "CP"]), max(initialFit[, "CP"]), length = len),
## split = c("abs", "quad"), prune = c("mr", "mc"))
## }
## else tuneSeq <- expand.grid(cp = initialFit[1:len, "CP"],
## split = c("abs", "quad"), prune = c("mr", "mc"))
## colnames(tuneSeq)[1] <- "cp"
## }
## else {
## tuneSeq <- expand.grid(cp = unique(sample(initialFit[,
## "CP"], size = len, replace = TRUE)), split = c("abs",
## "quad"), prune = c("mr", "mc"))
## }
## tuneSeq
## }
##
## $rpartScore$fit
## function (x, y, wts, param, lev, last, classProbs, ...)
## {
## cpValue <- if (!last)
## param$cp
## else 0
## theDots <- list(...)
## if (any(names(theDots) == "control")) {
## theDots$control$cp <- cpValue
## theDots$control$xval <- 0
## ctl <- theDots$control
## theDots$control <- NULL
## }
## else ctl <- rpart::rpart.control(cp = cpValue, xval = 0)
## if (!is.null(wts))
## theDots$weights <- wts
## modelArgs <- c(list(formula = as.formula(".outcome ~ ."),
## data = if (is.data.frame(x)) x else as.data.frame(x),
## split = as.character(param$split), prune = as.character(param$prune),
## control = ctl), theDots)
## modelArgs$data$.outcome <- as.numeric(y)
## out <- do.call(rpartScore::rpartScore, modelArgs)
## if (last)
## out <- rpart::prune.rpart(out, cp = param$cp)
## out
## }
##
## $rpartScore$predict
## function (modelFit, newdata, submodels = NULL)
## {
## if (!is.data.frame(newdata))
## newdata <- as.data.frame(newdata)
## out <- modelFit$obsLevels[predict(modelFit, newdata)]
## if (!is.null(submodels)) {
## tmp <- vector(mode = "list", length = nrow(submodels) +
## 1)
## tmp[[1]] <- out
## for (j in seq(along = submodels$cp)) {
## prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
## tmp[[j + 1]] <- modelFit$obsLevels[predict(prunedFit,
## newdata)]
## }
## out <- tmp
## }
## out
## }
##
## $rpartScore$prob
## NULL
##
## $rpartScore$predictors
## function (x, surrogate = TRUE, ...)
## {
## out <- as.character(x$frame$var)
## out <- out[!(out %in% c("<leaf>"))]
## if (surrogate) {
## splits <- x$splits
## splits <- splits[splits[, "adj"] > 0, ]
## out <- c(out, rownames(splits))
## }
## unique(out)
## }
##
## $rpartScore$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...)
## {
## allVars <- all.vars(object$terms)
## allVars <- allVars[allVars != ".outcome"]
## out <- data.frame(Overall = object$variable.importance, Variable = names(object$variable.importance))
## rownames(out) <- names(object$variable.importance)
## if (!all(allVars %in% out$Variable)) {
## missingVars <- allVars[!(allVars %in% out$Variable)]
## zeros <- data.frame(Overall = rep(0, length(missingVars)),
## Variable = missingVars)
## out <- rbind(out, zeros)
## }
## rownames(out) <- out$Variable
## out$Variable <- NULL
## out
## }
##
## $rpartScore$levels
## function (x)
## x$obsLevels
##
## $rpartScore$trim
## function (x)
## {
## x$call <- list(na.action = (x$call)$na.action)
## x$x <- NULL
## x$y <- NULL
## x$where <- NULL
## x
## }
##
## $rpartScore$tags
## [1] "Tree-Based Model" "Implicit Feature Selection"
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"
## [5] "Ordinal Outcomes"
##
## $rpartScore$sort
## function (x)
## x[order(x[, 1], decreasing = TRUE), ]
# Look up which parameters of the rpart model can be tuned
getModelInfo('rpart')$rpart$parameters
## parameter class label
## 1 cp numeric Complexity Parameter
caret tune
control = trainControl(method = "repeatedcv", number = 10, repeats = 3,
                       summaryFunction = multiClassSummary, classProbs = TRUE)
tune_funs = expand.grid(cp = seq(0, 0.1, 0.01))
model = train(churn ~ ., data = trainset, method = "rpart",
              trControl = control, tuneGrid = tune_funs, metric = "AUC")
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
This warning is expected here: in some resamples a tree predicts only one class, so class-specific measures such as F1 or precision are undefined (NA) for that fold, and caret averages over the folds where the metric exists.
model
## CART
##
## 3500 samples
## 16 predictor
## 2 classes: 'yes', 'no'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 3150, 3149, 3149, 3150, 3151, 3151, ...
## Resampling results across tuning parameters:
##
## cp logLoss AUC prAUC Accuracy Kappa F1
## 0.00 0.2691721 0.9015393 0.7255436 0.9391339 0.7383540 0.7731842
## 0.01 0.2192485 0.8919731 0.6315904 0.9399951 0.7369638 0.7709636
## 0.02 0.2427428 0.8410676 0.4744059 0.9319036 0.6847413 0.7218977
## 0.03 0.2386386 0.8406813 0.4738924 0.9301885 0.6769397 0.7150530
## 0.04 0.2425569 0.8397226 0.4694517 0.9273317 0.6704385 0.7107356
## 0.05 0.2765850 0.7860229 0.3575481 0.9127553 0.5788310 0.6240974
## 0.06 0.3102768 0.7440674 0.2592558 0.9002845 0.5060621 0.5565002
## 0.07 0.3335187 0.6940869 0.2375028 0.8905761 0.4283913 0.4784603
## 0.08 0.3657227 0.6221009 0.1958430 0.8762890 0.3254161 0.3902122
## 0.09 0.3742809 0.6149336 0.1758562 0.8687646 0.2968036 0.3787813
## 0.10 0.3797777 0.6009359 0.1233326 0.8665749 0.2593330 0.3722692
## Sensitivity Specificity Pos_Pred_Value Neg_Pred_Value Precision
## 0.7227059 0.9757250 0.8346950 0.9542660 0.8346950
## 0.7016863 0.9802921 0.8599284 0.9511591 0.8599284
## 0.6167712 0.9851929 0.8800489 0.9384283 0.8800489
## 0.6115033 0.9840784 0.8703594 0.9375635 0.8703594
## 0.6226013 0.9788458 0.8357364 0.9389420 0.8357364
## 0.5206667 0.9790687 0.8072794 0.9239527 0.8072794
## 0.4389020 0.9782910 0.7825965 0.9117722 0.7825965
## 0.3634118 0.9797388 0.7606634 0.9014060 0.7606634
## 0.2675294 0.9791828 0.7007252 0.8879489 0.7007252
## 0.2597516 0.9717261 0.6110112 0.8861508 0.6110112
## 0.2272549 0.9746169 0.6001664 0.8822213 0.6001664
## Recall Detection_Rate Balanced_Accuracy
## 0.7227059 0.10446902 0.8492154
## 0.7016863 0.10142684 0.8409892
## 0.6167712 0.08914433 0.8009821
## 0.6115033 0.08838270 0.7977908
## 0.6226013 0.08999903 0.8007235
## 0.5206667 0.07523218 0.7498677
## 0.4389020 0.06342803 0.7085965
## 0.3634118 0.05248187 0.6715753
## 0.2675294 0.03866656 0.6233561
## 0.2597516 0.03752398 0.6157389
## 0.2272549 0.03285704 0.6009359
##
## AUC was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.
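caret's plot method for train objects shows how the selection metric (AUC here) varies across the cp grid, which makes the choice of cp = 0 easy to eyeball:

# Resampled AUC as a function of cp
plot(model)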
predictions = predict(model, testset)
confusionMatrix(table(predictions,testset$churn))
## Confusion Matrix and Statistics
##
##
## predictions yes no
## yes 146 30
## no 55 1269
##
## Accuracy : 0.9433
## 95% CI : (0.9304, 0.9545)
## No Information Rate : 0.866
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7423
## Mcnemar's Test P-Value : 0.009237
##
## Sensitivity : 0.72637
## Specificity : 0.97691
## Pos Pred Value : 0.82955
## Neg Pred Value : 0.95846
## Prevalence : 0.13400
## Detection Rate : 0.09733
## Detection Prevalence : 0.11733
## Balanced Accuracy : 0.85164
##
## 'Positive' Class : yes
##
find important variables
library('caret')
importance = varImp(model, scale = TRUE)
importance
## rpart variable importance
##
## Overall
## total_day_minutes 100.000
## total_day_charge 94.193
## number_customer_service_calls 56.713
## international_planyes 56.381
## total_eve_minutes 41.542
## total_eve_charge 41.227
## total_intl_calls 38.132
## total_intl_minutes 23.906
## total_night_minutes 22.395
## voice_mail_planyes 18.501
## number_vmail_messages 18.165
## total_intl_charge 15.528
## total_night_charge 14.196
## total_night_calls 3.477
## total_day_calls 2.744
## total_eve_calls 0.000
plot(importance)
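varImp() for rpart sums the split improvements recorded by the tree (see the $rpart$varImp function dumped above). The underlying rpart fit also stores its own importance measure, which additionally credits surrogate splits; it can be inspected directly:

# Raw (unscaled) importance kept by the final rpart model
model$finalModel$variable.importance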

ROC
#install.packages("ROCR")
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
predictions <- predict(model, testset, type = "prob")
head(predictions)
## yes no
## 1 0.041666667 0.9583333
## 5 0.048913043 0.9510870
## 6 0.048913043 0.9510870
## 14 0.008474576 0.9915254
## 15 0.285714286 0.7142857
## 21 0.005235602 0.9947644
pred.to.roc <- predictions[, "yes"]
head(pred.to.roc)
## [1] 0.041666667 0.048913043 0.048913043 0.008474576 0.285714286 0.005235602
pred.rocr <- prediction(pred.to.roc, testset$churn)
pred.rocr
## An object of class "prediction"
## Slot "predictions":
## [[1]]
## [1] 0.041666667 0.048913043 0.048913043 0.008474576 0.285714286
## [6] 0.005235602 0.042654028 0.011958146 0.014326648 0.014326648
## ... (one predicted churn probability per test observation; long output truncated)
## [771] 0.014326648 0.005235602 0.044776119 0.014326648 0.011958146
## [776] 0.005235602 0.011958146 0.071428571 0.008474576 0.042654028
## [781] 0.005235602 0.014326648 0.042654028 0.116504854 0.011958146
## [786] 0.000000000 0.005235602 0.011958146 0.005235602 0.960000000
## [791] 0.960000000 0.903225806 0.000000000 0.011958146 0.014326648
## [796] 0.071428571 0.011958146 0.970873786 0.042654028 1.000000000
## [801] 0.014326648 0.454545455 0.071428571 0.011958146 0.005235602
## [806] 0.014326648 0.048913043 0.011958146 0.005235602 0.042654028
## [811] 0.014326648 0.014326648 0.008474576 0.042654028 0.048913043
## [816] 0.014326648 0.005235602 0.044776119 0.571428571 0.008474576
## [821] 0.071428571 1.000000000 0.014326648 0.011958146 0.014326648
## [826] 0.011958146 0.005235602 0.014326648 0.042654028 0.008474576
## [831] 0.042654028 0.048913043 0.846153846 0.044776119 0.011958146
## [836] 0.014326648 0.014326648 0.833333333 0.044776119 0.014326648
## [841] 0.005235602 0.011958146 0.014326648 0.014326648 0.011958146
## [846] 0.008474576 0.014326648 0.042654028 0.005235602 0.048913043
## [851] 0.042654028 0.005235602 0.903225806 0.115384615 1.000000000
## [856] 0.005235602 0.005235602 0.008474576 0.005235602 0.014326648
## [861] 1.000000000 0.038961039 0.048913043 0.042654028 0.014326648
## [866] 0.038961039 0.048913043 0.011958146 0.903225806 0.011958146
## [871] 0.014326648 0.014326648 0.014326648 0.970873786 0.012500000
## [876] 0.071428571 0.008474576 0.014326648 0.116504854 0.071428571
## [881] 1.000000000 0.014326648 0.042654028 0.005235602 0.970873786
## [886] 0.048913043 0.038961039 0.014326648 0.000000000 0.011958146
## [891] 0.038961039 0.014326648 0.970873786 0.011958146 0.014326648
## [896] 0.014326648 0.011958146 0.014326648 0.042654028 0.970873786
## [901] 0.005235602 0.970873786 0.074380165 0.008474576 0.011958146
## [906] 0.903225806 0.011958146 0.011958146 0.011958146 0.011958146
## [911] 0.005235602 0.011958146 0.005235602 0.014326648 1.000000000
## [916] 0.074380165 0.014326648 0.115384615 0.008474576 0.074380165
## [921] 0.116504854 0.014326648 0.011958146 0.011958146 0.116504854
## [926] 0.014326648 0.014326648 0.005235602 0.011958146 0.011958146
## [931] 0.074380165 0.903225806 0.011958146 0.038961039 0.014326648
## [936] 0.014326648 0.011958146 0.042654028 0.116504854 0.048913043
## [941] 0.042654028 0.005235602 0.014326648 0.011958146 0.005235602
## [946] 0.005235602 0.014326648 0.011958146 0.005235602 0.011958146
## [951] 0.012500000 0.071428571 0.011958146 0.846153846 0.903225806
## [956] 0.011958146 0.011958146 0.074380165 0.903225806 0.750000000
## [961] 1.000000000 0.583333333 0.903225806 0.005235602 0.042654028
## [966] 0.833333333 0.005235602 0.048913043 0.014326648 0.011958146
## [971] 0.005235602 0.011958146 0.970873786 0.005235602 1.000000000
## [976] 0.048913043 0.011958146 0.011958146 0.454545455 0.014326648
## [981] 0.014326648 0.011958146 0.011958146 0.011958146 0.115384615
## [986] 0.048913043 1.000000000 0.903225806 1.000000000 0.005235602
## [991] 0.042654028 0.960000000 1.000000000 0.011958146 0.011958146
## [996] 0.011958146 0.090909091 0.012500000 0.005235602 0.005235602
## [1001] 0.014326648 0.071428571 0.074380165 0.011958146 0.011958146
## [1006] 0.970873786 0.116504854 0.074380165 0.011958146 0.011958146
## [1011] 0.042654028 0.011958146 0.903225806 0.014326648 0.011958146
## [1016] 0.074380165 0.048913043 0.014326648 0.014326648 0.116504854
## [1021] 0.014326648 0.071428571 0.014326648 0.042654028 0.048913043
## [1026] 0.005235602 0.042654028 0.011958146 0.090909091 0.011958146
## [1031] 0.011958146 0.071428571 0.048913043 0.011958146 0.090909091
## [1036] 0.011958146 0.042654028 0.042654028 0.048913043 0.074380165
## [1041] 0.042654028 0.014326648 0.012500000 0.011958146 1.000000000
## [1046] 0.005235602 0.011958146 0.000000000 0.000000000 0.014326648
## [1051] 0.116504854 0.005235602 0.005235602 0.005235602 0.005235602
## [1056] 0.903225806 0.903225806 0.011958146 0.014326648 0.750000000
## [1061] 0.074380165 0.008474576 0.011958146 0.833333333 0.970873786
## [1066] 0.005235602 0.014326648 0.011958146 0.042654028 0.048913043
## [1071] 0.048913043 0.000000000 0.011958146 0.011958146 0.014326648
## [1076] 0.005235602 0.011958146 0.041666667 0.048913043 0.012500000
## [1081] 0.005235602 0.048913043 0.090909091 0.014326648 0.903225806
## [1086] 0.005235602 0.014326648 0.666666667 0.014326648 0.014326648
## [1091] 0.071428571 0.014326648 0.074380165 0.960000000 0.005235602
## [1096] 0.014326648 0.005235602 0.116504854 0.005235602 0.048913043
## [1101] 0.014326648 0.011958146 0.014326648 0.116504854 0.071428571
## [1106] 0.044776119 0.750000000 0.014326648 0.038961039 0.014326648
## [1111] 0.014326648 0.970873786 0.048913043 0.970873786 0.005235602
## [1116] 0.011958146 0.042654028 0.044776119 0.011958146 0.042654028
## [1121] 0.014326648 0.005235602 0.005235602 0.014326648 0.011958146
## [1126] 0.005235602 0.005235602 0.014326648 0.074380165 0.750000000
## [1131] 0.011958146 0.012500000 0.071428571 0.044776119 0.048913043
## [1136] 0.074380165 0.011958146 0.042654028 0.005235602 0.005235602
## [1141] 0.044776119 0.005235602 0.011958146 0.011958146 0.011958146
## [1146] 0.005235602 0.011958146 0.005235602 0.014326648 0.011958146
## [1151] 0.014326648 0.014326648 0.011958146 0.071428571 0.014326648
## [1156] 0.012500000 0.041666667 0.014326648 0.042654028 0.014326648
## [1161] 0.014326648 0.014326648 0.903225806 0.454545455 0.903225806
## [1166] 0.005235602 0.014326648 0.005235602 0.071428571 0.090909091
## [1171] 0.005235602 0.008474576 0.048913043 0.005235602 0.005235602
## [1176] 0.005235602 0.041666667 0.005235602 0.014326648 0.038961039
## [1181] 0.014326648 0.970873786 0.005235602 0.014326648 0.571428571
## [1186] 1.000000000 0.014326648 0.011958146 0.048913043 0.014326648
## [1191] 0.005235602 0.048913043 0.005235602 0.014326648 0.048913043
## [1196] 0.011958146 0.048913043 0.005235602 0.970873786 0.090909091
## [1201] 0.042654028 0.011958146 0.014326648 0.048913043 0.038961039
## [1206] 0.116504854 0.903225806 0.090909091 0.014326648 0.011958146
## [1211] 0.014326648 0.014326648 0.014326648 0.011958146 0.116504854
## [1216] 0.903225806 0.005235602 0.014326648 0.042654028 0.038961039
## [1221] 0.011958146 0.048913043 0.014326648 0.005235602 0.011958146
## [1226] 0.000000000 0.014326648 0.005235602 0.090909091 0.048913043
## [1231] 0.011958146 0.005235602 0.071428571 0.014326648 0.011958146
## [1236] 0.042654028 1.000000000 0.011958146 0.011958146 0.011958146
## [1241] 0.116504854 0.005235602 0.038961039 0.014326648 0.116504854
## [1246] 0.005235602 0.042654028 0.960000000 0.014326648 0.005235602
## [1251] 0.011958146 0.011958146 0.011958146 0.833333333 0.074380165
## [1256] 0.042654028 0.014326648 0.116504854 0.074380165 0.014326648
## [1261] 0.142857143 0.116504854 1.000000000 0.008474576 0.014326648
## [1266] 0.133333333 0.071428571 0.005235602 0.041666667 0.014326648
## [1271] 0.005235602 0.074380165 0.133333333 0.042654028 0.005235602
## [1276] 0.011958146 0.074380165 1.000000000 0.005235602 0.014326648
## [1281] 0.014326648 0.014326648 0.074380165 0.014326648 0.005235602
## [1286] 0.042654028 0.012500000 0.014326648 0.970873786 0.000000000
## [1291] 0.048913043 0.090909091 0.042654028 0.011958146 0.014326648
## [1296] 0.014326648 0.014326648 0.014326648 0.042654028 0.038961039
## [1301] 0.090909091 0.116504854 0.000000000 0.011958146 0.011958146
## [1306] 0.142857143 1.000000000 0.090909091 0.014326648 0.042654028
## [1311] 0.014326648 0.014326648 0.750000000 0.005235602 0.005235602
## [1316] 0.074380165 0.005235602 0.116504854 0.090909091 0.014326648
## [1321] 0.011958146 0.014326648 0.116504854 0.011958146 0.005235602
## [1326] 0.014326648 0.038961039 0.014326648 0.005235602 0.014326648
## [1331] 0.048913043 0.750000000 0.014326648 0.116504854 0.014326648
## [1336] 0.012500000 0.011958146 0.071428571 0.042654028 0.005235602
## [1341] 1.000000000 0.011958146 0.011958146 0.005235602 0.011958146
## [1346] 0.005235602 0.038961039 0.048913043 0.048913043 0.074380165
## [1351] 0.014326648 0.970873786 0.005235602 0.011958146 0.074380165
## [1356] 0.005235602 0.970873786 0.014326648 0.011958146 0.014326648
## [1361] 0.454545455 0.014326648 0.970873786 0.042654028 0.011958146
## [1366] 0.666666667 0.048913043 0.011958146 0.044776119 0.011958146
## [1371] 0.903225806 0.012500000 0.042654028 0.014326648 0.014326648
## [1376] 0.011958146 0.011958146 1.000000000 0.115384615 0.090909091
## [1381] 0.008474576 0.012500000 0.571428571 0.005235602 0.011958146
## [1386] 0.005235602 0.014326648 0.011958146 0.011958146 0.011958146
## [1391] 0.014326648 0.011958146 0.042654028 0.011958146 0.011958146
## [1396] 0.116504854 0.011958146 0.833333333 0.074380165 1.000000000
## [1401] 0.074380165 0.005235602 0.014326648 0.005235602 0.074380165
## [1406] 0.012500000 0.012500000 0.014326648 0.011958146 0.011958146
## [1411] 0.014326648 1.000000000 0.005235602 0.014326648 0.048913043
## [1416] 0.014326648 0.263157895 0.263157895 0.263157895 0.005235602
## [1421] 0.042654028 0.090909091 0.074380165 0.005235602 0.011958146
## [1426] 0.014326648 0.014326648 0.285714286 0.011958146 0.012500000
## [1431] 0.014326648 0.074380165 0.014326648 0.011958146 0.014326648
## [1436] 0.005235602 0.011958146 0.042654028 0.042654028 0.011958146
## [1441] 0.005235602 0.846153846 0.903225806 0.008474576 0.048913043
## [1446] 0.970873786 0.970873786 0.011958146 0.074380165 0.042654028
## [1451] 0.005235602 0.014326648 0.014326648 0.042654028 0.970873786
## [1456] 0.011958146 0.005235602 0.074380165 0.000000000 0.038961039
## [1461] 0.005235602 0.090909091 0.014326648 0.011958146 0.014326648
## [1466] 0.005235602 0.090909091 0.903225806 0.042654028 0.014326648
## [1471] 0.008474576 0.011958146 1.000000000 0.625000000 0.014326648
## [1476] 0.115384615 0.008474576 0.014326648 0.005235602 0.903225806
## [1481] 0.014326648 0.042654028 0.014326648 0.014326648 0.903225806
## [1486] 0.005235602 0.011958146 0.014326648 0.014326648 0.071428571
## [1491] 0.011958146 0.074380165 0.014326648 0.005235602 0.011958146
## [1496] 0.042654028 0.011958146 0.014326648 0.074380165 0.014326648
##
##
## Slot "labels":
## [[1]]
## [1] no no no no no no no no no no no no yes no no no no
## [18] no no no no no no no no no no yes no no yes no no no
## ... (remaining labels omitted; this slot holds the observed churn label for each of the 1,500 test observations)
## Levels: no < yes
##
##
## Slot "cutoffs":
## [[1]]
## [1] Inf 1.000000000 0.970873786 0.960000000 0.903225806
## [6] 0.846153846 0.833333333 0.750000000 0.666666667 0.625000000
## [11] 0.583333333 0.571428571 0.454545455 0.428571429 0.285714286
## [16] 0.263157895 0.200000000 0.142857143 0.133333333 0.116504854
## [21] 0.115384615 0.090909091 0.090909091 0.074380165 0.071428571
## [26] 0.048913043 0.044776119 0.042654028 0.041666667 0.038961039
## [31] 0.014326648 0.012500000 0.011958146 0.008474576 0.005235602
## [36] 0.000000000
##
##
## Slot "fp":
## [[1]]
## [1] 0 1 4 5 10 13 13 22 25 25 26 30 31 32
## [15] 34 43 44 50 56 90 97 111 121 168 201 276 302 412
## [29] 430 464 761 797 1045 1091 1277 1299
##
##
## Slot "tp":
## [[1]]
## [1] 0 45 79 85 129 133 140 144 145 146 146 146 154 154 156 156 156
## [18] 156 156 163 164 164 164 164 164 164 164 170 170 171 175 175 191 194
## [35] 200 201
##
##
## Slot "tn":
## [[1]]
## [1] 1299 1298 1295 1294 1289 1286 1286 1277 1274 1274 1273 1269 1268 1267
## [15] 1265 1256 1255 1249 1243 1209 1202 1188 1178 1131 1098 1023 997 887
## [29] 869 835 538 502 254 208 22 0
##
##
## Slot "fn":
## [[1]]
## [1] 201 156 122 116 72 68 61 57 56 55 55 55 47 47 45 45 45
## [18] 45 45 38 37 37 37 37 37 37 37 31 31 30 26 26 10 7
## [35] 1 0
##
##
## Slot "n.pos":
## [[1]]
## [1] 201
##
##
## Slot "n.neg":
## [[1]]
## [1] 1299
##
##
## Slot "n.pos.pred":
## [[1]]
## [1] 0 46 83 90 139 146 153 166 170 171 172 176 185 186
## [15] 190 199 200 206 212 253 261 275 285 332 365 440 466 582
## [29] 600 635 936 972 1236 1285 1477 1500
##
##
## Slot "n.neg.pred":
## [[1]]
## [1] 1500 1454 1417 1410 1361 1354 1347 1334 1330 1329 1328 1324 1315 1314
## [15] 1310 1301 1300 1294 1288 1247 1239 1225 1215 1168 1135 1060 1034 918
## [29] 900 865 564 528 264 215 23 0
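The counting slots printed above contain everything needed to reconstruct the ROC curve by hand: at each cutoff, TPR = tp / (tp + fn) and FPR = fp / (fp + tn). As a sanity check, here is a minimal sketch that recomputes these coordinates directly from the slots (it assumes pred.rocr is the ROCR prediction object whose print-out appears above):
slotNames(pred.rocr)                      # the slots listed in the print-out above
tp <- pred.rocr@tp[[1]]                   # true positives at each cutoff
fp <- pred.rocr@fp[[1]]                   # false positives at each cutoff
tn <- pred.rocr@tn[[1]]                   # true negatives at each cutoff
fn <- pred.rocr@fn[[1]]                   # false negatives at each cutoff
tpr <- tp / (tp + fn)                     # sensitivity; tp + fn equals n.pos (201)
fpr <- fp / (fp + tn)                     # 1 - specificity; fp + tn equals n.neg (1299)
head(cbind(cutoff = pred.rocr@cutoffs[[1]], fpr, tpr))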
# compute AUC and the ROC curve (TPR vs. FPR) from the ROCR prediction object
perf.rocr <- performance(pred.rocr, measure = "auc")
perf.tpr.rocr <- performance(pred.rocr, measure = "tpr", x.measure = "fpr")
plot(perf.tpr.rocr, main = paste("AUC:", round(perf.rocr@y.values[[1]], 4)))
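Since perf.rocr@y.values is a list, the AUC can be extracted as a plain numeric scalar, and a dashed diagonal marks chance-level performance on the plot; an optional addition after the plot call above:
auc <- perf.rocr@y.values[[1]]   # AUC as a numeric value rather than a list
abline(a = 0, b = 1, lty = 2)    # chance-level diagonal for reference
auc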
