R_basic5

cp

http://mlwiki.org/index.php/Cost-Complexity_Pruning

library(C50)

data(churn)

names(churnTrain) %in% c("state", "area_code", "account_length")

##  [1]  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

!names(churnTrain) %in% c("state", "area_code", "account_length")

##  [1] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [12]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE

#選擇建模變數
variable.list = !names(churnTrain) %in% c('state','area_code','account_length')
churnTrain=churnTrain[,variable.list]
churnTest=churnTest[,variable.list]

set.seed(2)
#把資料分成training data 和 testing data
ind<-sample(1:2, size=nrow(churnTrain), replace=T, prob=c(0.7, 0.3))
trainset=churnTrain[ind==1,]
testset=churnTrain[ind==2,]

library('rpart')
library('caret')

## Loading required package: lattice

## Loading required package: ggplot2

library('e1071')

con = rpart.control(minsplit=20,cp=0.01)
churn.rp<-rpart(churn ~., data=trainset,control = con)
summary(churn.rp)

## Call:
## rpart(formula = churn ~ ., data = trainset, control = con)
##   n= 2315 
## 
##           CP nsplit rel error    xerror       xstd
## 1 0.07602339      0 1.0000000 1.0000000 0.04992005
## 2 0.07456140      2 0.8479532 0.9970760 0.04985964
## 3 0.05555556      4 0.6988304 0.7602339 0.04442127
## 4 0.02631579      7 0.4941520 0.5263158 0.03767329
## 5 0.02339181      8 0.4678363 0.5204678 0.03748096
## 6 0.02046784     10 0.4210526 0.5087719 0.03709209
## 7 0.01754386     11 0.4005848 0.4707602 0.03578773
## 8 0.01000000     12 0.3830409 0.4766082 0.03599261
## 
## Variable importance
##             total_day_minutes              total_day_charge 
##                            18                            18 
## number_customer_service_calls            total_intl_minutes 
##                            10                             8 
##             total_intl_charge              total_eve_charge 
##                             8                             8 
##             total_eve_minutes            international_plan 
##                             8                             7 
##              total_intl_calls         number_vmail_messages 
##                             6                             3 
##               voice_mail_plan             total_night_calls 
##                             3                             1 
##               total_eve_calls 
##                             1 
## 
## Node number 1: 2315 observations,    complexity param=0.07602339
##   predicted class=no   expected loss=0.1477322  P(node) =1
##     class counts:   342  1973
##    probabilities: 0.148 0.852 
##   left son=2 (144 obs) right son=3 (2171 obs)
##   Primary splits:
##       total_day_minutes             < 265.45 to the right, improve=60.145020, (0 missing)
##       total_day_charge              < 45.125 to the right, improve=60.145020, (0 missing)
##       number_customer_service_calls < 3.5    to the right, improve=53.641430, (0 missing)
##       international_plan            splits as  RL,         improve=43.729370, (0 missing)
##       voice_mail_plan               splits as  LR,         improve= 6.089388, (0 missing)
##   Surrogate splits:
##       total_day_charge < 45.125 to the right, agree=1, adj=1, (0 split)
## 
## Node number 2: 144 observations,    complexity param=0.07602339
##   predicted class=yes  expected loss=0.4097222  P(node) =0.06220302
##     class counts:    85    59
##    probabilities: 0.590 0.410 
##   left son=4 (110 obs) right son=5 (34 obs)
##   Primary splits:
##       voice_mail_plan       splits as  LR,         improve=19.884860, (0 missing)
##       number_vmail_messages < 9.5    to the left,  improve=19.884860, (0 missing)
##       total_eve_minutes     < 167.05 to the right, improve=14.540020, (0 missing)
##       total_eve_charge      < 14.2   to the right, improve=14.540020, (0 missing)
##       total_day_minutes     < 283.9  to the right, improve= 6.339827, (0 missing)
##   Surrogate splits:
##       number_vmail_messages < 9.5    to the left,  agree=1.000, adj=1.000, (0 split)
##       total_night_minutes   < 110.3  to the right, agree=0.785, adj=0.088, (0 split)
##       total_night_charge    < 4.965  to the right, agree=0.785, adj=0.088, (0 split)
##       total_night_calls     < 50     to the right, agree=0.778, adj=0.059, (0 split)
##       total_intl_minutes    < 15.3   to the left,  agree=0.771, adj=0.029, (0 split)
## 
## Node number 3: 2171 observations,    complexity param=0.0745614
##   predicted class=no   expected loss=0.1183786  P(node) =0.937797
##     class counts:   257  1914
##    probabilities: 0.118 0.882 
##   left son=6 (168 obs) right son=7 (2003 obs)
##   Primary splits:
##       number_customer_service_calls < 3.5    to the right, improve=56.398210, (0 missing)
##       international_plan            splits as  RL,         improve=43.059160, (0 missing)
##       total_day_minutes             < 224.15 to the right, improve=10.847440, (0 missing)
##       total_day_charge              < 38.105 to the right, improve=10.847440, (0 missing)
##       total_intl_minutes            < 13.15  to the right, improve= 6.347319, (0 missing)
## 
## Node number 4: 110 observations,    complexity param=0.02631579
##   predicted class=yes  expected loss=0.2636364  P(node) =0.0475162
##     class counts:    81    29
##    probabilities: 0.736 0.264 
##   left son=8 (67 obs) right son=9 (43 obs)
##   Primary splits:
##       total_eve_minutes   < 188.5  to the right, improve=16.419610, (0 missing)
##       total_eve_charge    < 16.025 to the right, improve=16.419610, (0 missing)
##       total_night_minutes < 206.85 to the right, improve= 5.350500, (0 missing)
##       total_night_charge  < 9.305  to the right, improve= 5.350500, (0 missing)
##       total_day_minutes   < 281.15 to the right, improve= 5.254545, (0 missing)
##   Surrogate splits:
##       total_eve_charge   < 16.025 to the right, agree=1.000, adj=1.000, (0 split)
##       total_night_calls  < 82     to the right, agree=0.655, adj=0.116, (0 split)
##       total_intl_minutes < 3.35   to the right, agree=0.636, adj=0.070, (0 split)
##       total_intl_charge  < 0.905  to the right, agree=0.636, adj=0.070, (0 split)
##       total_day_minutes  < 268.55 to the right, agree=0.627, adj=0.047, (0 split)
## 
## Node number 5: 34 observations
##   predicted class=no   expected loss=0.1176471  P(node) =0.01468683
##     class counts:     4    30
##    probabilities: 0.118 0.882 
## 
## Node number 6: 168 observations,    complexity param=0.0745614
##   predicted class=yes  expected loss=0.4880952  P(node) =0.07257019
##     class counts:    86    82
##    probabilities: 0.512 0.488 
##   left son=12 (71 obs) right son=13 (97 obs)
##   Primary splits:
##       total_day_minutes             < 160.2  to the left,  improve=29.655880, (0 missing)
##       total_day_charge              < 27.235 to the left,  improve=29.655880, (0 missing)
##       total_eve_minutes             < 180.65 to the left,  improve= 8.556953, (0 missing)
##       total_eve_charge              < 15.355 to the left,  improve= 8.556953, (0 missing)
##       number_customer_service_calls < 4.5    to the right, improve= 5.975362, (0 missing)
##   Surrogate splits:
##       total_day_charge              < 27.235 to the left,  agree=1.000, adj=1.000, (0 split)
##       total_night_calls             < 79     to the left,  agree=0.625, adj=0.113, (0 split)
##       total_intl_calls              < 2.5    to the left,  agree=0.619, adj=0.099, (0 split)
##       number_customer_service_calls < 4.5    to the right, agree=0.607, adj=0.070, (0 split)
##       total_eve_calls               < 89.5   to the left,  agree=0.601, adj=0.056, (0 split)
## 
## Node number 7: 2003 observations,    complexity param=0.05555556
##   predicted class=no   expected loss=0.08537194  P(node) =0.8652268
##     class counts:   171  1832
##    probabilities: 0.085 0.915 
##   left son=14 (188 obs) right son=15 (1815 obs)
##   Primary splits:
##       international_plan splits as  RL,         improve=42.194510, (0 missing)
##       total_day_minutes  < 224.15 to the right, improve=16.838410, (0 missing)
##       total_day_charge   < 38.105 to the right, improve=16.838410, (0 missing)
##       total_intl_minutes < 13.15  to the right, improve= 6.210678, (0 missing)
##       total_intl_charge  < 3.55   to the right, improve= 6.210678, (0 missing)
## 
## Node number 8: 67 observations
##   predicted class=yes  expected loss=0.04477612  P(node) =0.02894168
##     class counts:    64     3
##    probabilities: 0.955 0.045 
## 
## Node number 9: 43 observations,    complexity param=0.02046784
##   predicted class=no   expected loss=0.3953488  P(node) =0.01857451
##     class counts:    17    26
##    probabilities: 0.395 0.605 
##   left son=18 (19 obs) right son=19 (24 obs)
##   Primary splits:
##       total_day_minutes   < 282.7  to the right, improve=5.680947, (0 missing)
##       total_day_charge    < 48.06  to the right, improve=5.680947, (0 missing)
##       total_night_minutes < 212.65 to the right, improve=4.558140, (0 missing)
##       total_night_charge  < 9.57   to the right, improve=4.558140, (0 missing)
##       total_eve_minutes   < 145.4  to the right, improve=4.356169, (0 missing)
##   Surrogate splits:
##       total_day_charge   < 48.06  to the right, agree=1.000, adj=1.000, (0 split)
##       total_day_calls    < 103    to the left,  agree=0.674, adj=0.263, (0 split)
##       total_eve_calls    < 104.5  to the left,  agree=0.674, adj=0.263, (0 split)
##       total_intl_minutes < 11.55  to the left,  agree=0.651, adj=0.211, (0 split)
##       total_intl_charge  < 3.12   to the left,  agree=0.651, adj=0.211, (0 split)
## 
## Node number 12: 71 observations
##   predicted class=yes  expected loss=0.1408451  P(node) =0.03066955
##     class counts:    61    10
##    probabilities: 0.859 0.141 
## 
## Node number 13: 97 observations,    complexity param=0.01754386
##   predicted class=no   expected loss=0.257732  P(node) =0.04190065
##     class counts:    25    72
##    probabilities: 0.258 0.742 
##   left son=26 (20 obs) right son=27 (77 obs)
##   Primary splits:
##       total_eve_minutes             < 155.5  to the left,  improve=7.753662, (0 missing)
##       total_eve_charge              < 13.22  to the left,  improve=7.753662, (0 missing)
##       total_intl_minutes            < 13.55  to the right, improve=2.366149, (0 missing)
##       total_intl_charge             < 3.66   to the right, improve=2.366149, (0 missing)
##       number_customer_service_calls < 4.5    to the right, improve=2.297667, (0 missing)
##   Surrogate splits:
##       total_eve_charge  < 13.22  to the left,  agree=1.000, adj=1.00, (0 split)
##       total_night_calls < 143.5  to the right, agree=0.814, adj=0.10, (0 split)
##       total_eve_calls   < 62     to the left,  agree=0.804, adj=0.05, (0 split)
## 
## Node number 14: 188 observations,    complexity param=0.05555556
##   predicted class=no   expected loss=0.4042553  P(node) =0.0812095
##     class counts:    76   112
##    probabilities: 0.404 0.596 
##   left son=28 (38 obs) right son=29 (150 obs)
##   Primary splits:
##       total_intl_calls   < 2.5    to the left,  improve=33.806520, (0 missing)
##       total_intl_minutes < 13.1   to the right, improve=30.527050, (0 missing)
##       total_intl_charge  < 3.535  to the right, improve=30.527050, (0 missing)
##       total_day_minutes  < 221.95 to the right, improve= 3.386095, (0 missing)
##       total_day_charge   < 37.735 to the right, improve= 3.386095, (0 missing)
## 
## Node number 15: 1815 observations,    complexity param=0.02339181
##   predicted class=no   expected loss=0.0523416  P(node) =0.7840173
##     class counts:    95  1720
##    probabilities: 0.052 0.948 
##   left son=30 (251 obs) right son=31 (1564 obs)
##   Primary splits:
##       total_day_minutes   < 224.15 to the right, improve=12.5649300, (0 missing)
##       total_day_charge    < 38.105 to the right, improve=12.5649300, (0 missing)
##       total_eve_minutes   < 244.95 to the right, improve= 4.7875890, (0 missing)
##       total_eve_charge    < 20.825 to the right, improve= 4.7875890, (0 missing)
##       total_night_minutes < 163.85 to the right, improve= 0.9074391, (0 missing)
##   Surrogate splits:
##       total_day_charge < 38.105 to the right, agree=1, adj=1, (0 split)
## 
## Node number 18: 19 observations
##   predicted class=yes  expected loss=0.3157895  P(node) =0.008207343
##     class counts:    13     6
##    probabilities: 0.684 0.316 
## 
## Node number 19: 24 observations
##   predicted class=no   expected loss=0.1666667  P(node) =0.01036717
##     class counts:     4    20
##    probabilities: 0.167 0.833 
## 
## Node number 26: 20 observations
##   predicted class=yes  expected loss=0.35  P(node) =0.008639309
##     class counts:    13     7
##    probabilities: 0.650 0.350 
## 
## Node number 27: 77 observations
##   predicted class=no   expected loss=0.1558442  P(node) =0.03326134
##     class counts:    12    65
##    probabilities: 0.156 0.844 
## 
## Node number 28: 38 observations
##   predicted class=yes  expected loss=0  P(node) =0.01641469
##     class counts:    38     0
##    probabilities: 1.000 0.000 
## 
## Node number 29: 150 observations,    complexity param=0.05555556
##   predicted class=no   expected loss=0.2533333  P(node) =0.06479482
##     class counts:    38   112
##    probabilities: 0.253 0.747 
##   left son=58 (32 obs) right son=59 (118 obs)
##   Primary splits:
##       total_intl_minutes < 13.1   to the right, improve=45.356840, (0 missing)
##       total_intl_charge  < 3.535  to the right, improve=45.356840, (0 missing)
##       total_day_calls    < 95.5   to the left,  improve= 4.036407, (0 missing)
##       total_day_minutes  < 237.75 to the right, improve= 1.879020, (0 missing)
##       total_day_charge   < 40.42  to the right, improve= 1.879020, (0 missing)
##   Surrogate splits:
##       total_intl_charge < 3.535  to the right, agree=1.0, adj=1.000, (0 split)
##       total_day_minutes < 52.45  to the left,  agree=0.8, adj=0.063, (0 split)
##       total_day_charge  < 8.92   to the left,  agree=0.8, adj=0.063, (0 split)
## 
## Node number 30: 251 observations,    complexity param=0.02339181
##   predicted class=no   expected loss=0.1992032  P(node) =0.1084233
##     class counts:    50   201
##    probabilities: 0.199 0.801 
##   left son=60 (36 obs) right son=61 (215 obs)
##   Primary splits:
##       total_eve_minutes     < 259.8  to the right, improve=22.993380, (0 missing)
##       total_eve_charge      < 22.08  to the right, improve=22.993380, (0 missing)
##       voice_mail_plan       splits as  LR,         improve= 4.745664, (0 missing)
##       number_vmail_messages < 7.5    to the left,  improve= 4.745664, (0 missing)
##       total_night_minutes   < 181.15 to the right, improve= 3.509731, (0 missing)
##   Surrogate splits:
##       total_eve_charge < 22.08  to the right, agree=1, adj=1, (0 split)
## 
## Node number 31: 1564 observations
##   predicted class=no   expected loss=0.02877238  P(node) =0.675594
##     class counts:    45  1519
##    probabilities: 0.029 0.971 
## 
## Node number 58: 32 observations
##   predicted class=yes  expected loss=0  P(node) =0.01382289
##     class counts:    32     0
##    probabilities: 1.000 0.000 
## 
## Node number 59: 118 observations
##   predicted class=no   expected loss=0.05084746  P(node) =0.05097192
##     class counts:     6   112
##    probabilities: 0.051 0.949 
## 
## Node number 60: 36 observations
##   predicted class=yes  expected loss=0.2777778  P(node) =0.01555076
##     class counts:    26    10
##    probabilities: 0.722 0.278 
## 
## Node number 61: 215 observations
##   predicted class=no   expected loss=0.1116279  P(node) =0.09287257
##     class counts:    24   191
##    probabilities: 0.112 0.888

printcp(churn.rp)

## 
## Classification tree:
## rpart(formula = churn ~ ., data = trainset, control = con)
## 
## Variables actually used in tree construction:
## [1] international_plan            number_customer_service_calls
## [3] total_day_minutes             total_eve_minutes            
## [5] total_intl_calls              total_intl_minutes           
## [7] voice_mail_plan              
## 
## Root node error: 342/2315 = 0.14773
## 
## n= 2315 
## 
##         CP nsplit rel error  xerror     xstd
## 1 0.076023      0   1.00000 1.00000 0.049920
## 2 0.074561      2   0.84795 0.99708 0.049860
## 3 0.055556      4   0.69883 0.76023 0.044421
## 4 0.026316      7   0.49415 0.52632 0.037673
## 5 0.023392      8   0.46784 0.52047 0.037481
## 6 0.020468     10   0.42105 0.50877 0.037092
## 7 0.017544     11   0.40058 0.47076 0.035788
## 8 0.010000     12   0.38304 0.47661 0.035993

#找出minimum cross-validation errors
min_row = which.min(churn.rp$cptable[,"xerror"])
churn.cp = churn.rp$cptable[min_row, "CP"]
#將churn.cp設為臨界值來修剪樹
prune.tree=prune(churn.rp, cp=churn.cp)
plot(prune.tree, uniform=TRUE,branch = 0.6, margin=0.1)
text(prune.tree, all=TRUE, use.n=TRUE, cex=0.7)

predictions <-predict(prune.tree, testset, type='class')
table(predictions,testset$churn)

##            
## predictions yes  no
##         yes  95  14
##         no   46 863

confusionMatrix(table(predictions, testset$churn))

## Confusion Matrix and Statistics
## 
##            
## predictions yes  no
##         yes  95  14
##         no   46 863
##                                           
##                Accuracy : 0.9411          
##                  95% CI : (0.9248, 0.9547)
##     No Information Rate : 0.8615          
##     P-Value [Acc > NIR] : 2.786e-16       
##                                           
##                   Kappa : 0.727           
##  Mcnemar's Test P-Value : 6.279e-05       
##                                           
##             Sensitivity : 0.67376         
##             Specificity : 0.98404         
##          Pos Pred Value : 0.87156         
##          Neg Pred Value : 0.94939         
##              Prevalence : 0.13851         
##          Detection Rate : 0.09332         
##    Detection Prevalence : 0.10707         
##       Balanced Accuracy : 0.82890         
##                                           
##        'Positive' Class : yes             
##

use caret package

#install.packages("caret")
library(caret)
control=trainControl(method="repeatedcv", number=10, repeats=3)

model =train(churn~., data=churnTrain, method="rpart", trControl=control)

predictions = predict(model,churnTest)
table(predictions,churnTest$churn)

##            
## predictions  yes   no
##         yes   60   24
##         no   164 1419

confusionMatrix(table(predictions,churnTest$churn))

## Confusion Matrix and Statistics
## 
##            
## predictions  yes   no
##         yes   60   24
##         no   164 1419
##                                          
##                Accuracy : 0.8872         
##                  95% CI : (0.8711, 0.902)
##     No Information Rate : 0.8656         
##     P-Value [Acc > NIR] : 0.00464        
##                                          
##                   Kappa : 0.3413         
##  Mcnemar's Test P-Value : < 2e-16        
##                                          
##             Sensitivity : 0.26786        
##             Specificity : 0.98337        
##          Pos Pred Value : 0.71429        
##          Neg Pred Value : 0.89640        
##              Prevalence : 0.13437        
##          Detection Rate : 0.03599        
##    Detection Prevalence : 0.05039        
##       Balanced Accuracy : 0.62561        
##                                          
##        'Positive' Class : yes            
##

caret 套件使用說明

# 查詢caret package 有實作的所有演算法
names(getModelInfo())

##   [1] "ada"                 "AdaBag"              "AdaBoost.M1"        
##   [4] "adaboost"            "amdai"               "ANFIS"              
##   [7] "avNNet"              "awnb"                "awtan"              
##  [10] "bag"                 "bagEarth"            "bagEarthGCV"        
##  [13] "bagFDA"              "bagFDAGCV"           "bam"                
##  [16] "bartMachine"         "bayesglm"            "binda"              
##  [19] "blackboost"          "blasso"              "blassoAveraged"     
##  [22] "bridge"              "brnn"                "BstLm"              
##  [25] "bstSm"               "bstTree"             "C5.0"               
##  [28] "C5.0Cost"            "C5.0Rules"           "C5.0Tree"           
##  [31] "cforest"             "chaid"               "CSimca"             
##  [34] "ctree"               "ctree2"              "cubist"             
##  [37] "dda"                 "deepboost"           "DENFIS"             
##  [40] "dnn"                 "dwdLinear"           "dwdPoly"            
##  [43] "dwdRadial"           "earth"               "elm"                
##  [46] "enet"                "evtree"              "extraTrees"         
##  [49] "fda"                 "FH.GBML"             "FIR.DM"             
##  [52] "foba"                "FRBCS.CHI"           "FRBCS.W"            
##  [55] "FS.HGD"              "gam"                 "gamboost"           
##  [58] "gamLoess"            "gamSpline"           "gaussprLinear"      
##  [61] "gaussprPoly"         "gaussprRadial"       "gbm_h2o"            
##  [64] "gbm"                 "gcvEarth"            "GFS.FR.MOGUL"       
##  [67] "GFS.LT.RS"           "GFS.THRIFT"          "glm.nb"             
##  [70] "glm"                 "glmboost"            "glmnet_h2o"         
##  [73] "glmnet"              "glmStepAIC"          "gpls"               
##  [76] "hda"                 "hdda"                "hdrda"              
##  [79] "HYFIS"               "icr"                 "J48"                
##  [82] "JRip"                "kernelpls"           "kknn"               
##  [85] "knn"                 "krlsPoly"            "krlsRadial"         
##  [88] "lars"                "lars2"               "lasso"              
##  [91] "lda"                 "lda2"                "leapBackward"       
##  [94] "leapForward"         "leapSeq"             "Linda"              
##  [97] "lm"                  "lmStepAIC"           "LMT"                
## [100] "loclda"              "logicBag"            "LogitBoost"         
## [103] "logreg"              "lssvmLinear"         "lssvmPoly"          
## [106] "lssvmRadial"         "lvq"                 "M5"                 
## [109] "M5Rules"             "manb"                "mda"                
## [112] "Mlda"                "mlp"                 "mlpKerasDecay"      
## [115] "mlpKerasDecayCost"   "mlpKerasDropout"     "mlpKerasDropoutCost"
## [118] "mlpML"               "mlpSGD"              "mlpWeightDecay"     
## [121] "mlpWeightDecayML"    "monmlp"              "msaenet"            
## [124] "multinom"            "mxnet"               "mxnetAdam"          
## [127] "naive_bayes"         "nb"                  "nbDiscrete"         
## [130] "nbSearch"            "neuralnet"           "nnet"               
## [133] "nnls"                "nodeHarvest"         "null"               
## [136] "OneR"                "ordinalNet"          "ORFlog"             
## [139] "ORFpls"              "ORFridge"            "ORFsvm"             
## [142] "ownn"                "pam"                 "parRF"              
## [145] "PART"                "partDSA"             "pcaNNet"            
## [148] "pcr"                 "pda"                 "pda2"               
## [151] "penalized"           "PenalizedLDA"        "plr"                
## [154] "pls"                 "plsRglm"             "polr"               
## [157] "ppr"                 "PRIM"                "protoclass"         
## [160] "qda"                 "QdaCov"              "qrf"                
## [163] "qrnn"                "randomGLM"           "ranger"             
## [166] "rbf"                 "rbfDDA"              "Rborist"            
## [169] "rda"                 "regLogistic"         "relaxo"             
## [172] "rf"                  "rFerns"              "RFlda"              
## [175] "rfRules"             "ridge"               "rlda"               
## [178] "rlm"                 "rmda"                "rocc"               
## [181] "rotationForest"      "rotationForestCp"    "rpart"              
## [184] "rpart1SE"            "rpart2"              "rpartCost"          
## [187] "rpartScore"          "rqlasso"             "rqnc"               
## [190] "RRF"                 "RRFglobal"           "rrlda"              
## [193] "RSimca"              "rvmLinear"           "rvmPoly"            
## [196] "rvmRadial"           "SBC"                 "sda"                
## [199] "sdwd"                "simpls"              "SLAVE"              
## [202] "slda"                "smda"                "snn"                
## [205] "sparseLDA"           "spikeslab"           "spls"               
## [208] "stepLDA"             "stepQDA"             "superpc"            
## [211] "svmBoundrangeString" "svmExpoString"       "svmLinear"          
## [214] "svmLinear2"          "svmLinear3"          "svmLinearWeights"   
## [217] "svmLinearWeights2"   "svmPoly"             "svmRadial"          
## [220] "svmRadialCost"       "svmRadialSigma"      "svmRadialWeights"   
## [223] "svmSpectrumString"   "tan"                 "tanSearch"          
## [226] "treebag"             "vbmpRadial"          "vglmAdjCat"         
## [229] "vglmContRatio"       "vglmCumulative"      "widekernelpls"      
## [232] "WM"                  "wsrf"                "xgbDART"            
## [235] "xgbLinear"           "xgbTree"             "xyf"

# 查詢caret package 有沒有實作rpart演算法
names(getModelInfo())[grep('rpart',names(getModelInfo()))]

## [1] "rpart"      "rpart1SE"   "rpart2"     "rpartCost"  "rpartScore"

# 查詢rpart model資訊
getModelInfo('rpart')

## $rpart
## $rpart$label
## [1] "CART"
## 
## $rpart$library
## [1] "rpart"
## 
## $rpart$type
## [1] "Regression"     "Classification"
## 
## $rpart$parameters
##   parameter   class                label
## 1        cp numeric Complexity Parameter
## 
## $rpart$grid
## function (x, y, len = NULL, search = "grid") 
## {
##     dat <- if (is.data.frame(x)) 
##         x
##     else as.data.frame(x)
##     dat$.outcome <- y
##     initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
##     initialFit <- initialFit[order(-initialFit[, "CP"]), , drop = FALSE]
##     if (search == "grid") {
##         if (nrow(initialFit) < len) {
##             tuneSeq <- data.frame(cp = seq(min(initialFit[, "CP"]), 
##                 max(initialFit[, "CP"]), length = len))
##         }
##         else tuneSeq <- data.frame(cp = initialFit[1:len, "CP"])
##         colnames(tuneSeq) <- "cp"
##     }
##     else {
##         tuneSeq <- data.frame(cp = unique(sample(initialFit[, 
##             "CP"], size = len, replace = TRUE)))
##     }
##     tuneSeq
## }
## 
## $rpart$loop
## function (grid) 
## {
##     grid <- grid[order(grid$cp, decreasing = FALSE), , drop = FALSE]
##     loop <- grid[1, , drop = FALSE]
##     submodels <- list(grid[-1, , drop = FALSE])
##     list(loop = loop, submodels = submodels)
## }
## 
## $rpart$fit
## function (x, y, wts, param, lev, last, classProbs, ...) 
## {
##     cpValue <- if (!last) 
##         param$cp
##     else 0
##     theDots <- list(...)
##     if (any(names(theDots) == "control")) {
##         theDots$control$cp <- cpValue
##         theDots$control$xval <- 0
##         ctl <- theDots$control
##         theDots$control <- NULL
##     }
##     else ctl <- rpart::rpart.control(cp = cpValue, xval = 0)
##     if (!is.null(wts)) 
##         theDots$weights <- wts
##     modelArgs <- c(list(formula = as.formula(".outcome ~ ."), 
##         data = if (is.data.frame(x)) x else as.data.frame(x), 
##         control = ctl), theDots)
##     modelArgs$data$.outcome <- y
##     out <- do.call(rpart::rpart, modelArgs)
##     if (last) 
##         out <- rpart::prune.rpart(out, cp = param$cp)
##     out
## }
## 
## $rpart$predict
## function (modelFit, newdata, submodels = NULL) 
## {
##     if (!is.data.frame(newdata)) 
##         newdata <- as.data.frame(newdata)
##     pType <- if (modelFit$problemType == "Classification") 
##         "class"
##     else "vector"
##     out <- predict(modelFit, newdata, type = pType)
##     if (!is.null(submodels)) {
##         tmp <- vector(mode = "list", length = nrow(submodels) + 
##             1)
##         tmp[[1]] <- out
##         for (j in seq(along = submodels$cp)) {
##             prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
##             tmp[[j + 1]] <- predict(prunedFit, newdata, type = pType)
##         }
##         out <- tmp
##     }
##     out
## }
## 
## $rpart$prob
## function (modelFit, newdata, submodels = NULL) 
## {
##     if (!is.data.frame(newdata)) 
##         newdata <- as.data.frame(newdata)
##     out <- predict(modelFit, newdata, type = "prob")
##     if (!is.null(submodels)) {
##         tmp <- vector(mode = "list", length = nrow(submodels) + 
##             1)
##         tmp[[1]] <- out
##         for (j in seq(along = submodels$cp)) {
##             prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
##             tmpProb <- predict(prunedFit, newdata, type = "prob")
##             tmp[[j + 1]] <- as.data.frame(tmpProb[, modelFit$obsLevels, 
##                 drop = FALSE])
##         }
##         out <- tmp
##     }
##     out
## }
## 
## $rpart$predictors
## function (x, surrogate = TRUE, ...) 
## {
##     out <- as.character(x$frame$var)
##     out <- out[!(out %in% c("<leaf>"))]
##     if (surrogate) {
##         splits <- x$splits
##         splits <- splits[splits[, "adj"] > 0, ]
##         out <- c(out, rownames(splits))
##     }
##     unique(out)
## }
## 
## $rpart$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...) 
## {
##     if (nrow(object$splits) > 0) {
##         tmp <- rownames(object$splits)
##         rownames(object$splits) <- 1:nrow(object$splits)
##         splits <- data.frame(object$splits)
##         splits$var <- tmp
##         splits$type <- ""
##         frame <- as.data.frame(object$frame)
##         index <- 0
##         for (i in 1:nrow(frame)) {
##             if (frame$var[i] != "<leaf>") {
##                 index <- index + 1
##                 splits$type[index] <- "primary"
##                 if (frame$ncompete[i] > 0) {
##                   for (j in 1:frame$ncompete[i]) {
##                     index <- index + 1
##                     splits$type[index] <- "competing"
##                   }
##                 }
##                 if (frame$nsurrogate[i] > 0) {
##                   for (j in 1:frame$nsurrogate[i]) {
##                     index <- index + 1
##                     splits$type[index] <- "surrogate"
##                   }
##                 }
##             }
##         }
##         splits$var <- factor(as.character(splits$var))
##         if (!surrogates) 
##             splits <- subset(splits, type != "surrogate")
##         if (!competes) 
##             splits <- subset(splits, type != "competing")
##         out <- aggregate(splits$improve, list(Variable = splits$var), 
##             sum, na.rm = TRUE)
##     }
##     else {
##         out <- data.frame(x = numeric(), Vaiable = character())
##     }
##     allVars <- colnames(attributes(object$terms)$factors)
##     if (!all(allVars %in% out$Variable)) {
##         missingVars <- allVars[!(allVars %in% out$Variable)]
##         zeros <- data.frame(x = rep(0, length(missingVars)), 
##             Variable = missingVars)
##         out <- rbind(out, zeros)
##     }
##     out2 <- data.frame(Overall = out$x)
##     rownames(out2) <- out$Variable
##     out2
## }
## 
## $rpart$levels
## function (x) 
## x$obsLevels
## 
## $rpart$trim
## function (x) 
## {
##     x$call <- list(na.action = (x$call)$na.action)
##     x$x <- NULL
##     x$y <- NULL
##     x$where <- NULL
##     x
## }
## 
## $rpart$tags
## [1] "Tree-Based Model"              "Implicit Feature Selection"   
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"         
## 
## $rpart$sort
## function (x) 
## x[order(x[, 1], decreasing = TRUE), ]
## 
## 
## $rpart1SE
## $rpart1SE$label
## [1] "CART"
## 
## $rpart1SE$library
## [1] "rpart"
## 
## $rpart1SE$type
## [1] "Regression"     "Classification"
## 
## $rpart1SE$parameters
##   parameter     class     label
## 1 parameter character parameter
## 
## $rpart1SE$grid
## function (x, y, len = NULL, search = "grid") 
## data.frame(parameter = "none")
## 
## $rpart1SE$loop
## NULL
## 
## $rpart1SE$fit
## function (x, y, wts, param, lev, last, classProbs, ...) 
## {
##     dat <- if (is.data.frame(x)) 
##         x
##     else as.data.frame(x)
##     dat$.outcome <- y
##     if (!is.null(wts)) {
##         out <- rpart::rpart(.outcome ~ ., data = dat, ...)
##     }
##     else {
##         out <- rpart::rpart(.outcome ~ ., data = dat, weights = wts, 
##             ...)
##     }
##     out
## }
## 
## $rpart1SE$predict
## function (modelFit, newdata, submodels = NULL) 
## {
##     if (!is.data.frame(newdata)) 
##         newdata <- as.data.frame(newdata)
##     out <- if (modelFit$problemType == "Classification") 
##         predict(modelFit, newdata, type = "class")
##     else predict(modelFit, newdata)
##     out
## }
## 
## $rpart1SE$prob
## function (modelFit, newdata, submodels = NULL) 
## {
##     if (!is.data.frame(newdata)) 
##         newdata <- as.data.frame(newdata)
##     predict(modelFit, newdata, type = "prob")
## }
## 
## $rpart1SE$predictors
## function (x, surrogate = TRUE, ...) 
## {
##     out <- as.character(x$frame$var)
##     out <- out[!(out %in% c("<leaf>"))]
##     if (surrogate) {
##         splits <- x$splits
##         splits <- splits[splits[, "adj"] > 0, ]
##         out <- c(out, rownames(splits))
##     }
##     unique(out)
## }
## 
## $rpart1SE$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...) 
## {
##     tmp <- rownames(object$splits)
##     rownames(object$splits) <- 1:nrow(object$splits)
##     splits <- data.frame(object$splits)
##     splits$var <- tmp
##     splits$type <- ""
##     frame <- as.data.frame(object$frame)
##     index <- 0
##     for (i in 1:nrow(frame)) {
##         if (frame$var[i] != "<leaf>") {
##             index <- index + 1
##             splits$type[index] <- "primary"
##             if (frame$ncompete[i] > 0) {
##                 for (j in 1:frame$ncompete[i]) {
##                   index <- index + 1
##                   splits$type[index] <- "competing"
##                 }
##             }
##             if (frame$nsurrogate[i] > 0) {
##                 for (j in 1:frame$nsurrogate[i]) {
##                   index <- index + 1
##                   splits$type[index] <- "surrogate"
##                 }
##             }
##         }
##     }
##     splits$var <- factor(as.character(splits$var))
##     if (!surrogates) 
##         splits <- subset(splits, type != "surrogate")
##     if (!competes) 
##         splits <- subset(splits, type != "competing")
##     out <- aggregate(splits$improve, list(Variable = splits$var), 
##         sum, na.rm = TRUE)
##     allVars <- colnames(attributes(object$terms)$factors)
##     if (!all(allVars %in% out$Variable)) {
##         missingVars <- allVars[!(allVars %in% out$Variable)]
##         zeros <- data.frame(x = rep(0, length(missingVars)), 
##             Variable = missingVars)
##         out <- rbind(out, zeros)
##     }
##     out2 <- data.frame(Overall = out$x)
##     rownames(out2) <- out$Variable
##     out2
## }
## 
## $rpart1SE$levels
## function (x) 
## x$obsLevels
## 
## $rpart1SE$trim
## function (x) 
## {
##     x$call <- list(na.action = (x$call)$na.action)
##     x$x <- NULL
##     x$y <- NULL
##     x$where <- NULL
##     x
## }
## 
## $rpart1SE$notes
## [1] "This CART model replicates the same process used by the `rpart` function where the model complexity is determined using the one-standard error method. This procedure is replicated inside of the resampling done by `train` so that an external resampling estimate can be obtained."
## 
## $rpart1SE$tags
## [1] "Tree-Based Model"              "Implicit Feature Selection"   
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"         
## 
## $rpart1SE$sort
## function (x) 
## x[order(x[, 1], decreasing = TRUE), ]
## 
## 
## $rpart2
## $rpart2$label
## [1] "CART"
## 
## $rpart2$library
## [1] "rpart"
## 
## $rpart2$type
## [1] "Regression"     "Classification"
## 
## $rpart2$parameters
##   parameter   class          label
## 1  maxdepth numeric Max Tree Depth
## 
## $rpart2$grid
## function (x, y, len = NULL, search = "grid") 
## {
##     dat <- if (is.data.frame(x)) 
##         x
##     else as.data.frame(x)
##     dat$.outcome <- y
##     initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
##     initialFit <- initialFit[order(-initialFit[, "CP"]), "nsplit", 
##         drop = FALSE]
##     initialFit <- initialFit[initialFit[, "nsplit"] > 0 & initialFit[, 
##         "nsplit"] <= 30, , drop = FALSE]
##     if (search == "grid") {
##         if (dim(initialFit)[1] < len) {
##             cat("note: only", nrow(initialFit), "possible values of the max tree depth from the initial fit.\n", 
##                 "Truncating the grid to", nrow(initialFit), ".\n\n")
##             tuneSeq <- as.data.frame(initialFit)
##         }
##         else tuneSeq <- as.data.frame(initialFit[1:len, ])
##         colnames(tuneSeq) <- "maxdepth"
##     }
##     else {
##         tuneSeq <- data.frame(maxdepth = unique(sample(as.vector(initialFit[, 
##             1]), size = len, replace = TRUE)))
##     }
##     tuneSeq
## }
## 
## $rpart2$loop
## function (grid) 
## {
##     grid <- grid[order(grid$maxdepth, decreasing = TRUE), , drop = FALSE]
##     loop <- grid[1, , drop = FALSE]
##     submodels <- list(grid[-1, , drop = FALSE])
##     list(loop = loop, submodels = submodels)
## }
## 
## $rpart2$fit
## function (x, y, wts, param, lev, last, classProbs, ...) 
## {
##     theDots <- list(...)
##     if (any(names(theDots) == "control")) {
##         theDots$control$maxdepth <- param$maxdepth
##         theDots$control$xval <- 0
##         ctl <- theDots$control
##         theDots$control <- NULL
##     }
##     else ctl <- rpart::rpart.control(maxdepth = param$maxdepth, 
##         xval = 0)
##     if (!is.null(wts)) 
##         theDots$weights <- wts
##     modelArgs <- c(list(formula = as.formula(".outcome ~ ."), 
##         data = if (is.data.frame(x)) x else as.data.frame(x), 
##         control = ctl), theDots)
##     modelArgs$data$.outcome <- y
##     out <- do.call(rpart::rpart, modelArgs)
##     out
## }
## 
## $rpart2$predict
## function (modelFit, newdata, submodels = NULL) 
## {
##     depth2cp <- function(x, depth) {
##         out <- approx(x[, "nsplit"], x[, "CP"], depth)$y
##         out[depth > max(x[, "nsplit"])] <- min(x[, "CP"]) * 0.99
##         out
##     }
##     if (!is.data.frame(newdata)) 
##         newdata <- as.data.frame(newdata)
##     pType <- if (modelFit$problemType == "Classification") 
##         "class"
##     else "vector"
##     out <- predict(modelFit, newdata, type = pType)
##     if (!is.null(submodels)) {
##         tmp <- vector(mode = "list", length = nrow(submodels) + 
##             1)
##         tmp[[1]] <- out
##         cpValues <- depth2cp(modelFit$cptable, submodels$maxdepth)
##         for (j in seq(along = cpValues)) {
##             prunedFit <- rpart::prune.rpart(modelFit, cp = cpValues[j])
##             tmp[[j + 1]] <- predict(prunedFit, newdata, type = pType)
##         }
##         out <- tmp
##     }
##     out
## }
## 
## $rpart2$prob
## function (modelFit, newdata, submodels = NULL) 
## {
##     depth2cp <- function(x, depth) {
##         out <- approx(x[, "nsplit"], x[, "CP"], depth)$y
##         out[depth > max(x[, "nsplit"])] <- min(x[, "CP"]) * 0.99
##         out
##     }
##     if (!is.data.frame(newdata)) 
##         newdata <- as.data.frame(newdata)
##     out <- predict(modelFit, newdata, type = "prob")
##     if (!is.null(submodels)) {
##         tmp <- vector(mode = "list", length = nrow(submodels) + 
##             1)
##         tmp[[1]] <- out
##         cpValues <- depth2cp(modelFit$cptable, submodels$maxdepth)
##         for (j in seq(along = cpValues)) {
##             prunedFit <- rpart::prune.rpart(modelFit, cp = cpValues[j])
##             tmpProb <- predict(prunedFit, newdata, type = "prob")
##             tmp[[j + 1]] <- as.data.frame(tmpProb[, modelFit$obsLevels, 
##                 drop = FALSE])
##         }
##         out <- tmp
##     }
##     out
## }
## 
## $rpart2$predictors
## function (x, surrogate = TRUE, ...) 
## {
##     out <- as.character(x$frame$var)
##     out <- out[!(out %in% c("<leaf>"))]
##     if (surrogate) {
##         splits <- x$splits
##         splits <- splits[splits[, "adj"] > 0, ]
##         out <- c(out, rownames(splits))
##     }
##     unique(out)
## }
## 
## $rpart2$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...) 
## {
##     tmp <- rownames(object$splits)
##     rownames(object$splits) <- 1:nrow(object$splits)
##     splits <- data.frame(object$splits)
##     splits$var <- tmp
##     splits$type <- ""
##     frame <- as.data.frame(object$frame)
##     index <- 0
##     for (i in 1:nrow(frame)) {
##         if (frame$var[i] != "<leaf>") {
##             index <- index + 1
##             splits$type[index] <- "primary"
##             if (frame$ncompete[i] > 0) {
##                 for (j in 1:frame$ncompete[i]) {
##                   index <- index + 1
##                   splits$type[index] <- "competing"
##                 }
##             }
##             if (frame$nsurrogate[i] > 0) {
##                 for (j in 1:frame$nsurrogate[i]) {
##                   index <- index + 1
##                   splits$type[index] <- "surrogate"
##                 }
##             }
##         }
##     }
##     splits$var <- factor(as.character(splits$var))
##     if (!surrogates) 
##         splits <- subset(splits, type != "surrogate")
##     if (!competes) 
##         splits <- subset(splits, type != "competing")
##     out <- aggregate(splits$improve, list(Variable = splits$var), 
##         sum, na.rm = TRUE)
##     allVars <- colnames(attributes(object$terms)$factors)
##     if (!all(allVars %in% out$Variable)) {
##         missingVars <- allVars[!(allVars %in% out$Variable)]
##         zeros <- data.frame(x = rep(0, length(missingVars)), 
##             Variable = missingVars)
##         out <- rbind(out, zeros)
##     }
##     out2 <- data.frame(Overall = out$x)
##     rownames(out2) <- out$Variable
##     out2
## }
## 
## $rpart2$levels
## function (x) 
## x$obsLevels
## 
## $rpart2$trim
## function (x) 
## {
##     x$call <- list(na.action = (x$call)$na.action)
##     x$x <- NULL
##     x$y <- NULL
##     x$where <- NULL
##     x
## }
## 
## $rpart2$tags
## [1] "Tree-Based Model"              "Implicit Feature Selection"   
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"         
## 
## $rpart2$sort
## function (x) 
## x[order(x[, 1]), ]
## 
## 
## $rpartCost
## $rpartCost$label
## [1] "Cost-Sensitive CART"
## 
## $rpartCost$library
## [1] "rpart" "plyr" 
## 
## $rpartCost$type
## [1] "Classification"
## 
## $rpartCost$parameters
##   parameter   class                label
## 1        cp numeric Complexity Parameter
## 2      Cost numeric                 Cost
## 
## $rpartCost$grid
## function (x, y, len = NULL, search = "grid") 
## {
##     dat <- if (is.data.frame(x)) 
##         x
##     else as.data.frame(x)
##     dat$.outcome <- y
##     initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
##     initialFit <- initialFit[order(-initialFit[, "CP"]), , drop = FALSE]
##     if (search == "grid") {
##         if (nrow(initialFit) < len) {
##             tuneSeq <- expand.grid(cp = seq(min(initialFit[, 
##                 "CP"]), max(initialFit[, "CP"]), length = len), 
##                 Cost = 1:len)
##         }
##         else tuneSeq <- data.frame(cp = initialFit[1:len, "CP"], 
##             Cost = 1:len)
##         colnames(tuneSeq) <- c("cp", "Cost")
##     }
##     else {
##         tuneSeq <- data.frame(cp = 10^runif(len, min = -8, max = -1), 
##             Cost = runif(len, min = 1, max = 30))
##     }
##     tuneSeq
## }
## 
## $rpartCost$loop
## function (grid) 
## {
##     loop <- plyr::ddply(grid, plyr::.(Cost), function(x) c(cp = min(x$cp)))
##     submodels <- vector(mode = "list", length = nrow(loop))
##     for (i in seq(along = submodels)) {
##         larger_cp <- subset(grid, subset = Cost == loop$Cost[i] & 
##             cp > loop$cp[i])
##         submodels[[i]] <- data.frame(cp = sort(larger_cp$cp))
##     }
##     list(loop = loop, submodels = submodels)
## }
## 
## $rpartCost$fit
## function (x, y, wts, param, lev, last, classProbs, ...) 
## {
##     theDots <- list(...)
##     if (any(names(theDots) == "control")) {
##         theDots$control$cp <- param$cp
##         theDots$control$xval <- 0
##         ctl <- theDots$control
##         theDots$control <- NULL
##     }
##     else ctl <- rpart::rpart.control(cp = param$cp, xval = 0)
##     lmat <- matrix(c(0, 1, param$Cost, 0), ncol = 2)
##     rownames(lmat) <- colnames(lmat) <- levels(y)
##     if (any(names(theDots) == "parms")) {
##         theDots$parms$loss <- lmat
##     }
##     else parms <- list(loss = lmat)
##     if (!is.null(wts)) 
##         theDots$weights <- wts
##     modelArgs <- c(list(formula = as.formula(".outcome ~ ."), 
##         data = if (is.data.frame(x)) x else as.data.frame(x), 
##         parms = parms, control = ctl), theDots)
##     modelArgs$data$.outcome <- y
##     out <- do.call(rpart::rpart, modelArgs)
##     out
## }
## 
## $rpartCost$predict
## function (modelFit, newdata, submodels = NULL) 
## {
##     if (!is.data.frame(newdata)) 
##         newdata <- as.data.frame(newdata)
##     pType <- if (modelFit$problemType == "Classification") 
##         "class"
##     else "vector"
##     out <- predict(modelFit, newdata, type = pType)
##     if (!is.null(submodels)) {
##         tmp <- vector(mode = "list", length = nrow(submodels) + 
##             1)
##         tmp[[1]] <- out
##         for (j in seq(along = submodels$cp)) {
##             prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
##             tmp[[j + 1]] <- predict(prunedFit, newdata, type = pType)
##         }
##         out <- tmp
##     }
##     out
## }
## 
## $rpartCost$levels
## function (x) 
## x$obsLevels
## 
## $rpartCost$prob
## NULL
## 
## $rpartCost$tags
## [1] "Tree-Based Model"              "Implicit Feature Selection"   
## [3] "Cost Sensitive Learning"       "Two Class Only"               
## [5] "Handle Missing Predictor Data" "Accepts Case Weights"         
## 
## $rpartCost$sort
## function (x) 
## x[order(-x$cp, -x$Cost), ]
## 
## 
## $rpartScore
## $rpartScore$label
## [1] "CART or Ordinal Responses"
## 
## $rpartScore$library
## [1] "rpartScore" "plyr"      
## 
## $rpartScore$type
## [1] "Classification"
## 
## $rpartScore$parameters
##   parameter     class                label
## 1        cp   numeric Complexity Parameter
## 2     split character       Split Function
## 3     prune character      Pruning Measure
## 
## $rpartScore$grid
## function (x, y, len = NULL, search = "grid") 
## {
##     dat <- if (is.data.frame(x)) 
##         x
##     else as.data.frame(x)
##     dat$.outcome <- y
##     initialFit <- rpart::rpart(.outcome ~ ., data = dat, control = rpart::rpart.control(cp = 0))$cptable
##     initialFit <- initialFit[order(-initialFit[, "CP"]), , drop = FALSE]
##     if (search == "grid") {
##         if (nrow(initialFit) < len) {
##             tuneSeq <- expand.grid(cp = seq(min(initialFit[, 
##                 "CP"]), max(initialFit[, "CP"]), length = len), 
##                 split = c("abs", "quad"), prune = c("mr", "mc"))
##         }
##         else tuneSeq <- expand.grid(cp = initialFit[1:len, "CP"], 
##             split = c("abs", "quad"), prune = c("mr", "mc"))
##         colnames(tuneSeq)[1] <- "cp"
##     }
##     else {
##         tuneSeq <- expand.grid(cp = unique(sample(initialFit[, 
##             "CP"], size = len, replace = TRUE)), split = c("abs", 
##             "quad"), prune = c("mr", "mc"))
##     }
##     tuneSeq
## }
## 
## $rpartScore$fit
## function (x, y, wts, param, lev, last, classProbs, ...) 
## {
##     cpValue <- if (!last) 
##         param$cp
##     else 0
##     theDots <- list(...)
##     if (any(names(theDots) == "control")) {
##         theDots$control$cp <- cpValue
##         theDots$control$xval <- 0
##         ctl <- theDots$control
##         theDots$control <- NULL
##     }
##     else ctl <- rpart::rpart.control(cp = cpValue, xval = 0)
##     if (!is.null(wts)) 
##         theDots$weights <- wts
##     modelArgs <- c(list(formula = as.formula(".outcome ~ ."), 
##         data = if (is.data.frame(x)) x else as.data.frame(x), 
##         split = as.character(param$split), prune = as.character(param$prune), 
##         control = ctl), theDots)
##     modelArgs$data$.outcome <- as.numeric(y)
##     out <- do.call(rpartScore::rpartScore, modelArgs)
##     if (last) 
##         out <- rpart::prune.rpart(out, cp = param$cp)
##     out
## }
## 
## $rpartScore$predict
## function (modelFit, newdata, submodels = NULL) 
## {
##     if (!is.data.frame(newdata)) 
##         newdata <- as.data.frame(newdata)
##     out <- modelFit$obsLevels[predict(modelFit, newdata)]
##     if (!is.null(submodels)) {
##         tmp <- vector(mode = "list", length = nrow(submodels) + 
##             1)
##         tmp[[1]] <- out
##         for (j in seq(along = submodels$cp)) {
##             prunedFit <- rpart::prune.rpart(modelFit, cp = submodels$cp[j])
##             tmp[[j + 1]] <- modelFit$obsLevels[predict(prunedFit, 
##                 newdata)]
##         }
##         out <- tmp
##     }
##     out
## }
## 
## $rpartScore$prob
## NULL
## 
## $rpartScore$predictors
## function (x, surrogate = TRUE, ...) 
## {
##     out <- as.character(x$frame$var)
##     out <- out[!(out %in% c("<leaf>"))]
##     if (surrogate) {
##         splits <- x$splits
##         splits <- splits[splits[, "adj"] > 0, ]
##         out <- c(out, rownames(splits))
##     }
##     unique(out)
## }
## 
## $rpartScore$varImp
## function (object, surrogates = FALSE, competes = TRUE, ...) 
## {
##     allVars <- all.vars(object$terms)
##     allVars <- allVars[allVars != ".outcome"]
##     out <- data.frame(Overall = object$variable.importance, Variable = names(object$variable.importance))
##     rownames(out) <- names(object$variable.importance)
##     if (!all(allVars %in% out$Variable)) {
##         missingVars <- allVars[!(allVars %in% out$Variable)]
##         zeros <- data.frame(Overall = rep(0, length(missingVars)), 
##             Variable = missingVars)
##         out <- rbind(out, zeros)
##     }
##     rownames(out) <- out$Variable
##     out$Variable <- NULL
##     out
## }
## 
## $rpartScore$levels
## function (x) 
## x$obsLevels
## 
## $rpartScore$trim
## function (x) 
## {
##     x$call <- list(na.action = (x$call)$na.action)
##     x$x <- NULL
##     x$y <- NULL
##     x$where <- NULL
##     x
## }
## 
## $rpartScore$tags
## [1] "Tree-Based Model"              "Implicit Feature Selection"   
## [3] "Handle Missing Predictor Data" "Accepts Case Weights"         
## [5] "Ordinal Outcomes"             
## 
## $rpartScore$sort
## function (x) 
## x[order(x[, 1], decreasing = TRUE), ]

# 查詢rpart model可以tune的parameters
getModelInfo('rpart')$rpart$parameters

##   parameter   class                label
## 1        cp numeric Complexity Parameter

R_basic5

York Lin

2019年12月17日

cp

use caret package

caret 套件使用說明