library(C50)
library('caret')
## Loading required package: lattice
## Loading required package: ggplot2
library('e1071')
data(churn)

# Logical mask over the columns of churnTrain: TRUE where the column name is
# one of "state", "area_code" or "account_length".
names(churnTrain) %in% c("state", "area_code", "account_length")
##  [1]  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Negating the mask gives TRUE for every column that is NOT one of those three.
!names(churnTrain) %in% c("state", "area_code", "account_length")
##  [1] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [12]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
# Select the modelling variables: keep every column except state, area_code
# and account_length. The mask is computed from churnTrain and applied to both
# data sets (assumes churnTest has the same column order -- TODO confirm).
variable.list <- !names(churnTrain) %in%
  c("state", "area_code", "account_length")
churnTrain <- churnTrain[, variable.list]
churnTest <- churnTest[, variable.list]

set.seed(2)
# Split churnTrain into training data (~70%) and validation data (~30%) by
# drawing a group label (1 or 2) for every row.
ind <- sample(1:2, size = nrow(churnTrain), replace = TRUE, prob = c(0.7, 0.3))
trainset <- churnTrain[ind == 1, ]
testset <- churnTrain[ind == 2, ]

Use the caret package

# install.packages("caret")
library(caret)

# Resampling scheme: 10-fold cross-validation repeated 3 times, with class
# probabilities enabled and multiClassSummary as the metric function.
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  classProbs = TRUE,
  summaryFunction = multiClassSummary
)

# Candidate values of the rpart complexity parameter: 0.01, 0.02, ..., 0.10.
tune_funs <- expand.grid(cp = seq(0.01, 0.1, by = 0.01))

# Fit and tune an rpart tree on the whole churnTrain set.
model <- train(
  churn ~ .,
  data = churnTrain,
  method = "rpart",
  trControl = control,
  tuneGrid = tune_funs
)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
# Predicted class labels for the churnTest rows.
predictions <- predict(model, newdata = churnTest)

# Cross-tabulate predicted against actual labels and summarise the table.
confusionMatrix(table(predictions, churnTest$churn))
## Confusion Matrix and Statistics
## 
##            
## predictions  yes   no
##         yes  145   15
##         no    79 1428
##                                           
##                Accuracy : 0.9436          
##                  95% CI : (0.9314, 0.9542)
##     No Information Rate : 0.8656          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7243          
##  Mcnemar's Test P-Value : 8.142e-11       
##                                           
##             Sensitivity : 0.64732         
##             Specificity : 0.98960         
##          Pos Pred Value : 0.90625         
##          Neg Pred Value : 0.94758         
##              Prevalence : 0.13437         
##          Detection Rate : 0.08698         
##    Detection Prevalence : 0.09598         
##       Balanced Accuracy : 0.81846         
##                                           
##        'Positive' Class : yes             
## 

Find the important variables

library("caret")
# Unscaled variable-importance scores taken from the tuned caret model.
importance <- varImp(model, scale = FALSE)
importance
## rpart variable importance
## 
##                               Overall
## total_day_minutes             219.693
## total_day_charge              206.025
## number_customer_service_calls 168.529
## international_planyes         163.107
## total_intl_minutes            135.324
## total_eve_minutes             117.225
## total_intl_charge             116.860
## total_eve_charge              111.593
## number_vmail_messages          52.586
## voice_mail_planyes             52.586
## total_intl_calls               52.444
## total_night_minutes            24.705
## total_night_charge             18.159
## total_night_calls              11.200
## total_day_calls                 2.214
## total_eve_calls                 0.000
# Plot the variable-importance scores stored in `importance`.
plot(importance)

ROC

#install.packages("ROCR")
library(ROCR)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# Class probabilities for the churnTest rows; one column per class level.
predictions <- predict(model, churnTest, type = "prob")
head(predictions)
##          yes        no
## 1 0.02701486 0.9729851
## 2 0.10240964 0.8975904
## 3 0.11320755 0.8867925
## 4 0.02701486 0.9729851
## 5 0.02701486 0.9729851
## 6 0.10240964 0.8975904
# Keep the probability of the "yes" class. Selecting the column by name
# (rather than by position) keeps this correct if the level order changes.
pred.to.roc <- predictions[, "yes"]
head(pred.to.roc)
## [1] 0.02701486 0.10240964 0.11320755 0.02701486 0.02701486 0.10240964
# Pair the scores with the true labels in a ROCR prediction object.
pred.rocr <- prediction(pred.to.roc, churnTest$churn)
pred.rocr
## An object of class "prediction"
## Slot "predictions":
## [[1]]
##    [1] 0.02701486 0.10240964 0.11320755 0.02701486 0.02701486 0.10240964
##    [7] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##   [13] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
##   [19] 0.00000000 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##   [25] 0.10240964 0.10240964 0.02701486 0.04046243 0.04046243 0.02701486
##   [31] 0.04046243 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##   [37] 0.02701486 0.10240964 0.02701486 1.00000000 0.85000000 0.02701486
##   [43] 0.02701486 0.12500000 0.02701486 0.02701486 0.02701486 0.10240964
##   [49] 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486
##   [55] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##   [61] 0.02701486 0.02701486 0.83333333 0.12500000 0.02701486 0.02701486
##   [67] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667
##   [73] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##   [79] 0.02701486 0.04046243 0.87254902 0.02701486 0.16000000 0.02701486
##   [85] 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667 0.02701486
##   [91] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000
##   [97] 0.02701486 0.02701486 0.02701486 0.02701486 0.11320755 0.04046243
##  [103] 0.10240964 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486
##  [109] 0.10416667 0.95049505 0.02701486 0.02701486 0.02701486 0.10240964
##  [115] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000
##  [121] 0.02701486 0.10240964 0.10240964 0.02701486 0.95049505 0.10240964
##  [127] 0.02701486 0.02701486 1.00000000 0.10240964 0.02701486 0.02701486
##  [133] 0.02701486 0.02701486 0.02701486 0.10416667 0.10240964 0.02701486
##  [139] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##  [145] 0.04046243 0.10240964 0.02701486 0.02701486 0.02701486 0.87254902
##  [151] 0.02701486 0.02701486 0.02701486 0.95049505 0.87254902 0.10240964
##  [157] 0.02701486 0.02701486 0.02701486 0.87500000 0.02701486 1.00000000
##  [163] 0.02701486 0.87500000 0.10416667 1.00000000 0.02701486 0.02701486
##  [169] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486 0.11320755
##  [175] 0.02701486 0.02701486 0.73684211 0.02701486 0.02701486 0.02701486
##  [181] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.11320755
##  [187] 0.10240964 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
##  [193] 0.10240964 0.95049505 0.02701486 0.02701486 0.10240964 0.10240964
##  [199] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##  [205] 0.02701486 0.04046243 0.02701486 0.95049505 0.02701486 0.02701486
##  [211] 0.04046243 0.02701486 0.02701486 0.02701486 0.10240964 0.83333333
##  [217] 0.73684211 0.04046243 0.02701486 0.10240964 0.02701486 0.87254902
##  [223] 0.95049505 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667
##  [229] 0.02701486 0.02701486 0.02701486 0.02701486 0.11320755 0.10240964
##  [235] 0.02701486 0.02701486 0.10240964 0.02701486 0.11320755 0.04046243
##  [241] 0.02701486 0.10240964 0.04046243 0.04046243 0.10240964 0.02701486
##  [247] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.04046243
##  [253] 0.87254902 0.10240964 0.85000000 0.02701486 0.02701486 0.02701486
##  [259] 0.04046243 0.10240964 0.11320755 0.02701486 0.10416667 0.11320755
##  [265] 0.04046243 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##  [271] 0.02701486 0.02701486 0.02701486 0.04046243 0.00000000 0.83333333
##  [277] 0.83333333 0.02701486 0.10240964 0.10240964 0.83333333 0.02701486
##  [283] 0.02701486 1.00000000 0.83333333 0.02701486 0.02701486 0.10240964
##  [289] 0.02701486 0.02701486 0.04046243 0.02701486 0.10240964 0.02701486
##  [295] 0.04046243 0.12500000 0.02701486 1.00000000 0.02701486 0.02701486
##  [301] 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486
##  [307] 0.02701486 0.02701486 0.02701486 0.10240964 0.10416667 0.02701486
##  [313] 0.02701486 0.04046243 0.02701486 0.10240964 0.02701486 0.02701486
##  [319] 0.10240964 0.02701486 0.02701486 0.02701486 0.11320755 0.95049505
##  [325] 0.04046243 0.10416667 0.87254902 0.02701486 0.02701486 0.10240964
##  [331] 0.02701486 0.02701486 0.73684211 0.02701486 0.10416667 0.02701486
##  [337] 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486
##  [343] 0.83333333 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
##  [349] 0.83333333 0.02701486 0.02701486 0.02701486 0.02701486 0.95049505
##  [355] 0.04046243 0.85000000 0.02701486 0.02701486 0.02701486 0.95049505
##  [361] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##  [367] 0.02701486 0.12500000 0.02701486 0.02701486 0.02701486 0.02701486
##  [373] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486
##  [379] 0.02701486 0.10240964 0.02701486 0.04046243 0.02701486 0.02701486
##  [385] 0.87254902 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486
##  [391] 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486
##  [397] 0.04046243 0.04046243 0.02701486 0.02701486 0.12500000 0.10416667
##  [403] 0.04046243 0.16000000 0.10416667 0.87254902 0.02701486 0.10240964
##  [409] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##  [415] 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667 0.02701486
##  [421] 0.02701486 0.85000000 0.02701486 0.02701486 0.02701486 0.11320755
##  [427] 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486
##  [433] 0.02701486 0.02701486 0.02701486 0.02701486 0.04046243 0.02701486
##  [439] 0.02701486 0.02701486 0.10240964 0.10416667 0.02701486 0.02701486
##  [445] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##  [451] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
##  [457] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
##  [463] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667
##  [469] 0.02701486 0.02701486 0.11320755 0.02701486 0.02701486 0.02701486
##  [475] 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486 0.04046243
##  [481] 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486 0.10240964
##  [487] 0.10240964 0.02701486 0.02701486 0.95049505 0.02701486 0.02701486
##  [493] 0.02701486 0.02701486 0.02701486 0.02701486 0.04046243 0.10240964
##  [499] 0.10240964 0.02701486 0.11320755 0.02701486 0.02701486 0.10416667
##  [505] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667
##  [511] 0.02701486 0.02701486 0.02701486 0.02701486 0.87254902 0.02701486
##  [517] 0.02701486 0.02701486 0.87254902 0.85000000 0.02701486 0.87254902
##  [523] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486
##  [529] 0.10240964 0.02701486 0.02701486 0.12500000 0.02701486 0.02701486
##  [535] 0.02701486 0.85000000 0.02701486 0.02701486 0.02701486 0.04046243
##  [541] 0.87500000 0.02701486 0.04046243 0.10240964 0.02701486 0.02701486
##  [547] 0.04046243 0.02701486 0.02701486 0.02701486 0.02701486 0.12500000
##  [553] 0.02701486 0.02701486 0.02701486 0.11320755 0.02701486 0.02701486
##  [559] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##  [565] 0.02701486 0.10416667 0.11320755 0.02701486 0.95049505 0.02701486
##  [571] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##  [577] 1.00000000 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##  [583] 0.10240964 1.00000000 0.87254902 0.10416667 0.02701486 0.10416667
##  [589] 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486
##  [595] 0.02701486 0.02701486 0.87254902 0.02701486 0.02701486 0.02701486
##  [601] 0.02701486 0.04046243 0.02701486 0.02701486 0.10240964 0.02701486
##  [607] 0.02701486 0.02701486 0.02701486 0.10240964 0.04046243 0.02701486
##  [613] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
##  [619] 0.02701486 0.02701486 0.04046243 0.16000000 0.02701486 0.04046243
##  [625] 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667 0.10240964
##  [631] 0.02701486 0.02701486 0.10240964 0.95049505 0.02701486 0.16000000
##  [637] 0.02701486 0.04046243 0.02701486 0.10240964 0.87254902 0.02701486
##  [643] 0.02701486 0.02701486 0.02701486 1.00000000 0.02701486 0.02701486
##  [649] 0.04046243 0.10240964 0.10416667 0.02701486 0.02701486 0.87254902
##  [655] 0.95049505 0.02701486 0.16000000 0.02701486 0.02701486 0.02701486
##  [661] 0.95049505 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486
##  [667] 0.02701486 0.02701486 0.02701486 1.00000000 0.02701486 0.02701486
##  [673] 0.02701486 0.10240964 0.02701486 0.02701486 0.10240964 0.02701486
##  [679] 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486 0.04046243
##  [685] 0.02701486 0.87254902 0.02701486 0.02701486 1.00000000 0.02701486
##  [691] 0.87254902 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486
##  [697] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
##  [703] 0.10240964 0.02701486 0.10240964 0.10240964 0.04046243 0.02701486
##  [709] 0.10240964 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486
##  [715] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##  [721] 0.87254902 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486
##  [727] 0.02701486 0.02701486 0.02701486 1.00000000 0.02701486 0.04046243
##  [733] 0.16000000 0.04046243 0.10416667 0.10240964 0.02701486 0.02701486
##  [739] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##  [745] 0.02701486 0.02701486 0.95049505 0.02701486 0.10240964 0.02701486
##  [751] 0.02701486 0.02701486 0.10416667 0.02701486 1.00000000 0.02701486
##  [757] 0.02701486 0.02701486 0.10240964 0.02701486 0.10240964 0.02701486
##  [763] 0.02701486 0.10416667 0.02701486 0.02701486 0.10416667 0.04046243
##  [769] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
##  [775] 0.02701486 0.04046243 0.02701486 0.87254902 0.16000000 0.10240964
##  [781] 0.04046243 0.00000000 0.02701486 0.02701486 0.02701486 0.02701486
##  [787] 0.10240964 0.16000000 0.83333333 0.04046243 0.02701486 0.02701486
##  [793] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.87254902
##  [799] 0.85000000 0.87254902 0.02701486 0.02701486 0.95049505 0.02701486
##  [805] 0.02701486 0.04046243 0.02701486 0.10240964 0.10240964 0.02701486
##  [811] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
##  [817] 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667 0.10240964
##  [823] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##  [829] 0.02701486 1.00000000 0.02701486 0.10240964 0.10240964 0.04046243
##  [835] 0.02701486 0.02701486 0.02701486 0.00000000 1.00000000 0.85000000
##  [841] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
##  [847] 0.02701486 0.02701486 0.16000000 0.02701486 0.10240964 0.02701486
##  [853] 0.12500000 0.02701486 0.04046243 0.10240964 0.02701486 0.95049505
##  [859] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.87254902
##  [865] 0.02701486 0.02701486 0.10240964 0.10240964 0.02701486 0.10416667
##  [871] 0.02701486 0.02701486 0.11320755 0.02701486 0.10240964 0.02701486
##  [877] 0.02701486 0.02701486 0.10240964 0.02701486 0.04046243 0.02701486
##  [883] 0.10240964 0.02701486 0.02701486 0.16000000 0.02701486 0.02701486
##  [889] 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000 0.02701486
##  [895] 0.02701486 0.10240964 1.00000000 1.00000000 0.02701486 0.11320755
##  [901] 0.02701486 0.02701486 0.04046243 0.87254902 0.02701486 0.10240964
##  [907] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
##  [913] 0.02701486 0.04046243 0.02701486 0.02701486 0.10240964 0.02701486
##  [919] 0.10240964 0.02701486 0.95049505 0.02701486 0.02701486 0.02701486
##  [925] 0.10416667 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
##  [931] 0.10240964 0.95049505 0.12500000 0.02701486 0.02701486 1.00000000
##  [937] 0.11320755 0.02701486 0.00000000 0.02701486 0.02701486 0.04046243
##  [943] 0.02701486 0.02701486 0.10240964 0.02701486 0.73684211 0.02701486
##  [949] 0.02701486 0.02701486 1.00000000 0.10240964 0.87254902 0.02701486
##  [955] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000
##  [961] 0.02701486 0.02701486 0.02701486 0.87254902 0.95049505 0.02701486
##  [967] 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486
##  [973] 0.02701486 0.02701486 0.95049505 0.02701486 1.00000000 0.02701486
##  [979] 0.04046243 0.16000000 0.02701486 0.02701486 0.10416667 0.10416667
##  [985] 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486
##  [991] 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486 1.00000000
##  [997] 0.02701486 0.83333333 0.10240964 0.95049505 0.02701486 0.02701486
## [1003] 0.02701486 0.10240964 1.00000000 0.10240964 0.02701486 0.02701486
## [1009] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667
## [1015] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [1021] 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486 0.02701486
## [1027] 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486 0.04046243
## [1033] 0.10240964 0.87254902 0.10416667 0.02701486 0.02701486 0.02701486
## [1039] 1.00000000 0.02701486 0.10240964 0.10240964 0.02701486 0.02701486
## [1045] 0.10416667 0.02701486 0.10416667 0.02701486 0.10416667 0.02701486
## [1051] 0.87254902 0.02701486 0.10240964 1.00000000 0.02701486 0.02701486
## [1057] 0.10240964 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486
## [1063] 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486
## [1069] 0.02701486 0.04046243 0.10240964 0.87254902 0.73684211 0.10416667
## [1075] 0.02701486 0.02701486 0.02701486 0.02701486 0.85000000 0.02701486
## [1081] 0.10240964 0.02701486 0.02701486 0.02701486 0.10416667 0.02701486
## [1087] 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486 0.02701486
## [1093] 0.10416667 0.10240964 0.02701486 0.11320755 0.02701486 0.02701486
## [1099] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1105] 1.00000000 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1111] 0.02701486 0.10240964 0.02701486 0.02701486 0.10416667 0.02701486
## [1117] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.10240964
## [1123] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1129] 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964 0.16000000
## [1135] 0.02701486 0.10240964 0.02701486 0.04046243 1.00000000 0.04046243
## [1141] 0.02701486 0.02701486 0.02701486 0.95049505 0.02701486 0.02701486
## [1147] 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964 0.04046243
## [1153] 0.02701486 0.95049505 0.10240964 1.00000000 0.10416667 0.02701486
## [1159] 0.04046243 0.95049505 0.02701486 0.10240964 0.10240964 0.02701486
## [1165] 0.02701486 0.04046243 0.02701486 0.02701486 0.04046243 0.87254902
## [1171] 0.04046243 0.02701486 0.02701486 0.02701486 0.85000000 0.02701486
## [1177] 0.11320755 0.10240964 0.02701486 0.02701486 0.95049505 0.02701486
## [1183] 0.16000000 0.02701486 0.02701486 0.10240964 0.04046243 0.02701486
## [1189] 0.02701486 0.02701486 0.10416667 0.04046243 0.04046243 0.02701486
## [1195] 0.10240964 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
## [1201] 0.02701486 0.02701486 0.02701486 0.02701486 0.87254902 0.10416667
## [1207] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1213] 0.02701486 0.02701486 0.12500000 0.02701486 0.02701486 0.10416667
## [1219] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486
## [1225] 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486 0.87254902
## [1231] 0.02701486 0.02701486 0.10240964 0.02701486 0.11320755 0.87254902
## [1237] 0.02701486 0.02701486 0.10240964 0.16000000 0.02701486 0.02701486
## [1243] 0.02701486 0.10240964 1.00000000 0.02701486 0.02701486 0.02701486
## [1249] 0.02701486 0.02701486 0.10240964 0.02701486 0.10240964 0.04046243
## [1255] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.87254902
## [1261] 0.02701486 0.11320755 0.02701486 0.10240964 1.00000000 0.02701486
## [1267] 0.95049505 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486
## [1273] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1279] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1285] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.10240964
## [1291] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.87254902
## [1297] 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486 0.04046243
## [1303] 0.02701486 1.00000000 0.02701486 0.95049505 0.02701486 0.10240964
## [1309] 0.02701486 0.02701486 1.00000000 0.10416667 0.02701486 0.02701486
## [1315] 0.02701486 0.02701486 0.02701486 0.95049505 0.02701486 0.02701486
## [1321] 0.16000000 0.02701486 0.02701486 0.02701486 0.02701486 0.83333333
## [1327] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1333] 0.10240964 0.02701486 0.04046243 0.02701486 0.02701486 0.10416667
## [1339] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000
## [1345] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1351] 0.02701486 0.02701486 0.02701486 0.02701486 0.95049505 0.02701486
## [1357] 0.04046243 0.02701486 0.10240964 0.10416667 0.02701486 0.02701486
## [1363] 0.02701486 0.02701486 0.02701486 0.10416667 0.10416667 0.10416667
## [1369] 0.10240964 0.10240964 0.10240964 0.10240964 0.02701486 0.02701486
## [1375] 0.02701486 0.10416667 0.02701486 0.95049505 0.04046243 0.02701486
## [1381] 0.04046243 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [1387] 0.02701486 0.02701486 1.00000000 0.02701486 0.04046243 0.02701486
## [1393] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1399] 0.11320755 0.02701486 0.02701486 0.10240964 0.10240964 0.10240964
## [1405] 0.87254902 0.02701486 0.02701486 0.87254902 0.02701486 0.04046243
## [1411] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
## [1417] 0.02701486 0.02701486 0.10416667 0.02701486 0.10240964 0.02701486
## [1423] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [1429] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1435] 0.02701486 0.10416667 0.02701486 0.02701486 0.02701486 0.87254902
## [1441] 0.87254902 0.10240964 0.02701486 0.10240964 0.02701486 0.87254902
## [1447] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1453] 0.02701486 0.00000000 0.02701486 0.87254902 0.10240964 0.10416667
## [1459] 0.02701486 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486
## [1465] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
## [1471] 0.95049505 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1477] 0.95049505 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1483] 0.02701486 0.02701486 0.00000000 0.02701486 0.02701486 0.02701486
## [1489] 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000 0.02701486
## [1495] 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667 0.95049505
## [1501] 0.02701486 0.02701486 0.02701486 0.02701486 0.95049505 0.10240964
## [1507] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [1513] 0.04046243 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486
## [1519] 0.02701486 0.10240964 0.02701486 0.10240964 0.02701486 0.16000000
## [1525] 0.02701486 0.02701486 0.02701486 0.10240964 0.95049505 0.95049505
## [1531] 0.10240964 0.02701486 0.00000000 0.12500000 0.02701486 0.02701486
## [1537] 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486 0.02701486
## [1543] 0.02701486 0.02701486 0.02701486 0.02701486 0.04046243 0.10240964
## [1549] 0.02701486 0.02701486 0.02701486 0.02701486 0.16000000 0.10240964
## [1555] 0.02701486 0.02701486 0.02701486 0.87254902 0.02701486 0.02701486
## [1561] 0.02701486 0.10240964 1.00000000 0.02701486 0.83333333 0.95049505
## [1567] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1573] 0.02701486 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486
## [1579] 0.10240964 0.02701486 0.02701486 0.95049505 0.02701486 0.10416667
## [1585] 1.00000000 0.95049505 0.12500000 0.02701486 0.02701486 0.02701486
## [1591] 0.02701486 0.95049505 0.02701486 0.02701486 0.02701486 0.10240964
## [1597] 0.02701486 0.10416667 0.02701486 0.02701486 0.02701486 0.10416667
## [1603] 0.02701486 0.02701486 1.00000000 0.02701486 0.87254902 0.10240964
## [1609] 0.02701486 0.02701486 0.95049505 0.02701486 0.02701486 0.02701486
## [1615] 0.73684211 1.00000000 0.02701486 0.02701486 0.02701486 0.02701486
## [1621] 0.87254902 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1627] 0.95049505 0.02701486 0.02701486 0.02701486 0.04046243 0.02701486
## [1633] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1639] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486
## [1645] 0.04046243 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667
## [1651] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486
## [1657] 0.02701486 0.10240964 0.85000000 0.02701486 0.02701486 0.02701486
## [1663] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
## 
## 
## Slot "labels":
## [[1]]
##    [1] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
##   [18] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
##   [35] no  no  yes no  no  yes yes no  no  yes no  no  no  no  no  yes no 
##   [52] no  no  no  no  no  no  no  no  no  no  no  yes no  no  no  no  no 
##   [69] no  no  no  no  no  no  no  no  yes no  no  no  yes no  no  no  no 
##   [86] no  no  no  yes no  no  no  no  no  no  yes no  no  no  no  no  no 
##  [103] no  no  no  no  yes no  no  yes no  no  no  no  no  no  no  no  no 
##  [120] yes no  no  no  no  yes no  no  no  yes no  no  no  no  no  no  yes
##  [137] no  no  no  no  no  no  no  no  no  no  no  no  no  yes no  no  no 
##  [154] yes yes no  no  no  no  no  no  yes no  no  no  yes no  no  no  no 
##  [171] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
##  [188] no  no  no  no  no  yes no  no  no  no  no  no  no  no  no  no  no 
##  [205] no  no  no  yes no  no  no  no  no  yes no  yes yes no  yes no  no 
##  [222] yes yes no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
##  [239] no  no  no  no  no  no  no  no  no  no  no  no  no  no  yes no  yes
##  [256] no  no  no  no  no  no  no  yes no  no  no  no  no  yes no  no  yes
##  [273] no  no  no  yes no  no  no  no  yes yes no  yes yes no  no  no  no 
##  [290] no  no  no  no  no  no  yes no  yes no  no  no  no  no  no  no  yes
##  [307] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
##  [324] yes no  no  yes no  no  no  no  no  yes no  yes no  no  no  no  no 
##  [341] no  no  yes no  no  no  no  no  yes no  no  no  no  yes no  yes no 
##  [358] no  no  yes no  no  no  no  no  no  no  no  no  yes no  no  no  no 
##  [375] no  no  no  no  no  no  no  no  no  no  yes no  no  no  no  no  no 
##  [392] no  no  no  no  no  no  no  no  no  yes no  no  no  no  yes no  no 
##  [409] no  no  no  no  no  yes no  no  no  no  no  no  no  yes no  no  no 
##  [426] no  no  no  no  no  no  no  no  no  no  no  yes no  no  no  no  no 
##  [443] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
##  [460] yes no  no  no  no  no  no  no  no  no  no  yes no  no  no  no  no 
##  [477] no  no  no  no  no  no  no  no  no  no  no  no  no  yes no  no  no 
##  [494] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  yes no 
##  [511] no  no  no  no  yes no  no  no  yes yes no  yes no  no  no  no  no 
##  [528] no  no  no  no  no  no  no  no  yes no  no  no  no  yes no  no  no 
##  [545] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
##  [562] no  no  no  no  no  no  no  yes no  no  no  no  no  no  no  yes no 
##  [579] no  no  no  no  no  yes no  no  no  no  no  no  no  no  no  no  no 
##  [596] no  no  no  yes yes no  no  no  no  no  no  no  no  no  no  no  no 
##  [613] no  no  no  no  no  no  no  no  no  yes no  no  no  no  no  no  yes
##  [630] no  no  no  no  yes no  no  no  no  no  no  yes no  no  no  no  yes
##  [647] no  no  no  no  yes no  no  yes yes no  no  no  yes no  yes no  no 
##  [664] no  no  no  no  no  no  yes no  no  no  no  no  no  no  no  no  no 
##  [681] no  no  no  no  no  yes no  no  yes no  yes no  no  no  yes no  no 
##  [698] yes no  no  no  no  no  no  no  no  no  no  yes no  no  no  no  no 
##  [715] no  yes no  no  no  no  yes no  no  no  no  no  no  no  no  yes no 
##  [732] no  no  no  yes no  no  no  no  no  no  no  no  no  no  no  yes no 
##  [749] no  no  no  no  no  no  yes no  no  no  no  no  yes no  no  no  no 
##  [766] no  no  no  no  no  no  no  no  no  no  no  no  no  no  yes no  no 
##  [783] no  no  no  no  no  yes yes no  no  no  no  no  no  no  no  yes yes
##  [800] no  no  no  yes no  yes no  no  yes yes no  no  no  no  no  no  no 
##  [817] no  no  no  no  yes no  no  no  yes yes no  no  no  yes no  no  yes
##  [834] no  no  no  no  no  yes no  no  no  no  no  no  no  no  no  no  no 
##  [851] no  no  yes no  no  no  no  yes no  no  no  no  no  yes no  no  no 
##  [868] no  no  yes no  no  no  no  no  no  no  yes yes no  no  no  no  no 
##  [885] no  no  no  no  no  no  no  no  yes no  no  yes yes yes no  no  no 
##  [902] no  no  yes no  no  no  no  no  no  no  no  no  no  no  no  no  no 
##  [919] yes no  yes no  no  no  yes yes no  no  no  no  no  yes no  no  no 
##  [936] yes no  no  no  no  no  no  no  no  no  no  no  yes no  no  yes no 
##  [953] yes no  no  no  no  no  no  yes no  yes no  yes yes no  no  no  no 
##  [970] no  no  no  no  no  yes no  yes no  no  no  no  no  no  no  no  no 
##  [987] no  no  no  no  no  yes no  no  no  yes no  yes no  yes no  no  no 
## [1004] no  yes no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
## [1021] no  no  yes no  no  no  no  no  no  no  no  no  no  yes no  no  no 
## [1038] yes yes no  no  no  no  yes no  no  no  no  no  no  yes no  no  yes
## [1055] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
## [1072] yes no  yes no  no  no  no  yes no  no  no  no  no  no  no  no  no 
## [1089] yes no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  yes
## [1106] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
## [1123] no  no  no  no  no  no  no  no  no  no  yes no  no  no  no  no  yes
## [1140] no  no  no  no  yes no  no  no  no  no  no  no  no  no  no  no  yes
## [1157] yes no  no  yes no  no  no  no  no  no  no  no  no  yes no  no  no 
## [1174] no  yes no  no  no  no  no  yes no  no  no  no  no  no  no  no  no 
## [1191] no  no  no  no  no  no  no  no  no  no  no  no  no  no  yes no  no 
## [1208] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
## [1225] no  no  no  no  no  yes no  no  no  no  no  yes no  no  no  yes no 
## [1242] no  no  yes yes no  no  no  no  no  no  no  no  no  no  no  no  no 
## [1259] no  yes no  no  no  no  yes no  yes no  no  no  no  no  no  no  no 
## [1276] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  yes
## [1293] no  no  no  yes no  no  no  no  no  no  no  yes no  yes no  yes no 
## [1310] no  yes no  no  no  no  no  no  yes no  no  no  no  no  no  no  yes
## [1327] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
## [1344] yes no  yes no  no  no  no  no  no  no  no  yes no  no  no  no  yes
## [1361] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
## [1378] yes no  no  no  no  no  no  no  no  no  no  yes no  no  no  no  no 
## [1395] no  no  no  no  no  no  no  no  yes no  yes no  no  yes no  no  no 
## [1412] yes no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
## [1429] no  no  no  no  no  no  no  no  no  no  no  yes yes no  no  yes no 
## [1446] yes no  no  no  no  no  no  no  no  no  yes no  no  no  no  no  no 
## [1463] no  no  no  yes no  no  no  no  yes no  no  no  no  no  yes no  no 
## [1480] no  no  no  no  no  no  no  no  no  no  no  no  no  yes no  no  no 
## [1497] no  no  no  yes no  no  no  no  yes no  no  no  no  no  no  no  no 
## [1514] no  no  no  no  no  no  no  no  no  no  yes no  no  no  no  yes yes
## [1531] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
## [1548] no  no  no  no  no  no  no  no  no  no  yes no  no  no  no  yes no 
## [1565] no  yes no  no  no  no  no  no  no  no  no  no  no  no  no  no  no 
## [1582] yes no  no  yes yes yes no  yes no  no  yes no  no  no  no  no  no 
## [1599] no  no  no  no  no  no  yes no  yes no  no  no  yes no  no  no  no 
## [1616] yes no  no  no  no  yes no  no  no  no  no  yes no  no  no  no  no 
## [1633] no  no  no  no  no  no  no  no  no  no  no  no  no  no  no  yes no 
## [1650] no  no  no  no  no  no  no  no  yes yes no  no  no  no  yes no  no 
## [1667] no 
## Levels: no < yes
## 
## 
## Slot "cutoffs":
## [[1]]
##  [1]        Inf 1.00000000 0.95049505 0.87500000 0.87254902 0.85000000
##  [7] 0.83333333 0.73684211 0.16000000 0.12500000 0.11320755 0.10416667
## [13] 0.10240964 0.04046243 0.02701486 0.00000000
## 
## 
## Slot "fp":
## [[1]]
##  [1]    0    0    2    4    8    9   11   15   28   35   56  107  273  357
## [15] 1435 1443
## 
## 
## Slot "tp":
## [[1]]
##  [1]   0  42  83  84 123 133 143 145 149 154 155 170 190 192 224 224
## 
## 
## Slot "tn":
## [[1]]
##  [1] 1443 1443 1441 1439 1435 1434 1432 1428 1415 1408 1387 1336 1170 1086
## [15]    8    0
## 
## 
## Slot "fn":
## [[1]]
##  [1] 224 182 141 140 101  91  81  79  75  70  69  54  34  32   0   0
## 
## 
## Slot "n.pos":
## [[1]]
## [1] 224
## 
## 
## Slot "n.neg":
## [[1]]
## [1] 1443
## 
## 
## Slot "n.pos.pred":
## [[1]]
##  [1]    0   42   85   88  131  142  154  160  177  189  211  277  463  549
## [15] 1659 1667
## 
## 
## Slot "n.neg.pred":
## [[1]]
##  [1] 1667 1625 1582 1579 1536 1525 1513 1507 1490 1478 1456 1390 1204 1118
## [15]    8    0
# Area under the ROC curve for the caret model on churnTest.
perf.rocr <- performance(pred.rocr, measure = "auc")
# ROC curve: true-positive rate against false-positive rate.
perf.tpr.rocr <- performance(pred.rocr, measure = "tpr", x.measure = "fpr")
# y.values is a list, so extract the scalar AUC explicitly and round it for a
# readable plot title instead of pasting the list itself.
plot(
  perf.tpr.rocr,
  main = paste("AUC:", round(perf.rocr@y.values[[1]], 4))
)

Model comparison

# rpart: fit a classification tree on the training split.
library("rpart")
churn.rp <- rpart(churn ~ ., data = trainset)

# ctree is provided by the party package.
# install.packages("party")
library("party")
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
# Conditional inference tree on the same training split.
ctree.model <- ctree(churn ~ ., data = trainset)

# C5.0 tree on the same training split.
library(C50)
c50.model <- C5.0(churn ~ ., data = trainset)

# Class-probability predictions on the validation split for each model.
# The first probability column / element is used as the score
# (presumably the "yes" class -- confirm against levels(testset$churn)).
rp.predict.prob <- predict(churn.rp, testset, type = "prob")
c50.predict.prob <- predict(c50.model, testset, type = "prob")
ctree.predict.prob <- sapply(
  predict(ctree.model, testset, type = "prob"),
  function(e) {
    unlist(e)[1]
  }
)

# Pair the scores with the true labels as ROCR prediction objects.
rp.prediction <- prediction(rp.predict.prob[, 1], testset$churn)
c50.prediction <- prediction(c50.predict.prob[, 1], testset$churn)
ctree.prediction <- prediction(ctree.predict.prob, testset$churn)

# True-positive rate against false-positive rate for each model.
rp.performance <- performance(rp.prediction, "tpr", "fpr")
c50.performance <- performance(c50.prediction, "tpr", "fpr")
ctree.performance <- performance(ctree.prediction, "tpr", "fpr")

# Overlay the three ROC curves on one plot.
plot(rp.performance, col = "red")
plot(c50.performance, add = TRUE, col = "green")
plot(ctree.performance, add = TRUE, col = "blue")

# AUC for each model; the value is read from the @y.values slot.
rp.per.obj <- performance(rp.prediction, measure = "auc")
c50.per.obj <- performance(c50.prediction, measure = "auc")
ctree.per.obj <- performance(ctree.prediction, measure = "auc")

rp.per.obj@y.values
## [[1]]
## [1] 0.9090751
c50.per.obj@y.values
## [[1]]
## [1] 0.8849438
ctree.per.obj@y.values
## [[1]]
## [1] 0.9106197

補充:隨機森林(Random Forest)

library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
# Fit a random forest on the full training data with default settings.
rf_model <- randomForest(formula = churn ~ ., data = churnTrain)

# Find the best ntree: plot the error rates against the number of trees.
plot(rf_model)
legend("topright", colnames(rf_model$err.rate), col = 1:3, cex = 0.8, fill = 1:3)

# Find the best mtry. The response is selected by name instead of by the
# hard-coded column position 17, so the call stays correct if the set of
# modelling columns changes.
tuneRF(churnTrain[, names(churnTrain) != "churn"], churnTrain$churn)
## mtry = 4  OOB error = 4.5% 
## Searching left ...
## mtry = 2     OOB error = 6.12% 
## -0.36 0.05 
## Searching right ...
## mtry = 8     OOB error = 4.68% 
## -0.04 0.05

##       mtry   OOBError
## 2.OOB    2 0.06120612
## 4.OOB    4 0.04500450
## 8.OOB    8 0.04680468
# Refit with a fixed number of trees and mtry = 4, then tabulate the
# test-set predictions against the true labels.
rf_model <- randomForest(churn ~ ., data = churnTrain, ntree = 50, mtry = 4)
# rf_model = train(churn~.,data=churnTrain,method='rf')
confusionMatrix(
  table(predict(rf_model, churnTest), churnTest$churn)
)
## Confusion Matrix and Statistics
## 
##      
##        yes   no
##   yes  163    9
##   no    61 1434
##                                           
##                Accuracy : 0.958           
##                  95% CI : (0.9472, 0.9671)
##     No Information Rate : 0.8656          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7999          
##  Mcnemar's Test P-Value : 1.09e-09        
##                                           
##             Sensitivity : 0.72768         
##             Specificity : 0.99376         
##          Pos Pred Value : 0.94767         
##          Neg Pred Value : 0.95920         
##              Prevalence : 0.13437         
##          Detection Rate : 0.09778         
##    Detection Prevalence : 0.10318         
##       Balanced Accuracy : 0.86072         
##                                           
##        'Positive' Class : yes             
## 
# Random-forest class probabilities on the test set; the first column is
# used as the ROCR score.
rf.predict.prob <- predict(rf_model, churnTest, type = "prob")
rf.prediction <- prediction(rf.predict.prob[, 1], as.factor(churnTest$churn))

# ROC curve (TPR against FPR) and the AUC object for the forest.
rf.performance <- performance(rf.prediction, "tpr", "fpr")
rf.auc <- performance(rf.prediction, measure = "auc", x.measure = "cutoff")
plot(rf.performance)

# Compare CART and random forest.
# Tune rpart's complexity parameter over a grid from 0.01 to 0.1.
tune_funs <- expand.grid(cp = seq(0.01, 0.1, 0.01))
rpart_model <- train(
  churn ~ .,
  data = churnTrain,
  method = "rpart",
  tuneGrid = tune_funs
)

# First probability column of the tuned model, scored against the test labels.
rpart_prob_yes <- predict(rpart_model, churnTest, type = "prob")[, 1]
rpart_pred.rocr <- prediction(rpart_prob_yes, churnTest$churn)
rpart_perf.rocr <- performance(
  rpart_pred.rocr,
  measure = "tpr",
  x.measure = "fpr"
)

# Overlay both ROC curves; the fourth positional argument of legend() is `fill`.
plot(rpart_perf.rocr, col = "red")
plot(rf.performance, col = "black", add = TRUE)
legend(0.7, 0.2, c("randomforest", "rpart"), 1:2)

分群問題

距離計算

# Two example vectors used to compare distance measures.
x <- c(0, 0, 1, 1, 1, 1)
y <- c(1, 0, 1, 1, 0, 1)

# Euclidean distance: dist(), the explicit formula, and Minkowski with p = 2.
?dist
rbind(x, y)
##   [,1] [,2] [,3] [,4] [,5] [,6]
## x    0    0    1    1    1    1
## y    1    0    1    1    0    1
dist(rbind(x, y), method = "euclidean")
##          x
## y 1.414214
sqrt(sum((x - y)^2))
## [1] 1.414214
dist(rbind(x, y), method = "minkowski", p = 2)
##          x
## y 1.414214
# City-block (Manhattan) distance: dist(), the explicit formula, and
# Minkowski with p = 1.
dist(rbind(x, y), method = "manhattan")
##   x
## y 2
sum(abs(x - y))
## [1] 2
dist(rbind(x, y), method = "minkowski", p = 1)
##   x
## y 2

Hierarchical Clustering

聚合式(bottom-up)

# Load the customer data set and inspect it.
# NOTE(review): setwd() ties the script to a machine-specific directory;
# the relative path below is resolved against it.
setwd("~/lecture/riii")
customer <- read.csv("data/customer.csv", header = TRUE)
head(customer)
##   ID Visit.Time Average.Expense Sex Age
## 1  1          3             5.7   0  10
## 2  2          5            14.5   0  27
## 3  3         16            33.5   0  32
## 4  4          5            15.9   0  30
## 5  5         16            24.9   0  23
## 6  6          3            12.0   0  15
str(customer)
## 'data.frame':    60 obs. of  5 variables:
##  $ ID             : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Visit.Time     : int  3 5 16 5 16 3 12 14 6 3 ...
##  $ Average.Expense: num  5.7 14.5 33.5 15.9 24.9 12 28.5 18.8 23.8 5.3 ...
##  $ Sex            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Age            : int  10 27 32 30 23 15 33 27 16 11 ...
# Standardise the numeric variables (the first column, ID, is dropped).
customer_s <- scale(customer[, -1])
?scale

# After standardisation a variable has mean 0 and standard deviation 1.
round(mean(customer_s[, 2]), 3)
## [1] 0
round(sd(customer_s[, 2]), 3)
## [1] 1
# Agglomerative clustering on Euclidean distances with Ward linkage.
?hclust
hc <- hclust(dist(customer_s, method = "euclidean"), method = "ward.D2")
plot(hc, hang = -0.01, cex = 0.7)

# Single-linkage clustering for comparison with the Ward dendrogram.
# Distances are computed on the standardised matrix customer_s rather than
# on the raw customer data frame, which still contains the ID column and
# unscaled variables that would dominate the Euclidean distance.
hc3 <- hclust(dist(customer_s, method = "euclidean"), method = "single")
plot(hc3, hang = -0.01, cex = 0.8)

cutree

# Cut the Ward dendrogram into four clusters and show the assignments.
fit <- cutree(hc, k = 4)
fit
##  [1] 1 1 2 1 2 1 2 2 1 1 1 2 2 1 1 1 2 1 2 3 4 3 4 3 3 4 4 3 4 4 4 3 3 3 4
## [36] 4 3 4 4 4 4 4 4 4 3 3 4 4 4 3 4 3 3 4 4 4 3 4 4 3
# Cluster sizes.
table(fit)
## fit
##  1  2  3  4 
## 11  8 16 25
# Redraw the dendrogram and outline the 4-cluster (red) and 3-cluster
# (blue) solutions.
plot(hc, hang = -0.01, cex = 0.7)
rect.hclust(hc, k = 4, border = "red")
rect.hclust(hc, k = 3, border = "blue")

# Summarise the original (unscaled) rows that fall in cluster 1.
c_1 <- customer[fit == 1, ]
summary(c_1)
##        ID           Visit.Time    Average.Expense      Sex         Age    
##  Min.   : 1.000   Min.   :3.000   Min.   : 4.60   Min.   :0   Min.   : 9  
##  1st Qu.: 5.000   1st Qu.:3.500   1st Qu.: 7.15   1st Qu.:0   1st Qu.:12  
##  Median :10.000   Median :5.000   Median :14.50   Median :0   Median :16  
##  Mean   : 9.636   Mean   :4.909   Mean   :12.71   Mean   :0   Mean   :17  
##  3rd Qu.:14.500   3rd Qu.:6.000   3rd Qu.:16.00   3rd Qu.:0   3rd Qu.:20  
##  Max.   :18.000   Max.   :8.000   Max.   :23.80   Max.   :0   Max.   :30

分裂式階層式(top-down)

# Divisive hierarchical clustering with diana() from the cluster package.
# install.packages("cluster")
library(cluster)
?diana
dv <- diana(customer_s, metric = "euclidean")
summary(dv)
## Merge:
##       [,1] [,2]
##  [1,]  -24  -50
##  [2,]  -28  -46
##  [3,]   -7  -13
##  [4,]  -30  -35
##  [5,]  -21  -40
##  [6,]  -54  -58
##  [7,]  -23  -26
##  [8,]   -1  -10
##  [9,]    7  -51
## [10,]  -27  -59
## [11,]    5  -39
## [12,]  -32  -45
## [13,]   -8  -12
## [14,]   -2   -4
## [15,]  -14  -18
## [16,]   11  -43
## [17,]  -44  -49
## [18,]    9  -56
## [19,]  -37  -60
## [20,]   -6  -11
## [21,]  -29  -48
## [22,]   -5  -19
## [23,]   10  -36
## [24,]  -42   17
## [25,]  -25   12
## [26,]   18  -41
## [27,]   21  -38
## [28,]   13  -17
## [29,]  -34  -52
## [30,]   16    6
## [31,]    8   20
## [32,]   26    4
## [33,]   19  -57
## [34,]  -47  -55
## [35,]   25  -53
## [36,]   24  -31
## [37,]   30   36
## [38,]   -3    3
## [39,]   -9   15
## [40,]  -33   33
## [41,]   32   23
## [42,]   22   28
## [43,]   31  -15
## [44,]   37   27
## [45,]  -20   40
## [46,]  -22   35
## [47,]   44   34
## [48,]   14   39
## [49,]    1   29
## [50,]   45    2
## [51,]   38   42
## [52,]   43  -16
## [53,]   46   49
## [54,]   52   48
## [55,]   47   41
## [56,]   50   53
## [57,]   54   55
## [58,]   51   56
## [59,]   57   58
## Order of objects:
##  [1]  1 10  6 11 15 16  2  4  9 14 18 21 40 39 43 54 58 42 44 49 31 29 48
## [24] 38 47 55 23 26 51 56 41 30 35 27 59 36  3  7 13  5 19  8 12 17 20 33
## [47] 37 60 57 28 46 22 25 32 45 53 24 50 34 52
## Height:
##  [1] 0.11775833 0.92338041 0.50974266 1.47360965 2.04722777 2.51250579
##  [7] 0.36355872 1.79099892 1.08967479 0.39308959 3.57679780 0.00000000
## [13] 0.21833707 0.44391855 0.80354844 0.08334529 0.98499722 0.70126085
## [19] 0.44921797 0.98499722 1.48962560 0.55960408 0.76573069 1.77868059
## [25] 0.97891452 2.79693737 0.09525176 0.12305649 0.48657744 0.76517620
## [31] 0.93270565 0.00000000 1.28196769 0.16054657 0.60321756 5.85655734
## [37] 1.07657773 0.00000000 1.98611220 0.59473487 1.44920797 0.33912975
## [43] 0.78523518 3.88572195 1.51921913 1.18521332 0.50902071 0.97225583
## [49] 1.91123321 0.00000000 3.39304108 1.52798723 0.72296652 0.31544012
## [55] 0.98335831 2.45910026 0.00000000 1.85224545 0.79085454
## Divisive coefficient:
## [1] 0.9117911
## 
## 1770 dissimilarities, summarized :
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.845   2.572   2.595   3.354   5.857 
## Metric :  euclidean 
## Number of objects : 60
## 
## Available components:
## [1] "order"  "height" "dc"     "merge"  "diss"   "call"   "data"
# Plot the divisive clustering result.
plot(dv)

# Cut it into four clusters and summarise the original rows in cluster 1.
fit2 <- cutree(dv, k = 4)
c_1 <- customer[fit2 == 1, ]
summary(c_1)
##        ID           Visit.Time    Average.Expense      Sex         Age    
##  Min.   : 1.000   Min.   :3.000   Min.   : 4.60   Min.   :0   Min.   : 9  
##  1st Qu.: 5.000   1st Qu.:3.500   1st Qu.: 7.15   1st Qu.:0   1st Qu.:12  
##  Median :10.000   Median :5.000   Median :14.50   Median :0   Median :16  
##  Mean   : 9.636   Mean   :4.909   Mean   :12.71   Mean   :0   Mean   :17  
##  3rd Qu.:14.500   3rd Qu.:6.000   3rd Qu.:16.00   3rd Qu.:0   3rd Qu.:20  
##  Max.   :18.000   Max.   :8.000   Max.   :23.80   Max.   :0   Max.   :30

k-means

# Structure of the standardised matrix used for k-means.
str(customer_s)
##  num [1:60, 1:4] -1.202 -0.757 1.692 -0.757 1.692 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:4] "Visit.Time" "Average.Expense" "Sex" "Age"
##  - attr(*, "scaled:center")= Named num [1:4] 8.4 17.058 0.683 21.433
##   ..- attr(*, "names")= chr [1:4] "Visit.Time" "Average.Expense" "Sex" "Age"
##  - attr(*, "scaled:scale")= Named num [1:4] 4.492 8.399 0.469 9.285
##   ..- attr(*, "names")= chr [1:4] "Visit.Time" "Average.Expense" "Sex" "Age"
# k-means with four centres; the seed fixes the random initial centres.
set.seed(22)
fit <- kmeans(customer_s, centers = 4)
?kmeans

# Grouped bar chart of the cluster centres.
barplot(t(fit$centers), beside = TRUE, xlab = "cluster", ylab = "value")

?barplot
fit$centers
##   Visit.Time Average.Expense        Sex        Age
## 1  1.3302016       1.0155226 -1.4566845  0.5591307
## 2 -0.7771737      -0.5178412 -1.4566845 -0.4774599
## 3  0.8571173       0.9887331  0.6750489  1.0505015
## 4 -0.6322632      -0.7299063  0.6750489 -0.6411604
# Original (unscaled) rows assigned to cluster 1.
customer[fit$cluster == 1, ]
##    ID Visit.Time Average.Expense Sex Age
## 3   3         16            33.5   0  32
## 5   5         16            24.9   0  23
## 7   7         12            28.5   0  33
## 8   8         14            18.8   0  27
## 12 12         14            21.0   0  25
## 13 13         12            28.5   0  33
## 17 17         14            23.6   0  22
## 19 19         17            25.9   0  18

投影至二維空間,繪製分群結果

# Scatterplot matrix of the original variables, coloured by k-means cluster.
plot(customer[, -1], col = fit$cluster)

# Cluster plot of the standardised data with the k-means assignments.
# install.packages("cluster")
library(cluster)
clusplot(customer_s, fit$cluster, color = TRUE, shade = TRUE)

# Inspect what the principal components are made of.
pca <- princomp(customer_s)
summary(pca)
## Importance of components:
##                           Comp.1    Comp.2     Comp.3     Comp.4
## Standard deviation     1.5339215 0.9953978 0.62428436 0.44706853
## Proportion of Variance 0.5981988 0.2519026 0.09908414 0.05081448
## Cumulative Proportion  0.5981988 0.8501014 0.94918552 1.00000000
pca$loadings
## 
## Loadings:
##                 Comp.1 Comp.2 Comp.3 Comp.4
## Visit.Time       0.576         0.601  0.554
## Average.Expense  0.602         0.146 -0.785
## Sex                    -0.989  0.133       
## Age              0.550 -0.148 -0.775  0.274
## 
##                Comp.1 Comp.2 Comp.3 Comp.4
## SS loadings      1.00   1.00   1.00   1.00
## Proportion Var   0.25   0.25   0.25   0.25
## Cumulative Var   0.25   0.50   0.75   1.00

Evaluating model

# Silhouette analysis of a 4-cluster k-means solution.
library("cluster")
par(mfrow = c(1, 1))

# Seed immediately before kmeans() so the initial centres are reproducible.
set.seed(22)
km <- kmeans(customer_s, 4)
kms <- silhouette(km$cluster, dist(customer_s))
summary(kms)
## Silhouette of 60 units in 4 clusters from silhouette.default(x = km$cluster, dist = dist(customer_s)) :
##  Cluster sizes and average silhouette widths:
##         8        11        16        25 
## 0.5464597 0.4080823 0.3794910 0.5164434 
## Individual silhouette widths:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1931  0.4030  0.4890  0.4641  0.5422  0.6333
plot(kms)

選擇k-means最佳k值

# Candidate numbers of clusters.
nk <- 2:10

# Average silhouette width for each k. The seed is reset inside every
# iteration so each kmeans() run starts from the same RNG state.
# vapply() guarantees a numeric vector with one value per k.
SW <- vapply(nk, function(k) {
  set.seed(22)
  km_k <- kmeans(customer_s, centers = k)
  summary(silhouette(km_k$cluster, dist(customer_s)))$avg.width
}, numeric(1))

plot(x = nk, y = SW, type = "l")

# Total within-cluster sum of squares for each k (same per-iteration seed).
WSS <- vapply(nk, function(k) {
  set.seed(22)
  kmeans(customer_s, centers = k)$tot.withinss
}, numeric(1))
WSS
## [1] 123.49224  93.08341  61.34890  48.76431  43.08965  40.25820  29.58014
## [8]  26.97709  24.99510
plot(x = nk, y = WSS, type = "l", xlab = "number of k", ylab = "within sum of squares")

使用fpc套件的cluster.stats函數

# cluster.stats() from fpc reports validation statistics for a clustering,
# given a distance matrix and a vector of cluster labels.
# install.packages("fpc")
# install.packages("robustbase", repos="http://R-Forge.R-project.org")
library(fpc)
?cluster.stats
cluster.stats(
  dist(customer_s),
  kmeans(customer_s, centers = 2)$cluster
)
## $n
## [1] 60
## 
## $cluster.number
## [1] 2
## 
## $cluster.size
## [1] 24 36
## 
## $min.cluster.size
## [1] 24
## 
## $noisen
## [1] 0
## 
## $diameter
## [1] 3.885722 3.576798
## 
## $average.distance
## [1] 2.045880 1.798055
## 
## $median.distance
## [1] 2.243328 1.996810
## 
## $separation
## [1] 0.9276315 0.9276315
## 
## $average.toother
## [1] 3.351368 3.351368
## 
## $separation.matrix
##           [,1]      [,2]
## [1,] 0.0000000 0.9276315
## [2,] 0.9276315 0.0000000
## 
## $ave.between.matrix
##          [,1]     [,2]
## [1,] 0.000000 3.351368
## [2,] 3.351368 0.000000
## 
## $average.between
## [1] 3.351368
## 
## $average.within
## [1] 1.873552
## 
## $n.between
## [1] 864
## 
## $n.within
## [1] 906
## 
## $max.diameter
## [1] 3.885722
## 
## $min.separation
## [1] 0.9276315
## 
## $within.cluster.ss
## [1] 123.4922
## 
## $clus.avg.silwidths
##         1         2 
## 0.3827935 0.4454536 
## 
## $avg.silwidth
## [1] 0.4203896
## 
## $g2
## NULL
## 
## $g3
## NULL
## 
## $pearsongamma
## [1] 0.6564321
## 
## $dunn
## [1] 0.2387282
## 
## $dunn2
## [1] 1.638105
## 
## $entropy
## [1] 0.6730117
## 
## $wb.ratio
## [1] 0.559041
## 
## $ch
## [1] 52.84097
## 
## $cwidegap
## [1] 2.148705 2.131733
## 
## $widestgap
## [1] 2.148705
## 
## $sindex
## [1] 1.010004
## 
## $corrected.rand
## NULL
## 
## $vi
## NULL
# Within-cluster sum of squares for each k in nk, read from cluster.stats().
# The seed is reset inside every iteration so each kmeans() run is
# reproducible.
WSS <- vapply(nk, function(k) {
  set.seed(22)
  cluster.stats(
    dist(customer_s),
    kmeans(customer_s, centers = k)$cluster
  )$within.cluster.ss
}, numeric(1))

plot(x = nk, y = WSS, type = "l", xlab = "number of k", ylab = "within sum of squares")

# Average silhouette width for each k. This iterates over nk (not a literal
# 2:10) so SW always has the same length as the x-axis it is plotted against.
SW <- vapply(nk, function(k) {
  set.seed(22)
  cluster.stats(
    dist(customer_s),
    kmeans(customer_s, centers = k)$cluster
  )$avg.silwidth
}, numeric(1))

plot(x = nk, y = SW, type = "l")

model comparison

# Four-cluster solutions from single- and complete-linkage hierarchical
# clustering on the standardised data.
single_c <- hclust(dist(customer_s), method = "single")
hc_single <- cutree(single_c, k = 4)

complete_c <- hclust(dist(customer_s), method = "complete")
hc_complete <- cutree(complete_c, k = 4)

# Four-cluster k-means solution with a fixed seed.
set.seed(22)
km <- kmeans(customer_s, 4)

# Validation statistics for the k-means solution.
cs <- cluster.stats(dist(customer_s), km$cluster)
cs[c("within.cluster.ss", "avg.silwidth")]
## $within.cluster.ss
## [1] 61.3489
## 
## $avg.silwidth
## [1] 0.4640587
# The same two statistics for all three clusterings, one column per method.
q <- sapply(
  list(
    kmeans = km$cluster,
    hc_single = hc_single,
    hc_complete = hc_complete
  ),
  function(labels) {
    cluster.stats(dist(customer_s), labels)[c("within.cluster.ss", "avg.silwidth")]
  }
)
q
##                   kmeans    hc_single hc_complete
## within.cluster.ss 61.3489   136.0092  65.94076   
## avg.silwidth      0.4640587 0.2481926 0.4255961

density-based method-DBSCAN

# install.packages("mlbench")
# The mlbench package provides generators for simulated data sets of
# various shapes and sizes; this example draws 500 points from its
# Cassini problem.
library(mlbench)
# install.packages("fpc")
library(fpc)
set.seed(2)
p <- mlbench.cassini(500)
plot(p$x)

?mlbench.cassini

# DBSCAN on the pairwise distance matrix of the points (method = "dist"),
# with neighbourhood radius eps = 0.2 and MinPts = 2.
ds <- dbscan(data = dist(p$x), eps = 0.2, MinPts = 2, method = "dist")
ds
## dbscan Pts=500 MinPts=2 eps=0.2
##         1   2   3
## seed  200 200 100
## total 200 200 100
plot(ds, p$x)

# Extract the raw coordinates of the points assigned to cluster 1.
cluster_1_raw <- p$x[ds$cluster == 1, ]
cluster_1_raw
##                [,1]       [,2]
##   [1,] -0.878020041 -0.9762015
##   [2,]  0.204310908 -1.8311169
##   [3,] -1.033283148 -0.7664819
##   [4,] -0.089110770 -1.2200260
##   [5,]  0.146767003 -1.7177684
##   [6,]  0.725874430 -1.8106878
##   [7,]  0.451102355 -1.4799207
##   [8,] -0.425548959 -1.3179628
##   [9,] -0.977311794 -1.5286999
##  [10,]  0.864295737 -0.7098223
##  [11,]  0.039793615 -1.0964859
##  [12,]  0.959690709 -1.6442071
##  [13,]  0.465944793 -1.8592484
##  [14,]  1.237244233 -0.8282179
##  [15,] -0.625804973 -0.8010245
##  [16,]  0.317509698 -0.9637028
##  [17,]  0.752385028 -0.6808253
##  [18,]  0.348629646 -1.6835199
##  [19,]  1.000493954 -1.4000192
##  [20,] -0.311664918 -1.3615982
##  [21,] -0.624969555 -1.6033902
##  [22,] -0.882224692 -0.8912601
##  [23,] -0.590534200 -0.7114485
##  [24,] -0.271268856 -1.1837040
##  [25,] -0.416158140 -1.0248017
##  [26,] -0.835962518 -0.7295600
##  [27,]  0.649746827 -1.5558908
##  [28,] -0.334361972 -1.2033798
##  [29,] -0.100842130 -1.7851571
##  [30,] -1.071873313 -1.3959494
##  [31,] -0.833292664 -1.4157775
##  [32,] -0.593559806 -1.1465330
##  [33,]  1.111665006 -1.3745968
##  [34,] -1.324461332 -0.9219018
##  [35,] -0.352864039 -1.1809969
##  [36,]  0.906469168 -0.7980869
##  [37,]  1.261369418 -1.1895967
##  [38,] -0.721705502 -0.8884615
##  [39,]  0.175082884 -1.6118350
##  [40,]  0.539582980 -1.5624870
##  [41,] -0.818874233 -0.6291580
##  [42,] -0.513059044 -1.0350265
##  [43,] -0.876957755 -1.4607719
##  [44,] -0.895933315 -1.6433053
##  [45,]  0.360744853 -1.6198395
##  [46,] -0.156942578 -0.9316785
##  [47,]  0.468558055 -1.6013488
##  [48,] -0.059688396 -1.6337438
##  [49,] -0.885565535 -1.5227799
##  [50,]  1.125117614 -1.4695238
##  [51,]  0.779837759 -1.6453457
##  [52,] -1.267874267 -0.8865873
##  [53,] -0.565935794 -1.0583129
##  [54,] -1.355198401 -1.2378089
##  [55,] -0.677769509 -0.6677644
##  [56,] -0.312223762 -0.8291433
##  [57,] -0.419629384 -1.8780091
##  [58,]  0.419366754 -1.2295931
##  [59,]  1.240594060 -1.1852068
##  [60,]  1.263970040 -0.8515952
##  [61,] -1.063583088 -0.7606211
##  [62,] -1.150307795 -0.9022401
##  [63,] -1.100364472 -1.4493018
##  [64,]  0.008498944 -1.7775133
##  [65,] -0.836672143 -1.8149838
##  [66,]  0.999430884 -1.0677434
##  [67,] -0.343672879 -0.7894168
##  [68,] -0.761394282 -1.7695864
##  [69,] -0.851809277 -1.7150909
##  [70,]  1.336660721 -1.2611998
##  [71,]  0.909374488 -1.0265832
##  [72,] -0.382126787 -1.7644891
##  [73,] -0.969304533 -0.6027001
##  [74,] -0.275587040 -1.3226617
##  [75,]  0.505679994 -0.7335432
##  [76,]  1.150520920 -1.3386278
##  [77,]  0.595382492 -0.7723944
##  [78,]  0.489829736 -1.5564774
##  [79,]  1.088978634 -1.2538732
##  [80,]  1.043025382 -1.6861489
##  [81,]  0.118268481 -1.7543981
##  [82,] -0.789308146 -1.3415347
##  [83,]  1.094284148 -1.6172529
##  [84,]  0.153857954 -0.8916541
##  [85,] -0.464299827 -1.2733054
##  [86,] -0.980257695 -1.5290886
##  [87,] -1.279712214 -1.0883512
##  [88,]  0.361285474 -1.5286257
##  [89,]  0.858581717 -1.0253372
##  [90,] -1.332177181 -0.9967377
##  [91,]  1.079295343 -1.2404652
##  [92,]  1.072808084 -0.6373208
##  [93,]  0.972371113 -1.6303852
##  [94,]  0.791835346 -1.3100768
##  [95,]  0.443013721 -1.0278206
##  [96,]  0.094980484 -1.1810036
##  [97,]  0.758548042 -1.5286569
##  [98,] -0.678020587 -1.5936866
##  [99,]  0.681099042 -1.3005344
## [100,]  0.581809991 -1.4361657
## [101,] -0.949328033 -1.2934566
## [102,]  0.934646309 -1.3200800
## [103,] -0.838076695 -1.0010805
## [104,] -1.143456853 -1.3945971
## [105,]  0.294368850 -1.3797689
## [106,]  0.690397641 -1.8165608
## [107,]  0.431482058 -1.8374265
## [108,] -0.294068155 -1.7588508
## [109,]  0.391565978 -1.6878085
## [110,] -0.868340232 -0.7029784
## [111,] -1.092895774 -1.1875185
## [112,]  0.944616018 -0.7959523
## [113,]  1.008227022 -0.7349331
## [114,]  0.063169014 -1.5183811
## [115,] -0.883764856 -1.3822555
## [116,] -0.676683002 -1.1494103
## [117,] -0.905608044 -0.8047834
## [118,]  0.819799909 -1.1321852
## [119,] -1.247826523 -0.9938843
## [120,]  0.425023325 -1.7127364
## [121,]  0.028040646 -1.0690623
## [122,]  0.548264973 -1.0473068
## [123,] -1.109889201 -1.3317341
## [124,] -0.085253708 -1.6740866
## [125,] -1.144451660 -1.1216628
## [126,] -0.292571453 -1.2366095
## [127,]  1.007551267 -0.8961479
## [128,]  0.719466695 -0.6217663
## [129,] -0.476545375 -1.8497985
## [130,] -0.847643081 -0.7277424
## [131,] -0.758695996 -0.8727665
## [132,]  0.326395991 -1.1894974
## [133,]  0.241189831 -1.5574133
## [134,] -0.928170585 -0.8081438
## [135,]  0.416418319 -0.9710204
## [136,] -0.381826950 -1.0647319
## [137,]  0.628505803 -1.4240533
## [138,]  0.254806683 -0.8233607
## [139,] -0.557311573 -0.9897810
## [140,] -1.182075255 -1.1919261
## [141,]  1.080463546 -1.5803151
## [142,] -1.134948642 -0.7702150
## [143,] -1.038683810 -0.7100887
## [144,]  1.262473473 -1.3013711
## [145,]  0.431152384 -1.7899316
## [146,]  0.732401457 -1.2241335
## [147,] -0.797356507 -0.9931729
## [148,]  1.317411229 -1.2101679
## [149,]  0.078235150 -1.7534496
## [150,]  0.503113262 -0.9063731
## [151,] -0.099743764 -1.8801311
## [152,]  0.875963378 -0.7685420
## [153,]  1.359823570 -1.0845456
## [154,] -1.322579861 -1.3218902
## [155,] -0.646839537 -0.8774735
## [156,] -0.033284404 -0.9049642
## [157,] -1.274560036 -1.3949715
## [158,] -0.986175143 -1.3928907
## [159,]  0.516447124 -0.8282905
## [160,] -0.617672489 -0.7544855
## [161,] -0.090804023 -0.8836617
## [162,]  1.057596764 -1.3103589
## [163,]  0.581555766 -1.3198854
## [164,]  0.785857302 -0.6411018
## [165,] -0.596294891 -1.1266062
## [166,]  0.308849454 -1.7633732
## [167,] -0.508235955 -0.6506234
## [168,] -0.491856613 -1.2025502
## [169,]  0.534989613 -1.2040689
## [170,] -0.438313225 -1.7248040
## [171,] -0.196149509 -0.9212789
## [172,] -0.349246728 -1.3181133
## [173,]  0.837322887 -1.0071313
## [174,] -0.332437398 -1.4488438
## [175,] -0.312666601 -1.1529168
## [176,]  0.937778199 -1.2671769
## [177,] -0.109157470 -0.9522752
## [178,] -1.315804660 -0.9816730
## [179,] -0.672054106 -0.6749106
## [180,]  0.495424973 -0.8340842
## [181,]  0.560205803 -1.6648770
## [182,] -1.086024289 -0.9038942
## [183,]  0.276024465 -0.8513125
## [184,]  1.242073722 -1.5273660
## [185,] -0.039474960 -1.2577110
## [186,]  0.970639213 -1.2981420
## [187,] -0.433099332 -1.1673614
## [188,]  0.674286036 -1.3360663
## [189,]  1.113452181 -1.6006035
## [190,] -0.654962503 -1.2169928
## [191,]  1.261636182 -1.1705721
## [192,]  1.057543215 -0.8091239
## [193,] -0.103213762 -1.7898732
## [194,]  0.617210972 -1.1585440
## [195,]  0.098981815 -0.8887357
## [196,]  1.202563385 -0.9429087
## [197,] -0.385531083 -1.3979427
## [198,] -1.343070698 -0.9174087
## [199,] -0.344961580 -1.7403067
## [200,]  0.746652400 -1.0796416