library(C50)
library('caret')
## Loading required package: lattice
## Loading required package: ggplot2
library('e1071')
# Load the churn example data. The remainder of the script works with the
# churnTrain and churnTest data frames.
data("churn")
# Logical mask over the columns of churnTrain: TRUE for the three
# identifier-like fields (state, area code, account length).
(names(churnTrain) %in% c("state", "area_code", "account_length"))
## [1] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Negated mask: TRUE for every other column. The parentheses only make the
# operator precedence explicit (`%in%` is evaluated before `!`).
!(names(churnTrain) %in% c("state", "area_code", "account_length"))
## [1] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [12] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Select the modelling variables: keep every column except the
# identifier-like fields state, area_code and account_length.
variable.list <- !(names(churnTrain) %in%
  c("state", "area_code", "account_length"))
churnTrain <- churnTrain[, variable.list]
churnTest <- churnTest[, variable.list]

# Split churnTrain at random into training data (ind == 1, drawn with
# probability 0.7) and validation data (ind == 2, probability 0.3).
# The fixed seed makes the split repeatable.
set.seed(2)
ind <- sample(
  1:2,
  size = nrow(churnTrain),
  replace = TRUE,
  prob = c(0.7, 0.3)
)
trainset <- churnTrain[ind == 1, ]
testset <- churnTrain[ind == 2, ]

# install.packages("caret")
library(caret)

# Resampling scheme: 10-fold cross-validation repeated 3 times, with class
# probabilities computed and performance summarised by multiClassSummary.
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  classProbs = TRUE,
  summaryFunction = multiClassSummary
)

# Candidate values of the rpart complexity parameter: 0.01, 0.02, ..., 0.10.
tune_funs <- expand.grid(cp = seq(0.01, 0.1, 0.01))

# Fit an rpart tree on the full churnTrain data, tuning cp over the grid.
model <- train(
  churn ~ .,
  data = churnTrain,
  method = "rpart",
  trControl = control,
  tuneGrid = tune_funs
)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
# Predict a class label for every churnTest row with the tuned model, then
# cross-tabulate the predictions (rows) against the true labels (columns).
predictions <- predict(model, churnTest)
confusionMatrix(
  table(predictions, churnTest$churn)
)
## Confusion Matrix and Statistics
##
##
## predictions yes no
## yes 145 15
## no 79 1428
##
## Accuracy : 0.9436
## 95% CI : (0.9314, 0.9542)
## No Information Rate : 0.8656
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7243
## Mcnemar's Test P-Value : 8.142e-11
##
## Sensitivity : 0.64732
## Specificity : 0.98960
## Pos Pred Value : 0.90625
## Neg Pred Value : 0.94758
## Prevalence : 0.13437
## Detection Rate : 0.08698
## Detection Prevalence : 0.09598
## Balanced Accuracy : 0.81846
##
## 'Positive' Class : yes
##
library("caret")
# Variable importance of the tuned model; scale = FALSE requests the raw
# importance scores rather than rescaled ones.
importance <- varImp(model, scale = FALSE)
importance
## rpart variable importance
##
## Overall
## total_day_minutes 219.693
## total_day_charge 206.025
## number_customer_service_calls 168.529
## international_planyes 163.107
## total_intl_minutes 135.324
## total_eve_minutes 117.225
## total_intl_charge 116.860
## total_eve_charge 111.593
## number_vmail_messages 52.586
## voice_mail_planyes 52.586
## total_intl_calls 52.444
## total_night_minutes 24.705
## total_night_charge 18.159
## total_night_calls 11.200
## total_day_calls 2.214
## total_eve_calls 0.000
# Plot the variable-importance scores computed above.
plot(importance)

# install.packages("ROCR")
library("ROCR")
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Switch from hard class labels to class probabilities for churnTest.
# This reuses (and overwrites) the name `predictions`.
predictions <- predict(model, churnTest, type = "prob")
head(predictions)
## yes no
## 1 0.02701486 0.9729851
## 2 0.10240964 0.8975904
## 3 0.11320755 0.8867925
## 4 0.02701486 0.9729851
## 5 0.02701486 0.9729851
## 6 0.10240964 0.8975904
# The first column holds the "yes" probabilities (see the header printed
# above); these are the scores used for the ROC analysis.
pred.to.roc <- predictions[, 1]
head(pred.to.roc)
## [1] 0.02701486 0.10240964 0.11320755 0.02701486 0.02701486 0.10240964
# Pair the scores with the true churn labels in a ROCR prediction object
# and print it.
pred.rocr <- prediction(pred.to.roc, churnTest$churn)
pred.rocr
## An object of class "prediction"
## Slot "predictions":
## [[1]]
## [1] 0.02701486 0.10240964 0.11320755 0.02701486 0.02701486 0.10240964
## [7] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [13] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [19] 0.00000000 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [25] 0.10240964 0.10240964 0.02701486 0.04046243 0.04046243 0.02701486
## [31] 0.04046243 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [37] 0.02701486 0.10240964 0.02701486 1.00000000 0.85000000 0.02701486
## [43] 0.02701486 0.12500000 0.02701486 0.02701486 0.02701486 0.10240964
## [49] 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486
## [55] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [61] 0.02701486 0.02701486 0.83333333 0.12500000 0.02701486 0.02701486
## [67] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667
## [73] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [79] 0.02701486 0.04046243 0.87254902 0.02701486 0.16000000 0.02701486
## [85] 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667 0.02701486
## [91] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000
## [97] 0.02701486 0.02701486 0.02701486 0.02701486 0.11320755 0.04046243
## [103] 0.10240964 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486
## [109] 0.10416667 0.95049505 0.02701486 0.02701486 0.02701486 0.10240964
## [115] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000
## [121] 0.02701486 0.10240964 0.10240964 0.02701486 0.95049505 0.10240964
## [127] 0.02701486 0.02701486 1.00000000 0.10240964 0.02701486 0.02701486
## [133] 0.02701486 0.02701486 0.02701486 0.10416667 0.10240964 0.02701486
## [139] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [145] 0.04046243 0.10240964 0.02701486 0.02701486 0.02701486 0.87254902
## [151] 0.02701486 0.02701486 0.02701486 0.95049505 0.87254902 0.10240964
## [157] 0.02701486 0.02701486 0.02701486 0.87500000 0.02701486 1.00000000
## [163] 0.02701486 0.87500000 0.10416667 1.00000000 0.02701486 0.02701486
## [169] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486 0.11320755
## [175] 0.02701486 0.02701486 0.73684211 0.02701486 0.02701486 0.02701486
## [181] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.11320755
## [187] 0.10240964 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
## [193] 0.10240964 0.95049505 0.02701486 0.02701486 0.10240964 0.10240964
## [199] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [205] 0.02701486 0.04046243 0.02701486 0.95049505 0.02701486 0.02701486
## [211] 0.04046243 0.02701486 0.02701486 0.02701486 0.10240964 0.83333333
## [217] 0.73684211 0.04046243 0.02701486 0.10240964 0.02701486 0.87254902
## [223] 0.95049505 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667
## [229] 0.02701486 0.02701486 0.02701486 0.02701486 0.11320755 0.10240964
## [235] 0.02701486 0.02701486 0.10240964 0.02701486 0.11320755 0.04046243
## [241] 0.02701486 0.10240964 0.04046243 0.04046243 0.10240964 0.02701486
## [247] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.04046243
## [253] 0.87254902 0.10240964 0.85000000 0.02701486 0.02701486 0.02701486
## [259] 0.04046243 0.10240964 0.11320755 0.02701486 0.10416667 0.11320755
## [265] 0.04046243 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [271] 0.02701486 0.02701486 0.02701486 0.04046243 0.00000000 0.83333333
## [277] 0.83333333 0.02701486 0.10240964 0.10240964 0.83333333 0.02701486
## [283] 0.02701486 1.00000000 0.83333333 0.02701486 0.02701486 0.10240964
## [289] 0.02701486 0.02701486 0.04046243 0.02701486 0.10240964 0.02701486
## [295] 0.04046243 0.12500000 0.02701486 1.00000000 0.02701486 0.02701486
## [301] 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486
## [307] 0.02701486 0.02701486 0.02701486 0.10240964 0.10416667 0.02701486
## [313] 0.02701486 0.04046243 0.02701486 0.10240964 0.02701486 0.02701486
## [319] 0.10240964 0.02701486 0.02701486 0.02701486 0.11320755 0.95049505
## [325] 0.04046243 0.10416667 0.87254902 0.02701486 0.02701486 0.10240964
## [331] 0.02701486 0.02701486 0.73684211 0.02701486 0.10416667 0.02701486
## [337] 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486
## [343] 0.83333333 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [349] 0.83333333 0.02701486 0.02701486 0.02701486 0.02701486 0.95049505
## [355] 0.04046243 0.85000000 0.02701486 0.02701486 0.02701486 0.95049505
## [361] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [367] 0.02701486 0.12500000 0.02701486 0.02701486 0.02701486 0.02701486
## [373] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486
## [379] 0.02701486 0.10240964 0.02701486 0.04046243 0.02701486 0.02701486
## [385] 0.87254902 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486
## [391] 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486
## [397] 0.04046243 0.04046243 0.02701486 0.02701486 0.12500000 0.10416667
## [403] 0.04046243 0.16000000 0.10416667 0.87254902 0.02701486 0.10240964
## [409] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [415] 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667 0.02701486
## [421] 0.02701486 0.85000000 0.02701486 0.02701486 0.02701486 0.11320755
## [427] 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486
## [433] 0.02701486 0.02701486 0.02701486 0.02701486 0.04046243 0.02701486
## [439] 0.02701486 0.02701486 0.10240964 0.10416667 0.02701486 0.02701486
## [445] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [451] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [457] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
## [463] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667
## [469] 0.02701486 0.02701486 0.11320755 0.02701486 0.02701486 0.02701486
## [475] 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486 0.04046243
## [481] 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486 0.10240964
## [487] 0.10240964 0.02701486 0.02701486 0.95049505 0.02701486 0.02701486
## [493] 0.02701486 0.02701486 0.02701486 0.02701486 0.04046243 0.10240964
## [499] 0.10240964 0.02701486 0.11320755 0.02701486 0.02701486 0.10416667
## [505] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667
## [511] 0.02701486 0.02701486 0.02701486 0.02701486 0.87254902 0.02701486
## [517] 0.02701486 0.02701486 0.87254902 0.85000000 0.02701486 0.87254902
## [523] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486
## [529] 0.10240964 0.02701486 0.02701486 0.12500000 0.02701486 0.02701486
## [535] 0.02701486 0.85000000 0.02701486 0.02701486 0.02701486 0.04046243
## [541] 0.87500000 0.02701486 0.04046243 0.10240964 0.02701486 0.02701486
## [547] 0.04046243 0.02701486 0.02701486 0.02701486 0.02701486 0.12500000
## [553] 0.02701486 0.02701486 0.02701486 0.11320755 0.02701486 0.02701486
## [559] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [565] 0.02701486 0.10416667 0.11320755 0.02701486 0.95049505 0.02701486
## [571] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [577] 1.00000000 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [583] 0.10240964 1.00000000 0.87254902 0.10416667 0.02701486 0.10416667
## [589] 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486
## [595] 0.02701486 0.02701486 0.87254902 0.02701486 0.02701486 0.02701486
## [601] 0.02701486 0.04046243 0.02701486 0.02701486 0.10240964 0.02701486
## [607] 0.02701486 0.02701486 0.02701486 0.10240964 0.04046243 0.02701486
## [613] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
## [619] 0.02701486 0.02701486 0.04046243 0.16000000 0.02701486 0.04046243
## [625] 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667 0.10240964
## [631] 0.02701486 0.02701486 0.10240964 0.95049505 0.02701486 0.16000000
## [637] 0.02701486 0.04046243 0.02701486 0.10240964 0.87254902 0.02701486
## [643] 0.02701486 0.02701486 0.02701486 1.00000000 0.02701486 0.02701486
## [649] 0.04046243 0.10240964 0.10416667 0.02701486 0.02701486 0.87254902
## [655] 0.95049505 0.02701486 0.16000000 0.02701486 0.02701486 0.02701486
## [661] 0.95049505 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486
## [667] 0.02701486 0.02701486 0.02701486 1.00000000 0.02701486 0.02701486
## [673] 0.02701486 0.10240964 0.02701486 0.02701486 0.10240964 0.02701486
## [679] 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486 0.04046243
## [685] 0.02701486 0.87254902 0.02701486 0.02701486 1.00000000 0.02701486
## [691] 0.87254902 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486
## [697] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
## [703] 0.10240964 0.02701486 0.10240964 0.10240964 0.04046243 0.02701486
## [709] 0.10240964 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486
## [715] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [721] 0.87254902 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486
## [727] 0.02701486 0.02701486 0.02701486 1.00000000 0.02701486 0.04046243
## [733] 0.16000000 0.04046243 0.10416667 0.10240964 0.02701486 0.02701486
## [739] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [745] 0.02701486 0.02701486 0.95049505 0.02701486 0.10240964 0.02701486
## [751] 0.02701486 0.02701486 0.10416667 0.02701486 1.00000000 0.02701486
## [757] 0.02701486 0.02701486 0.10240964 0.02701486 0.10240964 0.02701486
## [763] 0.02701486 0.10416667 0.02701486 0.02701486 0.10416667 0.04046243
## [769] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [775] 0.02701486 0.04046243 0.02701486 0.87254902 0.16000000 0.10240964
## [781] 0.04046243 0.00000000 0.02701486 0.02701486 0.02701486 0.02701486
## [787] 0.10240964 0.16000000 0.83333333 0.04046243 0.02701486 0.02701486
## [793] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.87254902
## [799] 0.85000000 0.87254902 0.02701486 0.02701486 0.95049505 0.02701486
## [805] 0.02701486 0.04046243 0.02701486 0.10240964 0.10240964 0.02701486
## [811] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [817] 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667 0.10240964
## [823] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [829] 0.02701486 1.00000000 0.02701486 0.10240964 0.10240964 0.04046243
## [835] 0.02701486 0.02701486 0.02701486 0.00000000 1.00000000 0.85000000
## [841] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
## [847] 0.02701486 0.02701486 0.16000000 0.02701486 0.10240964 0.02701486
## [853] 0.12500000 0.02701486 0.04046243 0.10240964 0.02701486 0.95049505
## [859] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.87254902
## [865] 0.02701486 0.02701486 0.10240964 0.10240964 0.02701486 0.10416667
## [871] 0.02701486 0.02701486 0.11320755 0.02701486 0.10240964 0.02701486
## [877] 0.02701486 0.02701486 0.10240964 0.02701486 0.04046243 0.02701486
## [883] 0.10240964 0.02701486 0.02701486 0.16000000 0.02701486 0.02701486
## [889] 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000 0.02701486
## [895] 0.02701486 0.10240964 1.00000000 1.00000000 0.02701486 0.11320755
## [901] 0.02701486 0.02701486 0.04046243 0.87254902 0.02701486 0.10240964
## [907] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [913] 0.02701486 0.04046243 0.02701486 0.02701486 0.10240964 0.02701486
## [919] 0.10240964 0.02701486 0.95049505 0.02701486 0.02701486 0.02701486
## [925] 0.10416667 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [931] 0.10240964 0.95049505 0.12500000 0.02701486 0.02701486 1.00000000
## [937] 0.11320755 0.02701486 0.00000000 0.02701486 0.02701486 0.04046243
## [943] 0.02701486 0.02701486 0.10240964 0.02701486 0.73684211 0.02701486
## [949] 0.02701486 0.02701486 1.00000000 0.10240964 0.87254902 0.02701486
## [955] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000
## [961] 0.02701486 0.02701486 0.02701486 0.87254902 0.95049505 0.02701486
## [967] 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486
## [973] 0.02701486 0.02701486 0.95049505 0.02701486 1.00000000 0.02701486
## [979] 0.04046243 0.16000000 0.02701486 0.02701486 0.10416667 0.10416667
## [985] 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486
## [991] 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486 1.00000000
## [997] 0.02701486 0.83333333 0.10240964 0.95049505 0.02701486 0.02701486
## [1003] 0.02701486 0.10240964 1.00000000 0.10240964 0.02701486 0.02701486
## [1009] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667
## [1015] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [1021] 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486 0.02701486
## [1027] 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486 0.04046243
## [1033] 0.10240964 0.87254902 0.10416667 0.02701486 0.02701486 0.02701486
## [1039] 1.00000000 0.02701486 0.10240964 0.10240964 0.02701486 0.02701486
## [1045] 0.10416667 0.02701486 0.10416667 0.02701486 0.10416667 0.02701486
## [1051] 0.87254902 0.02701486 0.10240964 1.00000000 0.02701486 0.02701486
## [1057] 0.10240964 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486
## [1063] 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486
## [1069] 0.02701486 0.04046243 0.10240964 0.87254902 0.73684211 0.10416667
## [1075] 0.02701486 0.02701486 0.02701486 0.02701486 0.85000000 0.02701486
## [1081] 0.10240964 0.02701486 0.02701486 0.02701486 0.10416667 0.02701486
## [1087] 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486 0.02701486
## [1093] 0.10416667 0.10240964 0.02701486 0.11320755 0.02701486 0.02701486
## [1099] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1105] 1.00000000 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1111] 0.02701486 0.10240964 0.02701486 0.02701486 0.10416667 0.02701486
## [1117] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.10240964
## [1123] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1129] 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964 0.16000000
## [1135] 0.02701486 0.10240964 0.02701486 0.04046243 1.00000000 0.04046243
## [1141] 0.02701486 0.02701486 0.02701486 0.95049505 0.02701486 0.02701486
## [1147] 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964 0.04046243
## [1153] 0.02701486 0.95049505 0.10240964 1.00000000 0.10416667 0.02701486
## [1159] 0.04046243 0.95049505 0.02701486 0.10240964 0.10240964 0.02701486
## [1165] 0.02701486 0.04046243 0.02701486 0.02701486 0.04046243 0.87254902
## [1171] 0.04046243 0.02701486 0.02701486 0.02701486 0.85000000 0.02701486
## [1177] 0.11320755 0.10240964 0.02701486 0.02701486 0.95049505 0.02701486
## [1183] 0.16000000 0.02701486 0.02701486 0.10240964 0.04046243 0.02701486
## [1189] 0.02701486 0.02701486 0.10416667 0.04046243 0.04046243 0.02701486
## [1195] 0.10240964 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
## [1201] 0.02701486 0.02701486 0.02701486 0.02701486 0.87254902 0.10416667
## [1207] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1213] 0.02701486 0.02701486 0.12500000 0.02701486 0.02701486 0.10416667
## [1219] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486
## [1225] 0.02701486 0.02701486 0.02701486 0.10240964 0.02701486 0.87254902
## [1231] 0.02701486 0.02701486 0.10240964 0.02701486 0.11320755 0.87254902
## [1237] 0.02701486 0.02701486 0.10240964 0.16000000 0.02701486 0.02701486
## [1243] 0.02701486 0.10240964 1.00000000 0.02701486 0.02701486 0.02701486
## [1249] 0.02701486 0.02701486 0.10240964 0.02701486 0.10240964 0.04046243
## [1255] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.87254902
## [1261] 0.02701486 0.11320755 0.02701486 0.10240964 1.00000000 0.02701486
## [1267] 0.95049505 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486
## [1273] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1279] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1285] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.10240964
## [1291] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.87254902
## [1297] 0.02701486 0.02701486 0.10416667 0.02701486 0.02701486 0.04046243
## [1303] 0.02701486 1.00000000 0.02701486 0.95049505 0.02701486 0.10240964
## [1309] 0.02701486 0.02701486 1.00000000 0.10416667 0.02701486 0.02701486
## [1315] 0.02701486 0.02701486 0.02701486 0.95049505 0.02701486 0.02701486
## [1321] 0.16000000 0.02701486 0.02701486 0.02701486 0.02701486 0.83333333
## [1327] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1333] 0.10240964 0.02701486 0.04046243 0.02701486 0.02701486 0.10416667
## [1339] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000
## [1345] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1351] 0.02701486 0.02701486 0.02701486 0.02701486 0.95049505 0.02701486
## [1357] 0.04046243 0.02701486 0.10240964 0.10416667 0.02701486 0.02701486
## [1363] 0.02701486 0.02701486 0.02701486 0.10416667 0.10416667 0.10416667
## [1369] 0.10240964 0.10240964 0.10240964 0.10240964 0.02701486 0.02701486
## [1375] 0.02701486 0.10416667 0.02701486 0.95049505 0.04046243 0.02701486
## [1381] 0.04046243 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [1387] 0.02701486 0.02701486 1.00000000 0.02701486 0.04046243 0.02701486
## [1393] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1399] 0.11320755 0.02701486 0.02701486 0.10240964 0.10240964 0.10240964
## [1405] 0.87254902 0.02701486 0.02701486 0.87254902 0.02701486 0.04046243
## [1411] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
## [1417] 0.02701486 0.02701486 0.10416667 0.02701486 0.10240964 0.02701486
## [1423] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [1429] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1435] 0.02701486 0.10416667 0.02701486 0.02701486 0.02701486 0.87254902
## [1441] 0.87254902 0.10240964 0.02701486 0.10240964 0.02701486 0.87254902
## [1447] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1453] 0.02701486 0.00000000 0.02701486 0.87254902 0.10240964 0.10416667
## [1459] 0.02701486 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486
## [1465] 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
## [1471] 0.95049505 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1477] 0.95049505 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1483] 0.02701486 0.02701486 0.00000000 0.02701486 0.02701486 0.02701486
## [1489] 0.02701486 0.02701486 0.02701486 0.02701486 1.00000000 0.02701486
## [1495] 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667 0.95049505
## [1501] 0.02701486 0.02701486 0.02701486 0.02701486 0.95049505 0.10240964
## [1507] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.10240964
## [1513] 0.04046243 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486
## [1519] 0.02701486 0.10240964 0.02701486 0.10240964 0.02701486 0.16000000
## [1525] 0.02701486 0.02701486 0.02701486 0.10240964 0.95049505 0.95049505
## [1531] 0.10240964 0.02701486 0.00000000 0.12500000 0.02701486 0.02701486
## [1537] 0.02701486 0.04046243 0.02701486 0.02701486 0.02701486 0.02701486
## [1543] 0.02701486 0.02701486 0.02701486 0.02701486 0.04046243 0.10240964
## [1549] 0.02701486 0.02701486 0.02701486 0.02701486 0.16000000 0.10240964
## [1555] 0.02701486 0.02701486 0.02701486 0.87254902 0.02701486 0.02701486
## [1561] 0.02701486 0.10240964 1.00000000 0.02701486 0.83333333 0.95049505
## [1567] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1573] 0.02701486 0.02701486 0.02701486 0.04046243 0.02701486 0.02701486
## [1579] 0.10240964 0.02701486 0.02701486 0.95049505 0.02701486 0.10416667
## [1585] 1.00000000 0.95049505 0.12500000 0.02701486 0.02701486 0.02701486
## [1591] 0.02701486 0.95049505 0.02701486 0.02701486 0.02701486 0.10240964
## [1597] 0.02701486 0.10416667 0.02701486 0.02701486 0.02701486 0.10416667
## [1603] 0.02701486 0.02701486 1.00000000 0.02701486 0.87254902 0.10240964
## [1609] 0.02701486 0.02701486 0.95049505 0.02701486 0.02701486 0.02701486
## [1615] 0.73684211 1.00000000 0.02701486 0.02701486 0.02701486 0.02701486
## [1621] 0.87254902 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1627] 0.95049505 0.02701486 0.02701486 0.02701486 0.04046243 0.02701486
## [1633] 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486 0.02701486
## [1639] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486
## [1645] 0.04046243 0.02701486 0.02701486 0.02701486 0.02701486 0.10416667
## [1651] 0.02701486 0.02701486 0.10240964 0.02701486 0.02701486 0.02701486
## [1657] 0.02701486 0.10240964 0.85000000 0.02701486 0.02701486 0.02701486
## [1663] 0.10240964 0.02701486 0.02701486 0.02701486 0.02701486
##
##
## Slot "labels":
## [[1]]
## [1] no no no no no no no no no no no no no no no no no
## [18] no no no no no no no no no no no no no no no no no
## [35] no no yes no no yes yes no no yes no no no no no yes no
## [52] no no no no no no no no no no no yes no no no no no
## [69] no no no no no no no no yes no no no yes no no no no
## [86] no no no yes no no no no no no yes no no no no no no
## [103] no no no no yes no no yes no no no no no no no no no
## [120] yes no no no no yes no no no yes no no no no no no yes
## [137] no no no no no no no no no no no no no yes no no no
## [154] yes yes no no no no no no yes no no no yes no no no no
## [171] no no no no no no no no no no no no no no no no no
## [188] no no no no no yes no no no no no no no no no no no
## [205] no no no yes no no no no no yes no yes yes no yes no no
## [222] yes yes no no no no no no no no no no no no no no no
## [239] no no no no no no no no no no no no no no yes no yes
## [256] no no no no no no no yes no no no no no yes no no yes
## [273] no no no yes no no no no yes yes no yes yes no no no no
## [290] no no no no no no yes no yes no no no no no no no yes
## [307] no no no no no no no no no no no no no no no no no
## [324] yes no no yes no no no no no yes no yes no no no no no
## [341] no no yes no no no no no yes no no no no yes no yes no
## [358] no no yes no no no no no no no no no yes no no no no
## [375] no no no no no no no no no no yes no no no no no no
## [392] no no no no no no no no no yes no no no no yes no no
## [409] no no no no no yes no no no no no no no yes no no no
## [426] no no no no no no no no no no no yes no no no no no
## [443] no no no no no no no no no no no no no no no no no
## [460] yes no no no no no no no no no no yes no no no no no
## [477] no no no no no no no no no no no no no yes no no no
## [494] no no no no no no no no no no no no no no no yes no
## [511] no no no no yes no no no yes yes no yes no no no no no
## [528] no no no no no no no no yes no no no no yes no no no
## [545] no no no no no no no no no no no no no no no no no
## [562] no no no no no no no yes no no no no no no no yes no
## [579] no no no no no yes no no no no no no no no no no no
## [596] no no no yes yes no no no no no no no no no no no no
## [613] no no no no no no no no no yes no no no no no no yes
## [630] no no no no yes no no no no no no yes no no no no yes
## [647] no no no no yes no no yes yes no no no yes no yes no no
## [664] no no no no no no yes no no no no no no no no no no
## [681] no no no no no yes no no yes no yes no no no yes no no
## [698] yes no no no no no no no no no no yes no no no no no
## [715] no yes no no no no yes no no no no no no no no yes no
## [732] no no no yes no no no no no no no no no no no yes no
## [749] no no no no no no yes no no no no no yes no no no no
## [766] no no no no no no no no no no no no no no yes no no
## [783] no no no no no yes yes no no no no no no no no yes yes
## [800] no no no yes no yes no no yes yes no no no no no no no
## [817] no no no no yes no no no yes yes no no no yes no no yes
## [834] no no no no no yes no no no no no no no no no no no
## [851] no no yes no no no no yes no no no no no yes no no no
## [868] no no yes no no no no no no no yes yes no no no no no
## [885] no no no no no no no no yes no no yes yes yes no no no
## [902] no no yes no no no no no no no no no no no no no no
## [919] yes no yes no no no yes yes no no no no no yes no no no
## [936] yes no no no no no no no no no no no yes no no yes no
## [953] yes no no no no no no yes no yes no yes yes no no no no
## [970] no no no no no yes no yes no no no no no no no no no
## [987] no no no no no yes no no no yes no yes no yes no no no
## [1004] no yes no no no no no no no no no no no no no no no
## [1021] no no yes no no no no no no no no no no yes no no no
## [1038] yes yes no no no no yes no no no no no no yes no no yes
## [1055] no no no no no no no no no no no no no no no no no
## [1072] yes no yes no no no no yes no no no no no no no no no
## [1089] yes no no no no no no no no no no no no no no no yes
## [1106] no no no no no no no no no no no no no no no no no
## [1123] no no no no no no no no no no yes no no no no no yes
## [1140] no no no no yes no no no no no no no no no no no yes
## [1157] yes no no yes no no no no no no no no no yes no no no
## [1174] no yes no no no no no yes no no no no no no no no no
## [1191] no no no no no no no no no no no no no no yes no no
## [1208] no no no no no no no no no no no no no no no no no
## [1225] no no no no no yes no no no no no yes no no no yes no
## [1242] no no yes yes no no no no no no no no no no no no no
## [1259] no yes no no no no yes no yes no no no no no no no no
## [1276] no no no no no no no no no no no no no no no no yes
## [1293] no no no yes no no no no no no no yes no yes no yes no
## [1310] no yes no no no no no no yes no no no no no no no yes
## [1327] no no no no no no no no no no no no no no no no no
## [1344] yes no yes no no no no no no no no yes no no no no yes
## [1361] no no no no no no no no no no no no no no no no no
## [1378] yes no no no no no no no no no no yes no no no no no
## [1395] no no no no no no no no yes no yes no no yes no no no
## [1412] yes no no no no no no no no no no no no no no no no
## [1429] no no no no no no no no no no no yes yes no no yes no
## [1446] yes no no no no no no no no no yes no no no no no no
## [1463] no no no yes no no no no yes no no no no no yes no no
## [1480] no no no no no no no no no no no no no yes no no no
## [1497] no no no yes no no no no yes no no no no no no no no
## [1514] no no no no no no no no no no yes no no no no yes yes
## [1531] no no no no no no no no no no no no no no no no no
## [1548] no no no no no no no no no no yes no no no no yes no
## [1565] no yes no no no no no no no no no no no no no no no
## [1582] yes no no yes yes yes no yes no no yes no no no no no no
## [1599] no no no no no no yes no yes no no no yes no no no no
## [1616] yes no no no no yes no no no no no yes no no no no no
## [1633] no no no no no no no no no no no no no no no yes no
## [1650] no no no no no no no no yes yes no no no no yes no no
## [1667] no
## Levels: no < yes
##
##
## Slot "cutoffs":
## [[1]]
## [1] Inf 1.00000000 0.95049505 0.87500000 0.87254902 0.85000000
## [7] 0.83333333 0.73684211 0.16000000 0.12500000 0.11320755 0.10416667
## [13] 0.10240964 0.04046243 0.02701486 0.00000000
##
##
## Slot "fp":
## [[1]]
## [1] 0 0 2 4 8 9 11 15 28 35 56 107 273 357
## [15] 1435 1443
##
##
## Slot "tp":
## [[1]]
## [1] 0 42 83 84 123 133 143 145 149 154 155 170 190 192 224 224
##
##
## Slot "tn":
## [[1]]
## [1] 1443 1443 1441 1439 1435 1434 1432 1428 1415 1408 1387 1336 1170 1086
## [15] 8 0
##
##
## Slot "fn":
## [[1]]
## [1] 224 182 141 140 101 91 81 79 75 70 69 54 34 32 0 0
##
##
## Slot "n.pos":
## [[1]]
## [1] 224
##
##
## Slot "n.neg":
## [[1]]
## [1] 1443
##
##
## Slot "n.pos.pred":
## [[1]]
## [1] 0 42 85 88 131 142 154 160 177 189 211 277 463 549
## [15] 1659 1667
##
##
## Slot "n.neg.pred":
## [[1]]
## [1] 1667 1625 1582 1579 1536 1525 1513 1507 1490 1478 1456 1390 1204 1118
## [15] 8 0
# Area under the ROC curve, and the ROC curve itself (true-positive rate
# against false-positive rate); the AUC value is shown in the plot title.
perf.rocr <- performance(pred.rocr, measure = "auc")
perf.tpr.rocr <- performance(
  pred.rocr,
  measure = "tpr",
  x.measure = "fpr"
)
plot(
  perf.tpr.rocr,
  main = paste("AUC:", (perf.rocr@y.values))
)

# rpart: classification tree fitted on the training split.
library("rpart")
churn.rp <- rpart(churn ~ ., data = trainset)

# ctree
# install.packages("party")
library("party")
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
# Conditional inference tree (party) fitted on the training split.
ctree.model <- ctree(churn ~ ., data = trainset)

# C5.0 tree fitted on the same training split.
library(C50)
c50.model <- C5.0(churn ~ ., data = trainset)

# Class probabilities on the validation split for each of the three trees.
rp.predict.prob <- predict(churn.rp, testset, type = "prob")
c50.predict.prob <- predict(c50.model, testset, type = "prob")
# The ctree prediction is processed element by element, keeping the first
# value of each element. vapply() is used instead of sapply() so the result
# is guaranteed to be a numeric vector with one score per element; a
# malformed element raises an error instead of silently changing the
# return type.
ctree.predict.prob <- vapply(
  predict(ctree.model, testset, type = "prob"),
  function(e) unlist(e)[1],
  numeric(1)
)

# ROCR prediction objects: first probability column against the true labels.
# NOTE(review): column 1 is presumably the "yes" class (first factor level),
# matching the caret output earlier in the script -- confirm for rpart/C5.0.
rp.prediction <- prediction(rp.predict.prob[, 1], testset$churn)
c50.prediction <- prediction(c50.predict.prob[, 1], testset$churn)
ctree.prediction <- prediction(ctree.predict.prob, testset$churn)

# ROC curves (true-positive rate against false-positive rate), drawn on one
# plot: rpart in red, C5.0 in green, ctree in blue.
rp.performance <- performance(rp.prediction, "tpr", "fpr")
c50.performance <- performance(c50.prediction, "tpr", "fpr")
ctree.performance <- performance(ctree.prediction, "tpr", "fpr")
plot(rp.performance, col = "red")
plot(c50.performance, add = TRUE, col = "green")
plot(ctree.performance, add = TRUE, col = "blue")

# Area under each ROC curve.
rp.per.obj <- performance(rp.prediction, measure = "auc")
c50.per.obj <- performance(c50.prediction, measure = "auc")
ctree.per.obj <- performance(ctree.prediction, measure = "auc")
rp.per.obj@y.values
## [[1]]
## [1] 0.9090751
c50.per.obj@y.values
## [[1]]
## [1] 0.8849438
ctree.per.obj@y.values
## [[1]]
## [1] 0.9106197
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
# Baseline random forest on the full training data with default settings.
rf_model <- randomForest(formula = churn ~ ., data = churnTrain)
# find best ntree: plot the error rates against the number of trees
plot(rf_model)
legend(
  "topright",
  colnames(rf_model$err.rate),
  col = 1:3,
  cex = 0.8,
  fill = 1:3
)
# find best mtry: the response is picked by name instead of by column
# position, so the call keeps working if columns are added or reordered.
tuneRF(churnTrain[, names(churnTrain) != "churn"], churnTrain$churn)
## mtry = 4 OOB error = 4.5%
## Searching left ...
## mtry = 2 OOB error = 6.12%
## -0.36 0.05
## Searching right ...
## mtry = 8 OOB error = 4.68%
## -0.04 0.05
## mtry OOBError
## 2.OOB 2 0.06120612
## 4.OOB 4 0.04500450
## 8.OOB 8 0.04680468
# Refit the forest with ntree = 50 and mtry = 4.
rf_model <- randomForest(churn ~ ., data = churnTrain, ntree = 50, mtry = 4)
# rf_model = train(churn~.,data=churnTrain,method='rf')
# Confusion matrix of the forest's class predictions on churnTest.
confusionMatrix(table(predict(rf_model, churnTest), churnTest$churn))
## Confusion Matrix and Statistics
##
##
## yes no
## yes 163 9
## no 61 1434
##
## Accuracy : 0.958
## 95% CI : (0.9472, 0.9671)
## No Information Rate : 0.8656
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7999
## Mcnemar's Test P-Value : 1.09e-09
##
## Sensitivity : 0.72768
## Specificity : 0.99376
## Pos Pred Value : 0.94767
## Neg Pred Value : 0.95920
## Prevalence : 0.13437
## Detection Rate : 0.09778
## Detection Prevalence : 0.10318
## Balanced Accuracy : 0.86072
##
## 'Positive' Class : yes
##
# Class probabilities from the random forest on churnTest.
rf.predict.prob <- predict(rf_model, churnTest, type = "prob")
# The first probability column is used as the score
# (presumably the "yes" class -- confirm against the column names).
rf.prediction <- prediction(rf.predict.prob[, 1], as.factor(churnTest$churn))
# AUC and ROC curve (true-positive rate against false-positive rate).
rf.auc <- performance(rf.prediction, measure = "auc", x.measure = "cutoff")
rf.performance <- performance(rf.prediction, "tpr", "fpr")
plot(rf.performance)
# Compare CART and random forest
# Tune rpart's complexity parameter over 0.01, 0.02, ..., 0.1 with caret.
tune_funs <- expand.grid(cp = seq(0.01, 0.1, 0.01))
rpart_model <- train(
  churn ~ .,
  data = churnTrain,
  method = "rpart",
  tuneGrid = tune_funs
)
# First probability column is used as the score for the ROC curve.
rpart_prob_yes <- predict(rpart_model, churnTest, type = "prob")[, 1]
rpart_pred.rocr <- prediction(rpart_prob_yes, churnTest$churn)
rpart_perf.rocr <- performance(
  rpart_pred.rocr,
  measure = "tpr",
  x.measure = "fpr"
)
# rpart in red, random forest in black; legend fill colours 1:2 are
# black and red, matching the label order below.
plot(rpart_perf.rocr, col = "red")
plot(rf.performance, col = "black", add = TRUE)
legend(0.7, 0.2, c("randomforest", "rpart"), fill = 1:2)
# Two binary vectors used to illustrate distance measures.
x <- c(0, 0, 1, 1, 1, 1)
y <- c(1, 0, 1, 1, 0, 1)
# Euclidean distance
?dist
rbind(x, y)
## [,1] [,2] [,3] [,4] [,5] [,6]
## x 0 0 1 1 1 1
## y 1 0 1 1 0 1
dist(rbind(x, y), method = "euclidean")
## x
## y 1.414214
# Same value computed by hand.
sqrt(sum((x - y)^2))
## [1] 1.414214
# Minkowski distance with p = 2.
dist(rbind(x, y), method = "minkowski", p = 2)
## x
## y 1.414214
# City-block (Manhattan) distance
dist(rbind(x, y), method = "manhattan")
## x
## y 2
# Same value computed by hand.
sum(abs(x - y))
## [1] 2
# Minkowski distance with p = 1.
dist(rbind(x, y), method = "minkowski", p = 1)
## x
## y 2
# Switch to the course directory and read the customer data set from it.
setwd("~/lecture/riii")
customer <- read.csv("data/customer.csv", header = TRUE)
# First rows of the data.
head(customer)
## ID Visit.Time Average.Expense Sex Age
## 1 1 3 5.7 0 10
## 2 2 5 14.5 0 27
## 3 3 16 33.5 0 32
## 4 4 5 15.9 0 30
## 5 5 16 24.9 0 23
## 6 6 3 12.0 0 15
# Column types and dimensions.
str(customer)
## 'data.frame': 60 obs. of 5 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Visit.Time : int 3 5 16 5 16 3 12 14 6 3 ...
## $ Average.Expense: num 5.7 14.5 33.5 15.9 24.9 12 28.5 18.8 23.8 5.3 ...
## $ Sex : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Age : int 10 27 32 30 23 15 33 27 16 11 ...
# Standardise the numeric variables (the first column, ID, is dropped).
customer_s <- scale(customer[, -1])
?scale
# After scaling, a variable has mean 0 and standard deviation 1.
round(mean(customer_s[, 2]), 3)
## [1] 0
round(sd(customer_s[, 2]), 3)
## [1] 1
?hclust
# Agglomerative clustering with Ward linkage on the standardised data.
hc <- hclust(dist(customer_s, method = "euclidean"), method = "ward.D2")
plot(hc, hang = -0.01, cex = 0.7)
# Single linkage on the same standardised data. The raw `customer` frame
# must not be used here: it is unscaled and still contains the ID column,
# which would dominate the Euclidean distances.
hc3 <- hclust(dist(customer_s, method = "euclidean"), method = "single")
plot(hc3, hang = -0.01, cex = 0.8)
# Cut the Ward tree into four clusters and print the memberships.
fit <- cutree(hc, k = 4)
fit
## [1] 1 1 2 1 2 1 2 2 1 1 1 2 2 1 1 1 2 1 2 3 4 3 4 3 3 4 4 3 4 4 4 3 3 3 4
## [36] 4 3 4 4 4 4 4 4 4 3 3 4 4 4 3 4 3 3 4 4 4 3 4 4 3
# Number of customers in each of the four clusters.
table(fit)
## fit
## 1 2 3 4
## 11 8 16 25
# Redraw the dendrogram and outline the 4-cluster (red) and
# 3-cluster (blue) solutions.
plot(hc, hang = -0.01, cex = 0.7)
rect.hclust(hc, k = 4, border = "red")
rect.hclust(hc, k = 3, border = "blue")
# Original (unscaled) rows that fall into cluster 1.
c_1 <- customer[fit == 1, ]
summary(c_1)
## ID Visit.Time Average.Expense Sex Age
## Min. : 1.000 Min. :3.000 Min. : 4.60 Min. :0 Min. : 9
## 1st Qu.: 5.000 1st Qu.:3.500 1st Qu.: 7.15 1st Qu.:0 1st Qu.:12
## Median :10.000 Median :5.000 Median :14.50 Median :0 Median :16
## Mean : 9.636 Mean :4.909 Mean :12.71 Mean :0 Mean :17
## 3rd Qu.:14.500 3rd Qu.:6.000 3rd Qu.:16.00 3rd Qu.:0 3rd Qu.:20
## Max. :18.000 Max. :8.000 Max. :23.80 Max. :0 Max. :30
# install.packages("cluster")
library(cluster)
?diana
# Divisive clustering (diana) of the standardised customer data.
dv <- diana(customer_s, metric = "euclidean")
summary(dv)
## Merge:
## [,1] [,2]
## [1,] -24 -50
## [2,] -28 -46
## [3,] -7 -13
## [4,] -30 -35
## [5,] -21 -40
## [6,] -54 -58
## [7,] -23 -26
## [8,] -1 -10
## [9,] 7 -51
## [10,] -27 -59
## [11,] 5 -39
## [12,] -32 -45
## [13,] -8 -12
## [14,] -2 -4
## [15,] -14 -18
## [16,] 11 -43
## [17,] -44 -49
## [18,] 9 -56
## [19,] -37 -60
## [20,] -6 -11
## [21,] -29 -48
## [22,] -5 -19
## [23,] 10 -36
## [24,] -42 17
## [25,] -25 12
## [26,] 18 -41
## [27,] 21 -38
## [28,] 13 -17
## [29,] -34 -52
## [30,] 16 6
## [31,] 8 20
## [32,] 26 4
## [33,] 19 -57
## [34,] -47 -55
## [35,] 25 -53
## [36,] 24 -31
## [37,] 30 36
## [38,] -3 3
## [39,] -9 15
## [40,] -33 33
## [41,] 32 23
## [42,] 22 28
## [43,] 31 -15
## [44,] 37 27
## [45,] -20 40
## [46,] -22 35
## [47,] 44 34
## [48,] 14 39
## [49,] 1 29
## [50,] 45 2
## [51,] 38 42
## [52,] 43 -16
## [53,] 46 49
## [54,] 52 48
## [55,] 47 41
## [56,] 50 53
## [57,] 54 55
## [58,] 51 56
## [59,] 57 58
## Order of objects:
## [1] 1 10 6 11 15 16 2 4 9 14 18 21 40 39 43 54 58 42 44 49 31 29 48
## [24] 38 47 55 23 26 51 56 41 30 35 27 59 36 3 7 13 5 19 8 12 17 20 33
## [47] 37 60 57 28 46 22 25 32 45 53 24 50 34 52
## Height:
## [1] 0.11775833 0.92338041 0.50974266 1.47360965 2.04722777 2.51250579
## [7] 0.36355872 1.79099892 1.08967479 0.39308959 3.57679780 0.00000000
## [13] 0.21833707 0.44391855 0.80354844 0.08334529 0.98499722 0.70126085
## [19] 0.44921797 0.98499722 1.48962560 0.55960408 0.76573069 1.77868059
## [25] 0.97891452 2.79693737 0.09525176 0.12305649 0.48657744 0.76517620
## [31] 0.93270565 0.00000000 1.28196769 0.16054657 0.60321756 5.85655734
## [37] 1.07657773 0.00000000 1.98611220 0.59473487 1.44920797 0.33912975
## [43] 0.78523518 3.88572195 1.51921913 1.18521332 0.50902071 0.97225583
## [49] 1.91123321 0.00000000 3.39304108 1.52798723 0.72296652 0.31544012
## [55] 0.98335831 2.45910026 0.00000000 1.85224545 0.79085454
## Divisive coefficient:
## [1] 0.9117911
##
## 1770 dissimilarities, summarized :
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.845 2.572 2.595 3.354 5.857
## Metric : euclidean
## Number of objects : 60
##
## Available components:
## [1] "order" "height" "dc" "merge" "diss" "call" "data"
# Plots of the divisive clustering result.
plot(dv)
# cutree() is documented for hclust-style trees, so convert the diana
# result with as.hclust() before cutting it into four clusters.
fit2 <- cutree(as.hclust(dv), k = 4)
# Original (unscaled) rows that fall into cluster 1 of the divisive solution.
c_1 <- customer[fit2 == 1, ]
summary(c_1)
## ID Visit.Time Average.Expense Sex Age
## Min. : 1.000 Min. :3.000 Min. : 4.60 Min. :0 Min. : 9
## 1st Qu.: 5.000 1st Qu.:3.500 1st Qu.: 7.15 1st Qu.:0 1st Qu.:12
## Median :10.000 Median :5.000 Median :14.50 Median :0 Median :16
## Mean : 9.636 Mean :4.909 Mean :12.71 Mean :0 Mean :17
## 3rd Qu.:14.500 3rd Qu.:6.000 3rd Qu.:16.00 3rd Qu.:0 3rd Qu.:20
## Max. :18.000 Max. :8.000 Max. :23.80 Max. :0 Max. :30
# Structure of the scaled matrix (centering/scaling values are attributes).
str(customer_s)
## num [1:60, 1:4] -1.202 -0.757 1.692 -0.757 1.692 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:4] "Visit.Time" "Average.Expense" "Sex" "Age"
## - attr(*, "scaled:center")= Named num [1:4] 8.4 17.058 0.683 21.433
## ..- attr(*, "names")= chr [1:4] "Visit.Time" "Average.Expense" "Sex" "Age"
## - attr(*, "scaled:scale")= Named num [1:4] 4.492 8.399 0.469 9.285
## ..- attr(*, "names")= chr [1:4] "Visit.Time" "Average.Expense" "Sex" "Age"
# Fix the RNG so the k-means starting centres are reproducible.
set.seed(22)
# k-means with four clusters on the standardised data.
fit <- kmeans(customer_s, centers = 4)
?kmeans
# Grouped bar chart of the cluster centres, one group per variable.
barplot(t(fit$centers), beside = TRUE, xlab = "cluster", ylab = "value")
?barplot
fit$centers
## Visit.Time Average.Expense Sex Age
## 1 1.3302016 1.0155226 -1.4566845 0.5591307
## 2 -0.7771737 -0.5178412 -1.4566845 -0.4774599
## 3 0.8571173 0.9887331 0.6750489 1.0505015
## 4 -0.6322632 -0.7299063 0.6750489 -0.6411604
# Original (unscaled) rows assigned to cluster 1.
customer[fit$cluster == 1, ]
## ID Visit.Time Average.Expense Sex Age
## 3 3 16 33.5 0 32
## 5 5 16 24.9 0 23
## 7 7 12 28.5 0 33
## 8 8 14 18.8 0 27
## 12 12 14 21.0 0 25
## 13 13 12 28.5 0 33
## 17 17 14 23.6 0 22
## 19 19 17 25.9 0 18
# Scatterplot matrix of the raw variables, coloured by k-means cluster.
plot(customer[, -1], col = fit$cluster)
# install.packages("cluster")
library(cluster)
# Cluster plot of the k-means solution on the standardised data.
clusplot(customer_s, fit$cluster, color = TRUE, shade = TRUE)
# Inspect what the components are made of
pca <- princomp(customer_s)
summary(pca)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4
## Standard deviation 1.5339215 0.9953978 0.62428436 0.44706853
## Proportion of Variance 0.5981988 0.2519026 0.09908414 0.05081448
## Cumulative Proportion 0.5981988 0.8501014 0.94918552 1.00000000
# Loadings of each variable on each component.
pca$loadings
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4
## Visit.Time 0.576 0.601 0.554
## Average.Expense 0.602 0.146 -0.785
## Sex -0.989 0.133
## Age 0.550 -0.148 -0.775 0.274
##
## Comp.1 Comp.2 Comp.3 Comp.4
## SS loadings 1.00 1.00 1.00 1.00
## Proportion Var 0.25 0.25 0.25 0.25
## Cumulative Var 0.25 0.50 0.75 1.00
# Silhouette analysis of a 4-cluster k-means solution.
library("cluster")
# Single plotting panel.
par(mfrow = c(1, 1))
set.seed(22)
km <- kmeans(customer_s, 4)
# Silhouette widths from the cluster labels and the pairwise distances.
kms <- silhouette(km$cluster, dist(customer_s))
summary(kms)
## Silhouette of 60 units in 4 clusters from silhouette.default(x = km$cluster, dist = dist(customer_s)) :
## Cluster sizes and average silhouette widths:
## 8 11 16 25
## 0.5464597 0.4080823 0.3794910 0.5164434
## Individual silhouette widths:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1931 0.4030 0.4890 0.4641 0.5422 0.6333
# Silhouette plot of the 4-cluster solution.
plot(kms)
# Candidate numbers of clusters.
nk <- 2:10
# Average silhouette width for each k. The seed is reset inside the function
# so every k starts from the same RNG state; vapply() guarantees one number
# per k.
SW <- vapply(nk, function(k) {
  set.seed(22)
  km_k <- kmeans(customer_s, centers = k)
  summary(silhouette(km_k$cluster, dist(customer_s)))$avg.width
}, numeric(1))
plot(x = nk, y = SW, type = "l")
# Total within-cluster sum of squares for each k (seed reset per k as above,
# so no separate seed is needed before the loop).
WSS <- vapply(nk, function(k) {
  set.seed(22)
  kmeans(customer_s, centers = k)$tot.withinss
}, numeric(1))
WSS
## [1] 123.49224 93.08341 61.34890 48.76431 43.08965 40.25820 29.58014
## [8] 26.97709 24.99510
# Plot of within-cluster sum of squares against the number of clusters.
plot(
  x = nk,
  y = WSS,
  type = "l",
  xlab = "number of k",
  ylab = "within sum of squares"
)
# install.packages("fpc")
# install.packages("robustbase", repos="http://R-Forge.R-project.org")
library(fpc)
?cluster.stats
# Validation statistics for a 2-cluster k-means solution.
cluster.stats(dist(customer_s), kmeans(customer_s, centers = 2)$cluster)
## $n
## [1] 60
##
## $cluster.number
## [1] 2
##
## $cluster.size
## [1] 24 36
##
## $min.cluster.size
## [1] 24
##
## $noisen
## [1] 0
##
## $diameter
## [1] 3.885722 3.576798
##
## $average.distance
## [1] 2.045880 1.798055
##
## $median.distance
## [1] 2.243328 1.996810
##
## $separation
## [1] 0.9276315 0.9276315
##
## $average.toother
## [1] 3.351368 3.351368
##
## $separation.matrix
## [,1] [,2]
## [1,] 0.0000000 0.9276315
## [2,] 0.9276315 0.0000000
##
## $ave.between.matrix
## [,1] [,2]
## [1,] 0.000000 3.351368
## [2,] 3.351368 0.000000
##
## $average.between
## [1] 3.351368
##
## $average.within
## [1] 1.873552
##
## $n.between
## [1] 864
##
## $n.within
## [1] 906
##
## $max.diameter
## [1] 3.885722
##
## $min.separation
## [1] 0.9276315
##
## $within.cluster.ss
## [1] 123.4922
##
## $clus.avg.silwidths
## 1 2
## 0.3827935 0.4454536
##
## $avg.silwidth
## [1] 0.4203896
##
## $g2
## NULL
##
## $g3
## NULL
##
## $pearsongamma
## [1] 0.6564321
##
## $dunn
## [1] 0.2387282
##
## $dunn2
## [1] 1.638105
##
## $entropy
## [1] 0.6730117
##
## $wb.ratio
## [1] 0.559041
##
## $ch
## [1] 52.84097
##
## $cwidegap
## [1] 2.148705 2.131733
##
## $widestgap
## [1] 2.148705
##
## $sindex
## [1] 1.010004
##
## $corrected.rand
## NULL
##
## $vi
## NULL
# The distance matrix does not depend on k, so compute it once.
customer_s_dist <- dist(customer_s)
# Within-cluster sum of squares for each k in nk, via cluster.stats().
# The seed is reset for every k so each k-means run starts identically.
WSS <- vapply(nk, function(k) {
  set.seed(22)
  k_labels <- kmeans(customer_s, centers = k)$cluster
  cluster.stats(customer_s_dist, k_labels)$within.cluster.ss
}, numeric(1))
plot(
  x = nk,
  y = WSS,
  type = "l",
  xlab = "number of k",
  ylab = "within sum of squares"
)
# Average silhouette width for each k. Iterating over nk (not a separate
# hard-coded range) keeps SW the same length as the x-axis it is plotted on.
SW <- vapply(nk, function(k) {
  set.seed(22)
  k_labels <- kmeans(customer_s, centers = k)$cluster
  cluster.stats(customer_s_dist, k_labels)$avg.silwidth
}, numeric(1))
plot(x = nk, y = SW, type = "l")
# Hierarchical clustering with single linkage, cut into four clusters.
single_c <- hclust(dist(customer_s), method = "single")
hc_single <- cutree(single_c, k = 4)
# Hierarchical clustering with complete linkage, cut into four clusters.
complete_c <- hclust(dist(customer_s), method = "complete")
hc_complete <- cutree(complete_c, k = 4)
# Seeded k-means with four clusters and its validation statistics.
set.seed(22)
km <- kmeans(customer_s, 4)
cs <- cluster.stats(dist(customer_s), km$cluster)
# Keep only the two statistics of interest.
cs[c("within.cluster.ss", "avg.silwidth")]
## $within.cluster.ss
## [1] 61.3489
##
## $avg.silwidth
## [1] 0.4640587
# Within-cluster sum of squares and average silhouette width for each of the
# three clusterings, collected into one table (one column per method).
q <- sapply(
  list(
    kmeans = km$cluster,
    hc_single = hc_single,
    hc_complete = hc_complete
  ),
  function(cl) {
    cluster.stats(dist(customer_s), cl)[c("within.cluster.ss", "avg.silwidth")]
  }
)
q
## kmeans hc_single hc_complete
## within.cluster.ss 61.3489 136.0092 65.94076
## avg.silwidth 0.4640587 0.2481926 0.4255961
# install.packages("mlbench")
# The mlbench package provides many methods to generate simulated data with
# different shapes and sizes. In this example, we generate a Cassini problem
# graph.
library(mlbench)
# install.packages("fpc")
library(fpc)
set.seed(2)
# 500 simulated points; p$x holds the coordinates.
p <- mlbench.cassini(500)
plot(p$x)
?mlbench.cassini
# DBSCAN on the pairwise distance matrix (method = "dist").
ds <- dbscan(data = dist(p$x), eps = 0.2, MinPts = 2, method = "dist")
ds
## dbscan Pts=500 MinPts=2 eps=0.2
## 1 2 3
## seed 200 200 100
## total 200 200 100
# Plot the DBSCAN result over the original coordinates.
plot(ds, p$x)
# Extract the raw data of the points assigned to cluster 1.
cluster_1_raw <- p$x[ds$cluster == 1, ]
cluster_1_raw
## [,1] [,2]
## [1,] -0.878020041 -0.9762015
## [2,] 0.204310908 -1.8311169
## [3,] -1.033283148 -0.7664819
## [4,] -0.089110770 -1.2200260
## [5,] 0.146767003 -1.7177684
## [6,] 0.725874430 -1.8106878
## [7,] 0.451102355 -1.4799207
## [8,] -0.425548959 -1.3179628
## [9,] -0.977311794 -1.5286999
## [10,] 0.864295737 -0.7098223
## [11,] 0.039793615 -1.0964859
## [12,] 0.959690709 -1.6442071
## [13,] 0.465944793 -1.8592484
## [14,] 1.237244233 -0.8282179
## [15,] -0.625804973 -0.8010245
## [16,] 0.317509698 -0.9637028
## [17,] 0.752385028 -0.6808253
## [18,] 0.348629646 -1.6835199
## [19,] 1.000493954 -1.4000192
## [20,] -0.311664918 -1.3615982
## [21,] -0.624969555 -1.6033902
## [22,] -0.882224692 -0.8912601
## [23,] -0.590534200 -0.7114485
## [24,] -0.271268856 -1.1837040
## [25,] -0.416158140 -1.0248017
## [26,] -0.835962518 -0.7295600
## [27,] 0.649746827 -1.5558908
## [28,] -0.334361972 -1.2033798
## [29,] -0.100842130 -1.7851571
## [30,] -1.071873313 -1.3959494
## [31,] -0.833292664 -1.4157775
## [32,] -0.593559806 -1.1465330
## [33,] 1.111665006 -1.3745968
## [34,] -1.324461332 -0.9219018
## [35,] -0.352864039 -1.1809969
## [36,] 0.906469168 -0.7980869
## [37,] 1.261369418 -1.1895967
## [38,] -0.721705502 -0.8884615
## [39,] 0.175082884 -1.6118350
## [40,] 0.539582980 -1.5624870
## [41,] -0.818874233 -0.6291580
## [42,] -0.513059044 -1.0350265
## [43,] -0.876957755 -1.4607719
## [44,] -0.895933315 -1.6433053
## [45,] 0.360744853 -1.6198395
## [46,] -0.156942578 -0.9316785
## [47,] 0.468558055 -1.6013488
## [48,] -0.059688396 -1.6337438
## [49,] -0.885565535 -1.5227799
## [50,] 1.125117614 -1.4695238
## [51,] 0.779837759 -1.6453457
## [52,] -1.267874267 -0.8865873
## [53,] -0.565935794 -1.0583129
## [54,] -1.355198401 -1.2378089
## [55,] -0.677769509 -0.6677644
## [56,] -0.312223762 -0.8291433
## [57,] -0.419629384 -1.8780091
## [58,] 0.419366754 -1.2295931
## [59,] 1.240594060 -1.1852068
## [60,] 1.263970040 -0.8515952
## [61,] -1.063583088 -0.7606211
## [62,] -1.150307795 -0.9022401
## [63,] -1.100364472 -1.4493018
## [64,] 0.008498944 -1.7775133
## [65,] -0.836672143 -1.8149838
## [66,] 0.999430884 -1.0677434
## [67,] -0.343672879 -0.7894168
## [68,] -0.761394282 -1.7695864
## [69,] -0.851809277 -1.7150909
## [70,] 1.336660721 -1.2611998
## [71,] 0.909374488 -1.0265832
## [72,] -0.382126787 -1.7644891
## [73,] -0.969304533 -0.6027001
## [74,] -0.275587040 -1.3226617
## [75,] 0.505679994 -0.7335432
## [76,] 1.150520920 -1.3386278
## [77,] 0.595382492 -0.7723944
## [78,] 0.489829736 -1.5564774
## [79,] 1.088978634 -1.2538732
## [80,] 1.043025382 -1.6861489
## [81,] 0.118268481 -1.7543981
## [82,] -0.789308146 -1.3415347
## [83,] 1.094284148 -1.6172529
## [84,] 0.153857954 -0.8916541
## [85,] -0.464299827 -1.2733054
## [86,] -0.980257695 -1.5290886
## [87,] -1.279712214 -1.0883512
## [88,] 0.361285474 -1.5286257
## [89,] 0.858581717 -1.0253372
## [90,] -1.332177181 -0.9967377
## [91,] 1.079295343 -1.2404652
## [92,] 1.072808084 -0.6373208
## [93,] 0.972371113 -1.6303852
## [94,] 0.791835346 -1.3100768
## [95,] 0.443013721 -1.0278206
## [96,] 0.094980484 -1.1810036
## [97,] 0.758548042 -1.5286569
## [98,] -0.678020587 -1.5936866
## [99,] 0.681099042 -1.3005344
## [100,] 0.581809991 -1.4361657
## [101,] -0.949328033 -1.2934566
## [102,] 0.934646309 -1.3200800
## [103,] -0.838076695 -1.0010805
## [104,] -1.143456853 -1.3945971
## [105,] 0.294368850 -1.3797689
## [106,] 0.690397641 -1.8165608
## [107,] 0.431482058 -1.8374265
## [108,] -0.294068155 -1.7588508
## [109,] 0.391565978 -1.6878085
## [110,] -0.868340232 -0.7029784
## [111,] -1.092895774 -1.1875185
## [112,] 0.944616018 -0.7959523
## [113,] 1.008227022 -0.7349331
## [114,] 0.063169014 -1.5183811
## [115,] -0.883764856 -1.3822555
## [116,] -0.676683002 -1.1494103
## [117,] -0.905608044 -0.8047834
## [118,] 0.819799909 -1.1321852
## [119,] -1.247826523 -0.9938843
## [120,] 0.425023325 -1.7127364
## [121,] 0.028040646 -1.0690623
## [122,] 0.548264973 -1.0473068
## [123,] -1.109889201 -1.3317341
## [124,] -0.085253708 -1.6740866
## [125,] -1.144451660 -1.1216628
## [126,] -0.292571453 -1.2366095
## [127,] 1.007551267 -0.8961479
## [128,] 0.719466695 -0.6217663
## [129,] -0.476545375 -1.8497985
## [130,] -0.847643081 -0.7277424
## [131,] -0.758695996 -0.8727665
## [132,] 0.326395991 -1.1894974
## [133,] 0.241189831 -1.5574133
## [134,] -0.928170585 -0.8081438
## [135,] 0.416418319 -0.9710204
## [136,] -0.381826950 -1.0647319
## [137,] 0.628505803 -1.4240533
## [138,] 0.254806683 -0.8233607
## [139,] -0.557311573 -0.9897810
## [140,] -1.182075255 -1.1919261
## [141,] 1.080463546 -1.5803151
## [142,] -1.134948642 -0.7702150
## [143,] -1.038683810 -0.7100887
## [144,] 1.262473473 -1.3013711
## [145,] 0.431152384 -1.7899316
## [146,] 0.732401457 -1.2241335
## [147,] -0.797356507 -0.9931729
## [148,] 1.317411229 -1.2101679
## [149,] 0.078235150 -1.7534496
## [150,] 0.503113262 -0.9063731
## [151,] -0.099743764 -1.8801311
## [152,] 0.875963378 -0.7685420
## [153,] 1.359823570 -1.0845456
## [154,] -1.322579861 -1.3218902
## [155,] -0.646839537 -0.8774735
## [156,] -0.033284404 -0.9049642
## [157,] -1.274560036 -1.3949715
## [158,] -0.986175143 -1.3928907
## [159,] 0.516447124 -0.8282905
## [160,] -0.617672489 -0.7544855
## [161,] -0.090804023 -0.8836617
## [162,] 1.057596764 -1.3103589
## [163,] 0.581555766 -1.3198854
## [164,] 0.785857302 -0.6411018
## [165,] -0.596294891 -1.1266062
## [166,] 0.308849454 -1.7633732
## [167,] -0.508235955 -0.6506234
## [168,] -0.491856613 -1.2025502
## [169,] 0.534989613 -1.2040689
## [170,] -0.438313225 -1.7248040
## [171,] -0.196149509 -0.9212789
## [172,] -0.349246728 -1.3181133
## [173,] 0.837322887 -1.0071313
## [174,] -0.332437398 -1.4488438
## [175,] -0.312666601 -1.1529168
## [176,] 0.937778199 -1.2671769
## [177,] -0.109157470 -0.9522752
## [178,] -1.315804660 -0.9816730
## [179,] -0.672054106 -0.6749106
## [180,] 0.495424973 -0.8340842
## [181,] 0.560205803 -1.6648770
## [182,] -1.086024289 -0.9038942
## [183,] 0.276024465 -0.8513125
## [184,] 1.242073722 -1.5273660
## [185,] -0.039474960 -1.2577110
## [186,] 0.970639213 -1.2981420
## [187,] -0.433099332 -1.1673614
## [188,] 0.674286036 -1.3360663
## [189,] 1.113452181 -1.6006035
## [190,] -0.654962503 -1.2169928
## [191,] 1.261636182 -1.1705721
## [192,] 1.057543215 -0.8091239
## [193,] -0.103213762 -1.7898732
## [194,] 0.617210972 -1.1585440
## [195,] 0.098981815 -0.8887357
## [196,] 1.202563385 -0.9429087
## [197,] -0.385531083 -1.3979427
## [198,] -1.343070698 -0.9174087
## [199,] -0.344961580 -1.7403067
## [200,] 0.746652400 -1.0796416