# sqlite3 download page: https://www.sqlite.org/download.html
#install.packages('dbplyr')
#install.packages('RSQLite')
library('dplyr')
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library('dbplyr')
##
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
##
## ident, sql
library('RSQLite')
setwd('~/lecture/riii')
load('Statistics/applenews.RData')
my_database = src_sqlite('./mydatabase',create=T)
copy_to(my_database,applenews,temporary = F)
tbl(my_database,"applenews")
## # Source: table<applenews> [?? x 5]
## # Database: sqlite 3.19.3 [./mydatabase]
## content
## <chr>
## 1 "\n (更新:新增影片)想要透過刮刮樂彩券一夕致富,但他卻用錯方法!台
## 2 "\n 澳洲一名就讀雪梨大學的華裔博士生,日前公開一段燒毀中國護照的影片
## 3 "\n 【行銷專題企劃】房價高高在上,沒錢買房沒關係,但你認為自己是聰明
## 4 "\n 本內容由中央廣播電臺提供 美國國防部長卡特(Ash
## 5 "\n 俄羅斯夫妻Murad和Nataly,因牽手背景照「Follow
## 6 "\n 台灣浩鼎生技股份有限公司(4174)今(15)日中午在法務部公司
## 7 "\n 本內容由中央廣播電臺提供 衛生福利部國民健康署今天
## 8 "\n 連結嘉義縣民雄與竹崎交流道的嘉166線道民雄鄉大崎村段,近來出現
## 9 "\n 日本九州熊本縣遭強震重創,民航局表示,華航即日起到5月8日止,從
## 10 "\n 台中市政府二月下旬抽驗麵龜,發現2家業者麵龜製品檢出非法色素「鹽
## # ... with more rows, and 4 more variables: title <chr>, dt <dbl>,
## # category <chr>, clicked <int>
tbl(my_database,"applenews") %>% collect()
## # A tibble: 1,500 x 5
## content
## <chr>
## 1 "\n (更新:新增影片)想要透過刮刮樂彩券一夕致富,但他卻用錯方法!台
## 2 "\n 澳洲一名就讀雪梨大學的華裔博士生,日前公開一段燒毀中國護照的影片
## 3 "\n 【行銷專題企劃】房價高高在上,沒錢買房沒關係,但你認為自己是聰明
## 4 "\n 本內容由中央廣播電臺提供 美國國防部長卡特(Ash
## 5 "\n 俄羅斯夫妻Murad和Nataly,因牽手背景照「Follow
## 6 "\n 台灣浩鼎生技股份有限公司(4174)今(15)日中午在法務部公司
## 7 "\n 本內容由中央廣播電臺提供 衛生福利部國民健康署今天
## 8 "\n 連結嘉義縣民雄與竹崎交流道的嘉166線道民雄鄉大崎村段,近來出現
## 9 "\n 日本九州熊本縣遭強震重創,民航局表示,華航即日起到5月8日止,從
## 10 "\n 台中市政府二月下旬抽驗麵龜,發現2家業者麵龜製品檢出非法色素「鹽
## # ... with 1,490 more rows, and 4 more variables: title <chr>, dt <dbl>,
## # category <chr>, clicked <int>
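# Hedged aside: dbplyr builds queries lazily, so dplyr verbs on a tbl are
# translated to SQL and only executed on collect(). show_query() prints the
# generated SQL without fetching any rows (a sketch; it assumes the
# applenews table created above still exists in ./mydatabase).
tbl(my_database, "applenews") %>%
  filter(clicked > 1000) %>%
  select(title, clicked) %>%
  show_query()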
category_stat = tbl(my_database,"applenews") %>%
group_by(category) %>%
summarise_at(.funs=funs(min,max,mean), .vars=vars(matches('clicked'))) %>%
arrange(desc(mean)) %>%
collect()
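# Aside (hedged): later dplyr releases soft-deprecated funs() in favour of a
# named list, e.g. summarise_at(vars(clicked), list(min = min, max = max,
# mean = mean)); with a single column the result columns are still named
# min / max / mean, so arrange(desc(mean)) keeps working.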
library('ggplot2')
g <- ggplot(category_stat,aes(x=category,y=mean))
g + geom_bar(stat='identity') +
  scale_x_discrete(limits=category_stat$category) +
  theme(text=element_text(size=16, family="Songti SC"))
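# Equivalent ordering (hedged sketch): reorder the factor by descending mean
# inside aes() instead of passing limits= explicitly.
g2 <- ggplot(category_stat, aes(x = reorder(category, -mean), y = mean))
g2 + geom_bar(stat = 'identity') +
  theme(text = element_text(size = 16, family = "Songti SC"))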
# Further reading:
# http://www.r-bloggers.com/whats-the-difference-between-machine-learning-statistics-and-data-mining/
# http://mp.weixin.qq.com/s?__biz=MjM5ODczNTkwMA==&mid=2650107069&idx=1&sn=44a2eab6c4858c56af236749fdd1d784#rd
#install.packages("C50")
library(C50)
data(churn)
str(churnTrain)
## 'data.frame': 3333 obs. of 20 variables:
## $ state : Factor w/ 51 levels "AK","AL","AR",..: 17 36 32 36 37 2 20 25 19 50 ...
## $ account_length : int 128 107 137 84 75 118 121 147 117 141 ...
## $ area_code : Factor w/ 3 levels "area_code_408",..: 2 2 2 1 2 3 3 2 1 2 ...
## $ international_plan : Factor w/ 2 levels "no","yes": 1 1 1 2 2 2 1 2 1 2 ...
## $ voice_mail_plan : Factor w/ 2 levels "no","yes": 2 2 1 1 1 1 2 1 1 2 ...
## $ number_vmail_messages : int 25 26 0 0 0 0 24 0 0 37 ...
## $ total_day_minutes : num 265 162 243 299 167 ...
## $ total_day_calls : int 110 123 114 71 113 98 88 79 97 84 ...
## $ total_day_charge : num 45.1 27.5 41.4 50.9 28.3 ...
## $ total_eve_minutes : num 197.4 195.5 121.2 61.9 148.3 ...
## $ total_eve_calls : int 99 103 110 88 122 101 108 94 80 111 ...
## $ total_eve_charge : num 16.78 16.62 10.3 5.26 12.61 ...
## $ total_night_minutes : num 245 254 163 197 187 ...
## $ total_night_calls : int 91 103 104 89 121 118 118 96 90 97 ...
## $ total_night_charge : num 11.01 11.45 7.32 8.86 8.41 ...
## $ total_intl_minutes : num 10 13.7 12.2 6.6 10.1 6.3 7.5 7.1 8.7 11.2 ...
## $ total_intl_calls : int 3 3 5 7 3 6 7 6 4 5 ...
## $ total_intl_charge : num 2.7 3.7 3.29 1.78 2.73 1.7 2.03 1.92 2.35 3.02 ...
## $ number_customer_service_calls: int 1 1 0 2 3 0 3 0 1 0 ...
## $ churn : Factor w/ 2 levels "yes","no": 2 2 2 2 2 2 2 2 2 2 ...
names(churnTrain) %in% c("state", "area_code", "account_length")
## [1] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
!names(churnTrain) %in% c("state", "area_code", "account_length")
## [1] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [12] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# select the modeling variables
variable.list = !names(churnTrain) %in% c('state','area_code','account_length')
churnTrain=churnTrain[,variable.list]
str(churnTrain)
## 'data.frame': 3333 obs. of 17 variables:
## $ international_plan : Factor w/ 2 levels "no","yes": 1 1 1 2 2 2 1 2 1 2 ...
## $ voice_mail_plan : Factor w/ 2 levels "no","yes": 2 2 1 1 1 1 2 1 1 2 ...
## $ number_vmail_messages : int 25 26 0 0 0 0 24 0 0 37 ...
## $ total_day_minutes : num 265 162 243 299 167 ...
## $ total_day_calls : int 110 123 114 71 113 98 88 79 97 84 ...
## $ total_day_charge : num 45.1 27.5 41.4 50.9 28.3 ...
## $ total_eve_minutes : num 197.4 195.5 121.2 61.9 148.3 ...
## $ total_eve_calls : int 99 103 110 88 122 101 108 94 80 111 ...
## $ total_eve_charge : num 16.78 16.62 10.3 5.26 12.61 ...
## $ total_night_minutes : num 245 254 163 197 187 ...
## $ total_night_calls : int 91 103 104 89 121 118 118 96 90 97 ...
## $ total_night_charge : num 11.01 11.45 7.32 8.86 8.41 ...
## $ total_intl_minutes : num 10 13.7 12.2 6.6 10.1 6.3 7.5 7.1 8.7 11.2 ...
## $ total_intl_calls : int 3 3 5 7 3 6 7 6 4 5 ...
## $ total_intl_charge : num 2.7 3.7 3.29 1.78 2.73 1.7 2.03 1.92 2.35 3.02 ...
## $ number_customer_service_calls: int 1 1 0 2 3 0 3 0 1 0 ...
## $ churn : Factor w/ 2 levels "yes","no": 2 2 2 2 2 2 2 2 2 2 ...
#sample
?sample
## Help on topic 'sample' was found in the following packages:
##
## Package Library
## dplyr /Library/Frameworks/R.framework/Versions/3.3/Resources/library
## base /Library/Frameworks/R.framework/Resources/library
##
##
## Using the first match ...
sample(1:10)
## [1] 8 9 3 5 7 1 4 10 2 6
sample(1:10, size = 5)
## [1] 5 4 7 8 3
sample(c(0,1), size= 10, replace = T)
## [1] 0 1 0 0 1 0 1 1 0 1
sample.int(20, 12) # both arguments must be integers; here we draw 12 distinct values from 1:20
## [1] 12 3 6 2 11 5 7 13 19 9 10 18
set.seed(2)
# split the data into training and testing sets
ind<-sample(1:2, size=nrow(churnTrain), replace=T, prob=c(0.7, 0.3))
trainset=churnTrain[ind==1,]
testset=churnTrain[ind==2,]
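# Optional sanity check (hedged): the split is stochastic, so the realized
# fold sizes only approximate prob = c(0.7, 0.3), and the class balance
# should be similar in both subsets.
table(ind) / length(ind)
prop.table(table(trainset$churn))
prop.table(table(testset$churn))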
#install.packages('rpart')
library('rpart')
# build a decision tree model with rpart (CART)
?rpart
con = rpart.control(cp=0.01)
?rpart.control
churn.rp<-rpart(churn ~., data=trainset,control = con)
#churn.rp<-rpart(churn ~ total_day_charge + international_plan, data=trainset)
churn.rp
## n= 2315
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 2315 342 no (0.14773218 0.85226782)
## 2) total_day_minutes>=265.45 144 59 yes (0.59027778 0.40972222)
## 4) voice_mail_plan=no 110 29 yes (0.73636364 0.26363636)
## 8) total_eve_minutes>=188.5 67 3 yes (0.95522388 0.04477612) *
## 9) total_eve_minutes< 188.5 43 17 no (0.39534884 0.60465116)
## 18) total_day_minutes>=282.7 19 6 yes (0.68421053 0.31578947) *
## 19) total_day_minutes< 282.7 24 4 no (0.16666667 0.83333333) *
## 5) voice_mail_plan=yes 34 4 no (0.11764706 0.88235294) *
## 3) total_day_minutes< 265.45 2171 257 no (0.11837863 0.88162137)
## 6) number_customer_service_calls>=3.5 168 82 yes (0.51190476 0.48809524)
## 12) total_day_minutes< 160.2 71 10 yes (0.85915493 0.14084507) *
## 13) total_day_minutes>=160.2 97 25 no (0.25773196 0.74226804)
## 26) total_eve_minutes< 155.5 20 7 yes (0.65000000 0.35000000) *
## 27) total_eve_minutes>=155.5 77 12 no (0.15584416 0.84415584) *
## 7) number_customer_service_calls< 3.5 2003 171 no (0.08537194 0.91462806)
## 14) international_plan=yes 188 76 no (0.40425532 0.59574468)
## 28) total_intl_calls< 2.5 38 0 yes (1.00000000 0.00000000) *
## 29) total_intl_calls>=2.5 150 38 no (0.25333333 0.74666667)
## 58) total_intl_minutes>=13.1 32 0 yes (1.00000000 0.00000000) *
## 59) total_intl_minutes< 13.1 118 6 no (0.05084746 0.94915254) *
## 15) international_plan=no 1815 95 no (0.05234160 0.94765840)
## 30) total_day_minutes>=224.15 251 50 no (0.19920319 0.80079681)
## 60) total_eve_minutes>=259.8 36 10 yes (0.72222222 0.27777778) *
## 61) total_eve_minutes< 259.8 215 24 no (0.11162791 0.88837209) *
## 31) total_day_minutes< 224.15 1564 45 no (0.02877238 0.97122762) *
summary(churn.rp)
## Call:
## rpart(formula = churn ~ ., data = trainset, control = con)
## n= 2315
##
## CP nsplit rel error xerror xstd
## 1 0.07602339 0 1.0000000 1.0000000 0.04992005
## 2 0.07456140 2 0.8479532 0.9970760 0.04985964
## 3 0.05555556 4 0.6988304 0.7602339 0.04442127
## 4 0.02631579 7 0.4941520 0.5263158 0.03767329
## 5 0.02339181 8 0.4678363 0.5204678 0.03748096
## 6 0.02046784 10 0.4210526 0.5087719 0.03709209
## 7 0.01754386 11 0.4005848 0.4707602 0.03578773
## 8 0.01000000 12 0.3830409 0.4766082 0.03599261
##
## Variable importance
## total_day_minutes total_day_charge
## 18 18
## number_customer_service_calls total_intl_minutes
## 10 8
## total_intl_charge total_eve_charge
## 8 8
## total_eve_minutes international_plan
## 8 7
## total_intl_calls number_vmail_messages
## 6 3
## voice_mail_plan total_night_calls
## 3 1
## total_eve_calls
## 1
##
## Node number 1: 2315 observations, complexity param=0.07602339
## predicted class=no expected loss=0.1477322 P(node) =1
## class counts: 342 1973
## probabilities: 0.148 0.852
## left son=2 (144 obs) right son=3 (2171 obs)
## Primary splits:
## total_day_minutes < 265.45 to the right, improve=60.145020, (0 missing)
## total_day_charge < 45.125 to the right, improve=60.145020, (0 missing)
## number_customer_service_calls < 3.5 to the right, improve=53.641430, (0 missing)
## international_plan splits as RL, improve=43.729370, (0 missing)
## voice_mail_plan splits as LR, improve= 6.089388, (0 missing)
## Surrogate splits:
## total_day_charge < 45.125 to the right, agree=1, adj=1, (0 split)
##
## Node number 2: 144 observations, complexity param=0.07602339
## predicted class=yes expected loss=0.4097222 P(node) =0.06220302
## class counts: 85 59
## probabilities: 0.590 0.410
## left son=4 (110 obs) right son=5 (34 obs)
## Primary splits:
## voice_mail_plan splits as LR, improve=19.884860, (0 missing)
## number_vmail_messages < 9.5 to the left, improve=19.884860, (0 missing)
## total_eve_minutes < 167.05 to the right, improve=14.540020, (0 missing)
## total_eve_charge < 14.2 to the right, improve=14.540020, (0 missing)
## total_day_minutes < 283.9 to the right, improve= 6.339827, (0 missing)
## Surrogate splits:
## number_vmail_messages < 9.5 to the left, agree=1.000, adj=1.000, (0 split)
## total_night_minutes < 110.3 to the right, agree=0.785, adj=0.088, (0 split)
## total_night_charge < 4.965 to the right, agree=0.785, adj=0.088, (0 split)
## total_night_calls < 50 to the right, agree=0.778, adj=0.059, (0 split)
## total_intl_minutes < 15.3 to the left, agree=0.771, adj=0.029, (0 split)
##
## Node number 3: 2171 observations, complexity param=0.0745614
## predicted class=no expected loss=0.1183786 P(node) =0.937797
## class counts: 257 1914
## probabilities: 0.118 0.882
## left son=6 (168 obs) right son=7 (2003 obs)
## Primary splits:
## number_customer_service_calls < 3.5 to the right, improve=56.398210, (0 missing)
## international_plan splits as RL, improve=43.059160, (0 missing)
## total_day_minutes < 224.15 to the right, improve=10.847440, (0 missing)
## total_day_charge < 38.105 to the right, improve=10.847440, (0 missing)
## total_intl_minutes < 13.15 to the right, improve= 6.347319, (0 missing)
##
## Node number 4: 110 observations, complexity param=0.02631579
## predicted class=yes expected loss=0.2636364 P(node) =0.0475162
## class counts: 81 29
## probabilities: 0.736 0.264
## left son=8 (67 obs) right son=9 (43 obs)
## Primary splits:
## total_eve_minutes < 188.5 to the right, improve=16.419610, (0 missing)
## total_eve_charge < 16.025 to the right, improve=16.419610, (0 missing)
## total_night_minutes < 206.85 to the right, improve= 5.350500, (0 missing)
## total_night_charge < 9.305 to the right, improve= 5.350500, (0 missing)
## total_day_minutes < 281.15 to the right, improve= 5.254545, (0 missing)
## Surrogate splits:
## total_eve_charge < 16.025 to the right, agree=1.000, adj=1.000, (0 split)
## total_night_calls < 82 to the right, agree=0.655, adj=0.116, (0 split)
## total_intl_minutes < 3.35 to the right, agree=0.636, adj=0.070, (0 split)
## total_intl_charge < 0.905 to the right, agree=0.636, adj=0.070, (0 split)
## total_day_minutes < 268.55 to the right, agree=0.627, adj=0.047, (0 split)
##
## Node number 5: 34 observations
## predicted class=no expected loss=0.1176471 P(node) =0.01468683
## class counts: 4 30
## probabilities: 0.118 0.882
##
## Node number 6: 168 observations, complexity param=0.0745614
## predicted class=yes expected loss=0.4880952 P(node) =0.07257019
## class counts: 86 82
## probabilities: 0.512 0.488
## left son=12 (71 obs) right son=13 (97 obs)
## Primary splits:
## total_day_minutes < 160.2 to the left, improve=29.655880, (0 missing)
## total_day_charge < 27.235 to the left, improve=29.655880, (0 missing)
## total_eve_minutes < 180.65 to the left, improve= 8.556953, (0 missing)
## total_eve_charge < 15.355 to the left, improve= 8.556953, (0 missing)
## number_customer_service_calls < 4.5 to the right, improve= 5.975362, (0 missing)
## Surrogate splits:
## total_day_charge < 27.235 to the left, agree=1.000, adj=1.000, (0 split)
## total_night_calls < 79 to the left, agree=0.625, adj=0.113, (0 split)
## total_intl_calls < 2.5 to the left, agree=0.619, adj=0.099, (0 split)
## number_customer_service_calls < 4.5 to the right, agree=0.607, adj=0.070, (0 split)
## total_eve_calls < 89.5 to the left, agree=0.601, adj=0.056, (0 split)
##
## Node number 7: 2003 observations, complexity param=0.05555556
## predicted class=no expected loss=0.08537194 P(node) =0.8652268
## class counts: 171 1832
## probabilities: 0.085 0.915
## left son=14 (188 obs) right son=15 (1815 obs)
## Primary splits:
## international_plan splits as RL, improve=42.194510, (0 missing)
## total_day_minutes < 224.15 to the right, improve=16.838410, (0 missing)
## total_day_charge < 38.105 to the right, improve=16.838410, (0 missing)
## total_intl_minutes < 13.15 to the right, improve= 6.210678, (0 missing)
## total_intl_charge < 3.55 to the right, improve= 6.210678, (0 missing)
##
## Node number 8: 67 observations
## predicted class=yes expected loss=0.04477612 P(node) =0.02894168
## class counts: 64 3
## probabilities: 0.955 0.045
##
## Node number 9: 43 observations, complexity param=0.02046784
## predicted class=no expected loss=0.3953488 P(node) =0.01857451
## class counts: 17 26
## probabilities: 0.395 0.605
## left son=18 (19 obs) right son=19 (24 obs)
## Primary splits:
## total_day_minutes < 282.7 to the right, improve=5.680947, (0 missing)
## total_day_charge < 48.06 to the right, improve=5.680947, (0 missing)
## total_night_minutes < 212.65 to the right, improve=4.558140, (0 missing)
## total_night_charge < 9.57 to the right, improve=4.558140, (0 missing)
## total_eve_minutes < 145.4 to the right, improve=4.356169, (0 missing)
## Surrogate splits:
## total_day_charge < 48.06 to the right, agree=1.000, adj=1.000, (0 split)
## total_day_calls < 103 to the left, agree=0.674, adj=0.263, (0 split)
## total_eve_calls < 104.5 to the left, agree=0.674, adj=0.263, (0 split)
## total_intl_minutes < 11.55 to the left, agree=0.651, adj=0.211, (0 split)
## total_intl_charge < 3.12 to the left, agree=0.651, adj=0.211, (0 split)
##
## Node number 12: 71 observations
## predicted class=yes expected loss=0.1408451 P(node) =0.03066955
## class counts: 61 10
## probabilities: 0.859 0.141
##
## Node number 13: 97 observations, complexity param=0.01754386
## predicted class=no expected loss=0.257732 P(node) =0.04190065
## class counts: 25 72
## probabilities: 0.258 0.742
## left son=26 (20 obs) right son=27 (77 obs)
## Primary splits:
## total_eve_minutes < 155.5 to the left, improve=7.753662, (0 missing)
## total_eve_charge < 13.22 to the left, improve=7.753662, (0 missing)
## total_intl_minutes < 13.55 to the right, improve=2.366149, (0 missing)
## total_intl_charge < 3.66 to the right, improve=2.366149, (0 missing)
## number_customer_service_calls < 4.5 to the right, improve=2.297667, (0 missing)
## Surrogate splits:
## total_eve_charge < 13.22 to the left, agree=1.000, adj=1.00, (0 split)
## total_night_calls < 143.5 to the right, agree=0.814, adj=0.10, (0 split)
## total_eve_calls < 62 to the left, agree=0.804, adj=0.05, (0 split)
##
## Node number 14: 188 observations, complexity param=0.05555556
## predicted class=no expected loss=0.4042553 P(node) =0.0812095
## class counts: 76 112
## probabilities: 0.404 0.596
## left son=28 (38 obs) right son=29 (150 obs)
## Primary splits:
## total_intl_calls < 2.5 to the left, improve=33.806520, (0 missing)
## total_intl_minutes < 13.1 to the right, improve=30.527050, (0 missing)
## total_intl_charge < 3.535 to the right, improve=30.527050, (0 missing)
## total_day_minutes < 221.95 to the right, improve= 3.386095, (0 missing)
## total_day_charge < 37.735 to the right, improve= 3.386095, (0 missing)
##
## Node number 15: 1815 observations, complexity param=0.02339181
## predicted class=no expected loss=0.0523416 P(node) =0.7840173
## class counts: 95 1720
## probabilities: 0.052 0.948
## left son=30 (251 obs) right son=31 (1564 obs)
## Primary splits:
## total_day_minutes < 224.15 to the right, improve=12.5649300, (0 missing)
## total_day_charge < 38.105 to the right, improve=12.5649300, (0 missing)
## total_eve_minutes < 244.95 to the right, improve= 4.7875890, (0 missing)
## total_eve_charge < 20.825 to the right, improve= 4.7875890, (0 missing)
## total_night_minutes < 163.85 to the right, improve= 0.9074391, (0 missing)
## Surrogate splits:
## total_day_charge < 38.105 to the right, agree=1, adj=1, (0 split)
##
## Node number 18: 19 observations
## predicted class=yes expected loss=0.3157895 P(node) =0.008207343
## class counts: 13 6
## probabilities: 0.684 0.316
##
## Node number 19: 24 observations
## predicted class=no expected loss=0.1666667 P(node) =0.01036717
## class counts: 4 20
## probabilities: 0.167 0.833
##
## Node number 26: 20 observations
## predicted class=yes expected loss=0.35 P(node) =0.008639309
## class counts: 13 7
## probabilities: 0.650 0.350
##
## Node number 27: 77 observations
## predicted class=no expected loss=0.1558442 P(node) =0.03326134
## class counts: 12 65
## probabilities: 0.156 0.844
##
## Node number 28: 38 observations
## predicted class=yes expected loss=0 P(node) =0.01641469
## class counts: 38 0
## probabilities: 1.000 0.000
##
## Node number 29: 150 observations, complexity param=0.05555556
## predicted class=no expected loss=0.2533333 P(node) =0.06479482
## class counts: 38 112
## probabilities: 0.253 0.747
## left son=58 (32 obs) right son=59 (118 obs)
## Primary splits:
## total_intl_minutes < 13.1 to the right, improve=45.356840, (0 missing)
## total_intl_charge < 3.535 to the right, improve=45.356840, (0 missing)
## total_day_calls < 95.5 to the left, improve= 4.036407, (0 missing)
## total_day_minutes < 237.75 to the right, improve= 1.879020, (0 missing)
## total_day_charge < 40.42 to the right, improve= 1.879020, (0 missing)
## Surrogate splits:
## total_intl_charge < 3.535 to the right, agree=1.0, adj=1.000, (0 split)
## total_day_minutes < 52.45 to the left, agree=0.8, adj=0.063, (0 split)
## total_day_charge < 8.92 to the left, agree=0.8, adj=0.063, (0 split)
##
## Node number 30: 251 observations, complexity param=0.02339181
## predicted class=no expected loss=0.1992032 P(node) =0.1084233
## class counts: 50 201
## probabilities: 0.199 0.801
## left son=60 (36 obs) right son=61 (215 obs)
## Primary splits:
## total_eve_minutes < 259.8 to the right, improve=22.993380, (0 missing)
## total_eve_charge < 22.08 to the right, improve=22.993380, (0 missing)
## voice_mail_plan splits as LR, improve= 4.745664, (0 missing)
## number_vmail_messages < 7.5 to the left, improve= 4.745664, (0 missing)
## total_night_minutes < 181.15 to the right, improve= 3.509731, (0 missing)
## Surrogate splits:
## total_eve_charge < 22.08 to the right, agree=1, adj=1, (0 split)
##
## Node number 31: 1564 observations
## predicted class=no expected loss=0.02877238 P(node) =0.675594
## class counts: 45 1519
## probabilities: 0.029 0.971
##
## Node number 58: 32 observations
## predicted class=yes expected loss=0 P(node) =0.01382289
## class counts: 32 0
## probabilities: 1.000 0.000
##
## Node number 59: 118 observations
## predicted class=no expected loss=0.05084746 P(node) =0.05097192
## class counts: 6 112
## probabilities: 0.051 0.949
##
## Node number 60: 36 observations
## predicted class=yes expected loss=0.2777778 P(node) =0.01555076
## class counts: 26 10
## probabilities: 0.722 0.278
##
## Node number 61: 215 observations
## predicted class=no expected loss=0.1116279 P(node) =0.09287257
## class counts: 24 191
## probabilities: 0.112 0.888
# plot the decision tree
par(mfrow=c(1,1))
?plot.rpart
plot(churn.rp, uniform=TRUE,branch = 0.6, margin=0.1)
text(churn.rp, all=TRUE, use.n=TRUE, cex=0.7)
printcp(churn.rp)
##
## Classification tree:
## rpart(formula = churn ~ ., data = trainset, control = con)
##
## Variables actually used in tree construction:
## [1] international_plan number_customer_service_calls
## [3] total_day_minutes total_eve_minutes
## [5] total_intl_calls total_intl_minutes
## [7] voice_mail_plan
##
## Root node error: 342/2315 = 0.14773
##
## n= 2315
##
## CP nsplit rel error xerror xstd
## 1 0.076023 0 1.00000 1.00000 0.049920
## 2 0.074561 2 0.84795 0.99708 0.049860
## 3 0.055556 4 0.69883 0.76023 0.044421
## 4 0.026316 7 0.49415 0.52632 0.037673
## 5 0.023392 8 0.46784 0.52047 0.037481
## 6 0.020468 10 0.42105 0.50877 0.037092
## 7 0.017544 11 0.40058 0.47076 0.035788
## 8 0.010000 12 0.38304 0.47661 0.035993
plotcp(churn.rp)
# find the row with the minimum cross-validation error
min_row = which.min(churn.rp$cptable[,"xerror"])
churn.cp = churn.rp$cptable[min_row, "CP"]
# prune the tree using churn.cp as the complexity threshold
prune.tree=prune(churn.rp, cp=churn.cp)
plot(prune.tree, margin=0.1)
text(prune.tree, all=TRUE, use.n=TRUE, cex=0.7)
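# Alternative pruning rule (hedged sketch): the common 1-SE rule keeps the
# simplest tree whose xerror is within one standard error of the minimum.
xerr <- churn.rp$cptable[, "xerror"]
xstd <- churn.rp$cptable[, "xstd"]
se_row <- which(xerr <= xerr[min_row] + xstd[min_row])[1]
prune.tree.1se <- prune(churn.rp, cp = churn.rp$cptable[se_row, "CP"])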
predictions <- predict(prune.tree, testset, type='class')
table(predictions,testset$churn)
##
## predictions yes no
## yes 95 14
## no 46 863
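# Quick cross-check (hedged): accuracy computed directly from the table;
# the same value appears in confusionMatrix() below.
cm <- table(predictions, testset$churn)
sum(diag(cm)) / sum(cm)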
#install.packages('caret')
#install.packages('e1071')
library('caret')
## Loading required package: lattice
library('e1071')
confusionMatrix(table(predictions, testset$churn))
## Confusion Matrix and Statistics
##
##
## predictions yes no
## yes 95 14
## no 46 863
##
## Accuracy : 0.9411
## 95% CI : (0.9248, 0.9547)
## No Information Rate : 0.8615
## P-Value [Acc > NIR] : 2.786e-16
##
## Kappa : 0.727
## Mcnemar's Test P-Value : 6.279e-05
##
## Sensitivity : 0.67376
## Specificity : 0.98404
## Pos Pred Value : 0.87156
## Neg Pred Value : 0.94939
## Prevalence : 0.13851
## Detection Rate : 0.09332
## Detection Prevalence : 0.10707
## Balanced Accuracy : 0.82890
##
## 'Positive' Class : yes
##
?confusionMatrix
## Help on topic 'confusionMatrix' was found in the following
## packages:
##
## Package Library
## ModelMetrics /Library/Frameworks/R.framework/Versions/3.3/Resources/library
## caret /Library/Frameworks/R.framework/Versions/3.3/Resources/library
##
##
## Using the first match ...
#install.packages("party")
library('party')
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
ctree.model = ctree(churn ~ . , data = trainset)
plot(ctree.model, margin=0.1)
daycharge.model = ctree(churn ~ total_day_charge + international_plan, data = trainset)
plot(daycharge.model)
ctree.predict = predict(ctree.model ,testset)
table(ctree.predict, testset$churn)
##
## ctree.predict yes no
## yes 99 15
## no 42 862
confusionMatrix(table(ctree.predict, testset$churn))
## Confusion Matrix and Statistics
##
##
## ctree.predict yes no
## yes 99 15
## no 42 862
##
## Accuracy : 0.944
## 95% CI : (0.9281, 0.9573)
## No Information Rate : 0.8615
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7449
## Mcnemar's Test P-Value : 0.0005736
##
## Sensitivity : 0.70213
## Specificity : 0.98290
## Pos Pred Value : 0.86842
## Neg Pred Value : 0.95354
## Prevalence : 0.13851
## Detection Rate : 0.09725
## Detection Prevalence : 0.11198
## Balanced Accuracy : 0.84251
##
## 'Positive' Class : yes
##
#install.packages("C50")
library(C50)
c50.model = C5.0(churn ~., data=trainset)
?C5.0Control
c=C5.0Control(minCases = 20)
c50.model = C5.0(churn ~., data=trainset,control = c)
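# Caution (hedged): naming the control object `c` shadows base::c() in this
# session; a more defensive name would be c50.control.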
summary(c50.model)
##
## Call:
## C5.0.formula(formula = churn ~ ., data = trainset, control = c)
##
##
## C5.0 [Release 2.07 GPL Edition] Mon Feb 5 11:52:34 2018
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 2315 cases (17 attributes) from undefined.data
##
## Decision tree:
##
## number_customer_service_calls > 3:
## :...total_day_minutes <= 160.1: yes (71/10)
## : total_day_minutes > 160.1: no (108/32)
## number_customer_service_calls <= 3:
## :...international_plan = yes:
## :...total_intl_calls <= 2: yes (41)
## : total_intl_calls > 2:
## : :...total_intl_minutes <= 13.1: no (134/13)
## : total_intl_minutes > 13.1: yes (34)
## international_plan = no:
## :...total_day_minutes <= 224.1: no (1564/45)
## total_day_minutes > 224.1:
## :...voice_mail_plan = yes: no (97/4)
## voice_mail_plan = no:
## :...total_eve_charge <= 17.47:
## :...total_day_minutes <= 278.4: no (124/10)
## : total_day_minutes > 278.4: yes (20/5)
## total_eve_charge > 17.47:
## :...total_day_minutes > 264: yes (46)
## total_day_minutes <= 264:
## :...total_eve_charge > 22.04: yes (29/4)
## total_eve_charge <= 22.04:
## :...total_night_charge <= 9.04: no (23/1)
## total_night_charge > 9.04: yes (24/9)
##
##
## Evaluation on training data (2315 cases):
##
## Decision Tree
## ----------------
## Size Errors
##
## 13 133( 5.7%) <<
##
##
## (a) (b) <-classified as
## ---- ----
## 237 105 (a): class yes
## 28 1945 (b): class no
##
##
## Attribute usage:
##
## 100.00% number_customer_service_calls
## 92.27% international_plan
## 90.97% total_day_minutes
## 15.68% voice_mail_plan
## 11.49% total_eve_charge
## 9.03% total_intl_calls
## 7.26% total_intl_minutes
## 2.03% total_night_charge
##
##
## Time: 0.0 secs
plot(c50.model)
c50.predict = predict(c50.model,testset)
table(c50.predict, testset$churn)
##
## c50.predict yes no
## yes 97 15
## no 44 862
confusionMatrix(table(c50.predict, testset$churn))
## Confusion Matrix and Statistics
##
##
## c50.predict yes no
## yes 97 15
## no 44 862
##
## Accuracy : 0.942
## 95% CI : (0.9259, 0.9556)
## No Information Rate : 0.8615
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7342
## Mcnemar's Test P-Value : 0.0002671
##
## Sensitivity : 0.68794
## Specificity : 0.98290
## Pos Pred Value : 0.86607
## Neg Pred Value : 0.95143
## Prevalence : 0.13851
## Detection Rate : 0.09528
## Detection Prevalence : 0.11002
## Balanced Accuracy : 0.83542
##
## 'Positive' Class : yes
##
ind = cut(1:nrow(churnTrain), breaks=10, labels=F)
ind
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [24] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [47] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [70] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [93] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [116] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [139] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [162] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [185] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [208] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [231] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [254] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [277] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [300] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [323] 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
## [346] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [369] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [392] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [415] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [438] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [461] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [484] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [507] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [530] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [553] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [576] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [599] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [622] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [645] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [668] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [691] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [714] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [737] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [760] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [783] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [806] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [829] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [852] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [875] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [898] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [921] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [944] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [967] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [990] 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4
## [1013] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1036] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1059] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1082] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1105] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1128] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1151] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1174] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1197] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1220] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1243] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1266] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1289] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1312] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5
## [1335] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1358] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1381] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1404] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1427] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1450] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1473] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1496] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1519] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1542] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1565] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1588] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1611] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1634] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [1657] 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6
## [1680] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [1703] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [1726] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [1749] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [1772] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [1795] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [1818] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [1841] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [1864] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [1887] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [1910] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [1933] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [1956] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [1979] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7
## [2002] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2025] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2048] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2071] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2094] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2117] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2140] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2163] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2186] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2209] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2232] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2255] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2278] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2301] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [2324] 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2347] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2370] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2393] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2416] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2439] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2462] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2485] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2508] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2531] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2554] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2577] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2600] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2623] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [2646] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9
## [2669] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2692] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2715] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2738] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2761] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2784] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2807] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2830] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2853] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2876] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2899] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2922] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2945] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2968] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [2991] 9 9 9 9 9 9 9 9 9 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3014] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3037] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3060] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3083] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3106] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3129] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3152] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3175] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3198] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3221] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3244] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3267] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3290] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [3313] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
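# Note (hedged): cut() assigns contiguous blocks of rows to folds, so the
# folds are not shuffled; if churnTrain were ordered (e.g. by class) this
# would bias the estimate. A common fix is ind = sample(ind).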
accuracies = c()
for (i in 1:10) {
  # train on the nine folds not equal to i
  fit = rpart(formula=churn ~., data=churnTrain[ind != i,])
  # predict the held-out fold (drop the churn column from the predictors)
  predictions = predict(fit, churnTrain[ind == i, !names(churnTrain) %in% c("churn")], type="class")
  correct_count = sum(predictions == churnTrain[ind == i, c("churn")])
  # note: append(new, accuracies) prepends, so the vector ends up in reverse
  # fold order; the mean below is unaffected
  accuracies = append(correct_count / nrow(churnTrain[ind == i,]), accuracies)
}
accuracies
## [1] 0.9640719 0.9099099 0.9519520 0.9519520 0.9369369 0.9371257 0.9369369
## [8] 0.9429429 0.9399399 0.9341317
mean(accuracies)
## [1] 0.94059
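# Fold-to-fold spread (hedged): a rough error bar on the 10-fold estimate.
sd(accuracies)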
#install.packages("caret")
library(caret)
control = trainControl(method="repeatedcv", number=10, repeats=3)
model = train(churn~., data=trainset, method="rpart", trControl=control)
model
## CART
##
## 2315 samples
## 16 predictor
## 2 classes: 'yes', 'no'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 2083, 2084, 2083, 2084, 2084, 2083, ...
## Resampling results across tuning parameters:
##
## cp Accuracy Kappa
## 0.05555556 0.9036795 0.5396942
## 0.07456140 0.8663810 0.2526744
## 0.07602339 0.8574617 0.1761057
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.05555556.
predictions = predict(model, testset)
table(predictions,testset$churn)
##
## predictions yes no
## yes 64 10
## no 77 867
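# The same confusionMatrix() summary as earlier applies here (hedged; the
# exact counts depend on the resampling randomness above).
confusionMatrix(table(predictions, testset$churn))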
library('caret')
importance = varImp(model, scale=FALSE)
importance
## rpart variable importance
##
## Overall
## number_customer_service_calls 116.015
## total_day_minutes 106.988
## total_day_charge 100.648
## international_planyes 86.789
## voice_mail_planyes 25.974
## total_eve_minutes 23.097
## total_eve_charge 23.097
## number_vmail_messages 19.885
## total_intl_minutes 6.347
## total_day_calls 0.000
## total_night_minutes 0.000
## total_night_calls 0.000
## total_intl_calls 0.000
## total_night_charge 0.000
## total_intl_charge 0.000
## total_eve_calls 0.000
plot(importance)
#install.packages("ROCR")
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
predictions <- predict(churn.rp, testset, type="prob")
head(predictions)
## yes no
## 2 0.02877238 0.97122762
## 5 0.05084746 0.94915254
## 6 0.05084746 0.94915254
## 8 0.05084746 0.94915254
## 13 0.02877238 0.97122762
## 16 0.95522388 0.04477612
pred.to.roc <- predictions[, 1]  # probability of the "yes" class
head(pred.to.roc)
## 2 5 6 8 13 16
## 0.02877238 0.05084746 0.05084746 0.05084746 0.02877238 0.95522388
pred.rocr<-prediction(pred.to.roc, testset$churn)
pred.rocr
## An object of class "prediction"
## Slot "predictions":
## [[1]]
## 2 5 6 8 13 16
## 0.02877238 0.05084746 0.05084746 0.05084746 0.02877238 0.95522388
## 17 23 29 33 34 37
## 0.02877238 0.02877238 0.02877238 0.02877238 0.11162791 0.02877238
## 41 45 46 47 50 54
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 56 57 58 60 61 62
## 0.02877238 0.02877238 0.02877238 0.11162791 0.02877238 0.11764706
## 65 76 78 86 87 89
## 0.02877238 0.02877238 0.85915493 0.02877238 0.85915493 0.02877238
## 91 103 104 107 112 115
## 0.02877238 0.02877238 0.02877238 0.11764706 0.02877238 0.02877238
## 116 117 119 123 124 126
## 1.00000000 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 132 134 138 146 153 155
## 0.02877238 0.15584416 0.02877238 0.11162791 0.02877238 0.65000000
## 157 161 162 164 168 171
## 0.95522388 0.02877238 0.02877238 0.02877238 0.02877238 0.11162791
## 174 176 181 183 184 186
## 0.02877238 0.11162791 0.15584416 0.02877238 0.02877238 0.02877238
## 188 193 201 204 206 211
## 0.95522388 0.02877238 0.02877238 0.02877238 0.02877238 0.11162791
## 213 215 222 226 227 229
## 0.02877238 1.00000000 0.02877238 0.02877238 0.02877238 0.11764706
## 230 233 235 241 244 245
## 0.02877238 0.02877238 0.05084746 0.02877238 0.11162791 0.02877238
## 246 251 255 261 265 269
## 0.02877238 0.85915493 0.05084746 0.02877238 0.11162791 0.02877238
## 271 272 273 275 277 282
## 0.02877238 0.02877238 0.02877238 0.02877238 0.05084746 0.02877238
## 286 289 293 295 301 304
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 305 309 313 319 332 333
## 0.02877238 0.02877238 0.02877238 0.11162791 0.95522388 0.85915493
## 339 340 341 342 343 348
## 0.72222222 0.11162791 0.02877238 0.02877238 0.02877238 0.02877238
## 354 356 357 369 371 378
## 0.02877238 0.02877238 0.05084746 0.02877238 0.02877238 0.02877238
## 389 390 391 392 398 400
## 0.02877238 0.02877238 0.02877238 0.02877238 0.72222222 0.02877238
## 405 406 410 412 417 420
## 0.15584416 0.05084746 0.02877238 0.02877238 1.00000000 0.02877238
## 421 425 432 433 436 438
## 0.02877238 0.02877238 0.02877238 0.11162791 0.11162791 0.16666667
## 439 443 447 452 455 456
## 0.02877238 0.02877238 0.02877238 0.11162791 0.72222222 0.11162791
## 458 459 461 466 468 476
## 0.02877238 0.02877238 0.11162791 1.00000000 0.02877238 0.02877238
## 480 482 483 484 485 487
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.05084746
## 489 490 494 498 506 509
## 0.11162791 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 517 520 521 524 530 532
## 0.02877238 0.02877238 0.11162791 0.02877238 0.05084746 0.02877238
## 533 536 537 540 542 544
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.11162791
## 548 550 551 555 560 561
## 0.85915493 0.15584416 0.15584416 0.02877238 0.02877238 0.02877238
## 563 565 569 571 573 574
## 0.02877238 0.11162791 0.68421053 0.02877238 0.05084746 0.11162791
## 576 577 583 586 589 590
## 0.11764706 0.02877238 0.02877238 0.02877238 0.85915493 0.02877238
## 596 605 609 611 615 617
## 0.02877238 0.02877238 0.02877238 0.11162791 0.11162791 0.05084746
## 620 628 633 642 643 645
## 0.95522388 0.15584416 0.02877238 0.02877238 0.02877238 0.02877238
## 648 651 653 655 657 661
## 0.02877238 0.11162791 0.02877238 0.02877238 0.02877238 0.72222222
## 664 667 669 673 676 677
## 0.02877238 0.11162791 0.11162791 0.02877238 0.11162791 0.11162791
## 678 689 695 697 701 702
## 0.02877238 0.11764706 0.85915493 0.02877238 0.02877238 0.02877238
## 703 707 709 711 715 717
## 0.02877238 0.02877238 0.05084746 0.02877238 0.02877238 0.05084746
## 721 722 723 726 728 734
## 0.02877238 0.85915493 0.02877238 0.02877238 0.11162791 0.11162791
## 735 743 747 754 755 759
## 0.05084746 0.15584416 0.02877238 0.02877238 0.02877238 0.11764706
## 760 764 769 777 778 779
## 0.02877238 0.02877238 0.11162791 0.11162791 0.05084746 0.15584416
## 783 785 787 792 795 798
## 0.11162791 0.11764706 0.02877238 0.02877238 0.02877238 1.00000000
## 799 800 806 809 814 817
## 0.02877238 0.02877238 0.02877238 0.05084746 0.02877238 0.11162791
## 820 830 833 837 838 841
## 0.02877238 0.02877238 0.95522388 0.02877238 0.05084746 0.02877238
## 845 846 847 849 851 853
## 0.02877238 0.11764706 0.02877238 0.11162791 0.16666667 0.02877238
## 855 858 864 866 867 868
## 0.02877238 0.02877238 0.02877238 0.15584416 0.02877238 0.02877238
## 874 890 892 895 906 908
## 0.02877238 0.02877238 0.02877238 0.85915493 0.65000000 0.02877238
## 909 915 917 922 929 932
## 0.15584416 0.95522388 0.02877238 0.02877238 0.02877238 0.02877238
## 934 937 938 941 943 945
## 1.00000000 0.02877238 0.11162791 0.02877238 0.11162791 0.15584416
## 946 954 956 962 966 968
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.11162791
## 969 974 978 981 985 988
## 0.02877238 0.02877238 0.85915493 0.02877238 0.02877238 0.11764706
## 992 998 1003 1008 1011 1015
## 0.02877238 0.02877238 0.02877238 0.02877238 0.11162791 0.02877238
## 1019 1022 1026 1027 1030 1033
## 0.11162791 0.02877238 0.72222222 0.02877238 0.02877238 0.11162791
## 1034 1035 1036 1039 1043 1044
## 0.02877238 0.02877238 0.02877238 0.85915493 0.05084746 0.02877238
## 1048 1050 1051 1053 1054 1057
## 0.02877238 0.02877238 0.02877238 0.05084746 0.02877238 0.02877238
## 1065 1067 1068 1072 1073 1076
## 0.05084746 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 1077 1079 1086 1087 1091 1094
## 0.02877238 0.95522388 0.11162791 0.02877238 0.02877238 0.02877238
## 1099 1103 1105 1107 1108 1111
## 0.02877238 0.65000000 0.15584416 0.02877238 0.02877238 0.02877238
## 1112 1115 1117 1120 1121 1125
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 1131 1132 1133 1135 1136 1137
## 0.02877238 0.11162791 0.02877238 0.02877238 0.11162791 0.11764706
## 1141 1142 1143 1144 1146 1148
## 0.02877238 0.02877238 0.85915493 0.02877238 0.02877238 0.11162791
## 1149 1150 1153 1157 1159 1160
## 0.02877238 0.05084746 0.02877238 0.02877238 0.05084746 0.02877238
## 1163 1164 1167 1172 1174 1179
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 1186 1188 1190 1193 1209 1211
## 0.02877238 0.02877238 0.02877238 0.15584416 0.02877238 0.16666667
## 1212 1214 1215 1216 1217 1222
## 0.02877238 0.11162791 0.02877238 0.02877238 0.02877238 0.02877238
## 1224 1226 1227 1228 1232 1233
## 0.02877238 0.02877238 0.02877238 0.02877238 0.95522388 0.11162791
## 1236 1242 1243 1246 1249 1252
## 0.15584416 0.15584416 0.05084746 0.02877238 0.05084746 0.02877238
## 1256 1257 1260 1262 1263 1266
## 0.02877238 0.02877238 0.02877238 0.02877238 0.65000000 0.11162791
## 1267 1270 1273 1276 1279 1291
## 0.02877238 0.85915493 0.15584416 0.02877238 0.72222222 0.11162791
## 1294 1296 1300 1301 1303 1306
## 0.02877238 0.02877238 0.95522388 0.02877238 0.95522388 0.02877238
## 1309 1310 1316 1322 1323 1325
## 0.02877238 0.05084746 0.11162791 0.02877238 1.00000000 0.02877238
## 1326 1329 1332 1333 1339 1347
## 0.85915493 0.02877238 0.02877238 0.11162791 0.02877238 1.00000000
## 1368 1384 1385 1386 1390 1396
## 0.05084746 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 1397 1399 1401 1406 1407 1411
## 0.15584416 0.02877238 0.02877238 0.85915493 0.02877238 0.02877238
## 1412 1414 1422 1423 1424 1425
## 0.02877238 0.02877238 0.11162791 0.02877238 0.05084746 0.02877238
## 1426 1427 1431 1433 1437 1439
## 0.02877238 0.02877238 0.02877238 0.02877238 0.11162791 0.02877238
## 1444 1445 1446 1450 1451 1454
## 0.05084746 0.02877238 0.02877238 0.15584416 0.02877238 0.11162791
## 1455 1460 1461 1462 1463 1465
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 1469 1471 1476 1477 1483 1484
## 0.02877238 0.02877238 0.02877238 0.02877238 0.11162791 0.05084746
## 1485 1487 1495 1497 1500 1508
## 1.00000000 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 1510 1528 1529 1531 1532 1538
## 0.11162791 0.02877238 0.02877238 0.11162791 0.85915493 0.85915493
## 1540 1541 1546 1550 1554 1560
## 0.05084746 0.02877238 0.02877238 0.02877238 0.02877238 0.11162791
## 1564 1569 1573 1574 1575 1580
## 0.02877238 0.02877238 0.02877238 0.02877238 0.05084746 0.02877238
## 1581 1596 1597 1599 1600 1604
## 0.72222222 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 1606 1613 1615 1617 1618 1628
## 0.02877238 0.02877238 0.72222222 0.02877238 0.05084746 0.02877238
## 1636 1638 1642 1643 1645 1650
## 0.85915493 0.11162791 0.05084746 0.02877238 0.02877238 0.02877238
## 1658 1659 1660 1663 1669 1673
## 0.02877238 0.02877238 0.02877238 0.85915493 0.02877238 0.02877238
## 1678 1681 1682 1683 1684 1685
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 1686 1694 1702 1704 1705 1708
## 0.02877238 0.02877238 0.95522388 0.02877238 0.95522388 0.85915493
## 1709 1713 1714 1718 1719 1721
## 0.15584416 0.15584416 0.85915493 0.02877238 0.16666667 0.02877238
## 1725 1727 1728 1730 1731 1736
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 1738 1743 1754 1759 1761 1763
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 1765 1766 1771 1775 1777 1788
## 0.11162791 0.85915493 0.02877238 0.11162791 0.02877238 0.02877238
## 1795 1800 1803 1813 1821 1824
## 0.95522388 0.02877238 0.15584416 0.95522388 0.02877238 0.11162791
## 1825 1827 1828 1829 1831 1835
## 0.02877238 0.02877238 0.02877238 0.02877238 0.68421053 0.02877238
## 1836 1837 1838 1841 1843 1848
## 0.15584416 0.02877238 1.00000000 0.11162791 0.95522388 0.02877238
## 1850 1852 1856 1858 1859 1866
## 0.68421053 0.15584416 0.02877238 0.02877238 0.72222222 0.85915493
## 1869 1873 1875 1876 1882 1889
## 0.02877238 0.05084746 0.02877238 0.02877238 0.95522388 0.02877238
## 1892 1894 1902 1907 1911 1912
## 0.02877238 0.11162791 0.02877238 0.02877238 0.02877238 0.15584416
## 1915 1916 1926 1930 1932 1937
## 0.02877238 0.02877238 0.02877238 0.02877238 0.05084746 0.02877238
## 1938 1939 1940 1941 1946 1961
## 0.02877238 0.02877238 0.02877238 0.02877238 0.11162791 0.02877238
## 1962 1966 1968 1972 1982 1983
## 0.02877238 0.11162791 0.02877238 0.02877238 0.02877238 0.02877238
## 1985 1987 1988 1990 1991 1992
## 0.95522388 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 1996 2000 2003 2008 2011 2014
## 0.11162791 0.02877238 0.02877238 0.11162791 0.02877238 0.02877238
## 2019 2023 2025 2027 2029 2032
## 0.02877238 0.02877238 0.02877238 0.02877238 0.68421053 0.85915493
## 2034 2037 2038 2042 2043 2046
## 0.02877238 0.11162791 0.02877238 0.02877238 0.72222222 0.15584416
## 2051 2055 2057 2059 2060 2063
## 0.02877238 0.02877238 0.02877238 0.02877238 0.95522388 0.02877238
## 2065 2069 2070 2075 2077 2078
## 0.02877238 0.02877238 0.11162791 0.02877238 0.15584416 0.11162791
## 2079 2080 2096 2097 2102 2110
## 0.02877238 0.11162791 0.11162791 0.11162791 0.02877238 0.02877238
## 2112 2116 2122 2125 2128 2131
## 0.02877238 1.00000000 0.02877238 0.02877238 0.02877238 0.11162791
## 2133 2139 2141 2145 2147 2153
## 0.02877238 0.02877238 0.65000000 0.02877238 0.05084746 0.05084746
## 2154 2157 2161 2163 2165 2167
## 0.02877238 0.11162791 1.00000000 0.02877238 1.00000000 0.02877238
## 2168 2178 2180 2182 2189 2194
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.11162791
## 2196 2199 2200 2208 2211 2212
## 0.02877238 0.11162791 0.11162791 0.02877238 0.95522388 0.15584416
## 2213 2219 2224 2233 2238 2243
## 0.11162791 0.85915493 0.65000000 0.02877238 0.15584416 0.02877238
## 2245 2246 2250 2257 2258 2259
## 0.11162791 0.02877238 0.02877238 0.02877238 0.02877238 0.95522388
## 2261 2262 2269 2270 2274 2282
## 0.11764706 0.15584416 0.02877238 0.02877238 0.02877238 0.02877238
## 2283 2285 2288 2289 2290 2292
## 0.02877238 0.02877238 0.16666667 0.02877238 0.05084746 0.02877238
## 2293 2295 2297 2305 2313 2316
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 2319 2320 2321 2323 2325 2328
## 0.02877238 0.11764706 0.02877238 0.15584416 0.85915493 0.65000000
## 2331 2332 2340 2343 2344 2351
## 0.02877238 0.02877238 0.02877238 0.02877238 1.00000000 0.02877238
## 2354 2355 2356 2360 2362 2369
## 0.05084746 0.95522388 0.11162791 0.02877238 0.02877238 0.11162791
## 2370 2372 2374 2377 2379 2381
## 0.65000000 0.02877238 0.11162791 0.95522388 0.02877238 0.65000000
## 2382 2386 2393 2396 2397 2399
## 0.02877238 0.02877238 0.02877238 0.15584416 0.05084746 0.02877238
## 2407 2413 2416 2427 2428 2436
## 0.02877238 0.68421053 0.85915493 0.02877238 0.02877238 0.11764706
## 2440 2441 2444 2445 2448 2449
## 0.02877238 0.02877238 0.02877238 0.15584416 0.02877238 0.11162791
## 2452 2454 2456 2468 2470 2472
## 0.95522388 0.02877238 0.02877238 0.05084746 0.02877238 0.02877238
## 2474 2475 2476 2477 2480 2482
## 0.02877238 0.02877238 0.02877238 0.02877238 1.00000000 0.02877238
## 2485 2487 2488 2492 2495 2496
## 0.02877238 0.02877238 0.02877238 0.02877238 0.11162791 0.02877238
## 2497 2507 2508 2509 2512 2523
## 0.02877238 0.11162791 0.02877238 0.02877238 0.02877238 0.02877238
## 2524 2527 2529 2533 2539 2544
## 0.02877238 1.00000000 0.02877238 0.02877238 0.02877238 0.02877238
## 2549 2551 2552 2555 2556 2561
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 2565 2569 2571 2574 2579 2580
## 0.02877238 0.05084746 0.02877238 0.68421053 0.02877238 0.02877238
## 2582 2583 2584 2585 2590 2592
## 0.85915493 0.02877238 0.02877238 0.02877238 0.02877238 0.72222222
## 2593 2598 2600 2602 2604 2605
## 0.15584416 0.02877238 0.72222222 0.02877238 0.11764706 0.02877238
## 2606 2611 2614 2616 2617 2619
## 0.72222222 0.02877238 0.11162791 0.02877238 0.02877238 0.05084746
## 2620 2623 2632 2634 2635 2636
## 1.00000000 0.85915493 0.02877238 0.02877238 0.02877238 0.02877238
## 2637 2640 2643 2645 2647 2651
## 0.02877238 0.02877238 0.05084746 0.02877238 0.15584416 0.02877238
## 2655 2656 2658 2660 2661 2662
## 0.02877238 0.02877238 0.11162791 0.02877238 0.95522388 0.11162791
## 2665 2666 2668 2671 2678 2688
## 1.00000000 0.05084746 0.02877238 0.02877238 0.68421053 1.00000000
## 2689 2697 2700 2705 2710 2712
## 0.02877238 0.95522388 0.02877238 0.02877238 0.02877238 0.02877238
## 2714 2718 2720 2725 2728 2730
## 0.02877238 0.02877238 0.02877238 0.16666667 0.05084746 0.02877238
## 2740 2743 2752 2753 2754 2756
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 2761 2768 2770 2772 2780 2782
## 1.00000000 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 2783 2785 2786 2787 2791 2793
## 0.02877238 0.11162791 0.15584416 0.85915493 0.02877238 0.02877238
## 2795 2798 2801 2803 2810 2812
## 0.05084746 0.02877238 1.00000000 0.02877238 0.02877238 0.02877238
## 2813 2822 2823 2827 2835 2837
## 0.02877238 0.11162791 0.05084746 0.02877238 0.02877238 0.02877238
## 2838 2839 2840 2850 2851 2855
## 0.02877238 0.02877238 0.95522388 0.11162791 0.02877238 0.02877238
## 2862 2865 2866 2868 2869 2872
## 0.15584416 0.11162791 0.02877238 0.11162791 1.00000000 0.02877238
## 2874 2877 2878 2881 2882 2891
## 0.02877238 0.11162791 0.02877238 0.11162791 0.02877238 0.11162791
## 2892 2900 2905 2906 2908 2910
## 0.02877238 0.02877238 0.05084746 0.02877238 0.02877238 0.02877238
## 2920 2923 2938 2940 2944 2948
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.72222222
## 2951 2954 2955 2960 2962 2963
## 0.02877238 0.15584416 0.02877238 0.11162791 0.15584416 0.11162791
## 2966 2973 2975 2981 2982 2984
## 0.02877238 0.02877238 0.02877238 0.72222222 0.02877238 0.02877238
## 2985 2992 2996 2997 2998 3002
## 0.02877238 0.05084746 0.02877238 0.02877238 0.11162791 0.02877238
## 3005 3008 3013 3016 3018 3026
## 0.02877238 0.02877238 0.02877238 0.02877238 0.15584416 0.02877238
## 3027 3032 3034 3037 3038 3043
## 0.15584416 0.05084746 0.02877238 0.02877238 0.02877238 0.02877238
## 3048 3049 3050 3051 3057 3061
## 0.02877238 0.02877238 0.02877238 0.85915493 0.11162791 0.02877238
## 3063 3064 3069 3071 3072 3073
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.68421053
## 3084 3091 3092 3097 3098 3099
## 0.11162791 0.02877238 0.02877238 0.11764706 0.02877238 0.02877238
## 3107 3110 3112 3115 3116 3123
## 0.02877238 0.05084746 0.02877238 0.02877238 0.15584416 0.02877238
## 3125 3127 3129 3131 3132 3135
## 0.11162791 0.11162791 0.02877238 0.02877238 0.15584416 0.02877238
## 3140 3143 3144 3150 3152 3154
## 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238 0.02877238
## 3155 3159 3165 3168 3169 3176
## 0.02877238 0.02877238 0.02877238 0.11162791 0.72222222 0.11764706
## 3180 3184 3190 3193 3196 3198
## 0.02877238 0.02877238 1.00000000 0.02877238 0.02877238 0.11162791
## 3200 3207 3217 3218 3223 3226
## 0.02877238 0.02877238 0.02877238 0.11162791 0.05084746 0.11162791
## 3227 3229 3232 3235 3245 3254
## 0.11162791 0.15584416 0.02877238 0.02877238 0.11162791 0.02877238
## 3258 3259 3260 3261 3267 3270
## 0.02877238 0.11162791 0.02877238 0.11162791 0.02877238 0.05084746
## 3277 3279 3285 3290 3292 3296
## 0.11162791 0.11162791 0.02877238 0.02877238 1.00000000 0.11162791
## 3301 3302 3307 3311 3315 3316
## 0.02877238 0.95522388 0.02877238 0.02877238 0.02877238 0.02877238
## 3323 3325 3329 3333
## 0.95522388 0.02877238 0.02877238 0.72222222
##
##
## Slot "labels":
## [[1]]
## [1] no no no no no yes no no no no yes no no no no no no
## [18] no no no yes no no no no no yes no yes no no no no no
## [35] no no yes no no no no no no no no no no no yes no no
## [52] no no no no no no no no no no no no no no no no yes
## [69] no no no no no no no no no yes no yes no no no no no
## [86] no no no no no no no no no no no no no no no yes yes
## [103] no no yes no no no no no no no no no no no no no yes
## [120] yes no no no no yes no no no no no no yes no no no no
## [137] yes yes no no no yes no no no no no no no no no no no
## [154] no no no no no no no no no no no no no no no yes no
## [171] yes no no no no no no no no no no no no no yes no no
## [188] no no no no no yes no no no no no no no no no no yes
## [205] no no no no no no no no no no no no no no no no no
## [222] no no yes no no no no no no no no no no no no no no
## [239] no no no no no no no yes no no no no no no no no yes
## [256] no no no no no yes no no no no no no no no no no no
## [273] no yes yes no no yes no no no no yes no no no no yes no
## [290] no no no no no yes no yes no no yes no no no no no no
## [307] yes no no no no no no no no yes no no no no no no no
## [324] no no no no no no no no yes no no no no no yes no no
## [341] no no no no no no no no no no no no no yes no no yes
## [358] no no no no no no no no no no no no no no no no no
## [375] no yes no no no no no no no no no no no no yes yes no
## [392] no no no no no no no no no no no no yes no no yes no
## [409] no no yes no yes no no no no no yes no yes no no no no
## [426] yes no no no no no no no no no yes no no no no no no
## [443] no no no no no no no no no no no no no no yes no no
## [460] no no no no no no no no no yes no no no no no no no
## [477] yes yes yes yes no no no no no no no no no no no no no
## [494] no no no no no no no yes no no no yes no no no no no
## [511] no no no yes no no no no no no no no no no yes no yes
## [528] yes no no yes no yes no no no no no no no no no no no
## [545] no no yes yes no no no no yes no no no no no no no no
## [562] no no no no no yes no yes no yes yes no yes no yes no no
## [579] no no yes no no yes no no no no no no no no no no no
## [596] no yes no no no no yes no no no no yes no no no no no
## [613] no no no no no no no no no no yes no yes no no no no
## [630] no no no no no yes no no no yes no yes no no no no no
## [647] no no no yes no no no no no no no no no no no no yes
## [664] no yes no no no no no no no no no no no yes no no yes
## [681] no no yes no no no no no no yes no no no no no no no
## [698] no no no no no no no no no no no no no no no yes yes
## [715] no no no no yes no no yes no no no no yes no no yes no
## [732] yes no no no no no no no yes yes no no no no no no no
## [749] no no yes no no no no no no no no no yes no no no no
## [766] no no no no no no no no no no yes no no no no no no
## [783] no no no no no no no yes no no yes no no no no yes no
## [800] no yes no no no no no no no no no yes no no no no no
## [817] no no no no no no no no no no yes no yes no no no yes
## [834] yes no yes no no no no no no no yes no no no no no no
## [851] no no yes no no no no no no yes yes yes no no yes no yes
## [868] no no no no no no no no no no no yes no no no no no
## [885] yes no yes no no no no no no no no no no no no no no
## [902] no no no no yes yes no no no yes no no yes no yes no no
## [919] no no no no no no no no no no no no no no no no no
## [936] no no no no yes no no no no no yes no yes no no no no
## [953] no no no no no no no no no no no no no no no no no
## [970] no yes no no no no no yes no no no yes no no no no no
## [987] no no no no yes no no no no no no no no no no no no
## [1004] no no no yes no no yes no no no no yes no no no
## Levels: no < yes
##
##
## Slot "cutoffs":
## [[1]]
## 3292 3323 3051 3333 3073
## Inf 1.00000000 0.95522388 0.85915493 0.72222222 0.68421053
## 2381 2725 3229 3176 3296 3270
## 0.65000000 0.16666667 0.15584416 0.11764706 0.11162791 0.05084746
## 3329
## 0.02877238
##
##
## Slot "fp":
## [[1]]
## [1] 0 0 2 5 12 14 18 21 56 70 174 228 877
##
##
## Slot "tp":
## [[1]]
## [1] 0 24 51 79 89 95 100 103 111 113 124 125 141
##
##
## Slot "tn":
## [[1]]
## [1] 877 877 875 872 865 863 859 856 821 807 703 649 0
##
##
## Slot "fn":
## [[1]]
## [1] 141 117 90 62 52 46 41 38 30 28 17 16 0
##
##
## Slot "n.pos":
## [[1]]
## [1] 141
##
##
## Slot "n.neg":
## [[1]]
## [1] 877
##
##
## Slot "n.pos.pred":
## [[1]]
## [1] 0 24 53 84 101 109 118 124 167 183 298 353 1018
##
##
## Slot "n.neg.pred":
## [[1]]
## [1] 1018 994 965 934 917 909 900 894 851 835 720 665 0
perf.rocr<-performance(pred.rocr, measure ="auc", x.measure="cutoff")
perf.tpr.rocr<-performance(pred.rocr, "tpr","fpr")
plot(perf.tpr.rocr,main=paste("AUC:",(perf.rocr@y.values)))
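# Extract the AUC as a plain number (hedged convenience for printing).
auc <- unlist(perf.rocr@y.values)
round(auc, 4)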
#rpart
library('rpart')
churn.rp<-rpart(churn ~., data=trainset)
#ctree
#install.packages("party")
library('party')
ctree.model = ctree(churn ~ . , data = trainset)
#C5.0
library(C50)
c50.model = C5.0(churn ~., data=trainset)
rp.predict.prob = predict(churn.rp, testset, type='prob')
c50.predict.prob = predict(c50.model, testset, type='prob')
# ctree returns a list of class-probability vectors; take the first element
# of each (the probability of the "yes" class, the first factor level)
ctree.predict.prob = sapply(predict(ctree.model, testset, type='prob'), function(e){unlist(e)[1]})
# column 1 of the rpart/C5.0 probability matrices is the "yes" class
rp.prediction = prediction(rp.predict.prob[,1], testset$churn)
c50.prediction = prediction(c50.predict.prob[,1], testset$churn)
ctree.prediction = prediction(ctree.predict.prob, testset$churn)
rp.performance = performance(rp.prediction, "tpr","fpr")
c50.performance = performance(c50.prediction, "tpr","fpr")
ctree.performance = performance(ctree.prediction, "tpr","fpr")
plot(rp.performance,col='red')
plot(c50.performance, add=T,col='green')
plot(ctree.performance, add=T,col='blue')
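# Add a key so the three curves are distinguishable (hedged sketch; colors
# match the plot() calls above).
legend(0.6, 0.3, legend = c('rpart', 'C5.0', 'ctree'),
       col = c('red', 'green', 'blue'), lty = 1)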
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
forest <- randomForest(churn ~., data = trainset, ntree=200,importance=T, proximity=T)
rf.predict.prob <- predict(forest, testset, type="prob")
rf.prediction <- prediction(rf.predict.prob[,1], as.factor(testset$churn))
rf.auc <- performance(rf.prediction, measure = "auc", x.measure = "cutoff")
rf.performance <- performance(rf.prediction, "tpr","fpr")
# ROC curves: rpart vs. random forest
plot(rp.performance, main='ROC Curve', col=1)
legend(0.7, 0.2, c('rpart', 'randomforest'), col=1:2, lty=1)
plot(rf.performance, col=2, add=TRUE)
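# Print the random forest AUC for comparison (hedged).
unlist(rf.auc@y.values)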