Loading & Preparing Data
Sys.setlocale("LC_ALL","C")
[1] "C"
library(dplyr)
library(ggplot2)
library(caTools)
library(Matrix)
library(slam)
library(rpart)
library(rpart.plot)
rm(list=ls(all=TRUE))
load("data/tf2.rdata")
A2 = subset(A, buy)
c(sum(spl), sum(spl2))
[1] 20008 9269
Weekday Percentage: W1 ~ W7
X = X %>% mutate(wday = format(date, "%w"))
table(X$wday)
0 1 2 3 4 5 6
18011 12615 11288 9898 11245 10651 14587
mx = xtabs(~ cust + wday, X)
dim(mx)
[1] 28584 7
mx[1:5,]
wday
cust 0 1 2 3 4 5 6
1069 1 1 0 0 0 0 0
1113 2 1 0 0 0 0 1
1359 0 1 0 0 0 0 0
1823 0 1 0 1 1 0 0
2189 0 0 0 1 0 0 1
mx = mx / rowSums(mx)
mx[1:5,]
wday
cust 0 1 2 3 4 5
1069 0.5000000 0.5000000 0.0000000 0.0000000 0.0000000 0.0000000
1113 0.5000000 0.2500000 0.0000000 0.0000000 0.0000000 0.0000000
1359 0.0000000 1.0000000 0.0000000 0.0000000 0.0000000 0.0000000
1823 0.0000000 0.3333333 0.0000000 0.3333333 0.3333333 0.0000000
2189 0.0000000 0.0000000 0.0000000 0.5000000 0.0000000 0.0000000
wday
cust 6
1069 0.0000000
1113 0.2500000
1359 0.0000000
1823 0.0000000
2189 0.5000000
A = data.frame(as.integer(rownames(mx)), as.matrix.data.frame(mx)) %>%
setNames(c("cust","W1","W2","W3","W4","W5","W6","W7")) %>%
right_join(A, by='cust')
head(A)
Classification (Buy) Model
TR = subset(A, spl)
TS = subset(A, !spl)
library(rpart)
library(rpart.plot)
rpart1 = rpart(buy ~ ., TR[,c(2:16,18)], method="class")
pred = predict(rpart1, TS)[,2] # predict prob
cm = table(actual = TS$buy, predict = pred > 0.5); cm
predict
actual FALSE TRUE
FALSE 3730 873
TRUE 1643 2330
acc.ts = cm %>% {sum(diag(.))/sum(.)}; acc.ts # 0.70662
[1] 0.7066231
colAUC(pred, TS$buy) # 0.6984
[,1]
FALSE vs. TRUE 0.6983998
- 利用CART – Classification & Regression Tree建立預測模型
- 使用CART 預測 類別
- 檢視測試資料的準確度
- 檢視AUC
rr rpart.plot(rpart1,cex=0.6)

[1] 0.70417
rr colAUC(pred, TS$buy) # 0.7169
[,1]
FALSE vs. TRUE 0.7169
rr rpart.plot(rpart2,cex=0.6)

Regression (Amount) Model
- 由於是預測數量,將資料取Log10可以避免單位不同所造成的數字差異
rr rpart3 = rpart(amount ~ ., TR2[,c(2:17)], cp=0.002) SST = sum((TS2\(amount - mean(TR2\)amount))^ 2) SSE = sum((predict(rpart3, TS2) - TS2$amount)^2) 1 - (SSE/SST)
[1] 0.24606
- 即總變異(SST)=已解釋變異(SSR)+ 未解釋變異(SSE)
LS0tDQp0aXRsZTogIkZlYXR1cmUgRW5naW5lZXJpbmcsIFRhLUZlbmciDQphdXRob3I6ICLljZPpm43nhLYsIOS4reWxseWkp+WtuCDnrqHnkIblrbjooZPnoJTnqbbkuK3lv4MiDQpkYXRlOiAiYHIgU3lzLnRpbWUoKWAiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQo8YnI+DQoNCiMjIyDorormlbjoo73kvZzoiIfos4fmlpnliIblibLmtYHnqIsNCg0KPGNlbnRlcj4NCg0KIVtGaWctMTogRmVhdHVyZSBFbmdpbmVlcmluZ10oZmlnL2ZlYXR1cmluZy5qcGcpDQoNCiFbRmlnLTI6IEZlYXR1cmUgRW5nci4gJiBEYXRhIFNwbGl0aW5nIFByb2Nlc3NdKGZpZy9mZWF0dXJlX2VuZ3IuanBnKQ0KDQoNCjwvY2VudGVyPg0KDQo8YnI+PGhyPg0KDQojIyMgTG9hZGluZyAmIFByZXBhcmluZyBEYXRhDQpgYGB7ciBlY2hvPVQsIG1lc3NhZ2U9RiwgY2FjaGU9Riwgd2FybmluZz1GfQ0KU3lzLnNldGxvY2FsZSgiTENfQUxMIiwiQyIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShjYVRvb2xzKQ0KbGlicmFyeShNYXRyaXgpDQpsaWJyYXJ5KHNsYW0pDQpsaWJyYXJ5KHJwYXJ0KQ0KbGlicmFyeShycGFydC5wbG90KQ0KYGBgDQoNCmBgYHtyfQ0Kcm0obGlzdD1scyhhbGw9VFJVRSkpDQpsb2FkKCJkYXRhL3RmMi5yZGF0YSIpDQpBMiA9IHN1YnNldChBLCBidXkpDQpjKHN1bShzcGwpLCBzdW0oc3BsMikpDQpgYGANCjxicj48aHI+DQoNCiMjIyBXZWVrZGF5IFBlcmNlbnRhZ2U6IFcxIH4gVzcNCmBgYHtyfQ0KWCA9IFggJT4lIG11dGF0ZSh3ZGF5ID0gZm9ybWF0KGRhdGUsICIldyIpKQ0KdGFibGUoWCR3ZGF5KQ0KYGBgDQoNCg0KYGBge3J9DQpteCA9IHh0YWJzKH4gY3VzdCArIHdkYXksIFgpDQpkaW0obXgpDQpgYGANCg0KYGBge3J9DQpteFsxOjUsXQ0KYGBgDQoNCmBgYHtyfQ0KbXggPSBteCAvIHJvd1N1bXMobXgpDQpteFsxOjUsXQ0KYGBgDQoNCmBgYHtyfQ0KQSA9IGRhdGEuZnJhbWUoYXMuaW50ZWdlcihyb3duYW1lcyhteCkpLCBhcy5tYXRyaXguZGF0YS5mcmFtZShteCkpICU+JSANCiAgc2V0TmFtZXMoYygiY3VzdCIsIlcxIiwiVzIiLCJXMyIsIlc0IiwiVzUiLCJXNiIsIlc3IikpICU+JSANCiAgcmlnaHRfam9pbihBLCBieT0nY3VzdCcpDQpoZWFkKEEpDQpgYGANCjxicj48aHI+DQoNCiMjIyBDbGFzc2lmaWNhdGlvbiAoQnV5KSBNb2RlbA0KYGBge3J9DQpUUiA9IHN1YnNldChBLCBzcGwpDQpUUyA9IHN1YnNldChBLCAhc3BsKQ0KYGBgDQoNCmBgYHtyfQ0KbGlicmFyeShycGFydCkNCmxpYnJhcnkocnBhcnQucGxvdCkNCnJwYXJ0MSA9IHJwYXJ0KGJ1eSB+IC4sIFRSWyxjKDI6MTYsMTgpXSwgbWV0aG9kPSJjbGFzcyIpDQpwcmVkID0gIHByZWRpY3QocnBhcnQxLCBUUylbLDJdICAjIHByZWRpY3QgcHJvYg0KY20gPSB0YWJsZShhY3R1YWwgPSBUUyRidXksIHByZWRpY3QgPSBwcmVkID4gMC41KTsgY20NCmFjYy50cyA9IGNtICU+JSB7c3VtKGRpYWcoLikpL3N1bSguKX07IGFjYy50cyAgICMgMC43MDY2MiAgICAgICAgICANCmNvbEFVQyhwcmVkLCBUUyRidXkpICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgMC42OTg0DQpgYGANCisg5Yip55SoQ0FSVCDigJMgQ2xhc3NpZmljYXRpb24gJiBSZWdyZXNzaW9uIFRyZWXlu7rnq4vpoJDmuKzmqKHlnosNCisg5L2/55SoQ0FSVCDpoJDmuKwg6aGe5YilDQorIOaqouimlua4rOippuizh+aWmeeahOa6lueiuuW6pg0KKyDmqqLoppZBVUMNCg0KYGBge3IgZmlnLmhlaWdodD0zLCBmaWcud2lkdGg9Ny4yfQ0KcnBhcnQucGxvdChycGFydDEsY2V4PTAuNikNCmBgYA0KDQpgYGB7cn0NCnJwYXJ0MiA9IHJwYXJ0KGJ1eSB+IC4sIFRSWyxjKDI6MTYsMTgpXSwgbWV0aG9kPSJjbGFzcyIsY3A9MC4wMDEpDQpwcmVkID0gIHByZWRpY3QocnBhcnQyLCBUUylbLDJdICAjIHByZWRpY3QgcHJvYg0KY20gPSB0YWJsZShhY3R1YWwgPSBUUyRidXksIHByZWRpY3QgPSBwcmVkID4gMC41KTsgY20NCmFjYy50cyA9IGNtICU+JSB7c3VtKGRpYWcoLikpL3N1bSguKX07IGFjYy50cyAgICMgMC43MDQxNyAgICAgICAgICANCmNvbEFVQyhwcmVkLCBUUyRidXkpICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgMC43MTY5ICAgICAgICAgDQpgYGANCg0KYGBge3J9DQpycGFydC5wbG90KHJwYXJ0MixjZXg9MC42KQ0KYGBgDQoNCiMjIyBSZWdyZXNzaW9uIChBbW91bnQpIE1vZGVsDQpgYGB7cn0NCkEyID0gc3Vic2V0KEEsIGJ1eSkgJT4lIG11dGF0ZV9hdChjKCJtIiwicmV2IiwiYW1vdW50IiksIGxvZzEwKQ0KVFIyID0gc3Vic2V0KEEyLCBzcGwyKQ0KVFMyID0gc3Vic2V0KEEyLCAhc3BsMikNCmBgYA0KKyDnlLHmlrzmmK/poJDmuKzmlbjph4/vvIzlsIfos4fmlpnlj5ZMb2cxMOWPr+S7pemBv+WFjeWWruS9jeS4jeWQjOaJgOmAoOaIkOeahOaVuOWtl+W3rueVsA0KYGBge3J9DQpycGFydDMgPSBycGFydChhbW91bnQgfiAuLCBUUjJbLGMoMjoxNyldLCBjcD0wLjAwMikNClNTVCA9IHN1bSgoVFMyJGFtb3VudCAtIG1lYW4oVFIyJGFtb3VudCkpXiAyKQ0KU1NFID0gc3VtKChwcmVkaWN0KHJwYXJ0MywgVFMyKSAtICBUUzIkYW1vdW50KV4yKQ0KMSAtIChTU0UvU1NUKQ0KYGBgDQoNCisg5Y2z57i96K6K55WwKFNTVCk95bey6Kej6YeL6K6K55WwKFNTUikrIOacquino+mHi+iuiueVsChTU0UpDQoNCg0KDQoNCg0KPGJyPjxicj48aHI+PGJyPjxicj48YnI+PGJyPg0KPHN0eWxlPg0KDQouY2FwdGlvbiB7DQogIGNvbG9yOiAjNzc3Ow0KICBtYXJnaW4tdG9wOiAxMHB4Ow0KfQ0KcCBjb2RlIHsNCiAgd2hpdGUtc3BhY2U6IGluaGVyaXQ7DQp9DQpwcmUgew0KICB3b3JkLWJyZWFrOiBub3JtYWw7DQogIHdvcmQtd3JhcDogbm9ybWFsOw0KICBsaW5lLWhlaWdodDogMTsNCn0NCnByZSBjb2RlIHsNCiAgd2hpdGUtc3BhY2U6IGluaGVyaXQ7DQp9DQpwLGxpIHsNCiAgZm9udC1mYW1pbHk6ICJUcmVidWNoZXQgTVMiLCAi5b6u6Luf5q2j6buR6auUIiwgIk1pY3Jvc29mdCBKaGVuZ0hlaSI7DQp9DQoNCi5yew0KICBsaW5lLWhlaWdodDogMS4yOw0KfQ0KDQoucWl6IHsNCiAgbGluZS1oZWlnaHQ6IDEuNzU7DQogIGJhY2tncm91bmQ6ICNmMGYwZjA7DQogIGJvcmRlci1sZWZ0OiAxMnB4IHNvbGlkICNjY2ZmY2M7DQogIHBhZGRpbmc6IDRweDsNCiAgcGFkZGluZy1sZWZ0OiAxMHB4Ow0KICBjb2xvcjogIzAwOTkwMDsNCn0NCg0KdGl0bGV7DQogIGNvbG9yOiAjY2MwMDAwOw0KICBmb250LWZhbWlseTogIlRyZWJ1Y2hldCBNUyIsICLlvq7ou5/mraPpu5Hpq5QiLCAiTWljcm9zb2Z0IEpoZW5nSGVpIjsNCn0NCg0KYm9keXsNCiAgZm9udC1mYW1pbHk6ICJUcmVidWNoZXQgTVMiLCAi5b6u6Luf5q2j6buR6auUIiwgIk1pY3Jvc29mdCBKaGVuZ0hlaSI7DQp9DQoNCmgxLGgyLGgzLGg0LGg1ew0KICBjb2xvcjogIzAwNjZmZjsNCiAgZm9udC1mYW1pbHk6ICJUcmVidWNoZXQgTVMiLCAi5b6u6Luf5q2j6buR6auUIiwgIk1pY3Jvc29mdCBKaGVuZ0hlaSI7DQp9DQoNCg0KaDN7DQogIGNvbG9yOiAjMDA4ODAwOw0KICBiYWNrZ3JvdW5kOiAjZTZmZmU2Ow0KICBsaW5lLWhlaWdodDogMjsNCiAgZm9udC13ZWlnaHQ6IGJvbGQ7DQp9DQoNCmg1ew0KICBjb2xvcjogIzAwNjAwMDsNCiAgYmFja2dyb3VuZDogI2Y4ZjhmODsNCiAgbGluZS1oZWlnaHQ6IDEuNTsNCiAgZm9udC13ZWlnaHQ6IGJvbGQ7DQp9DQoNCjwvc3R5bGU+DQo=