Loading & Preparing Data
# Switch all locale categories of this session to the portable "C" locale.
Sys.setlocale(category = "LC_ALL", locale = "C")
[1] "C"
library(dplyr)
library(ggplot2)
library(caTools)
Loading Data
# Start from an empty workspace: remove every object in the current
# environment, including those whose names begin with a dot.
# NOTE(review): this wipes whatever the session already holds; running the
# notebook in a fresh R session would make this line unnecessary.
rm(list = ls(all.names = TRUE))

# Restore the objects saved in the .rdata file into the workspace.
# The following chunk uses A and spl, presumably supplied by this file --
# TODO confirm.
load("data/tf2.rdata")
Splitting for Classification
# Partition the customer table A with the split indicator spl:
# rows where spl is TRUE form the training set (TR), all remaining rows
# form the test set (TS).
TR = subset(x = A, subset = spl)
TS = subset(x = A, subset = !spl)
- 將顧客資料分成訓練資料及測試資料
- 利用訓練資料來製作模型,並且預測測試資料看此模型準不準
Classification Model
# Fit a logistic regression for buy on the training set, restricted to
# columns 2-9 and 11 of TR (buy has to be among them).
glm1 = glm(buy ~ ., TR[, c(2:9, 11)], family = binomial())
summary(glm1)

# type = "response" returns predicted probabilities, so the 0.5 cut-off
# below is applied on the probability scale rather than the link scale.
pred = predict(glm1, TS, type = "response")

# Confusion matrix on the test set: actual outcome by predicted class.
cm = table(actual = TS$buy, predict = pred > 0.5)
cm
predict
actual FALSE TRUE
FALSE 3730 873
TRUE 1700 2273
# Test-set accuracy: cases on the diagonal of the confusion matrix cm
# divided by the total number of cases.
acc.ts = sum(diag(cm)) / sum(cm)
acc.ts  # 0.69998
[1] 0.69998
# Area under the ROC curve of the predictions against the actual test-set
# outcome (colAUC is provided by caTools).
colAUC(pred, TS$buy)  # 0.7556
[,1]
FALSE vs. TRUE 0.7556
- 檢視此模型,我們可以查看各個X對於Y的顯著程度
- AIC 越小越好
- 檢視acc , AUC
Regression Model
# Keep only the rows of A where buy is TRUE and put m, rev and amount on a
# log10 scale, then split into training and test sets with spl2.
A2 = subset(A, A$buy) %>% mutate_at(c("m", "rev", "amount"), log10)
TR2 = subset(A2, spl2)
TS2 = subset(A2, !spl2)

# Linear regression of (log10) amount on the other selected columns of TR2.
lm1 = lm(amount ~ ., TR2[, c(2:6, 8:10)])
summary(lm1)
Call:
lm(formula = amount ~ ., data = TR2[, c(2:6, 8:10)])
Residuals:
Min 1Q Median 3Q Max
-1.8330 -0.2281 0.0485 0.2810 1.6424
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.1403704 0.0504979 22.58 < 2e-16 ***
r 0.0000702 0.0003090 0.23 0.8203
s 0.0001173 0.0003123 0.38 0.7072
f 0.0256836 0.0017965 14.30 < 2e-16 ***
m 0.5045943 0.0372711 13.54 < 2e-16 ***
rev 0.0450307 0.0360945 1.25 0.2122
ageB 0.0737926 0.0251165 2.94 0.0033 **
ageC 0.1204660 0.0230651 5.22 0.0000001800 ***
ageD 0.1264592 0.0227496 5.56 0.0000000279 ***
ageE 0.1382214 0.0232522 5.94 0.0000000029 ***
ageF 0.1085828 0.0242698 4.47 0.0000077690 ***
ageG 0.0787808 0.0264917 2.97 0.0029 **
ageH 0.0703242 0.0312462 2.25 0.0244 *
ageI 0.0694822 0.0321119 2.16 0.0305 *
ageJ -0.0284007 0.0282282 -1.01 0.3144
ageK 0.1124434 0.0395589 2.84 0.0045 **
areaB 0.0789586 0.0435321 1.81 0.0697 .
areaC 0.0375241 0.0353641 1.06 0.2887
areaD -0.0111101 0.0371762 -0.30 0.7651
areaE 0.0111809 0.0325803 0.34 0.7315
areaF 0.0147066 0.0328141 0.45 0.6540
areaG 0.0249228 0.0349567 0.71 0.4759
areaH 0.0105550 0.0388962 0.27 0.7861
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.422 on 9246 degrees of freedom
Multiple R-squared: 0.291, Adjusted R-squared: 0.289
F-statistic: 172 on 22 and 9246 DF, p-value: <2e-16
- 檢視此預測模型
- 斜率的+/-表示正/負相關,大小表示對應變數影響程度
- R2表示此模型能夠解釋的變異程度
- 星號代表顯著的自變數
# R-squared on the training data, as reported by summary(lm1).
r2.tr = summary(lm1)$r.squared

# R-squared on the test data: SST measures the test amounts against the
# training mean, SSE is the squared error of the test-set predictions.
SST = sum((TS2$amount - mean(TR2$amount))^2)
SSE = sum((predict(lm1, TS2) - TS2$amount)^2)
r2.ts = 1 - SSE / SST

# Training R-squared followed by test R-squared.
c(r2.tr, r2.ts)
[1] 0.29099 0.25760
- 即總變異(SST)=已解釋變異(SSR)+ 未解釋變異(SSE)
LS0tDQp0aXRsZTogIkZpcnN0IE1vZGVsLCBUYS1GZW5nIg0KYXV0aG9yOiAi5Y2T6ZuN54S2LCDkuK3lsbHlpKflrbgg566h55CG5a246KGT56CU56m25Lit5b+DIg0KZGF0ZTogImByIFN5cy50aW1lKClgIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KDQojIyMg5qih5Z6L6KiT57e06IiH5ris6Kmm5rWB56iLDQoNCjxjZW50ZXI+DQoNCiFbRmlnLTE6IFRoZSBGaXJzdCBNb2RlbF0oZmlnL21vZGVsaW5nLmpwZykNCg0KPC9jZW50ZXI+DQoNCjxocj4NCg0KIyMjIExvYWRpbmcgJiBQcmVwYXJpbmcgRGF0YQ0KDQpgYGB7ciBlY2hvPVQsIG1lc3NhZ2U9RiwgY2FjaGU9Riwgd2FybmluZz1GfQ0KU3lzLnNldGxvY2FsZSgiTENfQUxMIiwiQyIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShjYVRvb2xzKQ0KYGBgDQoNCiMjIyMjIExvYWRpbmcgRGF0YQ0KYGBge3J9DQpybShsaXN0PWxzKGFsbD1UUlVFKSkNCmxvYWQoImRhdGEvdGYyLnJkYXRhIikNCmBgYA0KDQojIyMjIyBTcGxpdGluZyBmb3IgQ2xhc3NpZmljYXRpb24gDQpgYGB7cn0NClRSID0gc3Vic2V0KEEsIHNwbCkNClRTID0gc3Vic2V0KEEsICFzcGwpDQpgYGANCjxicj48aHI+DQoNCisg5bCH6aGn5a6i6LOH5paZ5YiG5oiQ6KiT57e06LOH5paZ5Y+K5ris6Kmm6LOH5paZDQorIOWIqeeUqOiok+e3tOizh+aWmeS+huijveS9nOaooeWei++8jOS4puS4lOmgkOa4rOa4rOippuizh+aWmeeci+atpOaooeWei+a6luS4jea6lg0KDQojIyMgQ2xhc3NpZmljYXRpb24gTW9kZWwNCmBgYHtyfQ0KZ2xtMSA9IGdsbShidXkgfiAuLCBUUlssYygyOjksIDExKV0sIGZhbWlseT1iaW5vbWlhbCgpKSANCnN1bW1hcnkoZ2xtMSkNCnByZWQgPSAgcHJlZGljdChnbG0xLCBUUywgdHlwZT0icmVzcG9uc2UiKQ0KY20gPSB0YWJsZShhY3R1YWwgPSBUUyRidXksIHByZWRpY3QgPSBwcmVkID4gMC41KTsgY20NCmFjYy50cyA9IGNtICU+JSB7c3VtKGRpYWcoLikpL3N1bSguKX07IGFjYy50cyAgICAgICAgICAjIDAuNjk5OTgNCmNvbEFVQyhwcmVkLCBUUyRidXkpICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIDAuNzU1Ng0KYGBgDQorIOaqouimluatpOaooeWei++8jOaIkeWAkeWPr+S7peafpeeci+WQhOWAi1jlsI3mlrxZ55qE6aGv6JGX56iL5bqmDQorIEFJQyDotorlsI/otorlpb0NCisg5qqi6KaWYWNjICwgQVVDDQoNCg0KPGJyPjxocj4NCg0KDQojIyMgUmVncmVzc2lvbiBNb2RlbA0KYGBge3J9DQpBMiA9IHN1YnNldChBLCBBJGJ1eSkgJT4lIG11dGF0ZV9hdChjKCJtIiwicmV2IiwiYW1vdW50IiksIGxvZzEwKQ0KVFIyID0gc3Vic2V0KEEyLCBzcGwyKQ0KVFMyID0gc3Vic2V0KEEyLCAhc3BsMikNCmBgYA0KDQpgYGB7cn0NCmxtMSA9IGxtKGFtb3VudCB+IC4sIFRSMlssYygyOjYsODoxMCldKQ0Kc3VtbWFyeShsbTEpDQpgYGANCisg5qqi6KaW5q2k6aCQ5ris5qih5Z6LDQorIOaWnOeOh+eahCsvLeihqOekuuatoy/osqDn
m7jpl5zvvIzlpKflsI/ooajnpLrlsI3mh4norormlbjlvbHpn7/nqIvluqYNCisgUjLooajnpLrmraTmqKHlnovog73lpKDop6Pph4vnmoTorornlbDnqIvluqYNCisg5pif6Jmf5Luj6KGo6aGv6JGX55qE6Ieq6K6K5pW4DQoNCmBgYHtyfQ0KcjIudHIgPSBzdW1tYXJ5KGxtMSkkci5zcQ0KU1NUID0gc3VtKChUUzIkYW1vdW50IC0gbWVhbihUUjIkYW1vdW50KSleIDIpDQpTU0UgPSBzdW0oKHByZWRpY3QobG0xLCBUUzIpIC0gIFRTMiRhbW91bnQpXjIpDQpyMi50cyA9IDEgLSAoU1NFL1NTVCkNCmMocjIudHIsIHIyLnRzKQ0KYGBgDQorIOWNs+e4veiuiueVsChTU1QpPeW3suino+mHi+iuiueVsChTU1IpKyDmnKrop6Pph4vorornlbAoU1NFKQ0KDQo8YnI+PGJyPjxicj48aHI+PGJyPjxicj48YnI+DQoNCjxzdHlsZT4NCg0KLmNhcHRpb24gew0KICBjb2xvcjogIzc3NzsNCiAgbWFyZ2luLXRvcDogMTBweDsNCn0NCnAgY29kZSB7DQogIHdoaXRlLXNwYWNlOiBpbmhlcml0Ow0KfQ0KcHJlIHsNCiAgd29yZC1icmVhazogbm9ybWFsOw0KICB3b3JkLXdyYXA6IG5vcm1hbDsNCiAgbGluZS1oZWlnaHQ6IDE7DQp9DQpwcmUgY29kZSB7DQogIHdoaXRlLXNwYWNlOiBpbmhlcml0Ow0KfQ0KcCxsaSB7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQoucnsNCiAgbGluZS1oZWlnaHQ6IDEuMjsNCn0NCg0KLnFpeiB7DQogIGxpbmUtaGVpZ2h0OiAxLjc1Ow0KICBiYWNrZ3JvdW5kOiAjZjBmMGYwOw0KICBib3JkZXItbGVmdDogMTJweCBzb2xpZCAjY2NmZmNjOw0KICBwYWRkaW5nOiA0cHg7DQogIHBhZGRpbmctbGVmdDogMTBweDsNCiAgY29sb3I6ICMwMDk5MDA7DQp9DQoNCnRpdGxlew0KICBjb2xvcjogI2NjMDAwMDsNCiAgZm9udC1mYW1pbHk6ICJUcmVidWNoZXQgTVMiLCAi5b6u6Luf5q2j6buR6auUIiwgIk1pY3Jvc29mdCBKaGVuZ0hlaSI7DQp9DQoNCmJvZHl7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQpoMSxoMixoMyxoNCxoNXsNCiAgY29sb3I6ICMwMDY2ZmY7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQoNCmgzew0KICBjb2xvcjogIzAwODgwMDsNCiAgYmFja2dyb3VuZDogI2U2ZmZlNjsNCiAgbGluZS1oZWlnaHQ6IDI7DQogIGZvbnQtd2VpZ2h0OiBib2xkOw0KfQ0KDQpoNXsNCiAgY29sb3I6ICMwMDYwMDA7DQogIGJhY2tncm91bmQ6ICNmOGY4Zjg7DQogIGxpbmUtaGVpZ2h0OiAxLjU7DQogIGZvbnQtd2VpZ2h0OiBib2xkOw0KfQ0KDQo8L3N0eWxlPg0KDQo=