模型訓練與測試流程

Fig-1: The First Model

Fig-1: The First Model


Loading & Preparing Data

Sys.setlocale("LC_ALL","C")
[1] "C"
library(dplyr)
library(ggplot2)
library(caTools)
Loading Data
rm(list=ls(all=TRUE))
load("data/tf2.rdata")
Splitting for Classification
TR = subset(A, spl)
TS = subset(A, !spl)


  • 將顧客資料分成訓練資料及測試資料
  • 利用訓練資料來製作模型,並且預測測試資料看此模型準不準

Classification Model

glm1 = glm(buy ~ ., TR[,c(2:9, 11)], family=binomial())
pred = predict(glm1, TS, type="response")
cm = table(actual = TS$buy, predict = pred > 0.5); cm

       predict
actual  FALSE TRUE
  FALSE  3730  873
  TRUE   1700 2273

acc.ts = cm %>% {sum(diag(.))/sum(.)}; acc.ts # 0.69998

[1] 0.69998

colAUC(pred, TS$buy) # 0.7556

                 [,1]
FALSE vs. TRUE 0.7556


Regression Model

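A2 = subset(A, A$buy) %>% mutate_at(c("m","rev","amount"), log10)
TR2 = subset(A2, spl2)
TS2 = subset(A2, !spl2)

lm1 = lm(amount ~ ., TR2[,c(2:6,8:10)])
summary(lm1)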


Call:
lm(formula = amount ~ ., data = TR2[, c(2:6, 8:10)])

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8330 -0.2281  0.0485  0.2810  1.6424 

Coefficients:
              Estimate Std. Error t value     Pr(>|t|)    
(Intercept)  1.1403704  0.0504979   22.58      < 2e-16 ***
r            0.0000702  0.0003090    0.23       0.8203    
s            0.0001173  0.0003123    0.38       0.7072    
f            0.0256836  0.0017965   14.30      < 2e-16 ***
m            0.5045943  0.0372711   13.54      < 2e-16 ***
rev          0.0450307  0.0360945    1.25       0.2122    
ageB         0.0737926  0.0251165    2.94       0.0033 ** 
ageC         0.1204660  0.0230651    5.22 0.0000001800 ***
ageD         0.1264592  0.0227496    5.56 0.0000000279 ***
ageE         0.1382214  0.0232522    5.94 0.0000000029 ***
ageF         0.1085828  0.0242698    4.47 0.0000077690 ***
ageG         0.0787808  0.0264917    2.97       0.0029 ** 
ageH         0.0703242  0.0312462    2.25       0.0244 *  
ageI         0.0694822  0.0321119    2.16       0.0305 *  
ageJ        -0.0284007  0.0282282   -1.01       0.3144    
ageK         0.1124434  0.0395589    2.84       0.0045 ** 
areaB        0.0789586  0.0435321    1.81       0.0697 .  
areaC        0.0375241  0.0353641    1.06       0.2887    
areaD       -0.0111101  0.0371762   -0.30       0.7651    
areaE        0.0111809  0.0325803    0.34       0.7315    
areaF        0.0147066  0.0328141    0.45       0.6540    
areaG        0.0249228  0.0349567    0.71       0.4759    
areaH        0.0105550  0.0388962    0.27       0.7861    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.422 on 9246 degrees of freedom
Multiple R-squared:  0.291, Adjusted R-squared:  0.289 
F-statistic:  172 on 22 and 9246 DF,  p-value: <2e-16

r2.tr = summary(lm1)$r.sq
SST = sum((TS2$amount - mean(TR2$amount))^2)
SSE = sum((predict(lm1, TS2) - TS2$amount)^2)
r2.ts = 1 - (SSE/SST)
c(r2.tr, r2.ts)

[1] 0.29099 0.25760







LS0tDQp0aXRsZTogIkZpcnN0IE1vZGVsLCBUYS1GZW5nIg0KYXV0aG9yOiAi5Y2T6ZuN54S2LCDkuK3lsbHlpKflrbgg566h55CG5a246KGT56CU56m25Lit5b+DIg0KZGF0ZTogImByIFN5cy50aW1lKClgIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KDQojIyMg5qih5Z6L6KiT57e06IiH5ris6Kmm5rWB56iLDQoNCjxjZW50ZXI+DQoNCiFbRmlnLTE6IFRoZSBGaXJzdCBNb2RlbF0oZmlnL21vZGVsaW5nLmpwZykNCg0KPC9jZW50ZXI+DQoNCjxocj4NCg0KIyMjIExvYWRpbmcgJiBQcmVwYXJpbmcgRGF0YQ0KDQpgYGB7ciBlY2hvPVQsIG1lc3NhZ2U9RiwgY2FjaGU9Riwgd2FybmluZz1GfQ0KU3lzLnNldGxvY2FsZSgiTENfQUxMIiwiQyIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShjYVRvb2xzKQ0KYGBgDQoNCiMjIyMjIExvYWRpbmcgRGF0YQ0KYGBge3J9DQpybShsaXN0PWxzKGFsbD1UUlVFKSkNCmxvYWQoImRhdGEvdGYyLnJkYXRhIikNCmBgYA0KDQojIyMjIyBTcGxpdGluZyBmb3IgQ2xhc3NpZmljYXRpb24gDQpgYGB7cn0NClRSID0gc3Vic2V0KEEsIHNwbCkNClRTID0gc3Vic2V0KEEsICFzcGwpDQpgYGANCjxicj48aHI+DQoNCisg5bCH6aGn5a6i6LOH5paZ5YiG5oiQ6KiT57e06LOH5paZ5Y+K5ris6Kmm6LOH5paZDQorIOWIqeeUqOiok+e3tOizh+aWmeS+huijveS9nOaooeWei++8jOS4puS4lOmgkOa4rOa4rOippuizh+aWmeeci+atpOaooeWei+a6luS4jea6lg0KDQojIyMgQ2xhc3NpZmljYXRpb24gTW9kZWwNCmBgYHtyfQ0KZ2xtMSA9IGdsbShidXkgfiAuLCBUUlssYygyOjksIDExKV0sIGZhbWlseT1iaW5vbWlhbCgpKSANCnN1bW1hcnkoZ2xtMSkNCnByZWQgPSAgcHJlZGljdChnbG0xLCBUUywgdHlwZT0icmVzcG9uc2UiKQ0KY20gPSB0YWJsZShhY3R1YWwgPSBUUyRidXksIHByZWRpY3QgPSBwcmVkID4gMC41KTsgY20NCmFjYy50cyA9IGNtICU+JSB7c3VtKGRpYWcoLikpL3N1bSguKX07IGFjYy50cyAgICAgICAgICAjIDAuNjk5OTgNCmNvbEFVQyhwcmVkLCBUUyRidXkpICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIDAuNzU1Ng0KYGBgDQorIOaqouimluatpOaooeWei++8jOaIkeWAkeWPr+S7peafpeeci+WQhOWAi1jlsI3mlrxZ55qE6aGv6JGX56iL5bqmDQorIEFJQyDotorlsI/otorlpb0NCisg5qqi6KaWYWNjICwgQVVDDQoNCg0KPGJyPjxocj4NCg0KDQojIyMgUmVncmVzc2lvbiBNb2RlbA0KYGBge3J9DQpBMiA9IHN1YnNldChBLCBBJGJ1eSkgJT4lIG11dGF0ZV9hdChjKCJtIiwicmV2IiwiYW1vdW50IiksIGxvZzEwKQ0KVFIyID0gc3Vic2V0KEEyLCBzcGwyKQ0KVFMyID0gc3Vic2V0KEEyLCAhc3BsMikNCmBgYA0KDQpgYGB7cn0NCmxtMSA9IGxtKGFtb3VudCB+IC4sIFRSMlssYygyOjYsODoxMCldKQ0Kc3VtbWFyeShsbTEpDQpgYGANCisg5qqi6KaW5q2k6aCQ5ris5qih5Z6LDQorIOaWnOeOh+eahCsvLeihqOekuuatoy/osqDn
m7jpl5zvvIzlpKflsI/ooajnpLrlsI3mh4norormlbjlvbHpn7/nqIvluqYNCisgUjLooajnpLrmraTmqKHlnovog73lpKDop6Pph4vnmoTorornlbDnqIvluqYNCisg5pif6Jmf5Luj6KGo6aGv6JGX55qE6Ieq6K6K5pW4DQoNCmBgYHtyfQ0KcjIudHIgPSBzdW1tYXJ5KGxtMSkkci5zcQ0KU1NUID0gc3VtKChUUzIkYW1vdW50IC0gbWVhbihUUjIkYW1vdW50KSleIDIpDQpTU0UgPSBzdW0oKHByZWRpY3QobG0xLCBUUzIpIC0gIFRTMiRhbW91bnQpXjIpDQpyMi50cyA9IDEgLSAoU1NFL1NTVCkNCmMocjIudHIsIHIyLnRzKQ0KYGBgDQorIOWNs+e4veiuiueVsChTU1QpPeW3suino+mHi+iuiueVsChTU1IpKyDmnKrop6Pph4vorornlbAoU1NFKQ0KDQo8YnI+PGJyPjxicj48aHI+PGJyPjxicj48YnI+DQoNCjxzdHlsZT4NCg0KLmNhcHRpb24gew0KICBjb2xvcjogIzc3NzsNCiAgbWFyZ2luLXRvcDogMTBweDsNCn0NCnAgY29kZSB7DQogIHdoaXRlLXNwYWNlOiBpbmhlcml0Ow0KfQ0KcHJlIHsNCiAgd29yZC1icmVhazogbm9ybWFsOw0KICB3b3JkLXdyYXA6IG5vcm1hbDsNCiAgbGluZS1oZWlnaHQ6IDE7DQp9DQpwcmUgY29kZSB7DQogIHdoaXRlLXNwYWNlOiBpbmhlcml0Ow0KfQ0KcCxsaSB7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQoucnsNCiAgbGluZS1oZWlnaHQ6IDEuMjsNCn0NCg0KLnFpeiB7DQogIGxpbmUtaGVpZ2h0OiAxLjc1Ow0KICBiYWNrZ3JvdW5kOiAjZjBmMGYwOw0KICBib3JkZXItbGVmdDogMTJweCBzb2xpZCAjY2NmZmNjOw0KICBwYWRkaW5nOiA0cHg7DQogIHBhZGRpbmctbGVmdDogMTBweDsNCiAgY29sb3I6ICMwMDk5MDA7DQp9DQoNCnRpdGxlew0KICBjb2xvcjogI2NjMDAwMDsNCiAgZm9udC1mYW1pbHk6ICJUcmVidWNoZXQgTVMiLCAi5b6u6Luf5q2j6buR6auUIiwgIk1pY3Jvc29mdCBKaGVuZ0hlaSI7DQp9DQoNCmJvZHl7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQpoMSxoMixoMyxoNCxoNXsNCiAgY29sb3I6ICMwMDY2ZmY7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQoNCmgzew0KICBjb2xvcjogIzAwODgwMDsNCiAgYmFja2dyb3VuZDogI2U2ZmZlNjsNCiAgbGluZS1oZWlnaHQ6IDI7DQogIGZvbnQtd2VpZ2h0OiBib2xkOw0KfQ0KDQpoNXsNCiAgY29sb3I6ICMwMDYwMDA7DQogIGJhY2tncm91bmQ6ICNmOGY4Zjg7DQogIGxpbmUtaGVpZ2h0OiAxLjU7DQogIGZvbnQtd2VpZ2h0OiBib2xkOw0KfQ0KDQo8L3N0eWxlPg0KDQo=