library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(caTools)
## Warning: package 'caTools' was built under R version 3.4.4
library(Matrix)
library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.4.4
library(caret)
## Warning: package 'caret' was built under R version 3.4.4
## Loading required package: lattice
library(doParallel)
## Warning: package 'doParallel' was built under R version 3.4.4
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 3.4.4
## Loading required package: iterators
## Warning: package 'iterators' was built under R version 3.4.4
## Loading required package: parallel

老師原本模型 取 log 分成 TR2 & TS2

# Baseline model: log10-transform m, rev and amount, fit a linear model on the
# training split and score it on the held-out split.
# The workspace is deliberately not wiped with rm(list = ls()); that call
# would also delete unrelated objects of whoever runs this script.
load("data/tf2.rdata")  # must supply A, X and the logical split vector spl2

# Keep only the rows of A where `buy` is TRUE, then rescale the three columns.
A2 <- subset(A, buy) %>% mutate_at(c("m", "rev", "amount"), log10)
TR2 <- subset(A2, spl2)   # training rows
TS2 <- subset(A2, !spl2)  # test rows

# Same columns as positions 2:6 and 8:10, but selected by name so the model
# does not silently change if the column order of A ever changes.
base_cols <- c("r", "s", "f", "m", "rev", "age", "area", "amount")
lm1 <- lm(amount ~ ., TR2[, base_cols])
pred <- predict(lm1, TS2)

# Spell out `r.squared`; `$r.sq` only worked through partial matching.
r2.tr <- summary(lm1)$r.squared
# Test R^2 is measured against a baseline that predicts the training mean.
SST <- sum((TS2$amount - mean(TR2$amount))^2)
SSE <- sum((pred - TS2$amount)^2)
r2.ts <- 1 - (SSE / SST)
c(r2.tr, r2.ts)
## [1] 0.2909908 0.2575966

嘗試增加變數,再分TR2 & TS2

# Hand-crafted interaction and power terms built from r, s, f and m.
# The expressions are kept exactly as originally written so the fitted
# coefficients stay comparable.
A2 <- mutate(
  A2,
  aa = f * m^2 * r^2,
  uu = f * r,
  dd = m * r^2,
  cc = s * f,
  bb = s * m^0.5,
  vv = m * f^4,
  ii = f * f^0.5,
  pp = r * r^2,
  jj = (s - r)^2 * m^2,
  FF = (s - r)^2 * f * s^3
)

# Re-split so both partitions carry the new columns.
TR2 <- subset(A2, spl2)
TS2 <- subset(A2, !spl2)

# Model columns: everything except the customer id and the `buy` flag.
# Computed from names rather than hard-coded positions (was 2:10, 12:21).
cx <- which(!names(TR2) %in% c("cust", "buy"))
colnames(TR2[, cx])
##  [1] "r"      "s"      "f"      "m"      "rev"    "raw"    "age"   
##  [8] "area"   "amount" "aa"     "uu"     "dd"     "cc"     "bb"    
## [15] "vv"     "ii"     "pp"     "jj"     "FF"
# Refit the linear model on the enlarged column set and print its coefficients.
lm1 <- lm(formula = amount ~ ., data = TR2[, cx])
summary(object = lm1)
## 
## Call:
## lm(formula = amount ~ ., data = TR2[, cx])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.83854 -0.22761  0.04917  0.27798  1.51672 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.816e+00  1.064e-01  17.060  < 2e-16 ***
## r            5.197e-03  1.141e-03   4.556 5.29e-06 ***
## s           -2.449e-02  3.093e-03  -7.917 2.72e-15 ***
## f            2.419e-02  3.619e-02   0.668  0.50387    
## m            2.716e-01  1.224e-01   2.220  0.02647 *  
## rev          3.381e-02  1.195e-01   0.283  0.77729    
## raw          4.281e-05  9.955e-06   4.301 1.72e-05 ***
## ageB         7.397e-02  2.490e-02   2.971  0.00298 ** 
## ageC         1.186e-01  2.285e-02   5.191 2.14e-07 ***
## ageD         1.235e-01  2.254e-02   5.478 4.43e-08 ***
## ageE         1.323e-01  2.305e-02   5.738 9.86e-09 ***
## ageF         1.065e-01  2.405e-02   4.429 9.56e-06 ***
## ageG         7.922e-02  2.625e-02   3.018  0.00255 ** 
## ageH         7.100e-02  3.096e-02   2.293  0.02185 *  
## ageI         7.207e-02  3.181e-02   2.266  0.02348 *  
## ageJ        -2.037e-02  2.797e-02  -0.728  0.46651    
## ageK         1.126e-01  3.926e-02   2.867  0.00415 ** 
## areaB        8.270e-02  4.312e-02   1.918  0.05513 .  
## areaC        4.041e-02  3.502e-02   1.154  0.24859    
## areaD       -9.158e-03  3.682e-02  -0.249  0.80359    
## areaE        6.240e-03  3.228e-02   0.193  0.84670    
## areaF        1.429e-02  3.250e-02   0.440  0.66027    
## areaG        2.310e-02  3.463e-02   0.667  0.50479    
## areaH        1.376e-02  3.854e-02   0.357  0.72100    
## aa           6.068e-07  5.277e-07   1.150  0.25023    
## uu          -1.264e-03  2.936e-04  -4.306 1.68e-05 ***
## dd          -3.612e-05  7.385e-06  -4.891 1.02e-06 ***
## cc           7.126e-04  3.405e-04   2.093  0.03639 *  
## bb           1.417e-02  1.976e-03   7.173 7.92e-13 ***
## vv           2.259e-08  1.662e-08   1.360  0.17401    
## ii          -8.650e-03  3.033e-03  -2.852  0.00436 ** 
## pp           7.185e-07  1.653e-07   4.345 1.41e-05 ***
## jj          -1.670e-06  1.089e-06  -1.534  0.12502    
## FF          -2.082e-12  1.674e-12  -1.244  0.21351    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4174 on 9235 degrees of freedom
## Multiple R-squared:  0.3058, Adjusted R-squared:  0.3033 
## F-statistic: 123.3 on 33 and 9235 DF,  p-value: < 2.2e-16
# Training R^2 comes from the fit; `r.squared` is spelled out because `$r.sq`
# only resolved through partial matching of list names.
r2.tr <- summary(lm1)$r.squared
# Test R^2 is computed by hand against a baseline that always predicts the
# training-set mean of amount.
test_pred <- predict(lm1, TS2)
SST <- sum((TS2$amount - mean(TR2$amount))^2)
SSE <- sum((test_pred - TS2$amount)^2)
r2.ts <- 1 - (SSE / SST)
c(r2.tr, r2.ts)
## [1] 0.3057826 0.2775772
#0.2772139

加入變數

library(lubridate)
## Warning: package 'lubridate' was built under R version 3.4.4
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
# Per-customer totals over the rows of X whose date falls in month 1
# (January); the year is not checked.
Jan <- X %>%
  filter(month(date) == 1) %>%
  group_by(cust) %>%
  summarise(
    amount_m1 = sum(total),
    items_m1 = sum(items),
    pieces_m1 = sum(pieces),
    gross_m1 = sum(gross),
    # NOTE(review): this repeats sum(gross), so price_m1 is always equal to
    # gross_m1 -- presumably a different source column was intended; confirm.
    price_m1 = sum(gross)
  )
head(Jan)
## # A tibble: 6 x 6
##    cust amount_m1 items_m1 pieces_m1 gross_m1 price_m1
##   <int>     <int>    <int>     <int>    <int>    <int>
## 1  1069       971        3         3      100      100
## 2  1113       628        7         9      112      112
## 3  1823       433        3         3       92       92
## 4  2189      4978       21        52      881      881
## 5  4282       796        7         7      151      151
## 6  4978      2445       23        25      551      551

加入變數到A2

# Attach the January aggregates to every customer in A2. Customers with no
# row in Jan get NA in the five *_m1 columns.
# left_join() is used instead of merge(): merge() re-sorts the result by the
# key, while the train/test split below applies the external logical vector
# spl2 by row position, so A2's row order must not change here.
A2 <- left_join(A2, Jan, by = "cust")
head(A2)
##   cust  r  s f        m      rev  raw age area   amount  buy        aa  uu
## 1 1069 11 80 2 2.762679 3.063709  129   K    E 2.895423 TRUE  1847.039  22
## 2 3667 37 55 2 3.376486 3.677516  351   K    G 3.195900 TRUE 31214.995  74
## 3 5241 16 47 3 3.011570 3.488692  675   D    F 2.498311 TRUE  6965.419  48
## 4 5517 40 54 2 3.395763 3.696793  793   G    E 3.463146 TRUE 36899.862  80
## 5 6668 19 82 7 2.802871 3.647969  437   D    E 3.055378 TRUE 19852.335 133
## 6 7795 19 64 3 3.499366 3.976488 1396   D    G 3.262451 TRUE 13261.946  57
##          dd  cc        bb         vv        ii    pp        jj          FF
## 1  334.2841 160 132.97046   44.20286  2.828427  1331 36337.822  4875264000
## 2 4622.4089 110 101.06369   54.02377  2.828427 50653  3693.812   107811000
## 3  770.9620 141  81.56322  243.93721  5.196152  4096  8715.844   299321709
## 4 5433.2209 108  99.50902   54.33221  2.828427 64000  2260.117    61725888
## 5 1011.8366 574 137.28258 6729.69428 18.520259  6859 31180.814 15318657144
## 6 1263.2712 192 119.72220  283.44867  5.196152  6859 24797.268  1592524800
##   amount_m1 items_m1 pieces_m1 gross_m1 price_m1
## 1       971        3         3      100      100
## 2        NA       NA        NA       NA       NA
## 3      2756       26        38      606      606
## 4        NA       NA        NA       NA       NA
## 5      1731        9        19      138      138
## 6      6008       28        37      960      960







填補NA

# Fill the NAs in the January columns with each column's mean.
# Columns are addressed by name (previously positions 22:26) so the loop
# keeps working if columns are added or reordered upstream.
# NOTE(review): the mean is taken over all of A2, i.e. training and test rows
# together. Also, NA here means "no row in Jan" for that customer, so 0 may be
# a more faithful fill than the mean -- confirm which is intended.
jan_cols <- c("amount_m1", "items_m1", "pieces_m1", "gross_m1", "price_m1")
for (col in jan_cols) {
  col_mean <- mean(A2[[col]], na.rm = TRUE)
  is_missing <- is.na(A2[[col]])
  A2[[col]][is_missing] <- col_mean
}
head(A2)
##   cust  r  s f        m      rev  raw age area   amount  buy        aa  uu
## 1 1069 11 80 2 2.762679 3.063709  129   K    E 2.895423 TRUE  1847.039  22
## 2 3667 37 55 2 3.376486 3.677516  351   K    G 3.195900 TRUE 31214.995  74
## 3 5241 16 47 3 3.011570 3.488692  675   D    F 2.498311 TRUE  6965.419  48
## 4 5517 40 54 2 3.395763 3.696793  793   G    E 3.463146 TRUE 36899.862  80
## 5 6668 19 82 7 2.802871 3.647969  437   D    E 3.055378 TRUE 19852.335 133
## 6 7795 19 64 3 3.499366 3.976488 1396   D    G 3.262451 TRUE 13261.946  57
##          dd  cc        bb         vv        ii    pp        jj          FF
## 1  334.2841 160 132.97046   44.20286  2.828427  1331 36337.822  4875264000
## 2 4622.4089 110 101.06369   54.02377  2.828427 50653  3693.812   107811000
## 3  770.9620 141  81.56322  243.93721  5.196152  4096  8715.844   299321709
## 4 5433.2209 108  99.50902   54.33221  2.828427 64000  2260.117    61725888
## 5 1011.8366 574 137.28258 6729.69428 18.520259  6859 31180.814 15318657144
## 6 1263.2712 192 119.72220  283.44867  5.196152  6859 24797.268  1592524800
##   amount_m1 items_m1 pieces_m1 gross_m1 price_m1
## 1   971.000   3.0000   3.00000 100.0000 100.0000
## 2  1954.485  14.9203  19.72228 309.9454 309.9454
## 3  2756.000  26.0000  38.00000 606.0000 606.0000
## 4  1954.485  14.9203  19.72228 309.9454 309.9454
## 5  1731.000   9.0000  19.00000 138.0000 138.0000
## 6  6008.000  28.0000  37.00000 960.0000 960.0000

A2新增變數加入Training Testing

# Re-split after adding the January columns. spl2 is an external logical
# vector applied by position, so A2 must still be in its original row order.
TR2 <- subset(A2, spl2)
TS2 <- subset(A2, !spl2)

# Model columns: drop the id, the `buy` flag and the three January columns
# that are not used as predictors. Name-based equivalent of c(2:10, 12:23).
cx <- which(!names(TR2) %in%
  c("cust", "buy", "pieces_m1", "gross_m1", "price_m1"))
colnames(TR2[, cx])
##  [1] "r"         "s"         "f"         "m"         "rev"      
##  [6] "raw"       "age"       "area"      "amount"    "aa"       
## [11] "uu"        "dd"        "cc"        "bb"        "vv"       
## [16] "ii"        "pp"        "jj"        "FF"        "amount_m1"
## [21] "items_m1"
# Refit with the two January predictors included and print the coefficients.
lm1 <- lm(formula = amount ~ ., data = TR2[, cx])
summary(object = lm1)
## 
## Call:
## lm(formula = amount ~ ., data = TR2[, cx])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.85417 -0.22684  0.04689  0.27720  1.51675 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.799e+00  1.088e-01  16.530  < 2e-16 ***
## r            5.168e-03  1.195e-03   4.325 1.54e-05 ***
## s           -2.379e-02  3.155e-03  -7.542 5.08e-14 ***
## f            1.625e-02  3.622e-02   0.449 0.653685    
## m            2.622e-01  1.223e-01   2.144 0.032080 *  
## rev          5.165e-02  1.199e-01   0.431 0.666572    
## raw          4.835e-05  1.179e-05   4.099 4.18e-05 ***
## ageB         7.350e-02  2.488e-02   2.955 0.003138 ** 
## ageC         1.181e-01  2.283e-02   5.171 2.38e-07 ***
## ageD         1.223e-01  2.253e-02   5.428 5.84e-08 ***
## ageE         1.297e-01  2.304e-02   5.631 1.85e-08 ***
## ageF         1.038e-01  2.404e-02   4.317 1.60e-05 ***
## ageG         7.767e-02  2.623e-02   2.962 0.003068 ** 
## ageH         6.980e-02  3.093e-02   2.256 0.024073 *  
## ageI         7.282e-02  3.179e-02   2.291 0.021989 *  
## ageJ        -2.116e-02  2.795e-02  -0.757 0.449003    
## ageK         1.127e-01  3.923e-02   2.873 0.004072 ** 
## areaB        8.299e-02  4.308e-02   1.927 0.054070 .  
## areaC        3.967e-02  3.499e-02   1.134 0.257031    
## areaD       -8.264e-03  3.679e-02  -0.225 0.822295    
## areaE        4.430e-03  3.225e-02   0.137 0.890744    
## areaF        1.264e-02  3.247e-02   0.389 0.697022    
## areaG        2.361e-02  3.460e-02   0.682 0.494948    
## areaH        1.404e-02  3.851e-02   0.365 0.715413    
## aa           5.373e-07  5.281e-07   1.018 0.308913    
## uu          -1.225e-03  2.957e-04  -4.144 3.45e-05 ***
## dd          -3.543e-05  7.381e-06  -4.800 1.61e-06 ***
## cc           7.364e-04  3.409e-04   2.161 0.030759 *  
## bb           1.366e-02  2.033e-03   6.721 1.92e-11 ***
## vv           1.286e-08  1.676e-08   0.767 0.442955    
## ii          -7.657e-03  3.044e-03  -2.515 0.011919 *  
## pp           7.111e-07  1.655e-07   4.296 1.76e-05 ***
## jj          -1.459e-06  1.115e-06  -1.309 0.190518    
## FF          -2.299e-12  1.673e-12  -1.374 0.169327    
## amount_m1   -1.679e-05  4.897e-06  -3.428 0.000611 ***
## items_m1     2.571e-03  6.205e-04   4.143 3.46e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.417 on 9233 degrees of freedom
## Multiple R-squared:  0.3072, Adjusted R-squared:  0.3045 
## F-statistic:   117 on 35 and 9233 DF,  p-value: < 2.2e-16
# Training R^2 from the fit; `r.squared` is spelled out because `$r.sq` only
# resolved through partial matching of list names.
r2.tr <- summary(lm1)$r.squared
# Test R^2 against a baseline that always predicts the training-set mean.
test_pred <- predict(lm1, TS2)
SST <- sum((TS2$amount - mean(TR2$amount))^2)
SSE <- sum((test_pred - TS2$amount)^2)
r2.ts <- 1 - (SSE / SST)
c(r2.tr, r2.ts)
## [1] 0.3071653 0.2792819
#0.2793

2.嘗試先集群再LM

# Cluster customers on r, f and m before fitting one model per cluster.
# Columns are picked by name (previously positions 2, 4, 5).
rfm_cols <- c("r", "f", "m")
LTR <- TR2[, rfm_cols]
LTS <- TS2[, rfm_cols]
library(caret)
# The preprocessing is estimated on the training rows only and then applied
# to both partitions (standardised data, per the original note).
preproc <- preProcess(LTR)
NTR <- predict(preproc, LTR)
NTS <- predict(preproc, LTS)
# kmeans() starts from randomly chosen centres; without a fixed seed the
# clusters -- and every R^2 computed from them below -- change on each run.
set.seed(2)
km <- kmeans(NTR, 5)
library(flexclust)
## Warning: package 'flexclust' was built under R version 3.4.4
## Loading required package: grid
## Loading required package: modeltools
## Loading required package: stats4
# Convert the k-means fit into a kcca object so that predict() can assign
# new rows to the fitted clusters.
km.kcca = as.kcca(km,NTR)  ## clustering is based on the TR2 (training) rows
## Found more than one class "kcca" in cache; using the first, from namespace 'kernlab'
## Also defined by 'flexclust'
## Found more than one class "kcca" in cache; using the first, from namespace 'kernlab'
## Also defined by 'flexclust'
CTR = predict(km.kcca)     
## Found more than one class "kcca" in cache; using the first, from namespace 'kernlab'
## Also defined by 'flexclust'
CTS = predict(km.kcca, newdata=NTS)   ## predicted cluster for each TS2 (test) row

拿分群做回歸

# One linear model per training cluster.
apple <- split(TR2, CTR)
# Positions 2:7, 10, 12:23 = r, s, f, m, rev, raw, amount, the ten engineered
# terms and the two January predictors (age and area are left out).
cluster_cols <- c(2:7, 10, 12:23)
# Iterate over the clusters actually present instead of a hard-coded 1:5, so
# this stays correct if the k passed to kmeans() is changed.
M <- lapply(seq_along(apple), function(k) {
  lm(amount ~ ., data = apple[[k]][, cluster_cols])
})

預測log(amount)

# Predict each test row with the model of the cluster it was assigned to.
# The loop follows the number of fitted models rather than a hard-coded 1:5.
Pred <- lapply(seq_along(M), function(i) {
  predict(M[[i]], TS2[CTS == i, ])
})
## Warning in predict.lm(M[[i]], TS2[CTS == i, ]): prediction from a rank-
## deficient fit may be misleading
# Actual test amounts, grouped by cluster label and concatenated in
# ascending cluster order.
t = do.call(c, split(TS2$amount,CTS))
# Per-cluster predictions concatenated in the same cluster order, so y[k]
# and t[k] refer to the same test row.
y = do.call(c, Pred)

計算R^2

# Test R^2 of the cluster-wise models, using the training mean as baseline.
SSE <- sum((y - t)^2)
SST <- sum((TS2$amount - mean(TR2$amount))^2)
r2.ts <- 1 - SSE / SST
r2.ts
## [1] 0.2598314
# Pairwise correlations among the numeric model columns of the training set.
cor(x = TR2[, c(2:7, 10, 12:23)])
##                     r           s           f            m         rev
## r          1.00000000  0.09280689 -0.40129374 -0.081126640 -0.44027271
## s          0.09280689  1.00000000  0.42269893 -0.038503261  0.37382516
## f         -0.40129374  0.42269893  1.00000000 -0.052591356  0.53746879
## m         -0.08112664 -0.03850326 -0.05259136  1.000000000  0.73023789
## rev       -0.44027271  0.37382516  0.53746879  0.730237893  1.00000000
## raw       -0.29568266  0.27730825  0.58067392  0.444250605  0.69106801
## amount    -0.15318868  0.11573180  0.26075411  0.448832752  0.49819086
## aa         0.73885222  0.23988889 -0.15141996  0.129895002 -0.01798996
## uu         0.45421111  0.41233382  0.20091882 -0.093705354  0.10175458
## dd         0.94090159  0.13572184 -0.32432534  0.055867743 -0.29336220
## cc        -0.35813848  0.48208952  0.99335208 -0.053482978  0.52699089
## bb         0.07156626  0.97702793  0.40478867  0.160789068  0.51390410
## vv        -0.08120938  0.08234599  0.56792007 -0.008137503  0.16138921
## ii        -0.29750436  0.31169698  0.96107444 -0.049063048  0.43032686
## pp         0.89744937  0.16148585 -0.28441875 -0.076304783 -0.36618575
## jj        -0.61988527  0.61834594  0.58949999  0.220160172  0.68107758
## FF        -0.35556936  0.39572205  0.95126182 -0.042927934  0.45918225
## amount_m1 -0.07099274  0.13250769  0.40762238  0.375026779  0.47183446
## items_m1  -0.07773565  0.16806445  0.50023232  0.271614719  0.43343367
##                   raw       amount          aa           uu          dd
## r         -0.29568266 -0.153188684  0.73885222  0.454211113  0.94090159
## s          0.27730825  0.115731805  0.23988889  0.412333822  0.13572184
## f          0.58067392  0.260754113 -0.15141996  0.200918822 -0.32432534
## m          0.44425061  0.448832752  0.12989500 -0.093705354  0.05586774
## rev        0.69106801  0.498190855 -0.01798996  0.101754584 -0.29336220
## raw        1.00000000  0.421625952 -0.06731293  0.063309711 -0.21378675
## amount     0.42162595  1.000000000 -0.00180071 -0.009755721 -0.06830873
## aa        -0.06731293 -0.001800710  1.00000000  0.753791502  0.75091862
## uu         0.06330971 -0.009755721  0.75379150  1.000000000  0.35916408
## dd        -0.21378675 -0.068308730  0.75091862  0.359164076  1.00000000
## cc         0.57852509  0.259085976 -0.11901949  0.224913468 -0.28322191
## bb         0.37057984  0.208395242  0.26543145  0.384065368  0.14421903
## vv         0.36043920  0.132637142 -0.04356514  0.036113917 -0.05309565
## ii         0.54955750  0.241391578 -0.12293414  0.155115137 -0.22846113
## pp        -0.21288031 -0.108296885  0.64532873  0.303325579  0.95401175
## jj         0.59298064  0.305639127 -0.37252335 -0.119131389 -0.51814039
## FF         0.55855316  0.253255869 -0.19688190  0.066773206 -0.26928466
## amount_m1  0.74927260  0.319771668 -0.01261175  0.007903888 -0.01865575
## items_m1   0.67853640  0.311077080 -0.01669654  0.027278846 -0.02135074
##                    cc         bb           vv          ii           pp
## r         -0.35813848 0.07156626 -0.081209378 -0.29750436  0.897449375
## s          0.48208952 0.97702793  0.082345995  0.31169698  0.161485854
## f          0.99335208 0.40478867  0.567920071  0.96107444 -0.284418746
## m         -0.05348298 0.16078907 -0.008137503 -0.04906305 -0.076304783
## rev        0.52699089 0.51390410  0.161389208  0.43032686 -0.366185746
## raw        0.57852509 0.37057984  0.360439203  0.54955750 -0.212880308
## amount     0.25908598 0.20839524  0.132637142  0.24139158 -0.108296885
## aa        -0.11901949 0.26543145 -0.043565137 -0.12293414  0.645328733
## uu         0.22491347 0.38406537  0.036113917  0.15511514  0.303325579
## dd        -0.28322191 0.14421903 -0.053095651 -0.22846113  0.954011750
## cc         1.00000000 0.46279346  0.569576385  0.95804383 -0.244884522
## bb         0.46279346 1.00000000  0.079370124  0.29576266  0.137911969
## vv         0.56957638 0.07937012  1.000000000  0.73453682 -0.042572013
## ii         0.95804383 0.29576266  0.734536822  1.00000000 -0.195318561
## pp        -0.24488452 0.13791197 -0.042572013 -0.19531856  1.000000000
## jj         0.61154559 0.66406621  0.142146860  0.45316346 -0.460465063
## FF         0.96745798 0.37985761  0.617988624  0.95406941 -0.226752835
## amount_m1  0.40196061 0.20261448  0.339104065  0.42714038 -0.008194315
## items_m1   0.49531676 0.21831856  0.451695695  0.53121837 -0.008228728
##                   jj          FF    amount_m1     items_m1
## r         -0.6198853 -0.35556936 -0.070992738 -0.077735651
## s          0.6183459  0.39572205  0.132507692  0.168064445
## f          0.5895000  0.95126182  0.407622380  0.500232322
## m          0.2201602 -0.04292793  0.375026779  0.271614719
## rev        0.6810776  0.45918225  0.471834464  0.433433673
## raw        0.5929806  0.55855316  0.749272604  0.678536397
## amount     0.3056391  0.25325587  0.319771668  0.311077080
## aa        -0.3725233 -0.19688190 -0.012611751 -0.016696538
## uu        -0.1191314  0.06677321  0.007903888  0.027278846
## dd        -0.5181404 -0.26928466 -0.018655749 -0.021350742
## cc         0.6115456  0.96745798  0.401960609  0.495316755
## bb         0.6640662  0.37985761  0.202614476  0.218318564
## vv         0.1421469  0.61798862  0.339104065  0.451695695
## ii         0.4531635  0.95406941  0.427140381  0.531218371
## pp        -0.4604651 -0.22675284 -0.008194315 -0.008228728
## jj         1.0000000  0.58762774  0.333491636  0.327896928
## FF         0.5876277  1.00000000  0.411214247  0.507448063
## amount_m1  0.3334916  0.41121425  1.000000000  0.832241010
## items_m1   0.3278969  0.50744806  0.832241010  1.000000000
# Second attempt at per-cluster models, this time on a reduced column set.
apple <- split(TR2, CTR)
# Positions 2:6, 10, 13, 14, 17, 18, 20 =
# r, s, f, m, rev, amount, uu, dd, vv, ii, jj
slim_cols <- c(2:6, 10, 13, 14, 17, 18, 20)
# Loop over the clusters actually present instead of a hard-coded 1:5.
M <- lapply(seq_along(apple), function(k) {
  lm(amount ~ ., data = apple[[k]][, slim_cols])
})
Pred <- lapply(seq_along(M), function(i) {
  predict(M[[i]], TS2[CTS == i, ])
})
# Actuals and predictions are both concatenated in ascending cluster order,
# so they line up element by element.
t <- do.call(c, split(TS2$amount, CTS))
y <- do.call(c, Pred)
# Test R^2 against a baseline that predicts the training mean.
SST <- sum((TS2$amount - mean(TR2$amount))^2)
SSE <- sum((y - t)^2)
r2.ts <- 1 - (SSE / SST)
r2.ts
## [1] 0.2638304

交叉驗證~~

# Cross-validate the full linear model with caret.

# Classification-style control (class probabilities + twoClassSummary). It is
# not used by the regression below; kept so other code that refers to `ctrl`
# still finds it. `repeats` is now passed to the constructor directly.
ctrl <- trainControl(
  method = "repeatedcv", number = 10, repeats = 2,
  savePredictions = "final", classProbs = TRUE,
  summaryFunction = twoClassSummary
)

# Regression control: 10-fold CV repeated twice. Previously `repeats = 2` was
# only assigned to `ctrl`, so this object ran a single repeat despite the
# "Repeated CV" intent.
# NOTE(review): assumes two repeats were intended here as well -- confirm.
ctrl2 <- trainControl(
  method = "repeatedcv", number = 10, repeats = 2,
  savePredictions = "final"
)

set.seed(2)
# For method = "lm" the only tuning parameter is the logical `intercept`.
# The earlier grid seq(1, 2, 0.01) refitted the same model 101 times and gave
# 101 identical result rows, so a single value is supplied instead.
cv.lm2 <- train(
  amount ~ ., data = TR2[, c(2:10, 12:23)], method = "lm",
  trControl = ctrl2, metric = "Rsquared",
  tuneGrid = data.frame(intercept = TRUE)
)

# With a one-value grid there is no tuning curve to plot; show the spread of
# R^2 across the resamples instead.
boxplot(
  cv.lm2$resample$Rsquared,
  main = "Cross-validated R-squared per resample",
  ylab = "Rsquared"
)

cv.lm2$results
##     intercept      RMSE  Rsquared       MAE     RMSESD RsquaredSD
## 1        1.00 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 2        1.01 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 3        1.02 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 4        1.03 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 5        1.04 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 6        1.05 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 7        1.06 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 8        1.07 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 9        1.08 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 10       1.09 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 11       1.10 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 12       1.11 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 13       1.12 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 14       1.13 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 15       1.14 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 16       1.15 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 17       1.16 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 18       1.17 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 19       1.18 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 20       1.19 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 21       1.20 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 22       1.21 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 23       1.22 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 24       1.23 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 25       1.24 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 26       1.25 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 27       1.26 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 28       1.27 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 29       1.28 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 30       1.29 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 31       1.30 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 32       1.31 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 33       1.32 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 34       1.33 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 35       1.34 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 36       1.35 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 37       1.36 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 38       1.37 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 39       1.38 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 40       1.39 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 41       1.40 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 42       1.41 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 43       1.42 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 44       1.43 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 45       1.44 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 46       1.45 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 47       1.46 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 48       1.47 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 49       1.48 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 50       1.49 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 51       1.50 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 52       1.51 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 53       1.52 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 54       1.53 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 55       1.54 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 56       1.55 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 57       1.56 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 58       1.57 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 59       1.58 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 60       1.59 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 61       1.60 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 62       1.61 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 63       1.62 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 64       1.63 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 65       1.64 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 66       1.65 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 67       1.66 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 68       1.67 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 69       1.68 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 70       1.69 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 71       1.70 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 72       1.71 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 73       1.72 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 74       1.73 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 75       1.74 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 76       1.75 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 77       1.76 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 78       1.77 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 79       1.78 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 80       1.79 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 81       1.80 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 82       1.81 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 83       1.82 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 84       1.83 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 85       1.84 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 86       1.85 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 87       1.86 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 88       1.87 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 89       1.88 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 90       1.89 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 91       1.90 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 92       1.91 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 93       1.92 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 94       1.93 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 95       1.94 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 96       1.95 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 97       1.96 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 98       1.97 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 99       1.98 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 100      1.99 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
## 101      2.00 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422
##           MAESD
## 1   0.006147461
## 2   0.006147461
## 3   0.006147461
## 4   0.006147461
## 5   0.006147461
## 6   0.006147461
## 7   0.006147461
## 8   0.006147461
## 9   0.006147461
## 10  0.006147461
## 11  0.006147461
## 12  0.006147461
## 13  0.006147461
## 14  0.006147461
## 15  0.006147461
## 16  0.006147461
## 17  0.006147461
## 18  0.006147461
## 19  0.006147461
## 20  0.006147461
## 21  0.006147461
## 22  0.006147461
## 23  0.006147461
## 24  0.006147461
## 25  0.006147461
## 26  0.006147461
## 27  0.006147461
## 28  0.006147461
## 29  0.006147461
## 30  0.006147461
## 31  0.006147461
## 32  0.006147461
## 33  0.006147461
## 34  0.006147461
## 35  0.006147461
## 36  0.006147461
## 37  0.006147461
## 38  0.006147461
## 39  0.006147461
## 40  0.006147461
## 41  0.006147461
## 42  0.006147461
## 43  0.006147461
## 44  0.006147461
## 45  0.006147461
## 46  0.006147461
## 47  0.006147461
## 48  0.006147461
## 49  0.006147461
## 50  0.006147461
## 51  0.006147461
## 52  0.006147461
## 53  0.006147461
## 54  0.006147461
## 55  0.006147461
## 56  0.006147461
## 57  0.006147461
## 58  0.006147461
## 59  0.006147461
## 60  0.006147461
## 61  0.006147461
## 62  0.006147461
## 63  0.006147461
## 64  0.006147461
## 65  0.006147461
## 66  0.006147461
## 67  0.006147461
## 68  0.006147461
## 69  0.006147461
## 70  0.006147461
## 71  0.006147461
## 72  0.006147461
## 73  0.006147461
## 74  0.006147461
## 75  0.006147461
## 76  0.006147461
## 77  0.006147461
## 78  0.006147461
## 79  0.006147461
## 80  0.006147461
## 81  0.006147461
## 82  0.006147461
## 83  0.006147461
## 84  0.006147461
## 85  0.006147461
## 86  0.006147461
## 87  0.006147461
## 88  0.006147461
## 89  0.006147461
## 90  0.006147461
## 91  0.006147461
## 92  0.006147461
## 93  0.006147461
## 94  0.006147461
## 95  0.006147461
## 96  0.006147461
## 97  0.006147461
## 98  0.006147461
## 99  0.006147461
## 100 0.006147461
## 101 0.006147461
# Print the coefficient table of the model stored in the train() result.
summary(cv.lm2)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.85417 -0.22684  0.04689  0.27720  1.51675 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.799e+00  1.088e-01  16.530  < 2e-16 ***
## r            5.168e-03  1.195e-03   4.325 1.54e-05 ***
## s           -2.379e-02  3.155e-03  -7.542 5.08e-14 ***
## f            1.625e-02  3.622e-02   0.449 0.653685    
## m            2.622e-01  1.223e-01   2.144 0.032080 *  
## rev          5.165e-02  1.199e-01   0.431 0.666572    
## raw          4.835e-05  1.179e-05   4.099 4.18e-05 ***
## ageB         7.350e-02  2.488e-02   2.955 0.003138 ** 
## ageC         1.181e-01  2.283e-02   5.171 2.38e-07 ***
## ageD         1.223e-01  2.253e-02   5.428 5.84e-08 ***
## ageE         1.297e-01  2.304e-02   5.631 1.85e-08 ***
## ageF         1.038e-01  2.404e-02   4.317 1.60e-05 ***
## ageG         7.767e-02  2.623e-02   2.962 0.003068 ** 
## ageH         6.980e-02  3.093e-02   2.256 0.024073 *  
## ageI         7.282e-02  3.179e-02   2.291 0.021989 *  
## ageJ        -2.116e-02  2.795e-02  -0.757 0.449003    
## ageK         1.127e-01  3.923e-02   2.873 0.004072 ** 
## areaB        8.299e-02  4.308e-02   1.927 0.054070 .  
## areaC        3.967e-02  3.499e-02   1.134 0.257031    
## areaD       -8.264e-03  3.679e-02  -0.225 0.822295    
## areaE        4.430e-03  3.225e-02   0.137 0.890744    
## areaF        1.264e-02  3.247e-02   0.389 0.697022    
## areaG        2.361e-02  3.460e-02   0.682 0.494948    
## areaH        1.404e-02  3.851e-02   0.365 0.715413    
## aa           5.373e-07  5.281e-07   1.018 0.308913    
## uu          -1.225e-03  2.957e-04  -4.144 3.45e-05 ***
## dd          -3.543e-05  7.381e-06  -4.800 1.61e-06 ***
## cc           7.364e-04  3.409e-04   2.161 0.030759 *  
## bb           1.366e-02  2.033e-03   6.721 1.92e-11 ***
## vv           1.286e-08  1.676e-08   0.767 0.442955    
## ii          -7.657e-03  3.044e-03  -2.515 0.011919 *  
## pp           7.111e-07  1.655e-07   4.296 1.76e-05 ***
## jj          -1.459e-06  1.115e-06  -1.309 0.190518    
## FF          -2.299e-12  1.673e-12  -1.374 0.169327    
## amount_m1   -1.679e-05  4.897e-06  -3.428 0.000611 ***
## items_m1     2.571e-03  6.205e-04   4.143 3.46e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.417 on 9233 degrees of freedom
## Multiple R-squared:  0.3072, Adjusted R-squared:  0.3045 
## F-statistic:   117 on 35 and 9233 DF,  p-value: < 2.2e-16

結論

### 在本例中,直接對全部訓練資料做線性迴歸(測試 R² ≈ 0.279)比先分群再各自做線性迴歸(測試 R² ≈ 0.260–0.264)更有效
### 10 折交叉驗證的平均 R² 約為 0.303(標準差約 0.027),與測試集 R²(約 0.279)相近,說明模型的表現並非只在這一次 TEST DATA 的切分上成立