library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(caTools)
## Warning: package 'caTools' was built under R version 3.4.4
library(Matrix)
library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.4.4
library(caret)
## Warning: package 'caret' was built under R version 3.4.4
## Loading required package: lattice
library(doParallel)
## Warning: package 'doParallel' was built under R version 3.4.4
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 3.4.4
## Loading required package: iterators
## Warning: package 'iterators' was built under R version 3.4.4
## Loading required package: parallel
The instructor's original model: log-transform (m, rev, amount) and split into TR2 & TS2
rm(list=ls(all=TRUE))
load("data/tf2.rdata")
A2 = subset(A, A$buy) %>% mutate_at(c("m","rev","amount"), log10)
TR2 = subset(A2, spl2)
TS2 = subset(A2, !spl2)
lm1 = lm(amount ~ ., TR2[,c(2:6,8:10)])        # baseline model on the original variables
pred = predict(lm1, TS2)
r2.tr = summary(lm1)$r.sq                      # in-sample R^2
SST = sum((TS2$amount - mean(TR2$amount))^2)   # total SS, baseline = training mean
SSE = sum((pred - TS2$amount)^2)               # residual SS on the test set
r2.ts = 1 - (SSE/SST)                          # out-of-sample R^2
c(r2.tr, r2.ts)
## [1] 0.2909908 0.2575966
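The same test-set R^2 calculation is repeated several times below, so here is a small helper sketch that wraps it (the name r2_test is illustrative, not part of the original script):
r2_test = function(model, train, test, y = "amount") {
  SST = sum((test[[y]] - mean(train[[y]]))^2)       # total SS against the training mean
  SSE = sum((predict(model, test) - test[[y]])^2)   # residual SS on the test set
  1 - SSE/SST
}
# e.g. r2_test(lm1, TR2, TS2) reproduces r2.ts above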
Try adding more variables, then re-split into TR2 & TS2
# hand-crafted interaction and power terms built from r, s, f, m
A2$aa = A2$f*A2$m^2*A2$r^2
A2$uu = A2$f*A2$r
A2$dd = A2$m*A2$r^2
A2$cc = A2$s*A2$f
A2$bb = A2$s*A2$m^0.5
A2$vv = A2$m*A2$f^4
A2$ii = A2$f*A2$f^0.5
A2$pp = A2$r*A2$r^2
A2$jj = (A2$s-A2$r)^2*A2$m^2
A2$FF = (A2$s-A2$r)^2*A2$f*A2$s^3
TR2 = subset(A2, spl2)
TS2 = subset(A2, !spl2)
cx=c(2:10, 12:21)
colnames(TR2[,cx])
## [1] "r" "s" "f" "m" "rev" "raw" "age"
## [8] "area" "amount" "aa" "uu" "dd" "cc" "bb"
## [15] "vv" "ii" "pp" "jj" "FF"
lm1 = lm(amount ~ ., TR2[,cx])
summary(lm1)
##
## Call:
## lm(formula = amount ~ ., data = TR2[, cx])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.83854 -0.22761 0.04917 0.27798 1.51672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.816e+00 1.064e-01 17.060 < 2e-16 ***
## r 5.197e-03 1.141e-03 4.556 5.29e-06 ***
## s -2.449e-02 3.093e-03 -7.917 2.72e-15 ***
## f 2.419e-02 3.619e-02 0.668 0.50387
## m 2.716e-01 1.224e-01 2.220 0.02647 *
## rev 3.381e-02 1.195e-01 0.283 0.77729
## raw 4.281e-05 9.955e-06 4.301 1.72e-05 ***
## ageB 7.397e-02 2.490e-02 2.971 0.00298 **
## ageC 1.186e-01 2.285e-02 5.191 2.14e-07 ***
## ageD 1.235e-01 2.254e-02 5.478 4.43e-08 ***
## ageE 1.323e-01 2.305e-02 5.738 9.86e-09 ***
## ageF 1.065e-01 2.405e-02 4.429 9.56e-06 ***
## ageG 7.922e-02 2.625e-02 3.018 0.00255 **
## ageH 7.100e-02 3.096e-02 2.293 0.02185 *
## ageI 7.207e-02 3.181e-02 2.266 0.02348 *
## ageJ -2.037e-02 2.797e-02 -0.728 0.46651
## ageK 1.126e-01 3.926e-02 2.867 0.00415 **
## areaB 8.270e-02 4.312e-02 1.918 0.05513 .
## areaC 4.041e-02 3.502e-02 1.154 0.24859
## areaD -9.158e-03 3.682e-02 -0.249 0.80359
## areaE 6.240e-03 3.228e-02 0.193 0.84670
## areaF 1.429e-02 3.250e-02 0.440 0.66027
## areaG 2.310e-02 3.463e-02 0.667 0.50479
## areaH 1.376e-02 3.854e-02 0.357 0.72100
## aa 6.068e-07 5.277e-07 1.150 0.25023
## uu -1.264e-03 2.936e-04 -4.306 1.68e-05 ***
## dd -3.612e-05 7.385e-06 -4.891 1.02e-06 ***
## cc 7.126e-04 3.405e-04 2.093 0.03639 *
## bb 1.417e-02 1.976e-03 7.173 7.92e-13 ***
## vv 2.259e-08 1.662e-08 1.360 0.17401
## ii -8.650e-03 3.033e-03 -2.852 0.00436 **
## pp 7.185e-07 1.653e-07 4.345 1.41e-05 ***
## jj -1.670e-06 1.089e-06 -1.534 0.12502
## FF -2.082e-12 1.674e-12 -1.244 0.21351
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4174 on 9235 degrees of freedom
## Multiple R-squared: 0.3058, Adjusted R-squared: 0.3033
## F-statistic: 123.3 on 33 and 9235 DF, p-value: < 2.2e-16
r2.tr = summary(lm1)$r.sq
SST = sum((TS2$amount - mean(TR2$amount))^ 2)
SSE = sum((predict(lm1, TS2) - TS2$amount)^2)
r2.ts = 1 - (SSE/SST)
c(r2.tr, r2.ts)
## [1] 0.3057826 0.2775772
#0.2772139
Adding new variables
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.4.4
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# customers' spending in January
Jan = filter(X, month(date)==1 ) %>%   # January transactions only
  group_by(cust) %>%
  summarise(
    amount_m1 = sum(total),
    items_m1  = sum(items),
    pieces_m1 = sum(pieces),
    gross_m1  = sum(gross),
    price_m1  = sum(gross)             # as written, identical to gross_m1
  )
head(Jan)
## # A tibble: 6 x 6
## cust amount_m1 items_m1 pieces_m1 gross_m1 price_m1
## <int> <int> <int> <int> <int> <int>
## 1 1069 971 3 3 100 100
## 2 1113 628 7 9 112 112
## 3 1823 433 3 3 92 92
## 4 2189 4978 21 52 881 881
## 5 4282 796 7 7 151 151
## 6 4978 2445 23 25 551 551
Merge the new variables into A2
A2 = merge(A2, Jan, by="cust", all.x=T)
head(A2)
## cust r s f m rev raw age area amount buy aa uu
## 1 1069 11 80 2 2.762679 3.063709 129 K E 2.895423 TRUE 1847.039 22
## 2 3667 37 55 2 3.376486 3.677516 351 K G 3.195900 TRUE 31214.995 74
## 3 5241 16 47 3 3.011570 3.488692 675 D F 2.498311 TRUE 6965.419 48
## 4 5517 40 54 2 3.395763 3.696793 793 G E 3.463146 TRUE 36899.862 80
## 5 6668 19 82 7 2.802871 3.647969 437 D E 3.055378 TRUE 19852.335 133
## 6 7795 19 64 3 3.499366 3.976488 1396 D G 3.262451 TRUE 13261.946 57
## dd cc bb vv ii pp jj FF
## 1 334.2841 160 132.97046 44.20286 2.828427 1331 36337.822 4875264000
## 2 4622.4089 110 101.06369 54.02377 2.828427 50653 3693.812 107811000
## 3 770.9620 141 81.56322 243.93721 5.196152 4096 8715.844 299321709
## 4 5433.2209 108 99.50902 54.33221 2.828427 64000 2260.117 61725888
## 5 1011.8366 574 137.28258 6729.69428 18.520259 6859 31180.814 15318657144
## 6 1263.2712 192 119.72220 283.44867 5.196152 6859 24797.268 1592524800
## amount_m1 items_m1 pieces_m1 gross_m1 price_m1
## 1 971 3 3 100 100
## 2 NA NA NA NA NA
## 3 2756 26 38 606 606
## 4 NA NA NA NA NA
## 5 1731 9 19 138 138
## 6 6008 28 37 960 960
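As an aside, dplyr's left_join is an equivalent way to do the merge above (a sketch, not what was run); customers with no January transactions come back with NA in the new columns, which is why the imputation step below is needed:
A2 = left_join(A2, Jan, by = "cust")   # equivalent to merge(A2, Jan, by="cust", all.x=TRUE)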
Impute the NAs (customers with no January purchases got NA from the left join)
##### fill NAs with the column mean
for(i in 22:26){
  mean_col <- mean(A2[, i], na.rm = T)   # mean of the i-th column
  na.rows <- is.na(A2[, i])              # rows where column i is NA
  A2[na.rows, i] <- mean_col
}
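For reference, a tidyverse-style sketch of the same mean imputation, using mutate_at as earlier in this script (an alternative to the loop above, not part of the original run):
A2 = A2 %>%
  mutate_at(vars(amount_m1, items_m1, pieces_m1, gross_m1, price_m1),
            funs(ifelse(is.na(.), mean(., na.rm = TRUE), .)))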
head(A2)
## cust r s f m rev raw age area amount buy aa uu
## 1 1069 11 80 2 2.762679 3.063709 129 K E 2.895423 TRUE 1847.039 22
## 2 3667 37 55 2 3.376486 3.677516 351 K G 3.195900 TRUE 31214.995 74
## 3 5241 16 47 3 3.011570 3.488692 675 D F 2.498311 TRUE 6965.419 48
## 4 5517 40 54 2 3.395763 3.696793 793 G E 3.463146 TRUE 36899.862 80
## 5 6668 19 82 7 2.802871 3.647969 437 D E 3.055378 TRUE 19852.335 133
## 6 7795 19 64 3 3.499366 3.976488 1396 D G 3.262451 TRUE 13261.946 57
## dd cc bb vv ii pp jj FF
## 1 334.2841 160 132.97046 44.20286 2.828427 1331 36337.822 4875264000
## 2 4622.4089 110 101.06369 54.02377 2.828427 50653 3693.812 107811000
## 3 770.9620 141 81.56322 243.93721 5.196152 4096 8715.844 299321709
## 4 5433.2209 108 99.50902 54.33221 2.828427 64000 2260.117 61725888
## 5 1011.8366 574 137.28258 6729.69428 18.520259 6859 31180.814 15318657144
## 6 1263.2712 192 119.72220 283.44867 5.196152 6859 24797.268 1592524800
## amount_m1 items_m1 pieces_m1 gross_m1 price_m1
## 1 971.000 3.0000 3.00000 100.0000 100.0000
## 2 1954.485 14.9203 19.72228 309.9454 309.9454
## 3 2756.000 26.0000 38.00000 606.0000 606.0000
## 4 1954.485 14.9203 19.72228 309.9454 309.9454
## 5 1731.000 9.0000 19.00000 138.0000 138.0000
## 6 6008.000 28.0000 37.00000 960.0000 960.0000
Add the new A2 variables back into the training and testing sets
TR2 = subset(A2, spl2)
TS2 = subset(A2, !spl2)
cx=c(2:10, 12:23)
colnames(TR2[,cx])
## [1] "r" "s" "f" "m" "rev"
## [6] "raw" "age" "area" "amount" "aa"
## [11] "uu" "dd" "cc" "bb" "vv"
## [16] "ii" "pp" "jj" "FF" "amount_m1"
## [21] "items_m1"
lm1 = lm(amount ~ ., TR2[,cx])
summary(lm1)
##
## Call:
## lm(formula = amount ~ ., data = TR2[, cx])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.85417 -0.22684 0.04689 0.27720 1.51675
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.799e+00 1.088e-01 16.530 < 2e-16 ***
## r 5.168e-03 1.195e-03 4.325 1.54e-05 ***
## s -2.379e-02 3.155e-03 -7.542 5.08e-14 ***
## f 1.625e-02 3.622e-02 0.449 0.653685
## m 2.622e-01 1.223e-01 2.144 0.032080 *
## rev 5.165e-02 1.199e-01 0.431 0.666572
## raw 4.835e-05 1.179e-05 4.099 4.18e-05 ***
## ageB 7.350e-02 2.488e-02 2.955 0.003138 **
## ageC 1.181e-01 2.283e-02 5.171 2.38e-07 ***
## ageD 1.223e-01 2.253e-02 5.428 5.84e-08 ***
## ageE 1.297e-01 2.304e-02 5.631 1.85e-08 ***
## ageF 1.038e-01 2.404e-02 4.317 1.60e-05 ***
## ageG 7.767e-02 2.623e-02 2.962 0.003068 **
## ageH 6.980e-02 3.093e-02 2.256 0.024073 *
## ageI 7.282e-02 3.179e-02 2.291 0.021989 *
## ageJ -2.116e-02 2.795e-02 -0.757 0.449003
## ageK 1.127e-01 3.923e-02 2.873 0.004072 **
## areaB 8.299e-02 4.308e-02 1.927 0.054070 .
## areaC 3.967e-02 3.499e-02 1.134 0.257031
## areaD -8.264e-03 3.679e-02 -0.225 0.822295
## areaE 4.430e-03 3.225e-02 0.137 0.890744
## areaF 1.264e-02 3.247e-02 0.389 0.697022
## areaG 2.361e-02 3.460e-02 0.682 0.494948
## areaH 1.404e-02 3.851e-02 0.365 0.715413
## aa 5.373e-07 5.281e-07 1.018 0.308913
## uu -1.225e-03 2.957e-04 -4.144 3.45e-05 ***
## dd -3.543e-05 7.381e-06 -4.800 1.61e-06 ***
## cc 7.364e-04 3.409e-04 2.161 0.030759 *
## bb 1.366e-02 2.033e-03 6.721 1.92e-11 ***
## vv 1.286e-08 1.676e-08 0.767 0.442955
## ii -7.657e-03 3.044e-03 -2.515 0.011919 *
## pp 7.111e-07 1.655e-07 4.296 1.76e-05 ***
## jj -1.459e-06 1.115e-06 -1.309 0.190518
## FF -2.299e-12 1.673e-12 -1.374 0.169327
## amount_m1 -1.679e-05 4.897e-06 -3.428 0.000611 ***
## items_m1 2.571e-03 6.205e-04 4.143 3.46e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.417 on 9233 degrees of freedom
## Multiple R-squared: 0.3072, Adjusted R-squared: 0.3045
## F-statistic: 117 on 35 and 9233 DF, p-value: < 2.2e-16
r2.tr = summary(lm1)$r.sq
SST = sum((TS2$amount - mean(TR2$amount))^ 2)
SSE = sum((predict(lm1, TS2) - TS2$amount)^2)
r2.ts = 1 - (SSE/SST)
c(r2.tr, r2.ts)
## [1] 0.3071653 0.2792819
#0.2793
2. Try clustering first, then LM
LTR = TR2[,c(2,4,5)]          ## r, f, m
LTS = TS2[,c(2,4,5)]
library(caret)                # already attached above
preproc = preProcess(LTR)
NTR = predict(preproc, LTR)   ## standardized training data
NTS = predict(preproc, LTS)   ## standardized testing data
km <- kmeans(NTR,5)
library(flexclust)
## Warning: package 'flexclust' was built under R version 3.4.4
## Loading required package: grid
## Loading required package: modeltools
## Loading required package: stats4
km.kcca = as.kcca(km,NTR)     ## clustering fitted on (standardized) TR2
## Found more than one class "kcca" in cache; using the first, from namespace 'kernlab'
## Also defined by 'flexclust'
## Found more than one class "kcca" in cache; using the first, from namespace 'kernlab'
## Also defined by 'flexclust'
CTR = predict(km.kcca)        ## cluster assignments for the training data
## Found more than one class "kcca" in cache; using the first, from namespace 'kernlab'
## Also defined by 'flexclust'
CTS = predict(km.kcca, newdata=NTS)   ## predict the cluster of each TS2 observation
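Under the hood, predict() on the kcca object assigns each standardized observation to the nearest of the five k-means centroids; a minimal sketch of that assignment done by hand (dist2centers and CTS_manual are illustrative names):
dist2centers = sapply(1:nrow(km$centers), function(k)
  rowSums(sweep(as.matrix(NTS), 2, km$centers[k, ])^2))   # squared distance to centroid k
CTS_manual = max.col(-dist2centers)                       # index of the nearest centroid
# table(CTS_manual, CTS) should show (near-)perfect agreement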
Fit a separate regression within each cluster
apple = split(TR2, CTR)                 # split the training data by cluster
M = lapply(1:5, function(x)
  lm(amount ~ ., data = apple[[x]][,c(2:7,10,12:23)]))   # one linear model per cluster
Predict log(amount)
Pred = lapply(1:5, function(i)
  predict(M[[i]], TS2[CTS==i,]) )   # predict each test-set cluster with its own model
## Warning in predict.lm(M[[i]], TS2[CTS == i, ]): prediction from a rank-
## deficient fit may be misleading
t = do.call(c, split(TS2$amount,CTS))   # actual values, grouped by predicted cluster
y = do.call(c, Pred)                    # predictions, concatenated in the same order
Compute the test-set R^2
SST = sum((TS2$amount - mean(TR2$amount))^ 2)
SSE = sum((y - t)^2)
r2.ts = 1 - (SSE/SST)
r2.ts
## [1] 0.2598314
cor(TR2[,c(2:7,10,12:23)])   # check pairwise correlations before trimming the variable set
## r s f m rev
## r 1.00000000 0.09280689 -0.40129374 -0.081126640 -0.44027271
## s 0.09280689 1.00000000 0.42269893 -0.038503261 0.37382516
## f -0.40129374 0.42269893 1.00000000 -0.052591356 0.53746879
## m -0.08112664 -0.03850326 -0.05259136 1.000000000 0.73023789
## rev -0.44027271 0.37382516 0.53746879 0.730237893 1.00000000
## raw -0.29568266 0.27730825 0.58067392 0.444250605 0.69106801
## amount -0.15318868 0.11573180 0.26075411 0.448832752 0.49819086
## aa 0.73885222 0.23988889 -0.15141996 0.129895002 -0.01798996
## uu 0.45421111 0.41233382 0.20091882 -0.093705354 0.10175458
## dd 0.94090159 0.13572184 -0.32432534 0.055867743 -0.29336220
## cc -0.35813848 0.48208952 0.99335208 -0.053482978 0.52699089
## bb 0.07156626 0.97702793 0.40478867 0.160789068 0.51390410
## vv -0.08120938 0.08234599 0.56792007 -0.008137503 0.16138921
## ii -0.29750436 0.31169698 0.96107444 -0.049063048 0.43032686
## pp 0.89744937 0.16148585 -0.28441875 -0.076304783 -0.36618575
## jj -0.61988527 0.61834594 0.58949999 0.220160172 0.68107758
## FF -0.35556936 0.39572205 0.95126182 -0.042927934 0.45918225
## amount_m1 -0.07099274 0.13250769 0.40762238 0.375026779 0.47183446
## items_m1 -0.07773565 0.16806445 0.50023232 0.271614719 0.43343367
## raw amount aa uu dd
## r -0.29568266 -0.153188684 0.73885222 0.454211113 0.94090159
## s 0.27730825 0.115731805 0.23988889 0.412333822 0.13572184
## f 0.58067392 0.260754113 -0.15141996 0.200918822 -0.32432534
## m 0.44425061 0.448832752 0.12989500 -0.093705354 0.05586774
## rev 0.69106801 0.498190855 -0.01798996 0.101754584 -0.29336220
## raw 1.00000000 0.421625952 -0.06731293 0.063309711 -0.21378675
## amount 0.42162595 1.000000000 -0.00180071 -0.009755721 -0.06830873
## aa -0.06731293 -0.001800710 1.00000000 0.753791502 0.75091862
## uu 0.06330971 -0.009755721 0.75379150 1.000000000 0.35916408
## dd -0.21378675 -0.068308730 0.75091862 0.359164076 1.00000000
## cc 0.57852509 0.259085976 -0.11901949 0.224913468 -0.28322191
## bb 0.37057984 0.208395242 0.26543145 0.384065368 0.14421903
## vv 0.36043920 0.132637142 -0.04356514 0.036113917 -0.05309565
## ii 0.54955750 0.241391578 -0.12293414 0.155115137 -0.22846113
## pp -0.21288031 -0.108296885 0.64532873 0.303325579 0.95401175
## jj 0.59298064 0.305639127 -0.37252335 -0.119131389 -0.51814039
## FF 0.55855316 0.253255869 -0.19688190 0.066773206 -0.26928466
## amount_m1 0.74927260 0.319771668 -0.01261175 0.007903888 -0.01865575
## items_m1 0.67853640 0.311077080 -0.01669654 0.027278846 -0.02135074
## cc bb vv ii pp
## r -0.35813848 0.07156626 -0.081209378 -0.29750436 0.897449375
## s 0.48208952 0.97702793 0.082345995 0.31169698 0.161485854
## f 0.99335208 0.40478867 0.567920071 0.96107444 -0.284418746
## m -0.05348298 0.16078907 -0.008137503 -0.04906305 -0.076304783
## rev 0.52699089 0.51390410 0.161389208 0.43032686 -0.366185746
## raw 0.57852509 0.37057984 0.360439203 0.54955750 -0.212880308
## amount 0.25908598 0.20839524 0.132637142 0.24139158 -0.108296885
## aa -0.11901949 0.26543145 -0.043565137 -0.12293414 0.645328733
## uu 0.22491347 0.38406537 0.036113917 0.15511514 0.303325579
## dd -0.28322191 0.14421903 -0.053095651 -0.22846113 0.954011750
## cc 1.00000000 0.46279346 0.569576385 0.95804383 -0.244884522
## bb 0.46279346 1.00000000 0.079370124 0.29576266 0.137911969
## vv 0.56957638 0.07937012 1.000000000 0.73453682 -0.042572013
## ii 0.95804383 0.29576266 0.734536822 1.00000000 -0.195318561
## pp -0.24488452 0.13791197 -0.042572013 -0.19531856 1.000000000
## jj 0.61154559 0.66406621 0.142146860 0.45316346 -0.460465063
## FF 0.96745798 0.37985761 0.617988624 0.95406941 -0.226752835
## amount_m1 0.40196061 0.20261448 0.339104065 0.42714038 -0.008194315
## items_m1 0.49531676 0.21831856 0.451695695 0.53121837 -0.008228728
## jj FF amount_m1 items_m1
## r -0.6198853 -0.35556936 -0.070992738 -0.077735651
## s 0.6183459 0.39572205 0.132507692 0.168064445
## f 0.5895000 0.95126182 0.407622380 0.500232322
## m 0.2201602 -0.04292793 0.375026779 0.271614719
## rev 0.6810776 0.45918225 0.471834464 0.433433673
## raw 0.5929806 0.55855316 0.749272604 0.678536397
## amount 0.3056391 0.25325587 0.319771668 0.311077080
## aa -0.3725233 -0.19688190 -0.012611751 -0.016696538
## uu -0.1191314 0.06677321 0.007903888 0.027278846
## dd -0.5181404 -0.26928466 -0.018655749 -0.021350742
## cc 0.6115456 0.96745798 0.401960609 0.495316755
## bb 0.6640662 0.37985761 0.202614476 0.218318564
## vv 0.1421469 0.61798862 0.339104065 0.451695695
## ii 0.4531635 0.95406941 0.427140381 0.531218371
## pp -0.4604651 -0.22675284 -0.008194315 -0.008228728
## jj 1.0000000 0.58762774 0.333491636 0.327896928
## FF 0.5876277 1.00000000 0.411214247 0.507448063
## amount_m1 0.3334916 0.41121425 1.000000000 0.832241010
## items_m1 0.3278969 0.50744806 0.832241010 1.000000000
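The refit below drops the most collinear derived terms by hand; caret's findCorrelation() can automate that kind of screening. A hedged sketch (the 0.9 cutoff is arbitrary, and the matrix includes amount only because the printout above does):
high_cor = findCorrelation(cor(TR2[, c(2:7, 10, 12:23)]), cutoff = 0.9, names = TRUE)
high_cor   # candidate columns to drop before refitting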
apple = split(TR2, CTR)
M = lapply(1:5, function(x)
  lm(amount ~ ., data = apple[[x]][,c(2:6,10,13,14,17,18,20)]))   # refit with the less collinear subset
Pred = lapply(1:5, function(i)
predict(M[[i]], TS2[CTS==i,]) )
t = do.call(c, split(TS2$amount,CTS))
y = do.call(c, Pred)
SST = sum((TS2$amount - mean(TR2$amount))^ 2)
SSE = sum((y - t)^2)
r2.ts = 1 - (SSE/SST)
r2.ts
## [1] 0.2638304
Cross-validation
ctrl = trainControl(
  method="repeatedcv", number=10,        # 10-fold, repeated CV
  savePredictions = "final", classProbs=TRUE,
  summaryFunction=twoClassSummary)       # classification setup; not used below
ctrl2 = trainControl(
  method="repeatedcv", number=10,        # 10-fold, repeated CV
  savePredictions = "final")             # the control actually used for the regression
ctrl$repeats = 2                         # note: this sets repeats on ctrl, not on ctrl2
set.seed(2)
cv.lm2 = train(
  amount ~ ., data=TR2[,c(2:10, 12:23)], method="lm",
  trControl=ctrl2, metric="Rsquared",
  # method="lm" has only a logical 'intercept' parameter, so this numeric grid is inert
  tuneGrid = expand.grid( intercept = seq(1,2,0.01) )
)
plot(cv.lm2)

cv.lm2$results
##     intercept      RMSE  Rsquared       MAE     RMSESD RsquaredSD       MAESD
## 1        1.00 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422 0.006147461
## 2        1.01 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422 0.006147461
## ...
## 101      2.00 0.4177955 0.3031369 0.3213384 0.01144972 0.02721422 0.006147461
All 101 rows are identical except for the grid value: since "lm" has a single logical intercept parameter, every candidate in seq(1,2,0.01) fits exactly the same model, so the cross-validated RMSE, Rsquared and MAE never change.
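For comparison, a minimal sketch of the same repeated-CV fit without the inert grid, with the repeat count set directly in trainControl (ctrl3 and cv.lm3 are illustrative names):
ctrl3 = trainControl(method = "repeatedcv", number = 10, repeats = 2,
                     savePredictions = "final")
set.seed(2)
cv.lm3 = train(amount ~ ., data = TR2[, c(2:10, 12:23)],
               method = "lm", trControl = ctrl3, metric = "Rsquared")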
summary(cv.lm2)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.85417 -0.22684 0.04689 0.27720 1.51675
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.799e+00 1.088e-01 16.530 < 2e-16 ***
## r 5.168e-03 1.195e-03 4.325 1.54e-05 ***
## s -2.379e-02 3.155e-03 -7.542 5.08e-14 ***
## f 1.625e-02 3.622e-02 0.449 0.653685
## m 2.622e-01 1.223e-01 2.144 0.032080 *
## rev 5.165e-02 1.199e-01 0.431 0.666572
## raw 4.835e-05 1.179e-05 4.099 4.18e-05 ***
## ageB 7.350e-02 2.488e-02 2.955 0.003138 **
## ageC 1.181e-01 2.283e-02 5.171 2.38e-07 ***
## ageD 1.223e-01 2.253e-02 5.428 5.84e-08 ***
## ageE 1.297e-01 2.304e-02 5.631 1.85e-08 ***
## ageF 1.038e-01 2.404e-02 4.317 1.60e-05 ***
## ageG 7.767e-02 2.623e-02 2.962 0.003068 **
## ageH 6.980e-02 3.093e-02 2.256 0.024073 *
## ageI 7.282e-02 3.179e-02 2.291 0.021989 *
## ageJ -2.116e-02 2.795e-02 -0.757 0.449003
## ageK 1.127e-01 3.923e-02 2.873 0.004072 **
## areaB 8.299e-02 4.308e-02 1.927 0.054070 .
## areaC 3.967e-02 3.499e-02 1.134 0.257031
## areaD -8.264e-03 3.679e-02 -0.225 0.822295
## areaE 4.430e-03 3.225e-02 0.137 0.890744
## areaF 1.264e-02 3.247e-02 0.389 0.697022
## areaG 2.361e-02 3.460e-02 0.682 0.494948
## areaH 1.404e-02 3.851e-02 0.365 0.715413
## aa 5.373e-07 5.281e-07 1.018 0.308913
## uu -1.225e-03 2.957e-04 -4.144 3.45e-05 ***
## dd -3.543e-05 7.381e-06 -4.800 1.61e-06 ***
## cc 7.364e-04 3.409e-04 2.161 0.030759 *
## bb 1.366e-02 2.033e-03 6.721 1.92e-11 ***
## vv 1.286e-08 1.676e-08 0.767 0.442955
## ii -7.657e-03 3.044e-03 -2.515 0.011919 *
## pp 7.111e-07 1.655e-07 4.296 1.76e-05 ***
## jj -1.459e-06 1.115e-06 -1.309 0.190518
## FF -2.299e-12 1.673e-12 -1.374 0.169327
## amount_m1 -1.679e-05 4.897e-06 -3.428 0.000611 ***
## items_m1 2.571e-03 6.205e-04 4.143 3.46e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.417 on 9233 degrees of freedom
## Multiple R-squared: 0.3072, Adjusted R-squared: 0.3045
## F-statistic: 117 on 35 and 9233 DF, p-value: < 2.2e-16
Conclusion
### A plain linear model can sometimes be more effective than clustering first and then fitting per-cluster linear models.
### Cross-validation shows that the model's performance holds up across folds, not just on a single TEST DATA split.