packages = c(
  "dplyr","ggplot2","googleVis","devtools","magrittr","slam","irlba","plotly",
  "arules","arulesViz","Matrix","recommenderlab")
existing = as.character(installed.packages()[,1])
for(pkg in packages[!(packages %in% existing)]) install.packages(pkg)
rm(list=ls(all=TRUE))
LOAD = TRUE
library(dplyr)
library(ggplot2)
library(googleVis)
library(Matrix)
library(slam)
library(irlba)
library(plotly)
library(arules)
library(arulesViz)
library(recommenderlab)


A. 顧客產品矩陣

Load data frame and rename

load("data/tf0.rdata")
A = A0; X = X0; Z = Z0; rm(A0,X0,Z0); gc()
##           used  (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 2536407 135.5    3886542 207.6  3205452 171.2
## Vcells 9943933  75.9   14442948 110.2  9993740  76.3
Z = subset(Z, cust %in% A$cust)
n_distinct(Z$cust)  # 32241
## [1] 32241
n_distinct(Z$prod)  # 23787
## [1] 23787

製作顧客產品矩陣其實很快、也很容易

library(Matrix)
library(slam)
cpm = xtabs(~ cust + prod, Z, sparse=T)  # customer product matrix
dim(cpm)             # 32241 23787
## [1] 32241 23787
mean(cpm > 0)        # 0.00096799
## [1] 0.0009674258

顧客產品矩陣通常是一個很稀疏的矩,陣有一些產品沒什麼人買

colSums(cpm) %>% quantile(seq(0,1,0.1))
##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90% 100% 
##    1    1    2    4    6    8   13   20   35   76 8475
mean(colSums(cpm) > 10)
## [1] 0.4483541

刪去購買次數小於6的產品;為了方便顧客產品矩陣和顧客資料框的合併,我們選擇先保留沒有購買產品的顧客

cpm = cpm[, colSums(cpm) >= 6]      # remove the least frequent products
# cpm = cpm[rowSums(cpm) > 0, ]     # remove non-buying customers
cpm = cpm[, order(-colSums(cpm))]   # order product by frequency
dim(cpm)                            # 32241 23787>14621
## [1] 32241 14621
max(cpm)         # 49
## [1] 49
mean(cpm > 0)    # 0.0015248
## [1] 0.001524785
table(cpm@x) %>% prop.table %>% round(4) %>% head(10)
## 
##      1      2      3      4      5      6      7      8      9     10 
## 0.9256 0.0579 0.0108 0.0032 0.0012 0.0006 0.0003 0.0002 0.0001 0.0001

請你用一個指令列出被購買最多次的10個產品,和它們被購買的次數。

# 

■ 在什麼前提之下,我們可以把購買這十個產品的次數當作變數,用來預測顧客在下一期會不會來購買呢?
■ 我們如何把這十個變數,併入顧客資料框呢?
■ 我們可不可以(在什麼前提之下我們可以)直接用cbind()新變數併入顧客資料框呢?
■ 我們期中競賽的資料,符合直接用cbind()併入新變數的條件嗎? 我們要如何確認這一件事呢?



B. 直接以產品的被購買頻率作為變數

以產品的被購買頻率製作(顧客)變數的時候,cpm在最前邊的(N個)欄位就是變數!

B1. 以(最常被購買的)產品的購買次數對顧客分群
nop= 400  # no. product = no. variables
k = 200   # no. cluster
set.seed(111); kg = kmeans(cpm[,1:nop], k)$cluster
table(kg) %>% as.vector %>% sort
##   [1]     1     1     1     1     1     1     1     1     1     1     1
##  [12]     1     1     1     1     1     1     1     1     1     1     1
##  [23]     2     2     2     2     2     2     3     3     3     3     3
##  [34]     3     3     4     4     4     4     4     4     4     5     5
##  [45]     6     6     6     6     7     7     7     8     8     8     9
##  [56]     9     9     9    10    10    10    10    11    11    11    11
##  [67]    11    12    13    13    15    15    15    16    16    18    19
##  [78]    20    20    20    20    21    22    22    22    24    24    25
##  [89]    25    27    28    28    32    32    35    36    39    40    41
## [100]    42    44    45    46    47    47    48    49    49    50    51
## [111]    52    53    56    58    58    61    63    66    67    68    69
## [122]    69    72    81    85    85    86    87    90    94    96    97
## [133]    97   100   100   101   110   111   113   114   116   118   123
## [144]   123   126   130   134   136   141   141   142   143   162   165
## [155]   172   175   178   179   182   182   184   187   195   210   222
## [166]   225   228   228   237   239   242   253   254   258   258   266
## [177]   268   272   287   293   301   311   325   329   350   351   363
## [188]   396   407   407   410   418   432   448   473   523   561  1156
## [199]  1266 11215
B2. 各群組平均屬性

將分群結果併入顧客資料框(A)

df = A %>% inner_join(data.frame(
  cust = as.integer(rownames(cpm)), 
  kg) )
## Joining, by = "cust"
head(df)  # 32241
##   cust  r   s f      m   rev  raw age area  kg
## 1 1069 19 108 4  486.0  1944   15   K    E 196
## 2 1113 54 109 4  557.5  2230  241   K    F  81
## 3 1250 19  25 2  791.5  1583  354   D    D 169
## 4 1359 87  87 1  364.0   364  104   K    G  49
## 5 1823 36 119 3  869.0  2607  498   K    D 109
## 6 2189 57  89 2 7028.0 14056 3299   K    B 158

計算各群組的平均屬性

df = data.frame(
  aggregate(. ~ kg, df[,c(2:7,10)], mean), # averages
  size = as.vector(table(kg)),   # no. customers in the group
  dummy = 2001                   # dummy column for googleViz
  )
head(df)
##   kg         r         s         f         m       rev       raw size
## 1  1 10.772727 113.54545 13.318182  872.7878 10697.909 1733.8182   22
## 2  2 43.934959  95.96748  3.455285 1252.9938  3819.837  533.0407  123
## 3  3  5.384615 111.15385 14.307692 1325.2611 12617.769 1951.8462   13
## 4  4  8.000000 119.00000 24.000000  955.6250 22935.000 3658.0000    1
## 5  5  6.666667 116.66667 20.333333  579.9135 10741.444 1554.2222    9
## 6  6  2.500000 114.00000 23.500000  411.9918  9775.000 1324.5000    2
##   dummy
## 1  2001
## 2  2001
## 3  2001
## 4  2001
## 5  2001
## 6  2001
B3. 互動式泡泡圖
library(googleVis)
op <- options(gvis.plot.tag='chart')
plot( gvisMotionChart(
  subset(df[,c(1,4,5,6,8,2,3,7,9)], 
         size >= 20 & size <= 1000),  # range of group size 
  "kg", "dummy", options=list(width=800, height=600) ) )
## Set options back to original options
## options(op)
B4. 各群組的代表性產品 (Signature Product)
# use global variables: cpm, kg
Sig = function(gx, P=1000, H=10) {
  print(sprintf("Group %d: No. Customers = %d", gx, sum(kg==gx)))
  bx = cpm[,1:P]
  data.frame(n = col_sums(bx[kg==gx,])) %>%      # frequency
    mutate(
      share = round(100*n/col_sums(bx),2),       # %prod sold to this cluster
      conf = round(100*n/sum(kg==gx),2),         # %buy this product, given cluster
      base = round(100*col_sums(bx)/nrow(bx),2), # %buy this product, all cust 
      lift = round(conf/base,1),                 # conf/base  
      name = colnames(bx)                        # name of prod
    ) %>% arrange(desc(lift)) %>% head(H)
  }
Sig(130)
## [1] "Group 130: No. Customers = 97"
##      n share   conf  base lift          name
## 1   29  6.65  29.90  1.35 22.1 4719090106009
## 2   19  6.62  19.59  0.89 22.0 4710632001172
## 3   12  5.38  12.37  0.69 17.9 4710093080044
## 4  389  4.59 401.03 26.29 15.3 4714981010038
## 5   20  4.59  20.62  1.35 15.3 4719090105002
## 6   14  4.53  14.43  0.96 15.0 4719090106016
## 7    8  4.40   8.25  0.56 14.7 9310022862601
## 8   10  4.31  10.31  0.72 14.3 4710189820851
## 9    9  4.25   9.28  0.66 14.1 4710088434258
## 10  15  4.19  15.46  1.11 13.9 4710189851282


C. 使用尺度縮減方法抽取顧客(產品)的特徵向量

C1. 巨大尺度縮減 (SVD, Sigular Value Decomposition)
library(irlba)
if(LOAD) {
  load("data/svd2a.rdata")
} else {
  smx = cpm
  smx@x = pmin(smx@x, 2)            # cap at 2, similar to normalization  
  t0 = Sys.time()
  svd = irlba(smx, 
              nv=400,               # length of feature vector
              maxit=800, work=800)    
  print(Sys.time() - t0)            # 1.8795 mins
  save(svd, file = "data/svd2a.rdata")
}

■ 在什麼前提之下,我們可以把顧客購買產品的特徵向量當作變數,用來預測顧客在下一期會不會來購買呢?
■ 如果可以的話,我們如何把顧客購買產品的特徵向量,併入顧客資料框呢?
■ 我們可不可以(在什麼前提之下我們可以)直接用cbind()將特徵向量併入顧客資料框呢?
■ 我們期中競賽的資料,符合直接用cbind()併入特徵向量的條件嗎? 我們要如何確認這一件事呢?


C2. 依特徵向量對顧客分群
set.seed(111); kg = kmeans(svd$u, 200)$cluster
table(kg) %>% as.vector %>% sort
##   [1]    1    1    1    1    1    1    1    1    1    1    1    1    1    1
##  [15]    1    1    1    1    1    1    1    1    1    1    1    1    1    1
##  [29]    1    1    1    1    1    1    1    1    1    1    1    1    1    1
##  [43]    1    1    1    1    1    1    1    1    1    1    1    1    1    1
##  [57]    1    2    2    2    2    2    3    4    4    5    7   10   14   30
##  [71]   31   32   36   38   38   39   39   40   40   41   44   45   46   47
##  [85]   49   54   59   62   62   69   71   77   79   79   80   82   82   84
##  [99]   87   91  101  103  109  110  111  113  117  120  123  127  127  129
## [113]  132  133  134  135  136  139  141  143  143  147  147  157  159  159
## [127]  160  160  160  166  168  169  172  175  180  181  181  182  183  184
## [141]  184  188  190  190  193  194  195  196  198  198  200  201  201  202
## [155]  202  204  204  204  207  209  209  210  213  214  216  219  219  222
## [169]  225  233  234  235  236  237  237  238  239  241  248  248  248  253
## [183]  256  257  258  259  261  261  264  269  277  281  285  293  305  411
## [197]  612  896 1092 8987
C3. 互動式泡泡圖 (Google Motion Chart)
# clustster summary
df = inner_join(A, data.frame(         
  cust = as.integer(rownames(cpm)), kg)) %>% 
  group_by(kg) %>% summarise(
    avg_frequency = mean(f),
    avg_monetary = mean(m),
    avg_revenue_contr = mean(rev),
    group_size = n(),
    avg_recency = mean(r),
    avg_gross_profit = mean(raw)) %>% 
  ungroup %>% 
  mutate(dummy = 2001, kg = sprintf("G%03d",kg)) %>% 
  data.frame
## Joining, by = "cust"
# Google Motion Chart
plot( gvisMotionChart(
  subset(df, group_size >= 20 & group_size <= 1200),     
  "kg", "dummy", options=list(width=800, height=600) ) )
C4. 各群組的代表性產品 (Signature Product)
Sig(162)
## [1] "Group 162: No. Customers = 87"
##      n share   conf base  lift          name
## 1  165 38.64 189.66 1.32 143.7 4710105045313
## 2  133 35.37 152.87 1.17 130.7 4710105045320
## 3   59 13.50  67.82 1.36  49.9 4710105045450
## 4   19  9.60  21.84 0.61  35.8 4710105051147
## 5   35  8.31  40.23 1.31  30.7 4710105045443
## 6    9  5.33  10.34 0.52  19.9 4710088432650
## 7    8  4.91   9.20 0.51  18.0 4710105051185
## 8    8  4.19   9.20 0.59  15.6 4710171028326
## 9    7  3.89   8.05 0.56  14.4 4710058137059
## 10   7  3.70   8.05 0.59  13.6 4710010020061


D. 購物籃分析 Baskets Analysis

n_distinct(Z$tid)
## [1] 119407
n_distinct(Z$prod)
## [1] 23787
D1. 準備資料 (for Association Rule Analysis)

購物籃分析會使用arules這個套件,它要用的資料很容易準備,直接使用交易項目資料,一行程式就可以搞定。

library(arules)
library(arulesViz)
bx = as(split(Z$prod, Z$tid), "transactions")   
D2. Top20 熱賣產品
itemFrequencyPlot(bx, topN=20, type="absolute", cex=0.8)

D3. 關聯規則和Apriori演算法

關聯規則(A => B)

  • support: A被購買的機率 (A的基礎機率)
  • confidence: A被購買時,B被購買的機率
  • lift: A被購買時,B被購買的機率增加的倍數 (與B的基礎機率相比)
  • 一般來講support、confidence和lift越高的關聯規則越重要
  • support、confidence和lift設的越低(高),找到的關聯規則越多(少)
  • 建議一開始把標準設低,先找到多一點規則,之後再用subset篩選出特定的規則來看
rules = apriori(bx, parameter=list(supp=0.002, conf=0.6))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.6    0.1    1 none FALSE            TRUE       5   0.002      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 238 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[23787 item(s), 119407 transaction(s)] done [0.15s].
## sorting and recoding items ... [569 item(s)] done [0.01s].
## creating transaction tree ... done [0.05s].
## checking subsets of size 1 2 3 done [0.01s].
## writing ... [14 rule(s)] done [0.01s].
## creating S4 object  ... done [0.01s].
summary(rules)
## set of 14 rules
## 
## rule length distribution (lhs + rhs):sizes
## 2 3 
## 8 6 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.000   2.000   2.429   3.000   3.000 
## 
## summary of quality measures:
##     support           confidence          lift            count      
##  Min.   :0.002102   Min.   :0.6125   Min.   : 48.12   Min.   :251.0  
##  1st Qu.:0.002512   1st Qu.:0.6798   1st Qu.: 55.10   1st Qu.:300.0  
##  Median :0.002826   Median :0.7267   Median : 58.58   Median :337.5  
##  Mean   :0.003208   Mean   :0.7240   Mean   : 85.23   Mean   :383.1  
##  3rd Qu.:0.003335   3rd Qu.:0.7744   3rd Qu.: 97.86   3rd Qu.:398.2  
##  Max.   :0.005854   Max.   :0.8088   Max.   :170.92   Max.   :699.0  
## 
## mining info:
##  data ntransactions support confidence
##    bx        119407   0.002        0.6
D4. 檢視關聯規則

關聯規則 (A => B):

  • support: A被購買的機率 (A的基礎機率)
  • confidence: A被購買時,B被購買的機率
  • lift: A被購買時,B被購買的機率增加的倍數 (與B的基礎機率相比)
options(digits=4)
inspect(rules)
##      lhs                              rhs             support  confidence
## [1]  {4719090790017}               => {4719090790000} 0.002940 0.8088    
## [2]  {4719090790000}               => {4719090790017} 0.002940 0.6212    
## [3]  {719859796124}                => {719859796117}  0.002102 0.6972    
## [4]  {4710011402026}               => {4710011402019} 0.002822 0.6740    
## [5]  {4710085120697}               => {4710085120680} 0.003467 0.7753    
## [6]  {4710011409056}               => {4710011401128} 0.004430 0.6988    
## [7]  {4710011401135}               => {4710011401128} 0.005854 0.7524    
## [8]  {4710011405133}               => {4710011401128} 0.005176 0.6588    
## [9]  {4710011401135,4710011409056} => {4710011401128} 0.002713 0.8020    
## [10] {4710011401128,4710011409056} => {4710011401135} 0.002713 0.6125    
## [11] {4710011405133,4710011409056} => {4710011401128} 0.002261 0.7606    
## [12] {4710011401135,4710011405133} => {4710011401128} 0.002831 0.7717    
## [13] {4710011401135,4710011406123} => {4710011401128} 0.002445 0.8022    
## [14] {4710011405133,4710011406123} => {4710011401128} 0.002219 0.7011    
##      lift   count
## [1]  170.92 351  
## [2]  170.92 351  
## [3]  147.61 251  
## [4]   90.22 337  
## [5]  100.41 414  
## [6]   51.04 529  
## [7]   54.95 699  
## [8]   48.12 618  
## [9]   58.57 324  
## [10]  78.72 324  
## [11]  55.55 270  
## [12]  56.36 338  
## [13]  58.59 292  
## [14]  51.20 265
D5. 互動圖表顯示

有互動圖表的幫助,我們可以把條件放寬,多找一些關聯規則進來

rules = apriori(bx, parameter=list(supp=0.0003, conf=0.5))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.5    0.1    1 none FALSE            TRUE       5   3e-04      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 35 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[23787 item(s), 119407 transaction(s)] done [0.15s].
## sorting and recoding items ... [4691 item(s)] done [0.01s].
## creating transaction tree ... done [0.06s].
## checking subsets of size 1 2 3 4 5 6
## Warning in apriori(bx, parameter = list(supp = 3e-04, conf = 0.5)): Mining
## stopped (maxlen reached). Only patterns up to a length of 6 returned!
##  done [0.24s].
## writing ... [581 rule(s)] done [0.04s].
## creating S4 object  ... done [0.03s].
plot(rules,colors=c("red","green"),engine="htmlwidget",
     marker=list(opacity=.6,size=8))
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
plot(rules,method="matrix",shading="lift",engine="htmlwidget",
     colors=c("red", "green"))
D6. 篩選產品、互動式關聯圖
r1 = subset(rules, subset = rhs %in% c("93362993"))
plot(r1,method="graph",engine="htmlwidget",itemCol="cyan") 
  • 泡泡大小:support: A被購買的機率 (A的基礎機率)
  • 泡泡顏色:lift: A被購買時,B被購買的機率增加的倍數 (與B的基礎機率相比)


E. 產品推薦 Product Recommendation

E1. 篩選顧客、產品

太少被購買的產品和購買太少產品的顧客都不適合使用Collaborative Filtering這種產品推薦方法,所以我們先對顧客和產品做一次篩選

library(recommenderlab)
rx = cpm[, colSums(cpm > 0) >= 50]
rx = rx[rowSums(rx > 0) >= 20 & rowSums(rx > 0) <= 300, ]
dim(rx)  # 8846 3354
## [1] 8846 3354
E2. 選擇產品評分方式

可以選擇要用

  • 購買次數 (realRatingMatrix) 或
  • 是否購買 (binaryRatingMatrix)

做模型。

rx = as(rx, "realRatingMatrix")  # realRatingMatrix
bx = binarize(rx, minRating=1)   # binaryRatingMatrix
E3. 建立模型、產生建議 - UBCF

UBCF:User Based Collaborative Filtering

rUBCF = Recommender(bx[1:8800,], method = "UBCF")
pred = predict(rUBCF, bx[8801:8846,], n=4)
do.call(rbind, as(pred, "list")) %>% head(15)
##         [,1]            [,2]            [,3]            [,4]           
## 2170855 "4711271000014" "4710114128038" "4714981010038" "4713985863121"
## 2171265 "4719090900065" "4710254049521" "4710036008562" "4714981010038"
## 2171340 "723125488040"  "723125488064"  "723125485032"  "4714981010038"
## 2171425 "4710011401135" "4710011409056" "4711080010112" "4710011401142"
## 2171432 "4714981010038" "4710011406123" "4711258007371" "4710011401128"
## 2171555 "4719090900065" "4711271000014" "37000329169"   "4710943109352"
## 2171883 "4711271000014" "4710583996008" "4710291112172" "4710018004704"
## 2172194 "4711271000014" "4714981010038" "4710114128038" "4710114105046"
## 2172392 "4903111345717" "4710908131589" "4710168705056" "4711271000014"
## 2172569 "4711271000014" "4714981010038" "4710128030037" "4712162000038"
## 2172583 "4714981010038" "4710085120093" "4719090900065" "4710154015206"
## 2172590 "4710011406123" "4710011401142" "4710857000028" "4710011432856"
## 2172668 "4711271000014" "4710088620156" "4719090900065" "4712425010712"
## 2172705 "4711271000014" "4714981010038" "37000445111"   "37000440192"  
## 2172811 "4714981010038" "37000442127"   "4710719000333" "4710114128038"
E4. 建立模型、產生建議 - IBCF

IBCF:Item Based Collaborative Filtering

if(LOAD) {
  load("data/recommenders.rdata")
} else{
  rIBCF <- Recommender(bx[1:6000,], method = "IBCF")
}
pred = predict(rIBCF, bx[8801:8846,], n=4)
do.call(rbind, as(pred, "list")) %>% head(15)
##         [,1]            [,2]            [,3]            [,4]           
## 2170855 "4719090900065" "4714981010038" "4711271000014" "4712162000038"
## 2171265 "4719090900065" "4710015103288" "4714981010038" "4711271000014"
## 2171340 "37000445111"   "4710036005608" "37000442127"   "723125485032" 
## 2171425 "4711311617899" "4711311218836" "4710011401135" "4710011409056"
## 2171432 "4714981010038" "4710321791698" "4710857000042" "4710626111252"
## 2171555 "93432641"      "93362993"      "4710105045320" "4711271000014"
## 2171883 "4710670200100" "4710670200407" "4711271000014" "3228020490329"
## 2172194 "4714108700019" "4714108700064" "4909978199111" "20332433"     
## 2172392 "4710706211759" "4710908131589" "4719090900058" "4710731040614"
## 2172569 "4711371850243" "84501297329"   "84501293529"   "4710085121007"
## 2172583 "4710085172702" "4710085120093" "34000100095"   "34000231508"  
## 2172590 "4710011406123" "4711271000014" "4711437000162" "4710011401142"
## 2172668 "4711371850243" "4719090900065" "4714981010038" "4711437000117"
## 2172705 "37000445111"   "4710018004605" "37000442127"   "37000304593"  
## 2172811 "4719581980293" "4712067899287" "4719581980279" "4710908131589"
save(rIBCF, rUBCF, file="data/recommenders.rdata")
E5. 設定模型(準確性)驗證方式
set.seed(4321)
scheme = evaluationScheme(     
  bx, method="split", train = .75,  given=5)
E6. 設定推薦方法(參數)
algorithms = list(            
  AR53 = list(name="AR", param=list(support=0.0005, confidence=0.3)),
  AR43 = list(name="AR", param=list(support=0.0004, confidence=0.3)),
  RANDOM = list(name="RANDOM", param=NULL),
  POPULAR = list(name="POPULAR", param=NULL),
  UBCF = list(name="UBCF", param=NULL),
  IBCF = list(name="IBCF", param=NULL) )
E7. 建模、預測、驗證(準確性)
if(LOAD) {
  load("data/results2a.rdata")
} else {
  t0 = Sys.time()
  results = evaluate(            
    scheme, algorithms, 
    type="topNList",     # method of evaluation
    n=c(5, 10, 15, 20)   # no. recom. to be evaluated
    )
  print(Sys.time() - t0)
  save(results, file="data/results2a.rdata")
}
## AR run fold/sample [model time/prediction time]
##   1  [4.02sec/214.6sec] 
## AR run fold/sample [model time/prediction time]
##   1  [10.49sec/538.5sec] 
## RANDOM run fold/sample [model time/prediction time]
##   1  [0sec/9.48sec] 
## POPULAR run fold/sample [model time/prediction time]
##   1  [0sec/11.09sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0sec/75.42sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [198.2sec/1.63sec] 
## Time difference of 18.72 mins
E8. 模型準確性比較
# load("data/results.rdata")
par(mar=c(4,4,3,2),cex=0.8)
cols = c("red", "magenta", "gray", "orange", "blue", "green")
plot(results, annotate=c(1,3), legend="topleft", pch=19, lwd=2, col=cols)
abline(v=seq(0,0.006,0.001), h=seq(0,0.08,0.01), col='lightgray', lty=2)

getConfusionMatrix(results$IBCF)
## [[1]]
##       TP     FP    FN   TN precision  recall     TPR      FPR
## 5  1.116  3.884 32.97 3311    0.2231 0.03899 0.03899 0.001171
## 10 1.699  8.301 32.39 3307    0.1699 0.05812 0.05812 0.002503
## 15 2.075 12.925 32.01 3302    0.1383 0.07021 0.07021 0.003898
## 20 2.385 17.615 31.70 3297    0.1193 0.08002 0.08002 0.005313