1 Load packages, prepare and inspect the data

# 1A
library(C50)
library(psych)
library(RWeka)
library(caret)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Loading required package: lattice
library(rminer)
## Warning in rgl.init(initValue, onlyNULL): RGL: unable to open X11 display
## Warning: 'rgl.init' failed, running with 'rgl.useNULL = TRUE'.
library(matrixStats)
library(knitr)
library(arules)
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::count()   masks matrixStats::count()
## ✖ tidyr::expand()  masks Matrix::expand()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ purrr::lift()    masks caret::lift()
## ✖ tidyr::pack()    masks Matrix::pack()
## ✖ dplyr::recode()  masks arules::recode()
## ✖ tidyr::unpack()  masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Remember the directory the script is run from. The data files are read with
# paths relative to it, so the working directory is left untouched (calling
# setwd() on the value getwd() just returned would change nothing).
mydir <- getwd()

# Load the visit-level data; strings stay character rather than being
# converted to factors on read.
wm <- read.csv(file = "Walmart_visits_7trips.csv", stringsAsFactors = FALSE)

# Inspect the column types and the first few values of each column.
str(wm)
## 'data.frame':    12734 obs. of  9 variables:
##  $ TripType    : int  999 999 999 999 999 999 999 999 999 999 ...
##  $ DOW         : chr  "Friday" "Friday" "Friday" "Friday" ...
##  $ UniqueItems : int  1 2 1 1 2 1 1 1 1 1 ...
##  $ TotalQty    : int  0 1 0 0 3 0 0 0 1 0 ...
##  $ RtrnQty     : int  1 1 1 1 0 1 1 1 0 1 ...
##  $ NetQty      : int  -1 0 -1 -1 3 -1 -1 -1 1 -1 ...
##  $ UniqDepts   : int  1 1 1 1 2 1 1 1 1 1 ...
##  $ OneItemDepts: int  1 0 1 1 2 1 1 1 1 1 ...
##  $ RtrnDepts   : int  1 1 1 1 0 1 1 1 0 1 ...
# TripType (the class label) and DOW (day of week) are categorical, so both
# are stored as factors.
wm[c("TripType", "DOW")] <- lapply(wm[c("TripType", "DOW")], factor)

# Per-column summary: level counts for the factors, quartiles for the rest.
summary(wm)
##  TripType          DOW        UniqueItems         TotalQty      
##  5  :1106   Friday   :1851   Min.   :  1.000   Min.   :  0.000  
##  7  :1305   Monday   :1765   1st Qu.:  1.000   1st Qu.:  1.000  
##  8  :2808   Saturday :1923   Median :  2.000   Median :  3.000  
##  9  :2046   Sunday   :2014   Mean   :  6.203   Mean   :  7.185  
##  39 :2229   Thursday :1738   3rd Qu.:  7.000   3rd Qu.:  8.000  
##  40 :1373   Tuesday  :1703   Max.   :113.000   Max.   :137.000  
##  999:1867   Wednesday:1740                                      
##     RtrnQty            NetQty          UniqDepts       OneItemDepts   
##  Min.   : 0.0000   Min.   :-52.000   Min.   : 1.000   Min.   : 0.000  
##  1st Qu.: 0.0000   1st Qu.:  1.000   1st Qu.: 1.000   1st Qu.: 1.000  
##  Median : 0.0000   Median :  2.000   Median : 2.000   Median : 1.000  
##  Mean   : 0.2431   Mean   :  6.942   Mean   : 3.194   Mean   : 1.918  
##  3rd Qu.: 0.0000   3rd Qu.:  8.000   3rd Qu.: 4.000   3rd Qu.: 3.000  
##  Max.   :52.0000   Max.   :137.000   Max.   :21.000   Max.   :12.000  
##                                                                       
##    RtrnDepts      
##  Min.   : 0.0000  
##  1st Qu.: 0.0000  
##  Median : 0.0000  
##  Mean   : 0.1756  
##  3rd Qu.: 0.0000  
##  Max.   :10.0000  
## 
# 1B
# Scatterplot/correlation matrix of the numeric columns only.
# Author's reading of the plot -- highly correlated pairs:
#   TotalQty and NetQty (nearly identical);
#   UniqueItems with TotalQty, NetQty and UniqDepts;
#   RtrnQty with RtrnDepts;
#   UniqDepts with OneItemDepts.
pairs.panels(Filter(is.numeric, wm))

# 1C
# Fit a C5.0 classification tree for TripType on all remaining columns,
# with the confidence factor (CF) set to 0.2.
model_tree1 <- C5.0(
  TripType ~ .,
  data = wm,
  control = C5.0Control(CF = 0.2)
)
print(model_tree1)
## 
## Call:
## C5.0.formula(formula = TripType ~ ., data = wm, control = C5.0Control(CF = 0.2))
## 
## Classification Tree
## Number of samples: 12734 
## Number of predictors: 8 
## 
## Tree size: 13 
## 
## Non-standard options: attempt to group attributes, confidence level: 0.2
# Predict a trip type for every row of the data the tree was fitted on.
tree1_prediction <- predict(model_tree1, newdata = wm)

# Confusion matrix: actual trip type (target) against predicted trip type.
mmetric(y = wm$TripType, x = tree1_prediction, metric = "CONF")
## $res
## NULL
## 
## $conf
##       pred
## target    5    7    8    9   39   40  999
##    5     45   58  761    2  228   12    0
##    7     20  116  673    1  491    4    0
##    8      4   21 2746   15   22    0    0
##    9      0   11 1993   25   15    0    2
##    39    17   61    7    0 2101   43    0
##    40     0    2    0    0   53 1318    0
##    999    4   26  421    1   57    0 1358
## 
## $roc
## NULL
## 
## $lift
## NULL

2 Use SimpleKMeans clustering to understand visits

# 2A
# Number of distinct trip types. It has to be counted on the TripType column
# itself: length() of a data frame is its number of columns (9 here), not the
# number of classes (7).
# NOTE(review): the cluster listings rendered in this document show clusters
# 0-8, i.e. they were produced with 9 clusters; re-run to refresh them.
TripType.levels <- length(unique(wm$TripType))

# Clustering is unsupervised, so the TripType label is removed -- by name
# rather than by column position.
wm_train <- wm[, setdiff(names(wm), "TripType"), drop = FALSE]

# 2B
# k-means with one cluster per trip type. V = TRUE makes Weka report the
# per-cluster standard deviations alongside the centroids.
nClusters <- TripType.levels
wm_clustering1 <- SimpleKMeans(wm_train, Weka_control(N = nClusters, V = TRUE))
wm_clustering1
## 
## kMeans
## ======
## 
## Number of iterations: 20
## Within cluster sum of squared errors: 3892.6037859528574
## 
## Initial starting points (random):
## 
## Cluster 0: Friday,4,0,4,-4,2,1,2
## Cluster 1: Wednesday,2,2,0,2,2,2,0
## Cluster 2: Thursday,1,0,1,-1,1,1,1
## Cluster 3: Wednesday,1,1,0,1,1,1,0
## Cluster 4: Sunday,1,1,1,0,1,1,1
## Cluster 5: Saturday,5,5,0,5,3,2,0
## Cluster 6: Friday,4,4,0,4,4,4,0
## Cluster 7: Saturday,2,1,1,0,2,2,1
## Cluster 8: Saturday,24,25,0,25,10,5,0
## 
## Missing values globally replaced with mean/mode
## 
## Final cluster centroids:
##                               Cluster#
## Attribute        Full Data           0           1           2           3           4           5           6           7           8
##                  (12734.0)    (1715.0)     (671.0)    (1984.0)    (2558.0)    (2342.0)    (1648.0)     (546.0)     (753.0)     (517.0)
## ======================================================================================================================================
## DOW                 Sunday      Friday   Wednesday    Thursday   Wednesday      Sunday    Saturday      Friday    Saturday    Saturday
##   Friday       1851.0 ( 14%)1476.0 ( 86%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)375.0 ( 68%)  0.0 (  0%)  0.0 (  0%)
##   Monday       1765.0 ( 13%)122.0 (  7%)153.0 ( 22%)119.0 (  5%)563.0 ( 22%)169.0 (  7%)221.0 ( 13%)100.0 ( 18%)206.0 ( 27%)112.0 ( 21%)
##   Saturday     1923.0 ( 15%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)1239.0 ( 75%)  0.0 (  0%)355.0 ( 47%)329.0 ( 63%)
##   Sunday       2014.0 ( 15%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)2014.0 ( 85%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)
##   Thursday     1738.0 ( 13%)  0.0 (  0%)  0.0 (  0%)1737.0 ( 87%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  1.0 (  0%)
##   Tuesday      1703.0 ( 13%)117.0 (  6%)154.0 ( 22%)128.0 (  6%)619.0 ( 24%)159.0 (  6%)188.0 ( 11%) 71.0 ( 13%)192.0 ( 25%) 75.0 ( 14%)
##   Wednesday    1740.0 ( 13%)  0.0 (  0%)364.0 ( 54%)  0.0 (  0%)1376.0 ( 53%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)
## 
## UniqueItems         6.2033      2.6303     13.5618      5.3075      1.9277      7.8715      3.3101     19.3443      1.8141     27.2785
##                  +/-9.1936   +/-2.1792   +/-8.9262   +/-7.9214   +/-1.7514  +/-10.0825   +/-3.5615  +/-10.0707    +/-1.432  +/-14.0202
## 
## TotalQty            7.1849      2.9114     15.8003      6.1749      2.1388      9.1721      3.8501     23.5604      1.0624     32.2727
##                 +/-11.2831   +/-3.0489  +/-11.2094  +/-10.0334   +/-2.3375  +/-12.2871   +/-4.5658  +/-12.5716   +/-1.6822  +/-17.3518
## 
## RtrnQty             0.2431      0.2099      0.2206      0.2092      0.1357      0.2293      0.0492      0.1447      1.3506      0.2166
##                  +/-0.9149   +/-0.7191   +/-2.1109   +/-0.6344   +/-0.6913   +/-0.8058   +/-0.2711   +/-0.4951   +/-1.5503   +/-0.5529
## 
## NetQty              6.9417      2.7015     15.5797      5.9657      2.0031      8.9428       3.801     23.4158     -0.2882     32.0561
##                 +/-11.3513    +/-3.237  +/-11.4523  +/-10.0591   +/-2.4346  +/-12.3413   +/-4.5528  +/-12.5337   +/-2.3702  +/-17.2789
## 
## UniqDepts           3.1938      1.7749      7.2519      2.9451      1.3804      3.8412      2.3161      7.9652      1.1647     10.3424
##                  +/-3.0952   +/-0.9501   +/-2.1753   +/-2.6343   +/-0.7738   +/-3.2574   +/-1.4104   +/-2.3085   +/-0.4777   +/-2.6209
## 
## OneItemDepts        1.9179      1.1936      4.4993      1.9108      0.9992      2.1426      1.7779      3.7033      0.7198      4.8298
##                  +/-1.6439   +/-0.8126    +/-1.396   +/-1.4844   +/-0.6907   +/-1.6648   +/-0.8763   +/-1.7442   +/-0.5833   +/-1.8837
## 
## RtrnDepts           0.1756      0.1481      0.1252      0.1658      0.1001      0.1691      0.0407      0.1209      0.9163      0.1818
##                  +/-0.4553   +/-0.4144   +/-0.5565   +/-0.4263   +/-0.3439   +/-0.4681   +/-0.2006   +/-0.3481   +/-0.5989   +/-0.4334
# Cross-tabulate cluster assignment (rows) against actual TripType (columns).
table(predict(wm_clustering1), wm[["TripType"]])
##    
##       5   7   8   9  39  40 999
##   0 191 212 524 353 147  11 277
##   1  22  60   1   0 422 157   9
##   2 202 237 437 341 356 145 266
##   3 337 270 891 598 115   6 341
##   4 140 263 400 329 612 321 277
##   5 180 220 495 371 253  25 104
##   6   7  18   0   0 199 319   3
##   7  21  21  60  54   7   0 590
##   8   6   4   0   0 118 389   0
# 2C
# Same clustering, but with k-means++ initial centroids (init = 1) instead of
# random starting points.
wm_clustering2 <- SimpleKMeans(
  wm_train,
  control = Weka_control(N = nClusters, init = 1, V = TRUE)
)
print(wm_clustering2)
## 
## kMeans
## ======
## 
## Number of iterations: 38
## Within cluster sum of squared errors: 670.7999502003128
## 
## Initial starting points (k-means++):
## 
## Cluster 0: Friday,4,0,4,-4,2,1,2
## Cluster 1: Wednesday,3,3,0,3,3,3,0
## Cluster 2: Tuesday,1,1,0,1,1,1,0
## Cluster 3: Sunday,3,4,0,4,3,3,0
## Cluster 4: Sunday,1,1,0,1,1,1,0
## Cluster 5: Saturday,3,3,0,3,3,3,0
## Cluster 6: Sunday,7,7,0,7,5,3,0
## Cluster 7: Thursday,12,16,0,16,9,8,0
## Cluster 8: Monday,2,2,0,2,2,2,0
## 
## Missing values globally replaced with mean/mode
## 
## Final cluster centroids:
##                               Cluster#
## Attribute        Full Data           0           1           2           3           4           5           6           7           8
##                  (12734.0)    (1851.0)    (1738.0)    (1699.0)     (509.0)    (1393.0)    (1915.0)     (134.0)    (1735.0)    (1760.0)
## ======================================================================================================================================
## DOW                 Sunday      Friday   Wednesday     Tuesday      Sunday      Sunday    Saturday      Sunday    Thursday      Monday
##   Friday       1851.0 ( 14%)1851.0 (100%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)
##   Monday       1765.0 ( 13%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  5.0 (  3%)  0.0 (  0%)1760.0 (100%)
##   Saturday     1923.0 ( 15%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)1915.0 (100%)  8.0 (  5%)  0.0 (  0%)  0.0 (  0%)
##   Sunday       2014.0 ( 15%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)509.0 (100%)1393.0 (100%)  0.0 (  0%)112.0 ( 83%)  0.0 (  0%)  0.0 (  0%)
##   Thursday     1738.0 ( 13%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  3.0 (  2%)1735.0 (100%)  0.0 (  0%)
##   Tuesday      1703.0 ( 13%)  0.0 (  0%)  0.0 (  0%)1699.0 (100%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  4.0 (  2%)  0.0 (  0%)  0.0 (  0%)
##   Wednesday    1740.0 ( 13%)  0.0 (  0%)1738.0 (100%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  2.0 (  1%)  0.0 (  0%)  0.0 (  0%)
## 
## UniqueItems         6.2033      5.7736      5.2215      5.3561      14.725      2.4982      6.5206     44.7388      5.4277       6.396
##                  +/-9.1936   +/-8.5904   +/-7.4773    +/-7.754   +/-7.0131   +/-2.2992   +/-8.7791  +/-15.5911   +/-8.1339   +/-9.3561
## 
## TotalQty            7.1849      6.7634      5.9591      6.1489     17.2947      2.7186      7.4966     53.3731      6.3256      7.4409
##                 +/-11.2831  +/-10.6401   +/-9.1206    +/-9.515   +/-8.8277    +/-3.101   +/-10.667  +/-20.5066  +/-10.3289  +/-11.4258
## 
## RtrnQty             0.2431       0.222      0.2371      0.2596      0.1297      0.2757      0.2564      0.3657      0.2294      0.2523
##                  +/-0.9149   +/-0.7135   +/-0.9049   +/-1.4759   +/-0.4726    +/-0.909   +/-0.8635   +/-0.8889   +/-0.6675   +/-0.7592
## 
## NetQty              6.9417      6.5413      5.7221      5.8893      17.165      2.4429      7.2402     53.0075      6.0963      7.1886
##                 +/-11.3513  +/-10.7088   +/-9.1775   +/-9.7114    +/-8.832   +/-3.3181  +/-10.7564   +/-20.439   +/-10.363  +/-11.4812
## 
## UniqDepts           3.1938       3.013      2.9143      2.9253      7.1768      1.7215      3.3446     13.3881      2.9216      3.2608
##                  +/-3.0952   +/-2.9741   +/-2.7833   +/-2.7515   +/-1.9094    +/-0.987   +/-3.1389   +/-2.5866   +/-2.7912   +/-3.1382
## 
## OneItemDepts        1.9179      1.8293      1.8354      1.8293      3.8743      1.2247       1.965      5.2239      1.8282       1.946
##                  +/-1.6439   +/-1.6318   +/-1.5481   +/-1.5985   +/-1.5433   +/-0.8133   +/-1.6553   +/-2.0024    +/-1.549   +/-1.6471
## 
## RtrnDepts           0.1756      0.1621      0.1743      0.1654        0.11      0.1974      0.1796      0.2836      0.1804      0.1852
##                  +/-0.4553    +/-0.427   +/-0.4503   +/-0.4759   +/-0.3761   +/-0.4904   +/-0.4686   +/-0.5831   +/-0.4432   +/-0.4445
# Cross-tabulate cluster assignment (rows) against actual TripType (columns).
table(predict(wm_clustering2), wm[["TripType"]])
##    
##       5   7   8   9  39  40 999
##   0 167 185 435 317 283 193 271
##   1 184 179 420 281 286 140 248
##   2 142 184 397 258 304 147 267
##   3  10  38   3   1 276 177   4
##   4 113 168 393 327 124   6 262
##   5 171 176 377 318 365 227 281
##   6   0   0   0   0   0 134   0
##   7 170 189 379 310 293 143 251
##   8 149 186 404 234 298 206 283
# 2D
# k-means++ initialisation again, now measuring distance with Weka's
# ManhattanDistance class.
wm_clustering3 <- SimpleKMeans(
  wm_train,
  control = Weka_control(
    N = nClusters,
    A = "weka.core.ManhattanDistance",
    init = 1,
    V = TRUE
  )
)
print(wm_clustering3)
## 
## kMeans
## ======
## 
## Number of iterations: 15
## Sum of within cluster distances: 5245.5138888146075
## 
## Initial starting points (k-means++):
## 
## Cluster 0: Friday,4,0,4,-4,2,1,2
## Cluster 1: Wednesday,2,3,0,3,2,2,0
## Cluster 2: Monday,2,2,0,2,2,2,0
## Cluster 3: Sunday,1,1,0,1,1,1,0
## Cluster 4: Friday,2,2,0,2,1,0,0
## Cluster 5: Thursday,25,26,0,26,9,3,0
## Cluster 6: Sunday,9,10,0,10,7,6,0
## Cluster 7: Wednesday,10,13,0,13,6,3,0
## Cluster 8: Tuesday,2,2,0,2,2,2,0
## 
## Missing values globally replaced with mean/mode
## 
## Final cluster centroids:
##                               Cluster#
## Attribute        Full Data           0           1           2           3           4           5           6           7           8
##                  (12734.0)     (460.0)    (1528.0)    (2069.0)    (1924.0)    (1716.0)    (1673.0)     (967.0)     (746.0)    (1651.0)
## ======================================================================================================================================
## DOW                 Sunday      Friday   Wednesday      Monday      Sunday      Friday    Thursday      Sunday   Wednesday     Tuesday
##   Friday       1851.0 ( 14%)256.0 ( 55%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)1528.0 ( 89%)  0.0 (  0%) 67.0 (  6%)  0.0 (  0%)  0.0 (  0%)
##   Monday       1765.0 ( 13%)  0.0 (  0%)  0.0 (  0%)1696.0 ( 81%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%) 69.0 (  7%)  0.0 (  0%)  0.0 (  0%)
##   Saturday     1923.0 ( 15%)204.0 ( 44%)212.0 ( 13%)373.0 ( 18%)402.0 ( 20%)188.0 ( 10%)  0.0 (  0%)222.0 ( 22%)322.0 ( 43%)  0.0 (  0%)
##   Sunday       2014.0 ( 15%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)1522.0 ( 79%)  0.0 (  0%)  0.0 (  0%)492.0 ( 50%)  0.0 (  0%)  0.0 (  0%)
##   Thursday     1738.0 ( 13%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)1673.0 (100%) 65.0 (  6%)  0.0 (  0%)  0.0 (  0%)
##   Tuesday      1703.0 ( 13%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%) 52.0 (  5%)  0.0 (  0%)1651.0 (100%)
##   Wednesday    1740.0 ( 13%)  0.0 (  0%)1316.0 ( 86%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)424.0 ( 56%)  0.0 (  0%)
## 
## UniqueItems              2           1           2           2           1           3           2          25          11           2
##                  +/-9.1936   +/-5.5797   +/-1.6121   +/-6.3293   +/-2.5128   +/-6.1771   +/-5.9478  +/-13.5605   +/-8.6756   +/-6.0858
## 
## TotalQty                 3           0           2           2           1           3           2          29          13           2
##                 +/-11.2831   +/-7.1053   +/-2.9168   +/-7.9187   +/-3.2492   +/-7.7884   +/-7.8594   +/-16.772  +/-10.6668   +/-7.6701
## 
## RtrnQty                  0           1           0           0           0           0           0           0           0           0
##                  +/-0.9149   +/-1.5016   +/-0.8368   +/-0.7421   +/-0.7959        +/-0   +/-0.6599   +/-0.6057   +/-0.7473   +/-1.4931
## 
## NetQty                   2          -1           2           2           1           3           2          29          13           2
##                 +/-11.3513   +/-7.3248   +/-3.0505   +/-7.9864   +/-3.3956   +/-7.7884   +/-7.9228  +/-16.7026  +/-10.6468   +/-7.9208
## 
## UniqDepts                2           1           1           2           1           2           2          10           6           2
##                  +/-3.0952   +/-1.8163   +/-0.7218   +/-2.3623    +/-1.256   +/-2.3203   +/-2.2436    +/-2.825   +/-2.2901    +/-2.339
## 
## OneItemDepts             1           1           1           2           1           1           1           4           4           1
##                  +/-1.6439   +/-1.1072   +/-0.7781   +/-1.4008   +/-0.9345   +/-1.4298   +/-1.3976   +/-1.8225   +/-1.6043   +/-1.4851
## 
## RtrnDepts                0           1           0           0           0           0           0           0           0           0
##                  +/-0.4553   +/-0.4818   +/-0.4219   +/-0.4369   +/-0.4451        +/-0    +/-0.439   +/-0.4246   +/-0.4139    +/-0.476
# Cross-tabulate cluster assignment (rows) against actual TripType (columns).
table(predict(wm_clustering3), wm[["TripType"]])
##    
##       5   7   8   9  39  40 999
##   0   6  11  12  13  23  13 382
##   1 206 166 491 330  69   3 263
##   2 175 249 516 324 346 144 315
##   3 179 224 549 458 215   7 292
##   4 192 206 462 350 301 116  89
##   5 170 189 379 310 293  81 251
##   6   4  14   0   0 205 743   1
##   7  32  62   2   3 473 167   7
##   8 142 184 397 258 304  99 267
# 2E
# ManhattanDistance with the default (random) initial centroids.
wm_clustering4 <- SimpleKMeans(
  wm_train,
  control = Weka_control(
    N = nClusters,
    A = "weka.core.ManhattanDistance",
    V = TRUE
  )
)
print(wm_clustering4)
## 
## kMeans
## ======
## 
## Number of iterations: 10
## Sum of within cluster distances: 6405.095046812456
## 
## Initial starting points (random):
## 
## Cluster 0: Friday,4,0,4,-4,2,1,2
## Cluster 1: Wednesday,2,2,0,2,2,2,0
## Cluster 2: Thursday,1,0,1,-1,1,1,1
## Cluster 3: Wednesday,1,1,0,1,1,1,0
## Cluster 4: Sunday,1,1,1,0,1,1,1
## Cluster 5: Saturday,5,5,0,5,3,2,0
## Cluster 6: Friday,4,4,0,4,4,4,0
## Cluster 7: Saturday,2,1,1,0,2,2,1
## Cluster 8: Saturday,24,25,0,25,10,5,0
## 
## Missing values globally replaced with mean/mode
## 
## Final cluster centroids:
##                               Cluster#
## Attribute        Full Data           0           1           2           3           4           5           6           7           8
##                  (12734.0)    (2466.0)    (1363.0)    (2091.0)     (777.0)    (2160.0)    (1198.0)     (983.0)     (926.0)     (770.0)
## ======================================================================================================================================
## DOW                 Sunday      Friday   Wednesday    Thursday   Wednesday      Sunday    Saturday      Friday    Saturday    Saturday
##   Friday       1851.0 ( 14%)1384.0 ( 56%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)467.0 ( 47%)  0.0 (  0%)  0.0 (  0%)
##   Monday       1765.0 ( 13%)535.0 ( 21%)232.0 ( 17%)197.0 (  9%)  0.0 (  0%)128.0 (  5%)245.0 ( 20%)269.0 ( 27%)  0.0 (  0%)159.0 ( 20%)
##   Saturday     1923.0 ( 15%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)701.0 ( 58%)  0.0 (  0%)926.0 (100%)296.0 ( 38%)
##   Sunday       2014.0 ( 15%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)1905.0 ( 88%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)109.0 ( 14%)
##   Thursday     1738.0 ( 13%)  0.0 (  0%)  0.0 (  0%)1671.0 ( 79%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%) 67.0 (  8%)
##   Tuesday      1703.0 ( 13%)547.0 ( 22%)200.0 ( 14%)223.0 ( 10%)  0.0 (  0%)127.0 (  5%)252.0 ( 21%)247.0 ( 25%)  0.0 (  0%)107.0 ( 13%)
##   Wednesday    1740.0 ( 13%)  0.0 (  0%)931.0 ( 68%)  0.0 (  0%)777.0 (100%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%)  0.0 (  0%) 32.0 (  4%)
## 
## UniqueItems              2           1           3           2           1           3           5          12           1          27
##                  +/-9.1936    +/-1.349   +/-6.7144   +/-5.4052   +/-0.6958   +/-7.6066   +/-3.9263     +/-8.71   +/-0.9338  +/-13.3238
## 
## TotalQty                 3           1           3           2           1           3           5          14           1          32
##                 +/-11.2831   +/-1.9959   +/-8.3553   +/-7.1149   +/-1.1059    +/-9.489   +/-5.1685  +/-10.9461   +/-1.8209  +/-16.4722
## 
## RtrnQty                  0           0           0           0           0           0           0           0           0           0
##                  +/-0.9149   +/-0.8294   +/-0.7236   +/-0.6307   +/-1.0745   +/-0.8717   +/-0.9267   +/-0.5479   +/-0.7862   +/-1.9536
## 
## NetQty                   2           1           3           2           1           3           5          14           1          32
##                 +/-11.3513   +/-2.3103   +/-8.3806   +/-7.1689   +/-1.3681   +/-9.5692   +/-5.3275  +/-10.9287   +/-2.1299  +/-16.6532
## 
## UniqDepts                2           1           2           1           1           2           3           6           1          10
##                  +/-3.0952   +/-0.6246   +/-2.3827   +/-2.0802   +/-0.1334   +/-2.6724   +/-1.3096   +/-2.1763   +/-0.3584   +/-2.5636
## 
## OneItemDepts             1           1           2           1           1           1           2           4           1           5
##                  +/-1.6439   +/-0.6049   +/-1.2753   +/-1.3732   +/-0.4254   +/-1.5018    +/-1.067   +/-1.6974   +/-0.3917   +/-1.8983
## 
## RtrnDepts                0           0           0           0           0           0           0           0           0           0
##                  +/-0.4553   +/-0.4484   +/-0.4567    +/-0.407    +/-0.456   +/-0.4698   +/-0.4999   +/-0.3805   +/-0.4261   +/-0.5633
# Cross-tabulate cluster assignment (rows) against actual TripType (columns).
table(predict(wm_clustering4), wm[["TripType"]])
##    
##       5   7   8   9  39  40 999
##   0 257 215 741 561  61   6 625
##   1 109 196 347 202 287 111 111
##   2 225 227 551 386 302  79 321
##   3 106  50 242 176   3   0 200
##   4 153 257 487 369 418 186 290
##   5 101 198 175 123 493  32  76
##   6  28  87   1   2 564 288  13
##   7 121  73 264 227  10   1 230
##   8   6   2   0   0  91 670   1

3 Market Basket Analysis with the Walmart dept baskets

# 3A
# Read the basket file in "single" format: each row names one visit
# (VisitNumber) and one department (DepartmentDescription); rows sharing a
# VisitNumber form one transaction.
Dept_baskets <- read.transactions(
  file = "Walmart_baskets_1week.csv",
  format = "single",
  header = TRUE,
  sep = ",",
  cols = c("VisitNumber", "DepartmentDescription")
)

# 3B
# Show the first 15 baskets.
inspect(Dept_baskets[seq_len(15)])
##      items                         transactionID
## [1]  {IMPULSE MERCHANDISE}                 10009
## [2]  {CANDY, TOBACCO, COOKIES,                  
##       HOME MANAGEMENT,                          
##       JEWELRY AND SUNGLASSES,                   
##       MENS WEAR}                           10034
## [3]  {GIRLS WEAR, 4-6X  AND 7-14,               
##       GROCERY DRY GOODS,                        
##       IMPULSE MERCHANDISE,                      
##       PERSONAL CARE}                       10051
## [4]  {OTHER DEPARTMENTS}                   10118
## [5]  {FINANCIAL SERVICES}                  10167
## [6]  {DAIRY,                                    
##       DSD GROCERY,                              
##       FROZEN FOODS,                             
##       GROCERY DRY GOODS,                        
##       MEAT - FRESH & FROZEN,                    
##       PERSONAL CARE,                            
##       PRE PACKED DELI,                          
##       SLEEPWEAR/FOUNDATIONS}               10178
## [7]  {PHARMACY OTC}                        10191
## [8]  {FINANCIAL SERVICES}                  10206
## [9]  {INFANT CONSUMABLE HARDLINES}         1022 
## [10] {CELEBRATION,                              
##       FABRICS AND CRAFTS,                       
##       OFFICE SUPPLIES,                          
##       TOYS}                                10250
## [11] {PHARMACY OTC}                        10272
## [12] {FABRICS AND CRAFTS,                       
##       GROCERY DRY GOODS,                        
##       PRODUCE}                             10273
## [13] {SERVICE DELI}                        1029 
## [14] {BEAUTY,                                   
##       DSD GROCERY,                              
##       GROCERY DRY GOODS,                        
##       HOME DECOR,                               
##       HOME MANAGEMENT,                          
##       HOUSEHOLD CHEMICALS/SUPP,                 
##       HOUSEHOLD PAPER GOODS,                    
##       LAWN AND GARDEN,                          
##       PAINT AND ACCESSORIES,                    
##       PERSONAL CARE,                            
##       PETS AND SUPPLIES}                   10296
## [15] {BEAUTY}                              10302
# 3C
# Relative frequency of the 15 most common departments across baskets.
itemFrequencyPlot(Dept_baskets, topN = 15, type = "relative")

# 3Di
# Mine association rules: at least 5% support, at least 25% confidence, and
# at least two items per rule (so no rules with an empty left-hand side).
ords_rules <- apriori(
  Dept_baskets,
  parameter = list(support = 0.05, confidence = 0.25, minlen = 2)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##        0.25    0.1    1 none FALSE            TRUE       5    0.05      2
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 100 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[66 item(s), 2000 transaction(s)] done [0.00s].
## sorting and recoding items ... [20 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [78 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Report how many rules were found.
print(ords_rules)
## set of 78 rules
# Rule-length distribution and the spread of support/confidence/coverage/lift.
print(summary(ords_rules))
## set of 78 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2  3  4 
## 44 30  4 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.000   2.000   2.487   3.000   4.000 
## 
## summary of quality measures:
##     support          confidence        coverage            lift      
##  Min.   :0.05000   Min.   :0.2516   Min.   :0.06200   Min.   :1.193  
##  1st Qu.:0.05450   1st Qu.:0.4032   1st Qu.:0.09888   1st Qu.:2.224  
##  Median :0.06200   Median :0.5550   Median :0.12250   Median :2.815  
##  Mean   :0.07008   Mean   :0.5500   Mean   :0.14456   Mean   :2.769  
##  3rd Qu.:0.07650   3rd Qu.:0.7057   3rd Qu.:0.19200   3rd Qu.:3.396  
##  Max.   :0.13050   Max.   :0.8615   Max.   :0.30600   Max.   :4.558  
##      count      
##  Min.   :100.0  
##  1st Qu.:109.0  
##  Median :124.0  
##  Mean   :140.2  
##  3rd Qu.:153.0  
##  Max.   :261.0  
## 
## mining info:
##          data ntransactions support confidence
##  Dept_baskets          2000    0.05       0.25
##                                                                                           call
##  apriori(data = Dept_baskets, parameter = list(support = 0.05, confidence = 0.25, minlen = 2))
# List every rule, ordered by lift.
ords_rules %>%
  sort(by = "lift") %>%
  inspect()
##      lhs                                          rhs                    
## [1]  {DAIRY, GROCERY DRY GOODS}                => {COMM BREAD}           
## [2]  {DSD GROCERY, GROCERY DRY GOODS}          => {COMM BREAD}           
## [3]  {DSD GROCERY, GROCERY DRY GOODS, PRODUCE} => {DAIRY}                
## [4]  {DAIRY, GROCERY DRY GOODS}                => {FROZEN FOODS}         
## [5]  {DSD GROCERY, GROCERY DRY GOODS}          => {FROZEN FOODS}         
## [6]  {FROZEN FOODS, GROCERY DRY GOODS}         => {DAIRY}                
## [7]  {DAIRY, DSD GROCERY}                      => {FROZEN FOODS}         
## [8]  {DAIRY, DSD GROCERY}                      => {COMM BREAD}           
## [9]  {COMM BREAD, DAIRY}                       => {GROCERY DRY GOODS}    
## [10] {DAIRY, DSD GROCERY, PRODUCE}             => {GROCERY DRY GOODS}    
## [11] {COMM BREAD, GROCERY DRY GOODS}           => {DAIRY}                
## [12] {DAIRY, FROZEN FOODS}                     => {GROCERY DRY GOODS}    
## [13] {COMM BREAD, DSD GROCERY}                 => {GROCERY DRY GOODS}    
## [14] {COMM BREAD, DSD GROCERY}                 => {DAIRY}                
## [15] {DSD GROCERY, FROZEN FOODS}               => {DAIRY}                
## [16] {GROCERY DRY GOODS, PRODUCE}              => {DAIRY}                
## [17] {DSD GROCERY, GROCERY DRY GOODS}          => {DAIRY}                
## [18] {DSD GROCERY, FROZEN FOODS}               => {GROCERY DRY GOODS}    
## [19] {DSD GROCERY, PRODUCE}                    => {DAIRY}                
## [20] {DAIRY, DSD GROCERY, GROCERY DRY GOODS}   => {PRODUCE}              
## [21] {DAIRY, DSD GROCERY}                      => {GROCERY DRY GOODS}    
## [22] {DAIRY, PRODUCE}                          => {GROCERY DRY GOODS}    
## [23] {COMM BREAD}                              => {GROCERY DRY GOODS}    
## [24] {GROCERY DRY GOODS}                       => {COMM BREAD}           
## [25] {COMM BREAD}                              => {DAIRY}                
## [26] {DAIRY}                                   => {COMM BREAD}           
## [27] {FROZEN FOODS}                            => {DAIRY}                
## [28] {DAIRY}                                   => {FROZEN FOODS}         
## [29] {DAIRY, GROCERY DRY GOODS}                => {PRODUCE}              
## [30] {DSD GROCERY, PRODUCE}                    => {GROCERY DRY GOODS}    
## [31] {DAIRY, DSD GROCERY}                      => {PRODUCE}              
## [32] {FROZEN FOODS}                            => {GROCERY DRY GOODS}    
## [33] {GROCERY DRY GOODS}                       => {FROZEN FOODS}         
## [34] {DSD GROCERY, GROCERY DRY GOODS}          => {PRODUCE}              
## [35] {COMM BREAD}                              => {PRODUCE}              
## [36] {PRODUCE}                                 => {COMM BREAD}           
## [37] {HOUSEHOLD PAPER GOODS}                   => {GROCERY DRY GOODS}    
## [38] {GROCERY DRY GOODS}                       => {HOUSEHOLD PAPER GOODS}
## [39] {DAIRY}                                   => {GROCERY DRY GOODS}    
## [40] {GROCERY DRY GOODS}                       => {DAIRY}                
## [41] {FROZEN FOODS, GROCERY DRY GOODS}         => {DSD GROCERY}          
## [42] {DAIRY, GROCERY DRY GOODS, PRODUCE}       => {DSD GROCERY}          
## [43] {FROZEN FOODS}                            => {PRODUCE}              
## [44] {PRODUCE}                                 => {FROZEN FOODS}         
## [45] {DAIRY, FROZEN FOODS}                     => {DSD GROCERY}          
## [46] {PRODUCE}                                 => {DAIRY}                
## [47] {DAIRY}                                   => {PRODUCE}              
## [48] {COMM BREAD, DAIRY}                       => {DSD GROCERY}          
## [49] {COMM BREAD, GROCERY DRY GOODS}           => {DSD GROCERY}          
## [50] {DAIRY, GROCERY DRY GOODS}                => {DSD GROCERY}          
## [51] {PRODUCE}                                 => {GROCERY DRY GOODS}    
## [52] {GROCERY DRY GOODS}                       => {PRODUCE}              
## [53] {DAIRY, PRODUCE}                          => {DSD GROCERY}          
## [54] {FROZEN FOODS}                            => {DSD GROCERY}          
## [55] {PHARMACY OTC}                            => {PERSONAL CARE}        
## [56] {PERSONAL CARE}                           => {PHARMACY OTC}         
## [57] {GROCERY DRY GOODS, PRODUCE}              => {DSD GROCERY}          
## [58] {COMM BREAD}                              => {DSD GROCERY}          
## [59] {HOUSEHOLD PAPER GOODS}                   => {DSD GROCERY}          
## [60] {DAIRY}                                   => {DSD GROCERY}          
## [61] {DSD GROCERY}                             => {DAIRY}                
## [62] {HOUSEHOLD CHEMICALS/SUPP}                => {GROCERY DRY GOODS}    
## [63] {GROCERY DRY GOODS}                       => {DSD GROCERY}          
## [64] {DSD GROCERY}                             => {GROCERY DRY GOODS}    
## [65] {DSD GROCERY}                             => {PRODUCE}              
## [66] {PRODUCE}                                 => {DSD GROCERY}          
## [67] {HOUSEHOLD CHEMICALS/SUPP}                => {DSD GROCERY}          
## [68] {DAIRY}                                   => {PERSONAL CARE}        
## [69] {PERSONAL CARE}                           => {DAIRY}                
## [70] {PERSONAL CARE}                           => {DSD GROCERY}          
## [71] {DSD GROCERY}                             => {PERSONAL CARE}        
## [72] {PERSONAL CARE}                           => {IMPULSE MERCHANDISE}  
## [73] {PERSONAL CARE}                           => {GROCERY DRY GOODS}    
## [74] {DSD GROCERY}                             => {IMPULSE MERCHANDISE}  
## [75] {IMPULSE MERCHANDISE}                     => {DSD GROCERY}          
## [76] {IMPULSE MERCHANDISE}                     => {GROCERY DRY GOODS}    
## [77] {GROCERY DRY GOODS}                       => {IMPULSE MERCHANDISE}  
## [78] {PHARMACY OTC}                            => {DSD GROCERY}          
##      support confidence coverage lift     count
## [1]  0.0560  0.4786325  0.1170   4.558405 112  
## [2]  0.0590  0.4521073  0.1305   4.305784 118  
## [3]  0.0590  0.8137931  0.0725   4.238506 118  
## [4]  0.0500  0.4273504  0.1170   4.210349 100  
## [5]  0.0545  0.4176245  0.1305   4.114527 109  
## [6]  0.0500  0.7874016  0.0635   4.101050 100  
## [7]  0.0505  0.4122449  0.1225   4.061526 101  
## [8]  0.0520  0.4244898  0.1225   4.042760 104  
## [9]  0.0560  0.8615385  0.0650   3.979392 112  
## [10] 0.0590  0.8368794  0.0705   3.865494 118  
## [11] 0.0560  0.7320261  0.0765   3.812636 112  
## [12] 0.0500  0.8064516  0.0620   3.724950 100  
## [13] 0.0590  0.8027211  0.0735   3.707719 118  
## [14] 0.0520  0.7074830  0.0735   3.684807 104  
## [15] 0.0505  0.6965517  0.0725   3.627874 101  
## [16] 0.0715  0.6908213  0.1035   3.598027 143  
## [17] 0.0900  0.6896552  0.1305   3.591954 180  
## [18] 0.0545  0.7517241  0.0725   3.472167 109  
## [19] 0.0705  0.6588785  0.1070   3.431659 141  
## [20] 0.0590  0.6555556  0.0900   3.396661 118  
## [21] 0.0900  0.7346939  0.1225   3.393505 180  
## [22] 0.0715  0.7295918  0.0980   3.369939 143  
## [23] 0.0765  0.7285714  0.1050   3.365226 153  
## [24] 0.0765  0.3533487  0.2165   3.365226 153  
## [25] 0.0650  0.6190476  0.1050   3.224206 130  
## [26] 0.0650  0.3385417  0.1920   3.224206 130  
## [27] 0.0620  0.6108374  0.1015   3.181445 124  
## [28] 0.0620  0.3229167  0.1920   3.181445 124  
## [29] 0.0715  0.6111111  0.1170   3.166379 143  
## [30] 0.0725  0.6775701  0.1070   3.129654 145  
## [31] 0.0705  0.5755102  0.1225   2.981918 141  
## [32] 0.0635  0.6256158  0.1015   2.889680 127  
## [33] 0.0635  0.2933025  0.2165   2.889680 127  
## [34] 0.0725  0.5555556  0.1305   2.878526 145  
## [35] 0.0575  0.5476190  0.1050   2.837404 115  
## [36] 0.0575  0.2979275  0.1930   2.837404 115  
## [37] 0.0565  0.6141304  0.0920   2.836630 113  
## [38] 0.0565  0.2609700  0.2165   2.836630 113  
## [39] 0.1170  0.6093750  0.1920   2.814665 234  
## [40] 0.1170  0.5404157  0.2165   2.814665 234  
## [41] 0.0545  0.8582677  0.0635   2.804796 109  
## [42] 0.0590  0.8251748  0.0715   2.696650 118  
## [43] 0.0525  0.5172414  0.1015   2.680007 105  
## [44] 0.0525  0.2720207  0.1930   2.680007 105  
## [45] 0.0505  0.8145161  0.0620   2.661817 101  
## [46] 0.0980  0.5077720  0.1930   2.644646 196  
## [47] 0.0980  0.5104167  0.1920   2.644646 196  
## [48] 0.0520  0.8000000  0.0650   2.614379 104  
## [49] 0.0590  0.7712418  0.0765   2.520398 118  
## [50] 0.0900  0.7692308  0.1170   2.513826 180  
## [51] 0.1035  0.5362694  0.1930   2.476995 207  
## [52] 0.1035  0.4780600  0.2165   2.476995 207  
## [53] 0.0705  0.7193878  0.0980   2.350940 141  
## [54] 0.0725  0.7142857  0.1015   2.334267 145  
## [55] 0.0620  0.4350877  0.1425   2.326672 124  
## [56] 0.0620  0.3315508  0.1870   2.326672 124  
## [57] 0.0725  0.7004831  0.1035   2.289160 145  
## [58] 0.0735  0.7000000  0.1050   2.287582 147  
## [59] 0.0620  0.6739130  0.0920   2.202330 124  
## [60] 0.1225  0.6380208  0.1920   2.085035 245  
## [61] 0.1225  0.4003268  0.3060   2.085035 245  
## [62] 0.0525  0.4338843  0.1210   2.004085 105  
## [63] 0.1305  0.6027714  0.2165   1.969841 261  
## [64] 0.1305  0.4264706  0.3060   1.969841 261  
## [65] 0.1070  0.3496732  0.3060   1.811778 214  
## [66] 0.1070  0.5544041  0.1930   1.811778 214  
## [67] 0.0655  0.5413223  0.1210   1.769027 131  
## [68] 0.0500  0.2604167  0.1920   1.392602 100  
## [69] 0.0500  0.2673797  0.1870   1.392602 100  
## [70] 0.0770  0.4117647  0.1870   1.345636 154  
## [71] 0.0770  0.2516340  0.3060   1.345636 154  
## [72] 0.0515  0.2754011  0.1870   1.330440 103  
## [73] 0.0515  0.2754011  0.1870   1.272060 103  
## [74] 0.0795  0.2598039  0.3060   1.255091 159  
## [75] 0.0795  0.3840580  0.2070   1.255091 159  
## [76] 0.0545  0.2632850  0.2070   1.216097 109  
## [77] 0.0545  0.2517321  0.2165   1.216097 109  
## [78] 0.0520  0.3649123  0.1425   1.192524 104
# 3Dii
# Mine association rules from Dept_baskets, keeping rules with at least 4%
# support and 20% confidence. minlen = 2 requires two or more items per rule
# (lhs + rhs combined), which excludes rules with an empty left-hand side.
ords_rules_2 <- apriori(
  data = Dept_baskets,
  parameter = list(
    support = 0.04,
    confidence = 0.2,
    minlen = 2
  )
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.2    0.1    1 none FALSE            TRUE       5    0.04      2
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 80 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[66 item(s), 2000 transaction(s)] done [0.00s].
## sorting and recoding items ... [25 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 done [0.00s].
## writing ... [152 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Auto-print the rule set to report how many rules were mined.
ords_rules_2
## set of 152 rules
# Summarise the rule set: rule-length distribution, the spread of the quality
# measures (support, confidence, coverage, lift, count), and the mining info.
summary(ords_rules_2)
## set of 152 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2  3  4 
## 80 48 24 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.000   2.000   2.632   3.000   4.000 
## 
## summary of quality measures:
##     support          confidence        coverage           lift      
##  Min.   :0.04050   Min.   :0.2026   Min.   :0.0445   Min.   :1.164  
##  1st Qu.:0.04450   1st Qu.:0.3376   1st Qu.:0.0705   1st Qu.:2.266  
##  Median :0.05150   Median :0.5550   Median :0.1070   Median :2.884  
##  Mean   :0.05833   Mean   :0.5443   Mean   :0.1303   Mean   :2.969  
##  3rd Qu.:0.06388   3rd Qu.:0.7302   3rd Qu.:0.1920   3rd Qu.:3.731  
##  Max.   :0.13050   Max.   :0.9327   Max.   :0.3060   Max.   :5.471  
##      count      
##  Min.   : 81.0  
##  1st Qu.: 89.0  
##  Median :103.0  
##  Mean   :116.7  
##  3rd Qu.:127.8  
##  Max.   :261.0  
## 
## mining info:
##          data ntransactions support confidence
##  Dept_baskets          2000    0.04        0.2
##                                                                                          call
##  apriori(data = Dept_baskets, parameter = list(support = 0.04, confidence = 0.2, minlen = 2))
# List every rule, ordered from highest to lowest lift.
inspect(
  sort(ords_rules_2, by = "lift", decreasing = TRUE)
)
##       lhs                           rhs                        support confidence coverage     lift count
## [1]   {DAIRY,                                                                                            
##        DSD GROCERY,                                                                                      
##        PRODUCE}                  => {COMM BREAD}                0.0405  0.5744681   0.0705 5.471125    81
## [2]   {DAIRY,                                                                                            
##        GROCERY DRY GOODS,                                                                                
##        PRODUCE}                  => {COMM BREAD}                0.0405  0.5664336   0.0715 5.394605    81
## [3]   {DSD GROCERY,                                                                                      
##        GROCERY DRY GOODS,                                                                                
##        PRODUCE}                  => {COMM BREAD}                0.0410  0.5655172   0.0725 5.385878    82
## [4]   {DAIRY,                                                                                            
##        DSD GROCERY,                                                                                      
##        GROCERY DRY GOODS}        => {COMM BREAD}                0.0485  0.5388889   0.0900 5.132275    97
## [5]   {DAIRY,                                                                                            
##        DSD GROCERY,                                                                                      
##        GROCERY DRY GOODS}        => {FROZEN FOODS}              0.0445  0.4944444   0.0900 4.871374    89
## [6]   {DAIRY,                                                                                            
##        GROCERY DRY GOODS}        => {COMM BREAD}                0.0560  0.4786325   0.1170 4.558405   112
## [7]   {COMM BREAD,                                                                                       
##        DSD GROCERY,                                                                                      
##        PRODUCE}                  => {DAIRY}                     0.0405  0.8709677   0.0465 4.536290    81
## [8]   {GROCERY DRY GOODS,                                                                                
##        PRODUCE}                  => {COMM BREAD}                0.0490  0.4734300   0.1035 4.508857    98
## [9]   {DAIRY,                                                                                            
##        PRODUCE}                  => {COMM BREAD}                0.0445  0.4540816   0.0980 4.324587    89
## [10]  {COMM BREAD,                                                                                       
##        DAIRY,                                                                                            
##        DSD GROCERY}              => {GROCERY DRY GOODS}         0.0485  0.9326923   0.0520 4.308048    97
## [11]  {DSD GROCERY,                                                                                      
##        GROCERY DRY GOODS}        => {COMM BREAD}                0.0590  0.4521073   0.1305 4.305784   118
## [12]  {COMM BREAD,                                                                                       
##        GROCERY DRY GOODS,                                                                                
##        PRODUCE}                  => {DAIRY}                     0.0405  0.8265306   0.0490 4.304847    81
## [13]  {PRE PACKED DELI}          => {DAIRY}                     0.0450  0.8256881   0.0545 4.300459    90
## [14]  {DAIRY}                    => {PRE PACKED DELI}           0.0450  0.2343750   0.1920 4.300459    90
## [15]  {COMM BREAD,                                                                                       
##        DSD GROCERY,                                                                                      
##        GROCERY DRY GOODS}        => {DAIRY}                     0.0485  0.8220339   0.0590 4.281427    97
## [16]  {DSD GROCERY,                                                                                      
##        FROZEN FOODS,                                                                                     
##        GROCERY DRY GOODS}        => {DAIRY}                     0.0445  0.8165138   0.0545 4.252676    89
## [17]  {DSD GROCERY,                                                                                      
##        GROCERY DRY GOODS,                                                                                
##        PRODUCE}                  => {DAIRY}                     0.0590  0.8137931   0.0725 4.238506   118
## [18]  {DAIRY,                                                                                            
##        PRODUCE}                  => {FROZEN FOODS}              0.0420  0.4285714   0.0980 4.222379    84
## [19]  {DAIRY,                                                                                            
##        GROCERY DRY GOODS}        => {FROZEN FOODS}              0.0500  0.4273504   0.1170 4.210349   100
## [20]  {COMM BREAD,                                                                                       
##        DAIRY,                                                                                            
##        PRODUCE}                  => {GROCERY DRY GOODS}         0.0405  0.9101124   0.0445 4.203752    81
## [21]  {FROZEN FOODS,                                                                                     
##        PRODUCE}                  => {DAIRY}                     0.0420  0.8000000   0.0525 4.166667    84
## [22]  {DSD GROCERY,                                                                                      
##        PRODUCE}                  => {COMM BREAD}                0.0465  0.4345794   0.1070 4.138852    93
## [23]  {DSD GROCERY,                                                                                      
##        GROCERY DRY GOODS}        => {FROZEN FOODS}              0.0545  0.4176245   0.1305 4.114527   109
## [24]  {FROZEN FOODS,                                                                                     
##        GROCERY DRY GOODS}        => {DAIRY}                     0.0500  0.7874016   0.0635 4.101050   100
## [25]  {COMM BREAD,                                                                                       
##        DSD GROCERY,                                                                                      
##        PRODUCE}                  => {GROCERY DRY GOODS}         0.0410  0.8817204   0.0465 4.072612    82
## [26]  {DAIRY,                                                                                            
##        DSD GROCERY,                                                                                      
##        FROZEN FOODS}             => {GROCERY DRY GOODS}         0.0445  0.8811881   0.0505 4.070153    89
## [27]  {DAIRY,                                                                                            
##        DSD GROCERY}              => {FROZEN FOODS}              0.0505  0.4122449   0.1225 4.061526   101
## [28]  {DAIRY,                                                                                            
##        DSD GROCERY}              => {COMM BREAD}                0.0520  0.4244898   0.1225 4.042760   104
## [29]  {COMM BREAD,                                                                                       
##        DAIRY,                                                                                            
##        DSD GROCERY}              => {PRODUCE}                   0.0405  0.7788462   0.0520 4.035472    81
## [30]  {COMM BREAD,                                                                                       
##        PRODUCE}                  => {DAIRY}                     0.0445  0.7739130   0.0575 4.030797    89
## [31]  {COMM BREAD,                                                                                       
##        DAIRY}                    => {GROCERY DRY GOODS}         0.0560  0.8615385   0.0650 3.979392   112
## [32]  {DSD GROCERY,                                                                                      
##        PRODUCE}                  => {FROZEN FOODS}              0.0430  0.4018692   0.1070 3.959302    86
## [33]  {COMM BREAD,                                                                                       
##        PRODUCE}                  => {GROCERY DRY GOODS}         0.0490  0.8521739   0.0575 3.936138    98
## [34]  {MEAT - FRESH & FROZEN}    => {PRODUCE}                   0.0470  0.7520000   0.0625 3.896373    94
## [35]  {PRODUCE}                  => {MEAT - FRESH & FROZEN}     0.0470  0.2435233   0.1930 3.896373    94
## [36]  {DAIRY,                                                                                            
##        DSD GROCERY,                                                                                      
##        PRODUCE}                  => {GROCERY DRY GOODS}         0.0590  0.8368794   0.0705 3.865494   118
## [37]  {COMM BREAD,                                                                                       
##        GROCERY DRY GOODS}        => {DAIRY}                     0.0560  0.7320261   0.0765 3.812636   112
## [38]  {COMM BREAD,                                                                                       
##        DAIRY,                                                                                            
##        GROCERY DRY GOODS}        => {PRODUCE}                   0.0405  0.7232143   0.0560 3.747224    81
## [39]  {DAIRY,                                                                                            
##        FROZEN FOODS}             => {GROCERY DRY GOODS}         0.0500  0.8064516   0.0620 3.724950   100
## [40]  {COMM BREAD,                                                                                       
##        DSD GROCERY}              => {GROCERY DRY GOODS}         0.0590  0.8027211   0.0735 3.707719   118
## [41]  {DSD GROCERY,                                                                                      
##        GROCERY DRY GOODS}        => {HOUSEHOLD PAPER GOODS}     0.0445  0.3409962   0.1305 3.706480    89
## [42]  {COMM BREAD,                                                                                       
##        DSD GROCERY}              => {DAIRY}                     0.0520  0.7074830   0.0735 3.684807   104
## [43]  {HOUSEHOLD PAPER GOODS}    => {HOUSEHOLD CHEMICALS/SUPP}  0.0405  0.4402174   0.0920 3.638160    81
## [44]  {HOUSEHOLD CHEMICALS/SUPP} => {HOUSEHOLD PAPER GOODS}     0.0405  0.3347107   0.1210 3.638160    81
## [45]  {DSD GROCERY,                                                                                      
##        FROZEN FOODS}             => {DAIRY}                     0.0505  0.6965517   0.0725 3.627874   101
## [46]  {MEAT - FRESH & FROZEN}    => {DAIRY}                     0.0435  0.6960000   0.0625 3.625000    87
## [47]  {DAIRY}                    => {MEAT - FRESH & FROZEN}     0.0435  0.2265625   0.1920 3.625000    87
## [48]  {COMM BREAD,                                                                                       
##        DSD GROCERY,                                                                                      
##        GROCERY DRY GOODS}        => {PRODUCE}                   0.0410  0.6949153   0.0590 3.600597    82
## [49]  {GROCERY DRY GOODS,                                                                                
##        PRODUCE}                  => {DAIRY}                     0.0715  0.6908213   0.1035 3.598027   143
## [50]  {DSD GROCERY,                                                                                      
##        GROCERY DRY GOODS}        => {DAIRY}                     0.0900  0.6896552   0.1305 3.591954   180
## [51]  {COMM BREAD,                                                                                       
##        DAIRY}                    => {PRODUCE}                   0.0445  0.6846154   0.0650 3.547230    89
## [52]  {DAIRY,                                                                                            
##        FROZEN FOODS}             => {PRODUCE}                   0.0420  0.6774194   0.0620 3.509945    84
## [53]  {DSD GROCERY,                                                                                      
##        FROZEN FOODS}             => {GROCERY DRY GOODS}         0.0545  0.7517241   0.0725 3.472167   109
## [54]  {DSD GROCERY,                                                                                      
##        PRODUCE}                  => {DAIRY}                     0.0705  0.6588785   0.1070 3.431659   141
## [55]  {MEAT - FRESH & FROZEN}    => {GROCERY DRY GOODS}         0.0460  0.7360000   0.0625 3.399538    92
## [56]  {GROCERY DRY GOODS}        => {MEAT - FRESH & FROZEN}     0.0460  0.2124711   0.2165 3.399538    92
## [57]  {DAIRY,                                                                                            
##        DSD GROCERY,                                                                                      
##        GROCERY DRY GOODS}        => {PRODUCE}                   0.0590  0.6555556   0.0900 3.396661   118
## [58]  {DAIRY,                                                                                            
##        DSD GROCERY}              => {GROCERY DRY GOODS}         0.0900  0.7346939   0.1225 3.393505   180
## [59]  {DAIRY,                                                                                            
##        PRODUCE}                  => {GROCERY DRY GOODS}         0.0715  0.7295918   0.0980 3.369939   143
## [60]  {COMM BREAD}               => {GROCERY DRY GOODS}         0.0765  0.7285714   0.1050 3.365226   153
## [61]  {GROCERY DRY GOODS}        => {COMM BREAD}                0.0765  0.3533487   0.2165 3.365226   153
## [62]  {COMM BREAD,                                                                                       
##        GROCERY DRY GOODS}        => {PRODUCE}                   0.0490  0.6405229   0.0765 3.318771    98
## [63]  {DSD GROCERY,                                                                                      
##        HOUSEHOLD PAPER GOODS}    => {GROCERY DRY GOODS}         0.0445  0.7177419   0.0620 3.315205    89
## [64]  {COMM BREAD,                                                                                       
##        DSD GROCERY}              => {PRODUCE}                   0.0465  0.6326531   0.0735 3.277995    93
## [65]  {COMM BREAD}               => {DAIRY}                     0.0650  0.6190476   0.1050 3.224206   130
## [66]  {DAIRY}                    => {COMM BREAD}                0.0650  0.3385417   0.1920 3.224206   130
## [67]  {FROZEN FOODS}             => {DAIRY}                     0.0620  0.6108374   0.1015 3.181445   124
## [68]  {DAIRY}                    => {FROZEN FOODS}              0.0620  0.3229167   0.1920 3.181445   124
## [69]  {DAIRY,                                                                                            
##        GROCERY DRY GOODS}        => {PRODUCE}                   0.0715  0.6111111   0.1170 3.166379   143
## [70]  {DSD GROCERY,                                                                                      
##        PRODUCE}                  => {GROCERY DRY GOODS}         0.0725  0.6775701   0.1070 3.129654   145
## [71]  {DSD GROCERY,                                                                                      
##        FROZEN FOODS}             => {PRODUCE}                   0.0430  0.5931034   0.0725 3.073075    86
## [72]  {DAIRY,                                                                                            
##        DSD GROCERY}              => {PRODUCE}                   0.0705  0.5755102   0.1225 2.981918   141
## [73]  {COMM BREAD,                                                                                       
##        DAIRY,                                                                                            
##        PRODUCE}                  => {DSD GROCERY}               0.0405  0.9101124   0.0445 2.974223    81
## [74]  {DAIRY,                                                                                            
##        FROZEN FOODS,                                                                                     
##        GROCERY DRY GOODS}        => {DSD GROCERY}               0.0445  0.8900000   0.0500 2.908497    89
## [75]  {FROZEN FOODS}             => {GROCERY DRY GOODS}         0.0635  0.6256158   0.1015 2.889680   127
## [76]  {GROCERY DRY GOODS}        => {FROZEN FOODS}              0.0635  0.2933025   0.2165 2.889680   127
## [77]  {DSD GROCERY,                                                                                      
##        GROCERY DRY GOODS}        => {PRODUCE}                   0.0725  0.5555556   0.1305 2.878526   145
## [78]  {PERSONAL CARE}            => {BEAUTY}                    0.0460  0.2459893   0.1870 2.860341    92
## [79]  {BEAUTY}                   => {PERSONAL CARE}             0.0460  0.5348837   0.0860 2.860341    92
## [80]  {COMM BREAD}               => {PRODUCE}                   0.0575  0.5476190   0.1050 2.837404   115
## [81]  {PRODUCE}                  => {COMM BREAD}                0.0575  0.2979275   0.1930 2.837404   115
## [82]  {HOUSEHOLD PAPER GOODS}    => {GROCERY DRY GOODS}         0.0565  0.6141304   0.0920 2.836630   113
## [83]  {GROCERY DRY GOODS}        => {HOUSEHOLD PAPER GOODS}     0.0565  0.2609700   0.2165 2.836630   113
## [84]  {COMM BREAD,                                                                                       
##        DAIRY,                                                                                            
##        GROCERY DRY GOODS}        => {DSD GROCERY}               0.0485  0.8660714   0.0560 2.830299    97
## [85]  {DAIRY}                    => {GROCERY DRY GOODS}         0.1170  0.6093750   0.1920 2.814665   234
## [86]  {GROCERY DRY GOODS}        => {DAIRY}                     0.1170  0.5404157   0.2165 2.814665   234
## [87]  {FROZEN FOODS,                                                                                     
##        GROCERY DRY GOODS}        => {DSD GROCERY}               0.0545  0.8582677   0.0635 2.804796   109
## [88]  {COMM BREAD,                                                                                       
##        GROCERY DRY GOODS,                                                                                
##        PRODUCE}                  => {DSD GROCERY}               0.0410  0.8367347   0.0490 2.734427    82
## [89]  {DAIRY,                                                                                            
##        GROCERY DRY GOODS,                                                                                
##        PRODUCE}                  => {DSD GROCERY}               0.0590  0.8251748   0.0715 2.696650   118
## [90]  {FROZEN FOODS}             => {PRODUCE}                   0.0525  0.5172414   0.1015 2.680007   105
## [91]  {PRODUCE}                  => {FROZEN FOODS}              0.0525  0.2720207   0.1930 2.680007   105
## [92]  {FROZEN FOODS,                                                                                     
##        PRODUCE}                  => {DSD GROCERY}               0.0430  0.8190476   0.0525 2.676626    86
## [93]  {DAIRY,                                                                                            
##        FROZEN FOODS}             => {DSD GROCERY}               0.0505  0.8145161   0.0620 2.661817   101
## [94]  {PRODUCE}                  => {DAIRY}                     0.0980  0.5077720   0.1930 2.644646   196
## [95]  {DAIRY}                    => {PRODUCE}                   0.0980  0.5104167   0.1920 2.644646   196
## [96]  {COMM BREAD,                                                                                       
##        PRODUCE}                  => {DSD GROCERY}               0.0465  0.8086957   0.0575 2.642796    93
## [97]  {COMM BREAD,                                                                                       
##        DAIRY}                    => {DSD GROCERY}               0.0520  0.8000000   0.0650 2.614379   104
## [98]  {GROCERY DRY GOODS,                                                                                
##        HOUSEHOLD PAPER GOODS}    => {DSD GROCERY}               0.0445  0.7876106   0.0565 2.573891    89
## [99]  {COMM BREAD,                                                                                       
##        GROCERY DRY GOODS}        => {DSD GROCERY}               0.0590  0.7712418   0.0765 2.520398   118
## [100] {DAIRY}                    => {HOUSEHOLD PAPER GOODS}     0.0445  0.2317708   0.1920 2.519248    89
## [101] {HOUSEHOLD PAPER GOODS}    => {DAIRY}                     0.0445  0.4836957   0.0920 2.519248    89
## [102] {DAIRY,                                                                                            
##        GROCERY DRY GOODS}        => {DSD GROCERY}               0.0900  0.7692308   0.1170 2.513826   180
## [103] {PRE PACKED DELI}          => {DSD GROCERY}               0.0415  0.7614679   0.0545 2.488457    83
## [104] {PRODUCE}                  => {GROCERY DRY GOODS}         0.1035  0.5362694   0.1930 2.476995   207
## [105] {GROCERY DRY GOODS}        => {PRODUCE}                   0.1035  0.4780600   0.2165 2.476995   207
## [106] {DAIRY,                                                                                            
##        PRODUCE}                  => {DSD GROCERY}               0.0705  0.7193878   0.0980 2.350940   141
## [107] {FROZEN FOODS}             => {DSD GROCERY}               0.0725  0.7142857   0.1015 2.334267   145
## [108] {DSD GROCERY}              => {FROZEN FOODS}              0.0725  0.2369281   0.3060 2.334267   145
## [109] {PHARMACY OTC}             => {PERSONAL CARE}             0.0620  0.4350877   0.1425 2.326672   124
## [110] {PERSONAL CARE}            => {PHARMACY OTC}              0.0620  0.3315508   0.1870 2.326672   124
## [111] {MEAT - FRESH & FROZEN}    => {DSD GROCERY}               0.0440  0.7040000   0.0625 2.300654    88
## [112] {GROCERY DRY GOODS,                                                                                
##        PRODUCE}                  => {DSD GROCERY}               0.0725  0.7004831   0.1035 2.289160   145
## [113] {DSD GROCERY}              => {COMM BREAD}                0.0735  0.2401961   0.3060 2.287582   147
## [114] {COMM BREAD}               => {DSD GROCERY}               0.0735  0.7000000   0.1050 2.287582   147
## [115] {HOUSEHOLD PAPER GOODS}    => {DSD GROCERY}               0.0620  0.6739130   0.0920 2.202330   124
## [116] {DSD GROCERY}              => {HOUSEHOLD PAPER GOODS}     0.0620  0.2026144   0.3060 2.202330   124
## [117] {HOUSEHOLD CHEMICALS/SUPP} => {DAIRY}                     0.0490  0.4049587   0.1210 2.109160    98
## [118] {DAIRY}                    => {HOUSEHOLD CHEMICALS/SUPP}  0.0490  0.2552083   0.1920 2.109160    98
## [119] {DAIRY}                    => {DSD GROCERY}               0.1225  0.6380208   0.1920 2.085035   245
## [120] {DSD GROCERY}              => {DAIRY}                     0.1225  0.4003268   0.3060 2.085035   245
## [121] {CANDY, TOBACCO, COOKIES}  => {DSD GROCERY}               0.0430  0.6142857   0.0700 2.007470    86
## [122] {HOUSEHOLD CHEMICALS/SUPP} => {GROCERY DRY GOODS}         0.0525  0.4338843   0.1210 2.004085   105
## [123] {GROCERY DRY GOODS}        => {HOUSEHOLD CHEMICALS/SUPP}  0.0525  0.2424942   0.2165 2.004085   105
## [124] {GROCERY DRY GOODS}        => {DSD GROCERY}               0.1305  0.6027714   0.2165 1.969841   261
## [125] {DSD GROCERY}              => {GROCERY DRY GOODS}         0.1305  0.4264706   0.3060 1.969841   261
## [126] {PERSONAL CARE}            => {HOUSEHOLD CHEMICALS/SUPP}  0.0420  0.2245989   0.1870 1.856190    84
## [127] {HOUSEHOLD CHEMICALS/SUPP} => {PERSONAL CARE}             0.0420  0.3471074   0.1210 1.856190    84
## [128] {DSD GROCERY}              => {PRODUCE}                   0.1070  0.3496732   0.3060 1.811778   214
## [129] {PRODUCE}                  => {DSD GROCERY}               0.1070  0.5544041   0.1930 1.811778   214
## [130] {HOUSEHOLD CHEMICALS/SUPP} => {DSD GROCERY}               0.0655  0.5413223   0.1210 1.769027   131
## [131] {DSD GROCERY}              => {HOUSEHOLD CHEMICALS/SUPP}  0.0655  0.2140523   0.3060 1.769027   131
## [132] {HOUSEHOLD CHEMICALS/SUPP} => {PRODUCE}                   0.0405  0.3347107   0.1210 1.734253    81
## [133] {PRODUCE}                  => {HOUSEHOLD CHEMICALS/SUPP}  0.0405  0.2098446   0.1930 1.734253    81
## [134] {DAIRY}                    => {PERSONAL CARE}             0.0500  0.2604167   0.1920 1.392602   100
## [135] {PERSONAL CARE}            => {DAIRY}                     0.0500  0.2673797   0.1870 1.392602   100
## [136] {PERSONAL CARE}            => {DSD GROCERY}               0.0770  0.4117647   0.1870 1.345636   154
## [137] {DSD GROCERY}              => {PERSONAL CARE}             0.0770  0.2516340   0.3060 1.345636   154
## [138] {IMPULSE MERCHANDISE}      => {PERSONAL CARE}             0.0515  0.2487923   0.2070 1.330440   103
## [139] {PERSONAL CARE}            => {IMPULSE MERCHANDISE}       0.0515  0.2754011   0.1870 1.330440   103
## [140] {PERSONAL CARE}            => {PRODUCE}                   0.0475  0.2540107   0.1870 1.316118    95
## [141] {PRODUCE}                  => {PERSONAL CARE}             0.0475  0.2461140   0.1930 1.316118    95
## [142] {GROCERY DRY GOODS}        => {PERSONAL CARE}             0.0515  0.2378753   0.2165 1.272060   103
## [143] {PERSONAL CARE}            => {GROCERY DRY GOODS}         0.0515  0.2754011   0.1870 1.272060   103
## [144] {DSD GROCERY}              => {IMPULSE MERCHANDISE}       0.0795  0.2598039   0.3060 1.255091   159
## [145] {IMPULSE MERCHANDISE}      => {DSD GROCERY}               0.0795  0.3840580   0.2070 1.255091   159
## [146] {IMPULSE MERCHANDISE}      => {DAIRY}                     0.0485  0.2342995   0.2070 1.220310    97
## [147] {DAIRY}                    => {IMPULSE MERCHANDISE}       0.0485  0.2526042   0.1920 1.220310    97
## [148] {IMPULSE MERCHANDISE}      => {GROCERY DRY GOODS}         0.0545  0.2632850   0.2070 1.216097   109
## [149] {GROCERY DRY GOODS}        => {IMPULSE MERCHANDISE}       0.0545  0.2517321   0.2165 1.216097   109
## [150] {PHARMACY OTC}             => {DSD GROCERY}               0.0520  0.3649123   0.1425 1.192524   104
## [151] {IMPULSE MERCHANDISE}      => {PRODUCE}                   0.0465  0.2246377   0.2070 1.163926    93
## [152] {PRODUCE}                  => {IMPULSE MERCHANDISE}       0.0465  0.2409326   0.1930 1.163926    93

4 Reflection

The K-means clustering model revealed distinct clusters based on customer purchasing behavior. Switching the initialization from random initial centroids to k-means++, which chooses the initial centroids strategically to improve convergence, improved the model: the within-cluster sum of squared errors fell from 3892.61 to 670.80, while the number of iterations increased from 20 to 38.

In contrast, changing the distance function from Euclidean distance to Manhattan distance on the Walmart data set decreased the number of iterations from 20 to 15 but produced less well-defined clusters. The reported within-cluster error also rose, from a sum of squared errors of 3892.61 to a sum of within-cluster distances of 5245.51, although these two measures are not computed the same way and so are only roughly comparable. Since the clustering performance worsened, this probably implies that the data set has a linear shape.

Adjustments to the support and confidence thresholds in association rule mining influenced the number and strength of the discovered rules. In the model with 0.05 support and 0.25 confidence, I obtained a set of 78 rules. By slightly lowering support and confidence to 0.04 and 0.2, respectively, I obtained a set of 152 rules without significantly sacrificing the strength of the rules.