# 1A
library(C50)
library(psych)
library(RWeka)
library(caret)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Loading required package: lattice
library(rminer)
## Warning in rgl.init(initValue, onlyNULL): RGL: unable to open X11 display
## Warning: 'rgl.init' failed, running with 'rgl.useNULL = TRUE'.
library(matrixStats)
library(knitr)
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ dplyr::recode() masks arules::recode()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Remember the directory the script was launched from. The data files are read
# relative to it, so the working directory is left alone: the former
# setwd(getwd()) call was a no-op, and scripts should not call setwd().
# `mydir` is kept in case later sections of the script refer to it.
mydir <- getwd()
# Load the per-visit data. Strings stay character here; the categorical
# columns are converted to factors explicitly after inspection.
wm <- read.csv(file = "Walmart_visits_7trips.csv", stringsAsFactors = FALSE)
# Show column types and the first few values of each column.
str(wm)
## 'data.frame': 12734 obs. of 9 variables:
## $ TripType : int 999 999 999 999 999 999 999 999 999 999 ...
## $ DOW : chr "Friday" "Friday" "Friday" "Friday" ...
## $ UniqueItems : int 1 2 1 1 2 1 1 1 1 1 ...
## $ TotalQty : int 0 1 0 0 3 0 0 0 1 0 ...
## $ RtrnQty : int 1 1 1 1 0 1 1 1 0 1 ...
## $ NetQty : int -1 0 -1 -1 3 -1 -1 -1 1 -1 ...
## $ UniqDepts : int 1 1 1 1 2 1 1 1 1 1 ...
## $ OneItemDepts: int 1 0 1 1 2 1 1 1 1 1 ...
## $ RtrnDepts : int 1 1 1 1 0 1 1 1 0 1 ...
# Treat the trip-type code and the day of week as categorical variables.
categorical_cols <- c("TripType", "DOW")
wm[categorical_cols] <- lapply(wm[categorical_cols], factor)
# Per-column overview: level counts for factors, quartiles for numerics.
summary(wm)
## TripType DOW UniqueItems TotalQty
## 5 :1106 Friday :1851 Min. : 1.000 Min. : 0.000
## 7 :1305 Monday :1765 1st Qu.: 1.000 1st Qu.: 1.000
## 8 :2808 Saturday :1923 Median : 2.000 Median : 3.000
## 9 :2046 Sunday :2014 Mean : 6.203 Mean : 7.185
## 39 :2229 Thursday :1738 3rd Qu.: 7.000 3rd Qu.: 8.000
## 40 :1373 Tuesday :1703 Max. :113.000 Max. :137.000
## 999:1867 Wednesday:1740
## RtrnQty NetQty UniqDepts OneItemDepts
## Min. : 0.0000 Min. :-52.000 Min. : 1.000 Min. : 0.000
## 1st Qu.: 0.0000 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 0.0000 Median : 2.000 Median : 2.000 Median : 1.000
## Mean : 0.2431 Mean : 6.942 Mean : 3.194 Mean : 1.918
## 3rd Qu.: 0.0000 3rd Qu.: 8.000 3rd Qu.: 4.000 3rd Qu.: 3.000
## Max. :52.0000 Max. :137.000 Max. :21.000 Max. :12.000
##
## RtrnDepts
## Min. : 0.0000
## 1st Qu.: 0.0000
## Median : 0.0000
## Mean : 0.1756
## 3rd Qu.: 0.0000
## Max. :10.0000
##
# 1B
# Scatterplot/correlation matrix of the numeric columns only.
# High correlations noted from the plot: TotalQty ~ NetQty (nearly identical);
# UniqueItems ~ TotalQty, NetQty, UniqDepts; RtrnQty ~ RtrnDepts;
# UniqDepts ~ OneItemDepts.
pairs.panels(Filter(is.numeric, wm))
# 1C
# Fit one C5.0 classification tree predicting TripType from every other
# column, using a non-default pruning confidence factor (CF) of 0.2.
model_tree1 <- C5.0(
  TripType ~ .,
  data = wm,
  control = C5.0Control(CF = 0.2)
)
model_tree1
##
## Call:
## C5.0.formula(formula = TripType ~ ., data = wm, control = C5.0Control(CF = 0.2))
##
## Classification Tree
## Number of samples: 12734
## Number of predictors: 8
##
## Tree size: 13
##
## Non-standard options: attempt to group attributes, confidence level: 0.2
# Predict on the same rows the tree was fitted on, then print the confusion
# matrix (rows = actual trip type, columns = predicted trip type).
tree1_prediction <- predict(model_tree1, newdata = wm)
mmetric(y = wm$TripType, x = tree1_prediction, metric = "CONF")
## $res
## NULL
##
## $conf
## pred
## target 5 7 8 9 39 40 999
## 5 45 58 761 2 228 12 0
## 7 20 116 673 1 491 4 0
## 8 4 21 2746 15 22 0 0
## 9 0 11 1993 25 15 0 2
## 39 17 61 7 0 2101 43 0
## 40 0 2 0 0 53 1318 0
## 999 4 26 421 1 57 0 1358
##
## $roc
## NULL
##
## $lift
## NULL
# 2A
# Number of distinct trip types; used below as the number of clusters.
# The previous length(unique(wm)) de-duplicated the *rows* of the data frame
# and then returned its number of *columns* (9), not the number of trip types
# (7), so count the distinct values of the TripType column instead.
# NOTE: the k-means output captured in the `##` comments of this script was
# produced with the old value (9 clusters) and needs to be regenerated.
TripType.levels <- length(unique(wm$TripType))
# Clustering input: every column except the class label, selected by name so
# the result does not depend on the column order of the CSV.
wm_train <- wm[, names(wm) != "TripType", drop = FALSE]
# 2B
nClusters <- TripType.levels
# k-means with the default settings (the captured output reports random
# starting points and squared-error distances); V = TRUE adds the +/- standard
# deviation lines to the printed centroids.
wm_clustering1 <- SimpleKMeans(wm_train, Weka_control(N = nClusters, V = TRUE))
wm_clustering1
##
## kMeans
## ======
##
## Number of iterations: 20
## Within cluster sum of squared errors: 3892.6037859528574
##
## Initial starting points (random):
##
## Cluster 0: Friday,4,0,4,-4,2,1,2
## Cluster 1: Wednesday,2,2,0,2,2,2,0
## Cluster 2: Thursday,1,0,1,-1,1,1,1
## Cluster 3: Wednesday,1,1,0,1,1,1,0
## Cluster 4: Sunday,1,1,1,0,1,1,1
## Cluster 5: Saturday,5,5,0,5,3,2,0
## Cluster 6: Friday,4,4,0,4,4,4,0
## Cluster 7: Saturday,2,1,1,0,2,2,1
## Cluster 8: Saturday,24,25,0,25,10,5,0
##
## Missing values globally replaced with mean/mode
##
## Final cluster centroids:
## Cluster#
## Attribute Full Data 0 1 2 3 4 5 6 7 8
## (12734.0) (1715.0) (671.0) (1984.0) (2558.0) (2342.0) (1648.0) (546.0) (753.0) (517.0)
## ======================================================================================================================================
## DOW Sunday Friday Wednesday Thursday Wednesday Sunday Saturday Friday Saturday Saturday
## Friday 1851.0 ( 14%)1476.0 ( 86%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)375.0 ( 68%) 0.0 ( 0%) 0.0 ( 0%)
## Monday 1765.0 ( 13%)122.0 ( 7%)153.0 ( 22%)119.0 ( 5%)563.0 ( 22%)169.0 ( 7%)221.0 ( 13%)100.0 ( 18%)206.0 ( 27%)112.0 ( 21%)
## Saturday 1923.0 ( 15%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)1239.0 ( 75%) 0.0 ( 0%)355.0 ( 47%)329.0 ( 63%)
## Sunday 2014.0 ( 15%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)2014.0 ( 85%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)
## Thursday 1738.0 ( 13%) 0.0 ( 0%) 0.0 ( 0%)1737.0 ( 87%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 1.0 ( 0%)
## Tuesday 1703.0 ( 13%)117.0 ( 6%)154.0 ( 22%)128.0 ( 6%)619.0 ( 24%)159.0 ( 6%)188.0 ( 11%) 71.0 ( 13%)192.0 ( 25%) 75.0 ( 14%)
## Wednesday 1740.0 ( 13%) 0.0 ( 0%)364.0 ( 54%) 0.0 ( 0%)1376.0 ( 53%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)
##
## UniqueItems 6.2033 2.6303 13.5618 5.3075 1.9277 7.8715 3.3101 19.3443 1.8141 27.2785
## +/-9.1936 +/-2.1792 +/-8.9262 +/-7.9214 +/-1.7514 +/-10.0825 +/-3.5615 +/-10.0707 +/-1.432 +/-14.0202
##
## TotalQty 7.1849 2.9114 15.8003 6.1749 2.1388 9.1721 3.8501 23.5604 1.0624 32.2727
## +/-11.2831 +/-3.0489 +/-11.2094 +/-10.0334 +/-2.3375 +/-12.2871 +/-4.5658 +/-12.5716 +/-1.6822 +/-17.3518
##
## RtrnQty 0.2431 0.2099 0.2206 0.2092 0.1357 0.2293 0.0492 0.1447 1.3506 0.2166
## +/-0.9149 +/-0.7191 +/-2.1109 +/-0.6344 +/-0.6913 +/-0.8058 +/-0.2711 +/-0.4951 +/-1.5503 +/-0.5529
##
## NetQty 6.9417 2.7015 15.5797 5.9657 2.0031 8.9428 3.801 23.4158 -0.2882 32.0561
## +/-11.3513 +/-3.237 +/-11.4523 +/-10.0591 +/-2.4346 +/-12.3413 +/-4.5528 +/-12.5337 +/-2.3702 +/-17.2789
##
## UniqDepts 3.1938 1.7749 7.2519 2.9451 1.3804 3.8412 2.3161 7.9652 1.1647 10.3424
## +/-3.0952 +/-0.9501 +/-2.1753 +/-2.6343 +/-0.7738 +/-3.2574 +/-1.4104 +/-2.3085 +/-0.4777 +/-2.6209
##
## OneItemDepts 1.9179 1.1936 4.4993 1.9108 0.9992 2.1426 1.7779 3.7033 0.7198 4.8298
## +/-1.6439 +/-0.8126 +/-1.396 +/-1.4844 +/-0.6907 +/-1.6648 +/-0.8763 +/-1.7442 +/-0.5833 +/-1.8837
##
## RtrnDepts 0.1756 0.1481 0.1252 0.1658 0.1001 0.1691 0.0407 0.1209 0.9163 0.1818
## +/-0.4553 +/-0.4144 +/-0.5565 +/-0.4263 +/-0.3439 +/-0.4681 +/-0.2006 +/-0.3481 +/-0.5989 +/-0.4334
# Cross-tabulate cluster membership (rows) against actual trip type (columns).
table(
  predict(wm_clustering1),
  wm[["TripType"]]
)
##
## 5 7 8 9 39 40 999
## 0 191 212 524 353 147 11 277
## 1 22 60 1 0 422 157 9
## 2 202 237 437 341 356 145 266
## 3 337 270 891 598 115 6 341
## 4 140 263 400 329 612 321 277
## 5 180 220 495 371 253 25 104
## 6 7 18 0 0 199 319 3
## 7 21 21 60 54 7 0 590
## 8 6 4 0 0 118 389 0
# 2C
# k-means seeded with k-means++ (init = 1) rather than random starting points;
# cluster count and the V = TRUE std-dev reporting are unchanged.
wm_clustering2 <- SimpleKMeans(
  wm_train,
  control = Weka_control(N = nClusters, init = 1, V = TRUE)
)
wm_clustering2
##
## kMeans
## ======
##
## Number of iterations: 38
## Within cluster sum of squared errors: 670.7999502003128
##
## Initial starting points (k-means++):
##
## Cluster 0: Friday,4,0,4,-4,2,1,2
## Cluster 1: Wednesday,3,3,0,3,3,3,0
## Cluster 2: Tuesday,1,1,0,1,1,1,0
## Cluster 3: Sunday,3,4,0,4,3,3,0
## Cluster 4: Sunday,1,1,0,1,1,1,0
## Cluster 5: Saturday,3,3,0,3,3,3,0
## Cluster 6: Sunday,7,7,0,7,5,3,0
## Cluster 7: Thursday,12,16,0,16,9,8,0
## Cluster 8: Monday,2,2,0,2,2,2,0
##
## Missing values globally replaced with mean/mode
##
## Final cluster centroids:
## Cluster#
## Attribute Full Data 0 1 2 3 4 5 6 7 8
## (12734.0) (1851.0) (1738.0) (1699.0) (509.0) (1393.0) (1915.0) (134.0) (1735.0) (1760.0)
## ======================================================================================================================================
## DOW Sunday Friday Wednesday Tuesday Sunday Sunday Saturday Sunday Thursday Monday
## Friday 1851.0 ( 14%)1851.0 (100%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)
## Monday 1765.0 ( 13%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 5.0 ( 3%) 0.0 ( 0%)1760.0 (100%)
## Saturday 1923.0 ( 15%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)1915.0 (100%) 8.0 ( 5%) 0.0 ( 0%) 0.0 ( 0%)
## Sunday 2014.0 ( 15%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)509.0 (100%)1393.0 (100%) 0.0 ( 0%)112.0 ( 83%) 0.0 ( 0%) 0.0 ( 0%)
## Thursday 1738.0 ( 13%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 3.0 ( 2%)1735.0 (100%) 0.0 ( 0%)
## Tuesday 1703.0 ( 13%) 0.0 ( 0%) 0.0 ( 0%)1699.0 (100%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 4.0 ( 2%) 0.0 ( 0%) 0.0 ( 0%)
## Wednesday 1740.0 ( 13%) 0.0 ( 0%)1738.0 (100%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 2.0 ( 1%) 0.0 ( 0%) 0.0 ( 0%)
##
## UniqueItems 6.2033 5.7736 5.2215 5.3561 14.725 2.4982 6.5206 44.7388 5.4277 6.396
## +/-9.1936 +/-8.5904 +/-7.4773 +/-7.754 +/-7.0131 +/-2.2992 +/-8.7791 +/-15.5911 +/-8.1339 +/-9.3561
##
## TotalQty 7.1849 6.7634 5.9591 6.1489 17.2947 2.7186 7.4966 53.3731 6.3256 7.4409
## +/-11.2831 +/-10.6401 +/-9.1206 +/-9.515 +/-8.8277 +/-3.101 +/-10.667 +/-20.5066 +/-10.3289 +/-11.4258
##
## RtrnQty 0.2431 0.222 0.2371 0.2596 0.1297 0.2757 0.2564 0.3657 0.2294 0.2523
## +/-0.9149 +/-0.7135 +/-0.9049 +/-1.4759 +/-0.4726 +/-0.909 +/-0.8635 +/-0.8889 +/-0.6675 +/-0.7592
##
## NetQty 6.9417 6.5413 5.7221 5.8893 17.165 2.4429 7.2402 53.0075 6.0963 7.1886
## +/-11.3513 +/-10.7088 +/-9.1775 +/-9.7114 +/-8.832 +/-3.3181 +/-10.7564 +/-20.439 +/-10.363 +/-11.4812
##
## UniqDepts 3.1938 3.013 2.9143 2.9253 7.1768 1.7215 3.3446 13.3881 2.9216 3.2608
## +/-3.0952 +/-2.9741 +/-2.7833 +/-2.7515 +/-1.9094 +/-0.987 +/-3.1389 +/-2.5866 +/-2.7912 +/-3.1382
##
## OneItemDepts 1.9179 1.8293 1.8354 1.8293 3.8743 1.2247 1.965 5.2239 1.8282 1.946
## +/-1.6439 +/-1.6318 +/-1.5481 +/-1.5985 +/-1.5433 +/-0.8133 +/-1.6553 +/-2.0024 +/-1.549 +/-1.6471
##
## RtrnDepts 0.1756 0.1621 0.1743 0.1654 0.11 0.1974 0.1796 0.2836 0.1804 0.1852
## +/-0.4553 +/-0.427 +/-0.4503 +/-0.4759 +/-0.3761 +/-0.4904 +/-0.4686 +/-0.5831 +/-0.4432 +/-0.4445
# Cross-tabulate cluster membership (rows) against actual trip type (columns).
table(
  predict(wm_clustering2),
  wm[["TripType"]]
)
##
## 5 7 8 9 39 40 999
## 0 167 185 435 317 283 193 271
## 1 184 179 420 281 286 140 248
## 2 142 184 397 258 304 147 267
## 3 10 38 3 1 276 177 4
## 4 113 168 393 327 124 6 262
## 5 171 176 377 318 365 227 281
## 6 0 0 0 0 0 134 0
## 7 170 189 379 310 293 143 251
## 8 149 186 404 234 298 206 283
# 2D
# k-means with Manhattan distance (A = ...) and k-means++ seeding (init = 1).
wm_clustering3 <- SimpleKMeans(
  wm_train,
  control = Weka_control(
    N = nClusters,
    A = "weka.core.ManhattanDistance",
    init = 1,
    V = TRUE
  )
)
wm_clustering3
##
## kMeans
## ======
##
## Number of iterations: 15
## Sum of within cluster distances: 5245.5138888146075
##
## Initial starting points (k-means++):
##
## Cluster 0: Friday,4,0,4,-4,2,1,2
## Cluster 1: Wednesday,2,3,0,3,2,2,0
## Cluster 2: Monday,2,2,0,2,2,2,0
## Cluster 3: Sunday,1,1,0,1,1,1,0
## Cluster 4: Friday,2,2,0,2,1,0,0
## Cluster 5: Thursday,25,26,0,26,9,3,0
## Cluster 6: Sunday,9,10,0,10,7,6,0
## Cluster 7: Wednesday,10,13,0,13,6,3,0
## Cluster 8: Tuesday,2,2,0,2,2,2,0
##
## Missing values globally replaced with mean/mode
##
## Final cluster centroids:
## Cluster#
## Attribute Full Data 0 1 2 3 4 5 6 7 8
## (12734.0) (460.0) (1528.0) (2069.0) (1924.0) (1716.0) (1673.0) (967.0) (746.0) (1651.0)
## ======================================================================================================================================
## DOW Sunday Friday Wednesday Monday Sunday Friday Thursday Sunday Wednesday Tuesday
## Friday 1851.0 ( 14%)256.0 ( 55%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)1528.0 ( 89%) 0.0 ( 0%) 67.0 ( 6%) 0.0 ( 0%) 0.0 ( 0%)
## Monday 1765.0 ( 13%) 0.0 ( 0%) 0.0 ( 0%)1696.0 ( 81%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 69.0 ( 7%) 0.0 ( 0%) 0.0 ( 0%)
## Saturday 1923.0 ( 15%)204.0 ( 44%)212.0 ( 13%)373.0 ( 18%)402.0 ( 20%)188.0 ( 10%) 0.0 ( 0%)222.0 ( 22%)322.0 ( 43%) 0.0 ( 0%)
## Sunday 2014.0 ( 15%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)1522.0 ( 79%) 0.0 ( 0%) 0.0 ( 0%)492.0 ( 50%) 0.0 ( 0%) 0.0 ( 0%)
## Thursday 1738.0 ( 13%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)1673.0 (100%) 65.0 ( 6%) 0.0 ( 0%) 0.0 ( 0%)
## Tuesday 1703.0 ( 13%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 52.0 ( 5%) 0.0 ( 0%)1651.0 (100%)
## Wednesday 1740.0 ( 13%) 0.0 ( 0%)1316.0 ( 86%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)424.0 ( 56%) 0.0 ( 0%)
##
## UniqueItems 2 1 2 2 1 3 2 25 11 2
## +/-9.1936 +/-5.5797 +/-1.6121 +/-6.3293 +/-2.5128 +/-6.1771 +/-5.9478 +/-13.5605 +/-8.6756 +/-6.0858
##
## TotalQty 3 0 2 2 1 3 2 29 13 2
## +/-11.2831 +/-7.1053 +/-2.9168 +/-7.9187 +/-3.2492 +/-7.7884 +/-7.8594 +/-16.772 +/-10.6668 +/-7.6701
##
## RtrnQty 0 1 0 0 0 0 0 0 0 0
## +/-0.9149 +/-1.5016 +/-0.8368 +/-0.7421 +/-0.7959 +/-0 +/-0.6599 +/-0.6057 +/-0.7473 +/-1.4931
##
## NetQty 2 -1 2 2 1 3 2 29 13 2
## +/-11.3513 +/-7.3248 +/-3.0505 +/-7.9864 +/-3.3956 +/-7.7884 +/-7.9228 +/-16.7026 +/-10.6468 +/-7.9208
##
## UniqDepts 2 1 1 2 1 2 2 10 6 2
## +/-3.0952 +/-1.8163 +/-0.7218 +/-2.3623 +/-1.256 +/-2.3203 +/-2.2436 +/-2.825 +/-2.2901 +/-2.339
##
## OneItemDepts 1 1 1 2 1 1 1 4 4 1
## +/-1.6439 +/-1.1072 +/-0.7781 +/-1.4008 +/-0.9345 +/-1.4298 +/-1.3976 +/-1.8225 +/-1.6043 +/-1.4851
##
## RtrnDepts 0 1 0 0 0 0 0 0 0 0
## +/-0.4553 +/-0.4818 +/-0.4219 +/-0.4369 +/-0.4451 +/-0 +/-0.439 +/-0.4246 +/-0.4139 +/-0.476
# Cross-tabulate cluster membership (rows) against actual trip type (columns).
table(
  predict(wm_clustering3),
  wm[["TripType"]]
)
##
## 5 7 8 9 39 40 999
## 0 6 11 12 13 23 13 382
## 1 206 166 491 330 69 3 263
## 2 175 249 516 324 346 144 315
## 3 179 224 549 458 215 7 292
## 4 192 206 462 350 301 116 89
## 5 170 189 379 310 293 81 251
## 6 4 14 0 0 205 743 1
## 7 32 62 2 3 473 167 7
## 8 142 184 397 258 304 99 267
# 2E
# k-means with Manhattan distance (A = ...) and the default random seeding.
wm_clustering4 <- SimpleKMeans(
  wm_train,
  control = Weka_control(
    N = nClusters,
    A = "weka.core.ManhattanDistance",
    V = TRUE
  )
)
wm_clustering4
##
## kMeans
## ======
##
## Number of iterations: 10
## Sum of within cluster distances: 6405.095046812456
##
## Initial starting points (random):
##
## Cluster 0: Friday,4,0,4,-4,2,1,2
## Cluster 1: Wednesday,2,2,0,2,2,2,0
## Cluster 2: Thursday,1,0,1,-1,1,1,1
## Cluster 3: Wednesday,1,1,0,1,1,1,0
## Cluster 4: Sunday,1,1,1,0,1,1,1
## Cluster 5: Saturday,5,5,0,5,3,2,0
## Cluster 6: Friday,4,4,0,4,4,4,0
## Cluster 7: Saturday,2,1,1,0,2,2,1
## Cluster 8: Saturday,24,25,0,25,10,5,0
##
## Missing values globally replaced with mean/mode
##
## Final cluster centroids:
## Cluster#
## Attribute Full Data 0 1 2 3 4 5 6 7 8
## (12734.0) (2466.0) (1363.0) (2091.0) (777.0) (2160.0) (1198.0) (983.0) (926.0) (770.0)
## ======================================================================================================================================
## DOW Sunday Friday Wednesday Thursday Wednesday Sunday Saturday Friday Saturday Saturday
## Friday 1851.0 ( 14%)1384.0 ( 56%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)467.0 ( 47%) 0.0 ( 0%) 0.0 ( 0%)
## Monday 1765.0 ( 13%)535.0 ( 21%)232.0 ( 17%)197.0 ( 9%) 0.0 ( 0%)128.0 ( 5%)245.0 ( 20%)269.0 ( 27%) 0.0 ( 0%)159.0 ( 20%)
## Saturday 1923.0 ( 15%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)701.0 ( 58%) 0.0 ( 0%)926.0 (100%)296.0 ( 38%)
## Sunday 2014.0 ( 15%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)1905.0 ( 88%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%)109.0 ( 14%)
## Thursday 1738.0 ( 13%) 0.0 ( 0%) 0.0 ( 0%)1671.0 ( 79%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 67.0 ( 8%)
## Tuesday 1703.0 ( 13%)547.0 ( 22%)200.0 ( 14%)223.0 ( 10%) 0.0 ( 0%)127.0 ( 5%)252.0 ( 21%)247.0 ( 25%) 0.0 ( 0%)107.0 ( 13%)
## Wednesday 1740.0 ( 13%) 0.0 ( 0%)931.0 ( 68%) 0.0 ( 0%)777.0 (100%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 0.0 ( 0%) 32.0 ( 4%)
##
## UniqueItems 2 1 3 2 1 3 5 12 1 27
## +/-9.1936 +/-1.349 +/-6.7144 +/-5.4052 +/-0.6958 +/-7.6066 +/-3.9263 +/-8.71 +/-0.9338 +/-13.3238
##
## TotalQty 3 1 3 2 1 3 5 14 1 32
## +/-11.2831 +/-1.9959 +/-8.3553 +/-7.1149 +/-1.1059 +/-9.489 +/-5.1685 +/-10.9461 +/-1.8209 +/-16.4722
##
## RtrnQty 0 0 0 0 0 0 0 0 0 0
## +/-0.9149 +/-0.8294 +/-0.7236 +/-0.6307 +/-1.0745 +/-0.8717 +/-0.9267 +/-0.5479 +/-0.7862 +/-1.9536
##
## NetQty 2 1 3 2 1 3 5 14 1 32
## +/-11.3513 +/-2.3103 +/-8.3806 +/-7.1689 +/-1.3681 +/-9.5692 +/-5.3275 +/-10.9287 +/-2.1299 +/-16.6532
##
## UniqDepts 2 1 2 1 1 2 3 6 1 10
## +/-3.0952 +/-0.6246 +/-2.3827 +/-2.0802 +/-0.1334 +/-2.6724 +/-1.3096 +/-2.1763 +/-0.3584 +/-2.5636
##
## OneItemDepts 1 1 2 1 1 1 2 4 1 5
## +/-1.6439 +/-0.6049 +/-1.2753 +/-1.3732 +/-0.4254 +/-1.5018 +/-1.067 +/-1.6974 +/-0.3917 +/-1.8983
##
## RtrnDepts 0 0 0 0 0 0 0 0 0 0
## +/-0.4553 +/-0.4484 +/-0.4567 +/-0.407 +/-0.456 +/-0.4698 +/-0.4999 +/-0.3805 +/-0.4261 +/-0.5633
# Cross-tabulate cluster membership (rows) against actual trip type (columns).
table(
  predict(wm_clustering4),
  wm[["TripType"]]
)
##
## 5 7 8 9 39 40 999
## 0 257 215 741 561 61 6 625
## 1 109 196 347 202 287 111 111
## 2 225 227 551 386 302 79 321
## 3 106 50 242 176 3 0 200
## 4 153 257 487 369 418 186 290
## 5 101 198 175 123 493 32 76
## 6 28 87 1 2 564 288 13
## 7 121 73 264 227 10 1 230
## 8 6 2 0 0 91 670 1
# 3A
# Read the long-format ("single") basket file: one row per visit/department
# pair, grouped into transactions by VisitNumber.
Dept_baskets <- read.transactions(
  file = "Walmart_baskets_1week.csv",
  format = "single",
  header = TRUE,
  sep = ",",
  cols = c("VisitNumber", "DepartmentDescription")
)
# 3B
# Show the first 15 baskets with their transaction IDs.
inspect(Dept_baskets[seq_len(15)])
## items transactionID
## [1] {IMPULSE MERCHANDISE} 10009
## [2] {CANDY, TOBACCO, COOKIES,
## HOME MANAGEMENT,
## JEWELRY AND SUNGLASSES,
## MENS WEAR} 10034
## [3] {GIRLS WEAR, 4-6X AND 7-14,
## GROCERY DRY GOODS,
## IMPULSE MERCHANDISE,
## PERSONAL CARE} 10051
## [4] {OTHER DEPARTMENTS} 10118
## [5] {FINANCIAL SERVICES} 10167
## [6] {DAIRY,
## DSD GROCERY,
## FROZEN FOODS,
## GROCERY DRY GOODS,
## MEAT - FRESH & FROZEN,
## PERSONAL CARE,
## PRE PACKED DELI,
## SLEEPWEAR/FOUNDATIONS} 10178
## [7] {PHARMACY OTC} 10191
## [8] {FINANCIAL SERVICES} 10206
## [9] {INFANT CONSUMABLE HARDLINES} 1022
## [10] {CELEBRATION,
## FABRICS AND CRAFTS,
## OFFICE SUPPLIES,
## TOYS} 10250
## [11] {PHARMACY OTC} 10272
## [12] {FABRICS AND CRAFTS,
## GROCERY DRY GOODS,
## PRODUCE} 10273
## [13] {SERVICE DELI} 1029
## [14] {BEAUTY,
## DSD GROCERY,
## GROCERY DRY GOODS,
## HOME DECOR,
## HOME MANAGEMENT,
## HOUSEHOLD CHEMICALS/SUPP,
## HOUSEHOLD PAPER GOODS,
## LAWN AND GARDEN,
## PAINT AND ACCESSORIES,
## PERSONAL CARE,
## PETS AND SUPPLIES} 10296
## [15] {BEAUTY} 10302
# 3C
# Bar chart of the 15 most frequent departments, as a share of all baskets.
itemFrequencyPlot(
  Dept_baskets,
  topN = 15,
  type = "relative"
)
# 3Di
# Mine association rules with at least two items, support >= 5 % and
# confidence >= 25 %.
ords_rules <- apriori(
  data = Dept_baskets,
  parameter = list(support = 0.05, confidence = 0.25, minlen = 2)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.25 0.1 1 none FALSE TRUE 5 0.05 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 100
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[66 item(s), 2000 transaction(s)] done [0.00s].
## sorting and recoding items ... [20 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [78 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Auto-print the rule set (reports only how many rules were mined).
ords_rules
## set of 78 rules
# Rule-length distribution, quality-measure quartiles and mining parameters.
summary(ords_rules)
## set of 78 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4
## 44 30 4
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 2.000 2.000 2.487 3.000 4.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.05000 Min. :0.2516 Min. :0.06200 Min. :1.193
## 1st Qu.:0.05450 1st Qu.:0.4032 1st Qu.:0.09888 1st Qu.:2.224
## Median :0.06200 Median :0.5550 Median :0.12250 Median :2.815
## Mean :0.07008 Mean :0.5500 Mean :0.14456 Mean :2.769
## 3rd Qu.:0.07650 3rd Qu.:0.7057 3rd Qu.:0.19200 3rd Qu.:3.396
## Max. :0.13050 Max. :0.8615 Max. :0.30600 Max. :4.558
## count
## Min. :100.0
## 1st Qu.:109.0
## Median :124.0
## Mean :140.2
## 3rd Qu.:153.0
## Max. :261.0
##
## mining info:
## data ntransactions support confidence
## Dept_baskets 2000 0.05 0.25
## call
## apriori(data = Dept_baskets, parameter = list(support = 0.05, confidence = 0.25, minlen = 2))
# List every rule, ordered by lift with the strongest association first.
rules_by_lift <- sort(ords_rules, by = "lift")
inspect(rules_by_lift)
## lhs rhs
## [1] {DAIRY, GROCERY DRY GOODS} => {COMM BREAD}
## [2] {DSD GROCERY, GROCERY DRY GOODS} => {COMM BREAD}
## [3] {DSD GROCERY, GROCERY DRY GOODS, PRODUCE} => {DAIRY}
## [4] {DAIRY, GROCERY DRY GOODS} => {FROZEN FOODS}
## [5] {DSD GROCERY, GROCERY DRY GOODS} => {FROZEN FOODS}
## [6] {FROZEN FOODS, GROCERY DRY GOODS} => {DAIRY}
## [7] {DAIRY, DSD GROCERY} => {FROZEN FOODS}
## [8] {DAIRY, DSD GROCERY} => {COMM BREAD}
## [9] {COMM BREAD, DAIRY} => {GROCERY DRY GOODS}
## [10] {DAIRY, DSD GROCERY, PRODUCE} => {GROCERY DRY GOODS}
## [11] {COMM BREAD, GROCERY DRY GOODS} => {DAIRY}
## [12] {DAIRY, FROZEN FOODS} => {GROCERY DRY GOODS}
## [13] {COMM BREAD, DSD GROCERY} => {GROCERY DRY GOODS}
## [14] {COMM BREAD, DSD GROCERY} => {DAIRY}
## [15] {DSD GROCERY, FROZEN FOODS} => {DAIRY}
## [16] {GROCERY DRY GOODS, PRODUCE} => {DAIRY}
## [17] {DSD GROCERY, GROCERY DRY GOODS} => {DAIRY}
## [18] {DSD GROCERY, FROZEN FOODS} => {GROCERY DRY GOODS}
## [19] {DSD GROCERY, PRODUCE} => {DAIRY}
## [20] {DAIRY, DSD GROCERY, GROCERY DRY GOODS} => {PRODUCE}
## [21] {DAIRY, DSD GROCERY} => {GROCERY DRY GOODS}
## [22] {DAIRY, PRODUCE} => {GROCERY DRY GOODS}
## [23] {COMM BREAD} => {GROCERY DRY GOODS}
## [24] {GROCERY DRY GOODS} => {COMM BREAD}
## [25] {COMM BREAD} => {DAIRY}
## [26] {DAIRY} => {COMM BREAD}
## [27] {FROZEN FOODS} => {DAIRY}
## [28] {DAIRY} => {FROZEN FOODS}
## [29] {DAIRY, GROCERY DRY GOODS} => {PRODUCE}
## [30] {DSD GROCERY, PRODUCE} => {GROCERY DRY GOODS}
## [31] {DAIRY, DSD GROCERY} => {PRODUCE}
## [32] {FROZEN FOODS} => {GROCERY DRY GOODS}
## [33] {GROCERY DRY GOODS} => {FROZEN FOODS}
## [34] {DSD GROCERY, GROCERY DRY GOODS} => {PRODUCE}
## [35] {COMM BREAD} => {PRODUCE}
## [36] {PRODUCE} => {COMM BREAD}
## [37] {HOUSEHOLD PAPER GOODS} => {GROCERY DRY GOODS}
## [38] {GROCERY DRY GOODS} => {HOUSEHOLD PAPER GOODS}
## [39] {DAIRY} => {GROCERY DRY GOODS}
## [40] {GROCERY DRY GOODS} => {DAIRY}
## [41] {FROZEN FOODS, GROCERY DRY GOODS} => {DSD GROCERY}
## [42] {DAIRY, GROCERY DRY GOODS, PRODUCE} => {DSD GROCERY}
## [43] {FROZEN FOODS} => {PRODUCE}
## [44] {PRODUCE} => {FROZEN FOODS}
## [45] {DAIRY, FROZEN FOODS} => {DSD GROCERY}
## [46] {PRODUCE} => {DAIRY}
## [47] {DAIRY} => {PRODUCE}
## [48] {COMM BREAD, DAIRY} => {DSD GROCERY}
## [49] {COMM BREAD, GROCERY DRY GOODS} => {DSD GROCERY}
## [50] {DAIRY, GROCERY DRY GOODS} => {DSD GROCERY}
## [51] {PRODUCE} => {GROCERY DRY GOODS}
## [52] {GROCERY DRY GOODS} => {PRODUCE}
## [53] {DAIRY, PRODUCE} => {DSD GROCERY}
## [54] {FROZEN FOODS} => {DSD GROCERY}
## [55] {PHARMACY OTC} => {PERSONAL CARE}
## [56] {PERSONAL CARE} => {PHARMACY OTC}
## [57] {GROCERY DRY GOODS, PRODUCE} => {DSD GROCERY}
## [58] {COMM BREAD} => {DSD GROCERY}
## [59] {HOUSEHOLD PAPER GOODS} => {DSD GROCERY}
## [60] {DAIRY} => {DSD GROCERY}
## [61] {DSD GROCERY} => {DAIRY}
## [62] {HOUSEHOLD CHEMICALS/SUPP} => {GROCERY DRY GOODS}
## [63] {GROCERY DRY GOODS} => {DSD GROCERY}
## [64] {DSD GROCERY} => {GROCERY DRY GOODS}
## [65] {DSD GROCERY} => {PRODUCE}
## [66] {PRODUCE} => {DSD GROCERY}
## [67] {HOUSEHOLD CHEMICALS/SUPP} => {DSD GROCERY}
## [68] {DAIRY} => {PERSONAL CARE}
## [69] {PERSONAL CARE} => {DAIRY}
## [70] {PERSONAL CARE} => {DSD GROCERY}
## [71] {DSD GROCERY} => {PERSONAL CARE}
## [72] {PERSONAL CARE} => {IMPULSE MERCHANDISE}
## [73] {PERSONAL CARE} => {GROCERY DRY GOODS}
## [74] {DSD GROCERY} => {IMPULSE MERCHANDISE}
## [75] {IMPULSE MERCHANDISE} => {DSD GROCERY}
## [76] {IMPULSE MERCHANDISE} => {GROCERY DRY GOODS}
## [77] {GROCERY DRY GOODS} => {IMPULSE MERCHANDISE}
## [78] {PHARMACY OTC} => {DSD GROCERY}
## support confidence coverage lift count
## [1] 0.0560 0.4786325 0.1170 4.558405 112
## [2] 0.0590 0.4521073 0.1305 4.305784 118
## [3] 0.0590 0.8137931 0.0725 4.238506 118
## [4] 0.0500 0.4273504 0.1170 4.210349 100
## [5] 0.0545 0.4176245 0.1305 4.114527 109
## [6] 0.0500 0.7874016 0.0635 4.101050 100
## [7] 0.0505 0.4122449 0.1225 4.061526 101
## [8] 0.0520 0.4244898 0.1225 4.042760 104
## [9] 0.0560 0.8615385 0.0650 3.979392 112
## [10] 0.0590 0.8368794 0.0705 3.865494 118
## [11] 0.0560 0.7320261 0.0765 3.812636 112
## [12] 0.0500 0.8064516 0.0620 3.724950 100
## [13] 0.0590 0.8027211 0.0735 3.707719 118
## [14] 0.0520 0.7074830 0.0735 3.684807 104
## [15] 0.0505 0.6965517 0.0725 3.627874 101
## [16] 0.0715 0.6908213 0.1035 3.598027 143
## [17] 0.0900 0.6896552 0.1305 3.591954 180
## [18] 0.0545 0.7517241 0.0725 3.472167 109
## [19] 0.0705 0.6588785 0.1070 3.431659 141
## [20] 0.0590 0.6555556 0.0900 3.396661 118
## [21] 0.0900 0.7346939 0.1225 3.393505 180
## [22] 0.0715 0.7295918 0.0980 3.369939 143
## [23] 0.0765 0.7285714 0.1050 3.365226 153
## [24] 0.0765 0.3533487 0.2165 3.365226 153
## [25] 0.0650 0.6190476 0.1050 3.224206 130
## [26] 0.0650 0.3385417 0.1920 3.224206 130
## [27] 0.0620 0.6108374 0.1015 3.181445 124
## [28] 0.0620 0.3229167 0.1920 3.181445 124
## [29] 0.0715 0.6111111 0.1170 3.166379 143
## [30] 0.0725 0.6775701 0.1070 3.129654 145
## [31] 0.0705 0.5755102 0.1225 2.981918 141
## [32] 0.0635 0.6256158 0.1015 2.889680 127
## [33] 0.0635 0.2933025 0.2165 2.889680 127
## [34] 0.0725 0.5555556 0.1305 2.878526 145
## [35] 0.0575 0.5476190 0.1050 2.837404 115
## [36] 0.0575 0.2979275 0.1930 2.837404 115
## [37] 0.0565 0.6141304 0.0920 2.836630 113
## [38] 0.0565 0.2609700 0.2165 2.836630 113
## [39] 0.1170 0.6093750 0.1920 2.814665 234
## [40] 0.1170 0.5404157 0.2165 2.814665 234
## [41] 0.0545 0.8582677 0.0635 2.804796 109
## [42] 0.0590 0.8251748 0.0715 2.696650 118
## [43] 0.0525 0.5172414 0.1015 2.680007 105
## [44] 0.0525 0.2720207 0.1930 2.680007 105
## [45] 0.0505 0.8145161 0.0620 2.661817 101
## [46] 0.0980 0.5077720 0.1930 2.644646 196
## [47] 0.0980 0.5104167 0.1920 2.644646 196
## [48] 0.0520 0.8000000 0.0650 2.614379 104
## [49] 0.0590 0.7712418 0.0765 2.520398 118
## [50] 0.0900 0.7692308 0.1170 2.513826 180
## [51] 0.1035 0.5362694 0.1930 2.476995 207
## [52] 0.1035 0.4780600 0.2165 2.476995 207
## [53] 0.0705 0.7193878 0.0980 2.350940 141
## [54] 0.0725 0.7142857 0.1015 2.334267 145
## [55] 0.0620 0.4350877 0.1425 2.326672 124
## [56] 0.0620 0.3315508 0.1870 2.326672 124
## [57] 0.0725 0.7004831 0.1035 2.289160 145
## [58] 0.0735 0.7000000 0.1050 2.287582 147
## [59] 0.0620 0.6739130 0.0920 2.202330 124
## [60] 0.1225 0.6380208 0.1920 2.085035 245
## [61] 0.1225 0.4003268 0.3060 2.085035 245
## [62] 0.0525 0.4338843 0.1210 2.004085 105
## [63] 0.1305 0.6027714 0.2165 1.969841 261
## [64] 0.1305 0.4264706 0.3060 1.969841 261
## [65] 0.1070 0.3496732 0.3060 1.811778 214
## [66] 0.1070 0.5544041 0.1930 1.811778 214
## [67] 0.0655 0.5413223 0.1210 1.769027 131
## [68] 0.0500 0.2604167 0.1920 1.392602 100
## [69] 0.0500 0.2673797 0.1870 1.392602 100
## [70] 0.0770 0.4117647 0.1870 1.345636 154
## [71] 0.0770 0.2516340 0.3060 1.345636 154
## [72] 0.0515 0.2754011 0.1870 1.330440 103
## [73] 0.0515 0.2754011 0.1870 1.272060 103
## [74] 0.0795 0.2598039 0.3060 1.255091 159
## [75] 0.0795 0.3840580 0.2070 1.255091 159
## [76] 0.0545 0.2632850 0.2070 1.216097 109
## [77] 0.0545 0.2517321 0.2165 1.216097 109
## [78] 0.0520 0.3649123 0.1425 1.192524 104
# 3Dii
# Second run with looser thresholds: support >= 4 % and confidence >= 20 %
# (still at least two items per rule).
ords_rules_2 <- apriori(
  data = Dept_baskets,
  parameter = list(support = 0.04, confidence = 0.2, minlen = 2)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.04 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 80
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[66 item(s), 2000 transaction(s)] done [0.00s].
## sorting and recoding items ... [25 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 done [0.00s].
## writing ... [152 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Auto-print the rule set (reports only how many rules were mined).
ords_rules_2
## set of 152 rules
# Rule-length distribution, quality-measure quartiles and mining parameters.
summary(ords_rules_2)
## set of 152 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4
## 80 48 24
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 2.000 2.000 2.632 3.000 4.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.04050 Min. :0.2026 Min. :0.0445 Min. :1.164
## 1st Qu.:0.04450 1st Qu.:0.3376 1st Qu.:0.0705 1st Qu.:2.266
## Median :0.05150 Median :0.5550 Median :0.1070 Median :2.884
## Mean :0.05833 Mean :0.5443 Mean :0.1303 Mean :2.969
## 3rd Qu.:0.06388 3rd Qu.:0.7302 3rd Qu.:0.1920 3rd Qu.:3.731
## Max. :0.13050 Max. :0.9327 Max. :0.3060 Max. :5.471
## count
## Min. : 81.0
## 1st Qu.: 89.0
## Median :103.0
## Mean :116.7
## 3rd Qu.:127.8
## Max. :261.0
##
## mining info:
## data ntransactions support confidence
## Dept_baskets 2000 0.04 0.2
## call
## apriori(data = Dept_baskets, parameter = list(support = 0.04, confidence = 0.2, minlen = 2))
# List every rule, ordered by lift with the strongest association first.
rules_2_by_lift <- sort(ords_rules_2, by = "lift")
inspect(rules_2_by_lift)
## lhs rhs support confidence coverage lift count
## [1] {DAIRY,
## DSD GROCERY,
## PRODUCE} => {COMM BREAD} 0.0405 0.5744681 0.0705 5.471125 81
## [2] {DAIRY,
## GROCERY DRY GOODS,
## PRODUCE} => {COMM BREAD} 0.0405 0.5664336 0.0715 5.394605 81
## [3] {DSD GROCERY,
## GROCERY DRY GOODS,
## PRODUCE} => {COMM BREAD} 0.0410 0.5655172 0.0725 5.385878 82
## [4] {DAIRY,
## DSD GROCERY,
## GROCERY DRY GOODS} => {COMM BREAD} 0.0485 0.5388889 0.0900 5.132275 97
## [5] {DAIRY,
## DSD GROCERY,
## GROCERY DRY GOODS} => {FROZEN FOODS} 0.0445 0.4944444 0.0900 4.871374 89
## [6] {DAIRY,
## GROCERY DRY GOODS} => {COMM BREAD} 0.0560 0.4786325 0.1170 4.558405 112
## [7] {COMM BREAD,
## DSD GROCERY,
## PRODUCE} => {DAIRY} 0.0405 0.8709677 0.0465 4.536290 81
## [8] {GROCERY DRY GOODS,
## PRODUCE} => {COMM BREAD} 0.0490 0.4734300 0.1035 4.508857 98
## [9] {DAIRY,
## PRODUCE} => {COMM BREAD} 0.0445 0.4540816 0.0980 4.324587 89
## [10] {COMM BREAD,
## DAIRY,
## DSD GROCERY} => {GROCERY DRY GOODS} 0.0485 0.9326923 0.0520 4.308048 97
## [11] {DSD GROCERY,
## GROCERY DRY GOODS} => {COMM BREAD} 0.0590 0.4521073 0.1305 4.305784 118
## [12] {COMM BREAD,
## GROCERY DRY GOODS,
## PRODUCE} => {DAIRY} 0.0405 0.8265306 0.0490 4.304847 81
## [13] {PRE PACKED DELI} => {DAIRY} 0.0450 0.8256881 0.0545 4.300459 90
## [14] {DAIRY} => {PRE PACKED DELI} 0.0450 0.2343750 0.1920 4.300459 90
## [15] {COMM BREAD,
## DSD GROCERY,
## GROCERY DRY GOODS} => {DAIRY} 0.0485 0.8220339 0.0590 4.281427 97
## [16] {DSD GROCERY,
## FROZEN FOODS,
## GROCERY DRY GOODS} => {DAIRY} 0.0445 0.8165138 0.0545 4.252676 89
## [17] {DSD GROCERY,
## GROCERY DRY GOODS,
## PRODUCE} => {DAIRY} 0.0590 0.8137931 0.0725 4.238506 118
## [18] {DAIRY,
## PRODUCE} => {FROZEN FOODS} 0.0420 0.4285714 0.0980 4.222379 84
## [19] {DAIRY,
## GROCERY DRY GOODS} => {FROZEN FOODS} 0.0500 0.4273504 0.1170 4.210349 100
## [20] {COMM BREAD,
## DAIRY,
## PRODUCE} => {GROCERY DRY GOODS} 0.0405 0.9101124 0.0445 4.203752 81
## [21] {FROZEN FOODS,
## PRODUCE} => {DAIRY} 0.0420 0.8000000 0.0525 4.166667 84
## [22] {DSD GROCERY,
## PRODUCE} => {COMM BREAD} 0.0465 0.4345794 0.1070 4.138852 93
## [23] {DSD GROCERY,
## GROCERY DRY GOODS} => {FROZEN FOODS} 0.0545 0.4176245 0.1305 4.114527 109
## [24] {FROZEN FOODS,
## GROCERY DRY GOODS} => {DAIRY} 0.0500 0.7874016 0.0635 4.101050 100
## [25] {COMM BREAD,
## DSD GROCERY,
## PRODUCE} => {GROCERY DRY GOODS} 0.0410 0.8817204 0.0465 4.072612 82
## [26] {DAIRY,
## DSD GROCERY,
## FROZEN FOODS} => {GROCERY DRY GOODS} 0.0445 0.8811881 0.0505 4.070153 89
## [27] {DAIRY,
## DSD GROCERY} => {FROZEN FOODS} 0.0505 0.4122449 0.1225 4.061526 101
## [28] {DAIRY,
## DSD GROCERY} => {COMM BREAD} 0.0520 0.4244898 0.1225 4.042760 104
## [29] {COMM BREAD,
## DAIRY,
## DSD GROCERY} => {PRODUCE} 0.0405 0.7788462 0.0520 4.035472 81
## [30] {COMM BREAD,
## PRODUCE} => {DAIRY} 0.0445 0.7739130 0.0575 4.030797 89
## [31] {COMM BREAD,
## DAIRY} => {GROCERY DRY GOODS} 0.0560 0.8615385 0.0650 3.979392 112
## [32] {DSD GROCERY,
## PRODUCE} => {FROZEN FOODS} 0.0430 0.4018692 0.1070 3.959302 86
## [33] {COMM BREAD,
## PRODUCE} => {GROCERY DRY GOODS} 0.0490 0.8521739 0.0575 3.936138 98
## [34] {MEAT - FRESH & FROZEN} => {PRODUCE} 0.0470 0.7520000 0.0625 3.896373 94
## [35] {PRODUCE} => {MEAT - FRESH & FROZEN} 0.0470 0.2435233 0.1930 3.896373 94
## [36] {DAIRY,
## DSD GROCERY,
## PRODUCE} => {GROCERY DRY GOODS} 0.0590 0.8368794 0.0705 3.865494 118
## [37] {COMM BREAD,
## GROCERY DRY GOODS} => {DAIRY} 0.0560 0.7320261 0.0765 3.812636 112
## [38] {COMM BREAD,
## DAIRY,
## GROCERY DRY GOODS} => {PRODUCE} 0.0405 0.7232143 0.0560 3.747224 81
## [39] {DAIRY,
## FROZEN FOODS} => {GROCERY DRY GOODS} 0.0500 0.8064516 0.0620 3.724950 100
## [40] {COMM BREAD,
## DSD GROCERY} => {GROCERY DRY GOODS} 0.0590 0.8027211 0.0735 3.707719 118
## [41] {DSD GROCERY,
## GROCERY DRY GOODS} => {HOUSEHOLD PAPER GOODS} 0.0445 0.3409962 0.1305 3.706480 89
## [42] {COMM BREAD,
## DSD GROCERY} => {DAIRY} 0.0520 0.7074830 0.0735 3.684807 104
## [43] {HOUSEHOLD PAPER GOODS} => {HOUSEHOLD CHEMICALS/SUPP} 0.0405 0.4402174 0.0920 3.638160 81
## [44] {HOUSEHOLD CHEMICALS/SUPP} => {HOUSEHOLD PAPER GOODS} 0.0405 0.3347107 0.1210 3.638160 81
## [45] {DSD GROCERY,
## FROZEN FOODS} => {DAIRY} 0.0505 0.6965517 0.0725 3.627874 101
## [46] {MEAT - FRESH & FROZEN} => {DAIRY} 0.0435 0.6960000 0.0625 3.625000 87
## [47] {DAIRY} => {MEAT - FRESH & FROZEN} 0.0435 0.2265625 0.1920 3.625000 87
## [48] {COMM BREAD,
## DSD GROCERY,
## GROCERY DRY GOODS} => {PRODUCE} 0.0410 0.6949153 0.0590 3.600597 82
## [49] {GROCERY DRY GOODS,
## PRODUCE} => {DAIRY} 0.0715 0.6908213 0.1035 3.598027 143
## [50] {DSD GROCERY,
## GROCERY DRY GOODS} => {DAIRY} 0.0900 0.6896552 0.1305 3.591954 180
## [51] {COMM BREAD,
## DAIRY} => {PRODUCE} 0.0445 0.6846154 0.0650 3.547230 89
## [52] {DAIRY,
## FROZEN FOODS} => {PRODUCE} 0.0420 0.6774194 0.0620 3.509945 84
## [53] {DSD GROCERY,
## FROZEN FOODS} => {GROCERY DRY GOODS} 0.0545 0.7517241 0.0725 3.472167 109
## [54] {DSD GROCERY,
## PRODUCE} => {DAIRY} 0.0705 0.6588785 0.1070 3.431659 141
## [55] {MEAT - FRESH & FROZEN} => {GROCERY DRY GOODS} 0.0460 0.7360000 0.0625 3.399538 92
## [56] {GROCERY DRY GOODS} => {MEAT - FRESH & FROZEN} 0.0460 0.2124711 0.2165 3.399538 92
## [57] {DAIRY,
## DSD GROCERY,
## GROCERY DRY GOODS} => {PRODUCE} 0.0590 0.6555556 0.0900 3.396661 118
## [58] {DAIRY,
## DSD GROCERY} => {GROCERY DRY GOODS} 0.0900 0.7346939 0.1225 3.393505 180
## [59] {DAIRY,
## PRODUCE} => {GROCERY DRY GOODS} 0.0715 0.7295918 0.0980 3.369939 143
## [60] {COMM BREAD} => {GROCERY DRY GOODS} 0.0765 0.7285714 0.1050 3.365226 153
## [61] {GROCERY DRY GOODS} => {COMM BREAD} 0.0765 0.3533487 0.2165 3.365226 153
## [62] {COMM BREAD,
## GROCERY DRY GOODS} => {PRODUCE} 0.0490 0.6405229 0.0765 3.318771 98
## [63] {DSD GROCERY,
## HOUSEHOLD PAPER GOODS} => {GROCERY DRY GOODS} 0.0445 0.7177419 0.0620 3.315205 89
## [64] {COMM BREAD,
## DSD GROCERY} => {PRODUCE} 0.0465 0.6326531 0.0735 3.277995 93
## [65] {COMM BREAD} => {DAIRY} 0.0650 0.6190476 0.1050 3.224206 130
## [66] {DAIRY} => {COMM BREAD} 0.0650 0.3385417 0.1920 3.224206 130
## [67] {FROZEN FOODS} => {DAIRY} 0.0620 0.6108374 0.1015 3.181445 124
## [68] {DAIRY} => {FROZEN FOODS} 0.0620 0.3229167 0.1920 3.181445 124
## [69] {DAIRY,
## GROCERY DRY GOODS} => {PRODUCE} 0.0715 0.6111111 0.1170 3.166379 143
## [70] {DSD GROCERY,
## PRODUCE} => {GROCERY DRY GOODS} 0.0725 0.6775701 0.1070 3.129654 145
## [71] {DSD GROCERY,
## FROZEN FOODS} => {PRODUCE} 0.0430 0.5931034 0.0725 3.073075 86
## [72] {DAIRY,
## DSD GROCERY} => {PRODUCE} 0.0705 0.5755102 0.1225 2.981918 141
## [73] {COMM BREAD,
## DAIRY,
## PRODUCE} => {DSD GROCERY} 0.0405 0.9101124 0.0445 2.974223 81
## [74] {DAIRY,
## FROZEN FOODS,
## GROCERY DRY GOODS} => {DSD GROCERY} 0.0445 0.8900000 0.0500 2.908497 89
## [75] {FROZEN FOODS} => {GROCERY DRY GOODS} 0.0635 0.6256158 0.1015 2.889680 127
## [76] {GROCERY DRY GOODS} => {FROZEN FOODS} 0.0635 0.2933025 0.2165 2.889680 127
## [77] {DSD GROCERY,
## GROCERY DRY GOODS} => {PRODUCE} 0.0725 0.5555556 0.1305 2.878526 145
## [78] {PERSONAL CARE} => {BEAUTY} 0.0460 0.2459893 0.1870 2.860341 92
## [79] {BEAUTY} => {PERSONAL CARE} 0.0460 0.5348837 0.0860 2.860341 92
## [80] {COMM BREAD} => {PRODUCE} 0.0575 0.5476190 0.1050 2.837404 115
## [81] {PRODUCE} => {COMM BREAD} 0.0575 0.2979275 0.1930 2.837404 115
## [82] {HOUSEHOLD PAPER GOODS} => {GROCERY DRY GOODS} 0.0565 0.6141304 0.0920 2.836630 113
## [83] {GROCERY DRY GOODS} => {HOUSEHOLD PAPER GOODS} 0.0565 0.2609700 0.2165 2.836630 113
## [84] {COMM BREAD,
## DAIRY,
## GROCERY DRY GOODS} => {DSD GROCERY} 0.0485 0.8660714 0.0560 2.830299 97
## [85] {DAIRY} => {GROCERY DRY GOODS} 0.1170 0.6093750 0.1920 2.814665 234
## [86] {GROCERY DRY GOODS} => {DAIRY} 0.1170 0.5404157 0.2165 2.814665 234
## [87] {FROZEN FOODS,
## GROCERY DRY GOODS} => {DSD GROCERY} 0.0545 0.8582677 0.0635 2.804796 109
## [88] {COMM BREAD,
## GROCERY DRY GOODS,
## PRODUCE} => {DSD GROCERY} 0.0410 0.8367347 0.0490 2.734427 82
## [89] {DAIRY,
## GROCERY DRY GOODS,
## PRODUCE} => {DSD GROCERY} 0.0590 0.8251748 0.0715 2.696650 118
## [90] {FROZEN FOODS} => {PRODUCE} 0.0525 0.5172414 0.1015 2.680007 105
## [91] {PRODUCE} => {FROZEN FOODS} 0.0525 0.2720207 0.1930 2.680007 105
## [92] {FROZEN FOODS,
## PRODUCE} => {DSD GROCERY} 0.0430 0.8190476 0.0525 2.676626 86
## [93] {DAIRY,
## FROZEN FOODS} => {DSD GROCERY} 0.0505 0.8145161 0.0620 2.661817 101
## [94] {PRODUCE} => {DAIRY} 0.0980 0.5077720 0.1930 2.644646 196
## [95] {DAIRY} => {PRODUCE} 0.0980 0.5104167 0.1920 2.644646 196
## [96] {COMM BREAD,
## PRODUCE} => {DSD GROCERY} 0.0465 0.8086957 0.0575 2.642796 93
## [97] {COMM BREAD,
## DAIRY} => {DSD GROCERY} 0.0520 0.8000000 0.0650 2.614379 104
## [98] {GROCERY DRY GOODS,
## HOUSEHOLD PAPER GOODS} => {DSD GROCERY} 0.0445 0.7876106 0.0565 2.573891 89
## [99] {COMM BREAD,
## GROCERY DRY GOODS} => {DSD GROCERY} 0.0590 0.7712418 0.0765 2.520398 118
## [100] {DAIRY} => {HOUSEHOLD PAPER GOODS} 0.0445 0.2317708 0.1920 2.519248 89
## [101] {HOUSEHOLD PAPER GOODS} => {DAIRY} 0.0445 0.4836957 0.0920 2.519248 89
## [102] {DAIRY,
## GROCERY DRY GOODS} => {DSD GROCERY} 0.0900 0.7692308 0.1170 2.513826 180
## [103] {PRE PACKED DELI} => {DSD GROCERY} 0.0415 0.7614679 0.0545 2.488457 83
## [104] {PRODUCE} => {GROCERY DRY GOODS} 0.1035 0.5362694 0.1930 2.476995 207
## [105] {GROCERY DRY GOODS} => {PRODUCE} 0.1035 0.4780600 0.2165 2.476995 207
## [106] {DAIRY,
## PRODUCE} => {DSD GROCERY} 0.0705 0.7193878 0.0980 2.350940 141
## [107] {FROZEN FOODS} => {DSD GROCERY} 0.0725 0.7142857 0.1015 2.334267 145
## [108] {DSD GROCERY} => {FROZEN FOODS} 0.0725 0.2369281 0.3060 2.334267 145
## [109] {PHARMACY OTC} => {PERSONAL CARE} 0.0620 0.4350877 0.1425 2.326672 124
## [110] {PERSONAL CARE} => {PHARMACY OTC} 0.0620 0.3315508 0.1870 2.326672 124
## [111] {MEAT - FRESH & FROZEN} => {DSD GROCERY} 0.0440 0.7040000 0.0625 2.300654 88
## [112] {GROCERY DRY GOODS,
## PRODUCE} => {DSD GROCERY} 0.0725 0.7004831 0.1035 2.289160 145
## [113] {DSD GROCERY} => {COMM BREAD} 0.0735 0.2401961 0.3060 2.287582 147
## [114] {COMM BREAD} => {DSD GROCERY} 0.0735 0.7000000 0.1050 2.287582 147
## [115] {HOUSEHOLD PAPER GOODS} => {DSD GROCERY} 0.0620 0.6739130 0.0920 2.202330 124
## [116] {DSD GROCERY} => {HOUSEHOLD PAPER GOODS} 0.0620 0.2026144 0.3060 2.202330 124
## [117] {HOUSEHOLD CHEMICALS/SUPP} => {DAIRY} 0.0490 0.4049587 0.1210 2.109160 98
## [118] {DAIRY} => {HOUSEHOLD CHEMICALS/SUPP} 0.0490 0.2552083 0.1920 2.109160 98
## [119] {DAIRY} => {DSD GROCERY} 0.1225 0.6380208 0.1920 2.085035 245
## [120] {DSD GROCERY} => {DAIRY} 0.1225 0.4003268 0.3060 2.085035 245
## [121] {CANDY, TOBACCO, COOKIES} => {DSD GROCERY} 0.0430 0.6142857 0.0700 2.007470 86
## [122] {HOUSEHOLD CHEMICALS/SUPP} => {GROCERY DRY GOODS} 0.0525 0.4338843 0.1210 2.004085 105
## [123] {GROCERY DRY GOODS} => {HOUSEHOLD CHEMICALS/SUPP} 0.0525 0.2424942 0.2165 2.004085 105
## [124] {GROCERY DRY GOODS} => {DSD GROCERY} 0.1305 0.6027714 0.2165 1.969841 261
## [125] {DSD GROCERY} => {GROCERY DRY GOODS} 0.1305 0.4264706 0.3060 1.969841 261
## [126] {PERSONAL CARE} => {HOUSEHOLD CHEMICALS/SUPP} 0.0420 0.2245989 0.1870 1.856190 84
## [127] {HOUSEHOLD CHEMICALS/SUPP} => {PERSONAL CARE} 0.0420 0.3471074 0.1210 1.856190 84
## [128] {DSD GROCERY} => {PRODUCE} 0.1070 0.3496732 0.3060 1.811778 214
## [129] {PRODUCE} => {DSD GROCERY} 0.1070 0.5544041 0.1930 1.811778 214
## [130] {HOUSEHOLD CHEMICALS/SUPP} => {DSD GROCERY} 0.0655 0.5413223 0.1210 1.769027 131
## [131] {DSD GROCERY} => {HOUSEHOLD CHEMICALS/SUPP} 0.0655 0.2140523 0.3060 1.769027 131
## [132] {HOUSEHOLD CHEMICALS/SUPP} => {PRODUCE} 0.0405 0.3347107 0.1210 1.734253 81
## [133] {PRODUCE} => {HOUSEHOLD CHEMICALS/SUPP} 0.0405 0.2098446 0.1930 1.734253 81
## [134] {DAIRY} => {PERSONAL CARE} 0.0500 0.2604167 0.1920 1.392602 100
## [135] {PERSONAL CARE} => {DAIRY} 0.0500 0.2673797 0.1870 1.392602 100
## [136] {PERSONAL CARE} => {DSD GROCERY} 0.0770 0.4117647 0.1870 1.345636 154
## [137] {DSD GROCERY} => {PERSONAL CARE} 0.0770 0.2516340 0.3060 1.345636 154
## [138] {IMPULSE MERCHANDISE} => {PERSONAL CARE} 0.0515 0.2487923 0.2070 1.330440 103
## [139] {PERSONAL CARE} => {IMPULSE MERCHANDISE} 0.0515 0.2754011 0.1870 1.330440 103
## [140] {PERSONAL CARE} => {PRODUCE} 0.0475 0.2540107 0.1870 1.316118 95
## [141] {PRODUCE} => {PERSONAL CARE} 0.0475 0.2461140 0.1930 1.316118 95
## [142] {GROCERY DRY GOODS} => {PERSONAL CARE} 0.0515 0.2378753 0.2165 1.272060 103
## [143] {PERSONAL CARE} => {GROCERY DRY GOODS} 0.0515 0.2754011 0.1870 1.272060 103
## [144] {DSD GROCERY} => {IMPULSE MERCHANDISE} 0.0795 0.2598039 0.3060 1.255091 159
## [145] {IMPULSE MERCHANDISE} => {DSD GROCERY} 0.0795 0.3840580 0.2070 1.255091 159
## [146] {IMPULSE MERCHANDISE} => {DAIRY} 0.0485 0.2342995 0.2070 1.220310 97
## [147] {DAIRY} => {IMPULSE MERCHANDISE} 0.0485 0.2526042 0.1920 1.220310 97
## [148] {IMPULSE MERCHANDISE} => {GROCERY DRY GOODS} 0.0545 0.2632850 0.2070 1.216097 109
## [149] {GROCERY DRY GOODS} => {IMPULSE MERCHANDISE} 0.0545 0.2517321 0.2165 1.216097 109
## [150] {PHARMACY OTC} => {DSD GROCERY} 0.0520 0.3649123 0.1425 1.192524 104
## [151] {IMPULSE MERCHANDISE} => {PRODUCE} 0.0465 0.2246377 0.2070 1.163926 93
## [152] {PRODUCE} => {IMPULSE MERCHANDISE} 0.0465 0.2409326 0.1930 1.163926 93
The K-means clustering model revealed distinct clusters based on customer purchasing behavior. Instead of choosing the initial centroids at random, switching the initialization method to k-means++ places the initial centroids strategically to improve convergence. This change improved the model by lowering the within-cluster sum of squared errors from 3892.61 to 670.80, although the number of iterations increased from 20 to 38.
In contrast, changing the distance function from Euclidean distance to Manhattan distance on the Walmart data set decreased the number of iterations from 20 to 15 (with less well-defined clusters). The sum of within-cluster distances also increased from 3892.61 to 5245.51. Since the performance of the clusters worsened, this probably implies that the data set has a linear shape.
Adjustments to the support and confidence thresholds in association rule mining influenced the number and strength of the discovered rules. With the model set to 0.05 support and 0.25 confidence, I obtained a set of 78 rules. By slightly lowering support and confidence to 0.04 and 0.2, respectively, I obtained a set of 152 rules without significantly sacrificing the strength of the rules.