연관 규칙 분석

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(arules)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(arulesViz)
## Loading required package: grid
## Registered S3 method overwritten by 'seriation':
##   method         from 
##   reorder.hclust gclus
library(methods)

데이터 불러오기

# Order lines of the prior orders: one row per (order_id, product_id) with the
# add_to_cart_order and reordered columns.
ordr_pr <- read_csv(file = "dataset/instacartData/order_products__prior.csv")
## Parsed with column specification:
## cols(
##   order_id = col_double(),
##   product_id = col_double(),
##   add_to_cart_order = col_double(),
##   reordered = col_double()
## )
# Product lookup table: product_id, product_name, aisle_id, department_id.
prods <- read_csv(file = "dataset/instacartData/products.csv")
## Parsed with column specification:
## cols(
##   product_id = col_double(),
##   product_name = col_character(),
##   aisle_id = col_double(),
##   department_id = col_double()
## )

데이터 준비: order_id로 그룹핑해서 주문별 상품명을 하나의 리스트로 묶기

# Build one basket per order: attach the product name to every order line,
# then gather all product names of an order into one list-column entry.
order_baskets <- ordr_pr %>%
  inner_join(prods, by = "product_id") %>%
  group_by(order_id) %>%
  summarise(basket = list(product_name))

transaction 타입으로 변경

# Coerce the list-column of product names into an arules "transactions"
# object, then print a summary of its first six transactions as a sanity check.
transactions <- as(order_baskets[["basket"]], "transactions")
head(transactions)
## transactions in sparse format with
##  6 transactions (rows) and
##  31325 items (columns)

head()로 확인한 처음 6개의 거래 정보가 transactions 타입으로 정상적으로 변환됨 (6은 전체 거래 수가 아니라 head()가 보여주는 개수). 장바구니 분석하기 *먼저 데이터를 살펴보자

# Histogram of basket sizes (number of items per transaction).
# The sizes are computed once and reused for the plot and the subtitle.
basket_sizes <- size(transactions)
# One bin per basket size. The bins are extended past 100 whenever a larger
# basket exists, because hist() stops with an error when the breaks do not
# span the whole range of the data.
size_breaks <- 0:max(100, basket_sizes)
hist(basket_sizes, breaks = size_breaks, xaxt = "n", ylim = c(0, 10000),
     main = "Number of Items per basket", xlab = "#Items")
# Custom x axis with a tick every 10 items.
axis(1, at = seq(0, 90, by = 10), cex.axis = 0.8)
mtext(paste("Total:", length(transactions), "baskets,",
            sum(basket_sizes), "items"))

어떤 아이템이 빈번하게 나타날까? support 기준을 0.02로 설정: 아이템이 전체 장바구니의 적어도 2%에는 나타나야 한다는 의미

# Absolute occurrence count of every item over all baskets.
item_frequency <- itemFrequency(transactions, type = "absolute")
support <- 0.02
# Number of baskets an item must exceed to pass the support threshold.
min_count <- support * length(transactions)
# Ascending order: barplot(horiz = TRUE) draws bars bottom-up, so the most
# frequent item ends up at the top of the chart.
freq_items <- sort(item_frequency, decreasing = FALSE)
freq_items <- freq_items[freq_items > min_count]
# Wide left margin for the item names; scipen avoids scientific notation on
# the count axis.
par(mar = c(2, 10, 2, 2))
options(scipen = 5)
barplot(freq_items, horiz = TRUE, las = 1, main = "Frequent Items",
        cex.names = 0.8, xlim = c(0, 11000))
mtext(paste("support:", support), padj = 0.8)
# Red line marks the support threshold expressed as a basket count.
abline(v = min_count, col = "red")

과일이랑 야채들이 많음 - 바나나, 딸기, 아보카도 등등. Frequent Itemsets: 빈발 아이템 집합을 계산! 크기가 2 이상인 항목 집합은 빈번하게 관찰될 가능성이 작은 것을 고려해 support 기준을 0.008로 낮춤

# Frequent itemsets with at least two items, mined at a lower support level.
support <- 0.008
itemsets <- apriori(
  transactions,
  parameter = list(target = "frequent itemsets", supp = support, minlen = 2),
  control = list(verbose = FALSE)
)
# Very wide left margin: the itemset labels are long.
par(mar = c(5, 18, 2, 2) + 0.1)
# Ascending support so the best-supported itemset is drawn at the top of the
# horizontal bar chart.
sets_order_supp <- DATAFRAME(sort(itemsets, by = "support", decreasing = FALSE))
barplot(sets_order_supp$support, names.arg = sets_order_supp$items,
        xlim = c(0, 0.02), horiz = TRUE, las = 2, cex.names = 0.8,
        main = "Frequent Itemsets")
mtext(paste("support:", support), padj = 0.8)

우선 support 임계값이 0.008인 경우 빈번한 항목 집합으로는 쌍(2개 항목)만 관찰됨. 두번째로 바나나가 많음: support 가 가장 높은 8쌍에는 바나나가 들어있음. 거의 모든 쌍에 과일과 야채가 들어있음. 우유를 포함한 빈번하게 나타나는 쌍도 보임

Association Rules

낮은 support 기준과 높은 confidence 를 사용해서 작은 항목에 대해서도 강력한 규칙을 생성

# Rule set 1: rules of at most 3 items with very low support (0.0001) and
# high confidence (0.6); then summarise their quality measures.
rules1 <- apriori(
  transactions,
  parameter = list(supp = 0.0001, conf = 0.6, maxlen = 3),
  control = list(verbose = FALSE)
)
summary(quality(rules1))
##     support            confidence          lift              count       
##  Min.   :0.0001089   Min.   :0.6000   Min.   :   4.088   Min.   : 7.000  
##  1st Qu.:0.0001089   1st Qu.:0.6364   1st Qu.:   5.116   1st Qu.: 7.000  
##  Median :0.0001245   Median :0.6667   Median :   8.254   Median : 8.000  
##  Mean   :0.0001491   Mean   :0.6981   Mean   : 143.684   Mean   : 9.586  
##  3rd Qu.:0.0001556   3rd Qu.:0.7333   3rd Qu.:  93.511   3rd Qu.:10.000  
##  Max.   :0.0010424   Max.   :1.0000   Max.   :4249.719   Max.   :67.000
# Plot rule set 1 with the arulesViz defaults
# (NOTE(review): presumably the support/confidence scatter plot - confirm).
plot(rules1)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

품목 간에 강한 연관성을 나타내는, 향상도(lift)가 매우 높은 규칙이 있다! 이러한 규칙을 자세히 살펴보자

# Show the (up to) 10 rules with the highest lift. head() is used instead of
# [1:10] so the call still works when fewer than 10 rules were mined.
inspect(head(sort(rules1, by = "lift"), n = 10))
##      lhs                                                 rhs                                                  support confidence     lift count
## [1]  {Purina Mixed Grill Classic Pate Cat Food}       => {Liver & Chicken Dinner Classic Pate Cat Food}  0.0001244613  0.7272727 4249.719     8
## [2]  {Liver & Chicken Dinner Classic Pate Cat Food}   => {Purina Mixed Grill Classic Pate Cat Food}      0.0001244613  0.7272727 4249.719     8
## [3]  {Fancy Feast Wet Classic Chicken Feast Cat Food} => {Classic Tender Liver & Chicken Feast Cat Food} 0.0001089037  0.7000000 3461.069     7
## [4]  {Organic Basil Babe Level 1 Baby Food}           => {Peachy Keen Organic Level 1}                   0.0001089037  0.6363636 3146.427     7
## [5]  {Vanilla Pot De Creme}                           => {Dark Chocolate Pot De Creme}                   0.0001244613  0.7272727 2921.682     8
## [6]  {Organic Nondairy Lemon Cashew Yogurt}           => {Organic Nondairy Strawberry Cashew Yogurt}     0.0001244613  0.8000000 2448.648     8
## [7]  {Organic Blueberry Lowfat Yogurt}                => {Yogurt, Organic, Lowfat, Strawberry}           0.0001089037  0.6363636 2406.091     7
## [8]  {Stage 1 First Apples}                           => {Organic Stage 1 First Peas Baby Food}          0.0001244613  0.8888889 2285.404     8
## [9]  {Unsweet Peach Water,                                                                                                                     
##       Unsweetened Blackberry Water}                   => {Water, Unsweet, Blood Orange}                  0.0001089037  0.6363636 2272.419     7
## [10] {9 Inch Plates,                                                                                                                           
##       Plastic Spoons}                                 => {Compostable Forks}                             0.0001089037  1.0000000 2008.656     7
# Show the (up to) 10 rules with the highest confidence. head() is used
# instead of [1:10] so the call still works when fewer than 10 rules were mined.
inspect(head(sort(rules1, by = "confidence"), n = 10))
##      lhs                                                                             rhs                                                                       support confidence       lift count
## [1]  {Water Mineral,                                                                                                                                                                              
##       Zero Calorie Cola}                                                          => {Soda}                                                               0.0001089037          1   87.33288     7
## [2]  {Sandwich Cookies & Crackers Variety Snack Packs,                                                                                                                                            
##       Zero Calorie Cola}                                                          => {Soda}                                                               0.0001089037          1   87.33288     7
## [3]  {Mighty 4 Sweet Potato, Blueberry, Millet & Greek Yogurt Tots Snack,                                                                                                                         
##       Mighty 4: Pumpkin Pomegranate Quinoa Greek Yogurt Nutrition Blend for Tots} => {Mighty 4 Purple Carrot Blackberry Quinoa & Greek Yogurt Tots Snack} 0.0001400190          1 1260.33333     9
## [4]  {Chocolate Sandwich Cookies,                                                                                                                                                                 
##       Clementines}                                                                => {Hass Avocados}                                                      0.0001089037          1   65.58878     7
## [5]  {Chocolate Sandwich Cookies,                                                                                                                                                                 
##       Hass Avocados}                                                              => {Clementines}                                                        0.0001089037          1  113.96631     7
## [6]  {9 Inch Plates,                                                                                                                                                                              
##       Plastic Spoons}                                                             => {Compostable Forks}                                                  0.0001089037          1 2008.65625     7
## [7]  {Milk Chocolate Covered Raisins,                                                                                                                                                             
##       Popcorn}                                                                    => {Extra Fancy Unsalted Mixed Nuts}                                    0.0001089037          1  389.55758     7
## [8]  {Lemon Sparkling Water,                                                                                                                                                                      
##       Peach-Pear Sparkling Water}                                                 => {Grapefruit Sparkling Water}                                         0.0001400190          1  209.37134     9
## [9]  {Mixed Fruit Fruit Snacks,                                                                                                                                                                   
##       Orange & Lemon Flavor Variety Pack Sparkling Fruit Beverage}                => {Soda}                                                               0.0001089037          1   87.33288     7
## [10] {Lemon Fruit & Nut Food Bar,                                                                                                                                                                 
##       Pecan Pie Fruit & Nut Food Bar}                                             => {Blueberry Muffin Bar}                                               0.0001400190          1  455.86525     9

이러한 규칙은 대개 함께 구입한 유사한 품목에 영향을 주는 것 같음. 바나나를 포함하는 규칙은 없음. *다음으로 support 는 그대로(0.0001) 두고 confidence만 0.4로 낮추어 좀 더 빈번한 품목의 규칙도 나타나는지 보자

# Rule set 2: same support (0.0001) and maximum length (3) as rule set 1, but
# with the confidence threshold lowered to 0.4; then summarise the quality
# measures.
rules2 <- apriori(
  transactions,
  parameter = list(supp = 0.0001, conf = 0.4, maxlen = 3),
  control = list(verbose = FALSE)
)
summary(quality(rules2))
##     support            confidence          lift              count       
##  Min.   :0.0001089   Min.   :0.4000   Min.   :   2.725   Min.   :  7.00  
##  1st Qu.:0.0001089   1st Qu.:0.4324   1st Qu.:   3.634   1st Qu.:  7.00  
##  Median :0.0001400   Median :0.4737   Median :   5.260   Median :  9.00  
##  Mean   :0.0001771   Mean   :0.5082   Mean   :  67.092   Mean   : 11.38  
##  3rd Qu.:0.0001867   3rd Qu.:0.5500   3rd Qu.:  13.151   3rd Qu.: 12.00  
##  Max.   :0.0034538   Max.   :1.0000   Max.   :4249.719   Max.   :222.00
# Plot rule set 2 with the arulesViz defaults
# (NOTE(review): presumably the support/confidence scatter plot - confirm).
plot(rules2)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Show the (up to) 10 rules with the highest lift. head() is used instead of
# [1:10] so the call still works when fewer than 10 rules were mined.
inspect(head(sort(rules2, by = "lift"), n = 10))
##      lhs                                                      rhs                                                        support confidence     lift count
## [1]  {Purina Mixed Grill Classic Pate Cat Food}            => {Liver & Chicken Dinner Classic Pate Cat Food}        0.0001244613  0.7272727 4249.719     8
## [2]  {Liver & Chicken Dinner Classic Pate Cat Food}        => {Purina Mixed Grill Classic Pate Cat Food}            0.0001244613  0.7272727 4249.719     8
## [3]  {Classic Tender Liver & Chicken Feast Cat Food}       => {Fancy Feast Wet Classic Chicken Feast Cat Food}      0.0001089037  0.5384615 3461.069     7
## [4]  {Fancy Feast Wet Classic Chicken Feast Cat Food}      => {Classic Tender Liver & Chicken Feast Cat Food}       0.0001089037  0.7000000 3461.069     7
## [5]  {Organic Basil Babe Level 1 Baby Food}                => {Peachy Keen Organic Level 1}                         0.0001089037  0.6363636 3146.427     7
## [6]  {Peachy Keen Organic Level 1}                         => {Organic Basil Babe Level 1 Baby Food}                0.0001089037  0.5384615 3146.427     7
## [7]  {Vanilla Pot De Creme}                                => {Dark Chocolate Pot De Creme}                         0.0001244613  0.7272727 2921.682     8
## [8]  {Dark Chocolate Pot De Creme}                         => {Vanilla Pot De Creme}                                0.0001244613  0.5000000 2921.682     8
## [9]  {Organic Forest Berry Cream On Top Whole Milk Yogurt} => {Organic Blueberry Cream On Top Whole Milk Yogurt}    0.0001089037  0.5833333 2884.224     7
## [10] {Organic Blueberry Cream On Top Whole Milk Yogurt}    => {Organic Forest Berry Cream On Top Whole Milk Yogurt} 0.0001089037  0.5384615 2884.224     7
# Show the (up to) 10 rules with the highest confidence. head() is used
# instead of [1:10] so the call still works when fewer than 10 rules were mined.
inspect(head(sort(rules2, by = "confidence"), n = 10))
##      lhs                                                                             rhs                                                                       support confidence       lift count
## [1]  {Water Mineral,                                                                                                                                                                              
##       Zero Calorie Cola}                                                          => {Soda}                                                               0.0001089037          1   87.33288     7
## [2]  {Sandwich Cookies & Crackers Variety Snack Packs,                                                                                                                                            
##       Zero Calorie Cola}                                                          => {Soda}                                                               0.0001089037          1   87.33288     7
## [3]  {Mighty 4 Sweet Potato, Blueberry, Millet & Greek Yogurt Tots Snack,                                                                                                                         
##       Mighty 4: Pumpkin Pomegranate Quinoa Greek Yogurt Nutrition Blend for Tots} => {Mighty 4 Purple Carrot Blackberry Quinoa & Greek Yogurt Tots Snack} 0.0001400190          1 1260.33333     9
## [4]  {Chocolate Sandwich Cookies,                                                                                                                                                                 
##       Clementines}                                                                => {Hass Avocados}                                                      0.0001089037          1   65.58878     7
## [5]  {Chocolate Sandwich Cookies,                                                                                                                                                                 
##       Hass Avocados}                                                              => {Clementines}                                                        0.0001089037          1  113.96631     7
## [6]  {9 Inch Plates,                                                                                                                                                                              
##       Plastic Spoons}                                                             => {Compostable Forks}                                                  0.0001089037          1 2008.65625     7
## [7]  {Milk Chocolate Covered Raisins,                                                                                                                                                             
##       Popcorn}                                                                    => {Extra Fancy Unsalted Mixed Nuts}                                    0.0001089037          1  389.55758     7
## [8]  {Lemon Sparkling Water,                                                                                                                                                                      
##       Peach-Pear Sparkling Water}                                                 => {Grapefruit Sparkling Water}                                         0.0001400190          1  209.37134     9
## [9]  {Mixed Fruit Fruit Snacks,                                                                                                                                                                   
##       Orange & Lemon Flavor Variety Pack Sparkling Fruit Beverage}                => {Soda}                                                               0.0001089037          1   87.33288     7
## [10] {Lemon Fruit & Nut Food Bar,                                                                                                                                                                 
##       Pecan Pie Fruit & Nut Food Bar}                                             => {Blueberry Muffin Bar}                                               0.0001400190          1  455.86525     9

이번에는 support를 0.0005로 올리고 confidence를 0.1로 더 내려보자

# Rule set 3: higher support (0.0005) and much lower confidence (0.1), still
# limited to rules of at most 3 items; then summarise the quality measures.
rules3 <- apriori(
  transactions,
  parameter = list(supp = 0.0005, conf = 0.1, maxlen = 3),
  control = list(verbose = FALSE)
)
summary(quality(rules3))
##     support            confidence          lift              count        
##  Min.   :0.0005134   Min.   :0.1000   Min.   :  0.7237   Min.   :  33.00  
##  1st Qu.:0.0006067   1st Qu.:0.1409   1st Qu.:  1.9424   1st Qu.:  39.00  
##  Median :0.0007468   Median :0.1912   Median :  2.7183   Median :  48.00  
##  Mean   :0.0011682   Mean   :0.2104   Mean   :  7.2884   Mean   :  75.09  
##  3rd Qu.:0.0011202   3rd Qu.:0.2570   3rd Qu.:  3.9553   3rd Qu.:  72.00  
##  Max.   :0.1467710   Max.   :0.7679   Max.   :766.8133   Max.   :9434.00
# Plot rule set 3 with the arulesViz defaults
# (NOTE(review): presumably the support/confidence scatter plot - confirm).
plot(rules3)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Show the (up to) 10 rules with the highest lift. head() is used instead of
# [1:10] so the call still works when fewer than 10 rules were mined.
inspect(head(sort(rules3, by = "lift"), n = 10))
##      lhs                                                                    rhs                                                                      support confidence     lift count
## [1]  {Oh My Yog! Organic Wild Quebec Blueberry Cream Top Yogurt & Fruit} => {Oh My Yog! Pacific Coast Strawberry Trilayer Yogurt}               0.0005289606  0.6800000 766.8133    34
## [2]  {Oh My Yog! Pacific Coast Strawberry Trilayer Yogurt}               => {Oh My Yog! Organic Wild Quebec Blueberry Cream Top Yogurt & Fruit} 0.0005289606  0.5964912 766.8133    34
## [3]  {Greek Whole Milk Blended Blueberry Yogurt}                         => {Organic Greek Whole Milk Blended Strawberry Yogurt}                0.0005445183  0.6481481 534.1156    35
## [4]  {Organic Greek Whole Milk Blended Strawberry Yogurt}                => {Greek Whole Milk Blended Blueberry Yogurt}                         0.0005445183  0.4487179 534.1156    35
## [5]  {Coconut  Chocolate Bar}                                            => {Chocolate Sea Salt}                                                0.0005289606  0.5230769 400.2597    34
## [6]  {Chocolate Sea Salt}                                                => {Coconut  Chocolate Bar}                                            0.0005289606  0.4047619 400.2597    34
## [7]  {Almond Milk Strawberry Yogurt}                                     => {Almond Milk Blueberry Yogurt}                                      0.0008089986  0.4482759 335.0445    52
## [8]  {Almond Milk Blueberry Yogurt}                                      => {Almond Milk Strawberry Yogurt}                                     0.0008089986  0.6046512 335.0445    52
## [9]  {Almond Milk Peach Yogurt}                                          => {Almond Milk Strawberry Yogurt}                                     0.0009179022  0.6020408 333.5981    59
## [10] {Almond Milk Strawberry Yogurt}                                     => {Almond Milk Peach Yogurt}                                          0.0009179022  0.5086207 333.5981    59
# Show the (up to) 10 rules with the highest confidence. head() is used
# instead of [1:10] so the call still works when fewer than 10 rules were mined.
inspect(head(sort(rules3, by = "confidence"), n = 10))
##      lhs                                                                    rhs                                                          support confidence      lift count
## [1]  {Peach Pear Flavored Sparkling Water,                                                                                                                                 
##       Pure Sparkling Water}                                              => {Sparkling Water Grapefruit}                            0.0006689796  0.7678571  31.13915    43
## [2]  {Total 2% All Natural Greek Strained Yogurt with Honey,                                                                                                               
##       Total 2% Lowfat Greek Strained Yogurt With Blueberry}              => {Total 2% with Strawberry Lowfat Greek Strained Yogurt} 0.0008867869  0.7307692  76.87668    57
## [3]  {Pure Sparkling Water,                                                                                                                                                
##       Sparkling Water Berry}                                             => {Sparkling Water Grapefruit}                            0.0006845372  0.6875000  27.88040    44
## [4]  {Lime Sparkling Water,                                                                                                                                                
##       Peach Pear Flavored Sparkling Water}                               => {Sparkling Water Grapefruit}                            0.0010112482  0.6842105  27.74700    65
## [5]  {Total 2% All Natural Greek Strained Yogurt with Honey,                                                                                                               
##       Total 2% Lowfat Greek Strained Yogurt with Peach}                  => {Total 2% with Strawberry Lowfat Greek Strained Yogurt} 0.0006689796  0.6825397  71.80295    43
## [6]  {Oh My Yog! Organic Wild Quebec Blueberry Cream Top Yogurt & Fruit} => {Oh My Yog! Pacific Coast Strawberry Trilayer Yogurt}   0.0005289606  0.6800000 766.81333    34
## [7]  {Blueberry on the Bottom Nonfat Greek Yogurt,                                                                                                                         
##       Peach on the Bottom Nonfat Greek Yogurt}                           => {Strawberry on the Bottom Nonfat Greek Yogurt}          0.0005289606  0.6800000 291.38907    34
## [8]  {Total 2% All Natural Greek Strained Yogurt with Honey,                                                                                                               
##       Total 2% Greek Strained Yogurt with Cherry 5.3 oz}                 => {Total 2% with Strawberry Lowfat Greek Strained Yogurt} 0.0006067489  0.6724138  70.73771    39
## [9]  {Pure Sparkling Water,                                                                                                                                                
##       Sparkling Lemon Water}                                             => {Sparkling Water Grapefruit}                            0.0009801329  0.6702128  27.17935    63
## [10] {Lime Sparkling Water,                                                                                                                                                
##       Sparkling Water Berry}                                             => {Sparkling Water Grapefruit}                            0.0009334599  0.6666667  27.03554    60