Apriori algorithm based on products

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.3     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(arules)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(arulesViz)
library(RColorBrewer) 
library(shinythemes)  
library(readr)
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
# Work from the project folder so the relative CSV paths below resolve.
# NOTE(review): this absolute path is machine-specific, and later chunks also
# rely on the working directory it sets.
setwd("/Users/hanxiao/Downloads/EmoryU/Fall/Social Network Analysis/Group Project")

# Load the raw tables with data.table::fread().
order <- fread("order_products__train.csv")
department <- fread("departments.csv")
aisles <- fread("aisles.csv")
product <- fread("products.csv")

# Left join on product_id: keep every order line and attach the matching
# product columns (unmatched products are not added).
order_data <- merge(order, product, by = "product_id", all.x = TRUE)


# "ultimate.csv" must already exist in the working directory; it was presumably
# exported once with the disabled call below (confirm before a fresh run).
#write.csv(order_data[,c(2,5)],"ultimate.csv",fileEncoding = "GBK")

# Read the export in "single" format: column 2 holds the transaction id and
# column 3 the item.
ult <- read.transactions(
  "ultimate.csv",
  format = "single",
  sep = ",",
  cols = c(2, 3)
)

# Mine association rules with minimum support 0.003 and minimum confidence 0.3.
rules1 <- apriori(
  ult,
  parameter = list(support = 0.003, confidence = 0.3)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.3    0.1    1 none FALSE            TRUE       5   0.003      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 390 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[37114 item(s), 130329 transaction(s)] done [0.98s].
## sorting and recoding items ... [466 item(s)] done [0.03s].
## creating transaction tree ... done [0.10s].
## checking subsets of size 1 2 3 done [0.03s].
## writing ... [21 rule(s)] done [0.00s].
## creating S4 object  ... done [0.06s].
# Browse the mined product rules in a table widget.
inspectDT(x = rules1)

# Draw the rules as a graph, first with the default engine ...
plot(x = rules1, method = "graph")

# ... and then with the HTML engine.
plot(x = rules1, method = "graph", engine = "html")

Apriori algorithm based on departments

library(tidyverse)
library(arules)
library(arulesViz)
library(RColorBrewer)  # color palettes for plots
library(shinythemes)   
library(readr)


# Re-load the raw tables, this time as plain data frames via read.csv().
order <- read.csv("order_products__train.csv")
department <- read.csv("departments.csv")
aisles <- read.csv("aisles.csv")
product <- read.csv("products.csv")

# Chain of left joins: order lines -> product -> department -> aisle.
# Every order line is kept; unmatched lookups yield NA in the joined columns.
a <- merge(order, product, by = "product_id", all.x = TRUE)
b <- merge(a, department, by = "department_id", all.x = TRUE)
order_data <- merge(b, aisles, by = "aisle_id", all.x = TRUE)

# Keep only columns 4 and 8 of the joined table; column 8 is `department`.
# NOTE(review): column 4 is expected to be the order id given the join order
# used to build `order_data` -- confirm against the CSV headers.
order_data_1 <- order_data[, c(4, 8)]

# Drop rows without a department using a logical mask. The former
# `x[-which(is.na(...)), ]` idiom silently removed EVERY row whenever no NA
# was present, because negating an empty index selects nothing.
order_data_1 <- order_data_1[!is.na(order_data_1$department), ]

# One row per distinct pair, then discard rows with any remaining NA.
order_data_1 <- unique(order_data_1)
order_data_1 <- na.omit(order_data_1)


# Write the pairs without a header row, then read them back as transactions
# ("single" format: column 1 is the transaction id, column 2 the item).
write_csv(order_data_1, "sub_department_1.csv", col_names = FALSE)
subdata_1 <- read.transactions(
  "sub_department_1.csv",
  format = "single",
  sep = ",",
  cols = c(1, 2)
)

# Peek at the first five transactions.
inspect(subdata_1[seq_len(5)])
##     items            transactionID
## [1] {canned goods,                
##      dairy eggs,                  
##      produce}              1      
## [2] {bakery,                      
##      canned goods,                
##      dairy eggs,                  
##      deli,                        
##      pantry,                      
##      personal care,               
##      pets,                        
##      produce}              100000 
## [3] {beverages,                   
##      frozen,                      
##      personal care}        1000008
## [4] {breakfast,                   
##      meat seafood,                
##      produce,                     
##      snacks}               1000029
## [5] {breakfast,                   
##      dairy eggs}           100003
# Mine department-level rules: minimum support 0.1, minimum confidence 0.4.
rules <- apriori(
  subdata_1,
  parameter = list(support = 0.1, confidence = 0.4)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.4    0.1    1 none FALSE            TRUE       5     0.1      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 13120 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[21 item(s), 131209 transaction(s)] done [0.06s].
## sorting and recoding items ... [14 item(s)] done [0.01s].
## creating transaction tree ... done [0.10s].
## checking subsets of size 1 2 3 4 5 done [0.01s].
## writing ... [228 rule(s)] done [0.00s].
## creating S4 object  ... done [0.04s].
# Browse the department rules in a table widget.
inspectDT(x = rules)

# Default plot method, shaded by lift, coloured with a reversed subset of the
# "Greens" palette.
plot(
  x = rules,
  control = list(
    jitter = 2,
    col = rev(brewer.pal(9, "Greens")[c(3, 7, 8, 9)])
  ),
  shading = "lift"
)

# Grouped plot of the same rules with a three-colour "Greens" ramp.
plot(
  x = rules,
  method = "grouped",
  control = list(col = rev(brewer.pal(8, "Greens")[c(1, 5, 8)]))
)

# Graph of the first ten rules, shaded by lift.
# `control = list(type = "items")` was removed: the graph method has no `type`
# control, so it only produced an "Unknown control parameters: type" warning.
# Any warning transcript previously recorded for this call is now obsolete.
plot(rules[1:10], measure = "confidence", method = "graph", shading = "lift")
## Warning: Unknown control parameters: type
## Available control parameters (with default values):
## layout    =  stress
## circular  =  FALSE
## ggraphdots    =  NULL
## edges     =  <environment>
## nodes     =  <environment>
## nodetext  =  <environment>
## colors    =  c("#EE0000FF", "#EEEEEEFF")
## engine    =  ggplot2
## max   =  100
## verbose   =  FALSE

# Keep only the rules whose lift exceeds 1.126 and show them as an HTML graph.
# The graph method plots at most `max` rules (100 by default), so larger rule
# sets are truncated to the best ones by lift.
rules.sub <- subset(rules, lift > 1.126)
plot(x = rules.sub, method = "graph", engine = "html")
## Warning: Too many rules supplied. Only plotting the best 100 using
## 'lift' (change control parameter max if needed).