Apriori algorithm based on products
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.3 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
library(RColorBrewer)
library(shinythemes)
library(readr)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
# Product-level market-basket analysis: load the order and product tables,
# attach product details to every order line, then mine association rules
# from the pre-exported transaction file "ultimate.csv".

# NOTE(review): hard-coded, machine-specific working directory. All relative
# paths in this script are resolved against it, so the call is kept; prefer a
# project-relative path (or an RStudio project) when this is next reworked.
setwd("/Users/hanxiao/Downloads/EmoryU/Fall/Social Network Analysis/Group Project")

order <- fread("order_products__train.csv")
department <- fread("departments.csv")
aisles <- fread("aisles.csv")
product <- fread("products.csv")

# Left join on product_id: every order line is kept, matched or not.
order_data <- merge(order, product, by = "product_id", all.x = TRUE)

# One-off export that produced "ultimate.csv" (left disabled, as before):
#write.csv(order_data[,c(2,5)],"ultimate.csv",fileEncoding = "GBK")

# The export above is disabled, so the file has to exist already. Fail early
# with a clear message instead of an opaque reader error.
if (!file.exists("ultimate.csv")) {
  stop(
    "'ultimate.csv' not found in the working directory; ",
    "run the write.csv() export above once to create it.",
    call. = FALSE
  )
}

# "single" format: one (transaction id, item) pair per line, taken from
# columns 2 and 3. Column 1 is presumably the row-name column added by
# write.csv() -- TODO confirm against the file.
# NOTE(review): if the file starts with a header row, that row is read as
# data unless header = TRUE is passed -- confirm against the file.
ult <- read.transactions(
  "ultimate.csv",
  format = "single",
  sep = ",",
  cols = c(2, 3)
)

# Rules need at least 0.3% support and 30% confidence.
rules1 <- apriori(ult, parameter = list(support = 0.003, confidence = 0.3))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.003 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 390
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[37114 item(s), 130329 transaction(s)] done [0.98s].
## sorting and recoding items ... [466 item(s)] done [0.03s].
## creating transaction tree ... done [0.10s].
## checking subsets of size 1 2 3 done [0.03s].
## writing ... [21 rule(s)] done [0.00s].
## creating S4 object ... done [0.06s].
# Browse the product-level rules in an interactive table.
inspectDT(x = rules1)

# Draw the rules as a network graph with the default engine ...
plot(x = rules1, method = "graph")

# ... and again as an HTML widget.
plot(x = rules1, engine = "html", method = "graph")
Apriori algorithm based on departments
library(tidyverse)
library(arules)
library(arulesViz)
library(RColorBrewer) # color palettes for plots
library(shinythemes)
library(readr)
# Department-level market-basket analysis: rebuild the order table with
# department and aisle details, reduce it to distinct two-column rows, and
# load those rows as transactions.
order <- read.csv("order_products__train.csv")
department <- read.csv("departments.csv")
aisles <- read.csv("aisles.csv")
product <- read.csv("products.csv")

# Chain of left joins; every order line survives each step.
a <- merge(order, product, by = "product_id", all.x = TRUE)
b <- merge(a, department, by = "department_id", all.x = TRUE)
order_data <- merge(b, aisles, by = "aisle_id", all.x = TRUE)

# Columns 4 and 8 of the joined table become the transaction id and the item
# (the `department` column) when the file is read back below.
order_data_1 <- order_data[, c(4, 8)]

# Drop rows without a department using a logical mask. The previous
# `x[-which(is.na(...)), ]` form removes EVERY row when there is no NA,
# because `-integer(0)` selects nothing.
order_data_1 <- order_data_1[!is.na(order_data_1$department), ]

# A department counts once per transaction; then drop any remaining rows that
# still contain an NA in either column.
order_data_1 <- unique(order_data_1)
order_data_1 <- na.omit(order_data_1)

# Round-trip through a headerless CSV so read.transactions() can parse the
# pairs in "single" format (column 1 = transaction id, column 2 = item).
write_csv(order_data_1, "sub_department_1.csv", col_names = FALSE)
subdata_1 <- read.transactions(
  "sub_department_1.csv",
  format = "single",
  sep = ",",
  cols = c(1, 2)
)

# Sanity check: show the first five transactions.
inspect(subdata_1[1:5])
## items transactionID
## [1] {canned goods,
## dairy eggs,
## produce} 1
## [2] {bakery,
## canned goods,
## dairy eggs,
## deli,
## pantry,
## personal care,
## pets,
## produce} 100000
## [3] {beverages,
## frozen,
## personal care} 1000008
## [4] {breakfast,
## meat seafood,
## produce,
## snacks} 1000029
## [5] {breakfast,
## dairy eggs} 100003
# Mine department-level rules: at least 10% support and 40% confidence.
rules <- apriori(
  data = subdata_1,
  parameter = list(support = 0.1, confidence = 0.4)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.4 0.1 1 none FALSE TRUE 5 0.1 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 13120
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[21 item(s), 131209 transaction(s)] done [0.06s].
## sorting and recoding items ... [14 item(s)] done [0.01s].
## creating transaction tree ... done [0.10s].
## checking subsets of size 1 2 3 4 5 done [0.01s].
## writing ... [228 rule(s)] done [0.00s].
## creating S4 object ... done [0.04s].
# Browse the department-level rules in an interactive table.
inspectDT(rules)

# Default plot of all rules, shaded by lift, with jitter and a green palette.
plot(
  rules,
  control = list(
    jitter = 2,
    col = rev(brewer.pal(9, "Greens")[c(3, 7, 8, 9)])
  ),
  shading = "lift"
)

# Grouped view of the same rules with a three-step green palette.
plot(
  rules,
  method = "grouped",
  control = list(col = rev(brewer.pal(8, "Greens")[c(1, 5, 8)]))
)

# Graph of the first ten rules, shaded by lift.
# `control = list(type = "items")` was removed: the graph method rejected it
# with "Unknown control parameters: type", so dropping it only silences that
# warning.
plot(rules[1:10], measure = "confidence", method = "graph", shading = "lift")

# Keep only the rules whose lift exceeds 1.126 ...
rules.sub <- subset(x = rules, subset = lift > 1.126)

# ... and draw them as an HTML graph widget.
plot(x = rules.sub, engine = "html", method = "graph")
## Warning: Too many rules supplied. Only plotting the best 100 using
## 'lift' (change control parameter max if needed).