### Set the work directory, change as per your work folder
# NOTE(review): this absolute path is machine-specific; adjust it before running.
setwd("C:/Users/rpandey/Desktop/Classes")
### Import/read the data
# stringsAsFactors = TRUE keeps the product columns as factors on R >= 4.0
# (where the read.csv() default became FALSE), so the summary()/str() output
# recorded below is reproduced.
txn_data <- read.csv("Retail_Data.csv", stringsAsFactors = TRUE)
### See the data summary (verify Data)
# First rows: each row holds a transaction id plus three product columns.
head(txn_data)
## Trans_ID Product1 Product2 Product3
## 1 1485 Bread Butter Egg
## 2 1147 Bread Butter Egg
## 3 1445 Bread Butter Egg
## 4 493 Bread Butter Egg
## 5 163 Bread Butter Egg
## 6 861 Bread Butter Egg
# Last rows, to check that the end of the file was read as expected.
tail(txn_data)
## Trans_ID Product1 Product2 Product3
## 1995 1635 Fruits JAM Salad
## 1996 1937 Fruits JAM Salad
## 1997 1020 Fruits JAM Salad
## 1998 500 Fruits JAM Salad
## 1999 1364 Fruits JAM Salad
## 2000 297 Fruits JAM Salad
# Per-column summary (level counts for the product columns).
# NOTE(review): the recorded output below lists both "Jam" and "JAM" as
# Product2 levels (24 rows are "JAM") -- presumably the same product with
# inconsistent casing; consider normalising it before mining.
summary(txn_data)
## Trans_ID Product1 Product2 Product3
## Min. : 1.0 Bread :702 Butter :493 Dairy: 406
## 1st Qu.: 517.8 Fruits:365 Jam :658 Egg :1152
## Median : 985.5 Juice :493 JAM : 24 Salad: 442
## Mean : 999.5 Nachos:440 Salsa :393
## 3rd Qu.:1504.0 Vegetable:432
## Max. :2000.0
# Column types: per the recorded output, Trans_ID is an integer and the three
# product columns are factors.
str(txn_data)
## 'data.frame': 2000 obs. of 4 variables:
## $ Trans_ID: int 1485 1147 1445 493 163 861 1052 789 123 1796 ...
## $ Product1: Factor w/ 4 levels "Bread","Fruits",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Product2: Factor w/ 5 levels "Butter","Jam",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Product3: Factor w/ 3 levels "Dairy","Egg",..: 2 2 2 2 2 2 2 2 2 2 ...
# Factorization of variables: convert every column to a factor in one
# vectorised step; apriori() then mines "column=value" items from them.
# Assigning into txn_data[] keeps the data.frame structure and, unlike a
# 1:ncol() loop, is also safe for a zero-column frame.
# NOTE(review): this converts Trans_ID too, so each transaction id becomes an
# item of its own (the apriori log below reports 1282 items before support
# filtering) -- consider dropping the id column before mining; confirm intent.
txn_data[] <- lapply(txn_data, as.factor)
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
# Run the apriori algorithm.
# Mines rules of 2-3 items (lhs + rhs) with minimum support 0.005 and minimum
# confidence 0.01. The right-hand side is restricted to Product3=Dairy or
# Product3=Egg; every other item may only appear on the left-hand side.
# Parameter names are spelled out in full (support, confidence) instead of
# relying on partial name matching.
basket_rules <- apriori(
  txn_data,
  parameter = list(
    support = 0.005,
    confidence = 0.01,
    target = "rules",
    minlen = 2,
    maxlen = 3
  ),
  appearance = list(
    rhs = c("Product3=Dairy", "Product3=Egg"),
    default = "lhs"
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.01 0.1 1 none FALSE TRUE 5 0.005 2
## maxlen target ext
## 3 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 10
##
## set item appearances ...[2 item(s)] done [0.00s].
## set transactions ...[1282 item(s), 2000 transaction(s)] done [0.00s].
## sorting and recoding items ... [12 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [22 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Overview of the mined rule set: rule-length distribution and the ranges of
# the quality measures (support, confidence, lift, count).
summary(basket_rules)
## set of 22 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3
## 12 10
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 2.000 2.000 2.455 3.000 3.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.00700 Min. :0.03241 Min. :0.05626 Min. : 14.0
## 1st Qu.:0.05300 1st Qu.:0.30849 1st Qu.:1.03839 1st Qu.:106.0
## Median :0.08975 Median :0.54830 Median :1.36302 Median :179.5
## Mean :0.10623 Mean :0.54682 Mean :1.61849 Mean :212.5
## 3rd Qu.:0.17525 3rd Qu.:0.78707 3rd Qu.:1.81950 3rd Qu.:350.5
## Max. :0.25700 Max. :1.00000 Max. :4.92611 Max. :514.0
##
## mining info:
## data ntransactions support confidence
## txn_data 2000 0.005 0.01
# Print every mined rule together with its quality measures.
inspect(basket_rules) # see all the rules
## lhs rhs support confidence lift count
## [1] {Product2=Salsa} => {Product3=Dairy} 0.0790 0.40203562 1.98047105 158
## [2] {Product2=Salsa} => {Product3=Egg} 0.1175 0.59796438 1.03813260 235
## [3] {Product1=Nachos} => {Product3=Dairy} 0.1025 0.46590909 2.29511867 205
## [4] {Product2=Butter} => {Product3=Dairy} 0.0520 0.21095335 1.03917905 104
## [5] {Product2=Jam} => {Product3=Dairy} 0.0720 0.21884498 1.07805411 144
## [6] {Product1=Bread} => {Product3=Dairy} 0.1005 0.28632479 1.41046693 201
## [7] {Product2=Vegetable} => {Product3=Egg} 0.0070 0.03240741 0.05626286 14
## [8] {Product1=Nachos} => {Product3=Egg} 0.1175 0.53409091 0.92724116 235
## [9] {Product1=Juice} => {Product3=Egg} 0.2080 0.84381339 1.46495380 416
## [10] {Product2=Butter} => {Product3=Egg} 0.1945 0.78904665 1.36987266 389
## [11] {Product2=Jam} => {Product3=Egg} 0.2570 0.78115502 1.35617190 514
## [12] {Product1=Bread} => {Product3=Egg} 0.2505 0.71367521 1.23901947 501
## [13] {Product1=Nachos,
## Product2=Salsa} => {Product3=Dairy} 0.0705 0.37500000 1.84729064 141
## [14] {Product1=Bread,
## Product2=Salsa} => {Product3=Dairy} 0.0085 1.00000000 4.92610837 17
## [15] {Product1=Nachos,
## Product2=Salsa} => {Product3=Egg} 0.1175 0.62500000 1.08506944 235
## [16] {Product1=Nachos,
## Product2=Butter} => {Product3=Dairy} 0.0320 1.00000000 4.92610837 64
## [17] {Product1=Bread,
## Product2=Butter} => {Product3=Dairy} 0.0200 0.09324009 0.45931080 40
## [18] {Product1=Bread,
## Product2=Jam} => {Product3=Dairy} 0.0720 0.56250000 2.77093596 144
## [19] {Product1=Juice,
## Product2=Vegetable} => {Product3=Egg} 0.0070 0.15384615 0.26709402 14
## [20] {Product1=Juice,
## Product2=Jam} => {Product3=Egg} 0.2010 1.00000000 1.73611111 402
## [21] {Product1=Bread,
## Product2=Butter} => {Product3=Egg} 0.1945 0.90675991 1.57423595 389
## [22] {Product1=Bread,
## Product2=Jam} => {Product3=Egg} 0.0560 0.43750000 0.75954861 112
# Show the 20 rules with the highest lift (sorted from highest to lowest, as
# in the recorded output below).
inspect(
  head(
    sort(basket_rules, by = "lift"),
    n = 20
  )
)
## lhs rhs support confidence lift count
## [1] {Product1=Bread,
## Product2=Salsa} => {Product3=Dairy} 0.0085 1.00000000 4.9261084 17
## [2] {Product1=Nachos,
## Product2=Butter} => {Product3=Dairy} 0.0320 1.00000000 4.9261084 64
## [3] {Product1=Bread,
## Product2=Jam} => {Product3=Dairy} 0.0720 0.56250000 2.7709360 144
## [4] {Product1=Nachos} => {Product3=Dairy} 0.1025 0.46590909 2.2951187 205
## [5] {Product2=Salsa} => {Product3=Dairy} 0.0790 0.40203562 1.9804711 158
## [6] {Product1=Nachos,
## Product2=Salsa} => {Product3=Dairy} 0.0705 0.37500000 1.8472906 141
## [7] {Product1=Juice,
## Product2=Jam} => {Product3=Egg} 0.2010 1.00000000 1.7361111 402
## [8] {Product1=Bread,
## Product2=Butter} => {Product3=Egg} 0.1945 0.90675991 1.5742359 389
## [9] {Product1=Juice} => {Product3=Egg} 0.2080 0.84381339 1.4649538 416
## [10] {Product1=Bread} => {Product3=Dairy} 0.1005 0.28632479 1.4104669 201
## [11] {Product2=Butter} => {Product3=Egg} 0.1945 0.78904665 1.3698727 389
## [12] {Product2=Jam} => {Product3=Egg} 0.2570 0.78115502 1.3561719 514
## [13] {Product1=Bread} => {Product3=Egg} 0.2505 0.71367521 1.2390195 501
## [14] {Product1=Nachos,
## Product2=Salsa} => {Product3=Egg} 0.1175 0.62500000 1.0850694 235
## [15] {Product2=Jam} => {Product3=Dairy} 0.0720 0.21884498 1.0780541 144
## [16] {Product2=Butter} => {Product3=Dairy} 0.0520 0.21095335 1.0391790 104
## [17] {Product2=Salsa} => {Product3=Egg} 0.1175 0.59796438 1.0381326 235
## [18] {Product1=Nachos} => {Product3=Egg} 0.1175 0.53409091 0.9272412 235
## [19] {Product1=Bread,
## Product2=Jam} => {Product3=Egg} 0.0560 0.43750000 0.7595486 112
## [20] {Product1=Bread,
## Product2=Butter} => {Product3=Dairy} 0.0200 0.09324009 0.4593108 40
library(arulesViz)
## Loading required package: grid
# Default arulesViz plot of the rule set.
plot(basket_rules)
# Graph view of the rules. The former control = list(type = "items") argument
# was dropped: this arulesViz version reports `type` as an unknown control
# parameter (see the recorded warning below), so it only produced a warning.
plot(basket_rules, method = "graph")
## Warning: Unknown control parameters: type
## Available control parameters (with default values):
## main = Graph for 22 rules
## nodeColors = c("#66CC6680", "#9999CC80")
## nodeCol = c("#EE0000FF", "#EE0303FF", "#EE0606FF", "#EE0909FF", "#EE0C0CFF", "#EE0F0FFF", "#EE1212FF", "#EE1515FF", "#EE1818FF", "#EE1B1BFF", "#EE1E1EFF", "#EE2222FF", "#EE2525FF", "#EE2828FF", "#EE2B2BFF", "#EE2E2EFF", "#EE3131FF", "#EE3434FF", "#EE3737FF", "#EE3A3AFF", "#EE3D3DFF", "#EE4040FF", "#EE4444FF", "#EE4747FF", "#EE4A4AFF", "#EE4D4DFF", "#EE5050FF", "#EE5353FF", "#EE5656FF", "#EE5959FF", "#EE5C5CFF", "#EE5F5FFF", "#EE6262FF", "#EE6666FF", "#EE6969FF", "#EE6C6CFF", "#EE6F6FFF", "#EE7272FF", "#EE7575FF", "#EE7878FF", "#EE7B7BFF", "#EE7E7EFF", "#EE8181FF", "#EE8484FF", "#EE8888FF", "#EE8B8BFF", "#EE8E8EFF", "#EE9191FF", "#EE9494FF", "#EE9797FF", "#EE9999FF", "#EE9B9BFF", "#EE9D9DFF", "#EE9F9FFF", "#EEA0A0FF", "#EEA2A2FF", "#EEA4A4FF", "#EEA5A5FF", "#EEA7A7FF", "#EEA9A9FF", "#EEABABFF", "#EEACACFF", "#EEAEAEFF", "#EEB0B0FF", "#EEB1B1FF", "#EEB3B3FF", "#EEB5B5FF", "#EEB7B7FF", "#EEB8B8FF", "#EEBABAFF", "#EEBCBCFF", "#EEBDBDFF", "#EEBFBFFF", "#EEC1C1FF", "#EEC3C3FF", "#EEC4C4FF", "#EEC6C6FF", "#EEC8C8FF", "#EEC9C9FF", "#EECBCBFF", "#EECDCDFF", "#EECFCFFF", "#EED0D0FF", "#EED2D2FF", "#EED4D4FF", "#EED5D5FF", "#EED7D7FF", "#EED9D9FF", "#EEDBDBFF", "#EEDCDCFF", "#EEDEDEFF", "#EEE0E0FF", "#EEE1E1FF", "#EEE3E3FF", "#EEE5E5FF", "#EEE7E7FF", "#EEE8E8FF", "#EEEAEAFF", "#EEECECFF", "#EEEEEEFF")
## edgeCol = c("#474747FF", "#494949FF", "#4B4B4BFF", "#4D4D4DFF", "#4F4F4FFF", "#515151FF", "#535353FF", "#555555FF", "#575757FF", "#595959FF", "#5B5B5BFF", "#5E5E5EFF", "#606060FF", "#626262FF", "#646464FF", "#666666FF", "#686868FF", "#6A6A6AFF", "#6C6C6CFF", "#6E6E6EFF", "#707070FF", "#727272FF", "#747474FF", "#767676FF", "#787878FF", "#7A7A7AFF", "#7C7C7CFF", "#7E7E7EFF", "#808080FF", "#828282FF", "#848484FF", "#868686FF", "#888888FF", "#8A8A8AFF", "#8C8C8CFF", "#8D8D8DFF", "#8F8F8FFF", "#919191FF", "#939393FF", "#959595FF", "#979797FF", "#999999FF", "#9A9A9AFF", "#9C9C9CFF", "#9E9E9EFF", "#A0A0A0FF", "#A2A2A2FF", "#A3A3A3FF", "#A5A5A5FF", "#A7A7A7FF", "#A9A9A9FF", "#AAAAAAFF", "#ACACACFF", "#AEAEAEFF", "#AFAFAFFF", "#B1B1B1FF", "#B3B3B3FF", "#B4B4B4FF", "#B6B6B6FF", "#B7B7B7FF", "#B9B9B9FF", "#BBBBBBFF", "#BCBCBCFF", "#BEBEBEFF", "#BFBFBFFF", "#C1C1C1FF", "#C2C2C2FF", "#C3C3C4FF", "#C5C5C5FF", "#C6C6C6FF", "#C8C8C8FF", "#C9C9C9FF", "#CACACAFF", "#CCCCCCFF", "#CDCDCDFF", "#CECECEFF", "#CFCFCFFF", "#D1D1D1FF", "#D2D2D2FF", "#D3D3D3FF", "#D4D4D4FF", "#D5D5D5FF", "#D6D6D6FF", "#D7D7D7FF", "#D8D8D8FF", "#D9D9D9FF", "#DADADAFF", "#DBDBDBFF", "#DCDCDCFF", "#DDDDDDFF", "#DEDEDEFF", "#DEDEDEFF", "#DFDFDFFF", "#E0E0E0FF", "#E0E0E0FF", "#E1E1E1FF", "#E1E1E1FF", "#E2E2E2FF", "#E2E2E2FF", "#E2E2E2FF")
## alpha = 0.5
## cex = 1
## itemLabels = TRUE
## labelCol = #000000B3
## measureLabels = FALSE
## precision = 3
## layout = NULL
## layoutParams = list()
## arrowSize = 0.5
## engine = igraph
## plot = TRUE
## plot_options = list()
## max = 100
## verbose = FALSE
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# R Markdown template example: summary of the `cars` data set
# (columns speed and dist, per the recorded output below).
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.