Association Rules

Book dataset

Assignment 23

library(arules)

## Loading required package: Matrix

## 
## Attaching package: 'arules'

## The following objects are masked from 'package:base':
## 
##     abbreviate, write

library(arulesViz)

## Warning: package 'arulesViz' was built under R version 3.5.1

## Loading required package: grid

data <- read.csv("C:\\Users\\RISHI RAHUL\\Desktop\\DS\\8 arules\\Assignment\\book.csv")

class(data)

## [1] "data.frame"

colnames(data)

##  [1] "ChildBks"  "YouthBks"  "CookBks"   "DoItYBks"  "RefBks"   
##  [6] "ArtBks"    "GeogBks"   "ItalCook"  "ItalAtlas" "ItalArt"  
## [11] "Florence"

data$ChildBks <- factor(data$ChildBks,levels = c("1","0"),labels = c("ChildBks",""))
data$YouthBks <- factor(data$YouthBks,levels = c("1","0"),labels = c("YouthBks",""))
data$CookBks <- factor(data$CookBks,levels = c("1","0"),labels = c("CookBks",""))
data$DoItYBks <- factor(data$DoItYBks,levels = c("1","0"),labels = c("DoItYBks",""))
data$RefBks <- factor(data$RefBks,levels = c("1","0"),labels = c("RefBks",""))
data$ArtBks <- factor(data$ArtBks,levels = c("1","0"),labels = c("ArtBks",""))
data$GeogBks <- factor(data$GeogBks,levels = c("1","0"),labels = c("GeogBks",""))
data$ItalCook <- factor(data$ItalCook,levels = c("1","0"),labels = c("ItalCook",""))
data$ItalAtlas <- factor(data$ItalAtlas,levels = c("1","0"),labels = c("ItalAtlas",""))
data$ItalArt <- factor(data$ItalArt,levels = c("1","0"),labels = c("ItalArt",""))
data$Florence <- factor(data$Florence,levels = c("1","0"),labels = c("Florence",""))

#library(car)
#library(carData)
#library(mvinfluence)

data1 <- as(data,"transactions")

itemFrequencyPlot(data1,topN=25)

rules <- apriori(data1, parameter = list(supp = 0.005, confidence = 0.50, minlen = 2, maxlen = 4))

## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.5    0.1    1 none FALSE            TRUE       5   0.005      2
##  maxlen target   ext
##       4  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 10 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[22 item(s), 2000 transaction(s)] done [0.00s].
## sorting and recoding items ... [22 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4

## Warning in apriori(data1, parameter = list(supp = 0.005, confidence =
## 0.5, : Mining stopped (maxlen reached). Only patterns up to a length of 4
## returned!

##  done [0.01s].
## writing ... [11640 rule(s)] done [0.00s].
## creating S4 object  ... done [0.01s].

inspect(head(sort(rules), n = 10))

##      lhs                       rhs          support confidence lift    
## [1]  {ItalArt=}             => {ItalAtlas=} 0.9310  0.9784551  1.016049
## [2]  {ItalAtlas=}           => {ItalArt=}   0.9310  0.9667705  1.016049
## [3]  {ItalCook=}            => {ItalArt=}   0.8755  0.9875917  1.037931
## [4]  {ItalArt=}             => {ItalCook=}  0.8755  0.9201261  1.037931
## [5]  {ItalCook=}            => {ItalAtlas=} 0.8725  0.9842076  1.022022
## [6]  {ItalAtlas=}           => {ItalCook=}  0.8725  0.9060228  1.022022
## [7]  {ItalCook=,ItalArt=}   => {ItalAtlas=} 0.8655  0.9885780  1.026561
## [8]  {ItalCook=,ItalAtlas=} => {ItalArt=}   0.8655  0.9919771  1.042540
## [9]  {ItalAtlas=,ItalArt=}  => {ItalCook=}  0.8655  0.9296455  1.048670
## [10] {Florence=}            => {ItalAtlas=} 0.8610  0.9657880  1.002895
##      count
## [1]  1862 
## [2]  1862 
## [3]  1751 
## [4]  1751 
## [5]  1745 
## [6]  1745 
## [7]  1731 
## [8]  1731 
## [9]  1731 
## [10] 1722

plot(head(sort(rules, by = "lift"), n = 10), method = "graph", control = list(cex = 1.0)) #cex = font size

plot(rules)

## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

plot(head(sort(rules), n = 10), method = "grouped", control = list(cex = 0.2))

## Warning: Unknown control parameters: cex

## Available control parameters (with default values):
## main  =  Grouped Matrix for 10 Rules
## k     =  20
## rhs_max   =  10
## lhs_items     =  2
## aggr.fun  =  function (x, ...)  UseMethod("mean")
## col   =  c("#EE0000FF", "#EE0303FF", "#EE0606FF", "#EE0909FF", "#EE0C0CFF", "#EE0F0FFF", "#EE1212FF", "#EE1515FF", "#EE1818FF", "#EE1B1BFF", "#EE1E1EFF", "#EE2222FF", "#EE2525FF", "#EE2828FF", "#EE2B2BFF", "#EE2E2EFF", "#EE3131FF", "#EE3434FF", "#EE3737FF", "#EE3A3AFF", "#EE3D3DFF", "#EE4040FF", "#EE4444FF", "#EE4747FF", "#EE4A4AFF", "#EE4D4DFF", "#EE5050FF", "#EE5353FF", "#EE5656FF", "#EE5959FF", "#EE5C5CFF", "#EE5F5FFF", "#EE6262FF", "#EE6666FF", "#EE6969FF", "#EE6C6CFF", "#EE6F6FFF", "#EE7272FF", "#EE7575FF",  "#EE7878FF", "#EE7B7BFF", "#EE7E7EFF", "#EE8181FF", "#EE8484FF", "#EE8888FF", "#EE8B8BFF", "#EE8E8EFF", "#EE9191FF", "#EE9494FF", "#EE9797FF", "#EE9999FF", "#EE9B9BFF", "#EE9D9DFF", "#EE9F9FFF", "#EEA0A0FF", "#EEA2A2FF", "#EEA4A4FF", "#EEA5A5FF", "#EEA7A7FF", "#EEA9A9FF", "#EEABABFF", "#EEACACFF", "#EEAEAEFF", "#EEB0B0FF", "#EEB1B1FF", "#EEB3B3FF", "#EEB5B5FF", "#EEB7B7FF", "#EEB8B8FF", "#EEBABAFF", "#EEBCBCFF", "#EEBDBDFF", "#EEBFBFFF", "#EEC1C1FF", "#EEC3C3FF", "#EEC4C4FF", "#EEC6C6FF", "#EEC8C8FF",  "#EEC9C9FF", "#EECBCBFF", "#EECDCDFF", "#EECFCFFF", "#EED0D0FF", "#EED2D2FF", "#EED4D4FF", "#EED5D5FF", "#EED7D7FF", "#EED9D9FF", "#EEDBDBFF", "#EEDCDCFF", "#EEDEDEFF", "#EEE0E0FF", "#EEE1E1FF", "#EEE3E3FF", "#EEE5E5FF", "#EEE7E7FF", "#EEE8E8FF", "#EEEAEAFF", "#EEECECFF", "#EEEEEEFF")
## reverse   =  TRUE
## xlab  =  NULL
## ylab  =  NULL
## legend    =  Size: support  Color: lift
## spacing   =  -1
## panel.function    =  function (row, size, shading, spacing)  {     size[size == 0] <- NA     shading[is.na(shading)] <- 1     grid.circle(x = c(1:length(size)), y = row, r = size/2 * (1 - spacing), default.units = "native", gp = gpar(fill = shading, col = shading, alpha = 0.9)) }
## gp_main   =  list(cex = 1.2, fontface = "bold", font = c(bold = 2))
## gp_labels     =  list(cex = 0.8)
## gp_labs   =  list(cex = 1.2, fontface = "bold", font = c(bold = 2))
## gp_lines  =  list(col = "gray", lty = 3)
## newpage   =  TRUE
## max.shading   =  NA
## engine    =  default
## verbose   =  FALSE