# Load package arules
library(arules)
## Warning: package 'arules' was built under R version 4.1.3
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(arulesViz)
## Warning: package 'arulesViz' was built under R version 4.1.3
## Warning in register(): Can't find generic `scale_type` in package ggplot2 to
## register S3 method.
library(grid)
library(readxl)
pathname <- "C:\\Users\\katie\\OneDrive\\Documents\\Information Systems\\Data Mining"
setwd(pathname)
fname <- "winequality-both.xlsx"
data <- read_excel(fname, col_names = T)
head(data)
## # A tibble: 6 x 13
##   fixed_acidity volatile_acidity citric_acid residual_sugar chlorides
##           <dbl>            <dbl>       <dbl>          <dbl>     <dbl>
## 1           7               0.27        0.36           20.7     0.045
## 2           6.3             0.3         0.34            1.6     0.049
## 3           8.1             0.28        0.4             6.9     0.05 
## 4           7.2             0.23        0.32            8.5     0.058
## 5           7.2             0.23        0.32            8.5     0.058
## 6           8.1             0.28        0.4             6.9     0.05 
## # ... with 8 more variables: free_sulfur_dioxide <dbl>,
## #   total_sulfur_dioxide <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## #   alcohol <dbl>, quality <dbl>, type <chr>
encode_ordinal <- function(x, order = unique(x)) {
  x <- as.numeric(factor(x, levels = order, exclude = NULL))
  x}

encoded_classes <- encode_ordinal(data$type)

keep_cols <- c("fixed_acidity", "volatile_acidity", "citric_acid", "residual_sugar", "chlorides", "free_sulfur_dioxide", "total_sulfur_dioxide", "density","pH","sulphates","alcohol","quality")
data <- data[keep_cols]
data$wine_type <- encoded_classes
head(data)
## # A tibble: 6 x 13
##   fixed_acidity volatile_acidity citric_acid residual_sugar chlorides
##           <dbl>            <dbl>       <dbl>          <dbl>     <dbl>
## 1           7               0.27        0.36           20.7     0.045
## 2           6.3             0.3         0.34            1.6     0.049
## 3           8.1             0.28        0.4             6.9     0.05 
## 4           7.2             0.23        0.32            8.5     0.058
## 5           7.2             0.23        0.32            8.5     0.058
## 6           8.1             0.28        0.4             6.9     0.05 
## # ... with 8 more variables: free_sulfur_dioxide <dbl>,
## #   total_sulfur_dioxide <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## #   alcohol <dbl>, quality <dbl>, wine_type <dbl>
rules <- apriori(data, parameter = list(support = 0.01, confidence = 0.3))
## Warning: Column(s) 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 not logical or
## factor. Applying default discretization (see '? discretizeDF').
## Warning in discretize(x = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, : The calculated breaks are: 1, 1, 1, 2
##   Only unique breaks are used reducing the number of intervals. Look at ? discretize for details.
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.3    0.1    1 none FALSE            TRUE       5    0.01      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 64 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[37 item(s), 6497 transaction(s)] done [0.01s].
## sorting and recoding items ... [37 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 10
## Warning in apriori(data, parameter = list(support = 0.01, confidence = 0.3)):
## Mining stopped (maxlen reached). Only patterns up to a length of 10 returned!
##  done [0.28s].
## writing ... [565728 rule(s)] done [0.13s].
## creating S4 object  ... done [0.42s].
summary(rules)
## set of 565728 rules
## 
## rule length distribution (lhs + rhs):sizes
##      1      2      3      4      5      6      7      8      9     10 
##     36    831  10956  75013 168182 168127  96601  36811   8292    879 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   5.000   6.000   5.724   7.000  10.000 
## 
## summary of quality measures:
##     support          confidence        coverage            lift       
##  Min.   :0.01000   Min.   :0.3000   Min.   :0.01000   Min.   :0.4739  
##  1st Qu.:0.01185   1st Qu.:0.4440   1st Qu.:0.01816   1st Qu.:1.1234  
##  Median :0.01478   Median :0.6033   Median :0.02678   Median :1.4931  
##  Mean   :0.01949   Mean   :0.6411   Mean   :0.03459   Mean   :1.6451  
##  3rd Qu.:0.02124   3rd Qu.:0.8372   3rd Qu.:0.03940   3rd Qu.:2.0922  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.00000   Max.   :3.1621  
##      count       
##  Min.   :  65.0  
##  1st Qu.:  77.0  
##  Median :  96.0  
##  Mean   : 126.7  
##  3rd Qu.: 138.0  
##  Max.   :6497.0  
## 
## mining info:
##  data ntransactions support confidence
##  data          6497    0.01        0.3
##                                                                      call
##  apriori(data = data, parameter = list(support = 0.01, confidence = 0.3))
inspect(head(sort(rules, by ="lift")))
##     lhs                                rhs                          support confidence   coverage     lift count
## [1] {citric_acid=[0,0.27),                                                                                      
##      residual_sugar=[0.6,2.1),                                                                                  
##      density=[0.987,0.993),                                                                                     
##      pH=[3.28,4.01],                                                                                            
##      alcohol=[11,14.9],                                                                                         
##      quality=[6,9]}                 => {fixed_acidity=[3.8,6.6)} 0.01000462  0.9558824 0.01046637 3.162102    65
## [2] {citric_acid=[0,0.27),                                                                                      
##      residual_sugar=[0.6,2.1),                                                                                  
##      density=[0.987,0.993),                                                                                     
##      pH=[3.28,4.01],                                                                                            
##      alcohol=[11,14.9],                                                                                         
##      quality=[6,9],                                                                                             
##      wine_type=[1,2]}               => {fixed_acidity=[3.8,6.6)} 0.01000462  0.9558824 0.01046637 3.162102    65
## [3] {fixed_acidity=[6.6,7.4),                                                                                   
##      volatile_acidity=[0.35,1.58],                                                                              
##      chlorides=[0.055,0.611],                                                                                   
##      total_sulfur_dioxide=[6,95),                                                                               
##      density=[0.993,0.996),                                                                                     
##      alcohol=[9.7,11)}              => {citric_acid=[0,0.27)}    0.01169771  0.9870130 0.01185162 3.152716    76
## [4] {fixed_acidity=[6.6,7.4),                                                                                   
##      volatile_acidity=[0.35,1.58],                                                                              
##      chlorides=[0.055,0.611],                                                                                   
##      density=[0.993,0.996),                                                                                     
##      pH=[3.28,4.01],                                                                                            
##      alcohol=[9.7,11)}              => {citric_acid=[0,0.27)}    0.01169771  0.9870130 0.01185162 3.152716    76
## [5] {fixed_acidity=[6.6,7.4),                                                                                   
##      volatile_acidity=[0.35,1.58],                                                                              
##      chlorides=[0.055,0.611],                                                                                   
##      total_sulfur_dioxide=[6,95),                                                                               
##      density=[0.993,0.996),                                                                                     
##      alcohol=[9.7,11),                                                                                          
##      wine_type=[1,2]}               => {citric_acid=[0,0.27)}    0.01169771  0.9870130 0.01185162 3.152716    76
## [6] {fixed_acidity=[6.6,7.4),                                                                                   
##      volatile_acidity=[0.35,1.58],                                                                              
##      chlorides=[0.055,0.611],                                                                                   
##      density=[0.993,0.996),                                                                                     
##      pH=[3.28,4.01],                                                                                            
##      alcohol=[9.7,11),                                                                                          
##      wine_type=[1,2]}               => {citric_acid=[0,0.27)}    0.01169771  0.9870130 0.01185162 3.152716    76
plot(rules)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.