K-NN Algorithm

# Load the built-in iris data set and preview its first rows.
data("iris")
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Inspect the structure: four numeric measurements plus the Species factor.
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Count the observations per species to check the class balance.
table(iris$Species)
## 
##     setosa versicolor  virginica 
##         50         50         50
# NOTE(review): repeats the head(iris) call made right after loading the data;
# the data has not changed in between, so this preview is redundant.
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Fix the RNG seed so the random sort keys drawn next are reproducible.
set.seed(9850)
# One uniform random number per row; used further down as a sort key to
# shuffle the rows.
gp <- runif(nrow(iris))
gp
##   [1] 0.749575882 0.997086017 0.652001954 0.432928278 0.332312413
##   [6] 0.865406471 0.179331242 0.478493654 0.295795314 0.664406579
##  [11] 0.711770326 0.780119344 0.135679218 0.081918849 0.631659831
##  [16] 0.529684246 0.017083371 0.947442812 0.288261390 0.010243757
##  [21] 0.889722764 0.099838558 0.976578544 0.626311010 0.818966399
##  [26] 0.814217131 0.228938267 0.393345113 0.963064569 0.158522949
##  [31] 0.365362736 0.815672850 0.160475621 0.325075457 0.956077240
##  [36] 0.221829941 0.240358861 0.654842327 0.833372210 0.223317408
##  [41] 0.567319461 0.645643225 0.046593236 0.166861771 0.095600741
##  [46] 0.280708688 0.274372719 0.306021348 0.466236177 0.714833845
##  [51] 0.819010729 0.413965707 0.033480894 0.163171474 0.614522173
##  [56] 0.625591099 0.987969234 0.590955717 0.291943232 0.848013638
##  [61] 0.239626787 0.227112662 0.014037226 0.235208923 0.348486998
##  [66] 0.752009868 0.397804687 0.173633337 0.115410871 0.096681800
##  [71] 0.772989324 0.579852495 0.592361025 0.317120232 0.265488403
##  [76] 0.736040238 0.725467216 0.866390575 0.774121353 0.041318237
##  [81] 0.717216028 0.618082392 0.025139597 0.958615328 0.237069942
##  [86] 0.157850945 0.919268952 0.373450233 0.612524643 0.580367921
##  [91] 0.040488273 0.606024550 0.253320484 0.617464615 0.670980009
##  [96] 0.498678416 0.539632167 0.620603637 0.743601094 0.707388356
## [101] 0.881044094 0.140365195 0.009580307 0.641032226 0.919105340
## [106] 0.968964117 0.157093387 0.494844032 0.453633379 0.211360556
## [111] 0.803511472 0.522870498 0.537666918 0.922744670 0.975340607
## [116] 0.223733670 0.371116180 0.035312952 0.322225733 0.157145070
## [121] 0.168501345 0.063806432 0.489302015 0.968255649 0.474587146
## [126] 0.269455400 0.451698734 0.220212331 0.986794680 0.775118793
## [131] 0.658783297 0.088049627 0.815227871 0.743987815 0.083619478
## [136] 0.465915223 0.759169244 0.886477632 0.603841368 0.155911440
## [141] 0.747532928 0.689924665 0.986163957 0.512607964 0.311356318
## [146] 0.644091529 0.804400820 0.133151028 0.757746826 0.420075185
# Reorder the rows by their random keys in gp, i.e. shuffle the data.
# NOTE(review): this replaces the unshuffled copy loaded by data("iris"); the
# original row order is no longer available under this name.
iris <- iris[order(gp),] # Shuffle the data
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  7.1 5.1 6 5.4 5.8 6.9 7.7 5.5 5.7 4.4 ...
##  $ Sepal.Width : num  3 3.8 2.2 3.9 2.7 3.1 3.8 2.6 2.6 3.2 ...
##  $ Petal.Length: num  5.9 1.5 4 1.3 3.9 4.9 6.7 4.4 3.5 1.3 ...
##  $ Petal.Width : num  2.1 0.3 1 0.4 1.2 1.5 2.2 1.2 1 0.2 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 3 1 2 1 2 2 3 2 2 1 ...
# Preview the shuffled rows; the row names still show the original positions.
head(iris)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 103          7.1         3.0          5.9         2.1  virginica
## 20           5.1         3.8          1.5         0.3     setosa
## 63           6.0         2.2          4.0         1.0 versicolor
## 17           5.4         3.9          1.3         0.4     setosa
## 83           5.8         2.7          3.9         1.2 versicolor
## 53           6.9         3.1          4.9         1.5 versicolor
# Per-column summary statistics. The four measurements span different ranges,
# which is why they are rescaled before the distance-based k-NN step.
summary(iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Min-max rescale a numeric vector to the interval [0, 1].
#
# Arguments:
#   x      Numeric vector.
#   na.rm  If TRUE, NAs are ignored when computing the range and stay NA in
#          the result. The default (FALSE) keeps the original behaviour: any
#          NA in x makes the whole result NA.
#
# Returns a numeric vector of the same length as x. A vector whose values are
# all equal has zero range; it is mapped to 0 instead of the NaN that 0 / 0
# would produce, so a constant column cannot inject NaN into later steps.
normalize <- function(x, na.rm = FALSE) {
  rng <- range(x, na.rm = na.rm)
  span <- rng[2] - rng[1]
  if (!is.na(span) && span == 0) {
    # Constant input: x - min is 0 for every non-missing value.
    return(x - rng[1])
  }
  # The scalars rng[1] and span are recycled across all elements of x.
  (x - rng[1]) / span
}
# Rescale the four numeric columns (1:4) with normalize(); column 5 (Species)
# is left out and is read from `iris` later.
# NOTE(review): min and max are computed over all rows before the train/test
# split below, so the held-out rows influence the scaling.
iris_n <- data.frame(lapply(iris[, 1:4], normalize))
str(iris_n)
## 'data.frame':    150 obs. of  4 variables:
##  $ Sepal.Length: num  0.778 0.222 0.472 0.306 0.417 ...
##  $ Sepal.Width : num  0.4167 0.75 0.0833 0.7917 0.2917 ...
##  $ Petal.Length: num  0.8305 0.0847 0.5085 0.0508 0.4915 ...
##  $ Petal.Width : num  0.8333 0.0833 0.375 0.125 0.4583 ...
# Check the rescaling: every column should now have Min 0 and Max 1.
summary(iris_n)
##   Sepal.Length     Sepal.Width      Petal.Length     Petal.Width     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.2222   1st Qu.:0.3333   1st Qu.:0.1017   1st Qu.:0.08333  
##  Median :0.4167   Median :0.4167   Median :0.5678   Median :0.50000  
##  Mean   :0.4287   Mean   :0.4406   Mean   :0.4675   Mean   :0.45806  
##  3rd Qu.:0.5833   3rd Qu.:0.5417   3rd Qu.:0.6949   3rd Qu.:0.70833  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000
# Split the shuffled data: the first n_train rows train the model, the
# remaining rows are held out for testing. The row indices are built once so
# that features and labels cannot drift out of alignment, and the end of the
# test range follows the actual number of rows instead of a hard-coded 150.
n_train <- 129L
train_rows <- seq_len(n_train)
test_rows <- seq(n_train + 1L, nrow(iris_n))
# Features come from the normalized copy ...
iris_train <- iris_n[train_rows, ]
iris_test <- iris_n[test_rows, ]
# ... and the labels (column 5, Species) from the shuffled original, whose
# rows are in the same order as iris_n.
iris_train_target <- iris[train_rows, 5]
iris_test_target <- iris[test_rows, 5]
iris_test_target
##  [1] versicolor setosa     versicolor virginica  virginica  setosa    
##  [7] virginica  versicolor virginica  setosa     setosa     versicolor
## [13] setosa     virginica  virginica  virginica  setosa     virginica 
## [19] virginica  versicolor setosa    
## Levels: setosa versicolor virginica
# Rule of thumb for choosing k: the square root of the number of observations.
# NOTE(review): this uses all 150 rows, not only the 129 training rows.
sqrt(nrow(iris))
## [1] 12.24745
# Number of neighbours: sqrt(nrow(iris)) is about 12.2; 13 is used.
# Defined once and passed to knn() so the two values cannot diverge.
k <- 13
# Predict a species for every test row from its k nearest training rows.
# NOTE(review): knn() is not defined in this file; presumably class::knn,
# loaded in a setup chunk that is not shown - confirm.
m1 <- knn(train = iris_train,
          test = iris_test,
          cl = iris_train_target,
          k = k)
m1
##  [1] versicolor setosa     virginica  virginica  virginica  setosa    
##  [7] virginica  versicolor virginica  setosa     setosa     virginica 
## [13] setosa     virginica  virginica  virginica  setosa     virginica 
## [19] virginica  versicolor setosa    
## Levels: setosa versicolor virginica
# Confusion matrix: rows are the true species, columns the k-NN predictions.
table(iris_test_target, 
      m1)
##                 m1
## iris_test_target setosa versicolor virginica
##       setosa          7          0         0
##       versicolor      0          3         2
##       virginica       0          0         9

Classification trees

# Draw fresh random sort keys and reshuffle the rows into irisr.
# NOTE(review): no set.seed() call precedes this runif(), unlike the other
# shuffles in this file, so this split depends on the current RNG state.
g <- runif(nrow(iris)) # Shuffle the data
irisr <- iris[order(g), ]
# Fit a C5.0 tree on the first 100 shuffled rows. This reuses the name m1 and
# so replaces the k-NN predictions stored there.
# NOTE(review): C5.0() is not defined in this file; presumably from the C50
# package, loaded in a setup chunk that is not shown - confirm.
m1 <- C5.0(irisr[1:100, -5], # Predictors: every column except Species (column 5)
           irisr[1:100, 5])  # Target: the Species column only
m1
## 
## Call:
## C5.0.default(x = irisr[1:100, -5], y = irisr[1:100, 5])
## 
## Classification Tree
## Number of samples: 100 
## Number of predictors: 4 
## 
## Tree size: 4 
## 
## Non-standard options: attempt to group attributes
# Print the fitted tree, its error on the training rows and the attribute usage.
summary(m1)
## 
## Call:
## C5.0.default(x = irisr[1:100, -5], y = irisr[1:100, 5])
## 
## 
## C5.0 [Release 2.07 GPL Edition]      Tue Mar 05 16:03:34 2019
## -------------------------------
## 
## Class specified by attribute `outcome'
## 
## Read 100 cases (5 attributes) from undefined.data
## 
## Decision tree:
## 
## Petal.Length <= 1.9: setosa (35)
## Petal.Length > 1.9:
## :...Petal.Width > 1.6: virginica (28)
##     Petal.Width <= 1.6:
##     :...Petal.Length <= 4.9: versicolor (34)
##         Petal.Length > 4.9: virginica (3)
## 
## 
## Evaluation on training data (100 cases):
## 
##      Decision Tree   
##    ----------------  
##    Size      Errors  
## 
##       4    0( 0.0%)   <<
## 
## 
##     (a)   (b)   (c)    <-classified as
##    ----  ----  ----
##      35                (a): class setosa
##            34          (b): class versicolor
##                  31    (c): class virginica
## 
## 
##  Attribute usage:
## 
##  100.00% Petal.Length
##   65.00% Petal.Width
## 
## 
## Time: 0.0 secs
# Predict the species of the 50 held-out rows (101:150) with the C5.0 tree.
p1 <- predict(m1, irisr[101:150, ])
p1
##  [1] virginica  versicolor setosa     virginica  versicolor virginica 
##  [7] virginica  virginica  versicolor versicolor versicolor setosa    
## [13] virginica  setosa     virginica  setosa     versicolor setosa    
## [19] versicolor setosa     virginica  virginica  virginica  virginica 
## [25] versicolor virginica  setosa     setosa     setosa     virginica 
## [31] virginica  virginica  versicolor setosa     setosa     virginica 
## [37] versicolor versicolor virginica  virginica  versicolor versicolor
## [43] setosa     setosa     virginica  virginica  setosa     setosa    
## [49] virginica  virginica 
## Levels: setosa versicolor virginica
# Confusion matrix on the held-out rows: actual species (rows) against the
# C5.0 predictions (columns).
table(irisr[101:150, 5], p1)
##             p1
##              setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         13         3
##   virginica       0          0        19
# Draw the fitted C5.0 tree.
plot(m1)

Classification tree with rpart

# Reshuffle with a fixed seed so this split is reproducible.
set.seed(123)
g <- runif(nrow(iris))
irisr <- iris[order(g),]
# Fit a tree on the first 100 shuffled rows with Species as the target and all
# other columns as predictors.
# NOTE(review): rpart() is not defined in this file; presumably from the rpart
# package, loaded in a setup chunk that is not shown - confirm.
m1 <- rpart(Species ~ ., data = irisr[1:100, ], method = "class") 
m1 # The target is a categorical feature, that's why method = "class"
## n= 100 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 100 62 setosa (0.38000000 0.33000000 0.29000000)  
##   2) Petal.Length< 2.6 38  0 setosa (1.00000000 0.00000000 0.00000000) *
##   3) Petal.Length>=2.6 62 29 versicolor (0.00000000 0.53225806 0.46774194)  
##     6) Petal.Width< 1.65 35  2 versicolor (0.00000000 0.94285714 0.05714286) *
##     7) Petal.Width>=1.65 27  0 virginica (0.00000000 0.00000000 1.00000000) *
# Plot the classification tree.
# NOTE(review): the transcript records a "Bad 'data' field" warning for this
# call, likely because the model was fit with data = irisr[1:100, ] (an
# expression rather than a stored data frame). Per the warning text, passing
# roundint = FALSE or refitting with model = TRUE silences it.
rpart.plot(m1)
## Warning: Bad 'data' field in model 'call' (expected a data.frame or a matrix).
## To silence this warning:
##     Call rpart.plot with roundint=FALSE,
##     or rebuild the rpart model with model=TRUE.

# Predict class labels (type = "class") for the 50 held-out rows, then
# cross-tabulate actual against predicted species.
p1 <- predict(m1, irisr[101:150, ], type = "class")
table(actual = irisr[101:150, 5], predicted = p1)
##             predicted
## actual       setosa versicolor virginica
##   setosa         12          0         0
##   versicolor      0         15         2
##   virginica       0          2        19

Regression trees

# Load the msleep data set and print it.
# NOTE(review): msleep is presumably the data set shipped with ggplot2; the
# package load is not shown in this file - confirm.
data("msleep")
msleep
## # A tibble: 83 x 11
##    name  genus vore  order conservation sleep_total sleep_rem sleep_cycle
##    <chr> <chr> <chr> <chr> <chr>              <dbl>     <dbl>       <dbl>
##  1 Chee~ Acin~ carni Carn~ lc                  12.1      NA        NA    
##  2 Owl ~ Aotus omni  Prim~ <NA>                17         1.8      NA    
##  3 Moun~ Aplo~ herbi Rode~ nt                  14.4       2.4      NA    
##  4 Grea~ Blar~ omni  Sori~ lc                  14.9       2.3       0.133
##  5 Cow   Bos   herbi Arti~ domesticated         4         0.7       0.667
##  6 Thre~ Brad~ herbi Pilo~ <NA>                14.4       2.2       0.767
##  7 Nort~ Call~ carni Carn~ vu                   8.7       1.4       0.383
##  8 Vesp~ Calo~ <NA>  Rode~ <NA>                 7        NA        NA    
##  9 Dog   Canis carni Carn~ domesticated        10.1       2.9       0.333
## 10 Roe ~ Capr~ herbi Arti~ lc                   3        NA        NA    
## # ... with 73 more rows, and 3 more variables: awake <dbl>, brainwt <dbl>,
## #   bodywt <dbl>
# Column types: five character columns followed by six numeric ones.
str(msleep)
## Classes 'tbl_df', 'tbl' and 'data.frame':    83 obs. of  11 variables:
##  $ name        : chr  "Cheetah" "Owl monkey" "Mountain beaver" "Greater short-tailed shrew" ...
##  $ genus       : chr  "Acinonyx" "Aotus" "Aplodontia" "Blarina" ...
##  $ vore        : chr  "carni" "omni" "herbi" "omni" ...
##  $ order       : chr  "Carnivora" "Primates" "Rodentia" "Soricomorpha" ...
##  $ conservation: chr  "lc" NA "nt" "lc" ...
##  $ sleep_total : num  12.1 17 14.4 14.9 4 14.4 8.7 7 10.1 3 ...
##  $ sleep_rem   : num  NA 1.8 2.4 2.3 0.7 2.2 1.4 NA 2.9 NA ...
##  $ sleep_cycle : num  NA NA NA 0.133 0.667 ...
##  $ awake       : num  11.9 7 9.6 9.1 20 9.6 15.3 17 13.9 21 ...
##  $ brainwt     : num  NA 0.0155 NA 0.00029 0.423 NA NA NA 0.07 0.0982 ...
##  $ bodywt      : num  50 0.48 1.35 0.019 600 ...
# Keep only the columns used by the regression tree: vore, the target
# sleep_total, and the two weight columns. Selecting by name instead of by
# position (3, 6, 10, 11) keeps the subset correct if the column order of
# msleep ever changes, and makes the selection readable.
# NOTE(review): the name `df` masks stats::df (the F density) in this workspace.
df <- msleep[, c("vore", "sleep_total", "brainwt", "bodywt")]
df
## # A tibble: 83 x 4
##    vore  sleep_total  brainwt  bodywt
##    <chr>       <dbl>    <dbl>   <dbl>
##  1 carni        12.1 NA        50    
##  2 omni         17    0.0155    0.48 
##  3 herbi        14.4 NA         1.35 
##  4 omni         14.9  0.00029   0.019
##  5 herbi         4    0.423   600    
##  6 herbi        14.4 NA         3.85 
##  7 carni         8.7 NA        20.5  
##  8 <NA>          7   NA         0.045
##  9 carni        10.1  0.07     14    
## 10 herbi         3    0.0982   14.8  
## # ... with 73 more rows
# Convert vore from character to factor so that it is handled as a
# categorical predictor.
df$vore <- as.factor(df$vore)
str(df)
## Classes 'tbl_df', 'tbl' and 'data.frame':    83 obs. of  4 variables:
##  $ vore       : Factor w/ 4 levels "carni","herbi",..: 1 4 2 4 2 2 1 NA 1 2 ...
##  $ sleep_total: num  12.1 17 14.4 14.9 4 14.4 8.7 7 10.1 3 ...
##  $ brainwt    : num  NA 0.0155 NA 0.00029 0.423 NA NA NA 0.07 0.0982 ...
##  $ bodywt     : num  50 0.48 1.35 0.019 600 ...
# Count rows per vore level; rows with a missing vore are not counted, so the
# counts add up to fewer than the 83 rows.
table(df$vore)
## 
##   carni   herbi insecti    omni 
##      19      32       5      20
# levels(df$vore)
# Regression tree: the target sleep_total is numeric, hence method = "anova".
# The formula `~ .` uses all remaining columns (vore, brainwt, bodywt) as
# predictors.
m1 <- rpart(sleep_total ~ ., data = df, method = "anova") 
m1
## n= 83 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 83 1624.066000 10.433730  
##    2) bodywt>=167.947 9    7.868889  3.488889 *
##    3) bodywt< 167.947 74 1129.325000 11.278380  
##      6) bodywt>=1.85 31  458.593500  9.361290  
##       12) vore=herbi 7   88.337140  6.642857 *
##       13) vore=carni,insecti,omni 24  303.439600 10.154170  
##         26) brainwt>=0.136 13  128.669200  9.392308 *
##         27) brainwt< 0.136 11  158.307300 11.054550 *
##      7) bodywt< 1.85 43  474.662800 12.660470  
##       14) vore=omni 13  141.370800 11.638460 *
##       15) vore=carni,herbi,insecti 30  313.829700 13.103330 *
# method = "anova" was used for the fit because the target (sleep_total) is a
# numeric feature, not a factor.
# Plot the regression tree; digits = 3 and type = 3 are rpart.plot display options.
rpart.plot(m1, digits = 3, type = 3)

# Predict on the same data the tree was fit on, so any error computed from p1
# is a training error, not an estimate of out-of-sample error.
p1 <- predict(m1, df) # In this case there's no test set
p1
##         1         2         3         4         5         6         7 
##  9.392308 11.638462 13.103333 11.638462  3.488889  6.642857  9.392308 
##         8         9        10        11        12        13        14 
## 13.103333 11.054545  6.642857  6.642857 13.103333 11.054545 13.103333 
##        15        16        17        18        19        20        21 
## 11.638462 11.638462 11.638462 11.054545  6.642857 11.638462  3.488889 
##        22        23        24        25        26        27        28 
## 13.103333  3.488889  3.488889 11.638462 11.054545 13.103333 11.054545 
##        29        30        31        32        33        34        35 
## 11.638462  3.488889  3.488889  9.392308  6.642857  9.392308 13.103333 
##        36        37        38        39        40        41        42 
##  3.488889 13.103333  9.392308 13.103333 13.103333 13.103333 13.103333 
##        43        44        45        46        47        48        49 
## 13.103333 13.103333 13.103333 13.103333 13.103333  6.642857  6.642857 
##        50        51        52        53        54        55        56 
##  9.392308  9.392308  9.392308  9.392308  9.392308 13.103333 11.638462 
##        57        58        59        60        61        62        63 
## 13.103333 13.103333  9.392308  9.392308 13.103333 11.054545 11.054545 
##        64        65        66        67        68        69        70 
## 13.103333 11.638462 11.638462 13.103333 13.103333 13.103333 13.103333 
##        71        72        73        74        75        76        77 
## 13.103333 13.103333 13.103333  9.392308 11.054545 13.103333  3.488889 
##        78        79        80        81        82        83 
## 11.638462 11.638462  3.488889 11.054545 11.054545 11.054545
# Mean absolute error between observed and predicted values.
#
# Arguments:
#   actual     Numeric vector of observed values.
#   predicted  Numeric vector of predictions with the same length as actual.
#              A single value is also accepted for either argument (e.g. a
#              constant baseline prediction).
#   na.rm      If TRUE, pairs containing a missing value are dropped before
#              averaging. The default (FALSE) keeps the original behaviour,
#              where any NA makes the result NA.
#
# Returns a single number: the mean of |actual - predicted|.
# Stops with an error when the lengths differ and neither is 1, because R
# would otherwise recycle the shorter vector and return a meaningless value.
MAE <- function(actual, predicted, na.rm = FALSE) {
  n_actual <- length(actual)
  n_predicted <- length(predicted)
  if (n_actual != n_predicted && n_actual != 1L && n_predicted != 1L) {
    stop(
      "`actual` and `predicted` must have the same length (or one of them length 1)",
      call. = FALSE
    )
  }
  mean(abs(actual - predicted), na.rm = na.rm)
}
# Mean absolute error of the regression tree on the data it was fit on.
MAE(df$sleep_total, p1) # For model comparison
## [1] 2.452865
# Association rules: read the grocery baskets as transactions, with the items
# of a basket separated by commas.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists.
# NOTE(review): read.transactions() is not defined in this file; presumably
# from the arules package, loaded in a setup chunk that is not shown - confirm.
Groc <- read.transactions("C:/Users/aal_l/Documents/R/groceries.csv", sep = ",")
Groc
## transactions in sparse format with
##  9835 transactions (rows) and
##  169 items (columns)
# Overview of the transactions: matrix density, most frequent items and the
# distribution of basket sizes.
summary(Groc)
## transactions as itemMatrix in sparse format with
##  9835 rows (elements/itemsets/transactions) and
##  169 columns (items) and a density of 0.02609146 
## 
## most frequent items:
##       whole milk other vegetables       rolls/buns             soda 
##             2513             1903             1809             1715 
##           yogurt          (Other) 
##             1372            34055 
## 
## element (itemset/transaction) length distribution:
## sizes
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## 2159 1643 1299 1005  855  645  545  438  350  246  182  117   78   77   55 
##   16   17   18   19   20   21   22   23   24   26   27   28   29   32 
##   46   29   14   14    9   11    4    6    1    1    1    1    3    1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   3.000   4.409   6.000  32.000 
## 
## includes extended item information - examples:
##             labels
## 1 abrasive cleaner
## 2 artif. sweetener
## 3   baby cosmetics
# Show the items of the first five transactions.
inspect(Groc[1:5])
##     items                     
## [1] {citrus fruit,            
##      margarine,               
##      ready soups,             
##      semi-finished bread}     
## [2] {coffee,                  
##      tropical fruit,          
##      yogurt}                  
## [3] {whole milk}              
## [4] {cream cheese,            
##      meat spreads,            
##      pip fruit,               
##      yogurt}                  
## [5] {condensed milk,          
##      long life bakery product,
##      other vegetables,        
##      whole milk}
# Support of every item: the fraction of transactions that contain it.
# The empty subset `[,]` selects all transactions and all items.
itemFrequency(Groc[,]) 
##          abrasive cleaner          artif. sweetener 
##              0.0035587189              0.0032536858 
##            baby cosmetics                 baby food 
##              0.0006100661              0.0001016777 
##                      bags             baking powder 
##              0.0004067107              0.0176919166 
##          bathroom cleaner                      beef 
##              0.0027452974              0.0524656838 
##                   berries                 beverages 
##              0.0332486019              0.0260294865 
##              bottled beer             bottled water 
##              0.0805287239              0.1105236401 
##                    brandy               brown bread 
##              0.0041687850              0.0648703610 
##                    butter               butter milk 
##              0.0554143366              0.0279613625 
##                  cake bar                   candles 
##              0.0132180986              0.0089476360 
##                     candy               canned beer 
##              0.0298932384              0.0776817489 
##               canned fish              canned fruit 
##              0.0150482969              0.0032536858 
##         canned vegetables                  cat food 
##              0.0107778343              0.0232841891 
##                   cereals               chewing gum 
##              0.0056939502              0.0210472801 
##                   chicken                 chocolate 
##              0.0429079817              0.0496187087 
##     chocolate marshmallow              citrus fruit 
##              0.0090493137              0.0827656329 
##                   cleaner           cling film/bags 
##              0.0050838841              0.0113879004 
##              cocoa drinks                    coffee 
##              0.0022369090              0.0580579563 
##            condensed milk         cooking chocolate 
##              0.0102694459              0.0025419420 
##                  cookware                     cream 
##              0.0027452974              0.0013218099 
##              cream cheese                      curd 
##              0.0396542959              0.0532791052 
##               curd cheese               decalcifier 
##              0.0050838841              0.0015251652 
##               dental care                   dessert 
##              0.0057956279              0.0371123538 
##                 detergent              dish cleaner 
##              0.0192170819              0.0104728012 
##                    dishes                  dog food 
##              0.0175902389              0.0085409253 
##             domestic eggs  female sanitary products 
##              0.0634468734              0.0061006609 
##         finished products                      fish 
##              0.0065073716              0.0029486528 
##                     flour            flower (seeds) 
##              0.0173868836              0.0103711235 
##    flower soil/fertilizer               frankfurter 
##              0.0019318760              0.0589730554 
##            frozen chicken            frozen dessert 
##              0.0006100661              0.0107778343 
##               frozen fish             frozen fruits 
##              0.0116929334              0.0012201322 
##              frozen meals    frozen potato products 
##              0.0283680732              0.0084392476 
##         frozen vegetables     fruit/vegetable juice 
##              0.0480935435              0.0722928317 
##                    grapes                hair spray 
##              0.0223690900              0.0011184545 
##                       ham            hamburger meat 
##              0.0260294865              0.0332486019 
##               hard cheese                     herbs 
##              0.0245043213              0.0162684291 
##                     honey    house keeping products 
##              0.0015251652              0.0083375699 
##          hygiene articles                 ice cream 
##              0.0329435689              0.0250127097 
##            instant coffee     Instant food products 
##              0.0074224708              0.0080325369 
##                       jam                   ketchup 
##              0.0053889171              0.0042704626 
##            kitchen towels           kitchen utensil 
##              0.0059989832              0.0004067107 
##               light bulbs                   liqueur 
##              0.0041687850              0.0009150991 
##                    liquor        liquor (appetizer) 
##              0.0110828673              0.0079308592 
##                liver loaf  long life bakery product 
##              0.0050838841              0.0374173869 
##           make up remover            male cosmetics 
##              0.0008134215              0.0045754957 
##                 margarine                mayonnaise 
##              0.0585663447              0.0091509914 
##                      meat              meat spreads 
##              0.0258261312              0.0042704626 
##           misc. beverages                   mustard 
##              0.0283680732              0.0119979664 
##                   napkins                newspapers 
##              0.0523640061              0.0798169802 
##                 nut snack               nuts/prunes 
##              0.0031520081              0.0033553635 
##                       oil                    onions 
##              0.0280630402              0.0310116929 
##          organic products           organic sausage 
##              0.0016268429              0.0022369090 
##          other vegetables packaged fruit/vegetables 
##              0.1934926284              0.0130147433 
##                     pasta                    pastry 
##              0.0150482969              0.0889679715 
##                  pet care                photo/film 
##              0.0094560244              0.0092526690 
##        pickled vegetables                 pip fruit 
##              0.0178952720              0.0756481952 
##                   popcorn                      pork 
##              0.0072191154              0.0576512456 
##                pot plants           potato products 
##              0.0172852059              0.0028469751 
##     preservation products          processed cheese 
##              0.0002033554              0.0165734621 
##                  prosecco            pudding powder 
##              0.0020335536              0.0023385867 
##               ready soups            red/blush wine 
##              0.0018301983              0.0192170819 
##                      rice             roll products 
##              0.0076258261              0.0102694459 
##                rolls/buns           root vegetables 
##              0.1839349263              0.1089984748 
##           rubbing alcohol                       rum 
##              0.0010167768              0.0044738180 
##            salad dressing                      salt 
##              0.0008134215              0.0107778343 
##               salty snack                    sauces 
##              0.0378240976              0.0054905948 
##                   sausage         seasonal products 
##              0.0939501779              0.0142348754 
##       semi-finished bread             shopping bags 
##              0.0176919166              0.0985256736 
##                 skin care             sliced cheese 
##              0.0035587189              0.0245043213 
##            snack products                      soap 
##              0.0030503305              0.0026436197 
##                      soda               soft cheese 
##              0.1743772242              0.0170818505 
##                  softener      sound storage medium 
##              0.0054905948              0.0001016777 
##                     soups            sparkling wine 
##              0.0068124047              0.0055922725 
##             specialty bar          specialty cheese 
##              0.0273512964              0.0085409253 
##       specialty chocolate             specialty fat 
##              0.0304016268              0.0036603965 
##      specialty vegetables                    spices 
##              0.0017285206              0.0051855618 
##             spread cheese                     sugar 
##              0.0111845450              0.0338586680 
##             sweet spreads                     syrup 
##              0.0090493137              0.0032536858 
##                       tea                   tidbits 
##              0.0038637519              0.0023385867 
##            toilet cleaner            tropical fruit 
##              0.0007117438              0.1049313676 
##                    turkey                  UHT-milk 
##              0.0081342145              0.0334519573 
##                   vinegar                   waffles 
##              0.0065073716              0.0384341637 
##        whipped/sour cream                    whisky 
##              0.0716827656              0.0008134215 
##               white bread                white wine 
##              0.0420945602              0.0190137265 
##                whole milk                    yogurt 
##              0.2555160142              0.1395017794 
##                  zwieback 
##              0.0069140824
# Plot the support of the items that appear in at least 10 % of the
# transactions (support = 0.10).
itemFrequencyPlot(Groc, support = 0.10) 

# (The previous plot showed the items present in at least 10 % of transactions.)
# Plot the support of the 20 most frequent items.
itemFrequencyPlot(Groc, topN = 20)

# Mine association rules. Confidence is the conditional probability of the
# right-hand side given the left-hand side. Keep rules with support >= 0.007
# (at least 68 transactions per the log below), confidence >= 0.25 and at
# least two items (minlen = 2, so no rules with an empty left-hand side).
# This reuses the name m1, replacing the regression tree stored there.
m1 <- apriori(Groc, parameter = list(support = 0.007, 
                                     confidence  = 0.25, 
                                     minlen = 2))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##        0.25    0.1    1 none FALSE            TRUE       5   0.007      2
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 68 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [104 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.01s].
## writing ... [363 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Print the rule set; this reports how many rules were found.
m1
## set of 363 rules
# Rule length distribution plus summaries of support, confidence, lift and count.
summary(m1)
## set of 363 rules
## 
## rule length distribution (lhs + rhs):sizes
##   2   3   4 
## 137 214  12 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.000   3.000   2.656   3.000   4.000 
## 
## summary of quality measures:
##     support           confidence          lift            count      
##  Min.   :0.007016   Min.   :0.2500   Min.   :0.9932   Min.   : 69.0  
##  1st Qu.:0.008134   1st Qu.:0.2962   1st Qu.:1.6060   1st Qu.: 80.0  
##  Median :0.009659   Median :0.3551   Median :1.9086   Median : 95.0  
##  Mean   :0.012945   Mean   :0.3743   Mean   :2.0072   Mean   :127.3  
##  3rd Qu.:0.013777   3rd Qu.:0.4420   3rd Qu.:2.3289   3rd Qu.:135.5  
##  Max.   :0.074835   Max.   :0.6389   Max.   :3.9565   Max.   :736.0  
## 
## mining info:
##  data ntransactions support confidence
##  Groc          9835   0.007       0.25
# Show the first ten rules in the order apriori() returned them.
inspect(m1[1:10])
##      lhs                      rhs                support     confidence
## [1]  {herbs}               => {root vegetables}  0.007015760 0.4312500 
## [2]  {herbs}               => {other vegetables} 0.007727504 0.4750000 
## [3]  {herbs}               => {whole milk}       0.007727504 0.4750000 
## [4]  {processed cheese}    => {whole milk}       0.007015760 0.4233129 
## [5]  {semi-finished bread} => {whole milk}       0.007117438 0.4022989 
## [6]  {detergent}           => {whole milk}       0.008947636 0.4656085 
## [7]  {pickled vegetables}  => {whole milk}       0.007117438 0.3977273 
## [8]  {baking powder}       => {other vegetables} 0.007320793 0.4137931 
## [9]  {baking powder}       => {whole milk}       0.009252669 0.5229885 
## [10] {flour}               => {whole milk}       0.008439248 0.4853801 
##      lift     count
## [1]  3.956477 69   
## [2]  2.454874 76   
## [3]  1.858983 76   
## [4]  1.656698 69   
## [5]  1.574457 70   
## [6]  1.822228 88   
## [7]  1.556565 70   
## [8]  2.138547 72   
## [9]  2.046793 91   
## [10] 1.899607 83
# Show the ten rules with the highest lift.
inspect(sort(m1, by = "lift")[1:10])
##      lhs                   rhs                      support confidence     lift count
## [1]  {herbs}            => {root vegetables}    0.007015760  0.4312500 3.956477    69
## [2]  {berries}          => {whipped/sour cream} 0.009049314  0.2721713 3.796886    89
## [3]  {other vegetables,                                                              
##       tropical fruit,                                                                
##       whole milk}       => {root vegetables}    0.007015760  0.4107143 3.768074    69
## [4]  {beef,                                                                          
##       other vegetables} => {root vegetables}    0.007930859  0.4020619 3.688692    78
## [5]  {other vegetables,                                                              
##       tropical fruit}   => {pip fruit}          0.009456024  0.2634561 3.482649    93
## [6]  {beef,                                                                          
##       whole milk}       => {root vegetables}    0.008032537  0.3779904 3.467851    79
## [7]  {other vegetables,                                                              
##       pip fruit}        => {tropical fruit}     0.009456024  0.3618677 3.448613    93
## [8]  {citrus fruit,                                                                  
##       other vegetables} => {root vegetables}    0.010371124  0.3591549 3.295045   102
## [9]  {other vegetables,                                                              
##       whole milk,                                                                    
##       yogurt}           => {tropical fruit}     0.007625826  0.3424658 3.263712    75
## [10] {other vegetables,                                                              
##       whole milk,                                                                    
##       yogurt}           => {root vegetables}    0.007829181  0.3515982 3.225716    77