K-NN Algorithm
data("iris")
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
table(iris$Species)
##
## setosa versicolor virginica
## 50 50 50
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
set.seed(9850)
gp <- runif(nrow(iris))
gp
## [1] 0.749575882 0.997086017 0.652001954 0.432928278 0.332312413
## [6] 0.865406471 0.179331242 0.478493654 0.295795314 0.664406579
## [11] 0.711770326 0.780119344 0.135679218 0.081918849 0.631659831
## [16] 0.529684246 0.017083371 0.947442812 0.288261390 0.010243757
## [21] 0.889722764 0.099838558 0.976578544 0.626311010 0.818966399
## [26] 0.814217131 0.228938267 0.393345113 0.963064569 0.158522949
## [31] 0.365362736 0.815672850 0.160475621 0.325075457 0.956077240
## [36] 0.221829941 0.240358861 0.654842327 0.833372210 0.223317408
## [41] 0.567319461 0.645643225 0.046593236 0.166861771 0.095600741
## [46] 0.280708688 0.274372719 0.306021348 0.466236177 0.714833845
## [51] 0.819010729 0.413965707 0.033480894 0.163171474 0.614522173
## [56] 0.625591099 0.987969234 0.590955717 0.291943232 0.848013638
## [61] 0.239626787 0.227112662 0.014037226 0.235208923 0.348486998
## [66] 0.752009868 0.397804687 0.173633337 0.115410871 0.096681800
## [71] 0.772989324 0.579852495 0.592361025 0.317120232 0.265488403
## [76] 0.736040238 0.725467216 0.866390575 0.774121353 0.041318237
## [81] 0.717216028 0.618082392 0.025139597 0.958615328 0.237069942
## [86] 0.157850945 0.919268952 0.373450233 0.612524643 0.580367921
## [91] 0.040488273 0.606024550 0.253320484 0.617464615 0.670980009
## [96] 0.498678416 0.539632167 0.620603637 0.743601094 0.707388356
## [101] 0.881044094 0.140365195 0.009580307 0.641032226 0.919105340
## [106] 0.968964117 0.157093387 0.494844032 0.453633379 0.211360556
## [111] 0.803511472 0.522870498 0.537666918 0.922744670 0.975340607
## [116] 0.223733670 0.371116180 0.035312952 0.322225733 0.157145070
## [121] 0.168501345 0.063806432 0.489302015 0.968255649 0.474587146
## [126] 0.269455400 0.451698734 0.220212331 0.986794680 0.775118793
## [131] 0.658783297 0.088049627 0.815227871 0.743987815 0.083619478
## [136] 0.465915223 0.759169244 0.886477632 0.603841368 0.155911440
## [141] 0.747532928 0.689924665 0.986163957 0.512607964 0.311356318
## [146] 0.644091529 0.804400820 0.133151028 0.757746826 0.420075185
iris <- iris[order(gp),] # Shuffle up the data
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 7.1 5.1 6 5.4 5.8 6.9 7.7 5.5 5.7 4.4 ...
## $ Sepal.Width : num 3 3.8 2.2 3.9 2.7 3.1 3.8 2.6 2.6 3.2 ...
## $ Petal.Length: num 5.9 1.5 4 1.3 3.9 4.9 6.7 4.4 3.5 1.3 ...
## $ Petal.Width : num 2.1 0.3 1 0.4 1.2 1.5 2.2 1.2 1 0.2 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 3 1 2 1 2 2 3 2 2 1 ...
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 103 7.1 3.0 5.9 2.1 virginica
## 20 5.1 3.8 1.5 0.3 setosa
## 63 6.0 2.2 4.0 1.0 versicolor
## 17 5.4 3.9 1.3 0.4 setosa
## 83 5.8 2.7 3.9 1.2 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Min-max rescale a numeric vector to the [0, 1] interval.
#
# x: numeric vector.
# Returns a numeric vector of the same length as x. The scalar minimum and
# range are recycled across every element of x.
# A constant vector (zero range) maps to all zeros instead of the NaN values
# that 0 / 0 would produce. NA values in x still propagate to the result.
normalize <- function(x) {
  rng <- range(x)
  span <- rng[2] - rng[1]
  if (!is.na(span) && span == 0) {
    return(x - rng[1]) # every element equals the minimum, so this is all zeros
  }
  (x - rng[1]) / span
}
iris_n <- data.frame(lapply(iris[, 1:4], normalize))
str(iris_n)
## 'data.frame': 150 obs. of 4 variables:
## $ Sepal.Length: num 0.778 0.222 0.472 0.306 0.417 ...
## $ Sepal.Width : num 0.4167 0.75 0.0833 0.7917 0.2917 ...
## $ Petal.Length: num 0.8305 0.0847 0.5085 0.0508 0.4915 ...
## $ Petal.Width : num 0.8333 0.0833 0.375 0.125 0.4583 ...
summary(iris_n)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.2222 1st Qu.:0.3333 1st Qu.:0.1017 1st Qu.:0.08333
## Median :0.4167 Median :0.4167 Median :0.5678 Median :0.50000
## Mean :0.4287 Mean :0.4406 Mean :0.4675 Mean :0.45806
## 3rd Qu.:0.5833 3rd Qu.:0.5417 3rd Qu.:0.6949 3rd Qu.:0.70833
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
# Hold-out split on the already shuffled rows: the first 129 rows are used for
# training and the last 21 rows (130 to 150) for testing.
# Features come from the normalized copy (iris_n); the labels come from
# column 5 of iris, i.e. Species.
iris_train <- iris_n[1:129,]
iris_test <- iris_n[130:150,]
iris_train_target <- iris[1:129, 5]
iris_test_target <- iris[130:150, 5]
iris_test_target
## [1] versicolor setosa versicolor virginica virginica setosa
## [7] virginica versicolor virginica setosa setosa versicolor
## [13] setosa virginica virginica virginica setosa virginica
## [19] virginica versicolor setosa
## Levels: setosa versicolor virginica
sqrt(nrow(iris))
## [1] 12.24745
# Number of neighbours: sqrt(nrow(iris)) is about 12.25, rounded up to 13.
k <- 13
# Predict the class of every test row from the labelled training rows.
# NOTE(review): knn() is presumably class::knn — confirm which package is loaded.
# k is passed through instead of repeating the literal, so it is set in one place.
m1 <- knn(train = iris_train,
          test = iris_test,
          cl = iris_train_target,
          k = k)
m1
## [1] versicolor setosa virginica virginica virginica setosa
## [7] virginica versicolor virginica setosa setosa virginica
## [13] setosa virginica virginica virginica setosa virginica
## [19] virginica versicolor setosa
## Levels: setosa versicolor virginica
table(iris_test_target,
m1)
## m1
## iris_test_target setosa versicolor virginica
## setosa 7 0 0
## versicolor 0 3 2
## virginica 0 0 9
Classification trees
# Reshuffle the rows, then fit a C5.0 classification tree on the first 100 of them.
g <- runif(nrow(iris)) # Shuffle up the data
irisr <- iris[order(g), ]
m1 <- C5.0(irisr[1:100, -5], # Predictors only: drop the target column (training data)
irisr[1:100, 5]) # Target only
m1
##
## Call:
## C5.0.default(x = irisr[1:100, -5], y = irisr[1:100, 5])
##
## Classification Tree
## Number of samples: 100
## Number of predictors: 4
##
## Tree size: 4
##
## Non-standard options: attempt to group attributes
summary(m1)
##
## Call:
## C5.0.default(x = irisr[1:100, -5], y = irisr[1:100, 5])
##
##
## C5.0 [Release 2.07 GPL Edition] Tue Mar 05 16:03:34 2019
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 100 cases (5 attributes) from undefined.data
##
## Decision tree:
##
## Petal.Length <= 1.9: setosa (35)
## Petal.Length > 1.9:
## :...Petal.Width > 1.6: virginica (28)
## Petal.Width <= 1.6:
## :...Petal.Length <= 4.9: versicolor (34)
## Petal.Length > 4.9: virginica (3)
##
##
## Evaluation on training data (100 cases):
##
## Decision Tree
## ----------------
## Size Errors
##
## 4 0( 0.0%) <<
##
##
## (a) (b) (c) <-classified as
## ---- ---- ----
## 35 (a): class setosa
## 34 (b): class versicolor
## 31 (c): class virginica
##
##
## Attribute usage:
##
## 100.00% Petal.Length
## 65.00% Petal.Width
##
##
## Time: 0.0 secs
p1 <- predict(m1, irisr[101:150, ])
p1
## [1] virginica versicolor setosa virginica versicolor virginica
## [7] virginica virginica versicolor versicolor versicolor setosa
## [13] virginica setosa virginica setosa versicolor setosa
## [19] versicolor setosa virginica virginica virginica virginica
## [25] versicolor virginica setosa setosa setosa virginica
## [31] virginica virginica versicolor setosa setosa virginica
## [37] versicolor versicolor virginica virginica versicolor versicolor
## [43] setosa setosa virginica virginica setosa setosa
## [49] virginica virginica
## Levels: setosa versicolor virginica
table(irisr[101:150, 5], p1)
## p1
## setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 13 3
## virginica 0 0 19
plot(m1)

Classification tree with rpart
# Classification tree with rpart on a reproducible reshuffle of the data.
set.seed(123)
g <- runif(nrow(iris))
irisr <- iris[order(g), ]
# The target (Species) is a categorical feature, that's why method = "class".
# model = TRUE keeps a copy of the model frame inside the fit, which is what
# rpart.plot() asks for in its "Bad 'data' field in model 'call'" warning.
m1 <- rpart(Species ~ ., data = irisr[1:100, ], method = "class", model = TRUE)
m1
## n= 100
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 100 62 setosa (0.38000000 0.33000000 0.29000000)
## 2) Petal.Length< 2.6 38 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.6 62 29 versicolor (0.00000000 0.53225806 0.46774194)
## 6) Petal.Width< 1.65 35 2 versicolor (0.00000000 0.94285714 0.05714286) *
## 7) Petal.Width>=1.65 27 0 virginica (0.00000000 0.00000000 1.00000000) *
rpart.plot(m1)
## Warning: Bad 'data' field in model 'call' (expected a data.frame or a matrix).
## To silence this warning:
## Call rpart.plot with roundint=FALSE,
## or rebuild the rpart model with model=TRUE.

p1 <- predict(m1, irisr[101:150, ], type = "class")
table(actual = irisr[101:150, 5], predicted = p1)
## predicted
## actual setosa versicolor virginica
## setosa 12 0 0
## versicolor 0 15 2
## virginica 0 2 19
Regression trees
data("msleep")
msleep
## # A tibble: 83 x 11
## name genus vore order conservation sleep_total sleep_rem sleep_cycle
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Chee~ Acin~ carni Carn~ lc 12.1 NA NA
## 2 Owl ~ Aotus omni Prim~ <NA> 17 1.8 NA
## 3 Moun~ Aplo~ herbi Rode~ nt 14.4 2.4 NA
## 4 Grea~ Blar~ omni Sori~ lc 14.9 2.3 0.133
## 5 Cow Bos herbi Arti~ domesticated 4 0.7 0.667
## 6 Thre~ Brad~ herbi Pilo~ <NA> 14.4 2.2 0.767
## 7 Nort~ Call~ carni Carn~ vu 8.7 1.4 0.383
## 8 Vesp~ Calo~ <NA> Rode~ <NA> 7 NA NA
## 9 Dog Canis carni Carn~ domesticated 10.1 2.9 0.333
## 10 Roe ~ Capr~ herbi Arti~ lc 3 NA NA
## # ... with 73 more rows, and 3 more variables: awake <dbl>, brainwt <dbl>,
## # bodywt <dbl>
str(msleep)
## Classes 'tbl_df', 'tbl' and 'data.frame': 83 obs. of 11 variables:
## $ name : chr "Cheetah" "Owl monkey" "Mountain beaver" "Greater short-tailed shrew" ...
## $ genus : chr "Acinonyx" "Aotus" "Aplodontia" "Blarina" ...
## $ vore : chr "carni" "omni" "herbi" "omni" ...
## $ order : chr "Carnivora" "Primates" "Rodentia" "Soricomorpha" ...
## $ conservation: chr "lc" NA "nt" "lc" ...
## $ sleep_total : num 12.1 17 14.4 14.9 4 14.4 8.7 7 10.1 3 ...
## $ sleep_rem : num NA 1.8 2.4 2.3 0.7 2.2 1.4 NA 2.9 NA ...
## $ sleep_cycle : num NA NA NA 0.133 0.667 ...
## $ awake : num 11.9 7 9.6 9.1 20 9.6 15.3 17 13.9 21 ...
## $ brainwt : num NA 0.0155 NA 0.00029 0.423 NA NA NA 0.07 0.0982 ...
## $ bodywt : num 50 0.48 1.35 0.019 600 ...
# Keep the diet type (vore), the response (sleep_total) and the two weight
# predictors. Columns are selected by name rather than by position
# (3, 6, 10, 11), so the selection does not silently change if the column
# order of msleep ever differs.
df <- msleep[, c("vore", "sleep_total", "brainwt", "bodywt")]
df
## # A tibble: 83 x 4
## vore sleep_total brainwt bodywt
## <chr> <dbl> <dbl> <dbl>
## 1 carni 12.1 NA 50
## 2 omni 17 0.0155 0.48
## 3 herbi 14.4 NA 1.35
## 4 omni 14.9 0.00029 0.019
## 5 herbi 4 0.423 600
## 6 herbi 14.4 NA 3.85
## 7 carni 8.7 NA 20.5
## 8 <NA> 7 NA 0.045
## 9 carni 10.1 0.07 14
## 10 herbi 3 0.0982 14.8
## # ... with 73 more rows
df$vore <- as.factor(df$vore)
str(df)
## Classes 'tbl_df', 'tbl' and 'data.frame': 83 obs. of 4 variables:
## $ vore : Factor w/ 4 levels "carni","herbi",..: 1 4 2 4 2 2 1 NA 1 2 ...
## $ sleep_total: num 12.1 17 14.4 14.9 4 14.4 8.7 7 10.1 3 ...
## $ brainwt : num NA 0.0155 NA 0.00029 0.423 NA NA NA 0.07 0.0982 ...
## $ bodywt : num 50 0.48 1.35 0.019 600 ...
table(df$vore)
##
## carni herbi insecti omni
## 19 32 5 20
# levels(df$vore)
m1 <- rpart(sleep_total ~ ., data = df, method = "anova")
m1
## n= 83
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 83 1624.066000 10.433730
## 2) bodywt>=167.947 9 7.868889 3.488889 *
## 3) bodywt< 167.947 74 1129.325000 11.278380
## 6) bodywt>=1.85 31 458.593500 9.361290
## 12) vore=herbi 7 88.337140 6.642857 *
## 13) vore=carni,insecti,omni 24 303.439600 10.154170
## 26) brainwt>=0.136 13 128.669200 9.392308 *
## 27) brainwt< 0.136 11 158.307300 11.054550 *
## 7) bodywt< 1.85 43 474.662800 12.660470
## 14) vore=omni 13 141.370800 11.638460 *
## 15) vore=carni,herbi,insecti 30 313.829700 13.103330 *
# method = "anova" because the target (sleep_total) is a numeric feature, so this is a regression tree
rpart.plot(m1, digits = 3, type = 3)

p1 <- predict(m1, df) # In this case there's no test set
p1
## 1 2 3 4 5 6 7
## 9.392308 11.638462 13.103333 11.638462 3.488889 6.642857 9.392308
## 8 9 10 11 12 13 14
## 13.103333 11.054545 6.642857 6.642857 13.103333 11.054545 13.103333
## 15 16 17 18 19 20 21
## 11.638462 11.638462 11.638462 11.054545 6.642857 11.638462 3.488889
## 22 23 24 25 26 27 28
## 13.103333 3.488889 3.488889 11.638462 11.054545 13.103333 11.054545
## 29 30 31 32 33 34 35
## 11.638462 3.488889 3.488889 9.392308 6.642857 9.392308 13.103333
## 36 37 38 39 40 41 42
## 3.488889 13.103333 9.392308 13.103333 13.103333 13.103333 13.103333
## 43 44 45 46 47 48 49
## 13.103333 13.103333 13.103333 13.103333 13.103333 6.642857 6.642857
## 50 51 52 53 54 55 56
## 9.392308 9.392308 9.392308 9.392308 9.392308 13.103333 11.638462
## 57 58 59 60 61 62 63
## 13.103333 13.103333 9.392308 9.392308 13.103333 11.054545 11.054545
## 64 65 66 67 68 69 70
## 13.103333 11.638462 11.638462 13.103333 13.103333 13.103333 13.103333
## 71 72 73 74 75 76 77
## 13.103333 13.103333 13.103333 9.392308 11.054545 13.103333 3.488889
## 78 79 80 81 82 83
## 11.638462 11.638462 3.488889 11.054545 11.054545 11.054545
# Mean absolute error between observed and predicted values.
#
# actual:    numeric vector of observed values.
# predicted: numeric vector of predictions, aligned element by element with
#            actual.
# na.rm:     if TRUE, absolute errors that are NA are dropped before averaging.
#            Defaults to FALSE, which keeps the original behaviour: any NA in
#            either input makes the result NA.
# Returns a single numeric value; smaller means the predictions are closer.
MAE <- function(actual, predicted, na.rm = FALSE) {
  mean(abs(actual - predicted), na.rm = na.rm)
}
MAE(df$sleep_total, p1) # For model comparison
## [1] 2.452865
# Load the grocery baskets as a transactions object; items within a line are
# separated by commas.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# script only runs where that file exists — consider a project-relative path.
Groc <- read.transactions("C:/Users/aal_l/Documents/R/groceries.csv", sep = ",")
Groc
## transactions in sparse format with
## 9835 transactions (rows) and
## 169 items (columns)
summary(Groc)
## transactions as itemMatrix in sparse format with
## 9835 rows (elements/itemsets/transactions) and
## 169 columns (items) and a density of 0.02609146
##
## most frequent items:
## whole milk other vegetables rolls/buns soda
## 2513 1903 1809 1715
## yogurt (Other)
## 1372 34055
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 2159 1643 1299 1005 855 645 545 438 350 246 182 117 78 77 55
## 16 17 18 19 20 21 22 23 24 26 27 28 29 32
## 46 29 14 14 9 11 4 6 1 1 1 1 3 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 4.409 6.000 32.000
##
## includes extended item information - examples:
## labels
## 1 abrasive cleaner
## 2 artif. sweetener
## 3 baby cosmetics
inspect(Groc[1:5])
## items
## [1] {citrus fruit,
## margarine,
## ready soups,
## semi-finished bread}
## [2] {coffee,
## tropical fruit,
## yogurt}
## [3] {whole milk}
## [4] {cream cheese,
## meat spreads,
## pip fruit,
## yogurt}
## [5] {condensed milk,
## long life bakery product,
## other vegetables,
## whole milk}
itemFrequency(Groc[,])
## abrasive cleaner artif. sweetener
## 0.0035587189 0.0032536858
## baby cosmetics baby food
## 0.0006100661 0.0001016777
## bags baking powder
## 0.0004067107 0.0176919166
## bathroom cleaner beef
## 0.0027452974 0.0524656838
## berries beverages
## 0.0332486019 0.0260294865
## bottled beer bottled water
## 0.0805287239 0.1105236401
## brandy brown bread
## 0.0041687850 0.0648703610
## butter butter milk
## 0.0554143366 0.0279613625
## cake bar candles
## 0.0132180986 0.0089476360
## candy canned beer
## 0.0298932384 0.0776817489
## canned fish canned fruit
## 0.0150482969 0.0032536858
## canned vegetables cat food
## 0.0107778343 0.0232841891
## cereals chewing gum
## 0.0056939502 0.0210472801
## chicken chocolate
## 0.0429079817 0.0496187087
## chocolate marshmallow citrus fruit
## 0.0090493137 0.0827656329
## cleaner cling film/bags
## 0.0050838841 0.0113879004
## cocoa drinks coffee
## 0.0022369090 0.0580579563
## condensed milk cooking chocolate
## 0.0102694459 0.0025419420
## cookware cream
## 0.0027452974 0.0013218099
## cream cheese curd
## 0.0396542959 0.0532791052
## curd cheese decalcifier
## 0.0050838841 0.0015251652
## dental care dessert
## 0.0057956279 0.0371123538
## detergent dish cleaner
## 0.0192170819 0.0104728012
## dishes dog food
## 0.0175902389 0.0085409253
## domestic eggs female sanitary products
## 0.0634468734 0.0061006609
## finished products fish
## 0.0065073716 0.0029486528
## flour flower (seeds)
## 0.0173868836 0.0103711235
## flower soil/fertilizer frankfurter
## 0.0019318760 0.0589730554
## frozen chicken frozen dessert
## 0.0006100661 0.0107778343
## frozen fish frozen fruits
## 0.0116929334 0.0012201322
## frozen meals frozen potato products
## 0.0283680732 0.0084392476
## frozen vegetables fruit/vegetable juice
## 0.0480935435 0.0722928317
## grapes hair spray
## 0.0223690900 0.0011184545
## ham hamburger meat
## 0.0260294865 0.0332486019
## hard cheese herbs
## 0.0245043213 0.0162684291
## honey house keeping products
## 0.0015251652 0.0083375699
## hygiene articles ice cream
## 0.0329435689 0.0250127097
## instant coffee Instant food products
## 0.0074224708 0.0080325369
## jam ketchup
## 0.0053889171 0.0042704626
## kitchen towels kitchen utensil
## 0.0059989832 0.0004067107
## light bulbs liqueur
## 0.0041687850 0.0009150991
## liquor liquor (appetizer)
## 0.0110828673 0.0079308592
## liver loaf long life bakery product
## 0.0050838841 0.0374173869
## make up remover male cosmetics
## 0.0008134215 0.0045754957
## margarine mayonnaise
## 0.0585663447 0.0091509914
## meat meat spreads
## 0.0258261312 0.0042704626
## misc. beverages mustard
## 0.0283680732 0.0119979664
## napkins newspapers
## 0.0523640061 0.0798169802
## nut snack nuts/prunes
## 0.0031520081 0.0033553635
## oil onions
## 0.0280630402 0.0310116929
## organic products organic sausage
## 0.0016268429 0.0022369090
## other vegetables packaged fruit/vegetables
## 0.1934926284 0.0130147433
## pasta pastry
## 0.0150482969 0.0889679715
## pet care photo/film
## 0.0094560244 0.0092526690
## pickled vegetables pip fruit
## 0.0178952720 0.0756481952
## popcorn pork
## 0.0072191154 0.0576512456
## pot plants potato products
## 0.0172852059 0.0028469751
## preservation products processed cheese
## 0.0002033554 0.0165734621
## prosecco pudding powder
## 0.0020335536 0.0023385867
## ready soups red/blush wine
## 0.0018301983 0.0192170819
## rice roll products
## 0.0076258261 0.0102694459
## rolls/buns root vegetables
## 0.1839349263 0.1089984748
## rubbing alcohol rum
## 0.0010167768 0.0044738180
## salad dressing salt
## 0.0008134215 0.0107778343
## salty snack sauces
## 0.0378240976 0.0054905948
## sausage seasonal products
## 0.0939501779 0.0142348754
## semi-finished bread shopping bags
## 0.0176919166 0.0985256736
## skin care sliced cheese
## 0.0035587189 0.0245043213
## snack products soap
## 0.0030503305 0.0026436197
## soda soft cheese
## 0.1743772242 0.0170818505
## softener sound storage medium
## 0.0054905948 0.0001016777
## soups sparkling wine
## 0.0068124047 0.0055922725
## specialty bar specialty cheese
## 0.0273512964 0.0085409253
## specialty chocolate specialty fat
## 0.0304016268 0.0036603965
## specialty vegetables spices
## 0.0017285206 0.0051855618
## spread cheese sugar
## 0.0111845450 0.0338586680
## sweet spreads syrup
## 0.0090493137 0.0032536858
## tea tidbits
## 0.0038637519 0.0023385867
## toilet cleaner tropical fruit
## 0.0007117438 0.1049313676
## turkey UHT-milk
## 0.0081342145 0.0334519573
## vinegar waffles
## 0.0065073716 0.0384341637
## whipped/sour cream whisky
## 0.0716827656 0.0008134215
## white bread white wine
## 0.0420945602 0.0190137265
## whole milk yogurt
## 0.2555160142 0.1395017794
## zwieback
## 0.0069140824
# Support of every item: the fraction of transactions in which the item shows up
itemFrequencyPlot(Groc, support = 0.10)

# Only items that show up in at least 10 % of the transactions are plotted
itemFrequencyPlot(Groc, topN = 20)

# Confidence is a conditional probability: P(rhs | lhs)
m1 <- apriori(Groc, parameter = list(support = 0.007,
confidence = 0.25,
minlen = 2))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.25 0.1 1 none FALSE TRUE 5 0.007 2
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 68
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [104 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.01s].
## writing ... [363 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
m1
## set of 363 rules
summary(m1)
## set of 363 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4
## 137 214 12
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 2.000 3.000 2.656 3.000 4.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.007016 Min. :0.2500 Min. :0.9932 Min. : 69.0
## 1st Qu.:0.008134 1st Qu.:0.2962 1st Qu.:1.6060 1st Qu.: 80.0
## Median :0.009659 Median :0.3551 Median :1.9086 Median : 95.0
## Mean :0.012945 Mean :0.3743 Mean :2.0072 Mean :127.3
## 3rd Qu.:0.013777 3rd Qu.:0.4420 3rd Qu.:2.3289 3rd Qu.:135.5
## Max. :0.074835 Max. :0.6389 Max. :3.9565 Max. :736.0
##
## mining info:
## data ntransactions support confidence
## Groc 9835 0.007 0.25
inspect(m1[1:10])
## lhs rhs support confidence
## [1] {herbs} => {root vegetables} 0.007015760 0.4312500
## [2] {herbs} => {other vegetables} 0.007727504 0.4750000
## [3] {herbs} => {whole milk} 0.007727504 0.4750000
## [4] {processed cheese} => {whole milk} 0.007015760 0.4233129
## [5] {semi-finished bread} => {whole milk} 0.007117438 0.4022989
## [6] {detergent} => {whole milk} 0.008947636 0.4656085
## [7] {pickled vegetables} => {whole milk} 0.007117438 0.3977273
## [8] {baking powder} => {other vegetables} 0.007320793 0.4137931
## [9] {baking powder} => {whole milk} 0.009252669 0.5229885
## [10] {flour} => {whole milk} 0.008439248 0.4853801
## lift count
## [1] 3.956477 69
## [2] 2.454874 76
## [3] 1.858983 76
## [4] 1.656698 69
## [5] 1.574457 70
## [6] 1.822228 88
## [7] 1.556565 70
## [8] 2.138547 72
## [9] 2.046793 91
## [10] 1.899607 83
inspect(sort(m1, by = "lift")[1:10])
## lhs rhs support confidence lift count
## [1] {herbs} => {root vegetables} 0.007015760 0.4312500 3.956477 69
## [2] {berries} => {whipped/sour cream} 0.009049314 0.2721713 3.796886 89
## [3] {other vegetables,
## tropical fruit,
## whole milk} => {root vegetables} 0.007015760 0.4107143 3.768074 69
## [4] {beef,
## other vegetables} => {root vegetables} 0.007930859 0.4020619 3.688692 78
## [5] {other vegetables,
## tropical fruit} => {pip fruit} 0.009456024 0.2634561 3.482649 93
## [6] {beef,
## whole milk} => {root vegetables} 0.008032537 0.3779904 3.467851 79
## [7] {other vegetables,
## pip fruit} => {tropical fruit} 0.009456024 0.3618677 3.448613 93
## [8] {citrus fruit,
## other vegetables} => {root vegetables} 0.010371124 0.3591549 3.295045 102
## [9] {other vegetables,
## whole milk,
## yogurt} => {tropical fruit} 0.007625826 0.3424658 3.263712 75
## [10] {other vegetables,
## whole milk,
## yogurt} => {root vegetables} 0.007829181 0.3515982 3.225716 77