Obs: Caso tenha problemas com a acentuação, consulte este link: https://support.rstudio.com/hc/en-us/articles/200532197-Character-Encoding
Configurando o diretório de trabalho Coloque entre aspas o diretório de trabalho que você está usando no seu computador Não use diretórios com espaço no nome
https://cran.r-project.org/web/packages/arules/index.html https://cran.r-project.org/web/packages/arulesViz/index.html
library(dplyr)
library(arules)
library(arulesViz)
library(htmlwidgets)
library(writexl)
Warning: package ‘writexl’ was built under R version 4.1.3
options(warn=-1)
dim(dados)
[1] 15002 20
summary(dados)
Item01 Item02 Item03 Item04 Item05 Item06 Item07 Item08 Item09
Length:15002 Length:15002 Length:15002 Length:15002 Length:15002 Length:15002 Length:15002 Length:15002 Length:15002
Class :character Class :character Class :character Class :character Class :character Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character
Item10 Item11 Item12 Item13 Item14 Item15 Item16 Item17 Item18 Item19
Length:15002 Length:15002 Length:15002 Length:15002 Length:15002 Length:15002 Mode:logical Mode:logical Mode:logical Mode:logical
Class :character Class :character Class :character Class :character Class :character Class :character NA's:15002 NA's:15002 NA's:15002 NA's:15002
Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character
Item20
Mode:logical
NA's:15002
str(dados)
spec_tbl_df [15,002 x 20] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ Item01: chr [1:15002] NA "Logitech M510 Wireless mouse" NA "Apple Lightning to Digital AV Adapter" ...
$ Item02: chr [1:15002] NA "HP 63 Ink" NA "TP-Link AC1750 Smart WiFi Router" ...
$ Item03: chr [1:15002] NA "HP 65 ink" NA "Apple Pencil" ...
$ Item04: chr [1:15002] NA "nonda USB C to USB Adapter" NA NA ...
$ Item05: chr [1:15002] NA "10ft iPHone Charger Cable" NA NA ...
$ Item06: chr [1:15002] NA "HP 902XL ink" NA NA ...
$ Item07: chr [1:15002] NA "Creative Pebble 2.0 Speakers" NA NA ...
$ Item08: chr [1:15002] NA "Cleaning Gel Universal Dust Cleaner" NA NA ...
$ Item09: chr [1:15002] NA "Micro Center 32GB Memory card" NA NA ...
$ Item10: chr [1:15002] NA "YUNSONG 3pack 6ft Nylon Lightning Cable" NA NA ...
$ Item11: chr [1:15002] NA "TopMate C5 Laptop Cooler pad" NA NA ...
$ Item12: chr [1:15002] NA "Apple USB-C Charger cable" NA NA ...
$ Item13: chr [1:15002] NA "HyperX Cloud Stinger Headset" NA NA ...
$ Item14: chr [1:15002] NA "TONOR USB Gaming Microphone" NA NA ...
$ Item15: chr [1:15002] NA "Dust-Off Compressed Gas 2 pack" NA NA ...
$ Item16: logi [1:15002] NA NA NA NA NA NA ...
$ Item17: logi [1:15002] NA NA NA NA NA NA ...
$ Item18: logi [1:15002] NA NA NA NA NA NA ...
$ Item19: logi [1:15002] NA NA NA NA NA NA ...
$ Item20: logi [1:15002] NA NA NA NA NA NA ...
- attr(*, "spec")=
.. cols(
.. Item01 = col_character(),
.. Item02 = col_character(),
.. Item03 = col_character(),
.. Item04 = col_character(),
.. Item05 = col_character(),
.. Item06 = col_character(),
.. Item07 = col_character(),
.. Item08 = col_character(),
.. Item09 = col_character(),
.. Item10 = col_character(),
.. Item11 = col_character(),
.. Item12 = col_character(),
.. Item13 = col_character(),
.. Item14 = col_character(),
.. Item15 = col_character(),
.. Item16 = col_logical(),
.. Item17 = col_logical(),
.. Item18 = col_logical(),
.. Item19 = col_logical(),
.. Item20 = col_logical()
.. )
- attr(*, "problems")=<externalptr>
Uma forma inteligente de resolver o problema no dataset Separamos as linhas pares das linhas ímpares
linhas_pares <- seq(2, nrow(dados), 2)
linhas_impares <- seq(1, nrow(dados), 2)
Separamos os dados e então usaremos o dataset com as linhas pares (linhas de dados válidos)
df1 <- dados[linhas_pares, ]
df2 <- dados[linhas_impares, ]
Verifica se temos valores ausentes no primeiro item de compra
sum(is.na(df1$Item01))
[1] 0
Verifica se temos valores ausentes no segundo item de compra (ATENÇÃO)
sum(is.na(df1$Item02))
[1] 1754
Verifica se temos valores ausentes representados por espaço em branco
which(nchar(trimws(df1$Item01))==0)
integer(0)
which(nchar(trimws(df1$Item02))==0)
integer(0)
Verifica se temos valores ausentes representados por espaço em branco (usando expressão regular)
grepl("^\\s*$", df1$Item02)
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[31] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[91] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[151] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[271] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[301] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[331] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[361] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[391] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[421] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[451] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[481] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[511] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[541] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[571] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[601] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[631] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[661] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[691] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[721] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[751] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[781] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[811] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[841] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[871] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[901] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[931] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[961] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[991] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[ reached getOption("max.print") -- omitted 6501 entries ]
Número de itens distintos
n_distinct(df1)
[1] 5176
Vamos trabalhar somente com os registros onde o item 2 não fos nulo
df1_two <- df1[!grepl("^\\s*$", df1$Item02), ]
Número de itens distintos
n_distinct(df1_two)
[1] 5176
Prepara o pacote convertendo as variáveis para o tipo fator (variáveis que usaremos daqui em diante)
pacote <- df1_two
pacote$Item01 <- as.factor(pacote$Item01)
pacote$Item02 <- as.factor(pacote$Item02)
pacote$Item03 <- as.factor(pacote$Item03)
pacote$Item04 <- as.factor(pacote$Item04)
pacote$Item05 <- as.factor(pacote$Item05)
pacote$Item06 <- as.factor(pacote$Item06)
summary(pacote)
Item01 Item02 Item03 Item04
Dust-Off Compressed Gas 2 pack : 577 Dust-Off Compressed Gas 2 pack : 484 Dust-Off Compressed Gas 2 pack : 375 Dust-Off Compressed Gas 2 pack : 201
Apple Lightning to Digital AV Adapter: 576 VIVO Dual LCD Monitor Desk mount: 411 VIVO Dual LCD Monitor Desk mount: 279 Apple Pencil : 181
Cat8 Ethernet Cable : 458 Apple Pencil : 302 Apple Pencil : 225 USB 2.0 Printer cable : 174
HP 61 ink : 391 SanDisk Ultra 64GB card : 291 Screen Mom Screen Cleaner kit : 213 VIVO Dual LCD Monitor Desk mount: 167
Nylon Braided Lightning to USB cable : 373 USB 2.0 Printer cable : 243 USB 2.0 Printer cable : 180 Screen Mom Screen Cleaner kit : 149
VIVO Dual LCD Monitor Desk mount : 354 (Other) :4016 (Other) :3117 (Other) :2473
(Other) :4772 NA's :1754 NA's :3112 NA's :4156
Item05 Item06 Item07 Item08 Item09 Item10 Item11 Item12
Apple USB-C Charger cable : 153 USB 2.0 Printer cable : 107 Length:7501 Length:7501 Length:7501 Length:7501 Length:7501 Length:7501
Apple Pencil : 134 Apple Pencil : 102 Class :character Class :character Class :character Class :character Class :character Class :character
USB 2.0 Printer cable : 130 Apple USB-C Charger cable: 100 Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character
HP 61 ink : 115 HP 61 ink : 71
Screen Mom Screen Cleaner kit: 114 Stylus Pen for iPad : 69
(Other) :1883 (Other) :1415
NA's :4972 NA's :5637
Item13 Item14 Item15 Item16 Item17 Item18 Item19 Item20
Length:7501 Length:7501 Length:7501 Mode:logical Mode:logical Mode:logical Mode:logical Mode:logical
Class :character Class :character Class :character NA's:7501 NA's:7501 NA's:7501 NA's:7501 NA's:7501
Mode :character Mode :character Mode :character
str(pacote)
tibble [7,501 x 20] (S3: tbl_df/tbl/data.frame)
$ Item01: Factor w/ 115 levels "10ft iPHone Charger Cable",..: 61 9 104 22 28 101 6 2 73 105 ...
$ Item02: Factor w/ 117 levels "10ft iPHone Charger Cable",..: 39 104 NA 45 98 NA 107 10 113 NA ...
$ Item03: Factor w/ 115 levels "10ft iPHone Charger Cable",..: 43 11 NA NA 67 NA NA 48 13 NA ...
$ Item04: Factor w/ 114 levels "10ft iPHone Charger Cable",..: 72 NA NA NA 38 NA NA NA NA NA ...
$ Item05: Factor w/ 110 levels "10ft iPHone Charger Cable",..: 1 NA NA NA 14 NA NA NA NA NA ...
$ Item06: Factor w/ 106 levels "10ft iPHone Charger Cable",..: 45 NA NA NA NA NA NA NA NA NA ...
$ Item07: chr [1:7501] "Creative Pebble 2.0 Speakers" NA NA NA ...
$ Item08: chr [1:7501] "Cleaning Gel Universal Dust Cleaner" NA NA NA ...
$ Item09: chr [1:7501] "Micro Center 32GB Memory card" NA NA NA ...
$ Item10: chr [1:7501] "YUNSONG 3pack 6ft Nylon Lightning Cable" NA NA NA ...
$ Item11: chr [1:7501] "TopMate C5 Laptop Cooler pad" NA NA NA ...
$ Item12: chr [1:7501] "Apple USB-C Charger cable" NA NA NA ...
$ Item13: chr [1:7501] "HyperX Cloud Stinger Headset" NA NA NA ...
$ Item14: chr [1:7501] "TONOR USB Gaming Microphone" NA NA NA ...
$ Item15: chr [1:7501] "Dust-Off Compressed Gas 2 pack" NA NA NA ...
$ Item16: logi [1:7501] NA NA NA NA NA NA ...
$ Item17: logi [1:7501] NA NA NA NA NA NA ...
$ Item18: logi [1:7501] NA NA NA NA NA NA ...
$ Item19: logi [1:7501] NA NA NA NA NA NA ...
$ Item20: logi [1:7501] NA NA NA NA NA NA ...
pacote_split <- split(pacote$Item01,
pacote$Item02,
pacote$Item03,
pacote$Item04,
pacote$Item05,
pacote$Item06)
Warning: c("argumento 'structure(c(45L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ' (ainda) não é usado", "argumento 'NA, NA, NA, NA, 71L, NA, 74L, NA, 33L, NA, 12L, 49L, NA, 77L, ' (ainda) não é usado", "argumento '74L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ' (ainda) não é usado", "argumento '36L, NA, NA, NA, 75L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ' (ainda) não é usado", "argumento '102L, NA, 63L, 12L, NA, NA, NA, NA, NA, NA, NA, 38L, NA, NA, ' (ainda) não é usado", "argumento 'NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 71L, NA, ' (ainda) não é usado",
"argumento 'NA, NA, NA, NA, NA, NA, 25L, NA, NA, 12L, NA, 23L, NA, NA, 12L, ' (ainda) não é usado", "argumento 'NA, 17L, 37L, NA, NA, 10L, NA, 88L, 26L, 39L, NA, NA, NA, NA, ' (ainda) não é usado", "argumento 'NA, 12L, NA, NA, 37L, NA, NA, NA, NA, NA, NA, 25L, NA, 63L, NA, ' (ainda) não é usado", "argumento 'NA, NA, NA, NA, 71L, 90L, NA, NA, NA, NA, 28L, 10L, 88L, 25L, ' (ainda) não é usado", "argumento '25L, [... truncated]
Warning in if (drop) f <- factor(f) :
the condition has length > 1 and only the first element will be used
Transações
transacoes <- as(pacote_split, "transactions")
Warning in asMethod(object) : removing duplicated items in transactions
Inspeção das regras
inspect(head(transacoes, 5))
items transactionID
[1] {Cat8 Ethernet Cable,
Dust-Off Compressed Gas 2 pack,
HP 65 ink,
Logitech M510 Wireless mouse,
M.2 Screw kit,
Nylon Braided Lightning to USB cable,
SanDisk 32GB Ultra SDHC card,
Screen Mom Screen Cleaner kit} 10ft iPHone Charger Cable
[2] {Anker USB C to HDMI Adapter,
Apple Lightning to Digital AV Adapter,
AutoFocus 1080p Webcam,
Cat8 Ethernet Cable,
Dust-Off Compressed Gas 2 pack,
HP 61 ink,
Jelly Comb 2.4G Slim Wireless mouse,
Logitech M510 Wireless mouse,
Mpow HC6 USB Headset,
Nylon Braided Lightning to USB cable,
SAMSUNG EVO 64GB card,
SanDisk 128GB Ultra microSDXC card,
SanDisk Ultra 64GB card,
VIVO Dual LCD Monitor Desk mount} 10ft iPHone Charger Cable 2 Pack
[3] {10ft iPHone Charger Cable,
Brother Genuine High Yield Toner Cartridge,
HP 64 ink,
Screen Mom Screen Cleaner kit,
USB 2.0 Printer cable} 3 pack Nylon Braided Lightning Cable
[4] {Anker 2-in-1 USB Card Reader,
Apple Lightning to Digital AV Adapter,
Dust-Off Compressed Gas 2 pack,
FEIYOLD Blue light Blocking Glasses,
HP 61 ink,
Logitech M510 Wireless mouse,
Moread HDMI to VGA Adapter,
Mpow HC6 USB Headset,
Nylon Braided Lightning to USB cable,
Sabrent 4-port USB 3.0 hub,
SAMSUNG EVO 32GB card,
SanDisk 128GB Ultra microSDXC card,
SanDisk Ultra 64GB card,
Screen Mom Screen Cleaner kit,
VicTsing Wireless mouse,
VIVO Dual LCD Monitor Desk mount} 3A USB Type C Cable 3 pack 6FT
[5] {Apple Lightning to Digital AV Adapter,
Apple Pencil,
Apple USB-C Charger cable,
Dust-Off Compressed Gas 2 pack,
HP 61 ink,
HP 63XL Ink,
HP 64 ink,
SanDisk Ultra 400GB card,
SanDisk Ultra 64GB card,
USB Type C Cable,
USB Type C to USB-A Charger cable,
VIVO Dual LCD Monitor Desk mount} 5pack Nylon Braided USB C cables
Vamos verificar as regras de um produto: Dust-Off Compressed Gas 2 pack
regras_produto1 <- apriori(transacoes,
parameter = list(conf = 0.5, minlen = 3),
appearance = list(rhs = "Dust-Off Compressed Gas 2 pack", default = "lhs"))
Apriori
Parameter specification:
Algorithmic control:
Absolute minimum support count: 11
set item appearances ...[1 item(s)] done [0.00s].
set transactions ...[104 item(s), 117 transaction(s)] done [0.00s].
sorting and recoding items ... [41 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 6 7 8 9 done [0.01s].
writing ... [2312 rule(s)] done [0.00s].
creating S4 object ... done [0.00s].
regras_produto1
set of 2312 rules
Inspeção das regras
inspect(head(sort(regras_produto1, by = "confidence"), 5))
Vamos verificar as regras de um produto: HP 61 ink
regras_produto2 <- apriori(transacoes,
parameter = list(minlen = 3, conf = 0.5),
appearance = list(rhs = "HP 61 ink",default = "lhs"))
Apriori
Parameter specification:
Algorithmic control:
Absolute minimum support count: 11
set item appearances ...[1 item(s)] done [0.00s].
set transactions ...[104 item(s), 117 transaction(s)] done [0.00s].
sorting and recoding items ... [41 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 6 7 8 9 done [0.01s].
writing ... [2183 rule(s)] done [0.00s].
creating S4 object ... done [0.00s].
regras_produto2
set of 2183 rules
Inspeção das regras
inspect(head(sort(regras_produto2, by = "confidence"), 5))
lhs rhs support confidence coverage lift count
[1] {Nylon Braided Lightning to USB cable,
TP-Link AC1750 Smart WiFi Router} => {HP 61 ink} 0.1025641 1.0000000 0.1025641 2.127273 12
[2] {Nylon Braided Lightning to USB cable,
TP-Link AC1750 Smart WiFi Router,
VIVO Dual LCD Monitor Desk mount} => {HP 61 ink} 0.1025641 1.0000000 0.1025641 2.127273 12
[3] {Dust-Off Compressed Gas 2 pack,
Nylon Braided Lightning to USB cable,
TP-Link AC1750 Smart WiFi Router} => {HP 61 ink} 0.1025641 1.0000000 0.1025641 2.127273 12
[4] {Dust-Off Compressed Gas 2 pack,
Nylon Braided Lightning to USB cable,
TP-Link AC1750 Smart WiFi Router,
VIVO Dual LCD Monitor Desk mount} => {HP 61 ink} 0.1025641 1.0000000 0.1025641 2.127273 12
[5] {FEIYOLD Blue light Blocking Glasses,
Nylon Braided Lightning to USB cable} => {HP 61 ink} 0.1452991 0.9444444 0.1538462 2.009091 17
Vamos verificar as regras de um produto: VIVO Dual LCD Monitor Desk mount
regras_produto3 <- apriori(transacoes,
parameter = list(minlen = 3,
conf = 0.5),
appearance = list(rhs = "VIVO Dual LCD Monitor Desk mount",
default = "lhs"))
Apriori
Parameter specification:
Algorithmic control:
Absolute minimum support count: 11
set item appearances ...[1 item(s)] done [0.00s].
set transactions ...[104 item(s), 117 transaction(s)] done [0.00s].
sorting and recoding items ... [41 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 6 7 8 9 done [0.01s].
writing ... [2375 rule(s)] done [0.00s].
creating S4 object ... done [0.00s].
regras_produto3
set of 2375 rules
Inspeção das regras
inspect(head(sort(regras_produto3, by = "confidence"), 5))
Vamos verificar novamente as regras do produto: Dust-Off Compressed Gas 2 pack, alterando uma das métricas
regras_produto1 <- apriori(transacoes,
parameter = list(minlen = 3, supp = 0.2, conf = 0.5, target = "rules"),
appearance = list(rhs = "Dust-Off Compressed Gas 2 pack", default = "lhs"))
Apriori
Parameter specification:
Algorithmic control:
Absolute minimum support count: 23
set item appearances ...[1 item(s)] done [0.00s].
set transactions ...[104 item(s), 117 transaction(s)] done [0.00s].
sorting and recoding items ... [23 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 done [0.00s].
writing ... [38 rule(s)] done [0.00s].
creating S4 object ... done [0.00s].
regras_produto1
set of 38 rules
Inspeção das regras
inspect(head(sort(regras_produto1, by = "confidence"), 5))
lhs rhs support confidence coverage lift count
[1] {Logitech M510 Wireless mouse,
Screen Mom Screen Cleaner kit} => {Dust-Off Compressed Gas 2 pack} 0.2222222 1 0.2222222 1.746269 26
[2] {Anker USB C to HDMI Adapter,
Screen Mom Screen Cleaner kit} => {Dust-Off Compressed Gas 2 pack} 0.2307692 1 0.2307692 1.746269 27
[3] {Screen Mom Screen Cleaner kit,
VIVO Dual LCD Monitor Desk mount} => {Dust-Off Compressed Gas 2 pack} 0.2991453 1 0.2991453 1.746269 35
[4] {Nylon Braided Lightning to USB cable,
Screen Mom Screen Cleaner kit} => {Dust-Off Compressed Gas 2 pack} 0.2649573 1 0.2649573 1.746269 31
[5] {Anker USB C to HDMI Adapter,
Screen Mom Screen Cleaner kit,
VIVO Dual LCD Monitor Desk mount} => {Dust-Off Compressed Gas 2 pack} 0.2136752 1 0.2136752 1.746269 25
Filtra as regras redundantes
regras_produto1_clean <- regras_produto1[!is.redundant(regras_produto1)]
Inspeção das regras
inspect(head(sort(regras_produto1_clean, by = "confidence"), 5))
Sumário
summary(regras_produto1_clean)
set of 27 rules
rule length distribution (lhs + rhs):sizes
3 4
23 4
Min. 1st Qu. Median Mean 3rd Qu. Max.
3.000 3.000 3.000 3.148 3.000 4.000
summary of quality measures:
support confidence coverage lift count
Min. :0.2051 Min. :0.6154 Min. :0.2222 Min. :1.075 Min. :24.00
1st Qu.:0.2222 1st Qu.:0.7742 1st Qu.:0.2521 1st Qu.:1.352 1st Qu.:26.00
Median :0.2308 Median :0.9062 Median :0.2735 Median :1.583 Median :27.00
Mean :0.2384 Mean :0.8710 Mean :0.2770 Mean :1.521 Mean :27.89
3rd Qu.:0.2479 3rd Qu.:0.9655 3rd Qu.:0.3034 3rd Qu.:1.686 3rd Qu.:29.00
Max. :0.2991 Max. :1.0000 Max. :0.3675 Max. :1.746 Max. :35.00
mining info:
Plot 1
plot(regras_produto1_clean,
measure = "support",
shading = "confidence",
method = "graph",
engine = "html")
Vamos verificar novamente as regras do produto: HP 61 ink, Alterando uma das métricas
regras_produto2 <- apriori(transacoes,
parameter = list(minlen = 3,
supp = 0.2,
conf = 0.5,
target = "rules"),
appearance = list(rhs = "HP 61 ink", default = "lhs"))
Apriori
Parameter specification:
Algorithmic control:
Absolute minimum support count: 23
set item appearances ...[1 item(s)] done [0.00s].
set transactions ...[104 item(s), 117 transaction(s)] done [0.00s].
sorting and recoding items ... [23 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 done [0.00s].
writing ... [35 rule(s)] done [0.00s].
creating S4 object ... done [0.00s].
regras_produto2
set of 35 rules
Inspeção das regras
inspect(head(sort(regras_produto2, by = "confidence"), 5))
Filtra as regras redundantes
regras_produto2_clean <- regras_produto2[!is.redundant(regras_produto2)]
regras_produto2_clean
set of 30 rules
Inspeção das regras
inspect(head(sort(regras_produto2_clean, by = "confidence"), 5))
NA
Sumário
summary(regras_produto2_clean)
set of 30 rules
rule length distribution (lhs + rhs):sizes
3 4
25 5
Min. 1st Qu. Median Mean 3rd Qu. Max.
3.000 3.000 3.000 3.167 3.000 4.000
summary of quality measures:
support confidence coverage lift count
Min. :0.2051 Min. :0.6304 Min. :0.2393 Min. :1.341 Min. :24.00
1st Qu.:0.2051 1st Qu.:0.7407 1st Qu.:0.2650 1st Qu.:1.576 1st Qu.:24.00
Median :0.2179 Median :0.7777 Median :0.2821 Median :1.654 Median :25.50
Mean :0.2236 Mean :0.7720 Mean :0.2920 Mean :1.642 Mean :26.17
3rd Qu.:0.2393 3rd Qu.:0.8042 3rd Qu.:0.3141 3rd Qu.:1.711 3rd Qu.:28.00
Max. :0.2650 Max. :0.8788 Max. :0.3932 Max. :1.869 Max. :31.00
mining info:
Plot 2
plot(regras_produto2_clean,
measure = "support",
shading = "confidence",
method = "graph",
engine = "html")
Vamos verificar novamente as regras do produto: VIVO Dual LCD Monitor Desk mount, alterando uma das métricas
regras_produto3 <- apriori(transacoes,
parameter = list(minlen = 3,
supp = 0.2,
conf = 0.5,
target = "rules"),
appearance = list(rhs = "VIVO Dual LCD Monitor Desk mount", default = "lhs"))
Apriori
Parameter specification:
Algorithmic control:
Absolute minimum support count: 23
set item appearances ...[1 item(s)] done [0.00s].
set transactions ...[104 item(s), 117 transaction(s)] done [0.00s].
sorting and recoding items ... [23 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 done [0.00s].
writing ... [34 rule(s)] done [0.00s].
creating S4 object ... done [0.00s].
regras_produto3
set of 34 rules
Inspeção das regras
inspect(head(sort(regras_produto3, by = "confidence"), 5))
lhs rhs support confidence coverage lift count
[1] {Dust-Off Compressed Gas 2 pack,
SanDisk Ultra 64GB card} => {VIVO Dual LCD Monitor Desk mount} 0.2307692 0.9642857 0.2393162 2.128706 27
[2] {Apple Lightning to Digital AV Adapter,
Dust-Off Compressed Gas 2 pack,
SanDisk Ultra 64GB card} => {VIVO Dual LCD Monitor Desk mount} 0.2136752 0.9615385 0.2222222 2.122642 25
[3] {Apple Lightning to Digital AV Adapter,
Dust-Off Compressed Gas 2 pack,
Logitech M510 Wireless mouse} => {VIVO Dual LCD Monitor Desk mount} 0.2136752 0.9615385 0.2222222 2.122642 25
[4] {Anker USB C to HDMI Adapter,
Apple Lightning to Digital AV Adapter,
Dust-Off Compressed Gas 2 pack} => {VIVO Dual LCD Monitor Desk mount} 0.2136752 0.9615385 0.2222222 2.122642 25
[5] {Anker USB C to HDMI Adapter,
Dust-Off Compressed Gas 2 pack,
Nylon Braided Lightning to USB cable} => {VIVO Dual LCD Monitor Desk mount} 0.2307692 0.9310345 0.2478632 2.055303 27
Filtra as regras redundantes
regras_produto3_clean <- regras_produto3[!is.redundant(regras_produto3)]
regras_produto3_clean
set of 30 rules
Inspeção das regras
inspect(head(sort(regras_produto3_clean, by = "confidence"), 5))
lhs rhs support confidence coverage lift count
[1] {Dust-Off Compressed Gas 2 pack,
SanDisk Ultra 64GB card} => {VIVO Dual LCD Monitor Desk mount} 0.2307692 0.9642857 0.2393162 2.128706 27
[2] {Apple Lightning to Digital AV Adapter,
Dust-Off Compressed Gas 2 pack,
Logitech M510 Wireless mouse} => {VIVO Dual LCD Monitor Desk mount} 0.2136752 0.9615385 0.2222222 2.122642 25
[3] {Anker USB C to HDMI Adapter,
Apple Lightning to Digital AV Adapter,
Dust-Off Compressed Gas 2 pack} => {VIVO Dual LCD Monitor Desk mount} 0.2136752 0.9615385 0.2222222 2.122642 25
[4] {Anker USB C to HDMI Adapter,
Dust-Off Compressed Gas 2 pack,
Nylon Braided Lightning to USB cable} => {VIVO Dual LCD Monitor Desk mount} 0.2307692 0.9310345 0.2478632 2.055303 27
[5] {Anker USB C to HDMI Adapter,
Screen Mom Screen Cleaner kit} => {VIVO Dual LCD Monitor Desk mount} 0.2136752 0.9259259 0.2307692 2.044025 25
Sumário
summary(regras_produto3_clean)
set of 30 rules
rule length distribution (lhs + rhs):sizes
3 4
21 9
Min. 1st Qu. Median Mean 3rd Qu. Max.
3.0 3.0 3.0 3.3 4.0 4.0
summary of quality measures:
support confidence coverage lift count
Min. :0.2051 Min. :0.6047 Min. :0.2222 Min. :1.335 Min. :24.00
1st Qu.:0.2137 1st Qu.:0.7757 1st Qu.:0.2479 1st Qu.:1.712 1st Qu.:25.00
Median :0.2308 Median :0.8469 Median :0.2650 Median :1.870 Median :27.00
Mean :0.2308 Mean :0.8360 Mean :0.2795 Mean :1.845 Mean :27.00
3rd Qu.:0.2372 3rd Qu.:0.8929 3rd Qu.:0.3056 3rd Qu.:1.971 3rd Qu.:27.75
Max. :0.2991 Max. :0.9643 Max. :0.3932 Max. :2.129 Max. :35.00
mining info:
Plot 3
plot(regras_produto3_clean,
measure = "support",
shading = "confidence",
method = "graph",
engine = "html")
Top 3 regras
inspect(head(sort(regras_produto1_clean, by = "support", decreasing = TRUE), 1))
inspect(head(sort(regras_produto2_clean, by = "confidence", decreasing = TRUE), 1))
inspect(head(sort(regras_produto3_clean, by = "confidence", decreasing = TRUE), 1))
Salvamos o conjunto de regras dos 3 produtos como dataframe e então salvamos em disco
head(regras_produto1_clean)
set of 6 rules
df_produto1 <- as(regras_produto1_clean, "data.frame")
write_xlsx(df_produto1, "df_produto1.xlsx")