## Switch to the project folder and echo the resulting working directory.
## NOTE(review): the CSV below is read through its full path, so this setwd()
## may only matter to code outside this section; confirm before removing it.
setwd(dir = "~/R/Scanningdata")
getwd()
## [1] "C:/Users/menno_000/Documents/R/Scanningdata"
## Load the semicolon-separated CSV export, then report its rows and columns.
scandata <- read.csv(
  file = "~/R/Scanningdata/Mei2017Demologcsv.csv",
  sep = ";"
)
dim(scandata)
## [1] 388747 18
## Build a data frame with the transaction number, line sequence and product.
Scandatasmall <- scandata[, c("Transactionnumber", "Sequence", "Product")]
## Remove header and footer records (rows whose Sequence equals 0).
## `!=` states the intent directly instead of relying on `!x == 0` being
## parsed as `!(x == 0)`.
Scandatasmall <- Scandatasmall[Scandatasmall$Sequence != 0, ]
## Sequence was only needed for the filter; keep the two analysis columns,
## selected by name so the step does not depend on column positions.
Scandatasmall <- Scandatasmall[, c("Transactionnumber", "Product")]
## Sort the scan data by transaction and then by product. One sort after
## filtering is enough: order() leaves ties in their original order, so the
## extra sort that used to run before the filter produced the same rows.
Scandatasmall <- Scandatasmall[
  order(Scandatasmall$Transactionnumber, Scandatasmall$Product),
]
head(Scandatasmall)
## Transactionnumber Product
## 46 28277 CAFE NOIR
## 26 28277 HALFVOLLE MELK
## 55 28277 MINI KOKOSROTSJES
## 53 28277 RB LUXE KRAKELINGEN
## 65 28277 TOILETPAPIER 4 LAAGS
## 41 28277 ZACHT ZOUT
De eerste stap in het artikel is het herstructureren van de data, zodat de analyse kan plaatsvinden.
## Regroup the products into a list with one element per transaction number;
## each element holds the Product values recorded for that transaction.
Scandatasmall <- with(Scandatasmall, split(x = Product, f = Transactionnumber))
## Show the first six transactions of the regrouped list.
head(Scandatasmall, n = 6L)
## $`28277`
## [1] CAFE NOIR HALFVOLLE MELK MINI KOKOSROTSJES
## [4] RB LUXE KRAKELINGEN TOILETPAPIER 4 LAAGS ZACHT ZOUT
## 9974 Levels: FILET AM.MAGER FILET AM.NAT GEMAKSBAK ... ZWITSERSE RACLETTE
##
## $`28278`
## [1] CHIPS NATUREL CHIPS NATUREL DIGESTIVE BISCUIT
## [4] DIGESTIVE BISCUIT DIGESTIVE BISCUIT DRINK FRAMBOOS
## [7] DRINK FRAMBOOS DRINK FRAMBOOS HALFVOLLE MELK
## [10] HALFVOLLE MELK HALFVOLLE MELK KOKOS BISCUITS
## [13] KOKOS BISCUITS PW ENGL.MEL.40X2G SINAS REGULAR
## [16] SINAS REGULAR THEEZ.PG.GROEN THEEZ.PG.GROEN
## [19] THEEZ.PG.GROEN TUC BACON TUC BACON
## [22] TUC BACON WILHELMINA PEPERMUNT WILHELMINA PEPERMUNT
## [25] WILHELMINA PEPERMUNT WILHELMINA PEPERMUNT
## 9974 Levels: FILET AM.MAGER FILET AM.NAT GEMAKSBAK ... ZWITSERSE RACLETTE
##
## $`28279`
## [1] KANEELBISCUIT KANEELBISCUIT KANEELBISCUIT
## [4] KANEELBISCUIT KANEELBISCUIT RYSTWAFELS ZEEZOUT
## [7] RYSTWAFELS ZEEZOUT RYSTWAFELS ZEEZOUT RYSTWAFELS ZEEZOUT
## [10] RYSTWAFELS ZEEZOUT RYSTWAFELS ZEEZOUT SIROOP AARDBEI
## [13] SIROOP AARDBEI SIROOP AARDBEI SIROOP AARDBEI
## [16] SIROOP AARDBEI SIROOP AARDBEI SIROOP AARDBEI
## [19] SIROOP AARDBEI SIROOP FRAMBOOS SIROOP FRAMBOOS
## [22] SIROOP FRAMBOOS SIROOP FRAMBOOS SIROOP FRAMBOOS
## [25] SIROOP FRAMBOOS SIROOP FRAMBOOS SIROOP FRAMBOOS
## [28] SIROOP FRAMBOOS SIROOP FRAMBOOS WINEGUMS
## [31] WINEGUMS
## 9974 Levels: FILET AM.MAGER FILET AM.NAT GEMAKSBAK ... ZWITSERSE RACLETTE
##
## $`28280`
## [1] CH SENS MEX PEPP&CRM HALFVOLLE MELK HALFVOLLE MELK
## [4] HALFVOLLE MELK HALFVOLLE MELK HALFVOLLE MELK
## [7] JUPILER BIER KARNEMELK KARNEMELK
## [10] KARNEMELK PINDA PARTYPACK SUPERCHIPS PAPRIKA
## 9974 Levels: FILET AM.MAGER FILET AM.NAT GEMAKSBAK ... ZWITSERSE RACLETTE
##
## $`28281`
## [1] AMB SLAGERSACHTERHAM DBK KIPFILET 3 O/S HALFV.VANILLEYOGHURT
## [4] HALFVOLLE MELK HAVERMOUTPAP HAVERMOUTPAP
## [7] HOPJES VLA KERSEN ZONDER PIT KRISTALSUIKER
## [10] Unknown
## 9974 Levels: FILET AM.MAGER FILET AM.NAT GEMAKSBAK ... ZWITSERSE RACLETTE
##
## $`28282`
## [1] BOEREN GEHAKTROL5ST* C-A-S TOMATENCREME CAFE NOIR
## [4] CHEESEBURGER CHINESE MIE CHINESE MIE
## [7] DR.YOGH.AARDBEI DR.YOGH.FRAMBOOS DRINK AARDBEI KERS
## [10] DRINK MANGO PASSIEVR GEH.BROC./KAAS GEHAKT.SCHNIT.KROK.*
## [13] HUTSPOT KROEPOEK NATUREL KWARK CITROEN
## [16] KWARK SP SINAASAPPEL MIX MIHOEN SPECIAAL MIX NATUURLIJK SPAGH
## [19] SINAASAPPEL PERS SUIKER SNEEUWWAFELS TINT FR BOSVRUCHTEN
## [22] TINT FR LGHT AP PERZ TINT FR LGHT DR CITR Unknown
## [25] VRD ROND TARWE VRD ROND WIT WITTE BONEN IN TOMAT
## [28] WITTE BONEN IN TOMAT
## 9974 Levels: FILET AM.MAGER FILET AM.NAT GEMAKSBAK ... ZWITSERSE RACLETTE
## Coerce the per-transaction product list to the "transactions" class that
## apriori() is given below. The warning that follows reports that duplicated
## items within a transaction are removed by the coercion.
## NOTE(review): the "transactions" class and apriori() presumably come from
## the arules package; no library() call is visible in this section, so
## confirm the package is attached earlier.
Scandatasmall <- as(Scandatasmall, "transactions")
## Warning in asMethod(object): removing duplicated items in transactions
## Mine association rules with minimum support 0.005 and minimum confidence
## 0.01. The parameter names are written out in full rather than as the
## abbreviations `sup` / `conf`, so the call does not depend on partial
## name matching.
basket_rules <- apriori(
  Scandatasmall,
  parameter = list(support = 0.005, confidence = 0.01, target = "rules")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.01 0.1 1 none FALSE TRUE 5 0.005 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 85
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[9972 item(s), 17149 transaction(s)] done [0.06s].
## sorting and recoding items ... [588 item(s)] done [0.00s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 4 5 done [0.02s].
## writing ... [2923 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Plot item frequencies, limited to the top 25 items (topN = 25).
itemFrequencyPlot(Scandatasmall, topN = 25)
## Mine a broader rule set by lowering both thresholds to 0.001. The
## parameter names are written out in full rather than as the abbreviations
## `sup` / `conf`, so the call does not depend on partial name matching.
basket_rules_broad <- apriori(
  Scandatasmall,
  parameter = list(support = 0.001, confidence = 0.001, target = "rules")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.001 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 17
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[9972 item(s), 17149 transaction(s)] done [0.07s].
## sorting and recoding items ... [3154 item(s)] done [0.01s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 4 5 6 done [0.65s].
## writing ... [60126 rule(s)] done [0.02s].
## creating S4 object ... done [0.03s].
## Draw the default plot for the broad rule set.
## NOTE(review): a plot method for rule objects presumably comes from an
## add-on package such as arulesViz; confirm it is attached earlier.
plot(x = basket_rules_broad)