Inlezen van het bestand

## Point the session at the folder holding the scanning data and echo it.
## NOTE(review): setwd() ties the script to one machine's folder layout, and
## the read.csv() call below already uses a full "~"-based path -- consider
## dropping setwd() once nothing else relies on the working directory.
setwd("~/R/Scanningdata")
getwd()
## [1] "C:/Users/menno_000/Documents/R/Scanningdata"
## Read the semicolon-separated CSV export into a data frame and report its
## dimensions (rows, columns).
scandata <- read.csv("~/R/Scanningdata/Mei2017Demologcsv.csv", sep = ";")
dim(scandata)
## [1] 388747     18

Market basket analysis

https://discourse.snowplowanalytics.com/t/market-basket-analysis-identifying-products-and-content-that-go-well-together/1132#metrics

## Build a data frame holding one row per scanned product line.
## Header and footer records (Sequence == 0) are dropped, and the two columns
## needed for the analysis are selected by name rather than by position so the
## result does not depend on the column order of the input.
Scandatasmall <- scandata[
  scandata$Sequence != 0,
  c("Transactionnumber", "Product")
]

## Sort by transaction number, then by product. A single sort after filtering
## is sufficient: filtering keeps row order, so sorting both before and after
## the filter yields the same result as sorting once.
Scandatasmall <- Scandatasmall[
  order(Scandatasmall$Transactionnumber, Scandatasmall$Product),
]

head(Scandatasmall)
##    Transactionnumber              Product
## 46             28277            CAFE NOIR
## 26             28277       HALFVOLLE MELK
## 55             28277    MINI KOKOSROTSJES
## 53             28277  RB LUXE KRAKELINGEN
## 65             28277 TOILETPAPIER 4 LAAGS
## 41             28277           ZACHT ZOUT

Herstructureren data

De eerste stap in het artikel is het herstructureren van de data, zodat de analyse kan plaatsvinden.

## Restructure the data frame into a list with one element per transaction:
## each element is named after its transaction number and holds the products
## of that transaction.
Scandatasmall <- with(Scandatasmall, split(Product, Transactionnumber))
head(Scandatasmall)
## $`28277`
## [1] CAFE NOIR            HALFVOLLE MELK       MINI KOKOSROTSJES   
## [4] RB LUXE KRAKELINGEN  TOILETPAPIER 4 LAAGS ZACHT ZOUT          
## 9974 Levels:  FILET AM.MAGER  FILET AM.NAT  GEMAKSBAK ... ZWITSERSE RACLETTE
## 
## $`28278`
##  [1] CHIPS NATUREL        CHIPS NATUREL        DIGESTIVE BISCUIT   
##  [4] DIGESTIVE BISCUIT    DIGESTIVE BISCUIT    DRINK FRAMBOOS      
##  [7] DRINK FRAMBOOS       DRINK FRAMBOOS       HALFVOLLE MELK      
## [10] HALFVOLLE MELK       HALFVOLLE MELK       KOKOS BISCUITS      
## [13] KOKOS BISCUITS       PW ENGL.MEL.40X2G    SINAS REGULAR       
## [16] SINAS REGULAR        THEEZ.PG.GROEN       THEEZ.PG.GROEN      
## [19] THEEZ.PG.GROEN       TUC BACON            TUC BACON           
## [22] TUC BACON            WILHELMINA PEPERMUNT WILHELMINA PEPERMUNT
## [25] WILHELMINA PEPERMUNT WILHELMINA PEPERMUNT
## 9974 Levels:  FILET AM.MAGER  FILET AM.NAT  GEMAKSBAK ... ZWITSERSE RACLETTE
## 
## $`28279`
##  [1] KANEELBISCUIT      KANEELBISCUIT      KANEELBISCUIT     
##  [4] KANEELBISCUIT      KANEELBISCUIT      RYSTWAFELS ZEEZOUT
##  [7] RYSTWAFELS ZEEZOUT RYSTWAFELS ZEEZOUT RYSTWAFELS ZEEZOUT
## [10] RYSTWAFELS ZEEZOUT RYSTWAFELS ZEEZOUT SIROOP AARDBEI    
## [13] SIROOP AARDBEI     SIROOP AARDBEI     SIROOP AARDBEI    
## [16] SIROOP AARDBEI     SIROOP AARDBEI     SIROOP AARDBEI    
## [19] SIROOP AARDBEI     SIROOP FRAMBOOS    SIROOP FRAMBOOS   
## [22] SIROOP FRAMBOOS    SIROOP FRAMBOOS    SIROOP FRAMBOOS   
## [25] SIROOP FRAMBOOS    SIROOP FRAMBOOS    SIROOP FRAMBOOS   
## [28] SIROOP FRAMBOOS    SIROOP FRAMBOOS    WINEGUMS          
## [31] WINEGUMS          
## 9974 Levels:  FILET AM.MAGER  FILET AM.NAT  GEMAKSBAK ... ZWITSERSE RACLETTE
## 
## $`28280`
##  [1] CH SENS MEX PEPP&CRM HALFVOLLE MELK       HALFVOLLE MELK      
##  [4] HALFVOLLE MELK       HALFVOLLE MELK       HALFVOLLE MELK      
##  [7] JUPILER BIER         KARNEMELK            KARNEMELK           
## [10] KARNEMELK            PINDA PARTYPACK      SUPERCHIPS PAPRIKA  
## 9974 Levels:  FILET AM.MAGER  FILET AM.NAT  GEMAKSBAK ... ZWITSERSE RACLETTE
## 
## $`28281`
##  [1] AMB SLAGERSACHTERHAM DBK KIPFILET 3 O/S   HALFV.VANILLEYOGHURT
##  [4] HALFVOLLE MELK       HAVERMOUTPAP         HAVERMOUTPAP        
##  [7] HOPJES VLA           KERSEN ZONDER PIT    KRISTALSUIKER       
## [10] Unknown             
## 9974 Levels:  FILET AM.MAGER  FILET AM.NAT  GEMAKSBAK ... ZWITSERSE RACLETTE
## 
## $`28282`
##  [1] BOEREN GEHAKTROL5ST* C-A-S TOMATENCREME   CAFE NOIR           
##  [4] CHEESEBURGER         CHINESE MIE          CHINESE MIE         
##  [7] DR.YOGH.AARDBEI      DR.YOGH.FRAMBOOS     DRINK AARDBEI KERS  
## [10] DRINK MANGO PASSIEVR GEH.BROC./KAAS       GEHAKT.SCHNIT.KROK.*
## [13] HUTSPOT              KROEPOEK NATUREL     KWARK CITROEN       
## [16] KWARK SP SINAASAPPEL MIX MIHOEN SPECIAAL  MIX NATUURLIJK SPAGH
## [19] SINAASAPPEL PERS     SUIKER SNEEUWWAFELS  TINT FR BOSVRUCHTEN 
## [22] TINT FR LGHT AP PERZ TINT FR LGHT DR CITR Unknown             
## [25] VRD ROND TARWE       VRD ROND WIT         WITTE BONEN IN TOMAT
## [28] WITTE BONEN IN TOMAT
## 9974 Levels:  FILET AM.MAGER  FILET AM.NAT  GEMAKSBAK ... ZWITSERSE RACLETTE

Runnen van het algoritme

## Coerce the per-transaction product lists to a "transactions" object.
## NOTE(review): as(..., "transactions") and apriori() presumably come from the
## arules package; no library() call is visible in this section -- confirm the
## package is attached earlier.
Scandatasmall <- as(Scandatasmall, "transactions")
## Warning in asMethod(object): removing duplicated items in transactions
## The warning above is expected: a product that occurs several times within
## one transaction is reduced to a single item during the coercion.
## Mine association rules with minimum support 0.005 and minimum confidence
## 0.01. The parameter names are written out in full (support, confidence)
## instead of relying on partial matching of the abbreviations sup / conf.
basket_rules <- apriori(
  Scandatasmall,
  parameter = list(support = 0.005, confidence = 0.01, target = "rules")
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##        0.01    0.1    1 none FALSE            TRUE       5   0.005      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 85 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[9972 item(s), 17149 transaction(s)] done [0.06s].
## sorting and recoding items ... [588 item(s)] done [0.00s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 4 5 done [0.02s].
## writing ... [2923 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].

Plot top producten

## Plot the item frequencies of the transactions, limited to the 25 most
## frequent items (topN = 25).
itemFrequencyPlot(Scandatasmall, topN = 25)

Visualiseren van de gedefinieerde regels

## Mine a broader rule set for visualisation by lowering both thresholds to
## 0.001. The parameter names are written out in full (support, confidence)
## instead of relying on partial matching of the abbreviations sup / conf.
basket_rules_broad <- apriori(
  Scandatasmall,
  parameter = list(support = 0.001, confidence = 0.001, target = "rules")
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##       0.001    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 17 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[9972 item(s), 17149 transaction(s)] done [0.07s].
## sorting and recoding items ... [3154 item(s)] done [0.01s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 4 5 6 done [0.65s].
## writing ... [60126 rule(s)] done [0.02s].
## creating S4 object  ... done [0.03s].
## Visualise the broad rule set mined above.
## NOTE(review): plot() on a rules object presumably relies on a method from
## the arulesViz package; no library() call is visible in this section --
## confirm the package is attached earlier.
plot(basket_rules_broad)