library(TraMineR)
##
## TraMineR stable version 2.0-11.1 (Built: 2019-04-24)
## Website: http://traminer.unige.ch
## Please type 'citation("TraMineR")' for citation information.
library(cluster)
# Load the cafe readings: one row per observed sequence, with the customer name
# in the first column followed by 30 position readings.
# The CSV starts with a UTF-8 byte-order mark. Without fileEncoding =
# "UTF-8-BOM" the BOM bytes are glued onto the first header, so the column
# comes out as "ï..Name" instead of "Name".
cafeData <- read.csv(
  file = "cafeData.csv",
  header = TRUE,
  fileEncoding = "UTF-8-BOM"
)
# Print the first 3 observations
head(cafeData, 3)
## Name Reading.1 Reading.2 Reading.3 Reading.4 Reading.5 Reading.6
## 1 Tim 5 5 5 5 5 5
## 2 Tim 3 3 3 3 3 3
## 3 Tim 1 1 1 1 1 1
## Reading.7 Reading.8 Reading.9 Reading.10 Reading.11 Reading.12
## 1 5 5 5 5 5 5
## 2 3 3 3 3 3 3
## 3 1 1 1 1 1 1
## Reading.13 Reading.14 Reading.15 Reading.16 Reading.17 Reading.18
## 1 5 5 7 7 7 7
## 2 3 3 3 3 3 3
## 3 1 1 1 1 1 1
## Reading.19 Reading.20 Reading.21 Reading.22 Reading.23 Reading.24
## 1 7 7 7 7 7 7
## 2 3 3 3 3 3 3
## 3 1 1 1 5 5 5
## Reading.25 Reading.26 Reading.27 Reading.28 Reading.29 Reading.30
## 1 7 7 7 5 5 5
## 2 3 3 3 3 3 3
## 3 5 5 5 5 5 5
# Size of the data set as c(rows, columns).
# NOTE(review): the original note said "I'm using 10 observations from 2
# customers in this example", which does not match the 147 rows reported
# below -- confirm which data file this transcript was produced from.
c(nrow(cafeData), ncol(cafeData))
## [1] 147 31
# Long labels for the 7 states, given in alphabet order (state 1 to state 7).
cafeData.seq.labels <- c(
  "BookShelf", "PS4", "Counter", "Side1", "Side2", "Business", "Corner"
)
# Build the state-sequence object. Column 1 holds the customer name, so the
# sequence itself is read from columns 2 to 31 (the 30 readings).
cafeData.seq <- seqdef(
  cafeData,
  var = seq(from = 2, to = 31),
  labels = cafeData.seq.labels
)
## [>] 7 distinct states appear in the data:
## 1 = 1
## 2 = 2
## 3 = 3
## 4 = 4
## 5 = 5
## 6 = 6
## 7 = 7
## [>] state coding:
## [alphabet] [label] [long label]
## 1 1 1 BookShelf
## 2 2 2 PS4
## 3 3 3 Counter
## 4 4 4 Side1
## 5 5 5 Side2
## 6 6 6 Business
## 7 7 7 Corner
## [>] 147 sequences in the data set
## [>] min/max sequence length: 30/30
These are the common coffee shop behaviours: 1) Takeaway, 2) Sit at the shop corner, 3) Use the bookshelf, 4) Gaming section, 5) Business seats, 6) Window seats.
# Substitution-cost matrix derived from the transition rates observed between
# states in the sequences.
coffeecc <- seqsubm(
  cafeData.seq,
  method = "TRATE"
)
## [>] creating substitution-cost matrix using transition rates ...
## [>] computing transition probabilities for states 1/2/3/4/5/6/7 ...
# Pairwise optimal-matching (OM) distances between all sequences, using the
# transition-rate substitution costs computed above.
coffee.OM <- seqdist(
  cafeData.seq,
  method = "OM",
  sm = coffeecc
)
## [>] 147 sequences with 7 distinct states
## [>] checking 'sm' (one value for each state, triangle inequality)
## [>] 115 distinct sequences
## [>] min/max sequence length: 30/30
## [>] computing distances using the OM metric
## [>] elapsed time: 0.06 secs
# Agglomerative hierarchical clustering with Ward's method, using agnes() from
# the cluster package. diss = TRUE because coffee.OM is already the
# optimal-matching distance matrix, not raw data.
clusterward <- agnes(coffee.OM, diss = TRUE, method = "ward")
# Plot the dendrogram (which.plots = 2 requests the dendrogram panel).
plot(clusterward, which.plots = 2, main = "Dendrogram")
# Cut the Ward tree into 6 groups: cluster6 holds one cluster id per sequence,
# stored as a factor with the readable levels "Type 1" ... "Type 6".
cluster6 <- factor(
  cutree(clusterward, k = 6),
  labels = paste("Type", seq_len(6))
)
# Number of sequences assigned to each cluster
table(cluster6)
## cluster6
## Type 1 Type 2 Type 3 Type 4 Type 5 Type 6
## 38 23 18 22 11 35
# Type 1 holds the most sequences (38) and Type 5 the fewest (11). Plot the
# most frequent sequences within each cluster to see what characterises them.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
seqfplot(cafeData.seq, group = cluster6, pbarw = TRUE)
# Reading of the plot reported by the original analysis: Type 1 is the
# bookshelf cluster, Type 2 the takeaway customers, Type 3 the window-side
# customers, Type 4 the customers who sit in the corner, Type 5 the gaming
# customers and Type 6 the business-seat customers.
# NOTE(review): the original note listed "5" twice; the last entry is
# presumably Type 6 -- confirm against the plot.
# Plot the clusters directly from the OM distance matrix (diss = TRUE).
clusplot(coffee.OM, cluster6, diss = TRUE)