Read Data, Create State Sequence Object

library(TraMineR)
## 
## TraMineR stable version 2.0-11.1 (Built: 2019-04-24)
## Website: http://traminer.unige.ch
## Please type 'citation("TraMineR")' for citation information.
library(cluster)
# Load the cafe readings. The CSV appears to start with a UTF-8 byte-order
# mark: read with the default encoding, the mark leaks into the first column
# name (it shows up as "ï..Name"). fileEncoding = "UTF-8-BOM" strips the mark
# so the first column is simply "Name".
cafeData <- read.csv(
  file = "cafeData.csv",
  header = TRUE,
  fileEncoding = "UTF-8-BOM"
)
# Print the first 3 observations
head(cafeData, 3)
##   ï..Name Reading.1 Reading.2 Reading.3 Reading.4 Reading.5 Reading.6
## 1     Tim         5         5         5         5         5         5
## 2     Tim         3         3         3         3         3         3
## 3     Tim         1         1         1         1         1         1
##   Reading.7 Reading.8 Reading.9 Reading.10 Reading.11 Reading.12
## 1         5         5         5          5          5          5
## 2         3         3         3          3          3          3
## 3         1         1         1          1          1          1
##   Reading.13 Reading.14 Reading.15 Reading.16 Reading.17 Reading.18
## 1          5          5          7          7          7          7
## 2          3          3          3          3          3          3
## 3          1          1          1          1          1          1
##   Reading.19 Reading.20 Reading.21 Reading.22 Reading.23 Reading.24
## 1          7          7          7          7          7          7
## 2          3          3          3          3          3          3
## 3          1          1          1          5          5          5
##   Reading.25 Reading.26 Reading.27 Reading.28 Reading.29 Reading.30
## 1          7          7          7          5          5          5
## 2          3          3          3          3          3          3
## 3          5          5          5          5          5          5
# Dimensions of the data set: one row per observed sequence; the columns are
# the customer name followed by the 30 readings.
# NOTE(review): the original note said "I'm using 10 observations from 2
# customers in this example", which does not match the 147 rows printed
# below -- confirm which is correct.
dim(cafeData)
## [1] 147  31
# Long labels for the seven states, given in the order of the state codes 1-7
cafeData.seq.labels <- c(
  "BookShelf", "PS4", "Counter", "Side1", "Side2", "Business", "Corner"
)
# Build the state sequence object from columns 2 to 31; column 1 holds the
# customer name and is not part of the sequence
cafeData.seq <- seqdef(
  data = cafeData,
  var = 2:31,
  labels = cafeData.seq.labels
)
##  [>] 7 distinct states appear in the data:
##      1 = 1
##      2 = 2
##      3 = 3
##      4 = 4
##      5 = 5
##      6 = 6
##      7 = 7
##  [>] state coding:
##        [alphabet]  [label]  [long label]
##      1  1           1        BookShelf
##      2  2           2        PS4
##      3  3           3        Counter
##      4  4           4        Side1
##      5  5           5        Side2
##      6  6           6        Business
##      7  7           7        Corner
##  [>] 147 sequences in the data set
##  [>] min/max sequence length: 30/30

Cluster

These are the common coffee shop behaviours: 1) Takeaway, 2) Sit at shop corner, 3) Use the bookshelf, 4) Gaming section, 5) Business seats, 6) Window seats.

# Derive the substitution-cost matrix from the transition rates observed
# between states in the data
coffeecc <- seqsubm(
  cafeData.seq,
  method = "TRATE"
)
##  [>] creating substitution-cost matrix using transition rates ...
##  [>] computing transition probabilities for states 1/2/3/4/5/6/7 ...
# Compute the pairwise optimal matching (OM) distances between sequences,
# using the substitution costs obtained above
coffee.OM <- seqdist(
  cafeData.seq,
  method = "OM",
  sm = coffeecc
)
##  [>] 147 sequences with 7 distinct states
##  [>] checking 'sm' (one value for each state, triangle inequality)
##  [>] 115 distinct sequences
##  [>] min/max sequence length: 30/30
##  [>] computing distances using the OM metric
##  [>] elapsed time: 0.06 secs
# Agglomerative hierarchical clustering with Ward's method, using agnes()
# from the cluster package on the optimal matching distances computed
# earlier. diss = TRUE tells agnes() that coffee.OM is already a
# dissimilarity matrix rather than raw observations.
clusterward <- agnes(coffee.OM, diss = TRUE, method = "ward")
# Plot the dendrogram (which.plots = 2; 1 would select the banner plot)
plot(clusterward, which.plots = 2, main = "Dendrogram")

# Cut the tree into 6 groups; cluster6 stores the cluster membership of each
# sequence as a factor with levels "Type 1" ... "Type 6"
cluster6 <- factor(
  cutree(clusterward, k = 6),
  labels = paste("Type", 1:6)
)
# Number of sequences in each cluster
table(cluster6)
## cluster6
## Type 1 Type 2 Type 3 Type 4 Type 5 Type 6 
##     38     23     18     22     11     35
# Cluster 1 clearly has the most sequences and cluster 5 the fewest, but
# let's visualise it: sequence frequency plot with one panel per cluster.
# pbarw = TRUE makes the bar widths proportional to the sequence frequencies.
seqfplot(cafeData.seq, group = cluster6, pbarw = TRUE)

# From the plot we can confirm that Cluster 1 was the bookshelf cluster,
# 2 the takeaway customers, 3 the window-side customers, 4 the customers who
# sit in the corner, 5 the gaming customers and 6 the business-seat customers.
# Cluster plot of the same grouping; diss = TRUE marks coffee.OM as a
# dissimilarity matrix rather than raw data.
clusplot(coffee.OM, cluster6, diss = TRUE)