1. Casino data attributes

Attributes:

2. Load and standardize the data

# Restore the objects saved in casino.RData (this provides `casino`) and
# preview the first six rows.
load(file = "casino.RData")
head(casino, n = 6L)
##     Player      Slots         BJ      Craps        Bac    Bingo    Poker
## 1 Player 1 1013.36857 6190.07773 4276.39493 867.964528   0.0000  0.00000
## 2 Player 2   68.15392   22.93871   23.20411  11.706183   0.0000 27.71084
## 3 Player 3  147.59424    0.00000    0.00000   0.000000   0.0000  0.00000
## 4 Player 4   63.07566   17.37241   28.21168   9.341737   0.0000 22.68499
## 5 Player 5   92.16018   44.20142   17.94111   9.887295   0.0000 25.55013
## 6 Player 6  658.04330    0.00000    0.00000   0.000000 105.5141  0.00000
##      Other Total.Spend
## 1  0.00000  12347.8058
## 2 53.40262    207.1164
## 3  0.00000    147.5942
## 4 52.15407    192.8405
## 5 60.48731    250.2275
## 6  0.00000    763.5574
# Standardize the variables: drop the first column (the Player label), then
# center and scale each remaining column with scale()'s defaults.
casino.stand <- scale(casino[-1])

3. Create clusters - K-means

# Fix the RNG seed so the random starting centers are reproducible.
set.seed(7)
# k = 6 clusters; kmeans() keeps the best solution out of 1000 random starts.
k.means.fit <- kmeans(casino.stand, centers = 6, nstart = 1000)

4. Plot the clusters

set.seed(1)
# Cluster plot with point labels and a "norm" frame around each cluster.
# NOTE(review): autoplot() here is presumably ggfortify's kmeans method, which
# draws the data on principal components (default axes) - confirm.
autoplot(
  k.means.fit,
  data = casino.stand,
  label = TRUE,
  frame = TRUE,
  frame.type = "norm"
) +
  theme_minimal()
## Warning: Computation failed in `stat_ellipse()`:
## the leading minor of order 2 is not positive definite

# Same cluster plot, drawn on axes 2 and 3.
# NOTE(review): x / y presumably select principal components - confirm.
autoplot(
  k.means.fit,
  data = casino.stand,
  x = 2,
  y = 3,
  label = TRUE,
  frame = TRUE,
  frame.type = "norm"
) +
  theme_minimal()
## Warning: Computation failed in `stat_ellipse()`:
## the leading minor of order 2 is not positive definite

# Same cluster plot, drawn on axes 1 and 3.
# NOTE(review): x / y presumably select principal components - confirm.
autoplot(
  k.means.fit,
  data = casino.stand,
  x = 1,
  y = 3,
  label = TRUE,
  frame = TRUE,
  frame.type = "norm"
) +
  theme_minimal()
## Warning: Computation failed in `stat_ellipse()`:
## the leading minor of order 2 is not positive definite

### 4.1. An alternative way to plot the clusters

library(cluster)
# 2-D view of the k-means partition from the cluster package: colored and
# shaded ellipses, labels on all points and ellipses (labels = 2), and no
# lines drawn between clusters (lines = 0).
clusplot(
  casino.stand,
  k.means.fit$cluster,
  main = "2D representation of the Cluster solution",
  color = TRUE,
  shade = TRUE,
  labels = 2,
  lines = 0
)

5. Append cluster data back to original dataframe

#knitr::kable(k.means.fit$centers,digits = 0)
# Cluster label that k-means assigned to each row of the data.
cluster <- k.means.fit$cluster
#class(y)
# Attach the labels to the original (unscaled) data as a `cluster` column.
casino.final1 <- cbind(casino, cluster = cluster)

6. Descriptive statistics of clusters

# Reshape to long format: the columns Slots through Total.Spend are stacked
# into a `game` name column and an `amount` value column.
casino.gather1 <- gather(
  casino.final1,
  key = game,
  value = amount,
  Slots:Total.Spend
)


# One panel per cluster, each with its own axis scales, showing the spread of
# the amount spent per game. Box widths vary with group size (varwidth).
ggplot(casino.gather1, aes(x = game, y = amount)) +
  geom_boxplot(varwidth = TRUE, fill = "plum") +
  labs(
    title = "Box plot of amount spent on each game by cluster",
    subtitle = "cluster generated by kmeans",
    x = "Games",
    y = "Amount"
  ) +
  facet_wrap(~ as.factor(cluster), scales = "free")

# Per-cluster profile: number of players plus the mean spend on each game and
# in total (missing values ignored).
sum1 <- casino.final1 %>%
  group_by(cluster) %>%
  summarise(
    n = n(),
    Slots = mean(Slots, na.rm = TRUE),
    BJ = mean(BJ, na.rm = TRUE),
    Craps = mean(Craps, na.rm = TRUE),
    Bac = mean(Bac, na.rm = TRUE),
    Bingo = mean(Bingo, na.rm = TRUE),
    Poker = mean(Poker, na.rm = TRUE),
    Other = mean(Other, na.rm = TRUE),
    Total.Spend = mean(Total.Spend, na.rm = TRUE)
  )

# Round every column to whole numbers for display.
sum1 <- round(sum1, 0)
# Share of all players that falls in each cluster, in percent.
sum1$percent <- round(sum1$n / nrow(casino) * 100, 2)

# Show the summary as an interactive table.
DT::datatable(sum1)

Appendix: Elbow criterion - determining the number of clusters

Reference: https://rstudio-pubs-static.s3.amazonaws.com/33876_1d7794d9a86647ca90c4f182df93f0e8.html

# Elbow plot: within-groups sum of squares for 1..nc k-means clusters.
#
# Args:
#   data: numeric matrix or data frame, one observation per row.
#   nc:   largest number of clusters to try (default 15); must be >= 1.
#   seed: RNG seed that is reset before every kmeans() run (default 1234).
#
# Returns:
#   Invisibly, the numeric vector that is plotted; element k is the
#   within-groups sum of squares of the k-cluster solution.
wssplot <- function(data, nc=15, seed=1234){
  # Truncate fractional values the same way `1:nc` did, then validate.
  nc <- as.integer(nc)
  stopifnot(length(nc) == 1L, !is.na(nc), nc >= 1L)

  wss <- numeric(nc)
  # k = 1: every point is in one cluster, so this is the total sum of squares.
  wss[1] <- (nrow(data) - 1) * sum(apply(data, 2, var))

  # seq_len(nc)[-1] is empty when nc == 1, unlike 2:nc which counts backwards.
  for (i in seq_len(nc)[-1]) {
    set.seed(seed)
    wss[i] <- sum(kmeans(data, centers = i)$withinss)
  }

  plot(seq_len(nc), wss, type = "b", xlab = "Number of Clusters",
       ylab = "Within groups sum of squares")
  invisible(wss)
}

# Draw the elbow plot for 1 to 6 clusters on the standardized data.
wssplot(data = casino.stand, nc = 6)