Attributes:
# Restore the objects saved in casino.RData (the `casino` data frame used
# below is expected to be among them) and preview its first six rows.
load(file = "casino.RData")
head(casino, n = 6L)
## Player Slots BJ Craps Bac Bingo Poker
## 1 Player 1 1013.36857 6190.07773 4276.39493 867.964528 0.0000 0.00000
## 2 Player 2 68.15392 22.93871 23.20411 11.706183 0.0000 27.71084
## 3 Player 3 147.59424 0.00000 0.00000 0.000000 0.0000 0.00000
## 4 Player 4 63.07566 17.37241 28.21168 9.341737 0.0000 22.68499
## 5 Player 5 92.16018 44.20142 17.94111 9.887295 0.0000 25.55013
## 6 Player 6 658.04330 0.00000 0.00000 0.000000 105.5141 0.00000
## Other Total.Spend
## 1 0.00000 12347.8058
## 2 53.40262 207.1164
## 3 0.00000 147.5942
## 4 52.15407 192.8405
## 5 60.48731 250.2275
## 6 0.00000 763.5574
# Standardize every column except the first (the Player label): scale()
# centers each column and divides it by its standard deviation.
# NOTE(review): Total.Spend, which in the preview equals the sum of the game
# columns, is clustered on as well - confirm that is intended.
casino.stand <- scale(casino[-1])
# Fix the seed so the random starts of kmeans() are reproducible.
set.seed(7)
# k-means with 6 centers, keeping the best of 1000 random starts.
k.means.fit <- kmeans(casino.stand, centers = 6, nstart = 1000)
# Draw the fitted clusters with autoplot(): every observation is labelled and
# a "norm" frame is requested around each cluster. The seed is fixed right
# before plotting.
# NOTE(review): autoplot() for kmeans objects presumably comes from ggfortify,
# where x / y choose which component goes on each axis - confirm.
# `label = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
set.seed(1)
autoplot(
  k.means.fit,
  data = casino.stand,
  label = TRUE,
  frame = TRUE,
  frame.type = "norm"
) +
  theme_minimal()
## Warning: Computation failed in `stat_ellipse()`:
## the leading minor of order 2 is not positive definite
# Same plot with x = 2 and y = 3.
autoplot(
  k.means.fit,
  data = casino.stand,
  x = 2,
  y = 3,
  label = TRUE,
  frame = TRUE,
  frame.type = "norm"
) +
  theme_minimal()
## Warning: Computation failed in `stat_ellipse()`:
## the leading minor of order 2 is not positive definite
# Same plot with x = 1 and y = 3.
autoplot(
  k.means.fit,
  data = casino.stand,
  x = 1,
  y = 3,
  label = TRUE,
  frame = TRUE,
  frame.type = "norm"
) +
  theme_minimal()
## Warning: Computation failed in `stat_ellipse()`:
## the leading minor of order 2 is not positive definite
### 4.1. An alternative way to plot the clusters
library(cluster)
# Second view of the same cluster assignment, drawn with cluster::clusplot()
# on the standardized data; colour, shading, labels and lines are set below.
clusplot(
  casino.stand,
  k.means.fit$cluster,
  main = "2D representation of the Cluster solution",
  color = TRUE,
  shade = TRUE,
  labels = 2,
  lines = 0
)
#knitr::kable(k.means.fit$centers,digits = 0)
# Attach each row's k-means cluster label to the original (unscaled) data.
cluster <- k.means.fit$cluster
#class(y)
casino.final1 <- cbind(casino, cluster)

# Long format: one row per original row and spend column
# (Slots through Total.Spend), with the column name in `game` and the
# value in `amount`.
casino.gather1 <- casino.final1 %>%
  gather(game, amount, Slots:Total.Spend)

# Box plots of the amounts per spend column, one panel per cluster. Each
# panel gets its own axis scales, and `varwidth = TRUE` is spelled out
# because `T` can be reassigned.
ggplot(casino.gather1, aes(x = game, y = amount)) +
  geom_boxplot(varwidth = TRUE, fill = "plum") +
  labs(
    title = "Box plot of amount spent on each game by cluster",
    subtitle = "cluster generated by kmeans",
    x = "Games",
    y = "Amount"
  ) +
  facet_wrap(~as.factor(cluster), scales = "free")
# Per-cluster profile: number of rows and the mean of every spend column.
# `na.rm = TRUE` is spelled out because `T` can be reassigned.
sum1 <- casino.final1 %>%
  group_by(cluster) %>%
  summarise(
    n = n(),
    Slots = mean(Slots, na.rm = TRUE),
    BJ = mean(BJ, na.rm = TRUE),
    Craps = mean(Craps, na.rm = TRUE),
    Bac = mean(Bac, na.rm = TRUE),
    Bingo = mean(Bingo, na.rm = TRUE),
    Poker = mean(Poker, na.rm = TRUE),
    Other = mean(Other, na.rm = TRUE),
    Total.Spend = mean(Total.Spend, na.rm = TRUE)
  )
# Round every column to whole numbers for display.
sum1 <- round(sum1, 0)
# Share of all rows of `casino` that fall in each cluster, in percent.
sum1$percent <- round(sum1$n / nrow(casino) * 100, 2)
DT::datatable(sum1)
reference: https://rstudio-pubs-static.s3.amazonaws.com/33876_1d7794d9a86647ca90c4f182df93f0e8.html
#' Plot the within-groups sum of squares against the number of clusters.
#'
#' For one cluster the value is the total sum of squares of `data`; for each
#' k in 2..nc it is the summed within-cluster sum of squares of a k-means
#' fit with `centers = k`. The RNG is re-seeded with `seed` before every
#' fit, so repeated calls draw the same curve (this also resets the
#' caller's global RNG state).
#'
#' @param data Numeric matrix or data frame, one observation per row.
#' @param nc Largest number of clusters to try; a single number >= 1.
#'   Fractional values are truncated, as `1:nc` would do.
#' @param seed Seed passed to `set.seed()` before each k-means fit.
#' @return Invisibly, the numeric vector of length `nc` that was plotted.
wssplot <- function(data, nc = 15, seed = 1234) {
  stopifnot(is.numeric(nc), length(nc) == 1L, !is.na(nc), nc >= 1)
  nc <- as.integer(nc)
  # Preallocate instead of growing the vector inside the loop.
  wss <- numeric(nc)
  # One cluster: within-groups SS equals the total SS around column means.
  wss[1] <- (nrow(data) - 1) * sum(apply(data, 2, var))
  # seq_len(nc)[-1] is empty when nc == 1, whereas 2:nc would count down
  # (2, 1) and leave `wss` longer than the x axis, making plot() fail.
  for (k in seq_len(nc)[-1]) {
    set.seed(seed)
    wss[k] <- sum(kmeans(data, centers = k)$withinss)
  }
  plot(seq_len(nc), wss, type = "b", xlab = "Number of Clusters",
       ylab = "Within groups sum of squares")
  invisible(wss)
}
# Within-groups sum of squares for 1 to 6 clusters on the standardized data.
wssplot(data = casino.stand, nc = 6)