Data

Data used for station casino operational analysis

# read data
library(readxl)
# NOTE(review): absolute, machine-specific path; the script only runs where this file exists.
casino_path <- "C:/Users/18602/Desktop/M2 - Operations Analytics/Casino Final_Dec 2013 Raw Data_for test.xlsx"
casino_data <- read_excel(casino_path, sheet = "Sheet1")
# load data in to a dataframe
casino_df <- data.frame(casino_data)
# format column names
casino_columns <- c(
  "Player", "Slots", "BJ", "Craps", "Bac", "Bingo", "Poker", "Other", "TotalSpend"
)
# The rename is purely positional, so stop if the sheet does not have exactly
# the expected number of columns instead of silently mislabelling them.
stopifnot(ncol(casino_df) == length(casino_columns))
names(casino_df) <- casino_columns
# view data frame structure
library(rmarkdown)
paged_table(casino_df)

Game & Total Bet Analysis

Total dollar amount played on each game

library(ggplot2)
library(dplyr)
library(tidyr)

# create barplot using ggplot
# Reshape the seven game columns to long form (one row per player and game).
# geom_col() stacks the per-player amounts, so each bar's height is the total
# amount played on that game.
casino_df %>%
  pivot_longer(
    cols = c(Slots, BJ, Craps, Bac, Bingo, Poker, Other),
    names_to = "Games",
    values_to = "Amount"
  ) %>%
  ggplot(aes(x = Games, y = Amount)) +
  geom_col(color = "dark blue") +
  # print the per-game total above each bar; `fun` and after_stat() replace the
  # deprecated `fun.y` and `..y..` spellings
  stat_summary(
    fun = sum,
    aes(label = format(after_stat(y), big.mark = ",", nsmall = 2), group = Games),
    geom = "text",
    vjust = -.3
  ) +
  theme_classic()

People & Wager Analysis

Histogram analysis of how many people played the game (the vertical axis) and how many dollars were wagered (the horizontal axis)

# load library
library(cowplot)

# Build a histogram of the amounts in one game column.
#   data   - data frame that holds the game column
#   column - name of the column as a string, e.g. "Slots"
#   bins   - number of histogram bins
# Returns a ggplot object. The column is looked up in `data` through the .data
# pronoun rather than written as casino_df$..., which ggplot2 discourages
# inside aes() because it bypasses the `data` argument.
game_histogram <- function(data, column, bins = 10) {
  ggplot(data, aes(x = .data[[column]])) +
    geom_histogram(color = "darkblue", fill = "lightblue", bins = bins) +
    xlab(paste(column, "Amount")) +
    ylab("Count")
}

# create individual histograms for gaming machines
plot1 <- game_histogram(casino_df, "Slots")

# BJ uses 5 bins; every other game uses 10
plot2 <- game_histogram(casino_df, "BJ", bins = 5)

plot3 <- game_histogram(casino_df, "Craps")

plot4 <- game_histogram(casino_df, "Bac")

plot5 <- game_histogram(casino_df, "Bingo")

plot6 <- game_histogram(casino_df, "Poker")

plot7 <- game_histogram(casino_df, "Other")

# arrange the histograms with plot_grid()
# layout I: all seven games in a single grid
plot_grid(
  plotlist = list(plot1, plot2, plot3, plot4, plot5, plot6, plot7),
  label_size = 9
)

# layout II: the same plots, at most two per figure, for a cleaner view
plot_grid(plotlist = list(plot1, plot2), label_size = 9)

plot_grid(plotlist = list(plot3, plot4), label_size = 9)

plot_grid(plotlist = list(plot5, plot6), label_size = 9)

plot_grid(plotlist = list(plot7), label_size = 9)

K-means Cluster Analysis

Summary of the variables of interest in the Station Casino data:

library(stats)

# keep only columns 2 to 8 (the variables of interest) for z-score standardization
interest_casino_df <- casino_df[, 2:8, drop = FALSE]
summary(interest_casino_df)
##      Slots               BJ              Craps              Bac          
##  Min.   :   0.00   Min.   :   0.00   Min.   :   0.00   Min.   :   0.000  
##  1st Qu.:  62.32   1st Qu.:   0.00   1st Qu.:   0.00   1st Qu.:   0.000  
##  Median : 103.52   Median :  24.31   Median :  15.94   Median :   7.471  
##  Mean   : 291.78   Mean   : 283.29   Mean   : 267.63   Mean   :  82.070  
##  3rd Qu.: 507.01   3rd Qu.: 189.58   3rd Qu.: 117.11   3rd Qu.:  35.273  
##  Max.   :1861.47   Max.   :7294.28   Max.   :7250.61   Max.   :2253.590  
##      Bingo            Poker            Other        
##  Min.   :  0.00   Min.   :  0.00   Min.   :   0.00  
##  1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:   0.00  
##  Median :  0.00   Median : 10.77   Median :  34.51  
##  Mean   : 10.09   Mean   : 54.59   Mean   : 132.97  
##  3rd Qu.:  0.00   3rd Qu.: 26.57   3rd Qu.:  73.85  
##  Max.   :212.71   Max.   :913.92   Max.   :1025.22
# standardize each column separately with scale(), then reassemble the
# standardized columns into a data frame
z_score_columns <- lapply(interest_casino_df, scale)
interest_cas_df_z_std <- as.data.frame(z_score_columns)

K-means Clustering Algorithm

Case1: K = 3

Compare results before and after standardization

summary(interest_casino_df[["Slots"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   62.32  103.52  291.78  507.01 1861.47
summary(interest_cas_df_z_std[["Slots"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8927 -0.7020 -0.5759  0.0000  0.6585  4.8023
# K = 3
# pin the RNG version and the seed so that a later re-run can be compared
# against these results
RNGversion("3.5.2")
set.seed(245)
player_clusters <- kmeans(interest_cas_df_z_std, centers = 3)

View the Cluster Centers

# print the fitted cluster centers (one row per cluster, one column per variable)
player_clusters[["centers"]]
##        Slots          BJ       Craps         Bac      Bingo      Poker
## 1 -0.4337103 -0.27827563 -0.26130566 -0.28330222  0.1125658 -0.4289548
## 2  0.9847084 -0.01341055 -0.09268778  0.02247473 -0.3157667  1.6334589
## 3  2.1434518  3.95368238  4.03265217  3.88072633 -0.3157667 -0.5157333
##        Other
## 1 -0.3933258
## 2  1.5324224
## 3 -0.6113191
# add cluster segmentation to the original dataframe
casino_df$cluster <- player_clusters$cluster
# Per-cluster result: one row per cluster with its mean TotalSpend.
# (casino_df[1:3, ] would only return the first three players, not the clusters.)
cluster3result <- aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)

Results: when K is 3

Mean spend of players by clusters

# mean TotalSpend within each cluster
aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)
##   cluster TotalSpend
## 1       1   289.0234
## 2       2  1847.3543
## 3       3  9905.4941

Case 2: K = 4

Compare results before and after standardization

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   62.32  103.52  291.78  507.01 1861.47
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8927 -0.7020 -0.5759  0.0000  0.6585  4.8023

View the Cluster Centers

# print the fitted cluster centers (one row per cluster, one column per variable)
player_clusters[["centers"]]
##        Slots           BJ       Craps         Bac      Bingo      Poker
## 1 -0.8200530 -0.290445266 -0.27088405 -0.30341982 -0.3157667 -0.4557710
## 2  1.0291283 -0.008775564 -0.08968516  0.02818951 -0.3157667  1.6923633
## 3  2.1434518  3.953682376  4.03265217  3.88072633 -0.3157667 -0.5157333
## 4 -0.4084348 -0.276405342 -0.26001054 -0.28085284  0.1366211 -0.4254913
##        Other
## 1  1.7713195
## 2  1.4936153
## 3 -0.6113191
## 4 -0.5057530
# add cluster segmentation to the original dataframe
casino_df$cluster <- player_clusters$cluster
# Per-cluster result: one row per cluster with its mean TotalSpend.
# (casino_df[1:4, ] would only return the first four players, not the clusters.)
cluster4result <- aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)

Results: when K is 4

Mean spend of players by clusters

##   cluster TotalSpend
## 1       1   592.1981
## 2       2  1868.0519
## 3       3  9905.4941
## 4       4   277.4624

Case 3: K = 5

Compare results before and after standardization

summary(interest_casino_df[["Slots"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   62.32  103.52  291.78  507.01 1861.47
summary(interest_cas_df_z_std[["Slots"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8927 -0.7020 -0.5759  0.0000  0.6585  4.8023
# pin the RNG version and the seed so that a later re-run can be compared
# against these results
RNGversion("3.5.2")
set.seed(245)
player_clusters <- kmeans(interest_cas_df_z_std, centers = 5)

View the Cluster Centers

# print the fitted cluster centers (one row per cluster, one column per variable)
player_clusters[["centers"]]
##        Slots          BJ       Craps         Bac      Bingo      Poker
## 1 -0.5435643 -0.27261721 -0.25741024 -0.27581028 -0.3101923 -0.4205621
## 2 -0.8926563 -0.31505810 -0.28657423 -0.33208753 -0.3157667  4.3996100
## 3  0.3133618 -0.31505810 -0.28657423 -0.33208753  2.9567489 -0.5157333
## 4  2.1434518  3.95368238  4.03265217  3.88072633 -0.3157667 -0.5157333
## 5  1.1615845  0.01489864 -0.07453187  0.05575009 -0.3157667  1.3461464
##        Other
## 1 -0.3630478
## 2 -0.6113191
## 3 -0.6113191
## 4 -0.6113191
## 5  1.7448049
# add cluster segmentation to the original dataframe
casino_df$cluster <- player_clusters$cluster
# Per-cluster result: one row per cluster with its mean TotalSpend.
# (casino_df[1:5, ] would only return the first five players, not the clusters.)
cluster5result <- aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)

Results: when K is 5

Mean spend of players by clusters

# mean TotalSpend within each cluster
aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)
##   cluster TotalSpend
## 1       1   257.6646
## 2       2   520.3130
## 3       3   498.7284
## 4       4  9905.4941
## 5       5  1971.5843

Case 4: K = 6

Compare results before and after standardization

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   62.32  103.52  291.78  507.01 1861.47
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8927 -0.7020 -0.5759  0.0000  0.6585  4.8023

View the Cluster Centers

# print the fitted cluster centers (one row per cluster, one column per variable)
player_clusters[["centers"]]
##        Slots          BJ       Craps         Bac      Bingo      Poker
## 1  0.3133618 -0.31505810 -0.28657423 -0.33208753  2.9567489 -0.5157333
## 2 -0.8926563 -0.31505810 -0.28657423 -0.33208753 -0.3157667  4.3996100
## 3  2.1504833  3.97227120  4.05317190  3.88397647 -0.3157667 -0.5157333
## 4 -0.8141934 -0.24680256 -0.22578158  1.89787184 -0.3157667 -0.5157333
## 5  1.1615845  0.01489864 -0.07453187  0.05575009 -0.3157667  1.3461464
## 6 -0.5377997 -0.27191638 -0.25692865 -0.31071471 -0.3101003 -0.4189905
##        Other
## 1 -0.6113191
## 2 -0.6113191
## 3 -0.6113191
## 4 -0.6113191
## 5  1.7448049
## 6 -0.3589480
# add cluster segmentation to the original dataframe
casino_df$cluster <- player_clusters$cluster
# Per-cluster result: one row per cluster with its mean TotalSpend.
# (casino_df[1:6, ] would only return the first six players, not the clusters.)
cluster6result <- aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)

Results: when K is 6

Mean spend of players by clusters

# mean TotalSpend within each cluster
aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)
##   cluster TotalSpend
## 1       1   498.7284
## 2       2   520.3130
## 3       3  9944.4732
## 4       4   694.8912
## 5       5  1971.5843
## 6       6   253.0637

Scatter plot matrix of clusters when K is 6

library(dplyr)
# one color per cluster id; points and legend both index this palette by
# cluster id so a color always refers to the same cluster
my_cols <- c("#00AFBB", "#E7B800", "#FC4E07", "#33FFB5", "#090D90", "#A70BC3")
pairs(
  casino_df[, 2:8],
  pch = 20,
  cex = 0.1,
  main = "Station Casino Data - Cluster Analysis",
  col = my_cols[casino_df$cluster],
  oma = c(4, 4, 6, 10)
)
# xpd = TRUE turns off clipping to the plot region so the legend can be drawn
# outside the panels
par(xpd = TRUE)
# unique() alone returns ids in order of first appearance, which would pair
# labels with the wrong fill colors; sort them and pick the matching colors
cluster_ids <- sort(unique(casino_df$cluster))
legend(1, .7, legend = as.vector(cluster_ids), fill = my_cols[cluster_ids])

Key takeaways

  • From the K-means cluster analysis with K = 6, we know that players in cluster 3 had the highest mean spend
  • Cluster 3 players are drawn in the third color of the palette (#FC4E07) in the scatter plot
  • The majority of players who play Slots and BJ seem to fall in cluster 1. A similar pattern is observed among Slots, BJ, Craps and Bac
  • Players who play more Bingo seem to play less Slots. This could be due to the age of Bingo players in general and the length of time a Bingo session takes
  • Conversely, it appears that players who play more Slots tend to play less Bingo
  • Based on the K-means cluster result, I would target high comp awards for players who visit slot machines