Station Casino Operational Analysis

Data

Data used for station casino operational analysis

# read data
library(readxl)
# NOTE(review): absolute, machine-specific path; point this at the local copy
# of the workbook before running
casino_data_path <- "C:/Users/18602/Desktop/M2 - Operations Analytics/Casino Final_Dec 2013 Raw Data_for test.xlsx"
casino_data <- read_excel(casino_data_path, sheet = "Sheet1")
# load data into a data frame
casino_df <- data.frame(casino_data)
# format column names; the rename is positional, so verify the column count
# first instead of silently producing NA or misaligned names
casino_col_names <- c(
  "Player", "Slots", "BJ", "Craps", "Bac",
  "Bingo", "Poker", "Other", "TotalSpend"
)
stopifnot(ncol(casino_df) == length(casino_col_names))
names(casino_df) <- casino_col_names
# show the data frame as a paged table
library(rmarkdown)
paged_table(casino_df)

Game & Total Bet Analysis

Total dollar amount played on each game

library(ggplot2)
library(dplyr)
library(tidyr)

# Bar plot of the total dollar amount played on each game, with the summed
# amount printed above each bar.
casino_df %>%
  # reshape the seven game columns (Slots through Other) to long form;
  # Player and TotalSpend stay out of the pivot
  pivot_longer(
    cols = Slots:Other,
    names_to = "Games",
    values_to = "Amount"
  ) %>%
  ggplot(aes(x = Games, y = Amount)) +
  geom_col(color = "dark blue") +
  # `fun` and after_stat() replace the deprecated `fun.y` and `..y..`
  stat_summary(
    fun = sum,
    aes(
      label = format(after_stat(y), big.mark = ",", nsmall = 2),
      group = Games
    ),
    geom = "text",
    vjust = -0.3
  ) +
  theme_classic()

People & Wager Analysis

Histogram analysis of how many people played each game (vertical axis) against how many dollars were wagered (horizontal axis)

# load library
library(cowplot)

# create individual histograms for gaming machines

# Build a histogram of the amount wagered on a single game.
#   data: data frame holding one numeric column per game
#   game: name of the column to plot, as a string (e.g. "Slots")
#   bins: number of histogram bins
# Returns a ggplot object. The column is looked up through the `.data`
# pronoun rather than `data$col`, which ggplot2 discourages inside aes().
amount_histogram <- function(data, game, bins = 10) {
  ggplot(data, aes(x = .data[[game]])) +
    geom_histogram(color = "darkblue", fill = "lightblue", bins = bins) +
    xlab(paste(game, "Amount")) +
    ylab("Count")
}

plot1 <- amount_histogram(casino_df, "Slots")
# BJ uses 5 bins; every other game uses 10
plot2 <- amount_histogram(casino_df, "BJ", bins = 5)
plot3 <- amount_histogram(casino_df, "Craps")
plot4 <- amount_histogram(casino_df, "Bac")
plot5 <- amount_histogram(casino_df, "Bingo")
plot6 <- amount_histogram(casino_df, "Poker")
plot7 <- amount_histogram(casino_df, "Other")

# arrange the histograms into grids with plot_grid()
# layout I: all seven histograms in a single grid
plot_grid(
  plotlist = list(plot1, plot2, plot3, plot4, plot5, plot6, plot7),
  label_size = 9
)

# layout II (cleaner view): the same histograms spread over smaller grids
plot_grid(plotlist = list(plot1, plot2), label_size = 9)

plot_grid(plotlist = list(plot3, plot4), label_size = 9)

plot_grid(plotlist = list(plot5, plot6), label_size = 9)

plot_grid(plotlist = list(plot7), label_size = 9)

K-means Cluster Analysis

Summary of the variables of interest in the Station Casino data:

library(stats)

# keep only the variables of interest (columns 2 through 8, i.e. the per-game
# amounts Slots..Other) as the input for z-score standardization
interest_casino_df <- casino_df[, 2:8]
summary(interest_casino_df)

##      Slots               BJ              Craps              Bac          
##  Min.   :   0.00   Min.   :   0.00   Min.   :   0.00   Min.   :   0.000  
##  1st Qu.:  62.32   1st Qu.:   0.00   1st Qu.:   0.00   1st Qu.:   0.000  
##  Median : 103.52   Median :  24.31   Median :  15.94   Median :   7.471  
##  Mean   : 291.78   Mean   : 283.29   Mean   : 267.63   Mean   :  82.070  
##  3rd Qu.: 507.01   3rd Qu.: 189.58   3rd Qu.: 117.11   3rd Qu.:  35.273  
##  Max.   :1861.47   Max.   :7294.28   Max.   :7250.61   Max.   :2253.590  
##      Bingo            Poker            Other        
##  Min.   :  0.00   Min.   :  0.00   Min.   :   0.00  
##  1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:   0.00  
##  Median :  0.00   Median : 10.77   Median :  34.51  
##  Mean   : 10.09   Mean   : 54.59   Mean   : 132.97  
##  3rd Qu.:  0.00   3rd Qu.: 26.57   3rd Qu.:  73.85  
##  Max.   :212.71   Max.   :913.92   Max.   :1025.22

# z-score standardize every variable of interest (centre on the column mean,
# divide by the column standard deviation) and keep the result as a data frame
interest_cas_df_z_std <- as.data.frame(scale(interest_casino_df))

K-means Clustering Algorithm

Case 1: K = 3

Compare results before and after standardization

# Slots before standardization
summary(interest_casino_df[["Slots"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   62.32  103.52  291.78  507.01 1861.47

# Slots after z-score standardization
summary(interest_cas_df_z_std[["Slots"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8927 -0.7020 -0.5759  0.0000  0.6585  4.8023

# K = 3
# fix the RNG version and the seed so the algorithm can be re-run later and
# the results compared against this run
RNGversion("3.5.2")
set.seed(245)
player_clusters <- kmeans(interest_cas_df_z_std, centers = 3)

View the Cluster Centers

# print the cluster centers (in standardized units) of the fitted model
player_clusters[["centers"]]

##        Slots          BJ       Craps         Bac      Bingo      Poker
## 1 -0.4337103 -0.27827563 -0.26130566 -0.28330222  0.1125658 -0.4289548
## 2  0.9847084 -0.01341055 -0.09268778  0.02247473 -0.3157667  1.6334589
## 3  2.1434518  3.95368238  4.03265217  3.88072633 -0.3157667 -0.5157333
##        Other
## 1 -0.3933258
## 2  1.5324224
## 3 -0.6113191

# attach each player's cluster assignment to the original data frame
casino_df[["cluster"]] <- player_clusters[["cluster"]]
# NOTE(review): this keeps the first 3 rows (players) of casino_df, not one
# row per cluster -- confirm that is the intended "result"
cluster3result <- casino_df[seq_len(3), c("cluster", "TotalSpend")]

Results: when K is 3

Mean spend of players by clusters

# mean TotalSpend of the players in each cluster
aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)

##   cluster TotalSpend
## 1       1   289.0234
## 2       2  1847.3543
## 3       3  9905.4941

Case 2: K = 4

Compare results before and after standardization

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   62.32  103.52  291.78  507.01 1861.47

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8927 -0.7020 -0.5759  0.0000  0.6585  4.8023

View the Cluster Centers

# print the cluster centers (in standardized units) of the fitted model
player_clusters[["centers"]]

##        Slots           BJ       Craps         Bac      Bingo      Poker
## 1 -0.8200530 -0.290445266 -0.27088405 -0.30341982 -0.3157667 -0.4557710
## 2  1.0291283 -0.008775564 -0.08968516  0.02818951 -0.3157667  1.6923633
## 3  2.1434518  3.953682376  4.03265217  3.88072633 -0.3157667 -0.5157333
## 4 -0.4084348 -0.276405342 -0.26001054 -0.28085284  0.1366211 -0.4254913
##        Other
## 1  1.7713195
## 2  1.4936153
## 3 -0.6113191
## 4 -0.5057530

# attach each player's cluster assignment to the original data frame
casino_df[["cluster"]] <- player_clusters[["cluster"]]
# NOTE(review): this keeps the first 4 rows (players) of casino_df, not one
# row per cluster -- confirm that is the intended "result"
cluster4result <- casino_df[seq_len(4), c("cluster", "TotalSpend")]

Results: when K is 4

Mean spend of players by clusters

##   cluster TotalSpend
## 1       1   592.1981
## 2       2  1868.0519
## 3       3  9905.4941
## 4       4   277.4624

Case 3: K = 5

Compare results before and after standardization

# Slots before standardization
summary(interest_casino_df[["Slots"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   62.32  103.52  291.78  507.01 1861.47

# Slots after z-score standardization
summary(interest_cas_df_z_std[["Slots"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8927 -0.7020 -0.5759  0.0000  0.6585  4.8023

# K = 5
# fix the RNG version and the seed so the algorithm can be re-run later and
# the results compared against this run
RNGversion("3.5.2")
set.seed(245)
player_clusters <- kmeans(interest_cas_df_z_std, centers = 5)

View the Cluster Centers

# print the cluster centers (in standardized units) of the fitted model
player_clusters[["centers"]]

##        Slots          BJ       Craps         Bac      Bingo      Poker
## 1 -0.5435643 -0.27261721 -0.25741024 -0.27581028 -0.3101923 -0.4205621
## 2 -0.8926563 -0.31505810 -0.28657423 -0.33208753 -0.3157667  4.3996100
## 3  0.3133618 -0.31505810 -0.28657423 -0.33208753  2.9567489 -0.5157333
## 4  2.1434518  3.95368238  4.03265217  3.88072633 -0.3157667 -0.5157333
## 5  1.1615845  0.01489864 -0.07453187  0.05575009 -0.3157667  1.3461464
##        Other
## 1 -0.3630478
## 2 -0.6113191
## 3 -0.6113191
## 4 -0.6113191
## 5  1.7448049

# attach each player's cluster assignment to the original data frame
casino_df[["cluster"]] <- player_clusters[["cluster"]]
# NOTE(review): this keeps the first 5 rows (players) of casino_df, not one
# row per cluster -- confirm that is the intended "result"
cluster5result <- casino_df[seq_len(5), c("cluster", "TotalSpend")]

Results: when K is 5

Mean spend of players by clusters

# mean TotalSpend of the players in each cluster
aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)

##   cluster TotalSpend
## 1       1   257.6646
## 2       2   520.3130
## 3       3   498.7284
## 4       4  9905.4941
## 5       5  1971.5843

Case 4: K = 6

Compare results before and after standardization

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   62.32  103.52  291.78  507.01 1861.47

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8927 -0.7020 -0.5759  0.0000  0.6585  4.8023

View the Cluster Centers

# print the cluster centers (in standardized units) of the fitted model
player_clusters[["centers"]]

##        Slots          BJ       Craps         Bac      Bingo      Poker
## 1  0.3133618 -0.31505810 -0.28657423 -0.33208753  2.9567489 -0.5157333
## 2 -0.8926563 -0.31505810 -0.28657423 -0.33208753 -0.3157667  4.3996100
## 3  2.1504833  3.97227120  4.05317190  3.88397647 -0.3157667 -0.5157333
## 4 -0.8141934 -0.24680256 -0.22578158  1.89787184 -0.3157667 -0.5157333
## 5  1.1615845  0.01489864 -0.07453187  0.05575009 -0.3157667  1.3461464
## 6 -0.5377997 -0.27191638 -0.25692865 -0.31071471 -0.3101003 -0.4189905
##        Other
## 1 -0.6113191
## 2 -0.6113191
## 3 -0.6113191
## 4 -0.6113191
## 5  1.7448049
## 6 -0.3589480

# attach each player's cluster assignment to the original data frame
casino_df[["cluster"]] <- player_clusters[["cluster"]]
# NOTE(review): this keeps the first 6 rows (players) of casino_df, not one
# row per cluster -- confirm that is the intended "result"
cluster6result <- casino_df[seq_len(6), c("cluster", "TotalSpend")]

Results: when K is 6

Mean spend of players by clusters

# mean TotalSpend of the players in each cluster
aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)

##   cluster TotalSpend
## 1       1   498.7284
## 2       2   520.3130
## 3       3  9944.4732
## 4       4   694.8912
## 5       5  1971.5843
## 6       6   253.0637

Scatter plot matrix of clusters when K is 6

library(dplyr)
# Scatter plot matrix of the game columns (2 through 8), with each point
# coloured by the player's cluster.
my_cols <- c("#00AFBB", "#E7B800", "#FC4E07", "#33FFB5", "#090D90", "#A70BC3")
# Cluster ids in ascending order. unique() alone returns them in order of
# first appearance, which would pair legend labels with the wrong colours,
# because pairs() colours cluster i with my_cols[i].
cluster_ids <- sort(unique(casino_df$cluster))
pairs(
  casino_df[, 2:8],
  pch = 20,
  cex = 0.1,
  main = "Station Casino Data - Cluster Analysis",
  col = my_cols[casino_df$cluster],
  oma = c(4, 4, 6, 10)
)
# let the legend draw outside the plot region, then restore the previous
# clipping setting so later plots are unaffected
old_par <- par(xpd = TRUE)
legend(1, 0.7, legend = cluster_ids, fill = my_cols[cluster_ids])
par(old_par)

Key take away

From the K-means cluster analysis for K = 6, we know that players in cluster 3 had the highest mean spend
Cluster 3 players are shown in the third color of the palette (#FC4E07) in the scatter plot matrix
The majority of players who play Slots and BJ seem to fall in cluster 1. A similar pattern is observed among Slots, BJ, Craps and Bac
Players who play more Bingo seem to play less Slots. This could be due to the age of Bingo players in general and the play-time duration of Bingo
Conversely, players who play more Slots seem to play less Bingo
Based on the K-means cluster results, I would target high comp awards at players who visit the slot machines

Station Casino Operational Analysis

Nag Rajendran

8/17/2021

Data

Game & Total Bet Analysis

People & Wager Analysis

K-means Cluster Analysis

Summary of the variables of interest in the Station Casino data:

K-means Clustering Algorithm

Case 1: K = 3

Compare results before and after standardization

View the Cluster Centers

Results: when K is 3

Case 2: K = 4

Compare results before and after standardization

View the Cluster Centers

Results: when K is 4

Case 3: K = 5

Compare results before and after standardization

View the Cluster Centers

Results: when K is 5

Case 4: K = 6

Compare results before and after standardization

View the Cluster Centers

Results: when K is 6

Scatter plot matrix of clusters when K is 6

Key take away