Data used for the Station Casino operational analysis
# Read the "Sheet1" worksheet of the raw casino workbook
library(readxl)
casino_data <- read_excel(
  "C:/Users/18602/Desktop/M2 - Operations Analytics/Casino Final_Dec 2013 Raw Data_for test.xlsx",
  sheet = "Sheet1"
)
# Convert the imported sheet to a data frame and assign short column names
# (the names are applied by position, in this order)
casino_df <- setNames(
  data.frame(casino_data),
  c("Player", "Slots", "BJ", "Craps", "Bac", "Bingo", "Poker", "Other", "TotalSpend")
)
# Display the data frame as a paged table
library(rmarkdown)
paged_table(casino_df)
Total dollar amount played on each game
library(ggplot2)
library(dplyr)
library(tidyr)
# Bar plot of the total amount played on each game.
# The game columns are listed explicitly so that columns added to casino_df
# later (e.g. a cluster label) are never pivoted in by accident.
casino_df %>%
  pivot_longer(
    cols = c(Slots, BJ, Craps, Bac, Bingo, Poker, Other),
    names_to = "Games",
    values_to = "Amount"
  ) %>%
  ggplot(aes(x = Games, y = Amount)) +
  # one stacked segment per player row; the bar height is the game total
  geom_col(color = "dark blue") +
  # print the per-game sum above each bar; `fun` and after_stat() replace the
  # deprecated `fun.y` argument and `..y..` notation
  stat_summary(
    fun = sum,
    aes(label = format(after_stat(y), big.mark = ",", nsmall = 2), group = Games),
    geom = "text",
    vjust = -.3
  ) +
  theme_classic()
Histogram analysis of each game: how many people played (the vertical axis) against how many dollars were wagered (the horizontal axis)
# load library
library(cowplot)

# Build a histogram of the amounts in one game column.
#   data: data frame with one column per game
#   game: name of the column to plot (string); also used in the x-axis label
#   bins: number of histogram bins
# The column is looked up through the .data pronoun instead of casino_df$col,
# because ggplot2 discourages `$` inside aes().
game_histogram <- function(data, game, bins = 10) {
  ggplot(data, aes(x = .data[[game]])) +
    geom_histogram(color = "darkblue", fill = "lightblue", bins = bins) +
    xlab(paste(game, "Amount")) +
    ylab("Count")
}

# create individual histograms for each game (BJ uses 5 bins, the rest 10)
plot1 <- game_histogram(casino_df, "Slots")
plot2 <- game_histogram(casino_df, "BJ", bins = 5)
plot3 <- game_histogram(casino_df, "Craps")
plot4 <- game_histogram(casino_df, "Bac")
plot5 <- game_histogram(casino_df, "Bingo")
plot6 <- game_histogram(casino_df, "Poker")
plot7 <- game_histogram(casino_df, "Other")

# create a histogram grid using plot_grid()
# layout I: all seven histograms in a single grid
plot_grid(plot1, plot2, plot3, plot4, plot5, plot6, plot7, label_size = 9)
# layout II (cleaner view): at most two histograms per grid
plot_grid(plot1, plot2, label_size = 9)
plot_grid(plot3, plot4, label_size = 9)
plot_grid(plot5, plot6, label_size = 9)
plot_grid(plot7, label_size = 9)
library(stats)
# keep only the columns of interest (positions 2 to 8) for z-score standardization
interest_casino_df <- casino_df[, 2:8]
summary(interest_casino_df)
## Slots BJ Craps Bac
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.: 62.32 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000
## Median : 103.52 Median : 24.31 Median : 15.94 Median : 7.471
## Mean : 291.78 Mean : 283.29 Mean : 267.63 Mean : 82.070
## 3rd Qu.: 507.01 3rd Qu.: 189.58 3rd Qu.: 117.11 3rd Qu.: 35.273
## Max. :1861.47 Max. :7294.28 Max. :7250.61 Max. :2253.590
## Bingo Poker Other
## Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 0.00 Median : 10.77 Median : 34.51
## Mean : 10.09 Mean : 54.59 Mean : 132.97
## 3rd Qu.: 0.00 3rd Qu.: 26.57 3rd Qu.: 73.85
## Max. :212.71 Max. :913.92 Max. :1025.22
# standardize every column with scale(), then reassemble into a data frame
z_scored_columns <- lapply(interest_casino_df, scale)
interest_cas_df_z_std <- as.data.frame(z_scored_columns)
# Slots before standardization
summary(interest_casino_df[["Slots"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 62.32 103.52 291.78 507.01 1861.47
# Slots after standardization
summary(interest_cas_df_z_std[["Slots"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.8927 -0.7020 -0.5759 0.0000 0.6585 4.8023
# Fit k-means with K = 3.
# Pin the RNG version and the seed so the clustering can be re-run later and
# the results compared.
RNGversion("3.5.2")
set.seed(245)
player_clusters <- kmeans(interest_cas_df_z_std, centers = 3)
player_clusters[["centers"]]
## Slots BJ Craps Bac Bingo Poker
## 1 -0.4337103 -0.27827563 -0.26130566 -0.28330222 0.1125658 -0.4289548
## 2 0.9847084 -0.01341055 -0.09268778 0.02247473 -0.3157667 1.6334589
## 3 2.1434518 3.95368238 4.03265217 3.88072633 -0.3157667 -0.5157333
## Other
## 1 -0.3933258
## 2 1.5324224
## 3 -0.6113191
# add cluster segmentation to the original dataframe
casino_df$cluster <- player_clusters$cluster
# Mean TotalSpend per cluster, one row per cluster. Indexing casino_df[1:3, ]
# would return the first three players rather than a per-cluster result.
cluster3result <- aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)
Mean spend of players by cluster (K = 3)
# average TotalSpend within each cluster
aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)
## cluster TotalSpend
## 1 1 289.0234
## 2 2 1847.3543
## 3 3 9905.4941
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 62.32 103.52 291.78 507.01 1861.47
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.8927 -0.7020 -0.5759 0.0000 0.6585 4.8023
# Refit k-means with K = 4 before inspecting the centers; without this call
# player_clusters would still hold the previous fit.
# NOTE(review): the RNG version and seed are copied from the other fits in
# this file - confirm they match the run that produced the recorded output.
RNGversion("3.5.2")
set.seed(245)
player_clusters <- kmeans(interest_cas_df_z_std, centers = 4)
player_clusters$centers
## Slots BJ Craps Bac Bingo Poker
## 1 -0.8200530 -0.290445266 -0.27088405 -0.30341982 -0.3157667 -0.4557710
## 2 1.0291283 -0.008775564 -0.08968516 0.02818951 -0.3157667 1.6923633
## 3 2.1434518 3.953682376 4.03265217 3.88072633 -0.3157667 -0.5157333
## 4 -0.4084348 -0.276405342 -0.26001054 -0.28085284 0.1366211 -0.4254913
## Other
## 1 1.7713195
## 2 1.4936153
## 3 -0.6113191
## 4 -0.5057530
# add cluster segmentation to the original dataframe
casino_df$cluster <- player_clusters$cluster
# Mean TotalSpend per cluster, one row per cluster. Indexing casino_df[1:4, ]
# would return the first four players rather than a per-cluster result.
cluster4result <- aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)
Mean spend of players by cluster (K = 4)
## cluster TotalSpend
## 1 1 592.1981
## 2 2 1868.0519
## 3 3 9905.4941
## 4 4 277.4624
# Slots before standardization
summary(interest_casino_df[["Slots"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 62.32 103.52 291.78 507.01 1861.47
# Slots after standardization
summary(interest_cas_df_z_std[["Slots"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.8927 -0.7020 -0.5759 0.0000 0.6585 4.8023
# Fit k-means with K = 5.
# Pin the RNG version and the seed so the clustering can be re-run later and
# the results compared.
RNGversion("3.5.2")
set.seed(245)
player_clusters <- kmeans(interest_cas_df_z_std, centers = 5)
player_clusters[["centers"]]
## Slots BJ Craps Bac Bingo Poker
## 1 -0.5435643 -0.27261721 -0.25741024 -0.27581028 -0.3101923 -0.4205621
## 2 -0.8926563 -0.31505810 -0.28657423 -0.33208753 -0.3157667 4.3996100
## 3 0.3133618 -0.31505810 -0.28657423 -0.33208753 2.9567489 -0.5157333
## 4 2.1434518 3.95368238 4.03265217 3.88072633 -0.3157667 -0.5157333
## 5 1.1615845 0.01489864 -0.07453187 0.05575009 -0.3157667 1.3461464
## Other
## 1 -0.3630478
## 2 -0.6113191
## 3 -0.6113191
## 4 -0.6113191
## 5 1.7448049
# add cluster segmentation to the original dataframe
casino_df$cluster <- player_clusters$cluster
# Mean TotalSpend per cluster, one row per cluster. Indexing casino_df[1:5, ]
# would return the first five players rather than a per-cluster result.
cluster5result <- aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)
Mean spend of players by cluster (K = 5)
# average TotalSpend within each cluster
aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)
## cluster TotalSpend
## 1 1 257.6646
## 2 2 520.3130
## 3 3 498.7284
## 4 4 9905.4941
## 5 5 1971.5843
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 62.32 103.52 291.78 507.01 1861.47
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.8927 -0.7020 -0.5759 0.0000 0.6585 4.8023
# Refit k-means with K = 6 before inspecting the centers; without this call
# player_clusters would still hold the previous fit.
# NOTE(review): the RNG version and seed are copied from the other fits in
# this file - confirm they match the run that produced the recorded output.
RNGversion("3.5.2")
set.seed(245)
player_clusters <- kmeans(interest_cas_df_z_std, centers = 6)
player_clusters$centers
## Slots BJ Craps Bac Bingo Poker
## 1 0.3133618 -0.31505810 -0.28657423 -0.33208753 2.9567489 -0.5157333
## 2 -0.8926563 -0.31505810 -0.28657423 -0.33208753 -0.3157667 4.3996100
## 3 2.1504833 3.97227120 4.05317190 3.88397647 -0.3157667 -0.5157333
## 4 -0.8141934 -0.24680256 -0.22578158 1.89787184 -0.3157667 -0.5157333
## 5 1.1615845 0.01489864 -0.07453187 0.05575009 -0.3157667 1.3461464
## 6 -0.5377997 -0.27191638 -0.25692865 -0.31071471 -0.3101003 -0.4189905
## Other
## 1 -0.6113191
## 2 -0.6113191
## 3 -0.6113191
## 4 -0.6113191
## 5 1.7448049
## 6 -0.3589480
# add cluster segmentation to the original dataframe
casino_df$cluster <- player_clusters$cluster
# Mean TotalSpend per cluster, one row per cluster. Indexing casino_df[1:6, ]
# would return the first six players rather than a per-cluster result.
cluster6result <- aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)
Mean spend of players by cluster (K = 6)
# average TotalSpend within each cluster
aggregate(TotalSpend ~ cluster, data = casino_df, FUN = mean)
## cluster TotalSpend
## 1 1 498.7284
## 2 2 520.3130
## 3 3 9944.4732
## 4 4 694.8912
## 5 5 1971.5843
## 6 6 253.0637
library(dplyr)
# one colour per cluster id; a point in cluster i is drawn in my_cols[i]
my_cols <- c("#00AFBB", "#E7B800", "#FC4E07", "#33FFB5", "#090D90", "#A70BC3")
# Sorted cluster ids present in the data. unique() alone returns ids in order
# of first appearance, which would pair legend labels with the wrong colours.
cluster_ids <- sort(unique(casino_df$cluster))
# scatterplot matrix of the seven game columns, coloured by cluster
pairs(
  casino_df[, 2:8],
  pch = 20,
  cex = 0.1,
  main = "Station Casino Data - Cluster Analysis",
  col = my_cols[casino_df$cluster],
  oma = c(4, 4, 6, 10)
)
# allow the legend to be drawn outside the plot region, then restore the setting
old_par <- par(xpd = TRUE)
legend(1, .7, legend = cluster_ids, fill = my_cols[cluster_ids])
par(old_par)