library(hockeyR)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sportyR)
library(dplyr)
library(markovchain)
## Package: markovchain
## Version: 0.9.5
## Date: 2023-09-24 09:20:02 UTC
## BugReport: https://github.com/spedygiorgio/markovchain/issues
##
##
## Attaching package: 'markovchain'
##
## The following object is masked from 'package:lubridate':
##
## period
library(openxlsx)
library(stringr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(ggthemes)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
NHL_2023 <- load_pbp(2023)
NHL_2023 <- NHL_2023 |>
mutate(points_outcome = ifelse(event=='Goal',1,0))
Add_PossesionRetained <- function(Data) {
df_test <- Data |>
add_column(PossessionRetained = 0)|>
select(game_id,event_id,event_team_abbr,event,description,PossessionRetained,points_outcome
,home_abbreviation,away_abbreviation,ordinal_num,home_score,away_score
,event_player_1_name,event_player_1_type,event_player_2_name,event_player_2_type
,event_player_3_name,event_player_3_type,period_seconds,period_time,x,y,x_fixed,y_fixed,shot_distance, shot_angle,date_time)
df1 <- df_test |>
mutate(PossessionRetained = case_when(
event %in% c('Faceoff', 'Takeaway') ~ 1, # Possession retained
event %in% c('Giveaway', 'Penalty','Goal') ~ 0, # Possession lost
TRUE ~ NA_real_ # Handle other cases with NA or a default value
))
# Create a shifted version of event_team_abbr and event for the next row
df1 <- df1 |>
mutate(next_event_team_abbr = lead(event_team_abbr),
next_event = lead(event))
# Update PossessionRetained based on the next event using vectorized conditions
df1 <- df1 |>
mutate(PossessionRetained = case_when(
event %in% c('Blocked Shot', 'Shot', 'Missed Shot', 'Hit') ~
case_when(
# Handle case where next_event_team_abbr is NA
is.na(next_event_team_abbr) ~ 0, # Default to 0 if next_event_team_abbr is NA
next_event_team_abbr == event_team_abbr ~
case_when(
next_event %in% c("Missed Shot", "Shot", "Goal", "Giveaway",
"Failed Shot Attempt","Blocked Shot","Penalty") ~ 1,
next_event %in% c( "Hit", "Faceoff", "Takeaway",
"Period End") ~ 0,
TRUE ~ NA_real_ # For any unexpected events, return NA
),
next_event_team_abbr != event_team_abbr ~
case_when(
next_event %in% c("Missed Shot", "Shot", "Goal", "Giveaway",
"Failed Shot Attempt", "Period End","Blocked Shot"
,"Faceoff") ~ 0,
next_event %in% c("Hit", "Takeaway", "Penalty") ~ 1,
TRUE ~ NA_real_ # For any unexpected events, return NA
),
TRUE ~ NA_real_ # Catch any unhandled cases
),
TRUE ~ PossessionRetained # Retain existing values for other events
))
# Clean up temporary columns
df1 <- df1 |>
select(-next_event_team_abbr, -next_event)
return(df1)
}
events_to_keep <- c(
"Faceoff", "Hit", "Shot", "Missed Shot",
"Giveaway", "Blocked Shot", "Goal",
"Takeaway", "Penalty", "Failed Shot Attempt"
)
NHL_2023 <- NHL_2023 |> filter(event %in% events_to_keep)
NHL_2023_Possessions <- Add_PossesionRetained(NHL_2023)
NHL_2023_Possessions <- NHL_2023_Possessions |>
filter(!is.na(NHL_2023_Possessions$PossessionRetained))
home_team <- 'EDM'
away_team <- 'VGK'
The markov_chain_cleansing
function
processes a dataset of events to build a comprehensive Markov chain
model representing transitions between various event states across all
teams. It then isolates and extracts a transition matrix specific to a
given team by:
Concatenating relevant event information to define unique states.
Fitting a Markov chain to determine transition probabilities.
Filtering the transition matrix to include only states and transitions pertinent to the specified team.
markov_chain_cleansing <- function(data,team){
df <- data
concat_df <- paste(df$event_team_abbr,df$event,df$PossessionRetained,df$points_outcome,sep = ';')
markov_chain_matrix <- markovchainFit(concat_df)$estimate@transitionMatrix
row_names <- rownames(markov_chain_matrix)
col_names <- colnames(markov_chain_matrix)
first_part_rows <- sapply(str_split(row_names, ";"), "[", 1)
first_part_cols <- sapply(str_split(col_names, ";"), "[", 1)
row_index <- which(first_part_rows == team)
col_index <- which(first_part_cols == team)
markov_chain_matrix_cleansed <- markov_chain_matrix[row_index, col_index]
return(markov_chain_matrix_cleansed)
}
Home_team_markov_matrices <- function(data, team) {
# Filter the data for the specified home team
Home_team_data <- data |>
filter(home_abbreviation == team)
# Split the data by gameid
split_data <- split(Home_team_data, Home_team_data$game_id)
# Apply the markov_chain_cleansing function to each game
markov_chain_list <- lapply(split_data, function(game_data) {
markov_chain_cleansing(game_data, team)
})
return(markov_chain_list)
}
Away_team_markov_matrices <- function(data, team) {
# Filter the data for the specified home team
Away_team_data <- data |>
filter(away_abbreviation == team)
# Split the data by gameid
split_data <- split(Away_team_data, Away_team_data$game_id)
# Apply the markov_chain_cleansing function to each game
markov_chain_list <- lapply(split_data, function(game_data) {
markov_chain_cleansing(game_data, team)
})
return(markov_chain_list)
}
Home_team_markov_matrices <- Home_team_markov_matrices(NHL_2023_Possessions, home_team)
Away_team_markov_matrices <- Away_team_markov_matrices(NHL_2023_Possessions, away_team)
simplify_matrix <- function(markov_chain_matrices){
xx <- markov_chain_matrices[as.logical(rowSums(markov_chain_matrices !=0,na.rm = TRUE)), ]
xx <- xx[,as.logical(colSums(xx!=0,na.rm=TRUE))]
return(xx)
}
Home_team_markov_matrices <- lapply(Home_team_markov_matrices, simplify_matrix)
Away_team_markov_matrices <- lapply(Away_team_markov_matrices, simplify_matrix)
chome <- unique(unlist(lapply(Home_team_markov_matrices, colnames)))
rhome <- unique(unlist(lapply(Home_team_markov_matrices, rownames)))
temp_home_master_markov <- matrix(0,nrow = length(rhome), ncol = length(chome), dimnames = list(rhome,chome))
home_master_markov <- rep(list(temp_home_master_markov),length(Home_team_markov_matrices))
temp_index_home_list <- lapply(Home_team_markov_matrices, function(x) outer(rownames(x), colnames(x), paste))
index_home_list <- lapply(temp_index_home_list, function(x) outer(rhome, chome, paste) %in% x)
home_team_game_dates <- function(data, team) {
# Filter the data for the specified home team
Home_team_data <- data |>
filter(home_abbreviation == team ,ordinal_num == '1st')
Home_team_data <- Home_team_data |>
mutate(date_only = as.Date(date_time)) |>
select(game_id,date_only) |>
group_by(game_id)
return(unique(Home_team_data$date_only))
}
today <- Sys.Date()
home_team_game_dates <- home_team_game_dates(NHL_2023_Possessions,home_team)
home_team_game_weights <- exp(as.numeric(home_team_game_dates-today))/sum(exp(as.numeric(home_team_game_dates-today)))
for (element in 1:length(home_master_markov)) {
for (row in 1:nrow(Home_team_markov_matrices[[element]])) {
row_name <- rownames(Home_team_markov_matrices[[element]])[row]
column_names <- names(which(Home_team_markov_matrices[[element]][row,] != 0))
for (column in 1:length(column_names)) {
home_master_markov[[element]][which(rownames(home_master_markov[[element]])==row_name)
,which(colnames(home_master_markov[[element]])==column_names[column])] <-
Home_team_markov_matrices[[element]][row,which(colnames(Home_team_markov_matrices[[element]])==column_names[column])] * home_team_game_weights[element]
}
}
}
home_final_markov_chain <- Reduce("+",home_master_markov )
home_final_markov_chain <- simplify_matrix(home_final_markov_chain)
caway <- unique(unlist(lapply(Away_team_markov_matrices, colnames)))
raway <- unique(unlist(lapply(Away_team_markov_matrices, rownames)))
temp_away_master_markov <- matrix(0,nrow = length(raway), ncol = length(caway), dimnames = list(raway,caway))
away_master_markov <- rep(list(temp_away_master_markov),length(Away_team_markov_matrices))
temp_index_away_list <- lapply(Away_team_markov_matrices, function(x) outer(rownames(x), colnames(x), paste))
index_away_list <- lapply(temp_index_away_list, function(x) outer(raway, caway, paste) %in% x)
away_team_game_dates <- function(data, team) {
# Filter the data for the specified home team
Away_team_data <- data |>
filter(away_abbreviation == team ,ordinal_num == '1st')
Away_team_data <- Away_team_data |>
mutate(date_only = as.Date(date_time)) |>
select(game_id,date_only) |>
group_by(game_id)
return(unique(Away_team_data$date_only))
}
away_team_game_dates <- away_team_game_dates(NHL_2023_Possessions,away_team)
away_team_game_weights <- exp(as.numeric(away_team_game_dates-today))/sum(exp(as.numeric(away_team_game_dates-today)))
for (element in 1:length(away_master_markov)) {
for (row in 1:nrow(Away_team_markov_matrices[[element]])) {
row_name <- rownames(Away_team_markov_matrices[[element]])[row]
column_names <- names(which(Away_team_markov_matrices[[element]][row,] != 0))
for (column in 1:length(column_names)) {
away_master_markov[[element]][which(rownames(away_master_markov[[element]])==row_name)
,which(colnames(away_master_markov[[element]])==column_names[column])] <-
Away_team_markov_matrices[[element]][row,which(colnames(Away_team_markov_matrices[[element]])==column_names[column])] *away_team_game_weights[element]
}
}
}
away_final_markov_chain <- Reduce("+",away_master_markov)
away_final_markov_chain <- simplify_matrix(away_final_markov_chain)
all_home_colnames <- unlist(lapply(Home_team_markov_matrices, rownames))
home_colnames_df <- data.frame(column_name = all_home_colnames, stringsAsFactors = FALSE)
home_final_table <- home_colnames_df |>
count(column_name, name = "freq")
home_final_table <- home_final_table[home_final_table$column_name %in% rownames(home_final_markov_chain),]
home_final_table <- home_final_table |>
arrange(desc(freq)) |>
mutate(prob = freq/sum(freq),
cumprob = cumsum(prob))
all_away_colnames <- unlist(lapply(Away_team_markov_matrices, rownames))
away_colnames_df <- data.frame(column_name = all_away_colnames, stringsAsFactors = FALSE)
away_final_table <- away_colnames_df |>
count(column_name, name = "freq")
away_final_table <- away_final_table[away_final_table$column_name %in% rownames(away_final_markov_chain),]
away_final_table <- away_final_table |>
arrange(desc(freq)) |>
mutate(prob = freq/sum(freq),
cumprob = cumsum(prob))
Do these updates:
1. After goal the next event should be faceoff.
ss <- function(home_final_markov_chain, away_final_markov_chain, home_final_table, away_final_table, away_team, home_team){
simulated_final_game <- data.frame(
home_team = character(),
away_team = character(),
event_team = character(),
event = character(),
PossessionRetained = integer(),
points_outcome = integer(),
stringsAsFactors = FALSE
)
# Function to select and add a faceoff play
add_faceoff <- function(sim_game, home_table, away_table, home_team, away_team){
options <- c('home','away')
select_faceoff_team <- sample(options, 1)
if(select_faceoff_team == 'home'){
faceoff_row <- home_table[grepl("faceoff", home_table$column_name, ignore.case = TRUE), ]
first_play_parsed <- c(home_team, away_team,
unlist(str_split(as.character(faceoff_row$column_name), ';')))
}
else{
faceoff_row <- away_table[grepl("faceoff", away_table$column_name, ignore.case = TRUE), ]
first_play_parsed <- c(home_team, away_team,
unlist(str_split(as.character(faceoff_row$column_name), ';')))
}
if(length(first_play_parsed) != ncol(sim_game)){
stop("Mismatch in first_play_parsed length")
}
first_play_df <- t(as.data.frame(first_play_parsed, stringsAsFactors = FALSE))
colnames(first_play_df) <- colnames(sim_game)
rownames(first_play_df) <- NULL
sim_game <- rbind(sim_game, first_play_df)
return(sim_game)
}
# Initialize the game with a faceoff
simulated_final_game <- add_faceoff(simulated_final_game, home_final_table, away_final_table,
home_team, away_team)
while (nrow(simulated_final_game) <= 200) {
# Find last row
last_row <- simulated_final_game[nrow(simulated_final_game), ]
# Check if the last event was a goal
if(tolower(last_row$event) == "goal"){
# Start from faceoff
simulated_final_game <- add_faceoff(simulated_final_game, home_final_table,
away_final_table, home_team, away_team)
# Proceed to next iteration
next
}
# Concatenate the relevant columns to form the state
last_row_concatenated <- paste(as.character(last_row[3:ncol(last_row)]), sep=';',
collapse = ';')
# Determine the next play based on possession and event team
if(last_row$PossessionRetained == 1 & last_row$event_team == home_team){
# Home team retains possession
markov_chain <- home_final_markov_chain
current_markov <- "home"
}
else if(last_row$PossessionRetained == 0 & last_row$event_team == home_team) {
# Possession changes to away team
current_table <- away_final_table
}
else if(last_row$PossessionRetained == 1 & last_row$event_team == away_team) {
# Away team retains possession
markov_chain <- away_final_markov_chain
current_markov <- "away"
}
else if(last_row$PossessionRetained == 0 & last_row$event_team == away_team) {
# Possession changes to home team
current_table <- home_final_table
}
# Proceed only if last event was not a goal
if(tolower(last_row$event) != "goal"){
if(last_row$PossessionRetained == 1){
# Find probability of the next event from the current markov chain
row_to_inspect <- which(rownames(markov_chain) == last_row_concatenated)
if(length(row_to_inspect) == 0){
stop(paste("State not found in", current_markov, "markov chain:", last_row_concatenated))
}
options_to_inspect <- as.data.frame(markov_chain[row_to_inspect, ], stringsAsFactors = FALSE)
options_to_inspect <- options_to_inspect[which(markov_chain[row_to_inspect, ] > 0), , drop = FALSE]
colnames(options_to_inspect) <- c('freq')
options_to_inspect$prob <- options_to_inspect$freq / sum(options_to_inspect$freq)
options_to_inspect <- options_to_inspect[order(-options_to_inspect$prob), ]
options_to_inspect$cumprob <- round(cumsum(options_to_inspect$prob), 6)
# Randomly select next play
next_play <- runif(1)
selected_state <- rownames(options_to_inspect)[min(which(options_to_inspect$cumprob >= next_play))]
next_play_parsed <- c(home_team, away_team, unlist(str_split(as.character(selected_state), ';')))
# Add the selected play to the simulation
next_play_df <- t(as.data.frame(next_play_parsed, stringsAsFactors = FALSE))
colnames(next_play_df) <- colnames(simulated_final_game)
rownames(next_play_df) <- NULL
simulated_final_game <- rbind(simulated_final_game, next_play_df)
}
else {
# Possession changes to the opponent team
# Select next play based on the opponent's table cumulative probabilities
next_play <- runif(1)
selected_row <- which(next_play <= current_table$cumprob)[1]
if(is.na(selected_row)){
stop("No valid play found for the opponent's cumulative probability.")
}
next_play_parsed <- c(home_team, away_team, unlist(str_split(as.character(current_table$column_name[selected_row]), ';')))
# Add the selected play to the simulation
next_play_df <- t(as.data.frame(next_play_parsed, stringsAsFactors = FALSE))
colnames(next_play_df) <- colnames(simulated_final_game)
rownames(next_play_df) <- NULL
simulated_final_game <- rbind(simulated_final_game, next_play_df)
}
}
}
Home_Shots_taken <- simulated_final_game |>
filter(event_team == home_team &
event %in% c("Shot", "Missed Shot", "Blocked Shot", "Goal"))
Away_Shots_taken <- simulated_final_game |>
filter(event_team == away_team &
event %in% c("Shot", "Missed Shot", "Blocked Shot", "Goal"))
Home_goals <- sum(as.numeric(simulated_final_game$points_outcome[
which(simulated_final_game$event_team == home_team)]))
Away_goals <- sum(as.numeric(simulated_final_game$points_outcome[
which(simulated_final_game$event_team == away_team)]))
goal_diff <- Home_goals - Away_goals
# Return Data frame
return(data.frame(
goal_diff = goal_diff,
Home_goals = Home_goals,
Away_goals = Away_goals,
Home_shots = nrow(Home_Shots_taken),
Away_shots = nrow(Away_Shots_taken),
stringsAsFactors = FALSE
))
}
Faceoff Selection: After a goal, the simulation resets by selecting a faceoff play, mimicking real-game scenarios where play resumes from a faceoff after a goal.
Possession Logic: The possession logic remains intact. If possession is retained or changes, the simulation continues based on the Markov chains.
Flexibility: The helper function
add_faceoff
allows for easy modifications
in the future, such as changing how faceoffs are handled or adding more
complexity.
Error Handling: Proper error messages help in debugging issues related to state mismatches or invalid probability selections.
results_list <- replicate(
n = 100,
expr = ss(
home_final_markov_chain = home_final_markov_chain,
away_final_markov_chain = away_final_markov_chain,
home_final_table = home_final_table,
away_final_table = away_final_table,
away_team = away_team,
home_team = home_team
),
simplify = FALSE
)
Results_df <- function(result,n){
results_df <- bind_rows(result)
results_df <- results_df %>%
mutate(simulation = 1:n)
results_df <- results_df %>%
select(simulation, everything())
return(results_df)
}
Result_100 <- Results_df(results_list,100)
Result_100 <- Result_100 |>
mutate(result = ifelse(goal_diff > 0, 'win',
ifelse(goal_diff == 0, 'tie', 'loss')))
data <- Result_100
# 1. Distribution of Goal Difference
# Create a histogram for goal_diff
ggplot(data, aes(x = goal_diff)) +
geom_histogram(binwidth = 1, fill = "saddlebrown", color = "black", alpha = 0.7) +
labs(title = "Distribution of Goal Difference - 100x (NSH vs SJS)",
x = "Goal Difference",
y = "Frequency") +
theme_classic()
# 2. Pie Chart for Result Variable
# Create a table for result variable
result_table <- data %>%
count(result) %>%
mutate(percentage = n / sum(n))
# Create the pie chart
library(ggplot2)
library(scales)
# Define a custom brown color palette
brown_palette <- c("win" = "#8B4513", # SaddleBrown
"tie" = "#A0522D", # Sienna
"loss" = "#D2B48C") # Tan
# Create the pie chart with the brown palette
ggplot(result_table, aes(x = "", y = percentage, fill = as.factor(result))) +
geom_bar(stat = "identity", width = 1, color = "white") +
coord_polar(theta = "y") +
scale_y_continuous(labels = percent) +
geom_text(aes(label = scales::percent(percentage, accuracy = 0.1)),
position = position_stack(vjust = 0.5),
color = "white",
size = 5) +
scale_fill_manual(values = brown_palette) +
labs(title = "Pie Chart of Results",
fill = "Result") +
theme_classic() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
plot.title = element_text(hjust = 0.5, size = 16, face = "bold")
)
# 3. Shot Conversion Rate Bar Chart
# Calculate shot conversion rates
conversion_rates <- data %>%
summarise(
Home_Conversion = mean(Home_goals) / mean(Home_shots),
Away_Conversion = mean(Away_goals) / mean(Away_shots)
) %>%
pivot_longer(cols = everything(), names_to = "Team", values_to = "ConversionRate")
brown_palette <- c("Home_Conversion" = "#8B4513", # SaddleBrown
"Away_Conversion" = "#D2B48C")
# Create the bar chart
ggplot(conversion_rates, aes(x = Team, y = ConversionRate, fill = Team)) +
geom_bar(stat = "identity", alpha = 0.8) +
geom_text(aes(label = scales::percent(ConversionRate, accuracy = 0.01)), vjust = -0.5) +
labs(title = "Shot Conversion Rate for Home and Away Teams - 100x",
x = "Team",
y = "Conversion Rate") +
scale_y_continuous(labels = scales::percent_format(accuracy = 0.01)) +
scale_fill_manual(values = brown_palette) + # Apply the custom brown palette
theme_classic()
results_list_500 <- replicate(
n = 500,
expr = ss(
home_final_markov_chain = home_final_markov_chain,
away_final_markov_chain = away_final_markov_chain,
home_final_table = home_final_table,
away_final_table = away_final_table,
away_team = away_team,
home_team = home_team
),
simplify = FALSE
)
Result_500 <- Results_df(results_list_500,500)
Result_500 <- Result_500 |>
mutate(result = ifelse(goal_diff > 0, 'win',
ifelse(goal_diff == 0, 'tie', 'loss')))
data <- Result_500
# 1. Distribution of Goal Difference
# Create a histogram for goal_diff
ggplot(data, aes(x = goal_diff)) +
geom_histogram(binwidth = 1, fill = "saddlebrown", color = "black", alpha = 0.7) +
labs(title = "Distribution of Goal Difference - 500x (NSH vs SJS)",
x = "Goal Difference",
y = "Frequency") +
theme_classic()
# 2. Pie Chart for Result Variable
# Create a table for result variable
result_table <- data %>%
count(result) %>%
mutate(percentage = n / sum(n))
# Create the pie chart
library(ggplot2)
library(scales)
# Define a custom brown color palette
brown_palette <- c("win" = "#8B4513", # SaddleBrown
"tie" = "#A0522D", # Sienna
"loss" = "#D2B48C") # Tan
# Create the pie chart with the brown palette
ggplot(result_table, aes(x = "", y = percentage, fill = as.factor(result))) +
geom_bar(stat = "identity", width = 1, color = "white") +
coord_polar(theta = "y") +
scale_y_continuous(labels = percent) +
geom_text(aes(label = scales::percent(percentage, accuracy = 0.1)),
position = position_stack(vjust = 0.5),
color = "white",
size = 5) +
scale_fill_manual(values = brown_palette) +
labs(title = "Pie Chart of Results",
fill = "Result") +
theme_classic() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
plot.title = element_text(hjust = 0.5, size = 16, face = "bold")
)
# 3. Shot Conversion Rate Bar Chart
# Calculate shot conversion rates
conversion_rates <- data %>%
summarise(
Home_Conversion = mean(Home_goals) / mean(Home_shots),
Away_Conversion = mean(Away_goals) / mean(Away_shots)
) %>%
pivot_longer(cols = everything(), names_to = "Team", values_to = "ConversionRate")
brown_palette <- c("Home_Conversion" = "#8B4513", # SaddleBrown
"Away_Conversion" = "#D2B48C")
# Create the bar chart
ggplot(conversion_rates, aes(x = Team, y = ConversionRate, fill = Team)) +
geom_bar(stat = "identity", alpha = 0.8) +
geom_text(aes(label = scales::percent(ConversionRate, accuracy = 0.01)), vjust = -0.5) +
labs(title = "Shot Conversion Rate for Home and Away Teams - 500x",
x = "Team",
y = "Conversion Rate") +
scale_y_continuous(labels = scales::percent_format(accuracy = 0.01)) +
scale_fill_manual(values = brown_palette) + # Apply the custom brown palette
theme_classic()
results_list_1000 <- replicate(
n = 1000,
expr = ss(
home_final_markov_chain = home_final_markov_chain,
away_final_markov_chain = away_final_markov_chain,
home_final_table = home_final_table,
away_final_table = away_final_table,
away_team = away_team,
home_team = home_team
),
simplify = FALSE
)
Result_1000 <- Results_df(results_list_1000,1000)
Result_1000 <- Result_1000 |>
mutate(result = ifelse(goal_diff > 0, 'win',
ifelse(goal_diff == 0, 'tie', 'loss')))
data <- Result_1000
# 1. Distribution of Goal Difference
# Create a histogram for goal_diff
ggplot(data, aes(x = goal_diff)) +
geom_histogram(binwidth = 1, fill = "saddlebrown", color = "black", alpha = 0.7) +
labs(title = "Distribution of Goal Difference - 1000x (NSH vs SJS)",
x = "Goal Difference",
y = "Frequency") +
theme_classic()
# 2. Pie Chart for Result Variable
# Create a table for result variable
result_table <- data %>%
count(result) %>%
mutate(percentage = n / sum(n))
# Create the pie chart
library(ggplot2)
library(scales)
# Define a custom brown color palette
brown_palette <- c("win" = "#8B4513", # SaddleBrown
"tie" = "#A0522D", # Sienna
"loss" = "#D2B48C") # Tan
# Create the pie chart with the brown palette
ggplot(result_table, aes(x = "", y = percentage, fill = as.factor(result))) +
geom_bar(stat = "identity", width = 1, color = "white") +
coord_polar(theta = "y") +
scale_y_continuous(labels = percent) +
geom_text(aes(label = scales::percent(percentage, accuracy = 0.1)),
position = position_stack(vjust = 0.5),
color = "white",
size = 5) +
scale_fill_manual(values = brown_palette) +
labs(title = "Pie Chart of Results",
fill = "Result") +
theme_classic() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
plot.title = element_text(hjust = 0.5, size = 16, face = "bold")
)
# 3. Shot Conversion Rate Bar Chart
# Calculate shot conversion rates
conversion_rates <- data %>%
summarise(
Home_Conversion = mean(Home_goals) / mean(Home_shots),
Away_Conversion = mean(Away_goals) / mean(Away_shots)
) %>%
pivot_longer(cols = everything(), names_to = "Team", values_to = "ConversionRate")
brown_palette <- c("Home_Conversion" = "#8B4513", # SaddleBrown
"Away_Conversion" = "#D2B48C")
# Create the bar chart
ggplot(conversion_rates, aes(x = Team, y = ConversionRate, fill = Team)) +
geom_bar(stat = "identity", alpha = 0.8) +
geom_text(aes(label = scales::percent(ConversionRate, accuracy = 0.01)), vjust = -0.5) +
labs(title = "Shot Conversion Rate for Home and Away Teams - 1000x",
x = "Team",
y = "Conversion Rate") +
scale_y_continuous(labels = scales::percent_format(accuracy = 0.01)) +
scale_fill_manual(values = brown_palette) + # Apply the custom brown palette
theme_classic()
results_list_5000 <- replicate(
n = 5000,
expr = ss(
home_final_markov_chain = home_final_markov_chain,
away_final_markov_chain = away_final_markov_chain,
home_final_table = home_final_table,
away_final_table = away_final_table,
away_team = away_team,
home_team = home_team
),
simplify = FALSE
)
Result_5000 <- Results_df(results_list_5000,5000)
Result_5000 <- Result_5000 |>
mutate(result = ifelse(goal_diff > 0, 'win',
ifelse(goal_diff == 0, 'tie', 'loss')))
data <- Result_5000
# 1. Distribution of Goal Difference
# Create a histogram for goal_diff
ggplot(data, aes(x = goal_diff)) +
geom_histogram(binwidth = 1, fill = "saddlebrown", color = "black", alpha = 0.7) +
labs(title = "Distribution of Goal Difference - 5000x (NSH vs SJS)",
x = "Goal Difference",
y = "Frequency") +
theme_classic()
# 2. Pie Chart for Result Variable
# Create a table for result variable
result_table <- data %>%
count(result) %>%
mutate(percentage = n / sum(n))
# Create the pie chart
library(ggplot2)
library(scales)
# Define a custom brown color palette
brown_palette <- c("win" = "#8B4513", # SaddleBrown
"tie" = "#A0522D", # Sienna
"loss" = "#D2B48C") # Tan
# Create the pie chart with the brown palette
ggplot(result_table, aes(x = "", y = percentage, fill = as.factor(result))) +
geom_bar(stat = "identity", width = 1, color = "white") +
coord_polar(theta = "y") +
scale_y_continuous(labels = percent) +
geom_text(aes(label = scales::percent(percentage, accuracy = 0.1)),
position = position_stack(vjust = 0.5),
color = "white",
size = 5) +
scale_fill_manual(values = brown_palette) +
labs(title = "Pie Chart of Results",
fill = "Result") +
theme_classic() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
plot.title = element_text(hjust = 0.5, size = 16, face = "bold")
)
# 3. Shot Conversion Rate Bar Chart
# Calculate shot conversion rates
conversion_rates <- data %>%
summarise(
Home_Conversion = mean(Home_goals) / mean(Home_shots),
Away_Conversion = mean(Away_goals) / mean(Away_shots)
) %>%
pivot_longer(cols = everything(), names_to = "Team", values_to = "ConversionRate")
brown_palette <- c("Home_Conversion" = "#8B4513", # SaddleBrown
"Away_Conversion" = "#D2B48C")
# Create the bar chart
ggplot(conversion_rates, aes(x = Team, y = ConversionRate, fill = Team)) +
geom_bar(stat = "identity", alpha = 0.8) +
geom_text(aes(label = scales::percent(ConversionRate, accuracy = 0.01)), vjust = -0.5) +
labs(title = "Shot Conversion Rate for Home and Away Teams - 5000x",
x = "Team",
y = "Conversion Rate") +
scale_y_continuous(labels = scales::percent_format(accuracy = 0.01)) +
scale_fill_manual(values = brown_palette) + # Apply the custom brown palette
theme_classic()