library(hockeyR)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sportyR)
library(dplyr)
library(markovchain)
## Package:  markovchain
## Version:  0.9.5
## Date:     2023-09-24 09:20:02 UTC
## BugReport: https://github.com/spedygiorgio/markovchain/issues
## 
## 
## Attaching package: 'markovchain'
## 
## The following object is masked from 'package:lubridate':
## 
##     period
library(openxlsx)
library(stringr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(ggthemes)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
NHL_2023 <- load_pbp(2023)
NHL_2023 <- NHL_2023 |>
  mutate(points_outcome = ifelse(event=='Goal',1,0))
Add_PossesionRetained <- function(Data) {
  df_test <- Data |>
    add_column(PossessionRetained = 0)|>
    select(game_id,event_id,event_team_abbr,event,description,PossessionRetained,points_outcome
           ,home_abbreviation,away_abbreviation,ordinal_num,home_score,away_score
           ,event_player_1_name,event_player_1_type,event_player_2_name,event_player_2_type
           ,event_player_3_name,event_player_3_type,period_seconds,period_time,x,y,x_fixed,y_fixed,shot_distance, shot_angle,date_time) 
  
  df1 <- df_test |>
    mutate(PossessionRetained = case_when(
      event %in% c('Faceoff', 'Takeaway') ~ 1,  # Possession retained
      event %in% c('Giveaway', 'Penalty','Goal') ~ 0,  # Possession lost
      TRUE ~ NA_real_  # Handle other cases with NA or a default value
    ))
  
  # Create a shifted version of event_team_abbr and event for the next row
  df1 <- df1 |>
    mutate(next_event_team_abbr = lead(event_team_abbr),
           next_event = lead(event))
  
  # Update PossessionRetained based on the next event using vectorized conditions
  df1 <- df1 |>
    mutate(PossessionRetained = case_when(
      event %in% c('Blocked Shot', 'Shot', 'Missed Shot', 'Hit') ~ 
        case_when(
          # Handle case where next_event_team_abbr is NA
          is.na(next_event_team_abbr) ~ 0,  # Default to 0 if next_event_team_abbr is NA
          next_event_team_abbr == event_team_abbr ~
            case_when(
              next_event %in% c("Missed Shot", "Shot", "Goal", "Giveaway", 
                                "Failed Shot Attempt","Blocked Shot","Penalty") ~ 1,
              next_event %in% c( "Hit", "Faceoff", "Takeaway",
                                "Period End") ~ 0,
              TRUE ~ NA_real_  # For any unexpected events, return NA
            ),
          next_event_team_abbr != event_team_abbr ~
            case_when(
              next_event %in% c("Missed Shot", "Shot", "Goal", "Giveaway", 
                                "Failed Shot Attempt", "Period End","Blocked Shot"
                                ,"Faceoff") ~ 0,
              next_event %in% c("Hit", "Takeaway", "Penalty") ~ 1,
              TRUE ~ NA_real_  # For any unexpected events, return NA
            ),
          TRUE ~ NA_real_  # Catch any unhandled cases
        ),
      TRUE ~ PossessionRetained  # Retain existing values for other events
    ))
  
  # Clean up temporary columns
  df1 <- df1 |>
    select(-next_event_team_abbr, -next_event)
  return(df1)
  
}
events_to_keep <- c(
  "Faceoff", "Hit", "Shot", "Missed Shot", 
  "Giveaway", "Blocked Shot", "Goal", 
  "Takeaway", "Penalty", "Failed Shot Attempt"
)
NHL_2023 <- NHL_2023 |> filter(event %in% events_to_keep)
NHL_2023_Possessions <- Add_PossesionRetained(NHL_2023)
NHL_2023_Possessions <- NHL_2023_Possessions |>
  filter(!is.na(NHL_2023_Possessions$PossessionRetained))
home_team <- 'EDM'
away_team <- 'VGK'
The markov_chain_cleansing function
processes a dataset of events to build a comprehensive Markov chain
model representing transitions between various event states across all
teams. It then isolates and extracts a transition matrix specific to a
given team by:
Concatenating relevant event information to define unique states.
Fitting a Markov chain to determine transition probabilities.
Filtering the transition matrix to include only states and transitions pertinent to the specified team.
markov_chain_cleansing <- function(data,team){
  df <- data
  concat_df <- paste(df$event_team_abbr,df$event,df$PossessionRetained,df$points_outcome,sep = ';')
  markov_chain_matrix <- markovchainFit(concat_df)$estimate@transitionMatrix
  row_names <- rownames(markov_chain_matrix)
  col_names <- colnames(markov_chain_matrix)
  
  first_part_rows <- sapply(str_split(row_names, ";"), "[", 1)
  first_part_cols <- sapply(str_split(col_names, ";"), "[", 1)
  
  row_index <- which(first_part_rows == team)
  col_index <- which(first_part_cols == team)
  
  markov_chain_matrix_cleansed <- markov_chain_matrix[row_index, col_index]
  return(markov_chain_matrix_cleansed)
}
Home_team_markov_matrices <- function(data, team) {
  # Filter the data for the specified home team
  Home_team_data <- data |> 
    filter(home_abbreviation == team)
  
  # Split the data by gameid
  split_data <- split(Home_team_data, Home_team_data$game_id)
  
  # Apply the markov_chain_cleansing function to each game
  markov_chain_list <- lapply(split_data, function(game_data) {
    markov_chain_cleansing(game_data, team)
  })
  
  return(markov_chain_list)
}
Away_team_markov_matrices <- function(data, team) {
  # Filter the data for the specified home team
  Away_team_data <- data |> 
    filter(away_abbreviation == team)
  
  # Split the data by gameid
  split_data <- split(Away_team_data, Away_team_data$game_id)
  
  # Apply the markov_chain_cleansing function to each game
  markov_chain_list <- lapply(split_data, function(game_data) {
    markov_chain_cleansing(game_data, team)
  })
  
  return(markov_chain_list)
}
Home_team_markov_matrices <- Home_team_markov_matrices(NHL_2023_Possessions, home_team)
Away_team_markov_matrices <- Away_team_markov_matrices(NHL_2023_Possessions, away_team)
simplify_matrix <- function(markov_chain_matrices){
  xx <- markov_chain_matrices[as.logical(rowSums(markov_chain_matrices !=0,na.rm = TRUE)), ]
  xx <- xx[,as.logical(colSums(xx!=0,na.rm=TRUE))]
  return(xx)
}
Home_team_markov_matrices <- lapply(Home_team_markov_matrices, simplify_matrix)
Away_team_markov_matrices <- lapply(Away_team_markov_matrices, simplify_matrix)
chome <- unique(unlist(lapply(Home_team_markov_matrices, colnames)))
rhome <- unique(unlist(lapply(Home_team_markov_matrices, rownames)))
temp_home_master_markov <- matrix(0,nrow = length(rhome), ncol = length(chome), dimnames = list(rhome,chome))
home_master_markov <- rep(list(temp_home_master_markov),length(Home_team_markov_matrices))
temp_index_home_list <- lapply(Home_team_markov_matrices, function(x) outer(rownames(x), colnames(x), paste))
index_home_list <- lapply(temp_index_home_list, function(x) outer(rhome, chome, paste) %in% x)
home_team_game_dates <- function(data, team) {
  # Filter the data for the specified home team
  Home_team_data <- data |> 
    filter(home_abbreviation == team ,ordinal_num == '1st')
  
  
  Home_team_data <- Home_team_data |>
    mutate(date_only = as.Date(date_time)) |>
    select(game_id,date_only) |>
    group_by(game_id)
  
  
  return(unique(Home_team_data$date_only))
}
today <- Sys.Date()
home_team_game_dates <- home_team_game_dates(NHL_2023_Possessions,home_team)
home_team_game_weights <- exp(as.numeric(home_team_game_dates-today))/sum(exp(as.numeric(home_team_game_dates-today)))
for (element in 1:length(home_master_markov)) {
  for (row in 1:nrow(Home_team_markov_matrices[[element]])) {
    row_name <- rownames(Home_team_markov_matrices[[element]])[row]
    column_names <- names(which(Home_team_markov_matrices[[element]][row,] != 0))
    for (column in 1:length(column_names)) {
      home_master_markov[[element]][which(rownames(home_master_markov[[element]])==row_name)
                                    ,which(colnames(home_master_markov[[element]])==column_names[column])] <-
        Home_team_markov_matrices[[element]][row,which(colnames(Home_team_markov_matrices[[element]])==column_names[column])] * home_team_game_weights[element] 
      
    }
    
  }
  
}
home_final_markov_chain <- Reduce("+",home_master_markov )
home_final_markov_chain <- simplify_matrix(home_final_markov_chain)
caway <- unique(unlist(lapply(Away_team_markov_matrices, colnames)))
raway <- unique(unlist(lapply(Away_team_markov_matrices, rownames)))
temp_away_master_markov <- matrix(0,nrow = length(raway), ncol = length(caway), dimnames = list(raway,caway))
away_master_markov <- rep(list(temp_away_master_markov),length(Away_team_markov_matrices))
temp_index_away_list <- lapply(Away_team_markov_matrices, function(x) outer(rownames(x), colnames(x), paste))
index_away_list <- lapply(temp_index_away_list, function(x) outer(raway, caway, paste) %in% x)
away_team_game_dates <- function(data, team) {
  # Filter the data for the specified home team
  Away_team_data <- data |> 
    filter(away_abbreviation == team ,ordinal_num == '1st')
  
  
  Away_team_data <- Away_team_data |>
    mutate(date_only = as.Date(date_time)) |>
    select(game_id,date_only) |>
    group_by(game_id)
  
  
  return(unique(Away_team_data$date_only))
}
away_team_game_dates <- away_team_game_dates(NHL_2023_Possessions,away_team)
away_team_game_weights <- exp(as.numeric(away_team_game_dates-today))/sum(exp(as.numeric(away_team_game_dates-today)))
for (element in 1:length(away_master_markov)) {
  for (row in 1:nrow(Away_team_markov_matrices[[element]])) {
    row_name <- rownames(Away_team_markov_matrices[[element]])[row]
    column_names <- names(which(Away_team_markov_matrices[[element]][row,] != 0))
    for (column in 1:length(column_names)) { 
      away_master_markov[[element]][which(rownames(away_master_markov[[element]])==row_name)
                                    ,which(colnames(away_master_markov[[element]])==column_names[column])] <-
        Away_team_markov_matrices[[element]][row,which(colnames(Away_team_markov_matrices[[element]])==column_names[column])] *away_team_game_weights[element] 
    }
    
  }
}
away_final_markov_chain <- Reduce("+",away_master_markov)
away_final_markov_chain <- simplify_matrix(away_final_markov_chain)
all_home_colnames <- unlist(lapply(Home_team_markov_matrices, rownames))
home_colnames_df <- data.frame(column_name = all_home_colnames, stringsAsFactors = FALSE)
home_final_table <- home_colnames_df |>
  count(column_name, name = "freq") 
home_final_table <- home_final_table[home_final_table$column_name %in% rownames(home_final_markov_chain),]
home_final_table <- home_final_table |>
  arrange(desc(freq)) |>
  mutate(prob = freq/sum(freq),
         cumprob = cumsum(prob))
all_away_colnames <- unlist(lapply(Away_team_markov_matrices, rownames))
away_colnames_df <- data.frame(column_name = all_away_colnames, stringsAsFactors = FALSE)
away_final_table <- away_colnames_df |>
  count(column_name, name = "freq") 
away_final_table <- away_final_table[away_final_table$column_name %in% rownames(away_final_markov_chain),]
away_final_table <- away_final_table |>
  arrange(desc(freq)) |>
  mutate(prob = freq/sum(freq),
         cumprob = cumsum(prob)) 
Do these updates:
1. After goal the next event should be faceoff.
ss <- function(home_final_markov_chain, away_final_markov_chain, home_final_table, away_final_table, away_team, home_team){
  
  simulated_final_game <- data.frame(
    home_team = character(),
    away_team = character(),
    event_team = character(),
    event = character(),
    PossessionRetained = integer(),
    points_outcome = integer(),
    stringsAsFactors = FALSE
  )
  
  # Function to select and add a faceoff play
  add_faceoff <- function(sim_game, home_table, away_table, home_team, away_team){
    options <- c('home','away')
    select_faceoff_team <- sample(options, 1)
    if(select_faceoff_team == 'home'){
      faceoff_row <- home_table[grepl("faceoff", home_table$column_name, ignore.case = TRUE), ]
      first_play_parsed <- c(home_team, away_team, 
                             unlist(str_split(as.character(faceoff_row$column_name), ';')))
    }
    else{
      faceoff_row <- away_table[grepl("faceoff", away_table$column_name, ignore.case = TRUE), ]
      first_play_parsed <- c(home_team, away_team, 
                             unlist(str_split(as.character(faceoff_row$column_name), ';')))
    }
    if(length(first_play_parsed) != ncol(sim_game)){
      stop("Mismatch in first_play_parsed length")
    }
    first_play_df <- t(as.data.frame(first_play_parsed, stringsAsFactors = FALSE))
    colnames(first_play_df) <- colnames(sim_game)
    rownames(first_play_df) <- NULL
    sim_game <- rbind(sim_game, first_play_df)
    return(sim_game)
  }
  
  # Initialize the game with a faceoff
  simulated_final_game <- add_faceoff(simulated_final_game, home_final_table, away_final_table,
                                      home_team, away_team)
  
  while (nrow(simulated_final_game) <= 200) {
    # Find last row
    last_row <- simulated_final_game[nrow(simulated_final_game), ]
    
    # Check if the last event was a goal
    if(tolower(last_row$event) == "goal"){
      # Start from faceoff
      simulated_final_game <- add_faceoff(simulated_final_game, home_final_table,
                                          away_final_table, home_team, away_team)
      
      # Proceed to next iteration
      next
    }
    
    # Concatenate the relevant columns to form the state
    last_row_concatenated <- paste(as.character(last_row[3:ncol(last_row)]), sep=';', 
                                   collapse = ';')
    
    # Determine the next play based on possession and event team
    if(last_row$PossessionRetained == 1 & last_row$event_team == home_team){
      # Home team retains possession
      markov_chain <- home_final_markov_chain
      current_markov <- "home"
    } 
    else if(last_row$PossessionRetained == 0 & last_row$event_team == home_team) {
      # Possession changes to away team
      current_table <- away_final_table
    }
    else if(last_row$PossessionRetained == 1 & last_row$event_team == away_team) {
      # Away team retains possession
      markov_chain <- away_final_markov_chain
      current_markov <- "away"
    } 
    else if(last_row$PossessionRetained == 0 & last_row$event_team == away_team) {
      # Possession changes to home team
      current_table <- home_final_table
    }
    
    # Proceed only if last event was not a goal
    if(tolower(last_row$event) != "goal"){
      if(last_row$PossessionRetained == 1){
        # Find probability of the next event from the current markov chain
        row_to_inspect <- which(rownames(markov_chain) == last_row_concatenated)
        if(length(row_to_inspect) == 0){
          stop(paste("State not found in", current_markov, "markov chain:", last_row_concatenated))
        }
        options_to_inspect <- as.data.frame(markov_chain[row_to_inspect, ], stringsAsFactors = FALSE)
        options_to_inspect <- options_to_inspect[which(markov_chain[row_to_inspect, ] > 0), , drop = FALSE]
        colnames(options_to_inspect) <- c('freq')
        options_to_inspect$prob <- options_to_inspect$freq / sum(options_to_inspect$freq)
        options_to_inspect <- options_to_inspect[order(-options_to_inspect$prob), ]
        options_to_inspect$cumprob <- round(cumsum(options_to_inspect$prob), 6)
        
        # Randomly select next play
        next_play <- runif(1)
        selected_state <- rownames(options_to_inspect)[min(which(options_to_inspect$cumprob >= next_play))]
        next_play_parsed <- c(home_team, away_team, unlist(str_split(as.character(selected_state), ';')))
        
        # Add the selected play to the simulation
        next_play_df <- t(as.data.frame(next_play_parsed, stringsAsFactors = FALSE))
        colnames(next_play_df) <- colnames(simulated_final_game)
        rownames(next_play_df) <- NULL
        simulated_final_game <- rbind(simulated_final_game, next_play_df)
      } 
      else {
        # Possession changes to the opponent team
        # Select next play based on the opponent's table cumulative probabilities
        next_play <- runif(1)
        selected_row <- which(next_play <= current_table$cumprob)[1]
        if(is.na(selected_row)){
          stop("No valid play found for the opponent's cumulative probability.")
        }
        next_play_parsed <- c(home_team, away_team, unlist(str_split(as.character(current_table$column_name[selected_row]), ';')))
        
        # Add the selected play to the simulation
        next_play_df <- t(as.data.frame(next_play_parsed, stringsAsFactors = FALSE))
        colnames(next_play_df) <- colnames(simulated_final_game)
        rownames(next_play_df) <- NULL
        simulated_final_game <- rbind(simulated_final_game, next_play_df)
      }
    }
  }
  
  Home_Shots_taken <- simulated_final_game |>
    filter(event_team == home_team & 
             event %in% c("Shot", "Missed Shot", "Blocked Shot", "Goal"))
  Away_Shots_taken <- simulated_final_game |>
    filter(event_team == away_team & 
             event %in% c("Shot", "Missed Shot", "Blocked Shot", "Goal"))
  
  Home_goals <- sum(as.numeric(simulated_final_game$points_outcome[
    which(simulated_final_game$event_team == home_team)]))
  Away_goals <- sum(as.numeric(simulated_final_game$points_outcome[
    which(simulated_final_game$event_team == away_team)]))
          
  goal_diff <- Home_goals - Away_goals
  
  # Return Data frame 
  return(data.frame(
  goal_diff = goal_diff,
  Home_goals = Home_goals,
  Away_goals = Away_goals,
  Home_shots = nrow(Home_Shots_taken),
  Away_shots = nrow(Away_Shots_taken),
  stringsAsFactors = FALSE
))
  
}
Faceoff Selection: After a goal, the simulation resets by selecting a faceoff play, mimicking real-game scenarios where play resumes from a faceoff after a goal.
Possession Logic: The possession logic remains intact. If possession is retained or changes, the simulation continues based on the Markov chains.
Flexibility: The helper function
add_faceoff allows for easy modifications
in the future, such as changing how faceoffs are handled or adding more
complexity.
Error Handling: Proper error messages help in debugging issues related to state mismatches or invalid probability selections.
results_list <- replicate(
  n = 100,
  expr = ss(
    home_final_markov_chain = home_final_markov_chain,
    away_final_markov_chain = away_final_markov_chain,
    home_final_table = home_final_table,
    away_final_table = away_final_table,
    away_team = away_team,
    home_team = home_team
  ),
  simplify = FALSE
)
Results_df <- function(result,n){
  results_df <- bind_rows(result)
  results_df <- results_df %>%
    mutate(simulation = 1:n)
  results_df <- results_df %>%
    select(simulation, everything())
    return(results_df)
}
Result_100 <- Results_df(results_list,100)
Result_100 <- Result_100 |>
  mutate(result = ifelse(goal_diff > 0, 'win',
                         ifelse(goal_diff == 0, 'tie', 'loss')))
data <- Result_100
# 1. Distribution of Goal Difference
# Create a histogram for goal_diff
ggplot(data, aes(x = goal_diff)) +
  geom_histogram(binwidth = 1, fill = "saddlebrown", color = "black", alpha = 0.7) +
  labs(title = "Distribution of Goal Difference - 100x (NSH vs SJS)",
       x = "Goal Difference",
       y = "Frequency") +
  theme_classic()
# 2. Pie Chart for Result Variable
# Create a table for result variable
result_table <- data %>% 
  count(result) %>% 
  mutate(percentage = n / sum(n))
# Create the pie chart
library(ggplot2)
library(scales)
# Define a custom brown color palette
brown_palette <- c("win" = "#8B4513",   # SaddleBrown
                   "tie" = "#A0522D",   # Sienna
                   "loss" = "#D2B48C")  # Tan
# Create the pie chart with the brown palette
ggplot(result_table, aes(x = "", y = percentage, fill = as.factor(result))) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar(theta = "y") +
  scale_y_continuous(labels = percent) +
  geom_text(aes(label = scales::percent(percentage, accuracy = 0.1)), 
            position = position_stack(vjust = 0.5), 
            color = "white", 
            size = 5) +
  scale_fill_manual(values = brown_palette) +
  labs(title = "Pie Chart of Results",
       fill = "Result") +
  theme_classic() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid  = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold")
  )
# 3. Shot Conversion Rate Bar Chart
# Calculate shot conversion rates
conversion_rates <- data %>%
  summarise(
    Home_Conversion = mean(Home_goals) / mean(Home_shots),
    Away_Conversion = mean(Away_goals) / mean(Away_shots)
  ) %>%
  pivot_longer(cols = everything(), names_to = "Team", values_to = "ConversionRate")
brown_palette <- c("Home_Conversion" = "#8B4513",   # SaddleBrown
                   "Away_Conversion" = "#D2B48C")
# Create the bar chart
ggplot(conversion_rates, aes(x = Team, y = ConversionRate, fill = Team)) +
  geom_bar(stat = "identity", alpha = 0.8) +
  geom_text(aes(label = scales::percent(ConversionRate, accuracy = 0.01)), vjust = -0.5) +
  labs(title = "Shot Conversion Rate for Home and Away Teams - 100x",
       x = "Team",
       y = "Conversion Rate") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 0.01)) +
  scale_fill_manual(values = brown_palette) +  # Apply the custom brown palette
  theme_classic()
results_list_500 <- replicate(
  n = 500,
  expr = ss(
    home_final_markov_chain = home_final_markov_chain,
    away_final_markov_chain = away_final_markov_chain,
    home_final_table = home_final_table,
    away_final_table = away_final_table,
    away_team = away_team,
    home_team = home_team
  ),
  simplify = FALSE
)
Result_500 <- Results_df(results_list_500,500)
Result_500 <- Result_500 |>
  mutate(result = ifelse(goal_diff > 0, 'win',
                         ifelse(goal_diff == 0, 'tie', 'loss')))
data <- Result_500
# 1. Distribution of Goal Difference
# Create a histogram for goal_diff
ggplot(data, aes(x = goal_diff)) +
  geom_histogram(binwidth = 1, fill = "saddlebrown", color = "black", alpha = 0.7) +
  labs(title = "Distribution of Goal Difference - 500x (NSH vs SJS)",
       x = "Goal Difference",
       y = "Frequency") +
  theme_classic()
# 2. Pie Chart for Result Variable
# Create a table for result variable
result_table <- data %>% 
  count(result) %>% 
  mutate(percentage = n / sum(n))
# Create the pie chart
library(ggplot2)
library(scales)
# Define a custom brown color palette
brown_palette <- c("win" = "#8B4513",   # SaddleBrown
                   "tie" = "#A0522D",   # Sienna
                   "loss" = "#D2B48C")  # Tan
# Create the pie chart with the brown palette
ggplot(result_table, aes(x = "", y = percentage, fill = as.factor(result))) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar(theta = "y") +
  scale_y_continuous(labels = percent) +
  geom_text(aes(label = scales::percent(percentage, accuracy = 0.1)), 
            position = position_stack(vjust = 0.5), 
            color = "white", 
            size = 5) +
  scale_fill_manual(values = brown_palette) +
  labs(title = "Pie Chart of Results",
       fill = "Result") +
  theme_classic() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid  = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold")
  )
# 3. Shot Conversion Rate Bar Chart
# Calculate shot conversion rates
conversion_rates <- data %>%
  summarise(
    Home_Conversion = mean(Home_goals) / mean(Home_shots),
    Away_Conversion = mean(Away_goals) / mean(Away_shots)
  ) %>%
  pivot_longer(cols = everything(), names_to = "Team", values_to = "ConversionRate")
brown_palette <- c("Home_Conversion" = "#8B4513",   # SaddleBrown
                   "Away_Conversion" = "#D2B48C")
# Create the bar chart
ggplot(conversion_rates, aes(x = Team, y = ConversionRate, fill = Team)) +
  geom_bar(stat = "identity", alpha = 0.8) +
  geom_text(aes(label = scales::percent(ConversionRate, accuracy = 0.01)), vjust = -0.5) +
  labs(title = "Shot Conversion Rate for Home and Away Teams - 500x",
       x = "Team",
       y = "Conversion Rate") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 0.01)) +
  scale_fill_manual(values = brown_palette) +  # Apply the custom brown palette
  theme_classic()
results_list_1000 <- replicate(
  n = 1000,
  expr = ss(
    home_final_markov_chain = home_final_markov_chain,
    away_final_markov_chain = away_final_markov_chain,
    home_final_table = home_final_table,
    away_final_table = away_final_table,
    away_team = away_team,
    home_team = home_team
  ),
  simplify = FALSE
)
Result_1000 <- Results_df(results_list_1000,1000)
Result_1000 <- Result_1000 |>
  mutate(result = ifelse(goal_diff > 0, 'win',
                         ifelse(goal_diff == 0, 'tie', 'loss')))
data <- Result_1000
# 1. Distribution of Goal Difference
# Create a histogram for goal_diff
ggplot(data, aes(x = goal_diff)) +
  geom_histogram(binwidth = 1, fill = "saddlebrown", color = "black", alpha = 0.7) +
  labs(title = "Distribution of Goal Difference - 1000x (NSH vs SJS)",
       x = "Goal Difference",
       y = "Frequency") +
  theme_classic()
# 2. Pie Chart for Result Variable
# Create a table for result variable
result_table <- data %>% 
  count(result) %>% 
  mutate(percentage = n / sum(n))
# Create the pie chart
library(ggplot2)
library(scales)
# Define a custom brown color palette
brown_palette <- c("win" = "#8B4513",   # SaddleBrown
                   "tie" = "#A0522D",   # Sienna
                   "loss" = "#D2B48C")  # Tan
# Create the pie chart with the brown palette
ggplot(result_table, aes(x = "", y = percentage, fill = as.factor(result))) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar(theta = "y") +
  scale_y_continuous(labels = percent) +
  geom_text(aes(label = scales::percent(percentage, accuracy = 0.1)), 
            position = position_stack(vjust = 0.5), 
            color = "white", 
            size = 5) +
  scale_fill_manual(values = brown_palette) +
  labs(title = "Pie Chart of Results",
       fill = "Result") +
  theme_classic() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid  = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold")
  )
# 3. Shot Conversion Rate Bar Chart
# Calculate shot conversion rates
conversion_rates <- data %>%
  summarise(
    Home_Conversion = mean(Home_goals) / mean(Home_shots),
    Away_Conversion = mean(Away_goals) / mean(Away_shots)
  ) %>%
  pivot_longer(cols = everything(), names_to = "Team", values_to = "ConversionRate")
brown_palette <- c("Home_Conversion" = "#8B4513",   # SaddleBrown
                   "Away_Conversion" = "#D2B48C")
# Create the bar chart
ggplot(conversion_rates, aes(x = Team, y = ConversionRate, fill = Team)) +
  geom_bar(stat = "identity", alpha = 0.8) +
  geom_text(aes(label = scales::percent(ConversionRate, accuracy = 0.01)), vjust = -0.5) +
  labs(title = "Shot Conversion Rate for Home and Away Teams - 1000x",
       x = "Team",
       y = "Conversion Rate") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 0.01)) +
  scale_fill_manual(values = brown_palette) +  # Apply the custom brown palette
  theme_classic()
results_list_5000 <- replicate(
  n = 5000,
  expr = ss(
    home_final_markov_chain = home_final_markov_chain,
    away_final_markov_chain = away_final_markov_chain,
    home_final_table = home_final_table,
    away_final_table = away_final_table,
    away_team = away_team,
    home_team = home_team
  ),
  simplify = FALSE
)
Result_5000 <- Results_df(results_list_5000,5000)
Result_5000 <- Result_5000 |>
  mutate(result = ifelse(goal_diff > 0, 'win',
                         ifelse(goal_diff == 0, 'tie', 'loss')))
data <- Result_5000
# 1. Distribution of Goal Difference
# Create a histogram for goal_diff
ggplot(data, aes(x = goal_diff)) +
  geom_histogram(binwidth = 1, fill = "saddlebrown", color = "black", alpha = 0.7) +
  labs(title = "Distribution of Goal Difference - 5000x (NSH vs SJS)",
       x = "Goal Difference",
       y = "Frequency") +
  theme_classic()
# 2. Pie Chart for Result Variable
# Create a table for result variable
result_table <- data %>% 
  count(result) %>% 
  mutate(percentage = n / sum(n))
# Create the pie chart
library(ggplot2)
library(scales)
# Define a custom brown color palette
brown_palette <- c("win" = "#8B4513",   # SaddleBrown
                   "tie" = "#A0522D",   # Sienna
                   "loss" = "#D2B48C")  # Tan
# Create the pie chart with the brown palette
ggplot(result_table, aes(x = "", y = percentage, fill = as.factor(result))) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar(theta = "y") +
  scale_y_continuous(labels = percent) +
  geom_text(aes(label = scales::percent(percentage, accuracy = 0.1)), 
            position = position_stack(vjust = 0.5), 
            color = "white", 
            size = 5) +
  scale_fill_manual(values = brown_palette) +
  labs(title = "Pie Chart of Results",
       fill = "Result") +
  theme_classic() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid  = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold")
  )
# 3. Shot Conversion Rate Bar Chart
# Calculate shot conversion rates
conversion_rates <- data %>%
  summarise(
    Home_Conversion = mean(Home_goals) / mean(Home_shots),
    Away_Conversion = mean(Away_goals) / mean(Away_shots)
  ) %>%
  pivot_longer(cols = everything(), names_to = "Team", values_to = "ConversionRate")
brown_palette <- c("Home_Conversion" = "#8B4513",   # SaddleBrown
                   "Away_Conversion" = "#D2B48C")
# Create the bar chart
ggplot(conversion_rates, aes(x = Team, y = ConversionRate, fill = Team)) +
  geom_bar(stat = "identity", alpha = 0.8) +
  geom_text(aes(label = scales::percent(ConversionRate, accuracy = 0.01)), vjust = -0.5) +
  labs(title = "Shot Conversion Rate for Home and Away Teams - 5000x",
       x = "Team",
       y = "Conversion Rate") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 0.01)) +
  scale_fill_manual(values = brown_palette) +  # Apply the custom brown palette
  theme_classic()