library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(openxlsx)
library(ggthemes)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Men’s Ice Hockey 2024 Division 1 Teams Data

MIH_Teams_2024 <- read.csv("/Users/anuragreddy/Desktop/MCP/MIH_2024_teamstats_div1.csv")
#View(MIH_Teams_2024)

SSR - Special Situation Rating.

Special Situation Rating (SSR) = Disadvantage Play Efficiency (DPE) + Disadvantage Kill Efficiency (DKE)

Disadvantage Play Efficiency - Measures how often a team scores when they are on disadvantage (i.e., the team is shorthanded due to their own penalties).

The formula for calculating Disadvanatge Play Efficiency (PPE) is:

\[ DPE = \left( \frac{\text{Short Handed Goals}}{\text{Short Handed Shots}} \right) \times 100 \]

Disadvantage Kill Efficiency - Measures how often a team successfully prevents they are on disadvantage (i.e., the team is shorthanded due to their own penalties).

The formula for calculating Penalty Kill Efficiency (PKE) is:

\[ DKE = \left( 1-\frac{\text{Short Handed Goals Allowed}}{\text{Total Penalities}} \right) \times 100 \]

Special Situation Rating (SSR) - Single metric to evaluate how well a team performs on special teams.

The formula for calculating Special Teams Efficiency (STE) is:

\[ SSR = DPE + DKE \]

New_data <- MIH_Teams_2024 |>
  select(team_name,Points,wins,Short.Handed.Shots,Short.Handed.Goals,Shorthanded.Goals.Allowed,Penalties,Power.Play.Percentage)|>
  mutate(across(everything(), ~replace_na(., 0)))|>
  mutate(DPE = 100*Short.Handed.Goals/Short.Handed.Shots,
         DKE = 100*(1 - (Shorthanded.Goals.Allowed/Penalties)),
         SSR = DPE + DKE) |>
  select(team_name,Points,wins,Short.Handed.Shots,Short.Handed.Goals,Shorthanded.Goals.Allowed,Penalties,DPE,DKE,SSR,Power.Play.Percentage)

But can we really add these metrics?

I guess not – while we can mathematically add them, it’s hard to interpret. However, if we normalize them by dividing each metric by the league average, then adding them would make more sense.

league_DPE <- 100*sum(New_data$Short.Handed.Goals)/sum(New_data$Short.Handed.Shots)
league_DKE <- 100*(1- (sum(New_data$Shorthanded.Goals.Allowed)/sum(New_data$Penalties)))
New_data <- New_data |>
  mutate(Norm_DPE = DPE/league_DPE,
         Norm_DKE = DKE/league_DKE,
         Norm_SSR = Norm_DPE+Norm_DKE,
         Category = ifelse(Norm_SSR > 2, "Better Performers", "Avg Performers"))

Interpretation: Norm_SSR value greater than 2 would suggest your team is performing better than average in both categories, while a value below 2 might indicate average or below-average performance.

Visualizing the quality of teams at short handed

p <- ggplot(New_data, aes(x = team_name, y = Norm_SSR, color = Category)) +
  geom_point(size = 3) +
  geom_hline(yintercept = 2, linetype = "dashed", color = "black") +
  labs(title = "Norm SSR of Teams",
       x = "Team Name",
       y = "Norm SSR") +
  scale_color_manual(values = c("Better Performers" = "green", "Avg Performers" = "red")) +
  scale_x_discrete(expand = expansion(add = c(0.5, 0.5))) +  # Add space between ticks
  theme_classic() +
  theme(axis.text.x = element_blank(), 
        axis.ticks.x = element_blank())
  
ggplotly(p)

Ranking the teams based on Norm_SSR.

New_data <- New_data |>
  mutate(Original_rank  = rank(-Points, ties.method = 'min'),
         New_rank = rank(-Norm_SSR, ties.method = 'min'))

Original Ranking vs New Ranking

correlation_value <- cor(New_data$Original_rank,New_data$New_rank)
q <- ggplot(New_data, aes(x = Original_rank, y = New_rank)) +
  geom_point(size = 2) +
  labs(title = "Norm SSR of Teams",
       x = "Original Rank",
       y = "New Rank") +
  annotate("text", 
           x = 30,  
           y = 62,       
           label = paste("Correlation:", round(correlation_value, 2)),
           size = 5, 
           color = "red", 
           fontface = "bold") +
  theme_classic()
  
ggplotly(q)

Interpretation: It clearly shows that the newly calculated metric isn’t performing well in ranking the teams.

Let’s find the correlation between the New Rank and other perfromance indicators like -

  1. Points
  2. Wins
  3. Short handed goals scored
  4. Short handed goals allowed
  5. Power Play Efficiency.
  1. New Rank vs Points.
correlation_value <- cor(New_data$Points,New_data$New_rank)
q <- ggplot(New_data, aes(x = Points, y = New_rank)) +
  geom_point(size = 2) +
  labs(title = "New Rank vs Points",
       x = "Points",
       y = "New Rank") +
  annotate("text", 
           x = 500,  
           y = 62,       
           label = paste("Correlation:", round(correlation_value, 2)),
           size = 5, 
           color = "red", 
           fontface = "bold") +
  theme_classic()
  
ggplotly(q)

Interpretation: It clearly shows that there is a weak negative correlation associated between the new ranks and points. Norm_SSR metric isn’t showing any significant relation with points scored by a team.

  1. New Rank vs Wins
correlation_value <- cor(New_data$wins,New_data$New_rank)
q <- ggplot(New_data, aes(x = wins, y = New_rank)) +
  geom_point(size = 2) +
  labs(title = "Wins vs New Rank",
       x = "Wins",
       y = "New Rank") +
  annotate("text", 
           x = 30,  
           y = 62,       
           label = paste("Correlation:", round(correlation_value, 2)),
           size = 5, 
           color = "red", 
           fontface = "bold") +
  theme_classic()
  
ggplotly(q)

Interpretation: It clearly shows that there is a weak negative correlation associated between the new ranks and wins. Norm_SSR metric isn’t showing any significant relation with wins secured by a team.

  1. New Rank vs Shot Handed Scored
correlation_value <- cor(New_data$Short.Handed.Goals,New_data$New_rank)
 q <- ggplot(New_data, aes(x = Short.Handed.Goals, y = New_rank)) +
  geom_point(size = 2) +
  labs(title = "SHG vs New Rank",
       x = "Short Handed Goals",
       y = "New Rank") +
   annotate("text", 
           x = 7.5,  
           y = 62,       
           label = paste("Correlation:", round(correlation_value, 2)),
           size = 5, 
           color = "red", 
           fontface = "bold") +
  theme_classic()
   

ggplotly(q)

Interpretation: It clearly shows that there is a strong negative correlation associated between the new ranks and short handed goals. Norm_SSR metric isn showing any significant relation with short handed goals scored by a team. Our metric calculates the offensive strength of the teams in un-preferred situation.

  1. New Rank vs Shots Goals Allowed
correlation_value <- cor(New_data$Shorthanded.Goals.Allowed,New_data$New_rank)
 q <- ggplot(New_data, aes(x = Shorthanded.Goals.Allowed, y = New_rank)) +
  geom_point(size = 2) +
  labs(title = "SHGA vs New Rank",
       x = "Short Handed Goals",
       y = "New Rank") +
   annotate("text", 
           x = 7.5,  
           y = 50,       
           label = paste("Correlation:", round(correlation_value, 2)),
           size = 5, 
           color = "red", 
           fontface = "bold") +
  theme_classic()
   

ggplotly(q)

Interpretation: It clearly shows that there is a weak positive correlation associated between the new ranks and short handed goals allowed. Norm_SSR metric isn’t showing any significant relation with wins secured by a team. Well the positive shows that the higher ranked (lower Norm_SSR) teams allow more goals when the team is playing with less than 5 players.

  1. New Rank vs Power.Play.Percentage
correlation_value <- cor(New_data$Power.Play.Percentage,New_data$New_rank)
 q <- ggplot(New_data, aes(x = Power.Play.Percentage, y = New_rank)) +
  geom_point(size = 2) +
  labs(title = "Power Play Percentage vs New Rank",
       x = "Power Play Percentage",
       y = "New Rank") +
   annotate("text", 
           x = 0.3,  
           y = 50,       
           label = paste("Correlation:", round(correlation_value, 2)),
           size = 5, 
           color = "red", 
           fontface = "bold") +
  theme_classic()
   
   
   

ggplotly(q)

Interpretation: It clearly shows that there is a weak negative correlation associated between the power play percentage and new rank. Norm_SSR metric isn’t showing any significant relation with wins secured by a team. Well the negtative shows that the higher ranked (lower Norm_SSR) teams have less offensive strenght.

Significance of Normalized Special Situation Rating:

The combination of normalized Disadvantage Play Efficiency (DPE) and Disadvantage Kill Efficiency (DKE) provides a more holistic view of a team’s performance while they are shorthanded, especially in special teams situations (penalty kill).

Interpretation of Norm SSR score:

Above League Average (Score > 2)

  • A combined score above 2 means the team is performing better than the league average in shorthanded situations.

  • High DPE and High DKE: The team excels both offensively (DPE) and defensively (DKE) while shorthanded.

  • High DPE, Low DKE: The team is strong at scoring shorthanded goals but struggles defensively in penalty kill situations.

  • Low DPE, High DKE: The team defends well but generates few offensive chances while shorthanded.

2. At League Average (Score ≈ 2)

  • A score around 2 suggests the team is performing at the league average in shorthanded situations, balancing offense and defense.

3. Below League Average (Score < 2)

  • A score below 2 indicates the team is underperforming compared to the league average.

  • Low DPE and Low DKE: The team struggles both offensively and defensively while shorthanded.

By comparing the combined score to 2, you get a more meaningful interpretation of how teams perform relative to the league average when both metrics are combined.