library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(openxlsx)
library(ggthemes)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
MIH_Teams_2024 <- read.csv("/Users/anuragreddy/Desktop/MCP/MIH_2024_teamstats_div1.csv")
#View(MIH_Teams_2024)
Disadvantage Play Efficiency - Measures how often a team scores when they are on disadvantage (i.e., the team is shorthanded due to their own penalties).
The formula for calculating Disadvanatge Play Efficiency (PPE) is:
\[ DPE = \left( \frac{\text{Short Handed Goals}}{\text{Short Handed Shots}} \right) \times 100 \]
Disadvantage Kill Efficiency - Measures how often a team successfully prevents they are on disadvantage (i.e., the team is shorthanded due to their own penalties).
The formula for calculating Penalty Kill Efficiency (PKE) is:
\[ DKE = \left( 1-\frac{\text{Short Handed Goals Allowed}}{\text{Total Penalities}} \right) \times 100 \]
Special Situation Rating (SSR) - Single metric to evaluate how well a team performs on special teams.
The formula for calculating Special Teams Efficiency (STE) is:
\[ SSR = DPE + DKE \]
New_data <- MIH_Teams_2024 |>
select(team_name,Points,wins,Short.Handed.Shots,Short.Handed.Goals,Shorthanded.Goals.Allowed,Penalties,Power.Play.Percentage)|>
mutate(across(everything(), ~replace_na(., 0)))|>
mutate(DPE = 100*Short.Handed.Goals/Short.Handed.Shots,
DKE = 100*(1 - (Shorthanded.Goals.Allowed/Penalties)),
SSR = DPE + DKE) |>
select(team_name,Points,wins,Short.Handed.Shots,Short.Handed.Goals,Shorthanded.Goals.Allowed,Penalties,DPE,DKE,SSR,Power.Play.Percentage)
league_DPE <- 100*sum(New_data$Short.Handed.Goals)/sum(New_data$Short.Handed.Shots)
league_DKE <- 100*(1- (sum(New_data$Shorthanded.Goals.Allowed)/sum(New_data$Penalties)))
New_data <- New_data |>
mutate(Norm_DPE = DPE/league_DPE,
Norm_DKE = DKE/league_DKE,
Norm_SSR = Norm_DPE+Norm_DKE,
Category = ifelse(Norm_SSR > 2, "Better Performers", "Avg Performers"))
Interpretation: Norm_SSR value greater than 2 would suggest your team is performing better than average in both categories, while a value below 2 might indicate average or below-average performance.
p <- ggplot(New_data, aes(x = team_name, y = Norm_SSR, color = Category)) +
geom_point(size = 3) +
geom_hline(yintercept = 2, linetype = "dashed", color = "black") +
labs(title = "Norm SSR of Teams",
x = "Team Name",
y = "Norm SSR") +
scale_color_manual(values = c("Better Performers" = "green", "Avg Performers" = "red")) +
scale_x_discrete(expand = expansion(add = c(0.5, 0.5))) + # Add space between ticks
theme_classic() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank())
ggplotly(p)
New_data <- New_data |>
mutate(Original_rank = rank(-Points, ties.method = 'min'),
New_rank = rank(-Norm_SSR, ties.method = 'min'))
Original Ranking vs New Ranking
correlation_value <- cor(New_data$Original_rank,New_data$New_rank)
q <- ggplot(New_data, aes(x = Original_rank, y = New_rank)) +
geom_point(size = 2) +
labs(title = "Norm SSR of Teams",
x = "Original Rank",
y = "New Rank") +
annotate("text",
x = 30,
y = 62,
label = paste("Correlation:", round(correlation_value, 2)),
size = 5,
color = "red",
fontface = "bold") +
theme_classic()
ggplotly(q)
Interpretation: It clearly shows that the newly calculated metric isn’t performing well in ranking the teams.
correlation_value <- cor(New_data$Points,New_data$New_rank)
q <- ggplot(New_data, aes(x = Points, y = New_rank)) +
geom_point(size = 2) +
labs(title = "New Rank vs Points",
x = "Points",
y = "New Rank") +
annotate("text",
x = 500,
y = 62,
label = paste("Correlation:", round(correlation_value, 2)),
size = 5,
color = "red",
fontface = "bold") +
theme_classic()
ggplotly(q)
Interpretation: It clearly shows that there is a weak negative correlation associated between the new ranks and points. Norm_SSR metric isn’t showing any significant relation with points scored by a team.
correlation_value <- cor(New_data$wins,New_data$New_rank)
q <- ggplot(New_data, aes(x = wins, y = New_rank)) +
geom_point(size = 2) +
labs(title = "Wins vs New Rank",
x = "Wins",
y = "New Rank") +
annotate("text",
x = 30,
y = 62,
label = paste("Correlation:", round(correlation_value, 2)),
size = 5,
color = "red",
fontface = "bold") +
theme_classic()
ggplotly(q)
Interpretation: It clearly shows that there is a weak negative correlation associated between the new ranks and wins. Norm_SSR metric isn’t showing any significant relation with wins secured by a team.
correlation_value <- cor(New_data$Short.Handed.Goals,New_data$New_rank)
q <- ggplot(New_data, aes(x = Short.Handed.Goals, y = New_rank)) +
geom_point(size = 2) +
labs(title = "SHG vs New Rank",
x = "Short Handed Goals",
y = "New Rank") +
annotate("text",
x = 7.5,
y = 62,
label = paste("Correlation:", round(correlation_value, 2)),
size = 5,
color = "red",
fontface = "bold") +
theme_classic()
ggplotly(q)
Interpretation: It clearly shows that there is a strong negative correlation associated between the new ranks and short handed goals. Norm_SSR metric isn showing any significant relation with short handed goals scored by a team. Our metric calculates the offensive strength of the teams in un-preferred situation.
correlation_value <- cor(New_data$Shorthanded.Goals.Allowed,New_data$New_rank)
q <- ggplot(New_data, aes(x = Shorthanded.Goals.Allowed, y = New_rank)) +
geom_point(size = 2) +
labs(title = "SHGA vs New Rank",
x = "Short Handed Goals",
y = "New Rank") +
annotate("text",
x = 7.5,
y = 50,
label = paste("Correlation:", round(correlation_value, 2)),
size = 5,
color = "red",
fontface = "bold") +
theme_classic()
ggplotly(q)
Interpretation: It clearly shows that there is a weak positive correlation associated between the new ranks and short handed goals allowed. Norm_SSR metric isn’t showing any significant relation with wins secured by a team. Well the positive shows that the higher ranked (lower Norm_SSR) teams allow more goals when the team is playing with less than 5 players.
correlation_value <- cor(New_data$Power.Play.Percentage,New_data$New_rank)
q <- ggplot(New_data, aes(x = Power.Play.Percentage, y = New_rank)) +
geom_point(size = 2) +
labs(title = "Power Play Percentage vs New Rank",
x = "Power Play Percentage",
y = "New Rank") +
annotate("text",
x = 0.3,
y = 50,
label = paste("Correlation:", round(correlation_value, 2)),
size = 5,
color = "red",
fontface = "bold") +
theme_classic()
ggplotly(q)
Interpretation: It clearly shows that there is a weak negative correlation associated between the power play percentage and new rank. Norm_SSR metric isn’t showing any significant relation with wins secured by a team. Well the negtative shows that the higher ranked (lower Norm_SSR) teams have less offensive strenght.
The combination of normalized Disadvantage Play Efficiency (DPE) and Disadvantage Kill Efficiency (DKE) provides a more holistic view of a team’s performance while they are shorthanded, especially in special teams situations (penalty kill).
A combined score above 2 means the team is performing better than the league average in shorthanded situations.
High DPE and High DKE: The team excels both offensively (DPE) and defensively (DKE) while shorthanded.
High DPE, Low DKE: The team is strong at scoring shorthanded goals but struggles defensively in penalty kill situations.
Low DPE, High DKE: The team defends well but generates few offensive chances while shorthanded.
A score below 2 indicates the team is underperforming compared to the league average.
Low DPE and Low DKE: The team struggles both offensively and defensively while shorthanded.
By comparing the combined score to 2, you get a more meaningful interpretation of how teams perform relative to the league average when both metrics are combined.