library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)

Question: Which relievers are underused or overused in high-leverage situations?

Data source: FanGraphs

# Load data
# Both tables come from the same workbook: the leverage_data sheet and the
# standard_stats sheet.
leverage_table <- read_xlsx(
  path = "Leverage Data.xlsx",
  sheet = "leverage_data"
)
stats_table <- read_xlsx(
  path = "Leverage Data.xlsx",
  sheet = "standard_stats"
)

# Preview the first rows of the leverage sheet
leverage_table %>%
  head()
## # A tibble: 6 × 6
##     `#` Name             Team    WPA  gmLI Clutch
##   <dbl> <chr>            <chr> <dbl> <dbl>  <dbl>
## 1     1 Trevor Gott      2 Tms -2.07  1.08  -1.43
## 2     2 Ian Gibaut       CIN    0.34  1.54  -0.21
## 3     3 Carlos Hernández KCR   -2.4   1.35  -1.85
## 4     4 Yimi García      TOR    0.07  1.38   0.24
## 5     5 Buck Farmer      CIN    0.38  1.29  -0.15
## 6     6 Sam Moll         2 Tms  0.17  1.35  -0.61
# Preview the first rows of the standard-stats sheet
stats_table %>%
  head()
## # A tibble: 6 × 11
##     `#` Name          Team      G    GS    IP   TBF     H    BB    WP    SO
##   <dbl> <chr>         <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     1 Tommy Hunter  NYM      14     0  23.2   106    28     5     0    20
## 2     2 Matt Bush     MIL      12     0  10.1    48    11     6     0    10
## 3     3 Adam Ottavino NYM      66     0  61.2   261    46    29     5    62
## 4     4 Matt Moore    3 Tms    50     0  52.2   218    46    15     1    60
## 5     5 Zack Greinke  KCR       3     0  11.1    50    13     2     0     9
## 6     6 Corey Kluber  BOS       6     0  13.1    65    23     3     0     8
# Join tables and clean
# One pipeline: match leverage rows to stats rows by pitcher name (keeping only
# names present in both sheets), drop the stats sheet's duplicated rank and
# team columns, keep rows with IP of at least 17, and overwrite SO with
# strikeouts per batter faced.
# NOTE(review): Name is the only join key, so two pitchers sharing a name would
# be cross-matched -- confirm names are unique in both sheets.
combined_table <- leverage_table %>%
  inner_join(stats_table, by = "Name") %>%
  select(-c("#.y", "Team.y")) %>%
  filter(IP >= 17) %>%
  mutate(SO = SO / TBF) # SO now holds the strikeout rate, not the raw count

# Preview the cleaned table
combined_table %>%
  head()
## # A tibble: 6 × 14
##   `#.x` Name       Team.x   WPA  gmLI Clutch     G    GS    IP   TBF     H    BB
##   <dbl> <chr>      <chr>  <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     1 Trevor Go… 2 Tms  -2.07  1.08  -1.43    64     0  58     261    63    19
## 2     2 Ian Gibaut CIN     0.34  1.54  -0.21    74     0  75.2   318    69    28
## 3     3 Carlos He… KCR    -2.4   1.35  -1.85    63     0  63     273    57    30
## 4     4 Yimi Garc… TOR     0.07  1.38   0.24    73     0  66     282    67    15
## 5     5 Buck Farm… CIN     0.38  1.29  -0.15    71     0  75     309    58    29
## 6     6 Sam Moll   2 Tms   0.17  1.35  -0.61    69     0  61.1   264    46    30
## # ℹ 2 more variables: WP <dbl>, SO <dbl>
# Medians of both plotted variables; they split the scatter into quadrants.
# na.rm = TRUE keeps a stray missing value from turning a median (and hence
# its reference line) into NA.
med_gmLI <- median(combined_table$gmLI, na.rm = TRUE)
med_SO <- median(combined_table$SO, na.rm = TRUE)

# Scatter of gmLI against strikeout rate. The SO column was overwritten with
# SO / TBF during cleaning, so the x-axis is labelled as a rate (matching the
# plot title) rather than as a raw strikeout count.
plot1 <- ggplot(data = combined_table) +
  geom_point(aes(x = SO, y = gmLI)) +
  # Add horizontal line at the median of gmLI
  geom_hline(
    yintercept = med_gmLI,
    linetype = "dashed",
    color = "royalblue"
  ) +
  # Add vertical line at the median of the strikeout rate
  geom_vline(
    xintercept = med_SO,
    linetype = "dashed",
    color = "royalblue"
  ) +
  labs(x = "K rate (SO / TBF)", y = "gmLI", title = "gmLI vs K rate") +
  # Quadrant captions: low K rate with high gmLI (upper left) and high K rate
  # with low gmLI (lower right). Positions are hand-picked plot coordinates.
  annotate(
    "text",
    x = 0.125, y = 1.75,
    label = "Overused in \n leverage situations"
  ) +
  annotate(
    "text",
    x = 0.4, y = 0.25,
    label = "Underused in \n leverage situations"
  ) +
  # Hand-placed name for the lone pitcher with gmLI < 0.75 and SO > 0.35
  # (the filter that follows the plot returns just this row).
  annotate(
    "text",
    x = 0.38, y = 0.75,
    label = "Trevor Megill",
    color = "darkred"
  )

plot1

# Show the pitchers in the low-leverage, high-strikeout corner of the plot:
# gmLI below 0.75 and strikeout rate (SO) above 0.35. Comma-separated
# conditions in filter() must all hold.
filter(combined_table, gmLI < 0.75, SO > 0.35)
## # A tibble: 1 × 14
##   `#.x` Name       Team.x   WPA  gmLI Clutch     G    GS    IP   TBF     H    BB
##   <dbl> <chr>      <chr>  <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1   430 Trevor Me… MIL     0.38  0.62   0.27    29     0  32.2   137    31    11
## # ℹ 2 more variables: WP <dbl>, SO <dbl>