# Load necessary libraries
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(knitr)

# Define the file path
file_path <- "C:/Users/Franco Castagliuolo/OneDrive - Bentley University/Neers 24/Pitchers/AllGames/VT_Games.xlsx"

# Read the Excel file
games <- read_excel(file_path)

# Filter the dataset to include only rows where Vermont is pitching
VTP <- games %>% filter(PitcherTeam == "VER_MOU") #filter from dplyr to only include Vermont pitchers. %>% is pipe operator, passes result of one expression as input to the next expression, creating operation sequence. Key for efficiency

# vertical is the more dominant factor in effective velocity changes. That is why there is a minimal drop between LM and LO
FB_HI = 3 
FB_HM = 2 
FB_HO = 1  
FB_MI = 1
FB_MM = 0 
FB_MO = -1
FB_LI = -1 
FB_LM = -2  
FB_LO = -3



ELSE_HI = 1.5
ELSE_HM = 1
ELSE_HO = 0.5
ELSE_MI = 0.5
ELSE_MM = 0
ELSE_MO = -0.5
ELSE_LI  = -0.5
ELSE_LM = -1
ELSE_LO = -1.5


#Difference in FB and other pitches because FB are faster and effective velocity differences more pronounced.
#

# Define the strike zone thresholds
low_threshold <- 2.167 #low pitches in zone below this
middle_threshold <- 2.834 # middle between 2.167 and 2.834, up >2.834
horizontal_threshold <- 0.2833 #between +/-0.2833 is middle

# Add columns to determine pitch location category, velocity adjustment, and adjusted speed
VTP <- VTP %>%
  mutate( #mutatae to add new column VerticalLocation
    VerticalLocation = case_when( #case_when similar to ifelse
      PlateLocHeight < low_threshold ~ "Low",
      PlateLocHeight <= middle_threshold ~ "Middle",
      PlateLocHeight > middle_threshold ~ "High"
    ),
    HorizontalLocation = case_when( #same thing here but with R/L. Inside to R positive values, inside to L negative. Another ifelse statement essentially
      BatterSide == "Right" & PlateLocSide > horizontal_threshold ~ "Inside",
      BatterSide == "Right" & PlateLocSide >= -horizontal_threshold & PlateLocSide <= horizontal_threshold ~ "Middle",
      BatterSide == "Right" & PlateLocSide < -horizontal_threshold ~ "Outside",
      BatterSide == "Left" & PlateLocSide < -horizontal_threshold ~ "Inside",
      BatterSide == "Left" & PlateLocSide >= -horizontal_threshold & PlateLocSide <= horizontal_threshold ~ "Middle",
      BatterSide == "Left" & PlateLocSide > horizontal_threshold ~ "Outside"
    ),
    LocationCategory = paste(VerticalLocation, HorizontalLocation, sep = "_"), #combining the horizontal and vertical location categories into one variable (ex: High_Inside)
    VelocityAdjustment = case_when(
      AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "High_Inside" ~ FB_HI,
      AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "High_Middle" ~ FB_HM,
      AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "High_Outside" ~ FB_HO,
      AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "Middle_Inside" ~ FB_MI,
      AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "Middle_Middle" ~ FB_MM,
      AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "Middle_Outside" ~ FB_MO,
      AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "Low_Inside" ~ FB_LI,
      AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "Low_Middle" ~ FB_LM,
      AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "Low_Outside" ~ FB_LO,
      LocationCategory == "High_Inside" ~ ELSE_HI,
      LocationCategory == "High_Middle" ~ ELSE_HM,
      LocationCategory == "High_Outside" ~ ELSE_HO,
      LocationCategory == "Middle_Inside" ~ ELSE_MI,
      LocationCategory == "Middle_Middle" ~ ELSE_MM,
      LocationCategory == "Middle_Outside" ~ ELSE_MO,
      LocationCategory == "Low_Inside" ~ ELSE_LI,
      LocationCategory == "Low_Middle" ~ ELSE_LM,
      LocationCategory == "Low_Outside" ~ ELSE_LO,
      TRUE ~ 0 #set to 0 if no conditions met
    ),
    AdjustedSpeed = RelSpeed + VelocityAdjustment 
  ) %>%
  select(1:Pitcher, LocationCategory, VelocityAdjustment, AdjustedSpeed, PlateLocSide, PlateLocHeight, everything())

# Display the dataset
View(VTP)
# Create EffectiveDifference column using PitchofPA to identify the progression
VTP <- VTP %>% 
  group_by(Date, Pitcher, Batter, Inning) %>% # Group by identifiers for plate appearance
  mutate(
    EffectiveDifference = case_when(
      PitchofPA == 1 ~ NA_real_, #ensures first pitch of PA not included in effective velocity averages. 
      TRUE ~ abs(AdjustedSpeed - lag(AdjustedSpeed)) #finding the absolute value of effective difference. Current adjusted speed - the previous one using lag function. 
    )
  ) %>%
  ungroup() %>%
  select(PitchofPA, Pitcher, BatterSide, PlateLocSide, PlateLocHeight, LocationCategory, RelSpeed, AdjustedSpeed, VelocityAdjustment, EffectiveDifference)

# Display the dataset
View(VTP)
# Calculate the average EffectiveDifference for the team
team_average_effective_difference <- VTP %>%
  summarise(AverageEffectiveDifference = mean(EffectiveDifference, na.rm = TRUE))

# Calculate the standard deviation of EffectiveDifference for the team
team_sd_effective_difference <- VTP %>%
  summarise(SD_EffectiveDifference = sd(EffectiveDifference, na.rm = TRUE))

# Calculate the average EffectiveDifference for each pitcher
pitcher_effective_difference <- VTP %>%
  group_by(Pitcher) %>%
  summarise(AverageEffectiveDifference = mean(EffectiveDifference, na.rm = TRUE)) %>%
  arrange(desc(AverageEffectiveDifference))

# Calculate the standard deviation of EffectiveDifference for each pitcher
pitcher_sd_effective_difference <- VTP %>%
  group_by(Pitcher) %>%
  summarise(SD_EffectiveDifference = sd(EffectiveDifference, na.rm = TRUE)) %>%
  arrange(desc(SD_EffectiveDifference))

# Print the team average effective difference
kable(team_average_effective_difference, caption = "Team Average Effective Difference") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Team Average Effective Difference
AverageEffectiveDifference
5.739887
# Print the team standard deviation of effective difference
kable(team_sd_effective_difference, caption = "Team Standard Deviation of Effective Difference") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Team Standard Deviation of Effective Difference
SD_EffectiveDifference
4.660449
# Print the table for average effective difference by pitcher
kable(pitcher_effective_difference, caption = "Average Effective Difference by Pitcher") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Average Effective Difference by Pitcher
Pitcher AverageEffectiveDifference
Mulligan, Sean 8.905852
Dowlen, Colin 8.319426
Lapham, Jacob 8.101268
Jaun, William 7.965384
Carozza, Kyle 7.460404
Chae, Yoon 7.285085
Ellison, Chris 7.084007
Knox, Johnny 6.740564
Voli, Joseph 6.538061
Adams, Brandon 6.403834
Cox, Tyler 6.335447
Martinez, Matthew 6.091345
Luczak, Andrew 6.002486
Simpson, Max 5.997024
Angelo, Samuel 5.913690
Alekson, Ben 5.873562
Isler, Nate 5.791362
Burke, Colin 5.655859
Meehan, Harry 5.621752
Keevan, Josh 5.591511
Helwig, Dennis 5.488581
Moss, Max 5.414971
Benzinger, Derek 5.209349
Flaherty, Tommy 5.146689
Hale, Charlie 4.974820
Krupp, Aidan 4.943053
Falter, Nick 4.926022
Sibley, Joshua 4.879063
Foley, Brian 4.817912
Interdonato, Reed 4.733116
Eshleman, Jack 3.972495
Kutz, Charlie 3.876454
Burke, Keegan 3.853376
Ryan, Dillon 3.745662
Brulport, Max 3.178500
# Print the table for standard deviation of effective difference by pitcher
kable(pitcher_sd_effective_difference, caption = "Standard Deviation of Effective Difference by Pitcher") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Standard Deviation of Effective Difference by Pitcher
Pitcher SD_EffectiveDifference
Mulligan, Sean 7.905850
Ellison, Chris 5.968789
Lapham, Jacob 5.941670
Angelo, Samuel 5.899802
Carozza, Kyle 5.748937
Dowlen, Colin 5.703521
Jaun, William 5.664424
Isler, Nate 5.604721
Martinez, Matthew 5.438487
Luczak, Andrew 5.347833
Cox, Tyler 5.280660
Adams, Brandon 5.183091
Knox, Johnny 5.002290
Meehan, Harry 4.960285
Krupp, Aidan 4.813201
Simpson, Max 4.808042
Voli, Joseph 4.725696
Foley, Brian 4.480798
Burke, Colin 4.282923
Flaherty, Tommy 4.226021
Hale, Charlie 4.193461
Helwig, Dennis 4.171225
Moss, Max 4.117355
Keevan, Josh 4.076650
Chae, Yoon 4.064763
Alekson, Ben 3.907176
Benzinger, Derek 3.696974
Interdonato, Reed 3.474497
Sibley, Joshua 3.462558
Eshleman, Jack 3.442146
Falter, Nick 3.306158
Kutz, Charlie 3.209195
Brulport, Max 2.777400
Burke, Keegan 2.606127
Ryan, Dillon 2.518291
# Calculate the one standard deviation range for the team
team_one_sd_range <- team_average_effective_difference %>%
  mutate(
    LowerBound = pmax(AverageEffectiveDifference - team_sd_effective_difference$SD_EffectiveDifference, 0),
    UpperBound = AverageEffectiveDifference + team_sd_effective_difference$SD_EffectiveDifference
  )

# Print the team one standard deviation range
kable(team_one_sd_range, caption = "Team One Standard Deviation Range of Effective Difference") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Team One Standard Deviation Range of Effective Difference
AverageEffectiveDifference LowerBound UpperBound
5.739887 1.079438 10.40034
# Calculate the one standard deviation range for each pitcher
pitcher_one_sd_range <- pitcher_effective_difference %>%
  left_join(pitcher_sd_effective_difference, by = "Pitcher") %>%
  mutate(
    LowerBound = pmax(AverageEffectiveDifference - SD_EffectiveDifference, 0),
    UpperBound = AverageEffectiveDifference + SD_EffectiveDifference
  )

# Print the table for one standard deviation range by pitcher
kable(pitcher_one_sd_range %>% select(Pitcher, LowerBound, UpperBound), caption = "One Standard Deviation Range of Effective Difference by Pitcher") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
One Standard Deviation Range of Effective Difference by Pitcher
Pitcher LowerBound UpperBound
Mulligan, Sean 1.0000016 16.811702
Dowlen, Colin 2.6159052 14.022947
Lapham, Jacob 2.1595979 14.042938
Jaun, William 2.3009596 13.629808
Carozza, Kyle 1.7114670 13.209340
Chae, Yoon 3.2203221 11.349849
Ellison, Chris 1.1152180 13.052797
Knox, Johnny 1.7382742 11.742854
Voli, Joseph 1.8123654 11.263757
Adams, Brandon 1.2207430 11.586925
Cox, Tyler 1.0547866 11.616108
Martinez, Matthew 0.6528581 11.529832
Luczak, Andrew 0.6546527 11.350319
Simpson, Max 1.1889827 10.805066
Angelo, Samuel 0.0138880 11.813492
Alekson, Ben 1.9663856 9.780738
Isler, Nate 0.1866407 11.396083
Burke, Colin 1.3729358 9.938783
Meehan, Harry 0.6614666 10.582037
Keevan, Josh 1.5148610 9.668161
Helwig, Dennis 1.3173554 9.659806
Moss, Max 1.2976163 9.532326
Benzinger, Derek 1.5123755 8.906323
Flaherty, Tommy 0.9206675 9.372710
Hale, Charlie 0.7813587 9.168281
Krupp, Aidan 0.1298522 9.756254
Falter, Nick 1.6198647 8.232180
Sibley, Joshua 1.4165046 8.341621
Foley, Brian 0.3371136 9.298710
Interdonato, Reed 1.2586190 8.207613
Eshleman, Jack 0.5303497 7.414641
Kutz, Charlie 0.6672589 7.085648
Burke, Keegan 1.2472489 6.459504
Ryan, Dillon 1.2273709 6.263953
Brulport, Max 0.4011002 5.955900