# Load necessary libraries
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(knitr)
# Define the file path
file_path <- "C:/Users/Franco Castagliuolo/OneDrive - Bentley University/Neers 24/Pitchers/AllGames/VT_Games.xlsx"
# Read the Excel file
games <- read_excel(file_path)
# Filter the dataset to include only rows where Vermont is pitching
VTP <- games %>% filter(PitcherTeam == "VER_MOU") #filter from dplyr to only include Vermont pitchers. %>% is pipe operator, passes result of one expression as input to the next expression, creating operation sequence. Key for efficiency
# vertical is the more dominant factor in effective velocity changes. That is why there is a minimal drop between LM and LO
FB_HI = 3
FB_HM = 2
FB_HO = 1
FB_MI = 1
FB_MM = 0
FB_MO = -1
FB_LI = -1
FB_LM = -2
FB_LO = -3
ELSE_HI = 1.5
ELSE_HM = 1
ELSE_HO = 0.5
ELSE_MI = 0.5
ELSE_MM = 0
ELSE_MO = -0.5
ELSE_LI = -0.5
ELSE_LM = -1
ELSE_LO = -1.5
#Difference in FB and other pitches because FB are faster and effective velocity differences more pronounced.
#
# Define the strike zone thresholds
low_threshold <- 2.167 #low pitches in zone below this
middle_threshold <- 2.834 # middle between 2.167 and 2.834, up >2.834
horizontal_threshold <- 0.2833 #between +/-0.2833 is middle
# Add columns to determine pitch location category, velocity adjustment, and adjusted speed
VTP <- VTP %>%
mutate( #mutatae to add new column VerticalLocation
VerticalLocation = case_when( #case_when similar to ifelse
PlateLocHeight < low_threshold ~ "Low",
PlateLocHeight <= middle_threshold ~ "Middle",
PlateLocHeight > middle_threshold ~ "High"
),
HorizontalLocation = case_when( #same thing here but with R/L. Inside to R positive values, inside to L negative. Another ifelse statement essentially
BatterSide == "Right" & PlateLocSide > horizontal_threshold ~ "Inside",
BatterSide == "Right" & PlateLocSide >= -horizontal_threshold & PlateLocSide <= horizontal_threshold ~ "Middle",
BatterSide == "Right" & PlateLocSide < -horizontal_threshold ~ "Outside",
BatterSide == "Left" & PlateLocSide < -horizontal_threshold ~ "Inside",
BatterSide == "Left" & PlateLocSide >= -horizontal_threshold & PlateLocSide <= horizontal_threshold ~ "Middle",
BatterSide == "Left" & PlateLocSide > horizontal_threshold ~ "Outside"
),
LocationCategory = paste(VerticalLocation, HorizontalLocation, sep = "_"), #combining the horizontal and vertical location categories into one variable (ex: High_Inside)
VelocityAdjustment = case_when(
AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "High_Inside" ~ FB_HI,
AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "High_Middle" ~ FB_HM,
AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "High_Outside" ~ FB_HO,
AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "Middle_Inside" ~ FB_MI,
AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "Middle_Middle" ~ FB_MM,
AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "Middle_Outside" ~ FB_MO,
AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "Low_Inside" ~ FB_LI,
AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "Low_Middle" ~ FB_LM,
AutoPitchType %in% c("Four-Seam", "Cutter", "Sinker") & LocationCategory == "Low_Outside" ~ FB_LO,
LocationCategory == "High_Inside" ~ ELSE_HI,
LocationCategory == "High_Middle" ~ ELSE_HM,
LocationCategory == "High_Outside" ~ ELSE_HO,
LocationCategory == "Middle_Inside" ~ ELSE_MI,
LocationCategory == "Middle_Middle" ~ ELSE_MM,
LocationCategory == "Middle_Outside" ~ ELSE_MO,
LocationCategory == "Low_Inside" ~ ELSE_LI,
LocationCategory == "Low_Middle" ~ ELSE_LM,
LocationCategory == "Low_Outside" ~ ELSE_LO,
TRUE ~ 0 #set to 0 if no conditions met
),
AdjustedSpeed = RelSpeed + VelocityAdjustment
) %>%
select(1:Pitcher, LocationCategory, VelocityAdjustment, AdjustedSpeed, PlateLocSide, PlateLocHeight, everything())
# Display the dataset
View(VTP)
# Create EffectiveDifference column using PitchofPA to identify the progression
VTP <- VTP %>%
group_by(Date, Pitcher, Batter, Inning) %>% # Group by identifiers for plate appearance
mutate(
EffectiveDifference = case_when(
PitchofPA == 1 ~ NA_real_, #ensures first pitch of PA not included in effective velocity averages.
TRUE ~ abs(AdjustedSpeed - lag(AdjustedSpeed)) #finding the absolute value of effective difference. Current adjusted speed - the previous one using lag function.
)
) %>%
ungroup() %>%
select(PitchofPA, Pitcher, BatterSide, PlateLocSide, PlateLocHeight, LocationCategory, RelSpeed, AdjustedSpeed, VelocityAdjustment, EffectiveDifference)
# Display the dataset
View(VTP)
# Calculate the average EffectiveDifference for the team
team_average_effective_difference <- VTP %>%
summarise(AverageEffectiveDifference = mean(EffectiveDifference, na.rm = TRUE))
# Calculate the standard deviation of EffectiveDifference for the team
team_sd_effective_difference <- VTP %>%
summarise(SD_EffectiveDifference = sd(EffectiveDifference, na.rm = TRUE))
# Calculate the average EffectiveDifference for each pitcher
pitcher_effective_difference <- VTP %>%
group_by(Pitcher) %>%
summarise(AverageEffectiveDifference = mean(EffectiveDifference, na.rm = TRUE)) %>%
arrange(desc(AverageEffectiveDifference))
# Calculate the standard deviation of EffectiveDifference for each pitcher
pitcher_sd_effective_difference <- VTP %>%
group_by(Pitcher) %>%
summarise(SD_EffectiveDifference = sd(EffectiveDifference, na.rm = TRUE)) %>%
arrange(desc(SD_EffectiveDifference))
# Print the team average effective difference
kable(team_average_effective_difference, caption = "Team Average Effective Difference") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Team Average Effective Difference
AverageEffectiveDifference
|
5.739887
|
# Print the team standard deviation of effective difference
kable(team_sd_effective_difference, caption = "Team Standard Deviation of Effective Difference") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Team Standard Deviation of Effective Difference
SD_EffectiveDifference
|
4.660449
|
# Print the table for average effective difference by pitcher
kable(pitcher_effective_difference, caption = "Average Effective Difference by Pitcher") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Average Effective Difference by Pitcher
Pitcher
|
AverageEffectiveDifference
|
Mulligan, Sean
|
8.905852
|
Dowlen, Colin
|
8.319426
|
Lapham, Jacob
|
8.101268
|
Jaun, William
|
7.965384
|
Carozza, Kyle
|
7.460404
|
Chae, Yoon
|
7.285085
|
Ellison, Chris
|
7.084007
|
Knox, Johnny
|
6.740564
|
Voli, Joseph
|
6.538061
|
Adams, Brandon
|
6.403834
|
Cox, Tyler
|
6.335447
|
Martinez, Matthew
|
6.091345
|
Luczak, Andrew
|
6.002486
|
Simpson, Max
|
5.997024
|
Angelo, Samuel
|
5.913690
|
Alekson, Ben
|
5.873562
|
Isler, Nate
|
5.791362
|
Burke, Colin
|
5.655859
|
Meehan, Harry
|
5.621752
|
Keevan, Josh
|
5.591511
|
Helwig, Dennis
|
5.488581
|
Moss, Max
|
5.414971
|
Benzinger, Derek
|
5.209349
|
Flaherty, Tommy
|
5.146689
|
Hale, Charlie
|
4.974820
|
Krupp, Aidan
|
4.943053
|
Falter, Nick
|
4.926022
|
Sibley, Joshua
|
4.879063
|
Foley, Brian
|
4.817912
|
Interdonato, Reed
|
4.733116
|
Eshleman, Jack
|
3.972495
|
Kutz, Charlie
|
3.876454
|
Burke, Keegan
|
3.853376
|
Ryan, Dillon
|
3.745662
|
Brulport, Max
|
3.178500
|
# Print the table for standard deviation of effective difference by pitcher
kable(pitcher_sd_effective_difference, caption = "Standard Deviation of Effective Difference by Pitcher") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Standard Deviation of Effective Difference by Pitcher
Pitcher
|
SD_EffectiveDifference
|
Mulligan, Sean
|
7.905850
|
Ellison, Chris
|
5.968789
|
Lapham, Jacob
|
5.941670
|
Angelo, Samuel
|
5.899802
|
Carozza, Kyle
|
5.748937
|
Dowlen, Colin
|
5.703521
|
Jaun, William
|
5.664424
|
Isler, Nate
|
5.604721
|
Martinez, Matthew
|
5.438487
|
Luczak, Andrew
|
5.347833
|
Cox, Tyler
|
5.280660
|
Adams, Brandon
|
5.183091
|
Knox, Johnny
|
5.002290
|
Meehan, Harry
|
4.960285
|
Krupp, Aidan
|
4.813201
|
Simpson, Max
|
4.808042
|
Voli, Joseph
|
4.725696
|
Foley, Brian
|
4.480798
|
Burke, Colin
|
4.282923
|
Flaherty, Tommy
|
4.226021
|
Hale, Charlie
|
4.193461
|
Helwig, Dennis
|
4.171225
|
Moss, Max
|
4.117355
|
Keevan, Josh
|
4.076650
|
Chae, Yoon
|
4.064763
|
Alekson, Ben
|
3.907176
|
Benzinger, Derek
|
3.696974
|
Interdonato, Reed
|
3.474497
|
Sibley, Joshua
|
3.462558
|
Eshleman, Jack
|
3.442146
|
Falter, Nick
|
3.306158
|
Kutz, Charlie
|
3.209195
|
Brulport, Max
|
2.777400
|
Burke, Keegan
|
2.606127
|
Ryan, Dillon
|
2.518291
|
# Calculate the one standard deviation range for the team
team_one_sd_range <- team_average_effective_difference %>%
mutate(
LowerBound = pmax(AverageEffectiveDifference - team_sd_effective_difference$SD_EffectiveDifference, 0),
UpperBound = AverageEffectiveDifference + team_sd_effective_difference$SD_EffectiveDifference
)
# Print the team one standard deviation range
kable(team_one_sd_range, caption = "Team One Standard Deviation Range of Effective Difference") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Team One Standard Deviation Range of Effective Difference
AverageEffectiveDifference
|
LowerBound
|
UpperBound
|
5.739887
|
1.079438
|
10.40034
|
# Calculate the one standard deviation range for each pitcher
pitcher_one_sd_range <- pitcher_effective_difference %>%
left_join(pitcher_sd_effective_difference, by = "Pitcher") %>%
mutate(
LowerBound = pmax(AverageEffectiveDifference - SD_EffectiveDifference, 0),
UpperBound = AverageEffectiveDifference + SD_EffectiveDifference
)
# Print the table for one standard deviation range by pitcher
kable(pitcher_one_sd_range %>% select(Pitcher, LowerBound, UpperBound), caption = "One Standard Deviation Range of Effective Difference by Pitcher") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
One Standard Deviation Range of Effective Difference by Pitcher
Pitcher
|
LowerBound
|
UpperBound
|
Mulligan, Sean
|
1.0000016
|
16.811702
|
Dowlen, Colin
|
2.6159052
|
14.022947
|
Lapham, Jacob
|
2.1595979
|
14.042938
|
Jaun, William
|
2.3009596
|
13.629808
|
Carozza, Kyle
|
1.7114670
|
13.209340
|
Chae, Yoon
|
3.2203221
|
11.349849
|
Ellison, Chris
|
1.1152180
|
13.052797
|
Knox, Johnny
|
1.7382742
|
11.742854
|
Voli, Joseph
|
1.8123654
|
11.263757
|
Adams, Brandon
|
1.2207430
|
11.586925
|
Cox, Tyler
|
1.0547866
|
11.616108
|
Martinez, Matthew
|
0.6528581
|
11.529832
|
Luczak, Andrew
|
0.6546527
|
11.350319
|
Simpson, Max
|
1.1889827
|
10.805066
|
Angelo, Samuel
|
0.0138880
|
11.813492
|
Alekson, Ben
|
1.9663856
|
9.780738
|
Isler, Nate
|
0.1866407
|
11.396083
|
Burke, Colin
|
1.3729358
|
9.938783
|
Meehan, Harry
|
0.6614666
|
10.582037
|
Keevan, Josh
|
1.5148610
|
9.668161
|
Helwig, Dennis
|
1.3173554
|
9.659806
|
Moss, Max
|
1.2976163
|
9.532326
|
Benzinger, Derek
|
1.5123755
|
8.906323
|
Flaherty, Tommy
|
0.9206675
|
9.372710
|
Hale, Charlie
|
0.7813587
|
9.168281
|
Krupp, Aidan
|
0.1298522
|
9.756254
|
Falter, Nick
|
1.6198647
|
8.232180
|
Sibley, Joshua
|
1.4165046
|
8.341621
|
Foley, Brian
|
0.3371136
|
9.298710
|
Interdonato, Reed
|
1.2586190
|
8.207613
|
Eshleman, Jack
|
0.5303497
|
7.414641
|
Kutz, Charlie
|
0.6672589
|
7.085648
|
Burke, Keegan
|
1.2472489
|
6.459504
|
Ryan, Dillon
|
1.2273709
|
6.263953
|
Brulport, Max
|
0.4011002
|
5.955900
|