Based on the article “Seriously Though, What Is a Changeup and What Does It Do?” by Daniel R. Epstein (January 3, 2024).
I was curious to see what other pitches a changeup could play off of. While the article suggested a changeup is really only designed to play off of a fastball, I wanted to see whether the data suggest it can play off of other pitches as well.
The scope of this analysis is the horizontal-movement, vertical-movement, and velocity differentials between each pitcher’s changeup and various other pitches. To start, we will look at only the 2023 season. The velocity, horizontal-movement, and vertical-movement data for each pitch will be downloaded from Statcast. The DRA- data will be downloaded from Baseball Prospectus and imported into a data frame.
library(tidyverse)
library(devtools)
setwd("C:\\Users\\james\\R_Working_Directory\\Analyzing_Baseball_Data_With_R\\baseball_R\\data")
library(dplyr)
library(baseballr)
First, let’s create a temporary data frame just to see if our connection to the statcast data is working:
# Smoke test: pull a two-day window to confirm the Statcast connection works.
temp_data <- scrape_statcast_savant(
  start_date = "2023-05-01",
  end_date = "2023-05-02"
)
trying URL 'https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2023%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=batter&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&game_date_gt=2023-05-01&game_date_lt=2023-05-02&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details'
Content type 'application/download; charset=utf-8' length 3685080 bytes (3.5 MB)
downloaded 3.5 MB
# Inspect the column names and types returned by the sample pull.
str(temp_data)
bsbllr_d [6,440 × 92] (S3: baseballr_data/tbl_df/tbl/data.table/data.frame)
$ pitch_type : chr [1:6440] "SL" "FF" "SI" "FF" ...
$ game_date : Date[1:6440], format: "2023-05-02" "2023-05-02" "2023-05-02" "2023-05-02" ...
$ release_speed : num [1:6440] 88 95.7 93.4 93.9 96 88.4 88 93.5 96.5 96.3 ...
$ release_pos_x : num [1:6440] -1.66 -1.64 -1.68 -1.43 -1.72 -1.78 -1.65 -1.51 -1.76 -1.75 ...
$ release_pos_z : num [1:6440] 5.51 5.92 5.68 5.69 5.89 5.72 5.65 5.83 5.97 5.95 ...
$ player_name : chr [1:6440] "Meyers, Jake" "Straw, Myles" "Meyers, Jake" "Meyers, Jake" ...
$ batter : num [1:6440] 676694 664702 676694 676694 664702 ...
$ pitcher : num [1:6440] 543101 543037 543101 543101 543037 ...
$ events : chr [1:6440] "field_out" "field_out" "" "" ...
$ description : chr [1:6440] "hit_into_play" "hit_into_play" "ball" "ball" ...
$ spin_dir : logi [1:6440] NA NA NA NA NA NA ...
$ spin_rate_deprecated : logi [1:6440] NA NA NA NA NA NA ...
$ break_angle_deprecated : logi [1:6440] NA NA NA NA NA NA ...
$ break_length_deprecated : logi [1:6440] NA NA NA NA NA NA ...
$ zone : num [1:6440] 12 5 12 12 13 13 6 11 11 6 ...
$ des : chr [1:6440] "Jake Meyers grounds out, second baseman Brett Wisely to first baseman LaMonte Wade Jr." "Myles Straw flies out to right fielder Oswaldo Cabrera." "Jake Meyers grounds out, second baseman Brett Wisely to first baseman LaMonte Wade Jr." "Jake Meyers grounds out, second baseman Brett Wisely to first baseman LaMonte Wade Jr." ...
$ game_type : chr [1:6440] "R" "R" "R" "R" ...
$ stand : chr [1:6440] "R" "R" "R" "R" ...
$ p_throws : chr [1:6440] "R" "R" "R" "R" ...
$ home_team : chr [1:6440] "HOU" "NYY" "HOU" "HOU" ...
$ away_team : chr [1:6440] "SF" "CLE" "SF" "SF" ...
$ type : chr [1:6440] "X" "X" "B" "B" ...
$ hit_location : int [1:6440] 4 9 NA NA NA 1 NA NA NA NA ...
$ bb_type : chr [1:6440] "ground_ball" "fly_ball" "" "" ...
$ balls : int [1:6440] 3 1 2 1 0 1 1 0 0 0 ...
$ strikes : int [1:6440] 2 0 2 2 0 1 1 1 1 0 ...
$ game_year : int [1:6440] 2023 2023 2023 2023 2023 2023 2023 2023 2023 2023 ...
$ pfx_x : num [1:6440] 0.1 -0.56 -1.34 -1.05 -0.54 -1.29 0.26 -1.25 -0.83 -0.85 ...
$ pfx_z : num [1:6440] 0.56 1.66 1.01 1.28 1.71 0.69 0.57 0.95 1.55 1.51 ...
$ plate_x : num [1:6440] 1.25 0.2 0.53 1.29 -0.02 -0.93 0.38 -1.51 -0.44 0.54 ...
$ plate_z : num [1:6440] 2.54 2.6 3.37 3.63 1.26 2.24 2.55 3.42 4.8 2.29 ...
$ on_3b : num [1:6440] NA NA NA NA NA NA NA NA NA NA ...
$ on_2b : num [1:6440] 673237 686823 673237 673237 686823 ...
$ on_1b : num [1:6440] NA NA NA NA NA NA NA NA NA NA ...
$ outs_when_up : int [1:6440] 2 2 2 2 2 2 2 2 2 2 ...
$ inning : num [1:6440] 8 6 8 8 6 6 8 8 6 6 ...
$ inning_topbot : chr [1:6440] "Bot" "Top" "Bot" "Bot" ...
$ hc_x : num [1:6440] 131 193 NA NA NA ...
$ hc_y : num [1:6440] 155.3 89.5 NA NA NA ...
$ tfs_deprecated : logi [1:6440] NA NA NA NA NA NA ...
$ tfs_zulu_deprecated : logi [1:6440] NA NA NA NA NA NA ...
$ fielder_2 : num [1:6440] 663698 624431 663698 663698 624431 ...
$ umpire : logi [1:6440] NA NA NA NA NA NA ...
$ sv_id : logi [1:6440] NA NA NA NA NA NA ...
$ vx0 : num [1:6440] 6.91 6.1 8.67 9.4 5.72 ...
$ vy0 : num [1:6440] -128 -139 -136 -136 -139 ...
$ vz0 : num [1:6440] -2.35 -6.81 -2.47 -2.38 -10.46 ...
$ ax : num [1:6440] -0.188 -8.65 -18.417 -15.178 -8.305 ...
$ ay : num [1:6440] 23.3 33.7 28.1 31.3 32.4 ...
$ az : num [1:6440] -25.7 -9.28 -19.26 -15.96 -7.76 ...
$ sz_top : num [1:6440] 3.23 3.36 3.23 3.33 3.33 3.39 3.23 3.35 3.36 3.39 ...
$ sz_bot : num [1:6440] 1.5 1.56 1.53 1.59 1.56 1.59 1.5 1.55 1.59 1.59 ...
$ hit_distance_sc : num [1:6440] 5 318 NA NA NA 1 171 NA NA 215 ...
$ launch_speed : num [1:6440] 57.9 90.4 NA NA NA 59.2 83.4 NA NA 71.3 ...
$ launch_angle : num [1:6440] -29 30 NA NA NA -60 68 NA NA 30 ...
$ effective_speed : num [1:6440] 88.8 95.2 94 94 95.8 88.4 88.5 94.1 96.1 96.4 ...
$ release_spin_rate : num [1:6440] 2267 2458 2364 2337 2462 ...
$ release_extension : num [1:6440] 6.5 6.3 6.5 6.5 6.4 6.2 6.3 6.3 6.1 6.4 ...
$ game_pk : num [1:6440] 718337 718339 718337 718337 718339 ...
$ pitcher_1 : num [1:6440] 543101 543037 543101 543101 543037 ...
$ fielder_2_1 : num [1:6440] 663698 624431 663698 663698 624431 ...
$ fielder_3 : num [1:6440] 664774 519203 664774 664774 519203 ...
$ fielder_4 : num [1:6440] 689172 650402 689172 689172 650402 ...
$ fielder_5 : num [1:6440] 605204 518934 605204 605204 518934 ...
$ fielder_6 : num [1:6440] 642731 683011 642731 642731 683011 ...
$ fielder_7 : num [1:6440] 596103 543305 596103 596103 543305 ...
$ fielder_8 : num [1:6440] 670276 664056 670276 670276 664056 ...
$ fielder_9 : num [1:6440] 624424 665828 624424 624424 665828 ...
$ release_pos_y : num [1:6440] 54 54.2 54 54 54.1 ...
$ estimated_ba_using_speedangle : num [1:6440] 0.06 0.074 NA NA NA 0.249 NA NA NA NA ...
$ estimated_woba_using_speedangle: num [1:6440] 0.059 0.105 NA NA NA 0.224 NA NA NA NA ...
$ woba_value : num [1:6440] 0 0 NA NA NA 0.9 NA NA NA NA ...
$ woba_denom : int [1:6440] 1 1 NA NA NA 1 NA NA NA NA ...
$ babip_value : int [1:6440] 0 0 NA NA NA 1 NA NA NA NA ...
$ iso_value : int [1:6440] 0 0 NA NA NA 0 NA NA NA NA ...
$ launch_speed_angle : int [1:6440] 1 3 NA NA NA 1 NA NA NA NA ...
$ at_bat_number : num [1:6440] 61 44 61 61 44 43 61 61 43 43 ...
$ pitch_number : num [1:6440] 6 2 5 4 1 3 3 2 2 1 ...
$ pitch_name : chr [1:6440] "Slider" "4-Seam Fastball" "Sinker" "4-Seam Fastball" ...
$ home_score : num [1:6440] 0 0 0 0 0 0 0 0 0 0 ...
$ away_score : num [1:6440] 2 2 2 2 2 2 2 2 2 2 ...
$ bat_score : num [1:6440] 0 2 0 0 2 2 0 0 2 2 ...
$ fld_score : num [1:6440] 2 0 2 2 0 0 2 2 0 0 ...
$ post_away_score : num [1:6440] 2 2 2 2 2 2 2 2 2 2 ...
$ post_home_score : num [1:6440] 0 0 0 0 0 0 0 0 0 0 ...
$ post_bat_score : num [1:6440] 0 2 0 0 2 2 0 0 2 2 ...
$ post_fld_score : num [1:6440] 2 0 2 2 0 0 2 2 0 0 ...
$ if_fielding_alignment : chr [1:6440] "Standard" "Standard" "Standard" "Standard" ...
$ of_fielding_alignment : chr [1:6440] "Standard" "Strategic" "Standard" "Standard" ...
$ spin_axis : num [1:6440] 162 207 223 225 212 238 181 224 214 216 ...
$ delta_home_win_exp : num [1:6440] -0.051 0.027 0 0 0 -0.019 0 0 0 0 ...
$ delta_run_exp : num [1:6440] -0.301 -0.35 0.057 0.017 0.023 0.138 -0.071 0.022 0.012 -0.017 ...
- attr(*, "baseballr_timestamp")= POSIXct[1:1], format: "2024-03-07 10:02:53"
- attr(*, "baseballr_type")= chr "MLB Baseball Savant Statcast Search data from baseballsavant.mlb.com"
Okay, now that we know we can access the statcast data, we need to import what we want for our analysis. Because of the volume of data, we will need to create a for loop that will allow us to pull in data one month at a time. Otherwise, constraints from the statcast website on the amount of data we can download at once may cause us to miss some data.
Additionally, in the baseballr library, we will want to make sure we use the scrape_statcast_savant_pitcher_all function to get the pitcher play by play data.
# Download the season in short date windows rather than whole months.
#
# Why not monthly batches:
#   * Every month-long request came back at ~13.6-13.7 MB, while a two-day
#     request was 3.5 MB for 6,440 rows. Identical sizes for every full month
#     indicate the server truncates large queries, so most pitches in each
#     month were silently dropped.
#   * Building end dates with seq(as.Date("2023-04-30"), ..., by = "month")
#     yields the 30th of every month, so the 31st of May, July, August and
#     October was never requested.
# Fixed-length windows clipped to the season end avoid both problems.
#
# NOTE(review): the 2023 regular season is believed to have opened on
# 2023-03-30; move `season_start` earlier if those games should be included.
season_start <- as.Date("2023-04-01")
season_end <- as.Date("2023-10-31")
chunk_days <- 3

# NOTE(review): Baseball Savant's per-query row cap is believed to be 25,000;
# confirm the exact figure. It is only used to warn about possible truncation.
savant_row_cap <- 25000

start_dates <- seq(season_start, season_end, by = paste(chunk_days, "days"))
end_dates <- pmin(start_dates + (chunk_days - 1), season_end)

# Preallocate one slot per window; slots for failed or empty windows stay NULL
# and are ignored when the list is later combined with bind_rows().
all_statcast_data <- vector("list", length(start_dates))

# Loop through each date window and fetch pitcher play-by-play data
for (i in seq_along(start_dates)) {
  start_date <- format(start_dates[i], "%Y-%m-%d")
  end_date <- format(end_dates[i], "%Y-%m-%d")

  temp_data <- tryCatch(
    scrape_statcast_savant_pitcher_all(
      start_date = start_date,
      end_date = end_date
    ),
    error = function(e) {
      # Report the failure (with its cause) and keep going so that one bad
      # window does not abort the whole season download.
      message(
        "Error fetching data for period: ", start_date, " to ", end_date,
        " (", conditionMessage(e), ")"
      )
      NULL
    }
  )

  # Skip failed windows and windows with no games (e.g. off days).
  if (!is.null(temp_data) && nrow(temp_data) > 0) {
    if (nrow(temp_data) >= savant_row_cap) {
      warning(
        "Window ", start_date, " to ", end_date, " returned ",
        nrow(temp_data), " rows and may be truncated; reduce `chunk_days`.",
        call. = FALSE
      )
    }
    all_statcast_data[[i]] <- temp_data
  }
}
trying URL 'https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2023%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&game_date_gt=2023-04-01&game_date_lt=2023-04-30&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details'
Content type 'application/download; charset=utf-8' length 14271705 bytes (13.6 MB)
downloaded 13.6 MB
trying URL 'https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2023%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&game_date_gt=2023-05-01&game_date_lt=2023-05-30&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details'
Content type 'application/download; charset=utf-8' length 14322499 bytes (13.7 MB)
downloaded 13.7 MB
trying URL 'https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2023%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&game_date_gt=2023-06-01&game_date_lt=2023-06-30&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details'
Content type 'application/download; charset=utf-8' length 14293611 bytes (13.6 MB)
downloaded 13.6 MB
trying URL 'https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2023%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&game_date_gt=2023-07-01&game_date_lt=2023-07-30&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details'
Content type 'application/download; charset=utf-8' length 14297918 bytes (13.6 MB)
downloaded 13.6 MB
trying URL 'https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2023%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&game_date_gt=2023-08-01&game_date_lt=2023-08-30&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details'
Content type 'application/download; charset=utf-8' length 14348996 bytes (13.7 MB)
downloaded 13.7 MB
trying URL 'https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2023%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&game_date_gt=2023-09-01&game_date_lt=2023-09-30&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details'
Content type 'application/download; charset=utf-8' length 14328988 bytes (13.7 MB)
downloaded 13.7 MB
trying URL 'https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2023%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&game_date_gt=2023-10-01&game_date_lt=2023-10-30&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details'
Content type 'application/download; charset=utf-8' length 8938801 bytes (8.5 MB)
downloaded 8.5 MB
# Stack every downloaded batch into a single season-long data frame.
final_statcast_data <- all_statcast_data %>%
  bind_rows()
#mlb2023_season_savant_data <- scrape_statcast_savant(start_date = "2023-05-01", end_date = "2023-05-31", )
Now, let’s quickly summarize the data we’ve pulled in to get an idea of what we’re looking at. Let’s count the number of pitches and compute the average release speed of each pitch type.
# Number of pitches and mean release speed for each pitch type in the pull.
pitch_count <- summarise(
  group_by(final_statcast_data, pitch_name),
  pitch_count = n(),
  avg_release_speed = mean(release_speed, na.rm = TRUE)
)
print(pitch_count)
Next, let’s create a dataframe of just the pitches that we want to look at. We’ll get a league average and look at league differentials. This won’t be used for our correlation with DRA-, rather it’s good to just look at the league as a whole before we dive in pitcher by pitcher.
# Keep only the pitch types being compared; the changeup is included so its
# league averages can serve as the baseline.
relevant_pitches <- filter(
  final_statcast_data,
  pitch_name %in% c("Sweeper", "Slider", "Curveball", "Changeup", "Cutter", "Sinker")
)

# League-wide mean horizontal movement, vertical movement and release speed
# for each pitch type.
# NOTE(review): pfx_x is averaged over left- and right-handed pitchers
# together; if horizontal movement is mirrored by handedness these averages
# partly cancel out - confirm before interpreting avg_pfx_x.
average_metrics <- relevant_pitches %>%
  group_by(pitch_name) %>%
  summarize(
    across(
      c(pfx_x, pfx_z, release_speed),
      ~ mean(.x, na.rm = TRUE),
      .names = "avg_{.col}"
    )
  )

# Single-row baseline holding the league-average changeup.
changeup_metrics <- filter(average_metrics, pitch_name == "Changeup")

# Differential of every other pitch type relative to the changeup baseline.
# NOTE(review): the sign here is (pitch - changeup), whereas the per-player
# differentials later in this file are (changeup - pitch).
differentials <- average_metrics %>%
  filter(pitch_name != "Changeup") %>%
  mutate(
    diff_pfx_x = avg_pfx_x - changeup_metrics[["avg_pfx_x"]],
    diff_pfx_z = avg_pfx_z - changeup_metrics[["avg_pfx_z"]],
    diff_release_speed = avg_release_speed - changeup_metrics[["avg_release_speed"]]
  )
Now, let’s summarize per pitch type per player, and calculate the differentials for each player.
# Filter for relevant pitch types
relevant_pitches <- final_statcast_data %>%
  filter(pitch_name %in% c("Sweeper", "Slider", "Curveball", "Changeup", "Cutter", "Sinker"))

# Average pfx_x, pfx_z and release_speed for each pitch type per pitcher
# (one row per pitcher x pitch type).
average_metrics_per_player <- relevant_pitches %>%
  group_by(player_name, pitcher, p_throws, pitch_name) %>%
  summarize(
    avg_pfx_x = mean(pfx_x, na.rm = TRUE),
    avg_pfx_z = mean(pfx_z, na.rm = TRUE),
    avg_release_speed = mean(release_speed, na.rm = TRUE),
    .groups = "drop" # return an ungrouped tibble
  )

# One row per pitcher, with columns named avg_<metric>_<PitchName>. A pitcher
# who never threw a given pitch gets NA in that pitch's columns.
wide_metrics_per_player <- average_metrics_per_player %>%
  pivot_wider(
    names_from = pitch_name,
    values_from = c(avg_pfx_x, avg_pfx_z, avg_release_speed)
  )

# Pitches compared against the changeup and the metrics compared, in the
# order the output columns should appear.
comparison_pitches <- c("Sweeper", "Slider", "Curveball", "Sinker", "Cutter")
diff_metrics <- c("pfx_x", "pfx_z", "release_speed")

# For each pitcher, differential = changeup average - other pitch average.
# The subtraction is vectorised over pitchers, so no rowwise() is needed (it
# only slowed the computation and left a rowwise data frame for later steps).
differentials_per_player <- wide_metrics_per_player %>%
  select(player_name, pitcher, p_throws)

for (pitch in comparison_pitches) {
  for (metric in diff_metrics) {
    changeup_col <- paste0("avg_", metric, "_Changeup")
    other_col <- paste0("avg_", metric, "_", pitch)
    diff_col <- paste0(tolower(pitch), "_ch_diff_", metric)

    if (all(c(changeup_col, other_col) %in% names(wide_metrics_per_player))) {
      differentials_per_player[[diff_col]] <-
        wide_metrics_per_player[[changeup_col]] -
        wide_metrics_per_player[[other_col]]
    } else {
      # Pitch type absent from the data altogether: keep the column so that
      # downstream code still finds it, filled with NA.
      differentials_per_player[[diff_col]] <- NA_real_
    }
  }
}

# View the results
print(differentials_per_player)
Now that we’ve calculated our differentials, we’ll want to start seeing about correlation/regression analysis with DRA-. Next, we’ll need to download the DRA- data in a CSV file from Baseball Prospectus and import it. After we import it into a data frame, we’ll join it with our current differential data frame using a left join to retain the data structure of our differential data frame.
library(readxl)
# Load the Baseball Prospectus export that contains each pitcher's DRA-.
dra_numbers <- read_csv(
  file = "C:\\Users\\james\\Downloads\\bp_export_20240306.csv"
)
Rows: 851 Columns: 25── Column specification ─────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Name, Team
dbl (23): bpid, mlbid, Age, WARP, DRA-, DRA, DRA SD, cFIP, G, GS, GR, IP, W, L, SV, ERA, RA9, FIP, WHIP, K%, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach the DRA- export to each pitcher's differentials, matching the
# Statcast `pitcher` id to the export's `mlbid`. A left join keeps every row
# of 'differentials_per_player', including pitchers with no match.
differentials_with_dra <- differentials_per_player %>%
  left_join(dra_numbers, by = c("pitcher" = "mlbid"))
Now let’s export what we have so far.
# Save the joined table to the current working directory.
write_csv(
  x = differentials_with_dra,
  file = "differentials_with_dra.csv"
)
Next, let’s create a data frame for lefties and for righties to allow us to evaluate differential correlation with DRA- for specific handedness.
# Split the joined table by throwing hand so each hand is modelled separately.
lefties <- filter(differentials_with_dra, p_throws == "L")
righties <- filter(differentials_with_dra, p_throws == "R")
Now let’s start with the comparisons. My strategy will be to create a simple linear regression for each differential for both hands. We’ll need to do it for all pitches, for all hands, and for all differentials. Rather than performing a loop, I’ll hard code this:
Sweeper:
# Sweeper - horizontal movement (pfx_x) differential, changeup minus sweeper
# Left-handed pitchers: simple linear regression of DRA- on the differential
sweeper_horiz_diff_lefty <- lm(
  formula = `DRA-` ~ sweeper_ch_diff_pfx_x,
  data = lefties
)
print(summary(sweeper_horiz_diff_lefty))
Call:
lm(formula = `DRA-` ~ sweeper_ch_diff_pfx_x, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-32.324 -12.668 -2.283 11.954 42.768
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 108.886 19.600 5.555 6.88e-06 ***
sweeper_ch_diff_pfx_x -1.729 8.294 -0.208 0.836
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 18.32 on 27 degrees of freedom
(165 observations deleted due to missingness)
Multiple R-squared: 0.001607, Adjusted R-squared: -0.03537
F-statistic: 0.04345 on 1 and 27 DF, p-value: 0.8364
# Right-handed pitchers: DRA- regressed on the sweeper pfx_x differential
sweeper_horiz_diff_righty <- lm(
  formula = `DRA-` ~ sweeper_ch_diff_pfx_x,
  data = righties
)
print(summary(sweeper_horiz_diff_righty))
Call:
lm(formula = `DRA-` ~ sweeper_ch_diff_pfx_x, data = righties)
Residuals:
Min 1Q Median 3Q Max
-30.508 -10.092 -0.145 9.885 34.364
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 107.681 10.327 10.427 <2e-16 ***
sweeper_ch_diff_pfx_x 2.643 4.268 0.619 0.538
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 13.6 on 83 degrees of freedom
(447 observations deleted due to missingness)
Multiple R-squared: 0.004597, Adjusted R-squared: -0.007396
F-statistic: 0.3833 on 1 and 83 DF, p-value: 0.5375
# Sweeper - vertical movement (pfx_z) differential, changeup minus sweeper
# Left-handed pitchers: simple linear regression of DRA- on the differential
sweeper_vert_diff_lefty <- lm(
  formula = `DRA-` ~ sweeper_ch_diff_pfx_z,
  data = lefties
)
print(summary(sweeper_vert_diff_lefty))
Call:
lm(formula = `DRA-` ~ sweeper_ch_diff_pfx_z, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-33.226 -15.117 0.176 12.018 43.126
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 106.452 4.298 24.77 <2e-16 ***
sweeper_ch_diff_pfx_z -4.155 6.929 -0.60 0.554
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 18.21 on 27 degrees of freedom
(165 observations deleted due to missingness)
Multiple R-squared: 0.01314, Adjusted R-squared: -0.02341
F-statistic: 0.3596 on 1 and 27 DF, p-value: 0.5537
# Right-handed pitchers: DRA- regressed on the sweeper pfx_z differential
sweeper_vert_diff_righty <- lm(
  formula = `DRA-` ~ sweeper_ch_diff_pfx_z,
  data = righties
)
print(summary(sweeper_vert_diff_righty))
Call:
lm(formula = `DRA-` ~ sweeper_ch_diff_pfx_z, data = righties)
Residuals:
Min 1Q Median 3Q Max
-29.135 -9.607 0.906 9.204 34.527
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 100.626 1.716 58.631 <2e-16 ***
sweeper_ch_diff_pfx_z 2.418 2.931 0.825 0.412
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 13.58 on 83 degrees of freedom
(447 observations deleted due to missingness)
Multiple R-squared: 0.008133, Adjusted R-squared: -0.003817
F-statistic: 0.6806 on 1 and 83 DF, p-value: 0.4118
# Sweeper - release speed differential, changeup minus sweeper
# Left-handed pitchers: simple linear regression of DRA- on the differential
sweeper_speed_diff_lefty <- lm(
  formula = `DRA-` ~ sweeper_ch_diff_release_speed,
  data = lefties
)
print(summary(sweeper_speed_diff_lefty))
Call:
lm(formula = `DRA-` ~ sweeper_ch_diff_release_speed, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-30.707 -12.114 1.889 12.773 42.745
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 97.277 5.386 18.062 <2e-16 ***
sweeper_ch_diff_release_speed 1.851 1.053 1.758 0.09 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 17.37 on 27 degrees of freedom
(165 observations deleted due to missingness)
Multiple R-squared: 0.1027, Adjusted R-squared: 0.06951
F-statistic: 3.092 on 1 and 27 DF, p-value: 0.09002
# Right-handed pitchers: DRA- regressed on the sweeper release speed differential
sweeper_speed_diff_righty <- lm(
  formula = `DRA-` ~ sweeper_ch_diff_release_speed,
  data = righties
)
print(summary(sweeper_speed_diff_righty))
Call:
lm(formula = `DRA-` ~ sweeper_ch_diff_release_speed, data = righties)
Residuals:
Min 1Q Median 3Q Max
-29.727 -10.152 0.334 9.941 34.486
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 100.7797 3.0972 32.539 <2e-16 ***
sweeper_ch_diff_release_speed 0.1272 0.6037 0.211 0.834
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 13.63 on 83 degrees of freedom
(447 observations deleted due to missingness)
Multiple R-squared: 0.0005343, Adjusted R-squared: -0.01151
F-statistic: 0.04437 on 1 and 83 DF, p-value: 0.8337
It doesn’t appear that the sweeper differentials have any meaningful correlation with DRA-. Next, let’s look at the slider:
# Slider - horizontal movement (pfx_x) differential, changeup minus slider
# Left-handed pitchers: simple linear regression of DRA- on the differential
slider_horiz_diff_lefty <- lm(
  formula = `DRA-` ~ slider_ch_diff_pfx_x,
  data = lefties
)
print(summary(slider_horiz_diff_lefty))
Call:
lm(formula = `DRA-` ~ slider_ch_diff_pfx_x, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-35.92 -12.71 -2.32 12.13 45.58
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 113.576 6.574 17.277 <2e-16 ***
slider_ch_diff_pfx_x -5.154 4.080 -1.263 0.209
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 17.63 on 104 degrees of freedom
(88 observations deleted due to missingness)
Multiple R-squared: 0.01512, Adjusted R-squared: 0.005648
F-statistic: 1.596 on 1 and 104 DF, p-value: 0.2092
# Right-handed pitchers: DRA- regressed on the slider pfx_x differential
slider_horiz_diff_righty <- lm(
  formula = `DRA-` ~ slider_ch_diff_pfx_x,
  data = righties
)
print(summary(slider_horiz_diff_righty))
Call:
lm(formula = `DRA-` ~ slider_ch_diff_pfx_x, data = righties)
Residuals:
Min 1Q Median 3Q Max
-32.012 -12.013 0.467 9.541 45.500
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 108.310 3.640 29.756 <2e-16 ***
slider_ch_diff_pfx_x 2.720 2.116 1.285 0.2
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.26 on 255 degrees of freedom
(275 observations deleted due to missingness)
Multiple R-squared: 0.006437, Adjusted R-squared: 0.002541
F-statistic: 1.652 on 1 and 255 DF, p-value: 0.1998
# Slider - vertical movement (pfx_z) differential, changeup minus slider
# Left-handed pitchers: simple linear regression of DRA- on the differential
slider_vert_diff_lefty <- lm(
  formula = `DRA-` ~ slider_ch_diff_pfx_z,
  data = lefties
)
print(summary(slider_vert_diff_lefty))
Call:
lm(formula = `DRA-` ~ slider_ch_diff_pfx_z, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-34.615 -11.546 -4.094 11.481 47.471
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 105.6075 2.4153 43.73 <2e-16 ***
slider_ch_diff_pfx_z -0.1375 4.5663 -0.03 0.976
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 17.76 on 104 degrees of freedom
(88 observations deleted due to missingness)
Multiple R-squared: 8.714e-06, Adjusted R-squared: -0.009607
F-statistic: 0.0009062 on 1 and 104 DF, p-value: 0.976
# Right-handed pitchers: DRA- regressed on the slider pfx_z differential
slider_vert_diff_righty <- lm(
  formula = `DRA-` ~ slider_ch_diff_pfx_z,
  data = righties
)
print(summary(slider_vert_diff_righty))
Call:
lm(formula = `DRA-` ~ slider_ch_diff_pfx_z, data = righties)
Residuals:
Min 1Q Median 3Q Max
-34.453 -11.617 0.520 9.906 44.373
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 102.570 1.258 81.545 <2e-16 ***
slider_ch_diff_pfx_z 3.418 2.300 1.486 0.138
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.24 on 255 degrees of freedom
(275 observations deleted due to missingness)
Multiple R-squared: 0.008589, Adjusted R-squared: 0.004701
F-statistic: 2.209 on 1 and 255 DF, p-value: 0.1384
# Slider - release speed differential, changeup minus slider
# Left-handed pitchers: simple linear regression of DRA- on the differential
slider_speed_diff_lefty <- lm(
  formula = `DRA-` ~ slider_ch_diff_release_speed,
  data = lefties
)
print(summary(slider_speed_diff_lefty))
Call:
lm(formula = `DRA-` ~ slider_ch_diff_release_speed, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-38.528 -12.368 -2.767 13.083 47.078
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 106.5132 1.7954 59.327 <2e-16 ***
slider_ch_diff_release_speed -0.8587 0.5121 -1.677 0.0966 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 17.53 on 104 degrees of freedom
(88 observations deleted due to missingness)
Multiple R-squared: 0.02633, Adjusted R-squared: 0.01696
F-statistic: 2.812 on 1 and 104 DF, p-value: 0.09657
# Right-handed pitchers: DRA- regressed on the slider release speed differential
slider_speed_diff_righty <- lm(
  formula = `DRA-` ~ slider_ch_diff_release_speed,
  data = righties
)
print(summary(slider_speed_diff_righty))
Call:
lm(formula = `DRA-` ~ slider_ch_diff_release_speed, data = righties)
Residuals:
Min 1Q Median 3Q Max
-32.068 -11.689 0.419 9.204 45.497
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 103.93150 1.11085 93.560 <2e-16 ***
slider_ch_diff_release_speed -0.07524 0.31021 -0.243 0.809
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.31 on 255 degrees of freedom
(275 observations deleted due to missingness)
Multiple R-squared: 0.0002306, Adjusted R-squared: -0.00369
F-statistic: 0.05882 on 1 and 255 DF, p-value: 0.8086
Again, the slider/changeup differentials don’t appear to have any meaningful correlations with DRA-.
Next, let’s look at the curveball:
# Curveball - horizontal movement (pfx_x) differential, changeup minus curveball
# Left-handed pitchers: simple linear regression of DRA- on the differential
curveball_horiz_diff_lefty <- lm(
  formula = `DRA-` ~ curveball_ch_diff_pfx_x,
  data = lefties
)
print(summary(curveball_horiz_diff_lefty))
Call:
lm(formula = `DRA-` ~ curveball_ch_diff_pfx_x, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-30.279 -14.083 -1.680 9.884 39.122
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 117.807 8.910 13.222 <2e-16 ***
curveball_ch_diff_pfx_x -5.866 4.612 -1.272 0.208
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 16.73 on 67 degrees of freedom
(125 observations deleted due to missingness)
Multiple R-squared: 0.02358, Adjusted R-squared: 0.009003
F-statistic: 1.618 on 1 and 67 DF, p-value: 0.2078
# Right-handed pitchers: DRA- regressed on the curveball pfx_x differential
curveball_horiz_diff_righty <- lm(
  formula = `DRA-` ~ curveball_ch_diff_pfx_x,
  data = righties
)
print(summary(curveball_horiz_diff_righty))
Call:
lm(formula = `DRA-` ~ curveball_ch_diff_pfx_x, data = righties)
Residuals:
Min 1Q Median 3Q Max
-32.899 -9.549 0.527 9.340 39.486
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 119.059 4.959 24.009 <2e-16 ***
curveball_ch_diff_pfx_x 6.555 2.594 2.527 0.0124 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 14.18 on 162 degrees of freedom
(368 observations deleted due to missingness)
Multiple R-squared: 0.03794, Adjusted R-squared: 0.032
F-statistic: 6.388 on 1 and 162 DF, p-value: 0.01245
# Curveball - vertical movement (pfx_z) differential, changeup minus curveball
# Left-handed pitchers: simple linear regression of DRA- on the differential
curveball_vert_diff_lefty <- lm(
  formula = `DRA-` ~ curveball_ch_diff_pfx_z,
  data = lefties
)
print(summary(curveball_vert_diff_lefty))
Call:
lm(formula = `DRA-` ~ curveball_ch_diff_pfx_z, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-28.137 -13.890 0.314 9.852 41.301
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 103.430 4.304 24.032 <2e-16 ***
curveball_ch_diff_pfx_z 2.629 2.990 0.879 0.382
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 16.84 on 67 degrees of freedom
(125 observations deleted due to missingness)
Multiple R-squared: 0.0114, Adjusted R-squared: -0.003352
F-statistic: 0.7729 on 1 and 67 DF, p-value: 0.3825
# Right-handed pitchers: DRA- regressed on the curveball pfx_z differential
curveball_vert_diff_righty <- lm(
  formula = `DRA-` ~ curveball_ch_diff_pfx_z,
  data = righties
)
print(summary(curveball_vert_diff_righty))
Call:
lm(formula = `DRA-` ~ curveball_ch_diff_pfx_z, data = righties)
Residuals:
Min 1Q Median 3Q Max
-34.065 -9.865 0.030 9.718 35.625
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 104.454 2.860 36.517 <2e-16 ***
curveball_ch_diff_pfx_z 1.823 2.007 0.908 0.365
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 14.42 on 162 degrees of freedom
(368 observations deleted due to missingness)
Multiple R-squared: 0.005063, Adjusted R-squared: -0.001078
F-statistic: 0.8244 on 1 and 162 DF, p-value: 0.3652
# Curveball - release speed differential, changeup minus curveball
# Left-handed pitchers: simple linear regression of DRA- on the differential
curveball_speed_diff_lefty <- lm(
  formula = `DRA-` ~ curveball_ch_diff_release_speed,
  data = lefties
)
print(summary(curveball_speed_diff_lefty))
Call:
lm(formula = `DRA-` ~ curveball_ch_diff_release_speed, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-28.777 -13.047 -1.457 11.123 40.437
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 110.4929 4.6224 23.904 <2e-16 ***
curveball_ch_diff_release_speed -0.5832 0.6505 -0.897 0.373
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 16.83 on 67 degrees of freedom
(125 observations deleted due to missingness)
Multiple R-squared: 0.01185, Adjusted R-squared: -0.002894
F-statistic: 0.8038 on 1 and 67 DF, p-value: 0.3732
# Right-handed pitchers: simple linear regression of DRA- on the
# curveball/changeup release speed differential.
curveball_speed_diff_righty <- lm(
  formula = `DRA-` ~ curveball_ch_diff_release_speed,
  data = righties
)
print(summary(curveball_speed_diff_righty))
Call:
lm(formula = `DRA-` ~ curveball_ch_diff_release_speed, data = righties)
Residuals:
Min 1Q Median 3Q Max
-34.965 -10.229 0.254 9.170 34.488
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 105.5377 2.9795 35.422 <2e-16 ***
curveball_ch_diff_release_speed 0.1884 0.3985 0.473 0.637
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 14.45 on 162 degrees of freedom
(368 observations deleted due to missingness)
Multiple R-squared: 0.001378, Adjusted R-squared: -0.004786
F-statistic: 0.2236 on 1 and 162 DF, p-value: 0.637
Above, we see a statistically significant p-value for the horizontal curveball/changeup movement differential for righties. However, with an R-squared value of just under 0.04, it’s hardly meaningful. Once again, we see no meaningful results. Next, let’s look at the sinker:
# Sinker - Horizontal Movement (pfx_x)
# Left-handed pitchers: simple linear regression of DRA- on the
# sinker/changeup horizontal movement differential.
sinker_horiz_diff_lefty <- lm(
  formula = `DRA-` ~ sinker_ch_diff_pfx_x,
  data = lefties
)
print(summary(sinker_horiz_diff_lefty))
Call:
lm(formula = `DRA-` ~ sinker_ch_diff_pfx_x, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-30.965 -12.147 -4.274 11.568 45.622
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 101.133 2.146 47.121 <2e-16 ***
sinker_ch_diff_pfx_x -4.297 8.492 -0.506 0.614
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 17.39 on 79 degrees of freedom
(113 observations deleted due to missingness)
Multiple R-squared: 0.00323, Adjusted R-squared: -0.009387
F-statistic: 0.256 on 1 and 79 DF, p-value: 0.6143
# Right-handed pitchers: simple linear regression of DRA- on the
# sinker/changeup horizontal movement (pfx_x) differential.
sinker_horiz_diff_righty <- lm(
  formula = `DRA-` ~ sinker_ch_diff_pfx_x,
  data = righties
)
print(summary(sinker_horiz_diff_righty))
Call:
lm(formula = `DRA-` ~ sinker_ch_diff_pfx_x, data = righties)
Residuals:
Min 1Q Median 3Q Max
-31.850 -11.468 -0.981 9.335 46.202
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 102.504 1.065 96.265 <2e-16 ***
sinker_ch_diff_pfx_x 2.129 4.475 0.476 0.635
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.35 on 223 degrees of freedom
(307 observations deleted due to missingness)
Multiple R-squared: 0.001014, Adjusted R-squared: -0.003466
F-statistic: 0.2263 on 1 and 223 DF, p-value: 0.6348
# Sinker - Vertical Movement (pfx_z)
# Left-handed pitchers: simple linear regression of DRA- on the
# sinker/changeup vertical movement differential.
sinker_vert_diff_lefty <- lm(
  formula = `DRA-` ~ sinker_ch_diff_pfx_z,
  data = lefties
)
print(summary(sinker_vert_diff_lefty))
Call:
lm(formula = `DRA-` ~ sinker_ch_diff_pfx_z, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-31.159 -11.593 -3.324 11.212 47.495
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 101.161 2.596 38.971 <2e-16 ***
sinker_ch_diff_pfx_z -1.748 6.819 -0.256 0.798
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 17.41 on 79 degrees of freedom
(113 observations deleted due to missingness)
Multiple R-squared: 0.0008311, Adjusted R-squared: -0.01182
F-statistic: 0.06571 on 1 and 79 DF, p-value: 0.7984
# Right-handed pitchers: simple linear regression of DRA- on the
# sinker/changeup vertical movement (pfx_z) differential.
sinker_vert_diff_righty <- lm(
  formula = `DRA-` ~ sinker_ch_diff_pfx_z,
  data = righties
)
print(summary(sinker_vert_diff_righty))
Call:
lm(formula = `DRA-` ~ sinker_ch_diff_pfx_z, data = righties)
Residuals:
Min 1Q Median 3Q Max
-31.795 -11.199 -0.608 9.444 45.920
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 102.270 1.526 67.009 <2e-16 ***
sinker_ch_diff_pfx_z -1.210 3.658 -0.331 0.741
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.35 on 223 degrees of freedom
(307 observations deleted due to missingness)
Multiple R-squared: 0.0004905, Adjusted R-squared: -0.003992
F-statistic: 0.1094 on 1 and 223 DF, p-value: 0.7411
# Sinker - Release Speed
# Left-handed pitchers: simple linear regression of DRA- on the
# sinker/changeup release speed differential.
sinker_speed_diff_lefty <- lm(
  formula = `DRA-` ~ sinker_ch_diff_release_speed,
  data = lefties
)
print(summary(sinker_speed_diff_lefty))
Call:
lm(formula = `DRA-` ~ sinker_ch_diff_release_speed, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-31.535 -10.725 -3.526 11.089 46.359
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 99.9148 6.8237 14.642 <2e-16 ***
sinker_ch_diff_release_speed -0.2258 0.8741 -0.258 0.797
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 17.41 on 79 degrees of freedom
(113 observations deleted due to missingness)
Multiple R-squared: 0.0008438, Adjusted R-squared: -0.0118
F-statistic: 0.06672 on 1 and 79 DF, p-value: 0.7968
# Right-handed pitchers: simple linear regression of DRA- on the
# sinker/changeup release speed differential.
sinker_speed_diff_righty <- lm(
  formula = `DRA-` ~ sinker_ch_diff_release_speed,
  data = righties
)
print(summary(sinker_speed_diff_righty))
Call:
lm(formula = `DRA-` ~ sinker_ch_diff_release_speed, data = righties)
Residuals:
Min 1Q Median 3Q Max
-31.432 -11.907 0.067 10.349 45.779
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 106.2452 3.6033 29.486 <2e-16 ***
sinker_ch_diff_release_speed 0.5280 0.5067 1.042 0.299
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.32 on 223 degrees of freedom
(307 observations deleted due to missingness)
Multiple R-squared: 0.004846, Adjusted R-squared: 0.000383
F-statistic: 1.086 on 1 and 223 DF, p-value: 0.2985
Again, no meaningful results. Finally, let’s look at the cutter:
# Assumes 'lefties' and 'righties' already exist as subsets of
# 'differentials_with_dra'.
# Cutter - Horizontal Movement (pfx_x)
# Left-handed pitchers: simple linear regression of DRA- on the
# cutter/changeup horizontal movement differential.
cutter_horiz_diff_lefty <- lm(
  formula = `DRA-` ~ cutter_ch_diff_pfx_x,
  data = lefties
)
print(summary(cutter_horiz_diff_lefty))
Call:
lm(formula = `DRA-` ~ cutter_ch_diff_pfx_x, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-28.329 -15.160 -1.977 11.046 37.894
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 150.676 12.598 11.960 4.13e-15 ***
cutter_ch_diff_pfx_x -30.906 9.345 -3.307 0.00194 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 17.67 on 42 degrees of freedom
(150 observations deleted due to missingness)
Multiple R-squared: 0.2066, Adjusted R-squared: 0.1877
F-statistic: 10.94 on 1 and 42 DF, p-value: 0.001938
# Right-handed pitchers: simple linear regression of DRA- on the
# cutter/changeup horizontal movement (pfx_x) differential.
cutter_horiz_diff_righty <- lm(
  formula = `DRA-` ~ cutter_ch_diff_pfx_x,
  data = righties
)
print(summary(cutter_horiz_diff_righty))
Call:
lm(formula = `DRA-` ~ cutter_ch_diff_pfx_x, data = righties)
Residuals:
Min 1Q Median 3Q Max
-31.923 -12.301 0.038 10.044 38.427
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 109.558 7.539 14.532 <2e-16 ***
cutter_ch_diff_pfx_x 3.901 5.460 0.714 0.476
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.21 on 125 degrees of freedom
(405 observations deleted due to missingness)
Multiple R-squared: 0.004066, Adjusted R-squared: -0.003902
F-statistic: 0.5103 on 1 and 125 DF, p-value: 0.4763
# Cutter - Vertical Movement (pfx_z)
# Left-handed pitchers: simple linear regression of DRA- on the
# cutter/changeup vertical movement differential.
cutter_vert_diff_lefty <- lm(
  formula = `DRA-` ~ cutter_ch_diff_pfx_z,
  data = lefties
)
print(summary(cutter_vert_diff_lefty))
Call:
lm(formula = `DRA-` ~ cutter_ch_diff_pfx_z, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-26.868 -16.195 -4.066 12.606 46.639
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 110.290 2.879 38.313 <2e-16 ***
cutter_ch_diff_pfx_z -14.299 7.636 -1.873 0.0681 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 19.06 on 42 degrees of freedom
(150 observations deleted due to missingness)
Multiple R-squared: 0.07706, Adjusted R-squared: 0.05508
F-statistic: 3.507 on 1 and 42 DF, p-value: 0.0681
# Right-handed pitchers: simple linear regression of DRA- on the
# cutter/changeup vertical movement (pfx_z) differential.
cutter_vert_diff_righty <- lm(
  formula = `DRA-` ~ cutter_ch_diff_pfx_z,
  data = righties
)
print(summary(cutter_vert_diff_righty))
Call:
lm(formula = `DRA-` ~ cutter_ch_diff_pfx_z, data = righties)
Residuals:
Min 1Q Median 3Q Max
-31.661 -11.886 1.001 9.003 38.846
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 106.207 1.420 74.816 < 2e-16 ***
cutter_ch_diff_pfx_z 11.273 3.359 3.356 0.00105 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 14.6 on 125 degrees of freedom
(405 observations deleted due to missingness)
Multiple R-squared: 0.08266, Adjusted R-squared: 0.07533
F-statistic: 11.26 on 1 and 125 DF, p-value: 0.001047
# Cutter - Release Speed
# Left-handed pitchers: simple linear regression of DRA- on the
# cutter/changeup release speed differential.
cutter_speed_diff_lefty <- lm(
  formula = `DRA-` ~ cutter_ch_diff_release_speed,
  data = lefties
)
print(summary(cutter_speed_diff_lefty))
Call:
lm(formula = `DRA-` ~ cutter_ch_diff_release_speed, data = lefties)
Residuals:
Min 1Q Median 3Q Max
-40.357 -13.937 -2.066 11.779 44.182
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 104.1211 4.3809 23.767 <2e-16 ***
cutter_ch_diff_release_speed -1.6751 0.9467 -1.769 0.0841 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 19.14 on 42 degrees of freedom
(150 observations deleted due to missingness)
Multiple R-squared: 0.06937, Adjusted R-squared: 0.04721
F-statistic: 3.131 on 1 and 42 DF, p-value: 0.08409
# Right-handed pitchers: simple linear regression of DRA- on the
# cutter/changeup release speed differential.
cutter_speed_diff_righty <- lm(
  formula = `DRA-` ~ cutter_ch_diff_release_speed,
  data = righties
)
print(summary(cutter_speed_diff_righty))
Call:
lm(formula = `DRA-` ~ cutter_ch_diff_release_speed, data = righties)
Residuals:
Min 1Q Median 3Q Max
-32.243 -11.738 -0.259 9.710 37.772
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 104.21126 1.97432 52.783 <2e-16 ***
cutter_ch_diff_release_speed -0.01742 0.51576 -0.034 0.973
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.24 on 125 degrees of freedom
(405 observations deleted due to missingness)
Multiple R-squared: 9.129e-06, Adjusted R-squared: -0.007991
F-statistic: 0.001141 on 1 and 125 DF, p-value: 0.9731
While nothing is very large, we finally see some meaningful results with regard to the changeup/cutter differential. This is especially true for lefties, who have an R-squared value of around 0.21 for the changeup/cutter horizontal differential, which corresponds to a correlation of around -0.45 (the fitted slope is negative). It appears that there is a correlation between cutter/changeup horizontal movement differential and DRA- for lefties. We don’t see this for righties on the horizontal differential, although their vertical differential is statistically significant (p ≈ 0.001) with a small R-squared of about 0.08. For lefties, then, the horizontal differential between the changeup and the cutter can be meaningful in terms of DRA-. As the cutter/changeup horizontal differential increases, DRA- tends to decrease.
Let’s graph some of the cutter results below:
library(ggplot2)
# Plot for left-handed pitchers: DRA- against the cutter/changeup
# horizontal movement differential (cutter_ch_diff_pfx_x), with a
# fitted linear trend line.
# The title and x-axis label say "Horizontal" because pfx_x is the
# horizontal movement column.
ggplot(lefties, aes(x = cutter_ch_diff_pfx_x, y = `DRA-`)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "blue") +
  labs(
    title = "Left-handed pitchers: DRA- vs. Cutter Horizontal Movement Differential",
    x = "Cutter Horizontal Movement Differential",
    y = "DRA-"
  ) +
  theme_minimal()
# Plot for right-handed pitchers: DRA- against the cutter/changeup
# horizontal movement differential (cutter_ch_diff_pfx_x), with a
# fitted linear trend line.
# The title and x-axis label say "Horizontal" because pfx_x is the
# horizontal movement column.
ggplot(righties, aes(x = cutter_ch_diff_pfx_x, y = `DRA-`)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red") +
  labs(
    title = "Right-handed pitchers: DRA- vs. Cutter Horizontal Movement Differential",
    x = "Cutter Horizontal Movement Differential",
    y = "DRA-"
  ) +
  theme_minimal()
# Plot for left-handed pitchers: DRA- against the cutter/changeup
# vertical movement differential (cutter_ch_diff_pfx_z), with a
# fitted linear trend line.
# The title and x-axis label say "Vertical" because pfx_z is the
# vertical movement column.
ggplot(lefties, aes(x = cutter_ch_diff_pfx_z, y = `DRA-`)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "blue") +
  labs(
    title = "Left-handed pitchers: DRA- vs. Cutter Vertical Movement Differential",
    x = "Cutter Vertical Movement Differential",
    y = "DRA-"
  ) +
  theme_minimal()
# Plot for right-handed pitchers: DRA- against the cutter/changeup
# vertical movement differential (cutter_ch_diff_pfx_z), with a
# fitted linear trend line.
# The title and x-axis label say "Vertical" because pfx_z is the
# vertical movement column.
ggplot(righties, aes(x = cutter_ch_diff_pfx_z, y = `DRA-`)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red") +
  labs(
    title = "Right-handed pitchers: DRA- vs. Cutter Vertical Movement Differential",
    x = "Cutter Vertical Movement Differential",
    y = "DRA-"
  ) +
  theme_minimal()
# Plot using the full differentials_with_dra data frame (not split by
# handedness): DRA- against the cutter/changeup release speed
# differential, with a fitted linear trend line.
ggplot(
  differentials_with_dra,
  aes(x = cutter_ch_diff_release_speed, y = `DRA-`)
) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red") +
  labs(
    title = "DRA- vs. Cutter Changeup Velocity Differential",
    x = "Cutter Changeup Velocity Differential",
    y = "DRA-"
  ) +
  theme_minimal()
# Left-handed pitchers: scatter of DRA- against the cutter/changeup
# release speed differential, overlaid with a linear fit (no
# confidence band).
ggplot(
  data = lefties,
  mapping = aes(x = cutter_ch_diff_release_speed, y = `DRA-`)
) +
  geom_point() +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "blue"
  ) +
  labs(
    title = "Left-handed pitchers: DRA- vs. Cutter Changeup Velocity Differential",
    x = "Cutter Changeup Velocity Differential",
    y = "DRA-"
  ) +
  theme_minimal()
# Right-handed pitchers: scatter of DRA- against the cutter/changeup
# release speed differential, overlaid with a linear fit (no
# confidence band).
ggplot(
  data = righties,
  mapping = aes(x = cutter_ch_diff_release_speed, y = `DRA-`)
) +
  geom_point() +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "red"
  ) +
  labs(
    title = "Right-handed pitchers: DRA- vs. Cutter Changeup Velocity Differential",
    x = "Cutter Changeup Velocity Differential",
    y = "DRA-"
  ) +
  theme_minimal()