Download and save the data
# Pull Chris Sale's 2017 Statcast pitch data and cache it locally as a CSV.
library(baseballr)

# List the players whose last name is "Sale" so the id can be read off.
playerid_lookup("Sale")

# 519242 is the id used for Chris Sale; player_type = "pitcher" requests the
# pitches he threw rather than the ones he faced as a batter.
data <- scrape_statcast_savant(
  start_date = "2017-04-01",
  end_date = "2017-11-01",
  playerid = 519242,
  player_type = "pitcher"
)

# Quick look at the structure and the first rows of the download.
str(data)
head(data)

# Save it to disk as a CSV with write.csv(). Row names are written as well
# (the default), so the file carries an extra leading index column.
write.csv(
  data,
  file = "C:/Users/sclee1/OneDrive/Documents/R/sportsData/data/data.csv"
)
Import data and select two variables
# Read the cached CSV back in with read.csv() and inspect its structure.
data <- read.csv(
  file = "C:/Users/sclee1/OneDrive/Documents/R/sportsData/data/data.csv"
)
str(data)
## 'data.frame': 3428 obs. of 89 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ pitch_type : Factor w/ 4 levels "CH","FF","FT",..: 3 1 2 1 1 3 3 4 3 4 ...
## $ game_date : Factor w/ 32 levels "2017-04-05","2017-04-10",..: 32 32 32 32 32 32 32 32 32 32 ...
## $ release_speed : num 94.4 91 97.1 87 87.7 97.4 98.1 80.4 92.4 82.5 ...
## $ release_pos_x : num 3.08 3.07 3.05 3.17 3.07 ...
## $ release_pos_z : num 5.03 4.97 5.26 4.97 5.01 ...
## $ player_name : Factor w/ 1 level "Chris Sale": 1 1 1 1 1 1 1 1 1 1 ...
## $ batter : int 431145 431145 431145 431145 431145 607680 607680 434778 434778 430832 ...
## $ pitcher : int 519242 519242 519242 519242 519242 519242 519242 519242 519242 519242 ...
## $ events : Factor w/ 16 levels "caught_stealing_2b",..: 5 NA NA NA NA 13 NA 10 NA 2 ...
## $ description : Factor w/ 13 levels "ball","blocked_ball",..: 8 1 1 12 1 9 4 10 1 9 ...
## $ spin_dir : logi NA NA NA NA NA NA ...
## $ spin_rate_deprecated : logi NA NA NA NA NA NA ...
## $ break_angle_deprecated : logi NA NA NA NA NA NA ...
## $ break_length_deprecated : logi NA NA NA NA NA NA ...
## $ zone : int 5 14 1 6 8 6 12 3 12 6 ...
## $ des : Factor w/ 713 levels "Aaron Altherr flies out to center fielder Jackie Bradley. ",..: 588 NA NA NA NA 420 NA 392 NA 345 ...
## $ game_type : Factor w/ 1 level "R": 1 1 1 1 1 1 1 1 1 1 ...
## $ stand : Factor w/ 2 levels "L","R": 2 2 2 2 2 2 2 2 2 2 ...
## $ p_throws : Factor w/ 1 level "L": 1 1 1 1 1 1 1 1 1 1 ...
## $ home_team : Factor w/ 14 levels "BAL","BOS","CLE",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ away_team : Factor w/ 10 levels "BAL","BOS","CLE",..: 10 10 10 10 10 10 10 10 10 10 ...
## $ type : Factor w/ 3 levels "B","S","X": 3 1 1 2 1 3 2 3 1 3 ...
## $ hit_location : int 5 NA NA NA NA 1 NA NA NA NA ...
## $ bb_type : Factor w/ 4 levels "fly_ball","ground_ball",..: 3 NA NA NA NA 2 NA 3 NA 3 ...
## $ balls : int 3 2 1 1 0 0 0 1 0 1 ...
## $ strikes : int 1 1 1 0 0 1 0 0 0 1 ...
## $ game_year : int 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
## $ pfx_x : num 1.56 1.7 1.34 1.56 1.71 ...
## $ pfx_z : num 0.5869 0.0515 1.0221 0.4351 0.4717 ...
## $ plate_x : num 0.0603 1.0183 -0.8058 0.5792 -0.103 ...
## $ plate_z : num 2.51 2.01 3.11 2.38 1.79 ...
## $ on_3b : int NA NA NA NA NA NA NA NA NA NA ...
## $ on_2b : int NA NA NA NA NA NA NA 430832 430832 NA ...
## $ on_1b : int 607680 607680 607680 607680 607680 NA NA NA NA NA ...
## $ outs_when_up : int 2 2 2 2 2 2 2 2 2 2 ...
## $ inning : int 5 5 5 5 5 5 5 5 5 5 ...
## $ inning_topbot : Factor w/ 2 levels "Bot","Top": 2 2 2 2 2 2 2 2 2 2 ...
## $ hc_x : num 92.8 NA NA NA NA ...
## $ hc_y : num 161 NA NA NA NA ...
## $ tfs_deprecated : logi NA NA NA NA NA NA ...
## $ tfs_zulu_deprecated : logi NA NA NA NA NA NA ...
## $ pos2_person_id : int 506702 506702 506702 506702 506702 506702 506702 506702 506702 506702 ...
## $ umpire : logi NA NA NA NA NA NA ...
## $ sv_id : Factor w/ 3414 levels "_","170405_231036",..: 3414 3413 3412 3411 3410 3409 3408 3407 3406 3405 ...
## $ vx0 : num -11.2 -8.7 -13.2 -9.27 -10.99 ...
## $ vy0 : num -137 -132 -140 -126 -127 ...
## $ vz0 : num -2.125 -1.607 -2.472 -0.927 -2.576 ...
## $ ax : num 21.9 21.6 20.5 18.3 20.5 ...
## $ ay : num 28.6 28.2 29.8 23.2 24.6 ...
## $ az : num -24.6 -31.5 -18.4 -27.6 -26.8 ...
## $ sz_top : num 3.2 3.36 3.39 3.2 3.36 ...
## $ sz_bot : num 1.27 1.48 1.54 1.27 1.6 ...
## $ hit_distance_sc : int 121 NA NA NA NA 10 45 356 NA 243 ...
## $ launch_speed : num 101 NA NA NA NA ...
## $ launch_angle : num 6.39 NA NA NA NA ...
## $ effective_speed : num 93.9 90.5 96.4 86.6 86.9 ...
## $ release_spin_rate : int 2239 2178 2478 2045 2250 2449 2364 2439 2150 2513 ...
## $ release_extension : num 6.09 6.16 6.07 6.01 5.9 ...
## $ game_pk : int 492457 492457 492457 492457 492457 492457 492457 492457 492457 492457 ...
## $ pos1_person_id : int 519242 519242 519242 519242 519242 519242 519242 519242 519242 519242 ...
## $ pos2_person_id.1 : int 506702 506702 506702 506702 506702 506702 506702 506702 506702 506702 ...
## $ pos3_person_id : int 607752 607752 607752 607752 607752 607752 607752 607752 607752 607752 ...
## $ pos4_person_id : int 571918 571918 571918 571918 571918 571918 571918 571918 571918 571918 ...
## $ pos5_person_id : int 646240 646240 646240 646240 646240 646240 646240 646240 646240 646240 ...
## $ pos6_person_id : int 593428 593428 593428 593428 593428 593428 593428 593428 593428 593428 ...
## $ pos7_person_id : int 643217 643217 643217 643217 643217 643217 643217 643217 643217 643217 ...
## $ pos8_person_id : int 598265 598265 598265 598265 598265 598265 598265 598265 598265 598265 ...
## $ pos9_person_id : int 455759 455759 455759 455759 455759 455759 455759 455759 455759 455759 ...
## $ release_pos_y : num 54.4 54.3 54.4 54.5 54.6 ...
## $ estimated_ba_using_speedangle : num 0.65 NA NA NA NA 0.256 NA 0.143 NA 0.922 ...
## $ estimated_woba_using_speedangle: num 0.651 NA NA NA NA 0.211 NA 0.216 NA 0.897 ...
## $ woba_value : num 0 NA NA NA NA 0.9 NA 2 NA 1.25 ...
## $ woba_denom : int 1 NA NA NA NA 1 NA 1 NA 1 ...
## $ babip_value : int 0 NA NA NA NA 1 NA 0 NA 1 ...
## $ iso_value : int 0 NA NA NA NA 0 NA 3 NA 1 ...
## $ launch_speed_angle : int 4 NA NA NA NA 2 NA 3 NA 4 ...
## $ at_bat_number : int 39 39 39 39 39 38 38 37 37 36 ...
## $ pitch_number : int 5 4 3 2 1 2 1 2 1 3 ...
## $ pitch_name : Factor w/ 5 levels "","2-Seam Fastball",..: 2 4 3 4 4 2 2 5 2 5 ...
## $ home_score : int 1 1 1 1 1 1 1 1 1 1 ...
## $ away_score : int 5 5 5 5 5 5 5 3 3 3 ...
## $ bat_score : int 5 5 5 5 5 5 5 3 3 3 ...
## $ fld_score : int 1 1 1 1 1 1 1 1 1 1 ...
## $ post_away_score : int 5 5 5 5 5 5 5 3 3 3 ...
## $ post_home_score : int 1 1 1 1 1 1 1 1 1 1 ...
## $ post_bat_score : int 5 5 5 5 5 5 5 3 3 3 ...
## $ post_fld_score : int 1 1 1 1 1 1 1 1 1 1 ...
## $ barrel : int 0 NA NA NA NA 0 0 0 NA 0 ...
# Keep only the two columns the rest of the analysis uses: the pitch type and
# the outcome description of each pitch.
library(dplyr)
data <- select(data, pitch_type, description)
head(data)
## pitch_type description
## 1 FT hit_into_play
## 2 CH ball
## 3 FF ball
## 4 CH swinging_strike
## 5 CH ball
## 6 FT hit_into_play_no_out
Q1 How many pitches did he throw during the season?
Q2 What’s his primary pitch, i.e., the one he threw most often?
Q3 How many of his pitches induced a swing?
Q4 How many four-seam fastballs did he throw?
Q5 How many four-seam fastballs did he throw that missed the bat?
Q6 What’s his best strikeout pitch?
# Count the number of pitches thrown, by pitch_type
count(data, pitch_type)
## # A tibble: 5 x 2
## pitch_type n
## <fctr> <int>
## 1 CH 644
## 2 FF 1262
## 3 FT 390
## 4 SL 1127
## 5 <NA> 5
library(ggplot2)
# Swing-and-miss rate by pitch type: of the pitches the batter offered at,
# the share (in percent) on which no contact was made, drawn as a bar chart.
data %>%
  # Exclude no swings (the four descriptions listed here) and rows that have
  # no recorded pitch type.
  filter(
    !description %in%
      c("ball", "blocked_ball", "called_strike", "hit_by_pitch"),
    !is.na(pitch_type)
  ) %>%
  # Flag the remaining pitches on which the bat missed.
  mutate(
    swingMiss = description %in%
      c("swinging_strike", "missed_bunt", "swinging_strike_blocked")
  ) %>%
  group_by(pitch_type) %>%
  summarize(
    missPerType = sum(swingMiss),          # misses per pitch type
    N = n(),                               # swings per pitch type
    noContactRate = missPerType / N * 100  # percent, not a proportion
  ) %>%
  ungroup() %>%
  # Order the bars by rate; after coord_flip() the highest rate is on top.
  ggplot(aes(
    x = reorder(pitch_type, noContactRate),
    y = noContactRate,
    fill = pitch_type
  )) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  # The plotted value is already multiplied by 100, so the axis label states
  # the unit explicitly.
  labs(
    title = "Swing and Miss Pitch",
    x = NULL,
    y = "Swing and Miss Rate (%)"
  )

# Show step-by-step: the per-pitch-type summary table on its own, without
# the plotting layer.
data %>%
  filter(!is.na(pitch_type)) %>%
  # exclude no swings
  filter(
    !description %in%
      c("ball", "blocked_ball", "called_strike", "hit_by_pitch")
  ) %>%
  group_by(pitch_type) %>%
  summarize(
    # swings that made no contact
    missPerType = sum(
      description %in%
        c("swinging_strike", "missed_bunt", "swinging_strike_blocked")
    ),
    # all swings for this pitch type
    N = n(),
    noContactRate = missPerType / N * 100
  )
## # A tibble: 4 x 4
## pitch_type missPerType N noContactRate
## <fctr> <int> <int> <dbl>
## 1 CH 117 332 35.2
## 2 FF 179 721 24.8
## 3 FT 32 186 17.2
## 4 SL 183 476 38.4