A "fastball count" is a ball/strike count on which a pitcher often throws a fastball.
Here we visualize the fastball counts of several pitchers using R and PITCHf/x data.
The code and data are available on GitHub (fastball.Rmd):
https://github.com/gghatano/MLB_data
library(dplyr)
library(data.table)
library(magrittr)
library(rCharts)
library(xtable)
# Columns used in this document.
col_used <- c("pitch_type", "type", "count", "pitcher_name", "sv_id")
# The full 2013 file can be loaded instead of the pre-extracted CSV with:
# dat <- fread(file.path(getwd(), "pitch_fx", "2013.csv"),
#              select = col_used, showProgress = FALSE)
dat <- fread("pitchfx_data_for_plot.csv")

# Drop rows whose pitch type is missing (a real NA or the literal string
# "NA"), then drop pre-season games (months before April).
# sv_id values look like "130401_131510" (apparently YYMMDD_HHMMSS), so the
# month is characters 3-4. Both digits are parsed as an integer: taking only
# character 4 and comparing it as a string would silently discard October
# and later, because "0", "1" and "2" sort before "4".
data <- dat %>%
  filter(!is.na(pitch_type), pitch_type != "NA") %>%
  mutate(month = as.integer(substr(sv_id, 3, 4))) %>%
  filter(month >= 4L)

# Pitch-type codes treated as fastballs.
fastball <- c("FF", "FT", "FC", "FS")

# Flag each pitch as a fastball ("T") or not ("F"). The flag stays a string
# because fast_rate() compares FAST_FL against "T" and "F".
data_fast <- data %>%
  mutate(FAST_FL = ifelse(pitch_type %in% fastball, "T", "F"))

# Preview the first rows of the table as HTML.
data_fast %>%
  head() %>%
  xtable() %>%
  print(type = "html")
| | pitch_type | sv_id | type | count | pitcher_name | month | FAST_FL |
|---|---|---|---|---|---|---|---|
| 1 | FF | 130401_131510 | S | 0-0 | Clayton Kershaw | 4 | T |
| 2 | CU | 130401_131526 | X | 0-1 | Clayton Kershaw | 4 | F |
| 3 | FF | 130401_131606 | B | 0-0 | Clayton Kershaw | 4 | T |
| 4 | FF | 130401_131619 | S | 1-0 | Clayton Kershaw | 4 | T |
| 5 | CU | 130401_131652 | B | 1-1 | Clayton Kershaw | 4 | F |
| 6 | FF | 130401_131712 | B | 2-1 | Clayton Kershaw | 4 | T |
The function `fast_rate` calculates a pitcher's fastball ratio for each ball/strike count and visualizes the result as a bar plot.
# Build a bar plot of the fastball ratio on each ball/strike count for one
# pitcher.
#
# Args:
#   pitcher: name of the pitcher, matched exactly against the pitcher_name
#            column (default: "Hiroki Kuroda").
#   pitches: pitch table with the columns pitcher_name, count and FAST_FL
#            ("T" for a fastball, "F" otherwise). Defaults to the global
#            data_fast, so existing one-argument calls keep working.
#
# Returns:
#   The chart object created by rPlot(): bars of freq by count, coloured by
#   pitch (FAST / non_FAST).
#
# Stops with an error when `pitcher` is not a single non-missing value or
# when no row of `pitches` belongs to that pitcher.
fast_rate <- function(pitcher = "Hiroki Kuroda", pitches = data_fast) {
  stopifnot(length(pitcher) == 1L, !is.na(pitcher))
  # Fail early with a clear message instead of charting an empty table.
  if (!any(pitches$pitcher_name == pitcher, na.rm = TRUE)) {
    stop("No pitches found for pitcher: ", pitcher, call. = FALSE)
  }

  # Share of fastballs / non-fastballs per count, reshaped to long format
  # (one row per count and pitch class) for plotting.
  data_count <- pitches %>%
    filter(pitcher_name == pitcher) %>%
    group_by(count) %>%
    dplyr::summarise(
      fast = sum(FAST_FL == "T"),
      non_fast = sum(FAST_FL == "F")
    ) %>%
    mutate(
      FAST = fast / (fast + non_fast),
      non_FAST = non_fast / (fast + non_fast)
    ) %>%
    dplyr::select(count, FAST, non_FAST) %>%
    # Ensure melt() dispatches to the data.table method whatever class the
    # dplyr verbs returned.
    as.data.table() %>%
    melt(id.vars = "count") %>%
    setnames(c("count", "pitch", "freq"))

  # Visualize with rPlot.
  rPlot(data = data_count, freq ~ count, color = "pitch", type = "bar")
}
# Render the fastball-ratio chart for each pitcher of interest, in order.
# `rp` is assigned at top level on every pass, so it ends up holding the
# last pitcher's chart.
for (pitcher_to_plot in c("Hiroki Kuroda", "Yu Darvish", "Clayton Kershaw")) {
  rp <- fast_rate(pitcher_to_plot)
  rp$show("iframesrc", cdn = TRUE)
}