A fastball count is a ball-strike count on which the pitcher often throws a fastball.
Here we visualize the fastball counts of several pitchers using R and PITCHf/x data.
The code and data are available on GitHub (fastball.Rmd):
https://github.com/gghatano/analyze_mlbdata_with_R/tree/master/rchart
library(dplyr)
library(data.table)
library(magrittr)
library(rCharts)
library(xtable)
# Columns used in this document (kept for the full-season read shown below).
col_used <- c("pitch_type", "type", "count", "pitcher_name", "sv_id")
# dat = fread(paste(getwd(), "/pitch_fx/2013.csv", sep=""),
# select = col_used, showProgress=FALSE)
dat <- fread("pitchfx_data_for_plot.csv")

# Drop rows whose pitch type is missing, whether it was read as a real NA or
# as the literal string "NA".
# Drop pre-season games: sv_id looks like "YYMMDD_hhmmss" (e.g. "130401_131510"),
# so the month is characters 3-4. Taking only character 4 and comparing it as
# a string would also discard October-December ("0", "1", "2" sort before "4").
data <- dat %>%
  filter(!is.na(pitch_type), pitch_type != "NA") %>%
  mutate(month = as.integer(substr(sv_id, 3, 4))) %>%
  filter(month >= 4)

# Pitch-type codes treated as fastballs in this document.
fastball <- c("FF", "FT", "FC", "FS")

# Flag every pitch as fastball ("T") or not ("F"); fast_rate() counts these
# flags per ball/strike count.
data_fast <- data %>%
  mutate(FAST_FL = ifelse(pitch_type %in% fastball, "T", "F"))

# Check the data table.
# data_fast %>% head %>% xtable %>% print(type="html")
# kable() is called through its namespace because knitr is not attached above.
data_fast %>%
  head() %>%
  knitr::kable(format = "html")
| pitch_type | sv_id | type | count | pitcher_name | month | FAST_FL |
|---|---|---|---|---|---|---|
| FF | 130401_131510 | S | 0-0 | Clayton Kershaw | 4 | T |
| CU | 130401_131526 | X | 0-1 | Clayton Kershaw | 4 | F |
| FF | 130401_131606 | B | 0-0 | Clayton Kershaw | 4 | T |
| FF | 130401_131619 | S | 1-0 | Clayton Kershaw | 4 | T |
| CU | 130401_131652 | B | 1-1 | Clayton Kershaw | 4 | F |
| FF | 130401_131712 | B | 2-1 | Clayton Kershaw | 4 | T |
The function `fast_rate` calculates a pitcher's fastball ratio for each ball/strike count and visualizes the result.
# Output the fastball-ratio barplot of one pitcher.
#
# For each ball/strike count, computes the share of that pitcher's pitches
# flagged as fastball (FAST_FL == "T") and as non-fastball (FAST_FL == "F")
# in the global `data_fast`, then draws the shares as a bar chart.
#
# Args:
#   pitcher: name matched against data_fast$pitcher_name.
#            Default: "Hiroki Kuroda".
#
# Returns:
#   The chart object created by rPlot() (polychart.js), with `count` on the
#   x axis, `freq` on the y axis and bars coloured by `pitch`
#   ("FAST" / "non_FAST").
#
# Stops with an error when `data_fast` has no rows for `pitcher`.
fast_rate <- function(pitcher = "Hiroki Kuroda") {
  pitches <- data_fast %>%
    filter(pitcher_name == pitcher)

  # Fail loudly on an unknown name rather than rendering an empty chart.
  if (nrow(pitches) == 0) {
    stop("no pitches found for pitcher: ", pitcher, call. = FALSE)
  }

  # Calculate the fastball / non-fastball ratio per count, in long format.
  data_count <- pitches %>%
    group_by(count) %>%
    dplyr::summarise(
      fast = sum(FAST_FL == "T"),
      non_fast = sum(FAST_FL == "F")
    ) %>%
    mutate(
      FAST = fast / (fast + non_fast),
      non_FAST = non_fast / (fast + non_fast)
    ) %>%
    dplyr::select(count, FAST, non_FAST) %>%
    # Convert explicitly so melt() always dispatches to the data.table method.
    data.table::as.data.table() %>%
    data.table::melt(id.vars = "count") %>%
    setnames(c("count", "pitch", "freq")) %>%
    # Keep the count labels ("0-0", "1-2", ...) as plain character values.
    mutate(count = paste(count))

  # Make the plot with polychart.js.
  rPlot(freq ~ count, data = data_count, color = "pitch", type = "bar")
}
# Render the fastball-ratio chart of each pitcher, one after another.
for (pitcher in c("Hiroki Kuroda", "Yu Darvish", "Clayton Kershaw")) {
  plot <- fast_rate(pitcher)
  plot$show("iframesrc", cdn = TRUE)
}