MLB Scouting Report: Fastball-Count (2013)

A fastball count is a ball/strike count in which the pitcher is especially likely to throw a fastball.

Here we visualize the fastball counts of several pitchers using R and PITCHf/x data.

The code and data are available on GitHub (fastball.Rmd):

https://github.com/gghatano/analyze_mlbdata_with_R/tree/master/rchart

library(dplyr)
library(data.table)
library(magrittr)
library(rCharts)
library(xtable)


# select the columns used for this document
col_used = c("pitch_type", "type", "count", "pitcher_name", "sv_id")
# dat = fread(paste(getwd(), "/pitch_fx/2013.csv", sep=""), 
#             select = col_used, showProgress=FALSE) 
dat = fread("pitchfx_data_for_plot.csv")

# Remove rows without a pitch type: both real missing values and the
# literal string "NA" are dropped, whichever way the CSV reader encoded them.
#
# Remove pre-season games by keeping month >= 4.
# Judging by the sample rows, sv_id looks like "YYMMDD_HHMMSS"
# (e.g. "130401_131510"), so the month is characters 3-4. It is parsed as an
# integer so that the comparison is numeric: taking only the 4th character
# would turn October/November into "0"/"1" and drop them as if they were
# pre-season. Rows whose sv_id cannot be parsed get month = NA and are
# dropped by filter().
# NOTE(review): October/November (post-season) rows are therefore kept; add an
# upper bound on `month` here if only the regular season is wanted.
data = dat %>% 
  filter(!is.na(pitch_type), pitch_type != "NA") %>% 
  mutate(month = as.integer(substr(sv_id, 3, 4))) %>% 
  filter(month >= 4)

# pitch_type codes that are counted as fastballs
# (presumably four-seam, two-seam, cutter and splitter -- confirm against the
# PITCHf/x pitch-type glossary)
fastball = c("FF", "FT", "FC", "FS")

# flag every pitch as fastball ("T") or non-fastball ("F");
# the flag is kept as the strings "T"/"F" because fast_rate() compares
# FAST_FL against exactly those values
data_fast = data %>% 
  mutate(FAST_FL = ifelse(pitch_type %in% fastball, "T", "F")) 

# check the data-table
# data_fast %>% head %>% xtable %>% print(type="html")
# kable() lives in knitr, which is not attached by the library() calls of this
# document, so it is called with an explicit namespace
data_fast %>% head %>% knitr::kable(format="html")
pitch_type sv_id type count pitcher_name month FAST_FL
FF 130401_131510 S 0-0 Clayton Kershaw 4 T
CU 130401_131526 X 0-1 Clayton Kershaw 4 F
FF 130401_131606 B 0-0 Clayton Kershaw 4 T
FF 130401_131619 S 1-0 Clayton Kershaw 4 T
CU 130401_131652 B 1-1 Clayton Kershaw 4 F
FF 130401_131712 B 2-1 Clayton Kershaw 4 T

——————————————————————

The function “fast_rate” calculates a pitcher's fastball ratio for each ball/strike count and visualizes the result.

# Build the fastball-ratio bar plot of one pitcher.
#
# For every Ball/Strike count the share of fastballs ("FAST") and of all other
# pitches ("non_FAST") thrown by `pitcher` is computed from the global
# `data_fast` (columns used: pitcher_name, count, FAST_FL with values "T"/"F").
#
# Args:
#   pitcher: pitcher name, compared with `==` against data_fast$pitcher_name.
#            default : "Hiroki Kuroda"
#
# Returns:
#   The chart object created by rPlot() (bar chart of freq ~ count, coloured
#   by pitch). When no pitch of `pitcher` is found, a warning is raised and
#   the chart is built from an empty table.
fast_rate = function(pitcher = "Hiroki Kuroda"){
  # number of fastballs / non-fastballs per Ball/Strike count
  per_count = data_fast %>% 
    filter(pitcher_name == pitcher) %>% 
    group_by(count) %>% 
    dplyr::summarise(fast = sum(FAST_FL == "T"), 
                     non_fast = sum(FAST_FL == "F")) %>% 
    ungroup() %>% 
    mutate(total = fast + non_fast)

  # a misspelled name would otherwise silently produce an empty chart
  if (nrow(per_count) == 0) {
    warning("no pitches found for pitcher \"", pitcher, "\"", call. = FALSE)
  }

  # Long format expected by the plot: one row per (count, pitch) pair,
  # all "FAST" rows first, then all "non_FAST" rows.
  # Built with base R so that the result does not depend on which melt()
  # generic happens to be attached or on how it treats dplyr output.
  pitch_levels = c("FAST", "non_FAST")
  n_counts = nrow(per_count)
  data_count = data.frame(
    count = rep(as.character(per_count$count), times = 2),
    pitch = factor(rep(pitch_levels, each = n_counts), levels = pitch_levels),
    freq = c(per_count$fast, per_count$non_fast) / rep(per_count$total, times = 2),
    stringsAsFactors = FALSE
  )

  # make plot by using polychart.js
  rPlot(freq~count, data = data_count, color="pitch", type="bar")
}

Fastball-ratio of Hiroki Kuroda (NYY).

# Build the fastball-ratio chart for Hiroki Kuroda and render it.
# NOTE(review): "iframesrc" and cdn = TRUE are passed to the chart's show()
# method; presumably they embed the chart as an inline iframe and load the
# JavaScript assets from a CDN -- confirm against the rCharts documentation.
plot = fast_rate("Hiroki Kuroda")
plot$show("iframesrc", cdn = TRUE)

Fastball-ratio of Yu Darvish (TEX).

# Build the fastball-ratio chart for Yu Darvish and render it.
# NOTE(review): "iframesrc" and cdn = TRUE are passed to the chart's show()
# method; presumably they embed the chart as an inline iframe and load the
# JavaScript assets from a CDN -- confirm against the rCharts documentation.
plot = fast_rate("Yu Darvish")
plot$show("iframesrc", cdn = TRUE)

Fastball-ratio of Clayton Kershaw (LAD).

# Build the fastball-ratio chart for Clayton Kershaw and render it.
# NOTE(review): "iframesrc" and cdn = TRUE are passed to the chart's show()
# method; presumably they embed the chart as an inline iframe and load the
# JavaScript assets from a CDN -- confirm against the rCharts documentation.
plot = fast_rate("Clayton Kershaw")
plot$show("iframesrc", cdn = TRUE)