Looking at Screwballs in pitchRx

Motivated by http://www.nytimes.com/2014/07/13/magazine/the-mystery-of-the-vanishing-screwball.html

R code based on “Visualizing Cliff Lee’s Pitches in the 2013 Season.”

The NYT article states Hector Santiago of the Los Angeles Angels is the sole screwball pitcher in the majors today. Let’s take a look at how his pitches show up in PITCHf/x using the pitchRx R package.

library(pitchRx)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.0.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.0.3
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Open the local pitchRx SQLite database as a dplyr data source.
my_db <- src_sqlite("~/pitchfx/pitchRx.sqlite3")
## Loading required package: RSQLite
## Loading required package: DBI
## Loading required package: RSQLite.extfuns
# The database is 4.5GB. The index below helped a great deal!
# dbSendQuery(my_db$con, 'CREATE INDEX pitcher_idx ON atbat(pitcher_name)')

# Columns taken from the "pitch" table; num and gameday_link are the join keys.
locations <- select(
  tbl(my_db, "pitch"),
  pitch_type, px, pz, des, num, gameday_link
)

# Columns taken from the "atbat" table, sharing the same two join keys.
names <- select(
  tbl(my_db, "atbat"),
  pitcher_name, batter_name, num, gameday_link, event, stand
)

# Restrict the at-bats to Hector Santiago, then match each at-bat to its
# pitches on (num, gameday_link).
que <- inner_join(
  locations,
  filter(names, pitcher_name == "Hector Santiago"),
  by = c("num", "gameday_link")
)

# Submit the query and bring the result into R.
pitchfx <- collect(que)

Let’s look at 2014 first.

Some information about pitch types:
http://www.fangraphs.com/library/pitch-type-abbreviations-classifications/

Look at individual starts at Brooks Baseball. For example, here is July 10, 2014.

# Characters 5-14 of gameday_link hold the game date as "YYYY_MM_DD".
pitchfx[["gamedate"]] <- substring(pitchfx[["gameday_link"]], 5, 14)
#table(pitchfx$gamedate)

# Keep only the rows whose game date falls in 2014; rows where the year
# cannot be determined (NA) are dropped, as subset() would do.
pitchfx2014 <- pitchfx[
  which(as.numeric(substr(pitchfx$gamedate, 1, 4)) == 2014),
]
#table(pitchfx2014$gamedate)

#table(pitchfx2014$pitch_type)
#table(pitchfx2014$gamedate, pitchfx2014$pitch_type)
# Pitch-type counts over every game in the data (not just 2014).
table(pitchfx[["pitch_type"]])
## 
##   CH   CU   FC   FF   FT   IN   PO   SC   SI   SL 
## 1023  444  364 1524   12   17    2  268 1844   31
# Pitch-type counts broken out by game date.
table(pitchfx[["gamedate"]], pitchfx[["pitch_type"]])
##             
##              CH CU FC FF FT IN PO SC SI SL
##   2011_03_17  0  0  0  0  0  0  0  0  0  0
##   2011_03_22  0  0  0 16  3  0  0 10  0  2
##   2011_07_06  0  1  0 10  0  0  0  2  2  0
##   2011_07_07  0  0  0 18  9  3  0 19  0  1
##   2012_03_05  0  0  0  0  0  0  0  0  0  0
##   2012_03_08  9  0  0 13  0  0  0 10  8  5
##   2012_03_12  0  0  0  0  0  0  0  0  0  0
##   2012_03_19  0  0  0  0  0  0  0  0  0  0
##   2012_03_23  0  0  0  0  0  0  0  0  0  0
##   2012_03_25  0  0  0  0  0  0  0  0  0  0
##   2012_03_28  0  0  0  0  0  0  0  0  0  0
##   2012_03_29  0  0  0  0  0  0  0  0  0  0
##   2012_04_01  0  0  0  0  0  0  0  0  0  0
##   2012_04_03  0  0  0  2  0  0  0  2 15  3
##   2012_04_07  1  1  0  3  0  0  0  1  5  0
##   2012_04_09  2  3  0  1  0  0  0  1 17  0
##   2012_04_13  0  1  0  1  0  0  0  1 12  0
##   2012_04_16  5  1  0  8  0  0  0  3  6  0
##   2012_04_20  3  0  0  0  0  0  0  2  9  0
##   2012_04_22  1  1  0  6  0  0  0  1  9  0
##   2012_04_25  3  0  0  3  0  0  0  3 12  0
##   2012_05_01  4  0  0 14  0  0  0  3  7  4
##   2012_05_05  2  2  0  5  0  0  0  0 11  0
##   2012_05_06  3  6  0  6  0  0  0  5 13  1
##   2012_05_08  0  3  0  4  0  0  0  0 17  0
##   2012_05_11  0  0  1  2  0  0  0  2  7  0
##   2012_05_14  0  1  0  2  0  0  0  0 17  0
##   2012_05_15  1  2  0  9  0  0  0  0 10  0
##   2012_05_19  2  1  1 10  0  0  0  0  0  0
##   2012_05_25  5  0  0 15  0  0  0  3  1  2
##   2012_05_27  2  7  0  9  0  0  0  1 10  0
##   2012_05_29  0  1  0  0  0  0  0  3 10  0
##   2012_06_02  0  1  0  1  0  0  0  0 11  0
##   2012_06_06  7  4  0 15  0  0  0  2  3  0
##   2012_06_08  5  4  1 11  0  0  0  1  7  0
##   2012_06_16  2  0  0 16  0  0  0  1  3  0
##   2012_06_18  6  5  9  8  0  0  0  1 12  0
##   2012_06_25  3  0  4  0  0  0  0  0 12  0
##   2012_06_28  3  0  2  3  0  0  0  0 13  0
##   2012_07_08  8  1  7 22  0  0  0  3 16  0
##   2012_07_13  0  0  0  3  0  0  0  0  2  0
##   2012_07_14  0  1  6  0  0  4  0  0  0  0
##   2012_07_18  7  1  2  2  0  0  0  2 31  0
##   2012_07_22  7  4  7 23  0  0  0  1 14  0
##   2012_07_25  0  0  0  1  0  0  0  0  3  0
##   2012_07_29  4  2  2  3  0  0  0  2  4  0
##   2012_08_17  2  0  5  5  0  0  0  3  4  0
##   2012_08_20 19  6 10 21  0  0  0  1 19  0
##   2012_08_26 10  0  9 17  0  0  0  4 23 10
##   2012_08_30  4  1  1  3  0  0  0  1  8  0
##   2012_09_03  8  4 20 21  0  0  0  4 34  0
##   2012_09_09 13  6  7 26  0  0  0  8 33  0
##   2012_09_12  8  1  2  7  0  0  0  4 29  0
##   2012_09_21  4  0  0  7  0  0  0  0  0  0
##   2012_09_26 10 11  5 17  0  0  0  3 32  0
##   2012_10_01 11  8  9  1  0  0  0 17 62  0
##   2013_02_25  0  0  0  0  0  0  0  0  0  0
##   2013_03_02  0  0  0  0  0  0  0  0  0  0
##   2013_03_05  0  0  0  0  0  0  0  0  0  0
##   2013_03_09  0  0  0  0  0  0  0  0  0  0
##   2013_03_14  0  0  0  0  0  0  0  0  0  0
##   2013_03_22  0  0  0  0  0  0  0  0  0  0
##   2013_03_26 12  0  8  6  0  0  0  1  6  1
##   2013_03_29  5  0  2  6  0  0  0  2 18  2
##   2013_04_04 10  3  6 10  0  0  0  2 17  0
##   2013_04_10  2  0  1  3  0  0  0  3  4  0
##   2013_04_11  5  1  2  5  0  0  0  1 18  0
##   2013_04_15  0  2  2  2  0  0  0  1  9  0
##   2013_04_16  2  0  4  6  0  0  0  0  8  0
##   2013_04_20  6  1  3  8  0  0  1  0  5  0
##   2013_04_27 28  8 14 30  0  0  0  3  4  0
##   2013_05_02 23  8  8  3  0  0  0  9 41  0
##   2013_05_07 27  8  9  4  0  0  0  5 58  0
##   2013_05_13 25 13  1  1  0  0  0  7 62  0
##   2013_05_18 18  3 16 21  0  0  0  1 21  0
##   2013_05_22 16  4  7 52  0  0  0  4 24  0
##   2013_05_29  2  1  3 10  0  0  1  4  4  0
##   2013_06_01  5  3  1 12  0  8  0  2 15  0
##   2013_06_04  2  2  0  2  0  0  0  1  5  0
##   2013_06_05  1  1  0  2  0  0  0  1  4  0
##   2013_06_09 25  6  2 32  0  0  0  5 37  0
##   2013_06_16 26 12  0  1  0  0  0  8 65  0
##   2013_06_21 14 12  3 46  0  0  0  2 29  0
##   2013_06_28 17  6  1 28  0  0  0  1 20  0
##   2013_07_03 35  6  1 64  0  0  0  3 14  0
##   2013_07_08 25 18  4 33  0  0  0 11 10  0
##   2013_07_13 28  6  1 36  0  0  0  5 32  0
##   2013_07_23 17  9 20 56  0  0  0  4  4  0
##   2013_07_28 12  9  1 47  0  0  0  4 32  0
##   2013_08_02 19 10  8 35  0  0  0  9 25  0
##   2013_08_07 15 16  3 63  0  0  0  2  1  0
##   2013_08_13 32 11  9 48  0  0  0  0  2  0
##   2013_08_18 24 18  4  2  0  0  0  1 68  0
##   2013_08_24 20 11  1 52  0  0  0  3 18  0
##   2013_08_30 24  6  5  1  0  0  0  1 64  0
##   2013_09_07 27  8  2  4  0  0  0 13 46  0
##   2013_09_13 29  6  6 31  0  0  0  2 20  0
##   2013_09_24 18 12  4  2  0  0  0  1 60  0
##   2014_03_02  0  0  0  0  0  0  0  0  0  0
##   2014_03_07  0  0  0  0  0  0  0  0  0  0
##   2014_03_12  0  0  0  0  0  0  0  0  0  0
##   2014_03_17  0  0  0  0  0  0  0  0  0  0
##   2014_03_27 22  4 13 39  0  0  0  7 19  0
##   2014_04_02 16  3  5 25  0  0  0  0 34  0
##   2014_04_08  7  6  8 19  0  0  0  0 54  0
##   2014_04_14 23  9  7 25  0  0  0  3 34  0
##   2014_04_20 25  5  7 23  0  0  0  4 55  0
##   2014_04_26 15  9  9 19  0  0  0  0 25  0
##   2014_05_02 19 14  4 29  0  0  0  2 34  0
##   2014_05_07 13  7  3 22  0  0  0  0 13  0
##   2014_05_11  2  0  0  3  0  0  0  0  3  0
##   2014_05_19  7  6  0  7  0  0  0  0  8  0
##   2014_06_10  8 10 14 45  0  0  0  0 21  0
##   2014_06_15 19 10  7 43  0  0  0  0 15  0
##   2014_06_22  0  2  0  2  0  0  0  0  0  0
##   2014_06_28 16  6  3 24  0  0  0  0 18  0
##   2014_07_05 33 11  4 33  0  2  0  0 16  0
##   2014_07_10 18 11  8 12  0  0  0  0 52  0

If the pitch types can be trusted, it looks like he has been moving away from the screwball.

PITCHf/x Primer

Note: IN is Intentional Ball and SC is Screwball (see Pitch type abbreviations).

# Edges of the strike-zone rectangle, on the same axes as the px (horizontal)
# and pz (vertical) values plotted against it.
# NOTE(review): presumably in feet, per PITCHf/x convention -- confirm.
topKzone <- 3.5
botKzone <- 1.6
inKzone <- -0.95
outKzone <- 0.95

# Corner coordinates of the rectangle as a closed path: five points, with the
# last repeating the first so geom_path() draws all four sides.
kZone <- data.frame(
  x = c(inKzone, outKzone)[c(1, 1, 2, 2, 1)],
  y = c(botKzone, topKzone)[c(1, 2, 2, 1, 1)]
)

library(ggplot2)
# 2014 pitch locations (px, pz), colored and split into one stacked panel per
# value of `stand`, with the kZone outline drawn in red over the points.
# ylim(0, 5) limits the vertical axis; points it excludes are reported in the
# "Removed ... rows" warnings.
print(
  ggplot(data = pitchfx2014, mapping = aes(x = px, y = pz, color = stand)) +
    geom_point() +
    geom_path(
      mapping = aes(x = x, y = y),
      data = kZone, lwd = 2, col = "red"
    ) +
    facet_wrap(~ stand, ncol = 1) +
    ylim(0, 5)
)
## Warning: Removed 35 rows containing missing values (geom_point).
## Warning: Removed 94 rows containing missing values (geom_point).

plot of chunk strikezone

# 2014 pitch locations (px, pz), colored and split into one panel per
# pitch_type, with the kZone outline drawn in red over the points.
# ylim(0, 5) limits the vertical axis; points it excludes are reported in the
# "Removed ... rows" warnings.
print(
  ggplot(
    data = pitchfx2014,
    mapping = aes(x = px, y = pz, color = pitch_type)
  ) +
    geom_point() +
    geom_path(
      mapping = aes(x = x, y = y),
      data = kZone, lwd = 2, col = "red"
    ) +
    facet_wrap(~ pitch_type) +
    ylim(0, 5)
)
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 124 rows containing missing values (geom_point).

plot of chunk strikezone