Chapter 1: Exploring Pitch

Import Greinke dataset

# Read the pitch-by-pitch data. stringsAsFactors = TRUE keeps text columns as
# factors on every R version (R >= 4.0 defaults to character); the str()
# output and the droplevels() call further down both expect factors.
greinke <- read.csv("greinke.csv", stringsAsFactors = TRUE)

1.1 Clean the Data

# Preview the top six rows of the data set
head(greinke, n = 6)
##         p_name pitcher_id batter_stand pitch_type    pitch_result
## 1 Zack Greinke     425844            R         FF            Ball
## 2 Zack Greinke     425844            R         FF Swinging Strike
## 3 Zack Greinke     425844            R         FF   Called Strike
## 4 Zack Greinke     425844            R         SL Swinging Strike
## 5 Zack Greinke     425844            R         FF Swinging Strike
## 6 Zack Greinke     425844            R         SL Swinging Strike
##   atbat_result start_speed    z0     x0  pfx_x  pfx_z    px    pz
## 1         Walk        94.2 5.997 -0.675 -4.457  9.760 1.714 1.925
## 2       Single        92.4 6.281 -0.760 -1.590 11.400 0.589 3.271
## 3     Home Run        92.7 6.168 -0.958 -1.884  9.245 0.399 2.918
## 4    Strikeout        86.9 6.077 -0.939  3.594  0.762 0.764 1.306
## 5    Strikeout        92.8 6.107 -0.524 -0.558 11.134 1.517 2.193
## 6    Strikeout        87.8 6.321 -0.948  4.313  0.132 0.695 3.431
##   break_angle break_length spin_rate spin_dir balls strikes outs game_date
## 1        24.8          3.5  2188.802  204.457     2       2    2 10/3/2015
## 2        10.1          2.7  2312.202  187.913     1       1    0 10/3/2015
## 3         9.2          3.5  1889.841  191.468     0       0    1 10/3/2015
## 4       -11.4          8.0   693.649  102.648     1       2    0 10/3/2015
## 5        -0.4          2.8  2242.916  182.859     1       2    0 10/3/2015
## 6       -13.6          7.8   828.693   92.330     2       2    1 10/3/2015
##   inning inning_topbot batted_ball_type batted_ball_velocity   hc_x  hc_y
## 1      4           top                                    NA   0.00  0.00
## 2      3           top                                   104 123.56 97.26
## 3      5           top                                   103  50.88 31.17
## 4      6           top                                    NA   0.00  0.00
## 5      8           top                                    NA   0.00  0.00
## 6      1           top                                    NA   0.00  0.00
##   pitch_id distance_feet
## 1      160            NA
## 2       95             0
## 3      218           425
## 4      265            NA
## 5      374            NA
## 6       14            NA
# How many pitches (rows) were loaded?
dim(greinke)[1]
## [1] 3239
# Distribution summary of start_speed, including the count of missing values
summary(greinke[["start_speed"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   52.20   87.30   89.80   88.44   91.80   95.40       3
# Keep only the pitches that have a recorded start_speed
greinke <- greinke[!is.na(greinke$start_speed), ]

# Count the rows that remain
nrow(greinke)
## [1] 3236
# Inspect column types and the leading values of each column
str(greinke)
## 'data.frame':    3236 obs. of  29 variables:
##  $ p_name              : Factor w/ 1 level "Zack Greinke": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pitcher_id          : int  425844 425844 425844 425844 425844 425844 425844 425844 425844 425844 ...
##  $ batter_stand        : Factor w/ 2 levels "L","R": 2 2 2 2 2 2 2 2 2 2 ...
##  $ pitch_type          : Factor w/ 8 levels "","CH","CU","EP",..: 5 5 5 8 5 8 2 5 8 8 ...
##  $ pitch_result        : Factor w/ 15 levels "Ball","Ball In Dirt",..: 1 14 3 14 14 14 15 3 4 14 ...
##  $ atbat_result        : Factor w/ 24 levels "Bunt Groundout",..: 24 20 12 21 21 21 21 21 10 24 ...
##  $ start_speed         : num  94.2 92.4 92.7 86.9 92.8 87.8 90.3 92.7 85.5 87.3 ...
##  $ z0                  : num  6 6.28 6.17 6.08 6.11 ...
##  $ x0                  : num  -0.675 -0.76 -0.958 -0.939 -0.524 ...
##  $ pfx_x               : num  -4.457 -1.59 -1.884 3.594 -0.558 ...
##  $ pfx_z               : num  9.76 11.4 9.245 0.762 11.134 ...
##  $ px                  : num  1.714 0.589 0.399 0.764 1.517 ...
##  $ pz                  : num  1.93 3.27 2.92 1.31 2.19 ...
##  $ break_angle         : num  24.8 10.1 9.2 -11.4 -0.4 -13.6 22.5 25.1 -8.4 -11.3 ...
##  $ break_length        : num  3.5 2.7 3.5 8 2.8 7.8 7.4 3.8 7.5 7.4 ...
##  $ spin_rate           : num  2189 2312 1890 694 2243 ...
##  $ spin_dir            : num  204 188 191 103 183 ...
##  $ balls               : int  2 1 0 1 1 2 1 0 0 0 ...
##  $ strikes             : int  2 1 0 2 2 2 2 2 0 1 ...
##  $ outs                : int  2 0 1 0 0 1 1 2 2 2 ...
##  $ game_date           : Factor w/ 32 levels "10/3/2015","4/12/2015",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ inning              : int  4 3 5 6 8 1 6 5 8 4 ...
##  $ inning_topbot       : Factor w/ 2 levels "bot","top": 2 2 2 2 2 2 2 2 2 2 ...
##  $ batted_ball_type    : Factor w/ 5 levels "","FB","GB","LD",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ batted_ball_velocity: int  NA 104 103 NA NA NA NA NA NA NA ...
##  $ hc_x                : num  0 123.6 50.9 0 0 ...
##  $ hc_y                : num  0 97.3 31.2 0 0 ...
##  $ pitch_id            : int  160 95 218 265 374 14 279 231 386 156 ...
##  $ distance_feet       : int  NA 0 425 NA NA NA NA NA NA NA ...

1.2 Check Dates

# Inspect how game_date is stored before converting it
class(greinke$game_date)
## [1] "factor"
# Parse the month/day/year text into Date objects
greinke$game_date <- as.Date(
  as.character(greinke$game_date),
  format = "%m/%d/%Y"
)

# Confirm the column now has class Date
class(greinke$game_date)
## [1] "Date"

X.1.3 Delimit Dates

library(tidyr)
# Split game_date on "-" into year, month and day columns, keeping the
# original game_date column alongside the new ones
greinke <- separate(
  data = greinke,
  col = game_date,
  into = c("year", "month", "day"),
  sep = "-",
  remove = FALSE
)

# separate() returns text pieces; store month as a number
greinke$month <- as.numeric(greinke$month)

# Label each pitch "july" when month is 7 and "other" otherwise
# (FALSE + 1 picks "other", TRUE + 1 picks "july", NA stays NA)
greinke$july <- c("other", "july")[(greinke$month == 7) + 1L]

# Show the first rows, including the new columns
head(greinke)
##         p_name pitcher_id batter_stand pitch_type    pitch_result
## 1 Zack Greinke     425844            R         FF            Ball
## 2 Zack Greinke     425844            R         FF Swinging Strike
## 3 Zack Greinke     425844            R         FF   Called Strike
## 4 Zack Greinke     425844            R         SL Swinging Strike
## 5 Zack Greinke     425844            R         FF Swinging Strike
## 6 Zack Greinke     425844            R         SL Swinging Strike
##   atbat_result start_speed    z0     x0  pfx_x  pfx_z    px    pz
## 1         Walk        94.2 5.997 -0.675 -4.457  9.760 1.714 1.925
## 2       Single        92.4 6.281 -0.760 -1.590 11.400 0.589 3.271
## 3     Home Run        92.7 6.168 -0.958 -1.884  9.245 0.399 2.918
## 4    Strikeout        86.9 6.077 -0.939  3.594  0.762 0.764 1.306
## 5    Strikeout        92.8 6.107 -0.524 -0.558 11.134 1.517 2.193
## 6    Strikeout        87.8 6.321 -0.948  4.313  0.132 0.695 3.431
##   break_angle break_length spin_rate spin_dir balls strikes outs
## 1        24.8          3.5  2188.802  204.457     2       2    2
## 2        10.1          2.7  2312.202  187.913     1       1    0
## 3         9.2          3.5  1889.841  191.468     0       0    1
## 4       -11.4          8.0   693.649  102.648     1       2    0
## 5        -0.4          2.8  2242.916  182.859     1       2    0
## 6       -13.6          7.8   828.693   92.330     2       2    1
##    game_date year month day inning inning_topbot batted_ball_type
## 1 2015-10-03 2015    10  03      4           top                 
## 2 2015-10-03 2015    10  03      3           top                 
## 3 2015-10-03 2015    10  03      5           top                 
## 4 2015-10-03 2015    10  03      6           top                 
## 5 2015-10-03 2015    10  03      8           top                 
## 6 2015-10-03 2015    10  03      1           top                 
##   batted_ball_velocity   hc_x  hc_y pitch_id distance_feet  july
## 1                   NA   0.00  0.00      160            NA other
## 2                  104 123.56 97.26       95             0 other
## 3                  103  50.88 31.17      218           425 other
## 4                   NA   0.00  0.00      265            NA other
## 5                   NA   0.00  0.00      374            NA other
## 6                   NA   0.00  0.00       14            NA other
# Count how many pitches fall in each july / other group
summary(as.factor(greinke$july))
##  july other 
##   524  2712

1.4 Velocity Distribution

# Histogram of start_speed across every pitch
hist(greinke$start_speed)

# Pitches thrown in July
greinke_july <- subset(greinke, july == "july")

# Pitches thrown in all other months
greinke_other <- subset(greinke, july == "other")

# Switch to a 1 x 2 panel layout. par() returns the previous settings, which
# are restored below so that later single plots are not squeezed into half
# of the device.
old_par <- par(mfrow = c(1, 2))

# Plot start_speed histogram from July
hist(greinke_july$start_speed)

# Plot start_speed histogram for other months
hist(greinke_other$start_speed)

# Put the plotting layout back the way it was
par(old_par)

1.5 Fastball Velocity Distribution

# Four-seam fastballs ("FF") thrown in July
july_ff <- subset(greinke_july, pitch_type == "FF")

# Four-seam fastballs ("FF") thrown in all other months
other_ff <- subset(greinke_other, pitch_type == "FF")

# Switch to a 1 x 2 panel layout, remembering the previous settings so they
# can be restored once both panels are drawn
old_par <- par(mfrow = c(1, 2))

# Plot histogram of July fastball speeds
hist(july_ff$start_speed)

# Plot histogram of other month fastball speeds
hist(other_ff$start_speed)

# Put the plotting layout back the way it was
par(old_par)

1.6 Distribution Comparisons w/ Color

# Density-scaled histogram of four-seam speed outside July, drawn with a
# semi-transparent fill so the overlay below stays visible
hist(
  other_ff$start_speed,
  freq = FALSE,
  col = "#00009950",
  xlab = "Velocity (mph)",
  ylim = c(0, .35),
  main = "Greinke 4-Seam Fastball Velocity"
)

# Overlay the July histogram on the same axes
hist(july_ff$start_speed, freq = FALSE, col = "#99000050", add = TRUE)

# Mark the mean speed of each group with a vertical line in its fill color
abline(v = mean(other_ff$start_speed), lwd = 2, col = "#00009950")
abline(v = mean(july_ff$start_speed), lwd = 2, col = "#99000050")

1.7 Tapply Velocity Changes

# Mean start_speed for July pitches versus pitches in other months
with(greinke, tapply(start_speed, july, mean))
##     july    other 
## 88.86489 88.35601
# Keep only the four-seam fastballs (pitch_type "FF")
greinke_ff <- greinke[which(greinke$pitch_type == "FF"), ]

# Mean four-seam start_speed in July versus other months
ff_velo_month <- with(greinke_ff, tapply(start_speed, july, mean))

# Show the two means
print(ff_velo_month)
##     july    other 
## 92.42077 91.66474

X.1.8 Game by game Changes

# Create ff_dt: the mean four-seam start_speed for each game_date.
# tapply() returns one mean per date; wrapping it in data.frame() puts the
# dates in the row names and builds the single column's name from the text of
# the call (see the printed header below), so the expression is kept exactly
# as written. The column is renamed in the next section.
ff_dt <- data.frame(tapply(greinke_ff$start_speed, greinke_ff$game_date, mean))

# Print the first 6 rows of ff_dt
head(ff_dt)
##            tapply.greinke_ff.start_speed..greinke_ff.game_date..mean.
## 2015-04-07                                                   90.82632
## 2015-04-12                                                   90.51622
## 2015-04-18                                                   90.28654
## 2015-04-24                                                   90.51277
## 2015-04-29                                                   90.40732
## 2015-05-05                                                   90.33043

1.9 Tidying the data frame

# Recover each game's date from the row labels
ff_dt$game_date <- as.Date(rownames(ff_dt), format = "%Y-%m-%d")

# Give the velocity column a readable name
names(ff_dt)[1] <- "start_speed"

# Replace the date row labels with default row numbers
rownames(ff_dt) <- NULL

# Inspect the tidied result
head(ff_dt)
##   start_speed  game_date
## 1    90.82632 2015-04-07
## 2    90.51622 2015-04-12
## 3    90.28654 2015-04-18
## 4    90.51277 2015-04-24
## 5    90.40732 2015-04-29
## 6    90.33043 2015-05-05

1.10 Game by Game

# Line chart of the per-game mean four-seam speed over the season
plot(
  start_speed ~ game_date,
  data = ff_dt,
  type = "l",
  lwd = 4,
  ylim = c(88, 95),
  main = "Greinke 4-Seam Fastball Velocity",
  xlab = "Date",
  ylab = "Velocity (mph)"
)

1.11 Jittered Points

# Redraw the per-game mean line from the previous section
plot(
  start_speed ~ game_date,
  data = ff_dt,
  type = "l",
  lwd = 4,
  ylim = c(88, 95),
  main = "Greinke 4-Seam Fastball Velocity",
  xlab = "Date",
  ylab = "Velocity (mph)"
)

# Overlay every individual four-seam pitch; the numeric date is jittered so
# pitches from the same game do not sit on one vertical line
points(
  x = jitter(as.numeric(greinke_ff$game_date)),
  y = greinke_ff$start_speed,
  pch = 16,
  col = "#99004450"
)

Chapter 2: Exploring Pitch Types

2.1 Pitch Mix Tables

# Subset the data to remove pitch types "IN" and "EP"
greinke <- subset(greinke, pitch_type != "IN" & pitch_type != "EP")

# Rebuild pitch_type so the removed types no longer appear as empty levels.
# factor() is used rather than droplevels() because it also accepts a
# character column (what read.csv() returns by default on R >= 4.0), whereas
# droplevels() has no method for character vectors and would stop with an error.
greinke$pitch_type <- factor(greinke$pitch_type)

# Create type_tab: pitch counts by pitch type (rows) and july/other (columns)
type_tab <- table(greinke$pitch_type, greinke$july)

# Print type_tab
type_tab
##     
##      july other
##   CH  112   487
##   CU   51   242
##   FF  207  1191
##   FT   66   255
##   SL   86   535

2.2 Pitch Mix Table

# Divide each column of type_tab by its column total, so every column
# (july, other) sums to 1, then round to three decimals
type_prop <- round(
  sweep(type_tab, 2, colSums(type_tab), "/"),
  digits = 3
)

# Show the pitch-mix proportions
type_prop
##     
##       july other
##   CH 0.215 0.180
##   CU 0.098 0.089
##   FF 0.397 0.439
##   FT 0.126 0.094
##   SL 0.165 0.197

2.3 PMT July

# Create ff_prop: the four-seam ("FF") row of the pitch-mix proportions.
# The row is selected by name so the result does not depend on the order in
# which pitch types happen to appear in the table.
ff_prop <- type_prop["FF", ]

# Print ff_prop
ff_prop
##  july other 
## 0.397 0.439
# Print ff_velo_month
ff_velo_month
##     july    other 
## 92.42077 91.66474

2.4 Describe Fastball Usage

# Turn the proportion table into a data frame with one row per pitch type
type_prop <- as.data.frame.matrix(type_prop)

# Carry the pitch-type labels into a regular column as well
Pitch <- rownames(type_prop)
type_prop <- cbind(Pitch, type_prop)

# Capitalised column names used by the following sections
colnames(type_prop) <- c("Pitch", "July", "Other")

2.5 PMT Changes

# Relative change in usage: July share minus Other share, divided by Other
type_prop$Difference <- with(type_prop, (July - Other) / Other)

# Show the table with the new column
type_prop
##    Pitch  July Other  Difference
## CH    CH 0.215 0.180  0.19444444
## CU    CU 0.098 0.089  0.10112360
## FF    FF 0.397 0.439 -0.09567198
## FT    FT 0.126 0.094  0.34042553
## SL    SL 0.165 0.197 -0.16243655
# Plot a barplot of the relative change in usage for each pitch type.
# The y-axis is symmetric and at least +/-0.3, but widens (to the next 0.1)
# when a bar is taller than that; with a fixed +/-0.3 the FT bar (0.340)
# ran past the end of the axis.
diff_limit <- max(
  0.3,
  ceiling(max(abs(type_prop$Difference), na.rm = TRUE) * 10) / 10
)
barplot(type_prop$Difference, names.arg = type_prop$Pitch,
        main = "Pitch Usage in July vs. Other Months",
        ylab = "Percentage Change in July",
        ylim = c(-diff_limit, diff_limit))

2.6 Ball Strike Count Frequency

# Combine balls and strikes into a single "balls-strikes" label, e.g. "2-1"
greinke$bs_count <- with(greinke, paste(balls, strikes, sep = "-"))

# Show the first rows, including the new bs_count column
head(greinke)
##         p_name pitcher_id batter_stand pitch_type    pitch_result
## 1 Zack Greinke     425844            R         FF            Ball
## 2 Zack Greinke     425844            R         FF Swinging Strike
## 3 Zack Greinke     425844            R         FF   Called Strike
## 4 Zack Greinke     425844            R         SL Swinging Strike
## 5 Zack Greinke     425844            R         FF Swinging Strike
## 6 Zack Greinke     425844            R         SL Swinging Strike
##   atbat_result start_speed    z0     x0  pfx_x  pfx_z    px    pz
## 1         Walk        94.2 5.997 -0.675 -4.457  9.760 1.714 1.925
## 2       Single        92.4 6.281 -0.760 -1.590 11.400 0.589 3.271
## 3     Home Run        92.7 6.168 -0.958 -1.884  9.245 0.399 2.918
## 4    Strikeout        86.9 6.077 -0.939  3.594  0.762 0.764 1.306
## 5    Strikeout        92.8 6.107 -0.524 -0.558 11.134 1.517 2.193
## 6    Strikeout        87.8 6.321 -0.948  4.313  0.132 0.695 3.431
##   break_angle break_length spin_rate spin_dir balls strikes outs
## 1        24.8          3.5  2188.802  204.457     2       2    2
## 2        10.1          2.7  2312.202  187.913     1       1    0
## 3         9.2          3.5  1889.841  191.468     0       0    1
## 4       -11.4          8.0   693.649  102.648     1       2    0
## 5        -0.4          2.8  2242.916  182.859     1       2    0
## 6       -13.6          7.8   828.693   92.330     2       2    1
##    game_date year month day inning inning_topbot batted_ball_type
## 1 2015-10-03 2015    10  03      4           top                 
## 2 2015-10-03 2015    10  03      3           top                 
## 3 2015-10-03 2015    10  03      5           top                 
## 4 2015-10-03 2015    10  03      6           top                 
## 5 2015-10-03 2015    10  03      8           top                 
## 6 2015-10-03 2015    10  03      1           top                 
##   batted_ball_velocity   hc_x  hc_y pitch_id distance_feet  july bs_count
## 1                   NA   0.00  0.00      160            NA other      2-2
## 2                  104 123.56 97.26       95             0 other      1-1
## 3                  103  50.88 31.17      218           425 other      0-0
## 4                   NA   0.00  0.00      265            NA other      1-2
## 5                   NA   0.00  0.00      374            NA other      1-2
## 6                   NA   0.00  0.00       14            NA other      2-2

2.7 B-S Count in July vs Other Months

# Pitch counts by ball-strike count (rows) and july/other (columns)
bs_count_tab <- table(greinke$bs_count, greinke$july)

# Share of each column's pitches thrown in each count, to three decimals
bs_month <- round(
  prop.table(bs_count_tab, margin = 2),
  digits = 3
)

# Show the proportions
bs_month
##      
##        july other
##   0-0 0.261 0.262
##   0-1 0.134 0.135
##   0-2 0.056 0.063
##   1-0 0.105 0.093
##   1-1 0.123 0.113
##   1-2 0.092 0.097
##   2-0 0.029 0.025
##   2-1 0.052 0.053
##   2-2 0.086 0.094
##   3-0 0.006 0.006
##   3-1 0.015 0.015
##   3-2 0.042 0.043

2.8 Visualizing B-S count in July vs Others

# Relative change of each count's share in July compared with other months:
# (july - other) / other, rounded to three decimals
diff_bs <- round(
  (bs_month[, "july"] - bs_month[, "other"]) / bs_month[, "other"],
  digits = 3
)

# Show the relative changes
diff_bs
##    0-0    0-1    0-2    1-0    1-1    1-2    2-0    2-1    2-2    3-0 
## -0.004 -0.007 -0.111  0.129  0.088 -0.052  0.160 -0.019 -0.085  0.000 
##    3-1    3-2 
##  0.000 -0.023
# Create a bar plot of the changes.
# The y-axis is symmetric and at least +/-0.15, but widens (in steps of 0.05)
# when a bar is taller than that; with a fixed +/-0.15 the 2-0 bar (0.160)
# ran past the end of the axis. Non-finite values (possible when a count's
# share in other months is zero) are ignored when sizing the axis.
finite_diff <- diff_bs[is.finite(diff_bs)]
bs_limit <- max(0.15, ceiling(max(abs(finite_diff), 0) * 20) / 20)
barplot(diff_bs, main = "Ball-Strike Count Rate in July vs. Other Months" ,
        ylab = "Percentage Change in July", ylim = c(-bs_limit, bs_limit),
        las = 2)

2.9 Cross-Tabulate Pitch Use

# Cross-tabulate pitch type (rows) against ball-strike count (columns)
type_bs <- table(greinke[["pitch_type"]], greinke[["bs_count"]])

# Show the counts
print(type_bs)
##     
##      0-0 0-1 0-2 1-0 1-1 1-2 2-0 2-1 2-2 3-0 3-1 3-2
##   CH  92  93  36  70  79  62  27  46  52   0  18  24
##   CU 124  49  10  34  38   9   4  12   9   0   0   4
##   FF 482 167  61 136 136  89  37  71 109  17  24  69
##   FT  54  55  19  32  50  31  11  18  34   2   3  12
##   SL  93  71  75  35  68 119   5  24  96   0   5  30
# Within each ball-strike count (column), the share of each pitch type,
# rounded to three decimals
type_bs_prop <- round(
  sweep(type_bs, 2, colSums(type_bs), "/"),
  digits = 3
)

# Show the proportions
type_bs_prop
##     
##        0-0   0-1   0-2   1-0   1-1   1-2   2-0   2-1   2-2   3-0   3-1
##   CH 0.109 0.214 0.179 0.228 0.213 0.200 0.321 0.269 0.173 0.000 0.360
##   CU 0.147 0.113 0.050 0.111 0.102 0.029 0.048 0.070 0.030 0.000 0.000
##   FF 0.570 0.384 0.303 0.443 0.367 0.287 0.440 0.415 0.363 0.895 0.480
##   FT 0.064 0.126 0.095 0.104 0.135 0.100 0.131 0.105 0.113 0.105 0.060
##   SL 0.110 0.163 0.373 0.114 0.183 0.384 0.060 0.140 0.320 0.000 0.100
##     
##        3-2
##   CH 0.173
##   CU 0.029
##   FF 0.496
##   FT 0.086
##   SL 0.216

2.10 Pitch Mix late in Games

# Flag pitches thrown after the 5th inning (1 = late, 0 = early) and store
# the flag as a factor
greinke$late_in_game <- factor(as.numeric(greinke$inning > 5))

# Pitch counts by pitch type (rows) and early/late flag (columns)
type_late <- table(greinke$pitch_type, greinke$late_in_game)

# Share of each pitch type within the early and late columns
type_late_prop <- round(
  prop.table(type_late, margin = 2),
  digits = 3
)

# Show the proportions
type_late_prop
##     
##          0     1
##   CH 0.178 0.204
##   CU 0.086 0.102
##   FF 0.444 0.403
##   FT 0.107 0.080
##   SL 0.185 0.211

2.11 Late Game Pitch Mix

# Create t_type_late: the early/late pitch mix with one row per game stage.
# The proportion table is transposed (not the raw counts) so the bars match
# the "Pitch Selection Proportion" axis label and early/late are comparable
# even though the two stages contain different numbers of pitches.
t_type_late <- t(type_late_prop)

# Print dimensions of t_type_late
dim(t_type_late)
## [1] 2 5
# Print dimensions of type_late
dim(type_late)
## [1] 5 2
# Change row names
rownames(t_type_late) <- c("Early", "Late")

# Make barplot using t_type_late: one pair of bars (early, late) per pitch type
barplot(t_type_late, beside = TRUE, col = c("red", "blue"),
        main = "Early vs. Late In Game Pitch Selection",
        ylab = "Pitch Selection Proportion",
        legend = rownames(t_type_late))

Chapter 3: Exploring Pitch Locations

3.1 Locational Changes

# Mean pitch height (pz), converted to inches, in July vs. other months
12 * with(greinke, tapply(pz, july, mean))
##     july    other 
## 26.26002 26.39904
# Pitches thrown to left-handed batters
greinke_lhb <- greinke[which(greinke$batter_stand == "L"), ]

# Pitches thrown to right-handed batters
greinke_rhb <- greinke[which(greinke$batter_stand == "R"), ]

# Mean horizontal location (px), in inches, against left-handed batters
12 * with(greinke_lhb, tapply(px, july, mean))
##      july     other 
## -4.627355 -6.320144
# Mean horizontal location (px), in inches, against right-handed batters
12 * with(greinke_rhb, tapply(px, july, mean))
##     july    other 
## 4.032226 5.731850

3.2 LC Visualization

# Plot location of all pitches, colored by July vs. other months
plot(greinke$pz ~ greinke$px,
     col = factor(greinke$july),
     xlim = c(-3, 3))

# Switch to a 1 x 2 panel layout, remembering the previous settings so they
# can be restored once both panels are drawn
old_par <- par(mfrow = c(1, 2))

# Plot the pitch locations for July
plot(pz ~ px, data = greinke_july,
     col = "black", pch = 16,
     xlim = c(-3, 3), ylim = c(-1, 6),
     main = "July")

# Plot the pitch locations for other months
plot(pz ~ px, data = greinke_other,
     col = "black", pch = 16,
     xlim = c(-3, 3), ylim = c(-1, 6),
     main = "Other months")

# Put the plotting layout back so later plots use the full device
par(old_par)

3.3 Plotting a Grid

# Keep pitches inside the plotting window: within 2 ft of the horizontal
# center and strictly between 0 and 5 ft high
greinke_sub <- subset(greinke, abs(px) < 2 & pz > 0 & pz < 5)

# Open an empty plot covering that window (type = "n" draws no points)
plot(
  x = c(-2, 2),
  y = c(0, 5),
  type = "n",
  main = "Greinke Locational Zone Proportions",
  xlab = "Horizontal Location (ft.; Catcher's View)",
  ylab = "Vertical Location (ft.)"
)

# Overlay solid black grid lines
grid(col = "black", lty = "solid")

3.4 Binning Locational Data

# Attach the zone columns stored in greinke_sub_zone.csv, matched to
# greinke_sub by row position
greinke_sub <- cbind(greinke_sub, read.csv("greinke_sub_zone.csv"))

# Count pitches per zone and convert to proportions of all pitches
greinke_table <- table(greinke_sub$zone)
zone_prop <- round(prop.table(greinke_table), digits = 3)

# Open an empty plot covering the location window
plot(
  x = c(-2, 2),
  y = c(0, 5),
  type = "n",
  main = "Greinke Locational Zone Proportions",
  xlab = "Horizontal Location (ft.; Catcher's View)",
  ylab = "Vertical Location (ft.)"
)

# Overlay solid black grid lines
grid(col = "black", lty = "solid")

# Write the first zone's proportion in the top-left grid cell
text(x = -1.5, y = 4.5, labels = zone_prop[1], cex = 1.5)

3.5 For Loops and Plotting Locational Grid Proportions

# Plot pitch location window
plot(x = c(-2, 2), y = c(0, 5), type = "n",
     main = "Greinke Locational Zone Proportions",
     xlab = "Horizontal Location (ft.; Catcher's View)",
     ylab = "Vertical Location (ft.)")

# Add the grid lines
grid(lty = "solid", col = "black")

# Label position for every zone: the mean zone_px / zone_pz of its pitches.
# tapply() names each result by zone, so the labels below are looked up by
# zone name rather than by position and stay aligned even if a zone has no
# pitches or there are not exactly 20 zones.
zone_x <- tapply(greinke_sub$zone_px, greinke_sub$zone, mean)
zone_y <- tapply(greinke_sub$zone_pz, greinke_sub$zone, mean)

# Write every zone's proportion in one vectorised text() call
text(zone_x, zone_y, labels = zone_prop[names(zone_x)], cex = 1.5)

3.6 Binned Locational Differences

# Row masks for the two groups of pitches
# Share of July pitches landing in each zone: zone counts for July rows
# divided by the number of July rows, to three decimals
zone_prop_july <- round(
  table(greinke_sub$zone[greinke_sub$july == "july"]) /
    sum(greinke_sub$july == "july", na.rm = TRUE),
  digits = 3
)

# Same calculation for pitches thrown in other months
zone_prop_other <- round(
  table(greinke_sub$zone[greinke_sub$july == "other"]) /
    sum(greinke_sub$july == "other", na.rm = TRUE),
  digits = 3
)

# Show the July proportions
print(zone_prop_july)
## 
##     1     2     3     5     6     7     8     9    10    11    12    13 
## 0.004 0.002 0.006 0.036 0.058 0.060 0.020 0.090 0.126 0.160 0.030 0.040 
##    14    15    16    17    18    19    20 
## 0.128 0.110 0.050 0.002 0.036 0.028 0.016
# Show the proportions for the other months
print(zone_prop_other)
## 
##     1     2     3     4     5     6     7     8     9    10    11    12 
## 0.008 0.008 0.010 0.002 0.028 0.044 0.070 0.018 0.058 0.128 0.163 0.049 
##    13    14    15    16    17    18    19    20 
## 0.039 0.114 0.123 0.056 0.006 0.025 0.032 0.019
# Expand zone_prop_july to all 20 zones. Zones with no July pitches are
# absent from the table (zone 4 in the output above), so start from a vector
# of zeros named "1".."20" and fill in the observed zones by name. This
# replaces a hand-written patch that only worked when exactly zone 4 was
# missing.
zone_prop_july2 <- setNames(numeric(20), as.character(1:20))
zone_prop_july2[names(zone_prop_july)] <- as.vector(zone_prop_july)

# Create zone_prop_diff: July proportion minus other-month proportion, by zone
zone_prop_diff <- zone_prop_july2 - zone_prop_other

# Print zone_prop_diff
zone_prop_diff
## 
##      1      2      3      4      5      6      7      8      9     10 
## -0.004 -0.006 -0.004 -0.002  0.008  0.014 -0.010  0.002  0.032 -0.002 
##     11     12     13     14     15     16     17     18     19     20 
## -0.003 -0.019  0.001  0.014 -0.013 -0.006 -0.004  0.011 -0.004 -0.003

3.7 Plotting Zone Proportion Differences

# Plot pitch location window
plot(x = c(-2, 2), y = c(0, 5), type = "n",
     main = "Greinke Locational Zone Proportions",
     xlab = "Horizontal Location (ft.; Catcher's View)",
     ylab = "Vertical Location (ft.)")

# Add the grid lines
grid(lty = "solid", col = "black")

# Label position for every zone: the mean zone_px / zone_pz of its pitches,
# named by zone
zone_x <- tapply(greinke_sub$zone_px, greinke_sub$zone, mean)
zone_y <- tapply(greinke_sub$zone_pz, greinke_sub$zone, mean)

# Only label zones that have both a position and a difference value, matching
# the two by zone name instead of by position
zone_ids <- intersect(names(zone_x), names(zone_prop_diff))

# Write the July-minus-other difference in each zone. The values are rounded
# to three decimals (as in the count-based plot later on) so floating-point
# residue from the subtraction never shows up in a label.
text(zone_x[zone_ids], zone_y[zone_ids],
     labels = round(zone_prop_diff[zone_ids], 3), cex = 1.5)

3.8 Location and Ball-Strike Count

# Pitch counts by zone (rows) and ball-strike count (columns)
greinke_zone_tab <- table(greinke_sub[["zone"]], greinke_sub[["bs_count"]])

# Within each ball-strike count, the share of pitches thrown to each zone
zone_count_prop <- round(
  prop.table(greinke_zone_tab, margin = 2),
  digits = 3
)

# Show the proportions
print(zone_count_prop)
##     
##        0-0   0-1   0-2   1-0   1-1   1-2   2-0   2-1   2-2   3-0   3-1
##   1  0.007 0.002 0.006 0.010 0.005 0.025 0.000 0.000 0.000 0.000 0.000
##   2  0.005 0.007 0.018 0.007 0.000 0.014 0.000 0.012 0.007 0.000 0.000
##   3  0.007 0.012 0.012 0.003 0.008 0.025 0.000 0.006 0.010 0.000 0.000
##   4  0.001 0.009 0.006 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
##   5  0.039 0.035 0.012 0.026 0.027 0.018 0.036 0.065 0.007 0.053 0.020
##   6  0.065 0.024 0.006 0.056 0.030 0.021 0.107 0.053 0.041 0.000 0.100
##   7  0.072 0.078 0.054 0.056 0.077 0.050 0.024 0.059 0.092 0.105 0.060
##   8  0.018 0.033 0.048 0.010 0.019 0.028 0.000 0.000 0.003 0.000 0.000
##   9  0.065 0.094 0.030 0.066 0.066 0.050 0.083 0.041 0.054 0.158 0.020
##   10 0.131 0.073 0.054 0.171 0.148 0.050 0.167 0.183 0.126 0.105 0.200
##   11 0.191 0.130 0.078 0.197 0.156 0.121 0.179 0.189 0.153 0.263 0.080
##   12 0.049 0.071 0.078 0.036 0.060 0.032 0.012 0.018 0.044 0.000 0.000
##   13 0.026 0.068 0.072 0.026 0.052 0.074 0.024 0.018 0.010 0.053 0.020
##   14 0.096 0.111 0.114 0.138 0.101 0.138 0.167 0.142 0.129 0.105 0.160
##   15 0.132 0.083 0.120 0.115 0.101 0.113 0.131 0.154 0.139 0.105 0.260
##   16 0.058 0.068 0.096 0.023 0.082 0.057 0.000 0.018 0.061 0.053 0.040
##   17 0.001 0.009 0.018 0.003 0.008 0.014 0.000 0.000 0.007 0.000 0.000
##   18 0.012 0.035 0.048 0.016 0.030 0.060 0.000 0.018 0.041 0.000 0.020
##   19 0.012 0.033 0.072 0.033 0.019 0.060 0.060 0.018 0.054 0.000 0.020
##   20 0.011 0.024 0.054 0.007 0.011 0.050 0.012 0.006 0.020 0.000 0.000
##     
##        3-2
##   1  0.014
##   2  0.022
##   3  0.000
##   4  0.000
##   5  0.014
##   6  0.072
##   7  0.058
##   8  0.000
##   9  0.050
##   10 0.266
##   11 0.216
##   12 0.007
##   13 0.014
##   14 0.101
##   15 0.108
##   16 0.007
##   17 0.000
##   18 0.007
##   19 0.029
##   20 0.014

3.9 0-2 v. 3-0 Locations

# Create zone_count_diff: per-zone share in 0-2 counts minus the share in
# 3-0 counts. The columns are selected by their count labels so the result
# does not depend on how many distinct counts the table happens to contain.
zone_count_diff <- zone_count_prop[, "0-2"] - zone_count_prop[, "3-0"]

# Print the table
zone_count_diff
##      1      2      3      4      5      6      7      8      9     10 
##  0.006  0.018  0.012  0.006 -0.041  0.006 -0.051  0.048 -0.128 -0.051 
##     11     12     13     14     15     16     17     18     19     20 
## -0.185  0.078  0.019  0.009  0.015  0.043  0.018  0.048  0.072  0.054

3.10 Plotting Count-Based Locational Differences

# Open an empty plot covering the location window and overlay the grid
plot(
  x = c(-2, 2),
  y = c(0, 5),
  type = "n",
  main = "Greinke Locational Zone (0-2 vs. 3-0 Counts)",
  xlab = "Horizontal Location (ft.; Catcher's View)",
  ylab = "Vertical Location (ft.)"
)
grid(col = "black", lty = "solid")

# For each of the 20 zones, write the rounded 0-2 minus 3-0 difference at the
# mean zone_px / zone_pz of the pitches assigned to that zone
for (zone_id in seq_len(20)) {
  in_zone <- greinke_sub$zone == zone_id
  text(
    x = mean(greinke_sub$zone_px[in_zone]),
    y = mean(greinke_sub$zone_pz[in_zone]),
    labels = round(zone_count_diff[zone_id], 3),
    cex = 1.5
  )
}