# Import Greinke dataset
# stringsAsFactors = TRUE keeps the text columns as factors on R >= 4.0 too;
# the str() output and the droplevels() call further down rely on that.
greinke <- read.csv("greinke.csv", stringsAsFactors = TRUE)
# Print the first 6 rows of the data
head(greinke)
## p_name pitcher_id batter_stand pitch_type pitch_result
## 1 Zack Greinke 425844 R FF Ball
## 2 Zack Greinke 425844 R FF Swinging Strike
## 3 Zack Greinke 425844 R FF Called Strike
## 4 Zack Greinke 425844 R SL Swinging Strike
## 5 Zack Greinke 425844 R FF Swinging Strike
## 6 Zack Greinke 425844 R SL Swinging Strike
## atbat_result start_speed z0 x0 pfx_x pfx_z px pz
## 1 Walk 94.2 5.997 -0.675 -4.457 9.760 1.714 1.925
## 2 Single 92.4 6.281 -0.760 -1.590 11.400 0.589 3.271
## 3 Home Run 92.7 6.168 -0.958 -1.884 9.245 0.399 2.918
## 4 Strikeout 86.9 6.077 -0.939 3.594 0.762 0.764 1.306
## 5 Strikeout 92.8 6.107 -0.524 -0.558 11.134 1.517 2.193
## 6 Strikeout 87.8 6.321 -0.948 4.313 0.132 0.695 3.431
## break_angle break_length spin_rate spin_dir balls strikes outs game_date
## 1 24.8 3.5 2188.802 204.457 2 2 2 10/3/2015
## 2 10.1 2.7 2312.202 187.913 1 1 0 10/3/2015
## 3 9.2 3.5 1889.841 191.468 0 0 1 10/3/2015
## 4 -11.4 8.0 693.649 102.648 1 2 0 10/3/2015
## 5 -0.4 2.8 2242.916 182.859 1 2 0 10/3/2015
## 6 -13.6 7.8 828.693 92.330 2 2 1 10/3/2015
## inning inning_topbot batted_ball_type batted_ball_velocity hc_x hc_y
## 1 4 top NA 0.00 0.00
## 2 3 top 104 123.56 97.26
## 3 5 top 103 50.88 31.17
## 4 6 top NA 0.00 0.00
## 5 8 top NA 0.00 0.00
## 6 1 top NA 0.00 0.00
## pitch_id distance_feet
## 1 160 NA
## 2 95 0
## 3 218 425
## 4 265 NA
## 5 374 NA
## 6 14 NA
# How many pitches are in the raw data?
NROW(greinke)
## [1] 3239
# Distribution of start_speed, including the count of missing values
summary(greinke[["start_speed"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 52.20 87.30 89.80 88.44 91.80 95.40 3
# Keep only the rows where start_speed was recorded
greinke <- greinke[!is.na(greinke$start_speed), ]
# How many pitches remain after dropping the missing ones?
NROW(greinke)
## [1] 3236
# Show the column types of the cleaned data frame
str(greinke)
## 'data.frame': 3236 obs. of 29 variables:
## $ p_name : Factor w/ 1 level "Zack Greinke": 1 1 1 1 1 1 1 1 1 1 ...
## $ pitcher_id : int 425844 425844 425844 425844 425844 425844 425844 425844 425844 425844 ...
## $ batter_stand : Factor w/ 2 levels "L","R": 2 2 2 2 2 2 2 2 2 2 ...
## $ pitch_type : Factor w/ 8 levels "","CH","CU","EP",..: 5 5 5 8 5 8 2 5 8 8 ...
## $ pitch_result : Factor w/ 15 levels "Ball","Ball In Dirt",..: 1 14 3 14 14 14 15 3 4 14 ...
## $ atbat_result : Factor w/ 24 levels "Bunt Groundout",..: 24 20 12 21 21 21 21 21 10 24 ...
## $ start_speed : num 94.2 92.4 92.7 86.9 92.8 87.8 90.3 92.7 85.5 87.3 ...
## $ z0 : num 6 6.28 6.17 6.08 6.11 ...
## $ x0 : num -0.675 -0.76 -0.958 -0.939 -0.524 ...
## $ pfx_x : num -4.457 -1.59 -1.884 3.594 -0.558 ...
## $ pfx_z : num 9.76 11.4 9.245 0.762 11.134 ...
## $ px : num 1.714 0.589 0.399 0.764 1.517 ...
## $ pz : num 1.93 3.27 2.92 1.31 2.19 ...
## $ break_angle : num 24.8 10.1 9.2 -11.4 -0.4 -13.6 22.5 25.1 -8.4 -11.3 ...
## $ break_length : num 3.5 2.7 3.5 8 2.8 7.8 7.4 3.8 7.5 7.4 ...
## $ spin_rate : num 2189 2312 1890 694 2243 ...
## $ spin_dir : num 204 188 191 103 183 ...
## $ balls : int 2 1 0 1 1 2 1 0 0 0 ...
## $ strikes : int 2 1 0 2 2 2 2 2 0 1 ...
## $ outs : int 2 0 1 0 0 1 1 2 2 2 ...
## $ game_date : Factor w/ 32 levels "10/3/2015","4/12/2015",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ inning : int 4 3 5 6 8 1 6 5 8 4 ...
## $ inning_topbot : Factor w/ 2 levels "bot","top": 2 2 2 2 2 2 2 2 2 2 ...
## $ batted_ball_type : Factor w/ 5 levels "","FB","GB","LD",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ batted_ball_velocity: int NA 104 103 NA NA NA NA NA NA NA ...
## $ hc_x : num 0 123.6 50.9 0 0 ...
## $ hc_y : num 0 97.3 31.2 0 0 ...
## $ pitch_id : int 160 95 218 265 374 14 279 231 386 156 ...
## $ distance_feet : int NA 0 425 NA NA NA NA NA NA NA ...
# Inspect how game_date is currently stored
class(greinke[["game_date"]])
## [1] "factor"
# Parse the month/day/year text into Date objects
greinke[["game_date"]] <- as.Date(greinke[["game_date"]], format = "%m/%d/%Y")
# Confirm the column is now a Date
class(greinke[["game_date"]])
## [1] "Date"
library(tidyr)
# Split game_date on "-" into year, month and day columns while keeping
# the original game_date column (remove = FALSE)
greinke <- separate(
  greinke, game_date,
  into = c("year", "month", "day"),
  sep = "-", remove = FALSE
)
# Store month as a number so it can be compared with 7 below
greinke[["month"]] <- as.numeric(greinke[["month"]])
# Label every pitch as thrown in July or in any other month
greinke[["july"]] <- ifelse(greinke[["month"]] == 7, "july", "other")
# Show the first rows, now including year, month, day and july
head(greinke)
## p_name pitcher_id batter_stand pitch_type pitch_result
## 1 Zack Greinke 425844 R FF Ball
## 2 Zack Greinke 425844 R FF Swinging Strike
## 3 Zack Greinke 425844 R FF Called Strike
## 4 Zack Greinke 425844 R SL Swinging Strike
## 5 Zack Greinke 425844 R FF Swinging Strike
## 6 Zack Greinke 425844 R SL Swinging Strike
## atbat_result start_speed z0 x0 pfx_x pfx_z px pz
## 1 Walk 94.2 5.997 -0.675 -4.457 9.760 1.714 1.925
## 2 Single 92.4 6.281 -0.760 -1.590 11.400 0.589 3.271
## 3 Home Run 92.7 6.168 -0.958 -1.884 9.245 0.399 2.918
## 4 Strikeout 86.9 6.077 -0.939 3.594 0.762 0.764 1.306
## 5 Strikeout 92.8 6.107 -0.524 -0.558 11.134 1.517 2.193
## 6 Strikeout 87.8 6.321 -0.948 4.313 0.132 0.695 3.431
## break_angle break_length spin_rate spin_dir balls strikes outs
## 1 24.8 3.5 2188.802 204.457 2 2 2
## 2 10.1 2.7 2312.202 187.913 1 1 0
## 3 9.2 3.5 1889.841 191.468 0 0 1
## 4 -11.4 8.0 693.649 102.648 1 2 0
## 5 -0.4 2.8 2242.916 182.859 1 2 0
## 6 -13.6 7.8 828.693 92.330 2 2 1
## game_date year month day inning inning_topbot batted_ball_type
## 1 2015-10-03 2015 10 03 4 top
## 2 2015-10-03 2015 10 03 3 top
## 3 2015-10-03 2015 10 03 5 top
## 4 2015-10-03 2015 10 03 6 top
## 5 2015-10-03 2015 10 03 8 top
## 6 2015-10-03 2015 10 03 1 top
## batted_ball_velocity hc_x hc_y pitch_id distance_feet july
## 1 NA 0.00 0.00 160 NA other
## 2 104 123.56 97.26 95 0 other
## 3 103 50.88 31.17 218 425 other
## 4 NA 0.00 0.00 265 NA other
## 5 NA 0.00 0.00 374 NA other
## 6 NA 0.00 0.00 14 NA other
# Print a summary of the july variable
summary(factor(greinke$july))
## july other
## 524 2712
# Histogram of the start speed of every pitch
hist(greinke$start_speed)
# Split the pitches into July and all other months
greinke_july <- subset(greinke, july == "july")
greinke_other <- subset(greinke, july == "other")
# Draw the next two histograms side by side (1 row, 2 columns)
par(mfrow = c(1, 2))
hist(greinke_july$start_speed)
hist(greinke_other$start_speed)
# Keep only the 4-seam fastballs (pitch_type "FF") from each subset
july_ff <- subset(greinke_july, pitch_type == "FF")
other_ff <- subset(greinke_other, pitch_type == "FF")
# Start a fresh side-by-side page for the fastball-only histograms
par(mfrow = c(1, 2))
hist(july_ff$start_speed)
hist(other_ff$start_speed)
# Go back to one plot per page; without this the overlay below and every
# later single plot would be squeezed into one half of a 1 x 2 layout
par(mfrow = c(1, 1))
# Density-scaled histogram of fastball speed in the other months ...
hist(other_ff$start_speed,
     col = "#00009950", freq = FALSE,
     ylim = c(0, .35), xlab = "Velocity (mph)",
     main = "Greinke 4-Seam Fastball Velocity")
# ... with the July histogram drawn on top in a second translucent colour
hist(july_ff$start_speed, add = TRUE,
     col = "#99000050", freq = FALSE)
# Vertical line at the mean fastball speed of the other months
abline(v = mean(other_ff$start_speed),
       col = "#00009950", lwd = 2)
# Vertical line at the mean fastball speed in July
abline(v = mean(july_ff$start_speed),
       col = "#99000050", lwd = 2)
# Mean start speed over all pitch types, July vs. other months
tapply(greinke$start_speed, greinke$july, mean)
## july other
## 88.86489 88.35601
# Keep only the 4-seam fastballs (pitch_type "FF")
greinke_ff <- greinke[which(greinke$pitch_type == "FF"), ]
# Mean fastball speed for July and for the other months
ff_velo_month <- with(greinke_ff, tapply(start_speed, july, mean))
# Show the two means
ff_velo_month
## july other
## 92.42077 91.66474
# Mean fastball speed per game date, wrapped in a one-column data frame.
# The tapply() call stays inline because data.frame() derives the column
# name from the expression text.
ff_dt <- data.frame(tapply(greinke_ff$start_speed, greinke_ff$game_date, mean))
# Show the first 6 game dates
head(ff_dt, n = 6)
## tapply.greinke_ff.start_speed..greinke_ff.game_date..mean.
## 2015-04-07 90.82632
## 2015-04-12 90.51622
## 2015-04-18 90.28654
## 2015-04-24 90.51277
## 2015-04-29 90.40732
## 2015-05-05 90.33043
# The row names hold the game dates as text; turn them into a Date column
ff_dt[["game_date"]] <- as.Date(rownames(ff_dt), format = "%Y-%m-%d")
# Give the auto-named first column a readable name
names(ff_dt)[1] <- "start_speed"
# Replace the date row names with the default 1..n numbering
rownames(ff_dt) <- NULL
# Show the tidied data frame
head(ff_dt)
## start_speed game_date
## 1 90.82632 2015-04-07
## 2 90.51622 2015-04-12
## 3 90.28654 2015-04-18
## 4 90.51277 2015-04-24
## 5 90.40732 2015-04-29
## 6 90.33043 2015-05-05
# Line chart of the mean 4-seam fastball speed per game
plot(start_speed ~ game_date, data = ff_dt,
     type = "l", lwd = 4, ylim = c(88, 95),
     main = "Greinke 4-Seam Fastball Velocity",
     xlab = "Date", ylab = "Velocity (mph)")
# Draw the same chart again as the base layer for the points added below
plot(start_speed ~ game_date, data = ff_dt,
     type = "l", lwd = 4, ylim = c(88, 95),
     main = "Greinke 4-Seam Fastball Velocity",
     xlab = "Date", ylab = "Velocity (mph)")
# Overlay every individual fastball, jittered along the date axis so
# pitches from the same game do not sit on a single vertical line
points(jitter(as.numeric(greinke_ff$game_date)), greinke_ff$start_speed,
       pch = 16, col = "#99004450")
# Remove the pitch types "IN" and "EP" from the data
greinke <- subset(greinke, !(pitch_type == "IN" | pitch_type == "EP"))
# Rebuild pitch_type with only the levels that are still present.
# factor() is used instead of droplevels() because droplevels() has no
# method for character vectors, which is what read.csv() returns by
# default on R >= 4.0.
greinke$pitch_type <- factor(greinke$pitch_type)
# Count pitches by pitch type (rows) and July / other months (columns)
type_tab <- table(greinke$pitch_type, greinke$july)
# Print type_tab
type_tab
##
## july other
## CH 112 487
## CU 51 242
## FF 207 1191
## FT 66 255
## SL 86 535
# Share of each pitch type within July and within the other months
# (margin = 2 makes every column sum to 1), rounded to 3 decimals
type_prop <- round(prop.table(type_tab, margin = 2), 3)
# Print type_prop
type_prop
##
## july other
## CH 0.215 0.180
## CU 0.098 0.089
## FF 0.397 0.439
## FT 0.126 0.094
## SL 0.165 0.197
# Pull out the 4-seam fastball row by name rather than by position, so the
# right row is used even if the set or order of pitch types changes
ff_prop <- type_prop["FF", ]
# Print ff_prop
ff_prop
## july other
## 0.397 0.439
# Print ff_velo_month for comparison with the usage shares
ff_velo_month
## july other
## 92.42077 91.66474
# Turn the proportion table into a data frame with one row per pitch type
type_prop <- as.data.frame.matrix(type_prop)
# Copy the pitch-type labels (currently the row names) into a regular column
Pitch <- rownames(type_prop)
type_prop <- cbind(Pitch, type_prop)
names(type_prop) <- c("Pitch", "July", "Other")
# Relative change in usage: (July share - Other share) / Other share
type_prop[["Difference"]] <- with(type_prop, (July - Other) / Other)
# Show the finished table
type_prop
## Pitch July Other Difference
## CH CH 0.215 0.180 0.19444444
## CU CU 0.098 0.089 0.10112360
## FF FF 0.397 0.439 -0.09567198
## FT FT 0.126 0.094 0.34042553
## SL SL 0.165 0.197 -0.16243655
# Bar chart of the relative change in pitch usage in July.
# The y-axis spans at least -0.3..0.3 and widens when a bar is larger
# (the FT bar is 0.340), so no bar runs past the end of the axis.
barplot(type_prop$Difference, names.arg = type_prop$Pitch,
        main = "Pitch Usage in July vs. Other Months",
        ylab = "Percentage Change in July",
        ylim = range(-0.3, 0.3, type_prop$Difference, finite = TRUE))
# Build the ball-strike count label, e.g. balls 2 and strikes 1 -> "2-1"
greinke$bs_count <- paste(greinke$balls, greinke$strikes, sep = "-")
# Print the first 6 rows of greinke
head(greinke)
## p_name pitcher_id batter_stand pitch_type pitch_result
## 1 Zack Greinke 425844 R FF Ball
## 2 Zack Greinke 425844 R FF Swinging Strike
## 3 Zack Greinke 425844 R FF Called Strike
## 4 Zack Greinke 425844 R SL Swinging Strike
## 5 Zack Greinke 425844 R FF Swinging Strike
## 6 Zack Greinke 425844 R SL Swinging Strike
## atbat_result start_speed z0 x0 pfx_x pfx_z px pz
## 1 Walk 94.2 5.997 -0.675 -4.457 9.760 1.714 1.925
## 2 Single 92.4 6.281 -0.760 -1.590 11.400 0.589 3.271
## 3 Home Run 92.7 6.168 -0.958 -1.884 9.245 0.399 2.918
## 4 Strikeout 86.9 6.077 -0.939 3.594 0.762 0.764 1.306
## 5 Strikeout 92.8 6.107 -0.524 -0.558 11.134 1.517 2.193
## 6 Strikeout 87.8 6.321 -0.948 4.313 0.132 0.695 3.431
## break_angle break_length spin_rate spin_dir balls strikes outs
## 1 24.8 3.5 2188.802 204.457 2 2 2
## 2 10.1 2.7 2312.202 187.913 1 1 0
## 3 9.2 3.5 1889.841 191.468 0 0 1
## 4 -11.4 8.0 693.649 102.648 1 2 0
## 5 -0.4 2.8 2242.916 182.859 1 2 0
## 6 -13.6 7.8 828.693 92.330 2 2 1
## game_date year month day inning inning_topbot batted_ball_type
## 1 2015-10-03 2015 10 03 4 top
## 2 2015-10-03 2015 10 03 3 top
## 3 2015-10-03 2015 10 03 5 top
## 4 2015-10-03 2015 10 03 6 top
## 5 2015-10-03 2015 10 03 8 top
## 6 2015-10-03 2015 10 03 1 top
## batted_ball_velocity hc_x hc_y pitch_id distance_feet july bs_count
## 1 NA 0.00 0.00 160 NA other 2-2
## 2 104 123.56 97.26 95 0 other 1-1
## 3 103 50.88 31.17 218 425 other 0-0
## 4 NA 0.00 0.00 265 NA other 1-2
## 5 NA 0.00 0.00 374 NA other 1-2
## 6 NA 0.00 0.00 14 NA other 2-2
# Count pitches by ball-strike count (rows) and July / other months (columns)
bs_count_tab <- table(greinke[["bs_count"]], greinke[["july"]])
# Share of each count within its column, rounded to 3 decimals
bs_month <- round(prop.table(bs_count_tab, 2), digits = 3)
# Show the proportions
bs_month
##
## july other
## 0-0 0.261 0.262
## 0-1 0.134 0.135
## 0-2 0.056 0.063
## 1-0 0.105 0.093
## 1-1 0.123 0.113
## 1-2 0.092 0.097
## 2-0 0.029 0.025
## 2-1 0.052 0.053
## 2-2 0.086 0.094
## 3-0 0.006 0.006
## 3-1 0.015 0.015
## 3-2 0.042 0.043
# Relative change of each count's share in July versus the other months:
# (july - other) / other, rounded to 3 decimals
diff_bs <- round(
  (bs_month[, "july"] - bs_month[, "other"]) / bs_month[, "other"],
  digits = 3
)
# Show the relative changes
diff_bs
## 0-0 0-1 0-2 1-0 1-1 1-2 2-0 2-1 2-2 3-0
## -0.004 -0.007 -0.111 0.129 0.088 -0.052 0.160 -0.019 -0.085 0.000
## 3-1 3-2
## 0.000 -0.023
# Bar chart of the relative change in count frequency in July.
# The y-axis spans at least -0.15..0.15 and widens when a bar is larger
# (the 2-0 bar is 0.160), so no bar runs past the end of the axis.
# las = 2 turns the axis labels perpendicular to the axis.
barplot(diff_bs, main = "Ball-Strike Count Rate in July vs. Other Months",
        ylab = "Percentage Change in July",
        ylim = range(-0.15, 0.15, diff_bs, finite = TRUE), las = 2)
# Count pitches by pitch type (rows) and ball-strike count (columns)
type_bs <- table(greinke$pitch_type, greinke$bs_count)
# Print type_bs
type_bs
##
## 0-0 0-1 0-2 1-0 1-1 1-2 2-0 2-1 2-2 3-0 3-1 3-2
## CH 92 93 36 70 79 62 27 46 52 0 18 24
## CU 124 49 10 34 38 9 4 12 9 0 0 4
## FF 482 167 61 136 136 89 37 71 109 17 24 69
## FT 54 55 19 32 50 31 11 18 34 2 3 12
## SL 93 71 75 35 68 119 5 24 96 0 5 30
# Pitch-type mix within each ball-strike count: every column of type_bs
# is divided by its own total, then rounded to 3 decimals
type_bs_prop <- round(prop.table(type_bs, 2), digits = 3)
# Show the proportions
type_bs_prop
##
## 0-0 0-1 0-2 1-0 1-1 1-2 2-0 2-1 2-2 3-0 3-1
## CH 0.109 0.214 0.179 0.228 0.213 0.200 0.321 0.269 0.173 0.000 0.360
## CU 0.147 0.113 0.050 0.111 0.102 0.029 0.048 0.070 0.030 0.000 0.000
## FF 0.570 0.384 0.303 0.443 0.367 0.287 0.440 0.415 0.363 0.895 0.480
## FT 0.064 0.126 0.095 0.104 0.135 0.100 0.131 0.105 0.113 0.105 0.060
## SL 0.110 0.163 0.373 0.114 0.183 0.384 0.060 0.140 0.320 0.000 0.100
##
## 3-2
## CH 0.173
## CU 0.029
## FF 0.496
## FT 0.086
## SL 0.216
# Flag pitches thrown after the 5th inning: level "1" when inning > 5,
# level "0" otherwise
greinke$late_in_game <- factor(as.numeric(greinke$inning > 5))
# Count pitches by pitch type (rows) and the late-in-game flag (columns)
type_late <- table(greinke[["pitch_type"]], greinke[["late_in_game"]])
# Pitch-type mix within each game phase, rounded to 3 decimals
type_late_prop <- round(prop.table(type_late, 2), digits = 3)
# Show the proportions
type_late_prop
##
## 0 1
## CH 0.178 0.204
## CU 0.086 0.102
## FF 0.444 0.403
## FT 0.107 0.080
## SL 0.185 0.211
# Transpose the proportion table so rows are the game phase and columns are
# the pitch types. The proportions are used rather than the raw counts in
# type_late so the bars match the "Proportion" axis label below.
t_type_late <- t(type_late_prop)
# Print dimensions of t_type_late
dim(t_type_late)
## [1] 2 5
# Print dimensions of type_late
dim(type_late)
## [1] 5 2
# Replace the 0 / 1 row labels with readable names
rownames(t_type_late) <- c("Early", "Late")
# Grouped bar chart: one Early and one Late bar per pitch type
barplot(t_type_late, beside = TRUE, col = c("red", "blue"),
        main = "Early vs. Late In Game Pitch Selection",
        ylab = "Pitch Selection Proportion",
        legend = rownames(t_type_late))
# Calculate average pitch height in inches in July vs. other months
tapply(greinke$pz, greinke$july, mean) * 12
## july other
## 26.26002 26.39904
# Pitches thrown to batters with batter_stand "L"
greinke_lhb <- greinke[which(greinke$batter_stand == "L"), ]
# Pitches thrown to batters with batter_stand "R"
greinke_rhb <- greinke[which(greinke$batter_stand == "R"), ]
# Mean px against "L" batters, July vs. other months, multiplied by 12
12 * with(greinke_lhb, tapply(px, july, mean))
## july other
## -4.627355 -6.320144
# Mean px against "R" batters, July vs. other months, multiplied by 12
12 * with(greinke_rhb, tapply(px, july, mean))
## july other
## 4.032226 5.731850
# Plot the location of all pitches, coloured by July / other months
plot(greinke$pz ~ greinke$px,
     col = factor(greinke$july),
     xlim = c(-3, 3))
# Draw the next two plots side by side (1 row, 2 columns)
par(mfrow = c(1, 2))
# Plot the pitch locations for July
plot(pz ~ px, data = greinke_july,
     col = "black", pch = 16,
     xlim = c(-3, 3), ylim = c(-1, 6),
     main = "July")
# Plot the pitch locations for other months
plot(pz ~ px, data = greinke_other,
     col = "black", pch = 16,
     xlim = c(-3, 3), ylim = c(-1, 6),
     main = "Other months")
# Go back to one plot per page; without this every later plot would be
# squeezed into one half of a 1 x 2 layout
par(mfrow = c(1, 1))
# Keep the pitches inside the window -2 < px < 2 and 0 < pz < 5
greinke_sub <- subset(greinke, abs(px) < 2 & pz > 0 & pz < 5)
# Empty plot (type = "n") that only sets up the axes for that window
plot(c(-2, 2), c(0, 5), type = "n",
     main = "Greinke Locational Zone Proportions",
     xlab = "Horizontal Location (ft.; Catcher's View)",
     ylab = "Vertical Location (ft.)")
# Overlay solid black grid lines
grid(col = "black", lty = "solid")
# Append the columns stored in greinke_sub_zone.csv to greinke_sub.
# NOTE(review): cbind() pairs rows by position, so the CSV is presumably in
# the same row order as greinke_sub -- confirm.
greinke_sub <- cbind(greinke_sub, read.csv(file = "greinke_sub_zone.csv"))
# Empty plot (type = "n") that only sets up the axes for the window
plot(c(-2, 2), c(0, 5), type = "n",
     main = "Greinke Locational Zone Proportions",
     xlab = "Horizontal Location (ft.; Catcher's View)",
     ylab = "Vertical Location (ft.)")
# Overlay solid black grid lines
grid(col = "black", lty = "solid")
# Number of pitches in each zone
greinke_table <- table(greinke_sub[["zone"]])
# Share of all pitches that fall in each zone, rounded to 3 decimals
zone_prop <- round(prop.table(greinke_table), digits = 3)
# Write the first zone's share at (-1.5, 4.5)
text(x = -1.5, y = 4.5, labels = zone_prop[1], cex = 1.5)
# Empty plot (type = "n") that only sets up the axes for the window
plot(c(-2, 2), c(0, 5), type = "n",
     main = "Greinke Locational Zone Proportions",
     xlab = "Horizontal Location (ft.; Catcher's View)",
     ylab = "Vertical Location (ft.)")
# Overlay solid black grid lines
grid(col = "black", lty = "solid")
# For zones 1 to 20, write that zone's share at the mean zone_px / zone_pz
# of the pitches assigned to it
for (zone_id in 1:20) {
  in_zone <- greinke_sub$zone == zone_id
  text(x = mean(greinke_sub$zone_px[in_zone]),
       y = mean(greinke_sub$zone_pz[in_zone]),
       labels = zone_prop[zone_id], cex = 1.5)
}
# Share of July pitches in each zone: zone counts among July rows divided
# by the number of July rows, rounded to 3 decimals
zone_prop_july <- round(
  table(greinke_sub$zone[greinke_sub$july == "july"]) /
    sum(greinke_sub$july == "july", na.rm = TRUE),
  digits = 3
)
# The same calculation for the pitches thrown in the other months
zone_prop_other <- round(
  table(greinke_sub$zone[greinke_sub$july == "other"]) /
    sum(greinke_sub$july == "other", na.rm = TRUE),
  digits = 3
)
# Show the July shares
zone_prop_july
##
## 1 2 3 5 6 7 8 9 10 11 12 13
## 0.004 0.002 0.006 0.036 0.058 0.060 0.020 0.090 0.126 0.160 0.030 0.040
## 14 15 16 17 18 19 20
## 0.128 0.110 0.050 0.002 0.036 0.028 0.016
# Show the shares for the other months
zone_prop_other
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 0.008 0.008 0.010 0.002 0.028 0.044 0.070 0.018 0.058 0.128 0.163 0.049
## 13 14 15 16 17 18 19 20
## 0.039 0.114 0.123 0.056 0.006 0.025 0.032 0.019
# Expand zone_prop_july to all 20 zones. Start from zeros and copy each
# observed share into the slot with the matching zone name, so any zone
# without July pitches (zone 4 in this data) becomes 0 instead of relying
# on a hard-coded position.
zone_prop_july2 <- setNames(numeric(20), 1:20)
zone_prop_july2[names(zone_prop_july)] <- as.vector(zone_prop_july)
# Difference in zone share: July minus the other months
zone_prop_diff <- zone_prop_july2 - zone_prop_other
# Print zone_prop_diff
zone_prop_diff
##
## 1 2 3 4 5 6 7 8 9 10
## -0.004 -0.006 -0.004 -0.002 0.008 0.014 -0.010 0.002 0.032 -0.002
## 11 12 13 14 15 16 17 18 19 20
## -0.003 -0.019 0.001 0.014 -0.013 -0.006 -0.004 0.011 -0.004 -0.003
# Empty plot (type = "n") that only sets up the axes for the window
plot(c(-2, 2), c(0, 5), type = "n",
     main = "Greinke Locational Zone Proportions",
     xlab = "Horizontal Location (ft.; Catcher's View)",
     ylab = "Vertical Location (ft.)")
# Overlay solid black grid lines
grid(col = "black", lty = "solid")
# For zones 1 to 20, write the July-minus-other difference at the mean
# zone_px / zone_pz of the pitches assigned to that zone
for (zone_id in 1:20) {
  in_zone <- greinke_sub$zone == zone_id
  text(x = mean(greinke_sub$zone_px[in_zone]),
       y = mean(greinke_sub$zone_pz[in_zone]),
       labels = zone_prop_diff[zone_id], cex = 1.5)
}
# Count pitches by zone (rows) and ball-strike count (columns)
greinke_zone_tab <- table(greinke_sub[["zone"]], greinke_sub[["bs_count"]])
# Zone mix within each ball-strike count, rounded to 3 decimals
zone_count_prop <- round(prop.table(greinke_zone_tab, 2), digits = 3)
# Show the proportions
zone_count_prop
##
## 0-0 0-1 0-2 1-0 1-1 1-2 2-0 2-1 2-2 3-0 3-1
## 1 0.007 0.002 0.006 0.010 0.005 0.025 0.000 0.000 0.000 0.000 0.000
## 2 0.005 0.007 0.018 0.007 0.000 0.014 0.000 0.012 0.007 0.000 0.000
## 3 0.007 0.012 0.012 0.003 0.008 0.025 0.000 0.006 0.010 0.000 0.000
## 4 0.001 0.009 0.006 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## 5 0.039 0.035 0.012 0.026 0.027 0.018 0.036 0.065 0.007 0.053 0.020
## 6 0.065 0.024 0.006 0.056 0.030 0.021 0.107 0.053 0.041 0.000 0.100
## 7 0.072 0.078 0.054 0.056 0.077 0.050 0.024 0.059 0.092 0.105 0.060
## 8 0.018 0.033 0.048 0.010 0.019 0.028 0.000 0.000 0.003 0.000 0.000
## 9 0.065 0.094 0.030 0.066 0.066 0.050 0.083 0.041 0.054 0.158 0.020
## 10 0.131 0.073 0.054 0.171 0.148 0.050 0.167 0.183 0.126 0.105 0.200
## 11 0.191 0.130 0.078 0.197 0.156 0.121 0.179 0.189 0.153 0.263 0.080
## 12 0.049 0.071 0.078 0.036 0.060 0.032 0.012 0.018 0.044 0.000 0.000
## 13 0.026 0.068 0.072 0.026 0.052 0.074 0.024 0.018 0.010 0.053 0.020
## 14 0.096 0.111 0.114 0.138 0.101 0.138 0.167 0.142 0.129 0.105 0.160
## 15 0.132 0.083 0.120 0.115 0.101 0.113 0.131 0.154 0.139 0.105 0.260
## 16 0.058 0.068 0.096 0.023 0.082 0.057 0.000 0.018 0.061 0.053 0.040
## 17 0.001 0.009 0.018 0.003 0.008 0.014 0.000 0.000 0.007 0.000 0.000
## 18 0.012 0.035 0.048 0.016 0.030 0.060 0.000 0.018 0.041 0.000 0.020
## 19 0.012 0.033 0.072 0.033 0.019 0.060 0.060 0.018 0.054 0.000 0.020
## 20 0.011 0.024 0.054 0.007 0.011 0.050 0.012 0.006 0.020 0.000 0.000
##
## 3-2
## 1 0.014
## 2 0.022
## 3 0.000
## 4 0.000
## 5 0.014
## 6 0.072
## 7 0.058
## 8 0.000
## 9 0.050
## 10 0.266
## 11 0.216
## 12 0.007
## 13 0.014
## 14 0.101
## 15 0.108
## 16 0.007
## 17 0.000
## 18 0.007
## 19 0.029
## 20 0.014
# Difference in zone share between 0-2 counts and 3-0 counts. The columns
# are selected by name so the comparison stays correct if the set or order
# of ball-strike counts in the table changes.
zone_count_diff <- zone_count_prop[, "0-2"] - zone_count_prop[, "3-0"]
# Print the table
zone_count_diff
## 1 2 3 4 5 6 7 8 9 10
## 0.006 0.018 0.012 0.006 -0.041 0.006 -0.051 0.048 -0.128 -0.051
## 11 12 13 14 15 16 17 18 19 20
## -0.185 0.078 0.019 0.009 0.015 0.043 0.018 0.048 0.072 0.054
# Empty plot (type = "n") that only sets up the axes for the window
plot(c(-2, 2), c(0, 5), type = "n",
     main = "Greinke Locational Zone (0-2 vs. 3-0 Counts)",
     xlab = "Horizontal Location (ft.; Catcher's View)",
     ylab = "Vertical Location (ft.)")
# Overlay solid black grid lines
grid(col = "black", lty = "solid")
# For zones 1 to 20, write the 0-2 minus 3-0 difference (rounded to 3
# decimals) at the mean zone_px / zone_pz of the pitches in that zone
for (zone_id in 1:20) {
  in_zone <- greinke_sub$zone == zone_id
  text(x = mean(greinke_sub$zone_px[in_zone]),
       y = mean(greinke_sub$zone_pz[in_zone]),
       labels = round(zone_count_diff[zone_id], 3), cex = 1.5)
}