# Set the default knitr chunk option `echo = TRUE` for this document.
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(Lahman)
# Batter rows from the Lahman `Batting` table with more than 450 plate
# appearances, 1903 onward, with rate statistics (BA, OBP, SLG, OPS) added.
# `yearID` is renamed to `Year`.
#
# NOTE(review): PA is approximated as AB + BB + HBP (no sacrifice hits or
# flies), and the PA filter is applied to each `Batting` row as-is; confirm
# both are intended.
df <- Batting %>%
  mutate(PA = AB + BB + HBP) %>%
  filter(PA > 450, yearID >= 1903) %>%
  mutate(
    BA = H / AB,
    # Times on base include hit-by-pitch, matching the HBP term in PA.
    OBP = (H + BB + HBP) / PA,
    # H already counts every hit once, so a double adds one extra base,
    # a triple two and a home run three.
    SLG = (H + X2B + 2 * X3B + 3 * HR) / AB,
    OPS = OBP + SLG,
    Year = yearID
  ) %>%
  select(-yearID)
# Per-year mean of every numeric column in `df` (NAs ignored).
by_year <- group_by(df, Year)
yearly <- summarize_if(by_year, is.numeric, mean, na.rm = TRUE)
rm(by_year)
# Scatter plot of the yearly mean batting average.
ggplot(data = yearly, mapping = aes(x = Year, y = BA)) +
  geom_point() +
  labs(
    title = "Rise and fall of batting average",
    y = "Batting average"
  ) +
  theme_bw()

# Scatter plot of yearly home runs per at-bat (ratio of the yearly means).
ggplot(data = yearly, mapping = aes(x = Year, y = HR / AB)) +
  geom_point() +
  theme_bw() +
  labs(title = "The rise of the home run")

# Scatter plot of yearly stolen bases per plate appearance.
ggplot(data = yearly, mapping = aes(x = Year, y = SB / PA)) +
  geom_point() +
  theme_bw() +
  labs(
    title = "The decline, revival, and decline of stolen bases",
    # The rate is plotted on the y axis; the x axis is Year.
    y = "Stolen bases per plate appearance"
  )

# Scatter plot of yearly walks per plate appearance.
ggplot(data = yearly, mapping = aes(x = Year, y = BB / PA)) +
  geom_point() +
  theme_bw() +
  labs(title = "Walks per plate appearance")

# Scatter plot of yearly strikeouts per plate appearance.
# Years where SO / PA is missing are dropped by geom_point() with a warning.
ggplot(data = yearly, mapping = aes(x = Year, y = SO / PA)) +
  geom_point() +
  theme_bw() +
  labs(
    title = "Rise of the strike out",
    y = "Strike outs per plate appearance"
  )
## Warning: Removed 7 rows containing missing values (geom_point).

# Pearson correlation between the yearly strikeout rate and home-run rate,
# both expressed per plate appearance.
so_pa <- yearly[["SO"]] / yearly[["PA"]]
hr_pa <- yearly[["HR"]] / yearly[["PA"]]
cor.test(x = so_pa, y = hr_pa)
##
## Pearson's product-moment correlation
##
## data: so_pa and hr_pa
## t = 13.63, df = 105, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7184756 0.8588296
## sample estimates:
## cor
## 0.7993022