# Set the knitr chunk option `echo` to TRUE for all chunks, so each chunk's
# source code is shown in the knitted output.
knitr::opts_chunk$set(echo = TRUE)
# Attach the tidyverse packages used below (dplyr verbs, the %>% pipe, ggplot2).
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0       ✔ purrr   0.3.2  
## ✔ tibble  2.1.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.3       ✔ stringr 1.4.0  
## ✔ readr   1.3.1       ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(Lahman)
# Batting rows from 1903 onward with more than 450 (approximate) plate
# appearances, augmented with the usual rate statistics.
df = Batting %>%
    mutate(
        # Plate appearances approximated as AB + BB + HBP; sacrifices are not
        # counted here.
        PA=AB + BB + HBP
    ) %>%
    # filter() also drops rows whose PA is NA (e.g. a missing HBP), so HBP is
    # always available in the mutate below.
    filter(PA > 450, yearID >= 1903) %>%
    mutate(
        BA=H / AB,
        # HBP is part of PA, so it must also count as reaching base; otherwise
        # every hit-by-pitch would be scored as an out.
        OBP=(H + BB + HBP) / PA,
        # Total bases: H already gives one base for every hit, so doubles,
        # triples and home runs add only their 1, 2 and 3 extra bases.
        SLG=(H + X2B + 2*X3B + 3*HR) / AB,
        OPS=OBP + SLG,
        Year=yearID
    ) %>%
    select(-yearID)

# One row per Year: the mean (ignoring NAs) of every numeric column of `df`
# over that year's rows.
yearly <- summarize_if(group_by(df, Year), is.numeric, mean, na.rm = TRUE)
# Scatter plot of the yearly mean batting average.
ggplot(yearly, aes(Year, BA)) +
    geom_point() +
    theme_bw() +
    labs(
        title = "Rise and fall of batting average",
        y = "Batting average"
    )

# Scatter plot of home runs per at-bat, from the yearly mean HR and AB.
ggplot(yearly, aes(Year, HR / AB)) +
    geom_point() +
    labs(title = "The rise of the home run") +
    theme_bw()

# Scatter plot of stolen bases per plate appearance by season.
yearly %>%
    ggplot(aes(x=Year, y=SB/PA)) +
    geom_point() +
    theme_bw() +
    labs(
        title='The decline, revival, and decline of stolen bases',
        # The rate is plotted on the y axis; the x axis stays "Year".
        y='Stolen bases per plate appearance'
    )

# Scatter plot of walks per plate appearance, from the yearly mean BB and PA.
ggplot(yearly, aes(Year, BB / PA)) +
    geom_point() +
    labs(title = "Walks per plate appearance") +
    theme_bw()

# Scatter plot of strikeouts per plate appearance; years where the ratio is
# NA are dropped by geom_point() with a warning.
ggplot(yearly, aes(Year, SO / PA)) +
    geom_point() +
    labs(
        title = "Rise of the strike out",
        y = "Strike outs per plate appearance"
    ) +
    theme_bw()
## Warning: Removed 7 rows containing missing values (geom_point).

# Per-season strikeout and home-run rates (per plate appearance).
# The variable names are kept as-is because cor.test() prints them in its
# "data:" line.
hr_pa <- with(yearly, HR / PA)
so_pa <- with(yearly, SO / PA)

# Pearson correlation test (the cor.test default) between the two rates.
cor.test(so_pa, hr_pa)
## 
##  Pearson's product-moment correlation
## 
## data:  so_pa and hr_pa
## t = 13.63, df = 105, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7184756 0.8588296
## sample estimates:
##       cor 
## 0.7993022