Fraction of WAR contributed by players younger than X
Load some packages and some data
library(dplyr)
library(ggplot2)
library(readr)
library(gridExtra)
# Locations of the daily WAR tables on baseball-reference.com
# (f1: batters, f2: pitchers).
f1 = 'http://www.baseball-reference.com/data/war_daily_bat.txt'
f2 = 'http://www.baseball-reference.com/data/war_daily_pitch.txt'
war.b = read_csv(f1)
war.p = read_csv(f2)

# Reduce a raw WAR table to the three columns used below.
#   - keeps only rows with a positive age
#   - coerces WAR to numeric; entries that cannot be parsed become NA and are
#     dropped explicitly (presumably the files mark missing WAR with a
#     non-numeric token -- confirm against the data)
#   - floors negative WAR at zero, so a below-replacement season contributes
#     nothing to the totals rather than subtracting from them
prep.war = function(war.raw) {
  war.raw %>%
    filter(age > 0) %>%
    select(year_ID, age, WAR) %>%
    mutate(WAR = as.numeric(WAR)) %>%
    filter(!is.na(WAR)) %>%
    mutate(WAR = pmax(WAR, 0))
}

df.bat = prep.war(war.b)
df.pitch = prep.war(war.p)

# Batters and pitchers stacked into a single table of player-seasons.
df.all = bind_rows(df.bat, df.pitch)
Functions to compute, and to plot, the fraction of each season's WAR contributed by players at or below an age cutoff.
# Per-season share of WAR produced by players aged `age.cutoff` or younger.
#
# Arguments:
#   df.all      data frame with columns year_ID, age and WAR
#   age.cutoff  a player-season counts as "young" when age <= age.cutoff
#   war.cutoff  only player-seasons with WAR >= war.cutoff are considered
#
# Returns an ungrouped data frame with one row per season (year_ID) holding:
#   ii    always 1 (marker of the "young" group)
#   ss    summed WAR of the young group
#   iwar  same total as ss, recomputed at season level
#   jwar  summed WAR of all qualifying players in that season
#   twar  iwar / jwar, the fraction plotted downstream
# A season in which no qualifying player is at or below the cutoff has no
# ii == 1 row and is therefore absent from the result (not reported as 0).
compute.fraction = function(df.all, age.cutoff, war.cutoff) {
  # WAR summed separately for the young (ii == 1) and older (ii == 0) groups
  # of every season.
  by.group = df.all %>%
    filter(age > 0, WAR >= war.cutoff) %>%
    mutate(ii = as.integer(age <= age.cutoff)) %>%
    group_by(year_ID, ii) %>%
    summarise(ss = sum(WAR))

  # Re-group by season explicitly so the totals below never depend on how
  # summarise() happens to leave the grouping.
  by.group %>%
    group_by(year_ID) %>%
    mutate(iwar = sum(ss[ii == 1]),
           jwar = sum(ss),
           twar = iwar / jwar) %>%
    ungroup() %>%
    filter(ii == 1)
}
# Plot, season by season, the fraction of WAR contributed by players aged
# `age.cutoff` or younger.
#
# Arguments:
#   df.all      data frame with columns year_ID, age and WAR
#   age.cutoff  age threshold passed to compute.fraction()
#   war.cutoff  minimum WAR passed to compute.fraction()
#   ymax        upper end of the visible y range (default 0.6)
#
# Returns the ggplot object (points, connecting line and a loess smooth).
make.age.plot = function(df.all, age.cutoff, war.cutoff, ymax=0.6) {
  frac = compute.fraction(df.all, age.cutoff, war.cutoff)

  ggplot(frac, aes(x = year_ID, y = twar)) +
    geom_point() +
    geom_line() +
    # span only applies to loess, so request it explicitly.
    geom_smooth(method = "loess", formula = y ~ x, span = 0.2) +
    scale_x_continuous(breaks = seq(1900, 2020, by = 10)) +
    # Zoom the view instead of setting scale limits: scale limits would drop
    # seasons above ymax before the smoother is fitted.
    coord_cartesian(ylim = c(0, ymax)) +
    # %g prints whole-number cutoffs exactly like %d but also accepts
    # fractional ones.
    labs(title = sprintf("age<=%g, WAR>=%.1f", age.cutoff, war.cutoff),
         x = 'season',
         y = 'fraction of WAR')
}
Make plots for age cutoffs of 25, 24, 23.
# One plot per age cutoff; war.cutoff = 0 applies no minimum-WAR restriction
# beyond WAR >= 0. The y axis is capped at 0.5.
p1 = make.age.plot(df.all, age.cutoff = 25, war.cutoff = 0, ymax = 0.5)
p2 = make.age.plot(df.all, age.cutoff = 24, war.cutoff = 0, ymax = 0.5)
p3 = make.age.plot(df.all, age.cutoff = 23, war.cutoff = 0, ymax = 0.5)

# Render the three figures in order of decreasing age cutoff.
print(p1)

print(p2)

print(p3)

Make plots for age cutoffs of 25, 24, 23, restricted to player-seasons with WAR>=3.0
# Same three age cutoffs, now counting only player-seasons with WAR >= 3.
# The y axis is capped at 0.5.
p1 = make.age.plot(df.all, age.cutoff = 25, war.cutoff = 3, ymax = 0.5)
p2 = make.age.plot(df.all, age.cutoff = 24, war.cutoff = 3, ymax = 0.5)
p3 = make.age.plot(df.all, age.cutoff = 23, war.cutoff = 3, ymax = 0.5)

# Render the three figures in order of decreasing age cutoff.
print(p1)

print(p2)

print(p3)

Make plots for age cutoffs of 25, 24, 23, restricted to player-seasons with WAR>=4.0
# Same three age cutoffs, now counting only player-seasons with WAR >= 4.
# The y axis is capped at 0.5.
p1 = make.age.plot(df.all, age.cutoff = 25, war.cutoff = 4, ymax = 0.5)
p2 = make.age.plot(df.all, age.cutoff = 24, war.cutoff = 4, ymax = 0.5)
p3 = make.age.plot(df.all, age.cutoff = 23, war.cutoff = 4, ymax = 0.5)

# Render the three figures in order of decreasing age cutoff.
print(p1)

print(p2)

print(p3)
