Fraction of WAR contributed by players younger than X

Load some packages and some data

library(dplyr)
library(ggplot2)
library(readr)
library(gridExtra)

# Source tables: f1 is the batting WAR file, f2 the pitching WAR file.
f1 = 'http://www.baseball-reference.com/data/war_daily_bat.txt'
f2 = 'http://www.baseball-reference.com/data/war_daily_pitch.txt'

war.b = read_csv(f1)
war.p = read_csv(f2)

# Reduce a raw WAR table to the three columns used below.
#
# war.raw: a table with (at least) the columns year_ID, age and WAR.
# Returns: year_ID, age and a numeric WAR column, restricted to rows with
#          age > 0 and a usable WAR value, with negative WAR raised to 0.
prep.war = function(war.raw) {
  war.raw %>%
    filter(age > 0) %>%
    select(year_ID, age, WAR) %>%
    # Entries that do not coerce to a number become NA here ...
    mutate(WAR = as.numeric(WAR)) %>%
    # ... and are dropped explicitly rather than via a sentinel comparison.
    filter(!is.na(WAR)) %>%
    # Negative seasons are floored at zero so they cannot subtract from totals.
    mutate(WAR = pmax(WAR, 0))
}

df.bat = prep.war(war.b)
df.pitch = prep.war(war.p)

# Batting and pitching rows stacked into one table (both are already floored).
df.all = rbind.data.frame(df.bat, df.pitch)

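Functions to compute, for each season, the fraction of WAR contributed by players at or below an age cutoff (optionally counting only player-seasons with WAR at or above a minimum), and to plot that fraction over time.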

# Per-season share of WAR earned by players at or below an age cutoff.
#
# df.all:     table with columns year_ID, age and WAR.
# age.cutoff: rows with age <= age.cutoff count as "young".
# war.cutoff: only rows with WAR >= war.cutoff are considered at all.
#
# Returns one row per season that has at least one qualifying row, with
#   year_ID  the season
#   ii       always 1L (kept so the column layout matches earlier output)
#   ss, iwar WAR summed over the young rows of that season
#   jwar     WAR summed over all qualifying rows of that season
#   twar     iwar / jwar
# Seasons in which no young player qualifies are reported with twar = 0
# instead of being left out, so a plot of twar has no silent gaps.
# The result is not grouped.
compute.fraction = function(df.all, age.cutoff, war.cutoff) {
  df.all %>%
    filter(age > 0, WAR >= war.cutoff) %>%
    group_by(year_ID) %>%
    summarise(
      ii = 1L,
      # Summing over an empty selection gives 0, which is the correct share
      # numerator for a season without any young qualifying player.
      ss = sum(WAR[age <= age.cutoff]),
      iwar = ss,
      jwar = sum(WAR),
      twar = iwar / jwar
    )
}

# Build a season-by-season plot of the WAR share computed by compute.fraction().
#
# df.all, age.cutoff, war.cutoff: passed straight to compute.fraction().
# ymax: upper limit of the y axis (the lower limit is 0).
#
# Returns the ggplot object invisibly; print() it to draw it.
make.age.plot = function(df.all, age.cutoff, war.cutoff, ymax=0.6) {
  war.share = compute.fraction(df.all, age.cutoff, war.cutoff)

  # One x-axis tick per decade.
  decade.ticks = seq(1900, 2020, by = 10)
  panel.title = sprintf("age<=%d, WAR>=%.1f", age.cutoff, war.cutoff)

  age.plot = ggplot(war.share, aes(x = year_ID, y = twar)) +
    geom_point() +
    geom_line() +
    geom_smooth(span = 0.2) +
    ylim(0, ymax) +
    scale_x_continuous(breaks = decade.ticks) +
    labs(x = 'season', y = 'fraction of WAR', title = panel.title)

  invisible(age.plot)
}

Make plots for age cutoffs of 25, 24, 23.

# Age cutoffs 25, 24 and 23 with war.cutoff = 0; y axis runs from 0 to 0.5.
p1 = make.age.plot(df.all, age.cutoff = 25, war.cutoff = 0, ymax = 0.5)
print(p1)

p2 = make.age.plot(df.all, age.cutoff = 24, war.cutoff = 0, ymax = 0.5)
print(p2)

p3 = make.age.plot(df.all, age.cutoff = 23, war.cutoff = 0, ymax = 0.5)
print(p3)

Make plots for age cutoffs of 25, 24, 23, and WAR>=3.0

# Age cutoffs 25, 24 and 23, counting only rows with WAR >= 3; y axis 0 to 0.5.
p1 = make.age.plot(df.all, age.cutoff = 25, war.cutoff = 3, ymax = 0.5)
print(p1)

p2 = make.age.plot(df.all, age.cutoff = 24, war.cutoff = 3, ymax = 0.5)
print(p2)

p3 = make.age.plot(df.all, age.cutoff = 23, war.cutoff = 3, ymax = 0.5)
print(p3)

Make plots for age cutoffs of 25, 24, 23, and WAR>=4.0

# Age cutoffs 25, 24 and 23, counting only rows with WAR >= 4; y axis 0 to 0.5.
p1 = make.age.plot(df.all, age.cutoff = 25, war.cutoff = 4, ymax = 0.5)
print(p1)

p2 = make.age.plot(df.all, age.cutoff = 24, war.cutoff = 4, ymax = 0.5)
print(p2)

p3 = make.age.plot(df.all, age.cutoff = 23, war.cutoff = 4, ymax = 0.5)
print(p3)