Setup

library(jsonlite)
library(dplyr)
library(Lahman)
library(baseballr)
library(ggplot2)
library(ggtext)

player_ids.df <- playerid_lookup(last_name = "", first_name = "")

Each MLB Player’s 1st Team (1871 - 2022)

# Debut teams
debut.teams.df <-
  union(Lahman::Batting %>%
          filter(stint == 1) %>%
          select(playerID, yearID, teamID),
        Lahman::Pitching %>%
          filter(stint == 1) %>%
          select(playerID, yearID, teamID)
  ) %>%
  group_by(playerID) %>%
  arrange(yearID) %>%
  filter(row_number() == 1) %>%
  left_join(Lahman::Teams %>% select(yearID, teamID, franchID),
            by = c("yearID" = "yearID", "teamID" = "teamID")) %>%
  left_join(Lahman::People %>% select(playerID, bbrefID), by = "playerID") %>%
  left_join(player_ids.df %>% select(bbref_id, fangraphs_id),
            by = c("bbrefID" = "bbref_id")) %>%
  ungroup() %>%
  select(fangraphs_id, franchID)
debut.teams.df %>% head(10)
## # A tibble: 10 × 2
##    fangraphs_id franchID
##           <int> <fct>   
##  1      1000017 TRO     
##  2      1000070 ROK     
##  3      1000164 CFC     
##  4      1000168 OLY     
##  5      1000272 ROK     
##  6      1000310 KEK     
##  7      1000543 ROK     
##  8      1000573 BNA     
##  9      1000591 KEK     
## 10      1000608 BNA

Career fWAR for Each Player

Batting

# Batter Seasons
base.url <- "https://www.fangraphs.com/api/leaders/major-league/data"
query.params <- list(age = "", pos = "all", stats = "bat", lg = "al%2Cnl",
                     qual = "0", season = "2022", season1 = "1871",
                     startdate = "", enddate = "", month = "0", hand = "",
                     team = "0", pageitems = "2000000000", pagenum = "1",
                     ind = "0", rost = "0", players = "", type = "8",
                     sortdir = "default", sortstat = "WAR")
sep = "?"
url <- base.url
for (param in names(query.params)) {
  url <- paste(url, sep, param, "=", query.params[[param]], sep = "")
  sep = "&"
}
batter.seasons.df <- fromJSON(url)$data %>%
  select(Name, WAR) %>%
  mutate(Type = "Batter")
batter.seasons.df %>% head(10)
##                                                                     Name
## 1       <a href="statss.aspx?playerid=1011327&position=OF">Babe Ruth</a>
## 2        <a href="statss.aspx?playerid=1109&position=OF">Barry Bonds</a>
## 3     <a href="statss.aspx?playerid=1008315&position=OF">Willie Mays</a>
## 4         <a href="statss.aspx?playerid=1002378&position=OF">Ty Cobb</a>
## 5    <a href="statss.aspx?playerid=1013485&position=SS">Honus Wagner</a>
## 6      <a href="statss.aspx?playerid=1000001&position=OF">Hank Aaron</a>
## 7    <a href="statss.aspx?playerid=1012309&position=OF">Tris Speaker</a>
## 8    <a href="statss.aspx?playerid=1014040&position=OF">Ted Williams</a>
## 9  <a href="statss.aspx?playerid=1006030&position=2B">Rogers Hornsby</a>
## 10 <a href="statss.aspx?playerid=1009405&position=1B/OF">Stan Musial</a>
##         WAR   Type
## 1  167.0179 Batter
## 2  164.4482 Batter
## 3  149.8535 Batter
## 4  149.0767 Batter
## 5  138.0926 Batter
## 6  136.3079 Batter
## 7  130.1979 Batter
## 8  129.7669 Batter
## 9  129.1392 Batter
## 10 126.3664 Batter

Pitching

# Pitcher Seasons
query.params$stats <- "pit"

sep = "?"
url <- base.url
for (param in names(query.params)) {
  url <- paste(url, sep, param, "=", query.params[[param]], sep = "")
  sep = "&"
}
pitcher.seasons.df <- fromJSON(url)$data %>%
  select(Name, WAR) %>%
  mutate(Type = "Pitcher")
pitcher.seasons.df %>% head(10)
##                                                                    Name
## 1       <a href="statss.aspx?playerid=815&position=P">Roger Clemens</a>
## 2        <a href="statss.aspx?playerid=1014369&position=P">Cy Young</a>
## 3         <a href="statss.aspx?playerid=104&position=P">Greg Maddux</a>
## 4  <a href="statss.aspx?playerid=1006511&position=P">Walter Johnson</a>
## 5        <a href="statss.aspx?playerid=60&position=P">Randy Johnson</a>
## 6      <a href="statss.aspx?playerid=1011348&position=P">Nolan Ryan</a>
## 7   <a href="statss.aspx?playerid=1001098&position=P">Bert Blyleven</a>
## 8   <a href="statss.aspx?playerid=1010210&position=P">Gaylord Perry</a>
## 9   <a href="statss.aspx?playerid=1001964&position=P">Steve Carlton</a>
## 10 <a href="statss.aspx?playerid=1000128&position=P">Pete Alexander</a>
##          WAR    Type
## 1  133.68920 Pitcher
## 2  131.46872 Pitcher
## 3  116.66175 Pitcher
## 4  116.41905 Pitcher
## 5  110.45433 Pitcher
## 6  106.74577 Pitcher
## 7  102.91322 Pitcher
## 8  100.06021 Pitcher
## 9   96.45129 Pitcher
## 10  95.74027 Pitcher

Combine Batting & Pitching

war.df <- union(batter.seasons.df, pitcher.seasons.df) %>%
  mutate(fangraphs_id = as.integer(gsub(".*playerid=(.*)&.*", "\\1", Name)),
         Name = gsub(".*>(.*)<.*", "\\1", Name)) %>%
  group_by(fangraphs_id) %>%
  summarise(Name = first(Name), WAR = sum(WAR)) %>%
  full_join(debut.teams.df, by = "fangraphs_id") %>%
  rename(debut_franchise = franchID)
war.df %>% head(10)
## # A tibble: 10 × 4
##    fangraphs_id Name                WAR debut_franchise
##           <int> <chr>             <dbl> <fct>          
##  1            1 Alfredo Amezaga  3.22   ANA            
##  2            2 Garret Anderson 23.9    ANA            
##  3            3 Kevin Appier    50.4    KCR            
##  4            4 Larry Barnes    -0.567  ANA            
##  5            5 Scott Patterson  0.0469 NYY            
##  6            6 Jamie Burke      0.491  ANA            
##  7            7 Mickey Callaway  0.843  TBD            
##  8            8 Dennis Cook      5.31   SFG            
##  9           10 David Eckstein  16.7    ANA            
## 10           11 Darin Erstad    28.5    ANA

Data Quality Check

cat(scales::percent(sum(is.na(war.df$debut_franchise)) /
                      length(war.df$debut_franchise), accuracy = 0.1),
    "of player IDs were not mapped successfully")
## 0.1% of player IDs were not mapped successfully
war.df %>% filter(is.na(debut_franchise)) %>% arrange(desc(WAR))
## # A tibble: 21 × 4
##    fangraphs_id Name                 WAR debut_franchise
##           <int> <chr>              <dbl> <fct>          
##  1      1010104 "George Pearce"   3.73   <NA>           
##  2      1000777 "Ed Begley"       0.555  <NA>           
##  3      1010739 "Rip Reagan"      0.271  <NA>           
##  4      1010882 "Bill Rhodes"     0.161  <NA>           
##  5      1000511 "Bill Banks"      0.0872 <NA>           
##  6      1009235 "Sparrow Morton"  0.0719 <NA>           
##  7      1008462 "Jim McDonald"    0.0179 <NA>           
##  8      1005394 "Ben Harrison"   -0.0173 <NA>           
##  9      1010128 "Monte Peffer"   -0.0240 <NA>           
## 10      1002444 " Collins"       -0.0451 <NA>           
## # ℹ 11 more rows

Aggregate fWAR by Franchise

team.war.df <- war.df %>%
    group_by(debut_franchise) %>%
    summarise(WAR = sum(WAR, na.rm = TRUE), players = n()) %>%
    arrange(WAR) %>%
    tail(31) %>%
    mutate(
      `WAR/player` = WAR / players,
      logo = ifelse(
        debut_franchise != "LOU",
        paste("<img src=\"https://cdn.ssref.net/req/202310031/tlogo/br/",
              debut_franchise, ".png\" height=\"25\"/>", sep = ""),
        "<img src=\"https://upload.wikimedia.org/wikipedia/en/4/4a/LouisvilleColonelsLogo.PNG\" height=\"25\"/>"
      )
    )
team.war.df$logo <- factor(team.war.df$logo, levels = team.war.df$logo)
team.war.df %>% head(10)
## # A tibble: 10 × 5
##    debut_franchise   WAR players `WAR/player` logo                              
##    <fct>           <dbl>   <int>        <dbl> <fct>                             
##  1 COL              630.     217         2.90 "<img src=\"https://cdn.ssref.net…
##  2 ARI              636.     199         3.19 "<img src=\"https://cdn.ssref.net…
##  3 LOU              660.     150         4.40 "<img src=\"https://upload.wikime…
##  4 TBD              679.     172         3.95 "<img src=\"https://cdn.ssref.net…
##  5 FLA              937.     254         3.69 "<img src=\"https://cdn.ssref.net…
##  6 SDP             1340.     408         3.28 "<img src=\"https://cdn.ssref.net…
##  7 KCR             1445.     372         3.88 "<img src=\"https://cdn.ssref.net…
##  8 MIL             1459.     317         4.60 "<img src=\"https://cdn.ssref.net…
##  9 TOR             1571.     324         4.85 "<img src=\"https://cdn.ssref.net…
## 10 SEA             1572.     336         4.68 "<img src=\"https://cdn.ssref.net…

Graph

team.war.sum.plot <- ggplot(team.war.df, aes(x = reorder(logo, WAR), y = WAR)) +
  geom_col(fill = "#69b3a2") +
  geom_text(aes(label = round(WAR, digits = 0)), color = "white", size = 6,
            hjust = 1.1) +
  coord_flip() +
  scale_x_discrete(name = NULL, labels = team.war.df$logo) +
  ggtitle("Career WAR by Players who Debuted with Each Franchise (1871-2022)") +
  theme(plot.title = element_text(size = 22),
        axis.text.x = element_text(size = 14),
        axis.title.x = element_text(size = 16, face = "bold"),
        axis.text.y = element_markdown())

ggsave("~/Desktop/peteb206.github.io/tweets/1/team_war_sum.png",
       plot = team.war.sum.plot, width = 15, height = 12)
team.war.sum.plot