Setup
library(jsonlite)
library(dplyr)
library(Lahman)
library(baseballr)
library(ggplot2)
library(ggtext)
player_ids.df <- playerid_lookup(last_name = "", first_name = "")
Each MLB Player’s 1st Team (1871 - 2022)
# Debut teams
debut.teams.df <-
union(Lahman::Batting %>%
filter(stint == 1) %>%
select(playerID, yearID, teamID),
Lahman::Pitching %>%
filter(stint == 1) %>%
select(playerID, yearID, teamID)
) %>%
group_by(playerID) %>%
arrange(yearID) %>%
filter(row_number() == 1) %>%
left_join(Lahman::Teams %>% select(yearID, teamID, franchID),
by = c("yearID" = "yearID", "teamID" = "teamID")) %>%
left_join(Lahman::People %>% select(playerID, bbrefID), by = "playerID") %>%
left_join(player_ids.df %>% select(bbref_id, fangraphs_id),
by = c("bbrefID" = "bbref_id")) %>%
ungroup() %>%
select(fangraphs_id, franchID)
debut.teams.df %>% head(10)
## # A tibble: 10 × 2
## fangraphs_id franchID
## <int> <fct>
## 1 1000017 TRO
## 2 1000070 ROK
## 3 1000164 CFC
## 4 1000168 OLY
## 5 1000272 ROK
## 6 1000310 KEK
## 7 1000543 ROK
## 8 1000573 BNA
## 9 1000591 KEK
## 10 1000608 BNA
Career fWAR for Each Player
Batting
# Batter Seasons
base.url <- "https://www.fangraphs.com/api/leaders/major-league/data"
query.params <- list(age = "", pos = "all", stats = "bat", lg = "al%2Cnl",
qual = "0", season = "2022", season1 = "1871",
startdate = "", enddate = "", month = "0", hand = "",
team = "0", pageitems = "2000000000", pagenum = "1",
ind = "0", rost = "0", players = "", type = "8",
sortdir = "default", sortstat = "WAR")
sep = "?"
url <- base.url
for (param in names(query.params)) {
url <- paste(url, sep, param, "=", query.params[[param]], sep = "")
sep = "&"
}
batter.seasons.df <- fromJSON(url)$data %>%
select(Name, WAR) %>%
mutate(Type = "Batter")
batter.seasons.df %>% head(10)
## Name
## 1 <a href="statss.aspx?playerid=1011327&position=OF">Babe Ruth</a>
## 2 <a href="statss.aspx?playerid=1109&position=OF">Barry Bonds</a>
## 3 <a href="statss.aspx?playerid=1008315&position=OF">Willie Mays</a>
## 4 <a href="statss.aspx?playerid=1002378&position=OF">Ty Cobb</a>
## 5 <a href="statss.aspx?playerid=1013485&position=SS">Honus Wagner</a>
## 6 <a href="statss.aspx?playerid=1000001&position=OF">Hank Aaron</a>
## 7 <a href="statss.aspx?playerid=1012309&position=OF">Tris Speaker</a>
## 8 <a href="statss.aspx?playerid=1014040&position=OF">Ted Williams</a>
## 9 <a href="statss.aspx?playerid=1006030&position=2B">Rogers Hornsby</a>
## 10 <a href="statss.aspx?playerid=1009405&position=1B/OF">Stan Musial</a>
## WAR Type
## 1 167.0179 Batter
## 2 164.4482 Batter
## 3 149.8535 Batter
## 4 149.0767 Batter
## 5 138.0926 Batter
## 6 136.3079 Batter
## 7 130.1979 Batter
## 8 129.7669 Batter
## 9 129.1392 Batter
## 10 126.3664 Batter
Pitching
# Pitcher Seasons
query.params$stats <- "pit"
sep = "?"
url <- base.url
for (param in names(query.params)) {
url <- paste(url, sep, param, "=", query.params[[param]], sep = "")
sep = "&"
}
pitcher.seasons.df <- fromJSON(url)$data %>%
select(Name, WAR) %>%
mutate(Type = "Pitcher")
pitcher.seasons.df %>% head(10)
## Name
## 1 <a href="statss.aspx?playerid=815&position=P">Roger Clemens</a>
## 2 <a href="statss.aspx?playerid=1014369&position=P">Cy Young</a>
## 3 <a href="statss.aspx?playerid=104&position=P">Greg Maddux</a>
## 4 <a href="statss.aspx?playerid=1006511&position=P">Walter Johnson</a>
## 5 <a href="statss.aspx?playerid=60&position=P">Randy Johnson</a>
## 6 <a href="statss.aspx?playerid=1011348&position=P">Nolan Ryan</a>
## 7 <a href="statss.aspx?playerid=1001098&position=P">Bert Blyleven</a>
## 8 <a href="statss.aspx?playerid=1010210&position=P">Gaylord Perry</a>
## 9 <a href="statss.aspx?playerid=1001964&position=P">Steve Carlton</a>
## 10 <a href="statss.aspx?playerid=1000128&position=P">Pete Alexander</a>
## WAR Type
## 1 133.68920 Pitcher
## 2 131.46872 Pitcher
## 3 116.66175 Pitcher
## 4 116.41905 Pitcher
## 5 110.45433 Pitcher
## 6 106.74577 Pitcher
## 7 102.91322 Pitcher
## 8 100.06021 Pitcher
## 9 96.45129 Pitcher
## 10 95.74027 Pitcher
Combine Batting & Pitching
war.df <- union(batter.seasons.df, pitcher.seasons.df) %>%
mutate(fangraphs_id = as.integer(gsub(".*playerid=(.*)&.*", "\\1", Name)),
Name = gsub(".*>(.*)<.*", "\\1", Name)) %>%
group_by(fangraphs_id) %>%
summarise(Name = first(Name), WAR = sum(WAR)) %>%
full_join(debut.teams.df, by = "fangraphs_id") %>%
rename(debut_franchise = franchID)
war.df %>% head(10)
## # A tibble: 10 × 4
## fangraphs_id Name WAR debut_franchise
## <int> <chr> <dbl> <fct>
## 1 1 Alfredo Amezaga 3.22 ANA
## 2 2 Garret Anderson 23.9 ANA
## 3 3 Kevin Appier 50.4 KCR
## 4 4 Larry Barnes -0.567 ANA
## 5 5 Scott Patterson 0.0469 NYY
## 6 6 Jamie Burke 0.491 ANA
## 7 7 Mickey Callaway 0.843 TBD
## 8 8 Dennis Cook 5.31 SFG
## 9 10 David Eckstein 16.7 ANA
## 10 11 Darin Erstad 28.5 ANA
Data Quality Check
cat(scales::percent(sum(is.na(war.df$debut_franchise)) /
length(war.df$debut_franchise), accuracy = 0.1),
"of player IDs were not mapped successfully")
## 0.1% of player IDs were not mapped successfully
war.df %>% filter(is.na(debut_franchise)) %>% arrange(desc(WAR))
## # A tibble: 21 × 4
## fangraphs_id Name WAR debut_franchise
## <int> <chr> <dbl> <fct>
## 1 1010104 "George Pearce" 3.73 <NA>
## 2 1000777 "Ed Begley" 0.555 <NA>
## 3 1010739 "Rip Reagan" 0.271 <NA>
## 4 1010882 "Bill Rhodes" 0.161 <NA>
## 5 1000511 "Bill Banks" 0.0872 <NA>
## 6 1009235 "Sparrow Morton" 0.0719 <NA>
## 7 1008462 "Jim McDonald" 0.0179 <NA>
## 8 1005394 "Ben Harrison" -0.0173 <NA>
## 9 1010128 "Monte Peffer" -0.0240 <NA>
## 10 1002444 " Collins" -0.0451 <NA>
## # ℹ 11 more rows
Aggregate fWAR by Franchise
team.war.df <- war.df %>%
group_by(debut_franchise) %>%
summarise(WAR = sum(WAR, na.rm = TRUE), players = n()) %>%
arrange(WAR) %>%
tail(31) %>%
mutate(
`WAR/player` = WAR / players,
logo = ifelse(
debut_franchise != "LOU",
paste("<img src=\"https://cdn.ssref.net/req/202310031/tlogo/br/",
debut_franchise, ".png\" height=\"25\"/>", sep = ""),
"<img src=\"https://upload.wikimedia.org/wikipedia/en/4/4a/LouisvilleColonelsLogo.PNG\" height=\"25\"/>"
)
)
team.war.df$logo <- factor(team.war.df$logo, levels = team.war.df$logo)
team.war.df %>% head(10)
## # A tibble: 10 × 5
## debut_franchise WAR players `WAR/player` logo
## <fct> <dbl> <int> <dbl> <fct>
## 1 COL 630. 217 2.90 "<img src=\"https://cdn.ssref.net…
## 2 ARI 636. 199 3.19 "<img src=\"https://cdn.ssref.net…
## 3 LOU 660. 150 4.40 "<img src=\"https://upload.wikime…
## 4 TBD 679. 172 3.95 "<img src=\"https://cdn.ssref.net…
## 5 FLA 937. 254 3.69 "<img src=\"https://cdn.ssref.net…
## 6 SDP 1340. 408 3.28 "<img src=\"https://cdn.ssref.net…
## 7 KCR 1445. 372 3.88 "<img src=\"https://cdn.ssref.net…
## 8 MIL 1459. 317 4.60 "<img src=\"https://cdn.ssref.net…
## 9 TOR 1571. 324 4.85 "<img src=\"https://cdn.ssref.net…
## 10 SEA 1572. 336 4.68 "<img src=\"https://cdn.ssref.net…
Graph
team.war.sum.plot <- ggplot(team.war.df, aes(x = reorder(logo, WAR), y = WAR)) +
geom_col(fill = "#69b3a2") +
geom_text(aes(label = round(WAR, digits = 0)), color = "white", size = 6,
hjust = 1.1) +
coord_flip() +
scale_x_discrete(name = NULL, labels = team.war.df$logo) +
ggtitle("Career WAR by Players who Debuted with Each Franchise (1871-2022)") +
theme(plot.title = element_text(size = 22),
axis.text.x = element_text(size = 14),
axis.title.x = element_text(size = 16, face = "bold"),
axis.text.y = element_markdown())
ggsave("~/Desktop/peteb206.github.io/tweets/1/team_war_sum.png",
plot = team.war.sum.plot, width = 15, height = 12)
team.war.sum.plot
