MLB Stats

Author

Nick Warino

Published

September 9, 2024

Show the code
# Baseball-Reference page holding the league-wide batting history tables.
league_per_game_url <- "https://www.baseball-reference.com/leagues/majors/bat.shtml#all_teams_standard_batting_totals"

# Download and parse the page once; the parsed document is kept for reuse.
league_per_game_url_page <- read_html(league_per_game_url)

# Take the first table found on the page and discard every row whose first
# column holds the literal text "Year" (header rows repeated inside the body).
mlb_season_history <- html_table(league_per_game_url_page)[[1]] |>
  (\(season_tbl) filter(season_tbl, season_tbl[[1]] != "Year"))()



# Keep column 1 (Year) plus columns 9 through 30, coerce every kept column to
# numeric, then drop RBI and TB so they are not plotted.
mlb_season_history_for_graph <- mlb_season_history |>
  select(1, 9:30) |>
  mutate(across(everything(), as.numeric)) |>
  select(!c("RBI", "TB"))

# An SF value of exactly 0 is replaced with NA; every other value (including
# existing NAs) is left untouched.
mlb_season_history_for_graph <- mlb_season_history_for_graph |>
  mutate(SF = replace(SF, which(SF == 0), NA))

# Panel order for the faceted charts: statistics are drawn in this sequence.
desired_order <- c(
  "R", "H", "1B", "2B", "3B", "HR", "BB", "IBB", "HBP", "SF",
  "SH", "SO", "GDP", "SB", "CS", "BIP", "BA", "OBP", "SLG", "OPS"
)

# Reshape to one row per (Year, Stat), order the Stat factor as above, and draw
# one free-y panel per statistic: the raw yearly line plus a red loess trend.
# NOTE(review): theme() is added before My_Theme_WithY(); if that helper also
# sets axis.text.x it will win over the 90-degree rotation - confirm.
mlb_season_history_for_graph |>
  pivot_longer(cols = !Year, names_to = "Stat", values_to = "Value") |>
  mutate(Stat = factor(Stat, levels = desired_order)) |>
  ggplot(mapping = aes(x = Year, y = Value)) +
  geom_line() +
  geom_smooth(method = "loess", se = FALSE, color = "red") +
  facet_wrap(~Stat, scales = "free_y") +
  labs(
    title = "MLB Season History, Per Game Averages 1871 to 2024",
    x = "Year",
    y = "Value"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  My_Theme_WithY()

Show the code
# Keep only seasons with Year greater than 1899 (i.e. 1900 onward).
mlb_season_history_20th_cent_for_graph <- mlb_season_history_for_graph |>
  filter(Year > 1899)

# Same faceted chart as the all-years version, restricted to 1900 onward.
# X-axis breaks fall every 50 years (1900, 1950, 2000), and each panel gets a
# red fitted loess curve on top of the raw yearly line.
mlb_season_history_20th_cent_for_graph |>
  pivot_longer(cols = !Year, names_to = "Stat", values_to = "Value") |>
  mutate(Stat = factor(Stat, levels = desired_order)) |>
  ggplot(mapping = aes(x = Year, y = Value)) +
  geom_line() +
  geom_smooth(method = "loess", se = FALSE, color = "red") +
  facet_wrap(~Stat, scales = "free_y") +
  scale_x_continuous(breaks = seq(1900, 2020, by = 50)) +
  labs(
    title = "MLB Season History, Per Game Averages 1900 to 2024",
    x = "Year",
    y = "Value",
    caption = paste("Created by Nick Warino. nickwarino.com. Generated on:", Sys.Date())
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  My_Theme_WithY()

Show the code
# Install and load libraries
library(baseballr)
# Load the utils package
library(utils)

# URL of the historical WAR CSV file hosted on GitHub.
url <- "https://raw.githubusercontent.com/Neil-Paine-1/MLB-WAR-data-historical/master/jeffbagwell_war_historical_2024.csv"

# read.csv() accepts a URL directly, so this downloads and parses in one step.
data <- read.csv(url, stringsAsFactors = FALSE)

# Save a snapshot to the output_data folder. The file name carries today's
# date (Sys.Date()), so there is one file per calendar day and a re-run on the
# same day overwrites that day's file.
# The folder is created first if it is missing so write.csv() cannot fail on a
# fresh checkout, and the path is assembled from here() components instead of
# gluing a "/" separator in by hand.
dir.create(here("output_data"), showWarnings = FALSE, recursive = TRUE)
write.csv(
  data,
  here("output_data", paste0("mlb_war_data_", Sys.Date(), ".csv")),
  row.names = FALSE
)
Show the code
# Build the season-level WAR table:
#   * keep the identifying columns plus bwar162 and br_pwar162,
#   * keep rows with sched > 100, or any row with year_ID > 2019,
#   * twar162 is the sum of the two WAR columns, taken before rounding,
#   * label is "<player_name> <year_ID>" for Shohei Ohtani and Babe Ruth and
#     NA for everyone else,
#   * all three WAR columns are then rounded to one decimal,
#   * rows are sorted by twar162 (descending) and numbered in rank_twar, which
#     is placed immediately before player_name.
data_filtered <- data |>
  select(player_name, lg_ID, team_ID, sched, year_ID, bwar162, br_pwar162) |>
  filter(sched > 100 | year_ID > 2019) |>
  mutate(
    twar162 = bwar162 + br_pwar162,
    label = ifelse(
      player_name %in% c("Shohei Ohtani", "Babe Ruth"),
      paste(player_name, year_ID),
      NA
    )
  ) |>
  mutate(
    across(c(bwar162, br_pwar162, twar162), \(war) round(war, digits = 1))
  ) |>
  arrange(desc(twar162)) |>
  mutate(rank_twar = row_number(), .before = player_name)

# Clean every character column; non-character columns pass through unchanged.
# Each text value is first converted to UTF-8 (bytes that cannot be converted
# are written out as <xx> escapes), after which any character outside the
# \x01-\x7F range is deleted.
data_filtered <- data_filtered |>
  mutate(
    across(
      where(is.character),
      \(txt) gsub("[^\x01-\x7F]", "", iconv(txt, to = "UTF-8", sub = "byte"))
    )
  )


# Seasons with year_ID greater than 1944 (1945 onward).
data_post_integration <- filter(data_filtered, year_ID > 1944)

# Two-way seasons: at least 2 batting WAR and at least 2 pitching WAR.
seasons_with_2war <- data_filtered |>
  filter(bwar162 >= 2 & br_pwar162 >= 2) |>
  select(!label)

# Same two-way cut on the post-1944 subset.
# NOTE(review): the name says "1war" but the threshold applied is 2.
seasons_with_1war_post_integration <- data_post_integration |>
  filter(bwar162 >= 2 & br_pwar162 >= 2) |>
  select(!label)

# First 100 rows of each table, without the plot-only label column. Both
# inputs are already sorted by twar162 descending, so these are the top 100.
top_100_ever <- select(head(data_filtered, n = 100), !label)

top_100_post_integration <- select(head(data_post_integration, n = 100), !label)

1 Best Seasons Ever, by WAR per 162 team games

Batting and Pitching WAR per 162 team games by Season (only including seasons with >100 scheduled games)

Show the code
# Scatter of every season in data_filtered: batting WAR on x, pitching WAR on
# y. Point size and fill both encode total WAR (twar162); the fill gradient is
# centred on the median twar162. Only rows with a non-NA label get a text label.
# NOTE(review): theme(legend.position = ...) is added before My_Theme_WithY();
# if that helper sets legend.position it will override this - confirm.
ggplot(data_filtered,
       aes(x = bwar162,
           y = br_pwar162)) +
  geom_point(aes(size = twar162, fill = twar162), alpha = 0.7, shape = 21, stroke = .5) +
  geom_text_repel(aes(label = label), 
                  size = 5, color = "black", 
                  na.rm = TRUE,
                  max.overlaps = 5) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red",
                       midpoint = median(data_filtered$twar162, na.rm = TRUE)) +
  # Caption fix: the data source's surname is "Paine", not "Pain".
  labs(title = "WAR By Season, pro-rated to 162 team games",
       subtitle = "Batting and Pitching WAR per 162 team games by Season (only including seasons with >100 scheduled games).\nnickwarino.com, created 2023-08-08",
       caption = "sources: Neil Paine's MLB historical WAR data",
       x = "Batting WAR",
       y = "Pitching WAR",
       fill = "Total WAR") +
  theme(legend.position = c(.9, .9)) +
  My_Theme_WithY()

Show the code
# Render the top-100 all-time seasons as a formatted table.
top_100_ever |> kable()
rank_twar player_name lg_ID team_ID sched year_ID bwar162 br_pwar162 twar162
1 Walter Johnson AL WSH 155 1913 1.0 15.9 16.9
2 Walter Johnson AL WSH 154 1912 1.2 15.0 16.2
3 Babe Ruth AL NYY 152 1923 15.4 0.0 15.4
4 Cy Young AL BOS 138 1901 0.1 14.6 14.7
5 Walter Johnson AL WSH 130 1918 1.2 13.0 14.3
6 Babe Ruth AL NYY 153 1921 14.1 -0.3 13.8
7 Joe McGinnity NL NYG 142 1903 -0.2 13.9 13.7
8 Grover Alexander NL CHC 154 1920 0.7 12.6 13.3
9 Babe Ruth AL NYY 155 1927 13.3 0.0 13.3
10 Dwight Gooden NL NYM 162 1985 0.9 12.2 13.1
11 Ed Walsh AL CHW 158 1912 0.8 12.4 13.1
12 Steve Carlton NL PHI 156 1972 0.5 12.5 13.0
13 Babe Ruth AL NYY 154 1920 13.1 -0.1 13.0
14 Bullet Rogan NNL KCM 101 1921 4.4 8.5 12.9
15 Rogers Hornsby NL STL 154 1924 12.8 0.0 12.8
16 Walter Johnson AL WSH 157 1914 0.6 12.1 12.8
17 Lou Gehrig AL NYY 155 1927 12.7 0.0 12.7
18 Walter Johnson AL WSH 155 1915 0.6 12.1 12.7
19 Grover Alexander NL PHI 154 1916 1.1 11.5 12.6
20 Hal Newhouser AL DET 155 1945 0.8 11.8 12.6
21 Eddie Cicotte AL CHW 156 1917 0.2 12.4 12.5
22 Babe Ruth AL NYY 153 1924 12.5 0.0 12.5
23 Russ Ford AL NYY 156 1910 0.6 11.8 12.4
24 Greg Maddux NL ATL 114 1994 0.4 12.1 12.4
25 Christy Mathewson NL NYG 142 1903 0.5 11.9 12.4
26 Barry Bonds NL SFG 162 2001 12.2 0.0 12.2
27 Barry Bonds NL SFG 162 2002 12.2 0.0 12.2
28 Walter Johnson AL WSH 142 1919 -0.1 12.3 12.2
29 Christy Mathewson NL NYG 157 1908 0.0 12.1 12.2
30 Honus Wagner NL PIT 155 1908 12.2 0.0 12.2
31 Roger Clemens AL TOR 162 1997 0.1 11.9 12.1
32 Babe Ruth AL NYY 155 1926 12.1 0.0 12.1
33 Ed Walsh AL CHW 156 1910 0.6 11.6 12.1
34 Ty Cobb AL DET 154 1917 12.0 0.0 12.0
35 Stan Coveleski AL CLE 129 1918 -0.4 12.4 12.0
36 Mickey Mantle AL NYY 154 1956 12.0 0.0 12.0
37 Cy Young AL BOS 138 1902 0.1 11.8 12.0
38 Jack Chesbro AL NYY 155 1904 0.8 11.1 11.9
39 Mickey Mantle AL NYY 154 1957 11.9 0.0 11.9
40 Bob Gibson NL STL 162 1968 0.6 11.2 11.8
41 Babe Ruth AL BOS 138 1919 10.9 0.9 11.8
42 Smoky Joe Wood AL BOS 154 1912 1.1 10.7 11.8
43 Carl Yastrzemski AL BOS 162 1967 11.8 0.0 11.8
44 Red Faber AL CHW 154 1921 -0.3 12.0 11.7
45 Fergie Jenkins NL CHC 162 1971 1.6 10.1 11.7
46 Pedro Martinez AL BOS 162 2000 0.0 11.7 11.7
47 Mike Schmidt NL PHI 107 1981 11.7 0.0 11.7
48 Ted Williams AL BOS 152 1942 11.7 0.0 11.7
49 Grover Alexander NL PHI 153 1915 -0.1 11.7 11.6
50 Stan Musial NL STL 155 1948 11.6 0.0 11.6
51 Rube Waddell AL PHA 137 1902 0.8 10.8 11.6
52 Rogers Hornsby NL STL 154 1921 11.5 0.0 11.5
53 Ted Williams AL BOS 156 1946 11.5 0.0 11.5
54 Ty Cobb AL DET 154 1911 11.4 0.0 11.4
55 Jimmie Foxx AL PHA 154 1932 11.4 0.0 11.4
56 Dolf Luque NL CIN 154 1923 0.2 11.3 11.4
57 Gaylord Perry AL CLE 156 1972 0.2 11.2 11.4
58 Jeff Bagwell NL HOU 115 1994 11.3 0.0 11.3
59 Barry Bonds NL SFG 162 2004 11.3 0.0 11.3
60 Eddie Cicotte AL CHW 140 1919 0.1 11.2 11.3
61 Bob Gibson NL STL 162 1969 0.9 10.4 11.3
62 Wes Ferrell AL BOS 154 1935 2.6 8.6 11.2
63 Lefty Grove AL BOS 155 1936 -0.5 11.7 11.2
64 Rogers Hornsby NL CHC 156 1929 11.2 0.0 11.2
65 Walter Johnson AL WSH 159 1916 1.0 10.2 11.2
66 Babe Ruth AL NYY 154 1930 10.9 0.2 11.2
67 Dizzy Trout AL DET 156 1944 1.6 9.6 11.2
68 Ed Walsh AL CHW 156 1908 0.5 10.7 11.2
69 Ted Williams AL BOS 155 1941 11.2 0.0 11.2
70 Rogers Hornsby NL STL 153 1925 11.1 0.0 11.1
71 Cal Ripken Jr. AL BAL 162 1991 11.1 0.0 11.1
72 Babe Ruth AL NYY 155 1931 11.1 0.0 11.1
73 Tom Seaver NL NYM 161 1973 0.4 10.7 11.1
74 Lou Boudreau AL CLE 156 1948 11.0 0.0 11.0
75 Walter Johnson AL WSH 157 1910 0.2 10.8 11.0
76 Christy Mathewson NL NYG 155 1905 1.0 10.1 11.0
77 Joe Morgan NL CIN 162 1975 11.0 0.0 11.0
78 Jack Taylor NL CHC 143 1902 0.4 10.6 11.0
79 Rube Waddell AL PHA 155 1904 -0.8 11.8 11.0
80 Honus Wagner NL PIT 155 1905 11.0 0.0 11.0
81 Wilbur Wood AL CHW 162 1971 -0.8 11.8 11.0
82 Ty Cobb AL DET 155 1910 10.9 0.0 10.9
83 Lou Gehrig AL NYY 154 1934 10.9 0.0 10.9
84 Lefty Grove AL PHA 154 1930 0.0 10.9 10.9
85 Aaron Judge AL NYY 143 2024 10.9 0.0 10.9
86 Greg Maddux NL ATL 144 1995 -0.1 10.9 10.9
87 Willie Mays NL NYG 154 1954 10.9 0.0 10.9
88 Willie Mays NL SFG 163 1965 10.9 0.0 10.9
89 Babe Ruth AL NYY 154 1928 10.9 0.0 10.9
90 Tris Speaker AL BOS 154 1912 10.9 0.0 10.9
91 Wilbur Wood AL CHW 154 1972 -0.4 11.3 10.9
92 Rogers Hornsby NL STL 154 1922 10.8 0.0 10.8
93 Rogers Hornsby NL NYG 155 1927 10.8 0.0 10.8
94 Aaron Judge AL NYY 162 2022 10.8 0.0 10.8
95 Christy Mathewson NL NYG 158 1909 0.9 9.9 10.8
96 Willie Mays NL SFG 162 1964 10.8 0.0 10.8
97 Tom Seaver NL NYM 162 1971 0.6 10.2 10.8
98 Dazzy Vance NL BRO 154 1924 -0.2 11.0 10.8
99 Lefty Grove AL PHA 153 1931 -0.3 11.0 10.7
100 Dazzy Vance NL BRO 155 1928 0.1 10.6 10.7
Show the code
# Render the two-way seasons (>= 2 batting WAR and >= 2 pitching WAR).
seasons_with_2war |> kable()
rank_twar player_name lg_ID team_ID sched year_ID bwar162 br_pwar162 twar162
14 Bullet Rogan NNL KCM 101 1921 4.4 8.5 12.9
62 Wes Ferrell AL BOS 154 1935 2.6 8.6 11.2
144 Shohei Ohtani AL LAA 162 2023 6.3 3.9 10.1
158 Lewis Hampton NNL ABC 101 1922 3.3 6.7 10.0
199 Shohei Ohtani AL LAA 162 2022 3.5 6.2 9.7
276 Babe Ruth AL BOS 126 1918 6.3 3.0 9.3
329 Shohei Ohtani AL LAA 162 2021 4.9 4.1 9.0
437 Wes Ferrell AL CLE 155 1931 2.0 6.5 8.5
2740 Don Drysdale NL LAD 162 1965 2.3 3.2 5.6
2974 Don Newcombe NL BRO 154 1955 2.4 3.1 5.5

2 Best Seasons Since MLB Integration, by WAR per 162 team games

Batting and Pitching WAR per 162 team games by Season (only including seasons with >100 scheduled games)

Show the code
# Scatter of the post-1944 seasons: batting WAR on x, pitching WAR on y.
# Point size (scaled to the 1-10 range) and fill both encode total WAR
# (twar162); the fill gradient is centred on this subset's median twar162.
# Only rows with a non-NA label get a text label.
# NOTE(review): theme(legend.position = ...) is added before My_Theme_WithY();
# if that helper sets legend.position it will override this - confirm.
ggplot(data_post_integration,
       aes(x = bwar162,
           y = br_pwar162)) +
  geom_point(aes(size = twar162, fill = twar162), alpha = 0.7, shape = 21, stroke = .5) +
  scale_size(range = c(1, 10)) +
  geom_text_repel(aes(label = label), 
                  size = 5, color = "black", 
                  na.rm = TRUE,
                  max.overlaps = 5) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red",
                       midpoint = median(data_post_integration$twar162, na.rm = TRUE)) +
  # Text fixes: the subtitle's parenthesis is now closed, and the caption
  # spells the data source's surname "Paine" rather than "Pain".
  labs(title = "WAR By Season (Post-Integrated MLB), pro-rated to 162 team games",
       subtitle = "Batting and Pitching WAR per 162 team games by Season (only including seasons with >100 scheduled games).\nnickwarino.com, created 2023-08-08",
       caption = "sources: Neil Paine's MLB historical WAR data",
       x = "Batting WAR",
       y = "Pitching WAR",
       fill = "Total WAR") +
  theme(legend.position = c(.9, .9)) +
  My_Theme_WithY()

Show the code
# Render the top-100 seasons from the post-1944 subset.
top_100_post_integration |> kable()
rank_twar player_name lg_ID team_ID sched year_ID bwar162 br_pwar162 twar162
10 Dwight Gooden NL NYM 162 1985 0.9 12.2 13.1
12 Steve Carlton NL PHI 156 1972 0.5 12.5 13.0
20 Hal Newhouser AL DET 155 1945 0.8 11.8 12.6
24 Greg Maddux NL ATL 114 1994 0.4 12.1 12.4
26 Barry Bonds NL SFG 162 2001 12.2 0.0 12.2
27 Barry Bonds NL SFG 162 2002 12.2 0.0 12.2
31 Roger Clemens AL TOR 162 1997 0.1 11.9 12.1
36 Mickey Mantle AL NYY 154 1956 12.0 0.0 12.0
39 Mickey Mantle AL NYY 154 1957 11.9 0.0 11.9
40 Bob Gibson NL STL 162 1968 0.6 11.2 11.8
43 Carl Yastrzemski AL BOS 162 1967 11.8 0.0 11.8
45 Fergie Jenkins NL CHC 162 1971 1.6 10.1 11.7
46 Pedro Martinez AL BOS 162 2000 0.0 11.7 11.7
47 Mike Schmidt NL PHI 107 1981 11.7 0.0 11.7
50 Stan Musial NL STL 155 1948 11.6 0.0 11.6
53 Ted Williams AL BOS 156 1946 11.5 0.0 11.5
57 Gaylord Perry AL CLE 156 1972 0.2 11.2 11.4
58 Jeff Bagwell NL HOU 115 1994 11.3 0.0 11.3
59 Barry Bonds NL SFG 162 2004 11.3 0.0 11.3
61 Bob Gibson NL STL 162 1969 0.9 10.4 11.3
71 Cal Ripken Jr. AL BAL 162 1991 11.1 0.0 11.1
73 Tom Seaver NL NYM 161 1973 0.4 10.7 11.1
74 Lou Boudreau AL CLE 156 1948 11.0 0.0 11.0
77 Joe Morgan NL CIN 162 1975 11.0 0.0 11.0
81 Wilbur Wood AL CHW 162 1971 -0.8 11.8 11.0
85 Aaron Judge AL NYY 143 2024 10.9 0.0 10.9
86 Greg Maddux NL ATL 144 1995 -0.1 10.9 10.9
87 Willie Mays NL NYG 154 1954 10.9 0.0 10.9
88 Willie Mays NL SFG 163 1965 10.9 0.0 10.9
91 Wilbur Wood AL CHW 154 1972 -0.4 11.3 10.9
94 Aaron Judge AL NYY 162 2022 10.8 0.0 10.8
96 Willie Mays NL SFG 162 1964 10.8 0.0 10.8
97 Tom Seaver NL NYM 162 1971 0.6 10.2 10.8
101 Andre Dawson NL MON 108 1981 10.6 0.0 10.6
103 Randy Johnson NL ARI 162 2002 -0.2 10.7 10.5
104 Willie Mays NL SFG 154 1958 10.5 0.0 10.5
105 Bobby Witt Jr. AL KCR 144 2024 10.5 0.0 10.5
106 Ernie Banks NL CHC 155 1959 10.4 0.0 10.4
107 Mookie Betts AL BOS 162 2018 10.4 0.0 10.4
108 Roger Clemens AL BOS 162 1990 0.0 10.4 10.4
112 Zack Greinke AL KCR 162 2009 0.0 10.4 10.4
114 Phil Niekro NL ATL 162 1978 0.5 10.0 10.4
118 Mickey Mantle AL NYY 163 1961 10.3 0.0 10.3
119 Juan Marichal NL SFG 163 1965 0.1 10.2 10.3
120 Willie Mays NL SFG 165 1962 10.3 0.0 10.3
121 Robin Roberts NL PHI 156 1953 0.1 10.2 10.3
123 Mike Trout AL LAA 162 2012 10.3 0.0 10.3
124 Barry Bonds NL SFG 162 1993 10.2 0.0 10.2
126 Bob Gibson NL STL 162 1970 1.3 8.9 10.2
129 Mickey Mantle AL NYY 154 1955 10.2 0.0 10.2
130 Willie Mays NL SFG 162 1963 10.2 0.0 10.2
131 Jose Rijo NL CIN 162 1993 1.0 9.2 10.2
133 Ted Williams AL BOS 157 1947 10.2 0.0 10.2
134 Ted Williams AL BOS 154 1957 10.2 0.0 10.2
137 Steve Carlton NL PHI 162 1980 -0.1 10.2 10.1
141 Rickey Henderson AL OAK 162 1990 10.1 0.0 10.1
144 Shohei Ohtani AL LAA 162 2023 6.3 3.9 10.1
145 Al Rosen AL CLE 155 1953 10.1 0.0 10.1
146 Sammy Sosa NL CHC 162 2001 10.1 0.0 10.1
149 Robin Yount AL MIL 163 1982 10.1 0.0 10.1
154 Dwight Evans AL BOS 108 1981 10.0 0.0 10.0
155 Bob Feller AL CLE 156 1946 -0.4 10.4 10.0
160 Hal Newhouser AL DET 155 1946 -0.2 10.1 10.0
162 Jacob deGrom NL NYM 162 2018 0.4 9.5 9.9
164 Dick Ellsworth NL CHC 162 1963 -0.3 10.2 9.9
166 Ken Griffey Jr. AL SEA 112 1994 9.9 0.0 9.9
167 Rickey Henderson AL OAK 109 1981 9.9 0.0 9.9
168 Rickey Henderson AL NYY 161 1985 9.9 0.0 9.9
169 Sandy Koufax NL LAD 163 1963 -0.7 10.6 9.9
170 Kenny Lofton AL CLE 113 1994 9.9 0.0 9.9
171 Juan Marichal NL SFG 161 1966 0.7 9.1 9.9
173 Cal Ripken Jr. AL BAL 162 1984 9.9 0.0 9.9
174 Alex Rodriguez AL SEA 162 2000 9.9 0.0 9.9
175 Warren Spahn NL BSN 154 1947 -0.1 10.0 9.9
178 Ted Williams AL BOS 155 1949 9.9 0.0 9.9
179 Carl Yastrzemski AL BOS 162 1968 9.9 0.0 9.9
182 Barry Bonds NL PIT 162 1990 9.8 0.0 9.8
184 Ken Griffey Jr. AL SEA 161 1996 9.8 0.0 9.8
188 Pedro Martinez AL BOS 162 1999 0.0 9.8 9.8
189 Rico Petrocelli AL BOS 162 1969 9.8 0.0 9.8
190 Jackie Robinson NL BRO 156 1949 9.8 0.0 9.8
194 Bert Blyleven AL MIN 162 1973 0.0 9.7 9.7
195 Barry Bonds NL SFG 161 2003 9.7 0.0 9.7
197 David Cone AL KCR 115 1994 0.0 9.7 9.7
199 Shohei Ohtani AL LAA 162 2022 3.5 6.2 9.7
201 Bret Saberhagen AL KCR 162 1989 0.0 9.7 9.7
202 Bobby Shantz AL PHA 155 1952 0.4 9.2 9.7
203 Mike Trout AL LAA 162 2018 9.7 0.0 9.7
205 Henry Aaron NL MLN 155 1961 9.6 0.0 9.6
206 Adrian Beltre NL LAD 162 2004 9.6 0.0 9.6
207 Norm Cash AL DET 163 1961 9.6 0.0 9.6
209 Mark Fidrych AL DET 161 1976 0.0 9.6 9.6
213 Randy Johnson AL SEA 145 1995 0.0 9.6 9.6
214 Randy Johnson NL ARI 162 2001 -0.5 10.1 9.6
215 Sandy Koufax NL LAD 162 1966 -0.6 10.3 9.6
218 Willie Mays NL NYG 154 1955 9.6 0.0 9.6
219 Joe Morgan NL CIN 162 1976 9.6 0.0 9.6
220 Rick Reuschel NL CHC 162 1977 0.2 9.4 9.6
221 Jackie Robinson NL BRO 158 1951 9.6 0.0 9.6
222 Ron Santo NL CHC 162 1967 9.6 0.0 9.6
Show the code
# Render the post-1944 two-way seasons table.
seasons_with_1war_post_integration |> kable()
rank_twar player_name lg_ID team_ID sched year_ID bwar162 br_pwar162 twar162
144 Shohei Ohtani AL LAA 162 2023 6.3 3.9 10.1
199 Shohei Ohtani AL LAA 162 2022 3.5 6.2 9.7
329 Shohei Ohtani AL LAA 162 2021 4.9 4.1 9.0
2740 Don Drysdale NL LAD 162 1965 2.3 3.2 5.6
2974 Don Newcombe NL BRO 154 1955 2.4 3.1 5.5

3 2024 Hit Data

Show the code
# Read the two raw CSV exports: exit-velocity data and expected-stats data.
hit_data <- here("raw_data", "exit_velocity.csv") |>
  read.csv(stringsAsFactors = FALSE)
hit_data2 <- here("raw_data", "expected_stats.csv") |>
  read.csv(stringsAsFactors = FALSE)

# Attach the expected-stats columns to each exit-velocity row, matching on
# player_id; exit-velocity rows without a match are kept.
hit_data <- left_join(hit_data, hit_data2, by = "player_id")

# The first column (selected by position) is renamed to "player_name".
names(hit_data)[1] <- "player_name"

# Keep only rows with more than 30 attempts.
hit_data_above_30_attempts <- filter(hit_data, attempts > 30)

# Scatter of max_hit_speed (x) against avg_hit_speed (y), one point per row of
# hit_data_above_30_attempts. Point size encodes attempts and fill encodes
# max_distance, with the fill gradient centred on the median max_distance.
# NOTE(review): theme(legend.position = ...) is added before My_Theme_WithY();
# if that helper sets legend.position it will override this - confirm.
ggplot(hit_data_above_30_attempts, aes(x = max_hit_speed, y = avg_hit_speed)) +
  geom_point(aes(size = attempts, fill = max_distance), alpha = 0.7, shape = 21, stroke = .5) +
  geom_text_repel(aes(label = player_name), 
                  size = 5, color = "black", 
                  na.rm = TRUE,
                  max.overlaps = 5) +
  scale_size(range = c(1, 15)) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red",
                       midpoint = median(hit_data_above_30_attempts$max_distance, na.rm = TRUE)) +
  # Subtitle fix: it used to advertise "Average Distance", which is not mapped
  # to any aesthetic in this chart; it now lists only what is drawn.
  labs(title = "No one crushes the ball like Ohtani",
       subtitle = "Max Hit Speed, Average Hit Speed, and Max Distance by Player\nnickwarino.com, created 2024-04-25",
       caption = "sources: Statcast",
       x = "Max Hit Speed",
       y = "Average Hit Speed",
       fill = "Max Distance",
       size = "Attempts") +
  theme(legend.position = c(.9, .9)) +
  My_Theme_WithY()

Show the code
# Scatter of max_hit_speed (x) against max_distance (y), one point per row of
# hit_data_above_30_attempts. Point size encodes attempts and fill encodes
# avg_hit_speed, with the fill gradient centred on the median avg_hit_speed.
# NOTE(review): theme(legend.position = ...) is added before My_Theme_WithY();
# if that helper sets legend.position it will override this - confirm.
ggplot(hit_data_above_30_attempts, aes(x = max_hit_speed, y = max_distance)) +
  geom_point(aes(size = attempts, fill = avg_hit_speed), alpha = 0.7, shape = 21, stroke = .5) +
  geom_text_repel(aes(label = player_name), 
                  size = 5, color = "black", 
                  na.rm = TRUE,
                  max.overlaps = 5) +
  scale_size(range = c(1, 15)) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red",
                       midpoint = median(hit_data_above_30_attempts$avg_hit_speed, na.rm = TRUE)) +
  # Subtitle fix: it used to advertise "Average Distance", which is not mapped
  # to any aesthetic in this chart; it now lists only what is drawn.
  labs(title = "No one crushes the ball like Ohtani",
       subtitle = "Max Hit Speed, Max Distance, and Average Hit Speed by Player\nnickwarino.com, created 2024-04-25",
       caption = "sources: Statcast",
       x = "Max Hit Speed",
       y = "Max Distance",
       fill = "Average Hit Speed",
       size = "Attempts") +
  theme(legend.position = c(.9, .9)) +
  My_Theme_WithY()

Show the code
# Scatter of max_hit_speed (x) against max_distance (y), one point per row of
# hit_data_above_30_attempts. Point size encodes attempts and fill encodes
# est_woba, with the fill gradient centred on the median est_woba.
ggplot(
  data = hit_data_above_30_attempts,
  mapping = aes(x = max_hit_speed, y = max_distance)
) +
  geom_point(
    mapping = aes(size = attempts, fill = est_woba),
    shape = 21,
    stroke = 0.5,
    alpha = 0.7
  ) +
  geom_text_repel(
    mapping = aes(label = player_name),
    color = "black",
    size = 5,
    max.overlaps = 5,
    na.rm = TRUE
  ) +
  scale_fill_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = median(hit_data_above_30_attempts$est_woba, na.rm = TRUE)
  ) +
  scale_size(range = c(1, 15)) +
  labs(
    title = "No one crushes the ball like Ohtani",
    subtitle = "Max Hit Speed, Max Distance, and Expected wOBA by Player\nnickwarino.com, created 2024-04-25",
    caption = "sources: Statcast",
    x = "Max Hit Speed",
    y = "Max Distance",
    size = "Attempts",
    fill = "Expected wOBA"
  ) +
  theme(legend.position = c(0.9, 0.9)) +
  My_Theme_WithY()

4 Fuck the Yankees: Team Payroll History

Show the code
# Read the saved team payroll snapshot (file dated 2024-05-02) from the
# output_data folder, keeping text columns as plain character vectors.
team_salaries <- here("output_data", "team_salaries_2024-05-02.csv") |>
  read.csv(stringsAsFactors = FALSE)
Show the code
# Add two columns per row:
#   * league_payroll        - sum of Payroll across all rows sharing that Year,
#   * pct_of_league_payroll - this row's Payroll as a percentage of that total,
#                             rounded to two decimals.
# The grouping is dropped again at the end so team_salaries does not stay
# grouped by Year for later, unrelated operations.
team_salaries <- team_salaries |>
  group_by(Year) |>
  mutate(
    league_payroll = sum(Payroll),
    pct_of_league_payroll = round((Payroll / league_payroll) * 100, 2)
  ) |>
  ungroup()
Show the code
library(tidyverse)

# Teams drawn in their own colour with a thicker line; every other team is
# drawn as a thin light-grey "Other" line.
highlighted_teams <- c("Yankees", "Dodgers", "Red Sox", "Giants")

# One line per team showing its share of total league payroll by year.
# NOTE(review): limits = c(0, 10) on the y scale removes any value above 10
# from the drawn data rather than just zooming - confirm no team exceeds 10%.
# NOTE(review): on ggplot2 >= 3.4.0 line thickness is expected via `linewidth`
# rather than `size`; left as `size` here to stay compatible with the
# installed version - confirm and migrate if appropriate.
team_salaries %>%
  ggplot(aes(x = Year, y = pct_of_league_payroll, group = TeamName,
             color = ifelse(TeamName %in% highlighted_teams, TeamName, "Other"))) +
  geom_line(aes(size = ifelse(TeamName %in% highlighted_teams, 2, 0.5))) +
  scale_size_identity() +  # Use the 2 / 0.5 values literally as line sizes
  scale_y_continuous(limits = c(0, 10)) +
  scale_color_manual(values = c(
    "Yankees" = "black",
    "Dodgers" = "blue",
    "Red Sox" = "red",
    "Giants" = "orange",
    "Other" = "lightgrey"  # Light grey for all other teams
  )) +
  labs(title = "Fuck the Yankees: Team Payroll as a Percentage of League Payroll (1985-2024)",
       x = "Year",
       y = "Percentage of League Payroll",
       caption = "Source: Baseball-Reference, Lahman package. Created by Nick Warino on 2024-05-02") +
  # "none" is the supported way to suppress a legend; passing FALSE to
  # guides() is deprecated and emits a warning.
  guides(color = guide_legend(title = "Team Name"), size = "none") +
  My_Theme_WithY() +
  theme(legend.position = "top")  # Applied after the custom theme so it wins