# Scrape the league-wide batting table from Baseball-Reference and draw one
# facet per statistic across every season in the table.
league_per_game_url <- "https://www.baseball-reference.com/leagues/majors/bat.shtml#all_teams_standard_batting_totals"
league_per_game_url_page <- read_html(league_per_game_url)

# Only the first table parsed from the page is used.
mlb_season_history <- html_table(league_per_game_url_page)[[1]]

# Remove any row where the value in the first column is "Year"
# (presumably header rows repeated inside the table body -- verify).
mlb_season_history <- mlb_season_history %>%
  filter(.[[1]] != "Year")

# Keep column 1 (Year) plus columns 9 to 30, coerce everything to numeric,
# then drop RBI and TB so only the statistics listed in desired_order remain.
mlb_season_history_for_graph <- mlb_season_history %>%
  select(1, 9:30) %>%
  mutate(across(everything(), as.numeric)) |>
  select(-"RBI", -"TB")

# For column "SF" replace any value of 0 with NA
mlb_season_history_for_graph$SF <- ifelse(
  mlb_season_history_for_graph$SF == 0,
  NA,
  mlb_season_history_for_graph$SF
)

# Define the desired order for the statistics
desired_order <- c(
  "R", "H", "1B", "2B", "3B", "HR", "BB", "IBB", "HBP", "SF",
  "SH", "SO", "GDP", "SB", "CS", "BIP", "BA", "OBP", "SLG", "OPS"
)

# The table is scraped live, so take the year range for the title from the
# data instead of hard-coding it (a fixed "1871 to 2024" goes stale).
all_years_range <- range(mlb_season_history_for_graph$Year, na.rm = TRUE)

# Make faceted graph
mlb_season_history_for_graph %>%
  pivot_longer(cols = -Year, names_to = "Stat", values_to = "Value") %>%
  mutate(Stat = factor(Stat, levels = desired_order)) %>%
  ggplot(aes(x = Year, y = Value)) +
  geom_line() +
  facet_wrap(~Stat, scales = "free_y") +
  geom_smooth(method = "loess", se = FALSE, color = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = paste(
      "MLB Season History, Per Game Averages",
      all_years_range[1], "to", all_years_range[2]
    ),
    x = "Year",
    y = "Value"
  ) +
  My_Theme_WithY()
Show the code
# Restrict mlb_season_history_for_graph to seasons after 1899
mlb_season_history_20th_cent_for_graph <- mlb_season_history_for_graph %>%
  filter(Year > 1899)

# Take the year range for the title from the filtered data so it stays
# correct when the upstream table gains new seasons.
years_range_20th_cent <- range(
  mlb_season_history_20th_cent_for_graph$Year,
  na.rm = TRUE
)

# Make faceted graph
mlb_season_history_20th_cent_for_graph %>%
  pivot_longer(cols = -Year, names_to = "Stat", values_to = "Value") %>%
  mutate(Stat = factor(Stat, levels = desired_order)) %>%
  ggplot(aes(x = Year, y = Value)) +
  geom_line() +
  # Label the x axis every 50 years (1900, 1950, 2000)
  scale_x_continuous(breaks = seq(1900, 2020, by = 50)) +
  facet_wrap(~Stat, scales = "free_y") +
  # Add a fitted loess curve to each facet
  geom_smooth(method = "loess", se = FALSE, color = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = paste(
      "MLB Season History, Per Game Averages",
      years_range_20th_cent[1], "to", years_range_20th_cent[2]
    ),
    x = "Year",
    y = "Value",
    caption = paste(
      "Created by Nick Warino. nickwarino.com. Generated on:",
      Sys.Date()
    )
  ) +
  My_Theme_WithY()
Show the code
# Load libraries (nothing is installed here; the packages must already be
# available on the machine)
library(baseballr)

# Load the utils package
library(utils)

# URL of the CSV file
url <- "https://raw.githubusercontent.com/Neil-Paine-1/MLB-WAR-data-historical/master/jeffbagwell_war_historical_2024.csv"

# Download the file and read it into a data frame
data <- read.csv(url, stringsAsFactors = FALSE)

# Save a snapshot to the output_data folder, stamped with today's date.
# Create the folder first: write.csv() errors when the target directory
# does not exist.
output_dir <- here("output_data")
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}
write.csv(
  data,
  file.path(output_dir, paste0("mlb_war_data_", Sys.Date(), ".csv")),
  row.names = FALSE
)
Show the code
# Build the ranked season table used by every WAR plot and table below.
#
# Rows are kept when the schedule had more than 100 games or the season is
# after 2019. Total WAR is batting plus pitching WAR. Only Shohei Ohtani and
# Babe Ruth seasons receive a plot label; every other row gets NA.
#
# Ranking is done on the unrounded total and the WAR columns are rounded to
# one decimal afterwards. Ranking after rounding would leave seasons that tie
# at one decimal ordered by their input position instead of by true value.
data_filtered <- data |>
  select(player_name, lg_ID, team_ID, sched, year_ID, bwar162, br_pwar162) |>
  filter(sched > 100 | year_ID > 2019) |>
  mutate(
    twar162 = bwar162 + br_pwar162,
    label = ifelse(
      player_name %in% c("Shohei Ohtani", "Babe Ruth"),
      paste(player_name, year_ID),
      NA
    )
  ) |>
  arrange(desc(twar162)) |>
  mutate(rank_twar = row_number()) |>
  mutate(
    across(c(bwar162, br_pwar162, twar162), \(x) round(x, digits = 1))
  ) |>
  relocate(rank_twar, .before = player_name)

# Normalise every character column: convert to UTF-8 (invalid bytes are
# substituted rather than dropped), then strip any remaining non-ASCII
# characters. Non-character columns pass through untouched.
data_filtered[] <- lapply(data_filtered, function(x) {
  if (!is.character(x)) {
    return(x)
  }
  x <- iconv(x, to = "UTF-8", sub = "byte")
  gsub("[^\x01-\x7F]", "", x)
})

# NOTE(review): this keeps 1945 onward -- confirm that is the intended
# boundary for "post-integration".
data_post_integration <- data_filtered |>
  filter(year_ID > 1944)

# Seasons with at least 2 WAR both batting and pitching. The comparison runs
# on the already-rounded columns.
seasons_with_2war <- data_filtered |>
  filter(bwar162 >= 2, br_pwar162 >= 2) |>
  select(-label)

# NOTE(review): despite the "1war" in the name, the threshold applied here
# is 2 on both columns, the same as seasons_with_2war.
seasons_with_1war_post_integration <- data_post_integration |>
  filter(bwar162 >= 2, br_pwar162 >= 2) |>
  select(-label)

# Rows are already sorted by total WAR, so head() takes the best 100.
top_100_ever <- data_filtered |>
  head(100) |>
  select(-label)

top_100_post_integration <- data_post_integration |>
  head(100) |>
  select(-label)
1 Best Seasons Ever, by WAR per 162 team games
Batting and Pitching WAR per 162 team games by Season (only including seasons with >100 scheduled games)
Show the code
# Scatter of batting vs pitching WAR for every season in data_filtered.
# Point size and fill both encode total WAR; the fill gradient is centred on
# the median total. Only rows with a non-NA label get text.
# NOTE(review): theme(legend.position) is applied before My_Theme_WithY();
# if that theme sets legend.position itself it will win -- confirm.
ggplot(data_filtered, aes(x = bwar162, y = br_pwar162)) +
  geom_point(
    aes(size = twar162, fill = twar162),
    alpha = 0.7,
    shape = 21,
    stroke = .5
  ) +
  geom_text_repel(
    aes(label = label),
    size = 5,
    color = "black",
    na.rm = TRUE,
    max.overlaps = 5
  ) +
  scale_fill_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = median(data_filtered$twar162, na.rm = TRUE)
  ) +
  labs(
    title = "WAR By Season, pro-rated to 162 team games",
    subtitle = "Batting and Pitching WAR per 162 team games by Season (only including seasons with >100 scheduled games).\nnickwarino.com, created 2023-08-08",
    # Source name corrected from "Neil Pain's" to match the data repository.
    caption = "sources: Neil Paine's MLB historical WAR data",
    x = "Batting WAR",
    y = "Pitching WAR",
    fill = "Total WAR"
  ) +
  theme(legend.position = c(.9, .9)) +
  My_Theme_WithY()
Show the code
# Render the 100 highest-ranked seasons as a table
top_100_ever |>
  kable()
rank_twar
player_name
lg_ID
team_ID
sched
year_ID
bwar162
br_pwar162
twar162
1
Walter Johnson
AL
WSH
155
1913
1.0
15.9
16.9
2
Walter Johnson
AL
WSH
154
1912
1.2
15.0
16.2
3
Babe Ruth
AL
NYY
152
1923
15.4
0.0
15.4
4
Cy Young
AL
BOS
138
1901
0.1
14.6
14.7
5
Walter Johnson
AL
WSH
130
1918
1.2
13.0
14.3
6
Babe Ruth
AL
NYY
153
1921
14.1
-0.3
13.8
7
Joe McGinnity
NL
NYG
142
1903
-0.2
13.9
13.7
8
Grover Alexander
NL
CHC
154
1920
0.7
12.6
13.3
9
Babe Ruth
AL
NYY
155
1927
13.3
0.0
13.3
10
Dwight Gooden
NL
NYM
162
1985
0.9
12.2
13.1
11
Ed Walsh
AL
CHW
158
1912
0.8
12.4
13.1
12
Steve Carlton
NL
PHI
156
1972
0.5
12.5
13.0
13
Babe Ruth
AL
NYY
154
1920
13.1
-0.1
13.0
14
Bullet Rogan
NNL
KCM
101
1921
4.4
8.5
12.9
15
Rogers Hornsby
NL
STL
154
1924
12.8
0.0
12.8
16
Walter Johnson
AL
WSH
157
1914
0.6
12.1
12.8
17
Lou Gehrig
AL
NYY
155
1927
12.7
0.0
12.7
18
Walter Johnson
AL
WSH
155
1915
0.6
12.1
12.7
19
Grover Alexander
NL
PHI
154
1916
1.1
11.5
12.6
20
Hal Newhouser
AL
DET
155
1945
0.8
11.8
12.6
21
Eddie Cicotte
AL
CHW
156
1917
0.2
12.4
12.5
22
Babe Ruth
AL
NYY
153
1924
12.5
0.0
12.5
23
Russ Ford
AL
NYY
156
1910
0.6
11.8
12.4
24
Greg Maddux
NL
ATL
114
1994
0.4
12.1
12.4
25
Christy Mathewson
NL
NYG
142
1903
0.5
11.9
12.4
26
Barry Bonds
NL
SFG
162
2001
12.2
0.0
12.2
27
Barry Bonds
NL
SFG
162
2002
12.2
0.0
12.2
28
Walter Johnson
AL
WSH
142
1919
-0.1
12.3
12.2
29
Christy Mathewson
NL
NYG
157
1908
0.0
12.1
12.2
30
Honus Wagner
NL
PIT
155
1908
12.2
0.0
12.2
31
Roger Clemens
AL
TOR
162
1997
0.1
11.9
12.1
32
Babe Ruth
AL
NYY
155
1926
12.1
0.0
12.1
33
Ed Walsh
AL
CHW
156
1910
0.6
11.6
12.1
34
Ty Cobb
AL
DET
154
1917
12.0
0.0
12.0
35
Stan Coveleski
AL
CLE
129
1918
-0.4
12.4
12.0
36
Mickey Mantle
AL
NYY
154
1956
12.0
0.0
12.0
37
Cy Young
AL
BOS
138
1902
0.1
11.8
12.0
38
Jack Chesbro
AL
NYY
155
1904
0.8
11.1
11.9
39
Mickey Mantle
AL
NYY
154
1957
11.9
0.0
11.9
40
Bob Gibson
NL
STL
162
1968
0.6
11.2
11.8
41
Babe Ruth
AL
BOS
138
1919
10.9
0.9
11.8
42
Smoky Joe Wood
AL
BOS
154
1912
1.1
10.7
11.8
43
Carl Yastrzemski
AL
BOS
162
1967
11.8
0.0
11.8
44
Red Faber
AL
CHW
154
1921
-0.3
12.0
11.7
45
Fergie Jenkins
NL
CHC
162
1971
1.6
10.1
11.7
46
Pedro Martinez
AL
BOS
162
2000
0.0
11.7
11.7
47
Mike Schmidt
NL
PHI
107
1981
11.7
0.0
11.7
48
Ted Williams
AL
BOS
152
1942
11.7
0.0
11.7
49
Grover Alexander
NL
PHI
153
1915
-0.1
11.7
11.6
50
Stan Musial
NL
STL
155
1948
11.6
0.0
11.6
51
Rube Waddell
AL
PHA
137
1902
0.8
10.8
11.6
52
Rogers Hornsby
NL
STL
154
1921
11.5
0.0
11.5
53
Ted Williams
AL
BOS
156
1946
11.5
0.0
11.5
54
Ty Cobb
AL
DET
154
1911
11.4
0.0
11.4
55
Jimmie Foxx
AL
PHA
154
1932
11.4
0.0
11.4
56
Dolf Luque
NL
CIN
154
1923
0.2
11.3
11.4
57
Gaylord Perry
AL
CLE
156
1972
0.2
11.2
11.4
58
Jeff Bagwell
NL
HOU
115
1994
11.3
0.0
11.3
59
Barry Bonds
NL
SFG
162
2004
11.3
0.0
11.3
60
Eddie Cicotte
AL
CHW
140
1919
0.1
11.2
11.3
61
Bob Gibson
NL
STL
162
1969
0.9
10.4
11.3
62
Wes Ferrell
AL
BOS
154
1935
2.6
8.6
11.2
63
Lefty Grove
AL
BOS
155
1936
-0.5
11.7
11.2
64
Rogers Hornsby
NL
CHC
156
1929
11.2
0.0
11.2
65
Walter Johnson
AL
WSH
159
1916
1.0
10.2
11.2
66
Babe Ruth
AL
NYY
154
1930
10.9
0.2
11.2
67
Dizzy Trout
AL
DET
156
1944
1.6
9.6
11.2
68
Ed Walsh
AL
CHW
156
1908
0.5
10.7
11.2
69
Ted Williams
AL
BOS
155
1941
11.2
0.0
11.2
70
Rogers Hornsby
NL
STL
153
1925
11.1
0.0
11.1
71
Cal Ripken Jr.
AL
BAL
162
1991
11.1
0.0
11.1
72
Babe Ruth
AL
NYY
155
1931
11.1
0.0
11.1
73
Tom Seaver
NL
NYM
161
1973
0.4
10.7
11.1
74
Lou Boudreau
AL
CLE
156
1948
11.0
0.0
11.0
75
Walter Johnson
AL
WSH
157
1910
0.2
10.8
11.0
76
Christy Mathewson
NL
NYG
155
1905
1.0
10.1
11.0
77
Joe Morgan
NL
CIN
162
1975
11.0
0.0
11.0
78
Jack Taylor
NL
CHC
143
1902
0.4
10.6
11.0
79
Rube Waddell
AL
PHA
155
1904
-0.8
11.8
11.0
80
Honus Wagner
NL
PIT
155
1905
11.0
0.0
11.0
81
Wilbur Wood
AL
CHW
162
1971
-0.8
11.8
11.0
82
Ty Cobb
AL
DET
155
1910
10.9
0.0
10.9
83
Lou Gehrig
AL
NYY
154
1934
10.9
0.0
10.9
84
Lefty Grove
AL
PHA
154
1930
0.0
10.9
10.9
85
Aaron Judge
AL
NYY
143
2024
10.9
0.0
10.9
86
Greg Maddux
NL
ATL
144
1995
-0.1
10.9
10.9
87
Willie Mays
NL
NYG
154
1954
10.9
0.0
10.9
88
Willie Mays
NL
SFG
163
1965
10.9
0.0
10.9
89
Babe Ruth
AL
NYY
154
1928
10.9
0.0
10.9
90
Tris Speaker
AL
BOS
154
1912
10.9
0.0
10.9
91
Wilbur Wood
AL
CHW
154
1972
-0.4
11.3
10.9
92
Rogers Hornsby
NL
STL
154
1922
10.8
0.0
10.8
93
Rogers Hornsby
NL
NYG
155
1927
10.8
0.0
10.8
94
Aaron Judge
AL
NYY
162
2022
10.8
0.0
10.8
95
Christy Mathewson
NL
NYG
158
1909
0.9
9.9
10.8
96
Willie Mays
NL
SFG
162
1964
10.8
0.0
10.8
97
Tom Seaver
NL
NYM
162
1971
0.6
10.2
10.8
98
Dazzy Vance
NL
BRO
154
1924
-0.2
11.0
10.8
99
Lefty Grove
AL
PHA
153
1931
-0.3
11.0
10.7
100
Dazzy Vance
NL
BRO
155
1928
0.1
10.6
10.7
Show the code
# Render the seasons that reach the threshold in both batting and pitching WAR
seasons_with_2war |>
  kable()
rank_twar
player_name
lg_ID
team_ID
sched
year_ID
bwar162
br_pwar162
twar162
14
Bullet Rogan
NNL
KCM
101
1921
4.4
8.5
12.9
62
Wes Ferrell
AL
BOS
154
1935
2.6
8.6
11.2
144
Shohei Ohtani
AL
LAA
162
2023
6.3
3.9
10.1
158
Lewis Hampton
NNL
ABC
101
1922
3.3
6.7
10.0
199
Shohei Ohtani
AL
LAA
162
2022
3.5
6.2
9.7
276
Babe Ruth
AL
BOS
126
1918
6.3
3.0
9.3
329
Shohei Ohtani
AL
LAA
162
2021
4.9
4.1
9.0
437
Wes Ferrell
AL
CLE
155
1931
2.0
6.5
8.5
2740
Don Drysdale
NL
LAD
162
1965
2.3
3.2
5.6
2974
Don Newcombe
NL
BRO
154
1955
2.4
3.1
5.5
2 Best Seasons Since MLB Integration, by WAR per 162 team games
Batting and Pitching WAR per 162 team games by Season (only including seasons with >100 scheduled games)
Show the code
# Scatter of batting vs pitching WAR restricted to data_post_integration.
# Point size (scaled to the 1-10 range) and fill both encode total WAR; the
# fill gradient is centred on the median total of this subset.
# NOTE(review): theme(legend.position) is applied before My_Theme_WithY();
# if that theme sets legend.position itself it will win -- confirm.
ggplot(data_post_integration, aes(x = bwar162, y = br_pwar162)) +
  geom_point(
    aes(size = twar162, fill = twar162),
    alpha = 0.7,
    shape = 21,
    stroke = .5
  ) +
  scale_size(range = c(1, 10)) +
  geom_text_repel(
    aes(label = label),
    size = 5,
    color = "black",
    na.rm = TRUE,
    max.overlaps = 5
  ) +
  scale_fill_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = median(data_post_integration$twar162, na.rm = TRUE)
  ) +
  labs(
    title = "WAR By Season (Post-Integrated MLB), pro-rated to 162 team games",
    # Closing parenthesis added; the original subtitle left it unbalanced.
    subtitle = "Batting and Pitching WAR per 162 team games by Season (only including seasons with >100 scheduled games).\nnickwarino.com, created 2023-08-08",
    # Source name corrected from "Neil Pain's" to match the data repository.
    caption = "sources: Neil Paine's MLB historical WAR data",
    x = "Batting WAR",
    y = "Pitching WAR",
    fill = "Total WAR"
  ) +
  theme(legend.position = c(.9, .9)) +
  My_Theme_WithY()
Show the code
# Render the 100 highest-ranked seasons from the post-integration subset
top_100_post_integration |>
  kable()
rank_twar
player_name
lg_ID
team_ID
sched
year_ID
bwar162
br_pwar162
twar162
10
Dwight Gooden
NL
NYM
162
1985
0.9
12.2
13.1
12
Steve Carlton
NL
PHI
156
1972
0.5
12.5
13.0
20
Hal Newhouser
AL
DET
155
1945
0.8
11.8
12.6
24
Greg Maddux
NL
ATL
114
1994
0.4
12.1
12.4
26
Barry Bonds
NL
SFG
162
2001
12.2
0.0
12.2
27
Barry Bonds
NL
SFG
162
2002
12.2
0.0
12.2
31
Roger Clemens
AL
TOR
162
1997
0.1
11.9
12.1
36
Mickey Mantle
AL
NYY
154
1956
12.0
0.0
12.0
39
Mickey Mantle
AL
NYY
154
1957
11.9
0.0
11.9
40
Bob Gibson
NL
STL
162
1968
0.6
11.2
11.8
43
Carl Yastrzemski
AL
BOS
162
1967
11.8
0.0
11.8
45
Fergie Jenkins
NL
CHC
162
1971
1.6
10.1
11.7
46
Pedro Martinez
AL
BOS
162
2000
0.0
11.7
11.7
47
Mike Schmidt
NL
PHI
107
1981
11.7
0.0
11.7
50
Stan Musial
NL
STL
155
1948
11.6
0.0
11.6
53
Ted Williams
AL
BOS
156
1946
11.5
0.0
11.5
57
Gaylord Perry
AL
CLE
156
1972
0.2
11.2
11.4
58
Jeff Bagwell
NL
HOU
115
1994
11.3
0.0
11.3
59
Barry Bonds
NL
SFG
162
2004
11.3
0.0
11.3
61
Bob Gibson
NL
STL
162
1969
0.9
10.4
11.3
71
Cal Ripken Jr.
AL
BAL
162
1991
11.1
0.0
11.1
73
Tom Seaver
NL
NYM
161
1973
0.4
10.7
11.1
74
Lou Boudreau
AL
CLE
156
1948
11.0
0.0
11.0
77
Joe Morgan
NL
CIN
162
1975
11.0
0.0
11.0
81
Wilbur Wood
AL
CHW
162
1971
-0.8
11.8
11.0
85
Aaron Judge
AL
NYY
143
2024
10.9
0.0
10.9
86
Greg Maddux
NL
ATL
144
1995
-0.1
10.9
10.9
87
Willie Mays
NL
NYG
154
1954
10.9
0.0
10.9
88
Willie Mays
NL
SFG
163
1965
10.9
0.0
10.9
91
Wilbur Wood
AL
CHW
154
1972
-0.4
11.3
10.9
94
Aaron Judge
AL
NYY
162
2022
10.8
0.0
10.8
96
Willie Mays
NL
SFG
162
1964
10.8
0.0
10.8
97
Tom Seaver
NL
NYM
162
1971
0.6
10.2
10.8
101
Andre Dawson
NL
MON
108
1981
10.6
0.0
10.6
103
Randy Johnson
NL
ARI
162
2002
-0.2
10.7
10.5
104
Willie Mays
NL
SFG
154
1958
10.5
0.0
10.5
105
Bobby Witt Jr.
AL
KCR
144
2024
10.5
0.0
10.5
106
Ernie Banks
NL
CHC
155
1959
10.4
0.0
10.4
107
Mookie Betts
AL
BOS
162
2018
10.4
0.0
10.4
108
Roger Clemens
AL
BOS
162
1990
0.0
10.4
10.4
112
Zack Greinke
AL
KCR
162
2009
0.0
10.4
10.4
114
Phil Niekro
NL
ATL
162
1978
0.5
10.0
10.4
118
Mickey Mantle
AL
NYY
163
1961
10.3
0.0
10.3
119
Juan Marichal
NL
SFG
163
1965
0.1
10.2
10.3
120
Willie Mays
NL
SFG
165
1962
10.3
0.0
10.3
121
Robin Roberts
NL
PHI
156
1953
0.1
10.2
10.3
123
Mike Trout
AL
LAA
162
2012
10.3
0.0
10.3
124
Barry Bonds
NL
SFG
162
1993
10.2
0.0
10.2
126
Bob Gibson
NL
STL
162
1970
1.3
8.9
10.2
129
Mickey Mantle
AL
NYY
154
1955
10.2
0.0
10.2
130
Willie Mays
NL
SFG
162
1963
10.2
0.0
10.2
131
Jose Rijo
NL
CIN
162
1993
1.0
9.2
10.2
133
Ted Williams
AL
BOS
157
1947
10.2
0.0
10.2
134
Ted Williams
AL
BOS
154
1957
10.2
0.0
10.2
137
Steve Carlton
NL
PHI
162
1980
-0.1
10.2
10.1
141
Rickey Henderson
AL
OAK
162
1990
10.1
0.0
10.1
144
Shohei Ohtani
AL
LAA
162
2023
6.3
3.9
10.1
145
Al Rosen
AL
CLE
155
1953
10.1
0.0
10.1
146
Sammy Sosa
NL
CHC
162
2001
10.1
0.0
10.1
149
Robin Yount
AL
MIL
163
1982
10.1
0.0
10.1
154
Dwight Evans
AL
BOS
108
1981
10.0
0.0
10.0
155
Bob Feller
AL
CLE
156
1946
-0.4
10.4
10.0
160
Hal Newhouser
AL
DET
155
1946
-0.2
10.1
10.0
162
Jacob deGrom
NL
NYM
162
2018
0.4
9.5
9.9
164
Dick Ellsworth
NL
CHC
162
1963
-0.3
10.2
9.9
166
Ken Griffey Jr.
AL
SEA
112
1994
9.9
0.0
9.9
167
Rickey Henderson
AL
OAK
109
1981
9.9
0.0
9.9
168
Rickey Henderson
AL
NYY
161
1985
9.9
0.0
9.9
169
Sandy Koufax
NL
LAD
163
1963
-0.7
10.6
9.9
170
Kenny Lofton
AL
CLE
113
1994
9.9
0.0
9.9
171
Juan Marichal
NL
SFG
161
1966
0.7
9.1
9.9
173
Cal Ripken Jr.
AL
BAL
162
1984
9.9
0.0
9.9
174
Alex Rodriguez
AL
SEA
162
2000
9.9
0.0
9.9
175
Warren Spahn
NL
BSN
154
1947
-0.1
10.0
9.9
178
Ted Williams
AL
BOS
155
1949
9.9
0.0
9.9
179
Carl Yastrzemski
AL
BOS
162
1968
9.9
0.0
9.9
182
Barry Bonds
NL
PIT
162
1990
9.8
0.0
9.8
184
Ken Griffey Jr.
AL
SEA
161
1996
9.8
0.0
9.8
188
Pedro Martinez
AL
BOS
162
1999
0.0
9.8
9.8
189
Rico Petrocelli
AL
BOS
162
1969
9.8
0.0
9.8
190
Jackie Robinson
NL
BRO
156
1949
9.8
0.0
9.8
194
Bert Blyleven
AL
MIN
162
1973
0.0
9.7
9.7
195
Barry Bonds
NL
SFG
161
2003
9.7
0.0
9.7
197
David Cone
AL
KCR
115
1994
0.0
9.7
9.7
199
Shohei Ohtani
AL
LAA
162
2022
3.5
6.2
9.7
201
Bret Saberhagen
AL
KCR
162
1989
0.0
9.7
9.7
202
Bobby Shantz
AL
PHA
155
1952
0.4
9.2
9.7
203
Mike Trout
AL
LAA
162
2018
9.7
0.0
9.7
205
Henry Aaron
NL
MLN
155
1961
9.6
0.0
9.6
206
Adrian Beltre
NL
LAD
162
2004
9.6
0.0
9.6
207
Norm Cash
AL
DET
163
1961
9.6
0.0
9.6
209
Mark Fidrych
AL
DET
161
1976
0.0
9.6
9.6
213
Randy Johnson
AL
SEA
145
1995
0.0
9.6
9.6
214
Randy Johnson
NL
ARI
162
2001
-0.5
10.1
9.6
215
Sandy Koufax
NL
LAD
162
1966
-0.6
10.3
9.6
218
Willie Mays
NL
NYG
154
1955
9.6
0.0
9.6
219
Joe Morgan
NL
CIN
162
1976
9.6
0.0
9.6
220
Rick Reuschel
NL
CHC
162
1977
0.2
9.4
9.6
221
Jackie Robinson
NL
BRO
158
1951
9.6
0.0
9.6
222
Ron Santo
NL
CHC
162
1967
9.6
0.0
9.6
Show the code
# Render the post-integration seasons that reach the threshold in both
# batting and pitching WAR
seasons_with_1war_post_integration |>
  kable()
rank_twar
player_name
lg_ID
team_ID
sched
year_ID
bwar162
br_pwar162
twar162
144
Shohei Ohtani
AL
LAA
162
2023
6.3
3.9
10.1
199
Shohei Ohtani
AL
LAA
162
2022
3.5
6.2
9.7
329
Shohei Ohtani
AL
LAA
162
2021
4.9
4.1
9.0
2740
Don Drysdale
NL
LAD
162
1965
2.3
3.2
5.6
2974
Don Newcombe
NL
BRO
154
1955
2.4
3.1
5.5
3 2024 Hit Data
Show the code
# Read the two CSV exports stored under raw_data
hit_data <- read.csv(
  here("raw_data", "exit_velocity.csv"),
  stringsAsFactors = FALSE
)
hit_data2 <- read.csv(
  here("raw_data", "expected_stats.csv"),
  stringsAsFactors = FALSE
)

# Attach the expected-stats columns to each exit-velocity row via player_id
hit_data <- left_join(hit_data, hit_data2, by = "player_id")

# The first column is renamed by position to player_name
names(hit_data)[1] <- "player_name"

# Keep only rows with more than 30 attempts
hit_data_above_30_attempts <- filter(hit_data, attempts > 30)

# Max vs average hit speed per player; size = attempts, fill = max distance
hit_data_above_30_attempts |>
  ggplot(aes(x = max_hit_speed, y = avg_hit_speed)) +
  geom_point(
    aes(size = attempts, fill = max_distance),
    alpha = 0.7,
    shape = 21,
    stroke = .5
  ) +
  geom_text_repel(
    aes(label = player_name),
    size = 5,
    color = "black",
    na.rm = TRUE,
    max.overlaps = 5
  ) +
  scale_size(range = c(1, 15)) +
  scale_fill_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = median(hit_data_above_30_attempts$max_distance, na.rm = TRUE)
  ) +
  labs(
    title = "No one crushes the ball like Ohtani",
    subtitle = "Max Hit Speed, Average Hit Speed, Max Distance, and Average Distance by Player\nnickwarino.com, created 2024-04-25",
    caption = "sources: Statcast",
    x = "Max Hit Speed",
    y = "Average Hit Speed",
    fill = "Max Distance",
    size = "Attempts"
  ) +
  theme(legend.position = c(.9, .9)) +
  My_Theme_WithY()
Show the code
# Max hit speed vs max distance per player; size = attempts,
# fill = average hit speed (gradient centred on its median)
hit_data_above_30_attempts |>
  ggplot(aes(x = max_hit_speed, y = max_distance)) +
  geom_point(
    aes(size = attempts, fill = avg_hit_speed),
    alpha = 0.7,
    shape = 21,
    stroke = .5
  ) +
  geom_text_repel(
    aes(label = player_name),
    size = 5,
    color = "black",
    na.rm = TRUE,
    max.overlaps = 5
  ) +
  scale_size(range = c(1, 15)) +
  scale_fill_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = median(hit_data_above_30_attempts$avg_hit_speed, na.rm = TRUE)
  ) +
  labs(
    title = "No one crushes the ball like Ohtani",
    subtitle = "Max Hit Speed, Average Hit Speed, Max Distance, and Average Distance by Player\nnickwarino.com, created 2024-04-25",
    caption = "sources: Statcast",
    x = "Max Hit Speed",
    y = "Max Distance",
    fill = "Average Hit Speed",
    size = "Attempts"
  ) +
  theme(legend.position = c(.9, .9)) +
  My_Theme_WithY()
Show the code
# Max hit speed vs max distance per player; size = attempts,
# fill = est_woba (gradient centred on its median)
hit_data_above_30_attempts |>
  ggplot(aes(x = max_hit_speed, y = max_distance)) +
  geom_point(
    aes(size = attempts, fill = est_woba),
    alpha = 0.7,
    shape = 21,
    stroke = .5
  ) +
  geom_text_repel(
    aes(label = player_name),
    size = 5,
    color = "black",
    na.rm = TRUE,
    max.overlaps = 5
  ) +
  scale_size(range = c(1, 15)) +
  scale_fill_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = median(hit_data_above_30_attempts$est_woba, na.rm = TRUE)
  ) +
  labs(
    title = "No one crushes the ball like Ohtani",
    subtitle = "Max Hit Speed, Max Distance, and Expected wOBA by Player\nnickwarino.com, created 2024-04-25",
    caption = "sources: Statcast",
    x = "Max Hit Speed",
    y = "Max Distance",
    fill = "Expected wOBA",
    size = "Attempts"
  ) +
  theme(legend.position = c(.9, .9)) +
  My_Theme_WithY()
# Add two columns per row: league_payroll, the sum of Payroll across all rows
# sharing the same Year, and pct_of_league_payroll, that row's Payroll as a
# percentage of league_payroll rounded to two decimals.
# The grouping is dropped at the end so later operations on team_salaries
# are not silently computed per Year.
team_salaries <- team_salaries |>
  group_by(Year) |>
  mutate(
    league_payroll = sum(Payroll),
    pct_of_league_payroll = round((Payroll / league_payroll) * 100, 2)
  ) |>
  ungroup()
Show the code
library(tidyverse)

# Line chart of each team's share of league payroll by year. Four teams are
# drawn in their own colour with a thicker line; every other team is light
# grey and thin. Expects team_salaries to carry Year, TeamName and
# pct_of_league_payroll.
highlighted_teams <- c("Yankees", "Dodgers", "Red Sox", "Giants") # Define highlighted teams

team_salaries %>%
  ggplot(
    aes(
      x = Year,
      y = pct_of_league_payroll,
      group = TeamName,
      color = ifelse(TeamName %in% highlighted_teams, TeamName, "Other")
    )
  ) +
  # Thicker lines for the highlighted teams
  geom_line(aes(size = ifelse(TeamName %in% highlighted_teams, 2, 0.5))) +
  # Use the size values as-is instead of mapping them through a scale
  scale_size_identity() +
  scale_y_continuous(limits = c(0, 10)) +
  scale_color_manual(
    values = c(
      "Yankees" = "black",
      "Dodgers" = "blue",
      "Red Sox" = "red",
      "Giants" = "orange",
      "Other" = "lightgrey" # Light grey for all other teams
    )
  ) +
  labs(
    title = "Fuck the Yankees: Team Payroll as a Percentage of League Payroll (1985-2024)",
    x = "Year",
    y = "Percentage of League Payroll",
    caption = "Source: Baseball-Reference, Lahman package. Created by Nick Warino on 2024-05-02"
  ) +
  # Disable the size legend; "none" replaces the deprecated `size = FALSE`
  guides(color = guide_legend(title = "Team Name"), size = "none") +
  My_Theme_WithY() +
  # Applied after the custom theme so the legend position is not overridden
  theme(legend.position = "top")