Oakland Athletics

library(dplyr)
library(Lahman)
library(knitr)
library(kableExtra)
library(ggplot2)
library(scales)
library(stringr)
library(ggthemes)

# Colour palette shared by every plot in this report (hex RGB strings).
yellow <- "#F0B21E"
green <- "#003831"
red <- "#A31621"
bleu <- "#3772FF"
violet <- "#0B0033"
white <- "#ECF8F8"

# Ordered palette handed to manual colour scales; the order matters because
# unnamed values are assigned to scale levels by position.
mycols <- c(
  yellow,
  green,
  red,
  bleu,
  violet,
  white
)

Presentation

yearID Rank
1968 6
1969 2
1970 2
1971 1
1972 1
1973 1
1974 1
1975 1
1976 2
1977 7
1978 6
1979 7
1980 2
1981 1
1982 5
1983 4
1984 4
1985 4
1986 3
1987 3
1988 1
1989 1
1990 1
1991 4
1992 1
1993 7
1994 2
1995 4
1996 3
1997 4
1998 4
1999 2
2000 1
2001 2
2002 1
2003 1
2004 2
2005 2
2006 1
2007 3
2008 3
2009 4
2010 2
2011 3
2012 1
2013 1
2014 2
2015 5
2016 5
2017 5
2018 2
2019 2
2020 1
# Line chart of the team's rank per season.
# NOTE(review): Teams_table is built outside this chunk; it is assumed to
# carry numeric `yearID` and `Rank` columns - confirm against its definition.
ggplot(Teams_table, aes(x = yearID, y = Rank)) +
  geom_line(colour = green) +
  # Points are coloured by whether the season's rank is worse than the best
  # rank in the data: FALSE takes the first palette entry, TRUE the second.
  geom_point(aes(colour = Rank > min(Rank, na.rm = TRUE))) +
  scale_color_manual(values = mycols) +
  # Reverse the axis so that rank 1 is drawn at the top.
  scale_y_reverse() +
  # Horizontal reference line at the mean rank over the whole period.
  geom_hline(
    yintercept = mean(Teams_table$Rank, na.rm = TRUE),
    colour = yellow
  ) +
  theme(
    text = element_text(size = 12),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    legend.position = "none",
    axis.line = element_line(colour = green)
  ) +
  labs(
    title = "Oakland Athletics",
    subtitle = paste0(
      "Ranking: ", min(Teams_table$yearID), " - ", max(Teams_table$yearID)
    ),
    caption = "Data from Lahman",
    y = "Ranking", x = "Year"
  )

League

franchID franchName
ANA Los Angeles Angels of Anaheim
ARI Arizona Diamondbacks
ATL Atlanta Braves
BAL Baltimore Orioles
BOS Boston Red Sox
CHC Chicago Cubs
CHW Chicago White Sox
CIN Cincinnati Reds
CLE Cleveland Indians
COL Colorado Rockies
DET Detroit Tigers
FLA Florida Marlins
HOU Houston Astros
KCR Kansas City Royals
LAD Los Angeles Dodgers
MIL Milwaukee Brewers
MIN Minnesota Twins
NYM New York Mets
NYY New York Yankees
OAK Oakland Athletics
PHI Philadelphia Phillies
PIT Pittsburgh Pirates
SDP San Diego Padres
SEA Seattle Mariners
SFG San Francisco Giants
STL St. Louis Cardinals
TBD Tampa Bay Rays
TEX Texas Rangers
TOR Toronto Blue Jays
WSN Washington Nationals

Oakland Athletics

Fielding 2001 vs 2002

Comparison of the players who played in 2001 vs 2002

Top players 2001 (number of games played)

nameLast nameFirst playerID yearID stint teamID lgID POS G GS InnOuts PO A E DP
Long Terrence longte01 2001 1 OAK AL OF 162 158 4276 332 5 7 3
Tejada Miguel tejadmi01 2001 1 OAK AL SS 162 160 4294 256 473 20 93
Damon Johnny damonjo01 2001 1 OAK AL OF 154 152 4051 345 4 3 1
Chavez Eric chaveer01 2001 1 OAK AL 3B 149 146 3902 100 321 12 27
Giambi Jason giambja01 2001 1 OAK AL 1B 136 136 3529 1224 76 11 107
Menechino Frank menecfr01 2001 1 OAK AL 2B 136 127 3482 253 406 15 90

Top 6 players from 2001 who did not play in 2002

nameLast nameFirst playerID yearID stint teamID lgID POS G GS InnOuts PO A E DP
Damon Johnny damonjo01 2001 1 OAK AL OF 154 152 4051 345 4 3 1
Giambi Jason giambja01 2001 1 OAK AL 1B 136 136 3529 1224 76 11 107
Isringhausen Jason isrinja01 2001 1 OAK AL P 65 0 214 1 8 2 0
Guthrie Mark guthrma01 2001 1 OAK AL P 54 0 157 0 4 0 0
Vizcaino Luis vizcalu01 2001 1 OAK AL P 36 0 110 0 3 0 0
Heredia Gil heredgi01 2001 1 OAK AL P 24 18 329 16 17 2 0

https://www.nytimes.com/2001/12/05/sports/baseball-giambi-to-join-yanks-barring-bid-by-the-a-s.html

Run differential

The run differential breakdown

Pythagorean Theorem of Baseball

W: Wins

L: Losses

R: Runs scored

RA: Opponents runs scored (Runs Allowed)



\(\frac{R^2}{R^2+RA^2} = Est.W\%\)

teamID G W L R RA W-L W% R-RA Est.W% Revised Est.W%
ANA 162 75 87 691 730 -12 46.3% -39 47.3% 47.5%
ARI 162 92 70 818 677 22 56.8% 141 59.3% 58.5%
ATL 162 88 74 729 643 14 54.3% 86 56.2% 55.7%
BAL 162 63 98 687 829 -35 38.9% -142 40.7% 41.6%
BOS 161 82 79 772 745 3 50.9% 27 51.8% 51.6%
CHA 162 83 79 798 795 4 51.2% 3 50.2% 50.2%
CHN 162 88 74 777 701 14 54.3% 76 55.1% 54.6%
CIN 162 66 96 735 850 -30 40.7% -115 42.8% 43.5%
CLE 162 91 71 897 821 20 56.2% 76 54.4% 54.0%
COL 162 73 89 923 906 -16 45.1% 17 50.9% 50.8%
DET 162 66 96 724 876 -30 40.7% -152 40.6% 41.5%
FLO 162 76 86 742 744 -10 46.9% -2 49.9% 49.9%
HOU 162 93 69 847 769 24 57.4% 78 54.8% 54.4%
KCA 162 65 97 729 858 -32 40.1% -129 41.9% 42.7%
LAN 162 86 76 758 744 10 53.1% 14 50.9% 50.8%
MIL 162 68 94 740 806 -26 42.0% -66 45.7% 46.1%
MIN 162 85 77 771 766 8 52.5% 5 50.3% 50.3%
MON 162 68 94 670 812 -26 42.0% -142 40.5% 41.4%
NYA 161 95 65 804 713 30 59.0% 91 56.0% 55.4%
NYN 162 82 80 642 713 2 50.6% -71 44.8% 45.3%
OAK 162 102 60 884 645 42 63.0% 239 65.3% 63.9%
PHI 162 86 76 746 719 10 53.1% 27 51.8% 51.7%
PIT 162 62 100 657 858 -38 38.3% -201 37.0% 38.2%
SDN 162 79 83 789 812 -4 48.8% -23 48.6% 48.7%
SEA 162 116 46 927 627 70 71.6% 300 68.6% 67.0%
SFN 162 90 72 799 748 18 55.6% 51 53.3% 53.0%
SLN 162 93 69 814 684 24 57.4% 130 58.6% 57.8%
TBA 162 62 100 672 887 -38 38.3% -215 36.5% 37.7%
TEX 162 73 89 890 968 -16 45.1% -78 45.8% 46.2%
TOR 162 80 82 767 753 -2 49.4% 14 50.9% 50.8%

Runs created

Thanks to Ibrahim

Basic runs created

In the most basic runs created formula:

\(RC = \frac{(H + BB) * TB}{AB + BB}\)

where H is hits, BB is base on balls, TB is total bases and AB is at-bats.

“Stolen base” version of runs created

This formula expands on the basic formula by accounting for a player’s basestealing ability.

\(RC = \frac{(H + BB - CS) * (TB + (0.55 * SB))}{AB + BB}\)

where H is hits, BB is base on balls, CS is caught stealing, TB is total bases, SB is stolen bases, and AB is at bats.

“Technical” version of runs created

This formula accounts for all basic, easily available offensive statistics.

\(RC = \frac{(H + BB - CS + HBP - GIDP) * (TB + (0.26 * (BB - IBB + HBP))+(0.52 * (SH + SF + SB)))}{AB + BB + HBP + SH + SF}\)

where H is hits, BB is base on balls, CS is caught stealing, HBP is hit by pitch, GIDP is grounded into double play, TB is total bases, IBB is intentional base on balls, SH is sacrifice hit, SF is sacrifice fly, SB is stolen base, and AB is at bats.

# Runs created (RC) for every 2001 Oakland batting line, in the three
# variants described above: basic (RC1), stolen-base (RC2), technical (RC3).
People <- Lahman::People
Batting <- Lahman::Batting %>%
  # Filter first so that only the Oakland 2001 rows are joined with People,
  # instead of merging the full Batting table and discarding most of it.
  filter(yearID == 2001, teamID == "OAK") %>%
  # merge() is an inner join that sorts on the key, so rows stay ordered by
  # playerID exactly as when the join was done before the filter.
  merge(y = People, by = "playerID") %>%
  # Total bases: hits plus one extra base per double, two per triple and
  # three per home run.
  mutate(TB = H + X2B + 2 * X3B + 3 * HR) %>%
  select(
    nameLast, nameFirst, yearID,
    AB, H, BB, CS, SB, HBP, GIDP, IBB, SH, SF, TB
  ) %>%
  # Rows with AB + BB == 0 (no plate appearances) evaluate to 0 / 0 and
  # therefore show up as NaN in the table.
  mutate(
    RC1 = ((H + BB) * TB) / (AB + BB),
    RC2 = ((H + BB - CS) * (TB + (0.55 * SB))) / (AB + BB),
    RC3 = ((H + BB - CS + HBP - GIDP) *
      (TB + (0.26 * (BB - IBB + HBP)) + (0.52 * (SH + SF + SB)))) /
      (AB + BB + HBP + SH + SF)
  )

# Render the result as a striped, full-width HTML table.
Batting %>%
  kable(escape = FALSE) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = TRUE,
    position = "float_left"
  )
nameLast nameFirst yearID AB H BB CS SB HBP GIDP IBB SH SF TB RC1 RC2 RC3
Abad Andy 2001 1 0 0 0 0 0 0 0 0 0 0 0.0000000 0.0000000 0.0000000
Bellhorn Mark 2001 74 10 7 0 0 0 1 0 1 0 18 3.7777778 3.7777778 3.9687805
Bradford Chad 2001 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN
Byrnes Eric 2001 38 9 4 0 1 1 0 0 0 0 19 5.8809524 6.0511905 6.7786047
Chavez Eric 2001 552 159 41 2 8 4 7 9 0 7 298 100.5059022 100.9699831 101.7486755
Christenson Ryan 2001 4 0 0 0 0 0 0 0 0 0 0 0.0000000 0.0000000 0.0000000
Damon Johnny 2001 644 165 61 12 27 5 7 1 5 4 234 75.0127660 75.5374468 79.4985257
Dye Jermaine 2001 232 69 27 0 2 1 6 3 0 5 127 47.0733591 47.4810811 47.0933585
Fasano Sal 2001 21 1 1 0 0 1 1 0 0 0 1 0.0909091 0.0909091 0.1321739
Fyhrie Mike 2001 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN
Gant Ron 2001 81 21 11 0 2 0 0 0 0 1 34 11.8260870 12.2086957 13.2197849
Giambi Jason 2001 520 178 129 0 2 13 17 24 0 9 343 162.2511556 162.7714946 171.3236960
Giambi Jeremy 2001 371 105 63 1 0 4 13 1 3 2 167 64.6451613 64.2603687 66.6096614
Guthrie Mark 2001 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN
Harville Chad 2001 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN
Heredia Gil 2001 3 1 0 0 0 0 0 0 0 0 1 0.3333333 0.3333333 0.3333333
Hernandez Ramon 2001 453 115 37 1 1 6 10 3 9 4 185 57.3877551 57.1796939 58.5343026
Hiljus Erik 2001 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN
Hudson Tim 2001 8 0 0 0 0 0 0 0 0 0 0 0.0000000 0.0000000 0.0000000
Isringhausen Jason 2001 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN
Jaha John 2001 45 4 6 0 0 0 3 0 0 1 7 1.3725490 1.3725490 1.2223077
Jennings Robin 2001 52 13 2 0 0 0 0 0 0 1 16 4.4444444 4.4444444 4.6472727
Lidle Cory 2001 2 0 0 0 0 0 0 0 0 0 0 0.0000000 0.0000000 0.0000000
Long Terrence 2001 629 178 52 3 9 0 17 8 0 6 259 87.4743025 87.9833333 85.0515284
Magnante Mike 2001 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN
Mathews T. J. 2001 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN
McMillon Billy 2001 58 17 5 0 1 1 0 0 0 1 26 9.0793651 9.2714286 10.1200000
Mecir Jim 2001 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN
Menechino Frank 2001 471 114 79 3 2 19 13 0 3 6 176 61.7600000 61.1800000 70.2615917
Mulder Mark 2001 5 1 0 0 0 0 0 0 0 0 1 0.2000000 0.2000000 0.2000000
Myers Greg 2001 87 16 13 0 0 0 2 1 0 0 38 11.0200000 11.0200000 11.1024000
Ortiz Jose 2001 42 7 3 0 1 0 4 0 0 1 7 1.5555556 1.6777778 1.1504348
Piatt Adam 2001 95 20 13 0 0 0 5 0 1 2 27 8.2500000 8.2500000 8.0569369
Ryan Rob 2001 7 0 0 0 0 0 0 0 0 0 0 0.0000000 0.0000000 0.0000000
Saenz Olmedo 2001 305 67 19 1 0 13 9 1 1 3 117 31.0555556 30.6944444 33.1831672
Santangelo F. P. 2001 71 14 11 1 1 5 1 0 1 1 18 5.4878049 5.4292683 7.4624719
Tam Jeff 2001 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN
Tejada Miguel 2001 622 166 43 5 11 13 14 5 1 4 296 93.0285714 92.6589474 94.3905417
Valdez Mario 2001 54 15 12 0 0 1 0 1 0 0 19 7.7727273 7.7727273 9.2441791
Vizcaino Luis 2001 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN
Wilson Tom 2001 21 4 1 0 0 1 1 0 0 1 10 2.2727273 2.2727273 2.3000000
Zito Barry 2001 5 0 0 0 0 0 0 0 0 0 0 0.0000000 0.0000000 0.0000000

Free Part

# League-wide totals per season and the resulting batting average, expressed
# as a percentage (hits per 100 at-bats).
# H  = Hits: times reached base because of a batted, fair ball without error
#      by the defense
# AB = At Bats
Batting2 <- Lahman::Batting %>%
  group_by(yearID) %>%
  # na.rm = TRUE keeps a season's total usable even if individual player rows
  # have missing counts (presumably only in some Lahman releases - harmless
  # when there are none).
  summarise(
    G = sum(G, na.rm = TRUE),
    AB = sum(AB, na.rm = TRUE),
    R = sum(R, na.rm = TRUE),
    H = sum(H, na.rm = TRUE)
  ) %>%
  mutate(batting_average = H / AB * 100)

# Scatter of the yearly batting average with a B-spline (5 df) trend line
# fitted through lm().
Batting2_graph <- ggplot(Batting2, aes(x = yearID, y = batting_average)) +
  geom_smooth(
    col = red,
    method = "lm",
    formula = y ~ splines::bs(x, 5),
    se = FALSE
  ) +
  geom_point(color = green) +
  # "none" is the supported way to switch a guide off; passing FALSE is
  # deprecated in ggplot2.
  guides(fill = "none") +
  theme(
    text = element_text(size = 12),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    legend.position = "none",
    axis.line = element_line(colour = green)
  ) +
  labs(
    title = "Number of successful hits",
    subtitle = "Over time",
    y = "Percentage", x = "Year"
  )
Batting2_graph

# Award-voting results for 2001: one row per player with the mean points won
# and the mean maximum attainable points across that player's 2001 entries,
# plus the resulting share in percent.
People <- Lahman::People

AwardsPlayers <- Lahman::AwardsSharePlayers %>%
  filter(yearID == 2001) %>%
  select(playerID, pointsWon, pointsMax) %>%
  group_by(playerID) %>%
  summarise(pointsWon = mean(pointsWon), pointsMax = mean(pointsMax)) %>%
  # Attach the players' names (inner join on playerID).
  merge(y = People, by = "playerID") %>%
  select(nameGiven, pointsWon, pointsMax) %>%
  mutate(pointsPerc = round(pointsWon / pointsMax * 100, 1))

# Points won against share of the maximum. Players above the mean number of
# points won are drawn with the second manual colour (green), the others with
# the first (red); the grey vertical line marks that mean.
AwardsPlayers_graph <- ggplot(
  AwardsPlayers,
  aes(x = pointsWon, y = pointsPerc)
) +
  xlim(0, 500) +
  geom_vline(
    xintercept = mean(AwardsPlayers$pointsWon, na.rm = TRUE),
    col = "grey"
  ) +
  geom_point(aes(colour = pointsWon > mean(pointsWon, na.rm = TRUE))) +
  geom_smooth(
    col = yellow,
    method = "lm",
    formula = y ~ splines::bs(x, 3),
    se = FALSE
  ) +
  # Labels sit just right of their point; check_overlap drops any label that
  # would overlap one already drawn.
  geom_text(
    aes(
      label = nameGiven,
      colour = pointsWon > mean(pointsWon, na.rm = TRUE)
    ),
    hjust = -0.2,
    vjust = 0,
    check_overlap = TRUE
  ) +
  scale_color_manual(values = c(red, green)) +
  theme(
    text = element_text(size = 12),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    legend.position = "none",
    axis.line = element_line(colour = green)
  ) +
  labs(
    title = "Best players according to the number of points won",
    subtitle = "in 2001",
    y = "Percentage", x = "Points Won"
  )
AwardsPlayers_graph

# Oakland salary records from the 2001 season onwards, reduced to the three
# columns the plot needs.
Salaries <- Lahman::Salaries %>%
  filter(teamID == "OAK", yearID >= 2001) %>%
  select(playerID, yearID, salary)

# One box per season; the year is converted to text so that it is treated as
# a discrete axis rather than a continuous one.
Salaries_graph <- ggplot(
  Salaries,
  aes(x = as.character(yearID), y = salary)
) +
  geom_boxplot(color = green, fill = yellow) +
  theme(
    text = element_text(size = 12),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    legend.position = "none",
    axis.line = element_line(colour = green)
  ) +
  labs(
    title = "Evolution of players salaries for Oakland",
    subtitle = "Between 2001 and 2016",
    y = "Salaries",
    x = "Year"
  )
Salaries_graph

Top 10 players with the most Hall of Fame entries (after 2000)

# Number of rows each player has in the Hall of Fame table for years after
# 2000, as a two-column data frame (playerID, Freq).
HallOfFame <- Lahman::HallOfFame %>%
  filter(yearID > 2000) %>%
  select(playerID) %>%
  table() %>%
  as.data.frame() %>%
  `colnames<-`(c("playerID", "Freq"))

# Attach the players' given names, keep the ten highest counts and render
# them as a centred HTML table.
merge(x = HallOfFame, y = Lahman::People, by = "playerID") %>%
  select(nameGiven, Freq) %>%
  arrange(desc(Freq)) %>%
  `colnames<-`(c("Name", "Number of Price")) %>%
  head(10) %>%
  kable(escape = FALSE) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  )
Name Number of Price
Alan Stuart 16
Donald Arthur 15
John Scott 15
Lee Arthur 15
Dale Bryan 13
Rik Aalbert 11
David Gene 11
Mark David 10
Timothy 10
Andre Nolan 9