Procrastination Fantasy Baseball

 

The following synopsis analyzes and explores the distinctive qualities of the Procrastination Fantasy Point system. Its intention is to find player correlations that can improve the selection and predictability of players.

# Each stats table is cut down the same way: seasons after 1905, AL or NL
# rows only, at least one game played, and every missing value set to 0.

# Batting Data Filtering
BatTotal <- filter(Batting, yearID > 1905, lgID %in% c("AL", "NL"), G > 0)
BatTotal[is.na(BatTotal)] <- 0


# Pitching Data Filtering
PitTotal <- filter(Pitching, yearID > 1905, lgID %in% c("AL", "NL"), G > 0)
PitTotal[is.na(PitTotal)] <- 0

# Fielding Data Filtering
FieldTotal <- filter(Fielding, yearID > 1905, lgID %in% c("AL", "NL"), G > 0)
FieldTotal[is.na(FieldTotal)] <- 0

# Fielding Position Factoring: fix the level order used for positions
FieldTotal <- mutate(
  FieldTotal,
  POS = factor(POS, levels = c("C", "1B", "2B", "3B", "P", "SS", "OF"))
)

# Master Data Filtering (the player table is taken as-is, no filtering)
MasterTotal <- Master

# Creating Batting Points
#
# Adds the fantasy batting score (PointsB) and the usual rate statistics.
# Total bases = singles + 2 * doubles + 3 * triples + 4 * home runs, where
# singles = H - X2B - X3B - HR.
# Rows with AB == 0 yield NaN/Inf for the rate statistics (division by zero).

BatTotal <- BatTotal %>%
  mutate(
    PointsB = R + RBI + BB + IBB - SO + SB - CS +
      (H - X2B - X3B - HR) + (2 * X2B) + (3 * X3B) + (4 * HR) + (HR * 2),
    AVG = round(H / AB, 3),
    # SLG is total bases per at-bat. The division by AB was missing, which
    # left SLG as raw total bases and made ISO (SLG - AVG) wrong as well.
    SLG = round(
      ((H - X2B - X3B - HR) + (2 * X2B) + (3 * X3B) + (4 * HR)) / AB,
      3
    ),
    OBP = round((H + BB + HBP) / (AB + BB + SF + HBP), 3),
    # OPS = on-base percentage + slugging, rounded once at the end.
    OPS = round(
      ((H + BB + HBP) / (AB + BB + SF + HBP)) +
        (((1 * (H - X2B - X3B - HR)) + (2 * X2B) + (3 * X3B) + (4 * HR)) / AB),
      3
    ),
    ISO = round(SLG - AVG, 3)
  )


# Attach the Master player columns to every batting row (matched on
# playerID), keep the identifying and scoring columns, and rank the rows
# from the highest PointsB to the lowest.

BatTotalCol <- left_join(BatTotal, MasterTotal, by = "playerID")
BatTotalCol <- select(
  BatTotalCol,
  nameFirst, nameLast, playerID, yearID, teamID, lgID,
  PointsB, AVG, SLG, OBP, OPS, ISO
)
BatTotalCol <- arrange(BatTotalCol, desc(PointsB))

# Creating Pitching Points
#
# PointsP is the fantasy pitching score. ERA, WHIP and FIP are per-inning
# rates, with innings pitched taken as IPouts / 3.

PitTotal <- mutate(
  PitTotal,
  PointsP = (W * 5) - (L * 5) + (SV * 5) + (CG * 10) - WP - BK + SO - HBP -
    BB - IBB - (ER * 2) - H + (IPouts / 3) + (SHO * 5),
  ERA = round((ER / (IPouts / 3)) * 9, 2),
  WHIP = round((H + BB) / (IPouts / 3), 2),
  FIP = round(
    ((13 * HR) + (3 * (BB + HBP)) - (2 * SO)) / (IPouts / 3) + 3.10,
    2
  )
)

# Attach the Master player columns to every pitching row (matched on
# playerID), keep the identifying and scoring columns, and rank the rows
# from the highest PointsP to the lowest.


PitTotalCol <- left_join(PitTotal, MasterTotal, by = "playerID")
PitTotalCol <- select(
  PitTotalCol,
  nameFirst, nameLast, playerID, yearID, teamID, lgID,
  PointsP, ERA, WHIP, FIP
)
PitTotalCol <- arrange(PitTotalCol, desc(PointsP))


# Calculating Fielding Points: each error costs one point, so the fielding
# score is simply the negated error count.

FieldTotal <- mutate(FieldTotal, PointsF = -E)


# Merging all datasets together.
# Start from the player table, add batting rows by player, then add the
# pitching and fielding rows that match on both player and season.
# NOTE(review): later code selects columns by position, so the join order
# and keys must stay exactly as they are here.

PointsCombine <- left_join(MasterTotal, BatTotal, by = "playerID")
PointsCombine <- left_join(
  PointsCombine, PitTotal,
  by = c("playerID", "yearID")
)
PointsCombine <- left_join(
  PointsCombine, FieldTotal,
  by = c("playerID", "yearID")
)

# Remove NA fields.
# ColNA holds the column positions in PointsCombine whose missing values
# are replaced with 0.

ColNA <- c(32:53, 57:58, 60:84, 90:101)

PointsCombine[, ColNA] <- replace(
  PointsCombine[, ColNA],
  is.na(PointsCombine[, ColNA]),
  0
)

# Adding Points together, removing rows without a player ID, arranging by
# points (highest first) and creating the Year Era ID.
#
# yearIDEra starts as a copy of each row's own yearID. It must be taken from
# the piped data: the previous `PointsCombine$yearID` read the unsorted,
# unfiltered frame, so after arrange() the era values no longer belonged to
# the rows they were stored on.

PointsCombine <- PointsCombine %>%
  mutate(Points = PointsB + PointsP + PointsF) %>%
  filter(!is.na(playerID)) %>%
  arrange(-Points) %>%
  mutate(yearIDEra = yearID)
  

# Creates Era Breaks
#
# One label per interval between consecutive breaks. cut() uses right-closed
# intervals, so e.g. 1919 falls in the first era and 1920 in the second.

EraLabel <- c(
  "Dead Ball Era (1901-1919)",
  "Live Ball Era (1920-1941)",
  "Integration Era (1942-1960)",
  "Expansion Era (1961-1976)",
  "Free Agency Era (1977-1993)",
  "Long Ball/Steroid Era (1994-2005)",
  "Post Steroid Era (2006-Current)"
)


# The last era is open-ended ("Current"), so its upper break is Inf rather
# than a fixed year; a fixed year turns every later season into NA.
EraBreaks <- c(1905, 1919, 1941, 1960, 1976, 1993, 2005, Inf)

PointsCombine$yearIDEra <- cut(
  PointsCombine$yearIDEra,
  breaks = EraBreaks,
  labels = EraLabel
)


# Creating a smaller dataset for later use: names, season, team, league,
# position, a few headline stats and the era, ranked by total Points.


PointsCol <- select(
  PointsCombine,
  nameFirst, nameLast, yearID, teamID, lgID,
  Points, POS, AVG, OPS, ERA, yearIDEra
)
PointsCol <- arrange(PointsCol, desc(Points))



# Select the desired fields to create the avg player: the zero-filled stat
# columns plus column 102.
# NOTE(review): column 102 is presumably Points (the plots below read
# `Points` from these summaries) -- confirm against the column layout.

TeamCol <- c(ColNA, 102)

# Each summary below averages the TeamCol columns within one grouping,
# ignoring missing values. mean() and its na.rm argument are passed directly
# because funs() is deprecated in dplyr.

# Create the avg League Player
PlayerAVGlg <- PointsCombine %>%
  group_by(lgID) %>%
  filter(lgID != "NA") %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

# Create the avg team player
PlayerAVGteam <- PointsCombine %>%
  group_by(teamID) %>%
  filter(teamID != "NA") %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

# Create the avg player by position
PlayerAVGpos <- PointsCombine %>%
  group_by(POS) %>%
  filter(POS != "NA") %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

# Create avg player by Year
PlayerAVGYear <- PointsCombine %>%
  group_by(yearID) %>%
  filter(yearID != "NA") %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

# Create avg player by Era
PlayerAVGEra <- PointsCombine %>%
  group_by(yearIDEra) %>%
  filter(yearIDEra != "NA") %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

Era Point Exploration

 

Exploring the era and year trends and characteristics in order to derive baseball's overall trends as they pertain to the Procrastination League. For this analysis I will use the following era designations for comparison. In the future I would like to go into more depth about each field, but for now I will settle for a brief overview of the information.

 

Baseball Eras


The Dead Ball Era (1901 - 1920)
World War 2 (1941 - 1945)
Segregation Era (1901 - 1947ish)
Post-War Era/Yankees Era (1945 - late 50s/early 60s)
Westward Expansion (1953 - 1961)
Dead Ball 2 (The Sixties, roughly)
Designated Hitter Era (1973 - current, AL only)
Free Agency/Arbitration Era (1975 - current)
Steroid Era (unknown, but late 80s - 2005 seems likely)
Wild Card Era (1994 - current)

# Rows that have an era label and belong to the NL or AL.
# is.na() replaces the comparisons against the strings "NA" and "Inf".
EraStudyPC <- PointsCombine %>%
  filter(!is.na(yearIDEra), lgID %in% c("NL", "AL"))

# Row counts per era, stacked by league.
ggplot(EraStudyPC, aes(yearIDEra, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Era", x = "Era", y = "Count") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

# Points distribution per era and league; the yellow marker is the mean.
ggplot(EraStudyPC, aes(x = yearIDEra, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  # `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
  stat_summary(fun = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Era and League")

# Points per era with a linear fit per league.
ggplot(EraStudyPC, aes(x = yearIDEra, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

# Creating avg weight and height per season.
# Missing weights/heights are ignored (na.rm = TRUE), matching the other
# averages in this file; previously one missing value blanked out the whole
# season. Seasons with no usable values at all are dropped via is.finite().
EraSize <- EraStudyPC %>%
  group_by(yearID) %>%
  summarise(
    weightavg = mean(weight, na.rm = TRUE),
    heightavg = mean(height, na.rm = TRUE)
  ) %>%
  filter(!is.na(yearID), is.finite(weightavg), is.finite(heightavg)) %>%
  select(yearID, weightavg, heightavg)

# Average weight by season with a linear trend line.
ggplot(EraSize, aes(x = yearID, y = weightavg)) +
  geom_point() +
  scale_x_continuous("Year") +
  scale_y_continuous("League Weight's Average") +
  theme_bw() +
  labs(title = "League's Weight Average by Year") +
  geom_smooth(method = "lm")

# Average height by season with a linear trend line.
ggplot(EraSize, aes(x = yearID, y = heightavg)) +
  geom_point() +
  scale_x_continuous("Year") +
  scale_y_continuous("League Height's Average") +
  theme_bw() +
  labs(title = "League's Height Average by Year") +
  geom_smooth(method = "lm")

# Era averages: home runs (HR.x) against Points, one point per era.
# NOTE(review): with colour mapped to era each group holds a single point,
# so the per-group linear fit has nothing to fit -- confirm whether one
# overall trend line was intended.
ggplot(PlayerAVGEra, aes(x = HR.x, y = Points, color = yearIDEra)) +
  # A fixed point size is set outside aes(); inside aes() it was treated as
  # a data mapping and produced a spurious "4" legend.
  geom_point(size = 4) +
  geom_smooth(method = "lm") +
  labs(title = "Points vs HR by Era")

League Point Exploration

 

Exploring the point characteristics and trends from one league to the other over the course of history.

 

# NL and AL rows from 1975 onwards. One filter replaces three overlapping
# ones; the league test already excludes missing leagues.
leagueTotal <- PointsCombine %>%
  filter(yearID > 1974, lgID %in% c("NL", "AL"))

# Average of the TeamCol columns per league, ignoring missing values.
# mean() is passed directly because funs() is deprecated in dplyr.
leagueTotalAVG <- leagueTotal %>%
  group_by(lgID) %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)


leagueTotalAVG
# The x-axis is the league, so the title says "League"; it previously read
# "Team", copied from the team plot.
ggplot(leagueTotalAVG, aes(x = lgID, y = Points)) +
  geom_point() +
  labs(title = "League Average Player Points") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Team Point Exploration

 

Exploring the point characteristics and trends of teams over the course of history. Do some teams have a greater impact on players' point totals, or are teams likely to follow point trends over time that can be predicted?

 

For this section we are going to focus on more modern trends to best represent the teams at large and their current trajectory. I will focus our attention on point totals from 1975 until the present. Later in the analysis I will investigate the modern St. Louis Cardinals. In the future I would like to focus on each team individually and then comparatively with other teams.

# All rows from 1975 onwards.
teamTotal <- PointsCombine %>%
  filter(yearID > 1974)

# Average of the TeamCol columns per team, ignoring missing values.
# mean() is passed directly because funs() is deprecated in dplyr.
teamTotalAVG <- teamTotal %>%
  group_by(teamID) %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)


teamTotalAVG

Team AVG Player Graphs

# Average Points per team, one marker per team, with team codes rotated so
# they stay readable.
ggplot(data = teamTotalAVG, mapping = aes(teamID, Points)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Team Average Player Points")

# Team-average ISO against team-average Points with a linear fit.
ggplot(data = teamTotalAVG, mapping = aes(ISO, Points)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "Team Average Player Points vs ISO by League")

St. Louis Cardinals’ Historical Study

# Cardinals (teamID "SLN") rows for every position except pitcher.
STLTotal <- PointsCombine %>%
  filter(teamID == "SLN", POS != "P", POS != "NA")

# Points by season, coloured by position, with a linear fit per position.
ggplot(STLTotal, aes(x = yearID, y = Points, color = POS)) +
  geom_point() +
  labs(title = "Team Average Player Points by Position for the STL Cardinals") +
  geom_smooth(aes(color = yearID, fill = POS), method = "lm") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Points distribution per position; the yellow marker is the mean.
ggplot(STLTotal, aes(x = POS, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  # `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
  stat_summary(fun = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Position for the STL Cardinals")

# Batting average against Points with a linear fit per position.
ggplot(STLTotal, aes(x = AVG, y = Points, color = POS)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "Team Average Player Points vs AVG for the STL Cardinals")

Player Position Point Exploration for the St. Louis Cardinals

 

Exploring the point characteristics and trends of a player's position relative to the history of the St. Louis Cardinals. Are certain positions more likely to project higher player totals or have some sort of unseen impact on a player's potential? The goal of this investigation is to examine the changes over the course of the Cardinals' historical data.

 

# First create position-specific data sets for later use.
# The same pipeline was repeated for every position, so it now lives in two
# small helpers; every data set keeps its original name and contents.

# Rows of PointsCombine for one position on the Cardinals (teamID "SLN")
# that have an era label, ordered from the highest Points to the lowest.
sln_position_rows <- function(position_code) {
  PointsCombine %>%
    filter(POS == position_code & teamID == "SLN") %>%
    filter(yearIDEra != "NA") %>%
    arrange(-Points)
}

# Reduce a position data set to the name, team, season, era, Points and
# batting-rate columns. Row order is left untouched.
sln_batting_columns <- function(position_rows) {
  select(
    position_rows,
    "nameFirst", "nameLast", "teamID", "yearID", "yearIDEra", "Points",
    "AVG", "OBP", "SLG", "OPS", "ISO"
  )
}

# Full-width data sets, one per position.
PointsCombineC <- sln_position_rows("C")
PointsCombine1B <- sln_position_rows("1B")
PointsCombine2B <- sln_position_rows("2B")
PointsCombineSS <- sln_position_rows("SS")
PointsCombine3B <- sln_position_rows("3B")
PointsCombineOF <- sln_position_rows("OF")
PointsCombineP <- sln_position_rows("P")

# Limited-column versions used for display.
PointsCombineCLim <- sln_batting_columns(PointsCombineC)
PointsCombine1BLim <- sln_batting_columns(PointsCombine1B)
PointsCombine2BLim <- sln_batting_columns(PointsCombine2B)
PointsCombineSSLim <- sln_batting_columns(PointsCombineSS)
PointsCombine3BLim <- sln_batting_columns(PointsCombine3B)
PointsCombineOFLim <- sln_batting_columns(PointsCombineOF)

# Pitchers additionally keep their pitching rate statistics.
PointsCombinePLim <- select(
  PointsCombineP,
  "nameFirst", "nameLast", "teamID", "yearID", "yearIDEra", "Points",
  "AVG", "OBP", "SLG", "OPS", "ISO", "ERA", "WHIP", "FIP"
)

# Creates a dataset without pitchers included to better see data based on
# only hitters
PointsCombineWOPit <- PointsCombine %>%
  filter(POS != "P" & POS != "NA" & teamID == "SLN" & yearIDEra != "NA")

# All Cardinals rows that have a position.
PointsCombineSLN <- PointsCombine %>%
  filter(POS != "NA" & teamID == "SLN")

# Row counts per season, stacked by position.
ggplot(PointsCombineSLN, aes(yearID, fill = POS)) +
  geom_bar() +
  labs(title = "Number of Players by Position by Year for the St. Louis Cardinals", x = "Year", y = "Count")

# Same data shown as each position's share of the season.
ggplot(PointsCombineSLN, aes(yearID, fill = POS)) +
  geom_bar(position = "fill") +
  labs(title = "Percentage of the number of players by Position by Year for the St. Louis Cardinals", x = "Year", y = "Percentage")

# Points distribution per position and era; the yellow marker is the mean.
ggplot(PointsCombineSLN, aes(x = POS, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = yearIDEra)) +
  coord_flip() +
  # `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
  stat_summary(fun = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Position and Era for the St. Louis Cardinals")

# The next two plots put yearID on the x-axis, so the axis is labelled
# "Year" (it previously read "Points" and "Ages").
ggplot(PointsCombineWOPit, aes(yearID, fill = POS)) +
  geom_bar() +
  labs(title = "Number of Players by Position by Year (Not Including Pitchers) for the St. Louis Cardinals", x = "Year", y = "Count")

ggplot(PointsCombineWOPit, aes(yearID, fill = POS)) +
  geom_bar(position = "fill") +
  labs(title = "Percentage of the number of players by Position by Year (Not Including Pitchers) for the St. Louis Cardinals", x = "Year", y = "Percentage")

# A histogram of points by Position and League without Pitchers
# NOTE(review): the panels are split by league while the title says "Era" --
# confirm which was intended.
ggplot(PointsCombineWOPit, aes(Points, fill = POS)) +
  geom_histogram(bins = 40, color = "white") +
  facet_grid(~lgID) +
  labs(title = "Points by Position and Era (Not Including Pitchers) for the St. Louis Cardinals", y = "Count of Observations", x = "Points")

Catchers Exploration for the St. Louis Cardinals

 

First we explore the History of Catchers

 

# First display the dataset
PointsCombineCLim
# Catcher rows per season, coloured by era.
ggplot(PointsCombineC, aes(x = yearID)) +
  geom_histogram(aes(fill = yearIDEra), bins = 40) +
  labs(title = "Number of Players by Era by Year for Catchers for the St. Louis Cardinals")

# Points distribution per era; the yellow marker is the mean.
ggplot(PointsCombineC, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = yearIDEra)) +
  coord_flip() +
  # `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
  stat_summary(fun = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and Era for the St. Louis Cardinals")

# Points by season with a linear fit per era.
ggplot(PointsCombineC, aes(x = yearID, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Point Change over the course of Time by Era for the St. Louis Cardinals")

# ISO against Points with a linear fit per era. The fit is now filled by
# era (it was filled by league), matching the title and the same plot in
# the other position sections.
ggplot(PointsCombineC, aes(x = ISO, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Points vs ISO by Era for the St. Louis Cardinals")

First Base Exploration for the St. Louis Cardinals

 

Next we explore the History of First Base

 

# First display the dataset
PointsCombine1BLim
# First-base rows per season, coloured by era.
ggplot(PointsCombine1B, aes(x = yearID)) +
  geom_histogram(aes(fill = yearIDEra), bins = 40) +
  labs(title = "Number of Players by Era by Year for First Base for the St. Louis Cardinals")

# Points distribution per era; the yellow marker is the mean.
ggplot(PointsCombine1B, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = yearIDEra)) +
  coord_flip() +
  # `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
  stat_summary(fun = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and Era for the St. Louis Cardinals")

# Points by season with a linear fit per era.
ggplot(PointsCombine1B, aes(x = yearID, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Point Change over the course of Time by Era for the St. Louis Cardinals")

# ISO against Points with a linear fit per era.
ggplot(PointsCombine1B, aes(x = ISO, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Points vs ISO by Era for the St. Louis Cardinals")

Second Base Exploration for the St. Louis Cardinals

 

Next we explore the History of Second Base

 

# First display the dataset
PointsCombine2BLim
# Second-base rows per season, coloured by era.
ggplot(PointsCombine2B, aes(x = yearID)) +
  geom_histogram(aes(fill = yearIDEra), bins = 40) +
  labs(title = "Number of Players by Era by Year for Second Base for the St. Louis Cardinals")

# Points distribution per era; the yellow marker is the mean.
ggplot(PointsCombine2B, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = yearIDEra)) +
  coord_flip() +
  # `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
  stat_summary(fun = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and Era for the St. Louis Cardinals")

# Points by season with a linear fit per era.
ggplot(PointsCombine2B, aes(x = yearID, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Point Change over the course of Time by Era for the St. Louis Cardinals")

# ISO against Points with a linear fit per era.
ggplot(PointsCombine2B, aes(x = ISO, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Points vs ISO by Era for the St. Louis Cardinals")

Shortstop Exploration for the St. Louis Cardinals

 

Next we explore the History of Shortstop

 

# First display the dataset
PointsCombineSSLim
# Shortstop rows per season, coloured by era.
ggplot(PointsCombineSS, aes(x = yearID)) +
  geom_histogram(aes(fill = yearIDEra), bins = 40) +
  labs(title = "Number of Players by Era by Year for Shortstop for the St. Louis Cardinals")

# Points distribution per era; the yellow marker is the mean.
ggplot(PointsCombineSS, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = yearIDEra)) +
  coord_flip() +
  # `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
  stat_summary(fun = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and Era for the St. Louis Cardinals")

# Points by season with a linear fit per era.
ggplot(PointsCombineSS, aes(x = yearID, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Point Change over the course of Time by Era for the St. Louis Cardinals")

# ISO against Points with a linear fit per era.
ggplot(PointsCombineSS, aes(x = ISO, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Points vs ISO by Era for the St. Louis Cardinals")

Third Base Exploration for the St. Louis Cardinals

 

Next we explore the History of Third Base

 

# First display the dataset
PointsCombine3BLim
# Third-base rows per season, coloured by era.
ggplot(PointsCombine3B, aes(x = yearID)) +
  geom_histogram(aes(fill = yearIDEra), bins = 40) +
  labs(title = "Number of Players by Era by Year for Third Base for the St. Louis Cardinals")

# Points distribution per era; the yellow marker is the mean.
ggplot(PointsCombine3B, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = yearIDEra)) +
  coord_flip() +
  # `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
  stat_summary(fun = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and Era for the St. Louis Cardinals")

# Points by season with a linear fit per era.
ggplot(PointsCombine3B, aes(x = yearID, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Point Change over the course of Time by Era for the St. Louis Cardinals")

# ISO against Points with a linear fit per era.
ggplot(PointsCombine3B, aes(x = ISO, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Points vs ISO by Era for the St. Louis Cardinals")

Outfield Exploration for the St. Louis Cardinals

 

Next we explore the History of Outfield

 

# First display the dataset
PointsCombineOFLim
# Outfield rows per season, coloured by era.
ggplot(PointsCombineOF, aes(x = yearID)) +
  geom_histogram(aes(fill = yearIDEra), bins = 40) +
  labs(title = "Number of Players by Era by Year for Outfielders for the St. Louis Cardinals")

# Points distribution per era; the yellow marker is the mean.
ggplot(PointsCombineOF, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = yearIDEra)) +
  coord_flip() +
  # `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
  stat_summary(fun = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and Era for the St. Louis Cardinals")

# Points by season with a linear fit per era.
ggplot(PointsCombineOF, aes(x = yearID, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Point Change over the course of Time by Era for the St. Louis Cardinals")

# ISO against Points with a linear fit per era.
ggplot(PointsCombineOF, aes(x = ISO, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Points vs ISO by Era for the St. Louis Cardinals")

Pitchers Exploration for the St. Louis Cardinals

 

Next we explore the History of Pitchers

 

# First display the dataset
PointsCombinePLim
# Pitcher rows per season, coloured by era (binned histogram).
ggplot(PointsCombineP, aes(x = yearID)) +
  geom_histogram(aes(fill = yearIDEra), bins = 40) +
  labs(title = "Number of Players by Era by Year for Pitchers for the St. Louis Cardinals")

# Same counts as a bar chart with one bar per season.
ggplot(PointsCombineP, aes(yearID, fill = yearIDEra)) +
  geom_bar() +
  labs(title = "Number of Players by Era by Year for Pitchers for the St. Louis Cardinals", x = "Year", y = "Count")

# Points distribution per era; the yellow marker is the mean.
ggplot(PointsCombineP, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = yearIDEra)) +
  coord_flip() +
  # `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
  stat_summary(fun = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and Era for the St. Louis Cardinals")

# A histogram of points, one panel per era
ggplot(PointsCombineP, aes(Points, fill = teamID)) +
  geom_histogram(bins = 10, color = "white") +
  facet_grid(~yearIDEra) +
  labs(title = "Points by Team", y = "Count of Observations", x = "Points")

# ERA histogram coloured by era; the view is limited to ERA between -3 and 20.
ggplot(PointsCombineP, aes(ERA, fill = yearIDEra)) +
  geom_histogram(bins = 300, color = "white") +
  labs(title = "ERA by Team", y = "Count of Observations", x = "ERA") +
  coord_cartesian(ylim = c(-1, 10000), xlim = c(-3, 20))
## Warning: Removed 4 rows containing non-finite values (stat_bin).

# Points by season with a linear fit per era.
ggplot(PointsCombineP, aes(x = yearID, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Point Change over the course of Time by Era for the St. Louis Cardinals")

# FIP against Points with a linear fit per era.
ggplot(PointsCombineP, aes(x = FIP, y = Points, color = yearIDEra)) +
  geom_point() +
  geom_smooth(aes(color = yearID, fill = yearIDEra), method = "lm") +
  labs(title = "Points vs FIP by Era for the St. Louis Cardinals")
## Warning: Removed 9 rows containing non-finite values (stat_smooth).

Conclusion

 

Through this brief overview, several trends and insights can be gained. One of the biggest points of interest for me at present is the noticeable, though slight, difference between the National League and American League point totals. It appears that the American League has a slight statistical bump in points, hits, home runs, etc. I would like to dive deeper in the future to uncover whether it is related to the individual parks, the addition of the Designated Hitter, or whether the American League has been, or currently is, a more difficult league.

 

# Clean-up, part 1: drop the objects created during the data set-up.
rm(list = c(
  "BatTotal", "PitTotal", "FieldTotal", "MasterTotal",
  "BatTotalCol", "PitTotalCol", "PointsCombine", "PointsCol",
  "ColNA", "EraLabel", "EraBreaks", "TeamCol",
  "PlayerAVGEra", "PlayerAVGYear", "PlayerAVGpos",
  "PlayerAVGteam", "PlayerAVGlg"
))

# Clean-up, part 2: drop the league, team and Cardinals study objects.
rm(list = c(
  "leagueTotal", "leagueTotalAVG", "STLTotal", "teamTotal", "teamTotalAVG"
))