Procrastination Fantasy Baseball

 

The following analysis explores the distinctive qualities of the Procrastination Fantasy Point system. Its aim is to find player correlations that can improve player selection and make player performance more predictable.

# Batting data: keep AL/NL rows from seasons after 1905 with at least one
# game, then overwrite every missing value with 0.
BatTotal <- Batting %>%
  filter(
    yearID > 1905,
    lgID %in% c("AL", "NL"),
    G > 0
  ) %>%
  replace(., is.na(.), 0)


# Pitching data: keep AL/NL rows from seasons after 1905 with at least one
# game, then overwrite every missing value with 0.
PitTotal <- Pitching %>%
  filter(
    yearID > 1905,
    lgID %in% c("AL", "NL"),
    G > 0
  ) %>%
  replace(., is.na(.), 0)

# Fielding data: keep AL/NL rows from seasons after 1905 with at least one
# game, then overwrite every missing value with 0.
FieldTotal <- Fielding %>%
  filter(
    yearID > 1905,
    lgID %in% c("AL", "NL"),
    G > 0
  ) %>%
  replace(., is.na(.), 0)

# Fielding position factoring: fix the level order used by later plots and
# summaries.
FieldTotal[["POS"]] <- factor(
  FieldTotal[["POS"]],
  levels = c("C", "1B", "2B", "3B", "P", "SS", "OF")
)

# Master data: used unfiltered under a local name.
MasterTotal <- Master

# Creating batting points and batting rate statistics.
#
# Adds, in this order, the columns PointsB, AVG, SLG, OBP, OPS and ISO to
# BatTotal. The column set and order are kept stable because later code
# addresses columns of the merged table by position.
#
#   PointsB  fantasy batting points from the counting stats
#   AVG      H / AB
#   SLG      total bases / AB (total bases = 1B + 2*2B + 3*3B + 4*HR)
#   OBP      (H + BB + HBP) / (AB + BB + SF + HBP)
#   OPS      OBP + SLG, computed from the unrounded components
#   ISO      SLG - AVG
#
# Rows with AB == 0 yield NaN for the AB-based rates.
BatTotal <- BatTotal %>%
  mutate(
    PointsB = (R + RBI + BB + IBB - SO + SB - CS +
      (H - X2B - X3B - HR) + (2 * X2B) + (3 * X3B) + (4 * HR) + (HR * 2)),
    AVG = round(H / AB, 3),
    # Total bases must be divided by at-bats to obtain slugging percentage;
    # without the division SLG (and therefore ISO) is a raw base count.
    SLG = round(
      ((H - X2B - X3B - HR) + (2 * X2B) + (3 * X3B) + (4 * HR)) / AB,
      3
    ),
    OBP = round((H + BB + HBP) / (AB + BB + SF + HBP), 3),
    OPS = round(
      ((H + BB + HBP) / (AB + BB + SF + HBP)) +
        (((1 * (H - X2B - X3B - HR)) + (2 * X2B) + (3 * X3B) + (4 * HR)) / AB),
      3
    ),
    ISO = round(SLG - AVG, 3)
  )


# Attach player details from the Master data to each batting row, keep only
# the identification and batting-metric columns, and sort from highest to
# lowest batting points.
BatTotalCol <- left_join(BatTotal, MasterTotal, by = "playerID") %>%
  select(
    nameFirst, nameLast, playerID, yearID, teamID, lgID,
    PointsB, AVG, SLG, OBP, OPS, ISO
  ) %>%
  arrange(desc(PointsB))

# Creating pitching points and pitching rate statistics.
# Innings pitched are derived as IPouts / 3. ERA is overwritten with the
# recomputed value; PointsP, WHIP and FIP are appended.
PitTotal <- PitTotal %>%
  mutate(
    PointsP = ((W * 5) - (L * 5) + (SV * 5) + (CG * 10) - WP - BK + SO -
      HBP - BB - IBB - (ER * 2) - H + (IPouts / 3) + (SHO * 5)),
    ERA = round((ER / (IPouts / 3)) * 9, 2),
    WHIP = round((H + BB) / (IPouts / 3), 2),
    FIP = round(
      (((13 * HR) + (3 * (BB + HBP)) - (2 * SO)) / (IPouts / 3) + 3.10),
      2
    )
  )

# Attach player details from the Master data to each pitching row, keep only
# the identification and pitching-metric columns, and sort from highest to
# lowest pitching points.
PitTotalCol <- left_join(PitTotal, MasterTotal, by = "playerID") %>%
  select(
    nameFirst, nameLast, playerID, yearID, teamID, lgID,
    PointsP, ERA, WHIP, FIP
  ) %>%
  arrange(desc(PointsP))


# Fielding points: each error costs one point.
FieldTotal <- mutate(FieldTotal, PointsF = -E)


# Merge all datasets: start from the Master data and left-join batting
# (by player), then pitching and fielding (by player and season).
# NOTE(review): a player-season with several batting, pitching or fielding
# rows will presumably be repeated by these joins -- confirm that is intended.
PointsCombine <- left_join(MasterTotal, BatTotal, by = "playerID")
PointsCombine <- left_join(PointsCombine, PitTotal, by = c("playerID", "yearID"))
PointsCombine <- left_join(PointsCombine, FieldTotal, by = c("playerID", "yearID"))

# Column positions whose missing values are replaced with 0.
# NOTE(review): these positions depend on the exact column layout produced by
# the joins above -- re-check them if any upstream table changes.
ColNA <- c(32:53, 57:58, 60:84, 90:101)

PointsCombine[, ColNA][is.na(PointsCombine[, ColNA])] <- 0

# Add the batting, pitching and fielding points into a single Points column,
# drop rows without a player ID, sort from highest to lowest Points, and seed
# the era column with the season year (it is binned into eras afterwards).
#
# yearIDEra is taken from the yearID column of the rows flowing through the
# pipe. Referring to PointsCombine$yearID here would read the table as it was
# before filtering and sorting, which pairs each row with another row's year.
PointsCombine <- PointsCombine %>%
  mutate(Points = PointsB + PointsP + PointsF) %>%
  filter(!is.na(playerID)) %>%
  arrange(desc(Points)) %>%
  mutate(yearIDEra = yearID)
  

# Creates era breaks: bin the season year stored in yearIDEra into named eras.
EraLabel <- c(
  "Dead Ball Era (1901-1919)",
  "Live Ball Era (1920-1941)",
  "Integration Era (1942-1960)",
  "Expansion Era (1961-1976)",
  "Free Agency Era (1977-1993)",
  "Long Ball/Steroid Era (1994-2005)",
  "Post Steroid Era (2006-Current)"
)

# Intervals are right-closed (the default for cut()), e.g. (1905, 1919].
# The final break is open-ended so that every season after 2005 falls into
# the "Current" era instead of becoming NA once the data extends past a
# hard-coded final year.
EraBreaks <- c(1905, 1919, 1941, 1960, 1976, 1993, 2005, Inf)

PointsCombine$yearIDEra <- cut(
  PointsCombine$yearIDEra,
  breaks = EraBreaks,
  labels = EraLabel
)


# Compact view of the combined data for later use: names, season, team,
# league, total points, position, a few rate stats and the era, sorted from
# highest to lowest Points.
PointsCol <- arrange(
  select(
    PointsCombine,
    nameFirst, nameLast, yearID, teamID, lgID,
    Points, POS, AVG, OPS, ERA, yearIDEra
  ),
  desc(Points)
)



# Select the desired fields to create the average player: the NA-filled
# columns plus column 102.
# NOTE(review): column 102 is presumably the Points column -- confirm against
# the column layout of PointsCombine.
TeamCol <- c(ColNA, 102)

# Each summary below drops rows whose grouping value is missing, groups by
# that value, and averages every TeamCol column. The mean is passed as a
# function with its extra argument rather than through the deprecated
# funs() helper; the output column names are unchanged.

# create the avg League Player
PlayerAVGlg <- PointsCombine %>%
  filter(!is.na(lgID)) %>%
  group_by(lgID) %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

# create the avg team player
PlayerAVGteam <- PointsCombine %>%
  filter(!is.na(teamID)) %>%
  group_by(teamID) %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

# create the avg player by position
PlayerAVGpos <- PointsCombine %>%
  filter(!is.na(POS)) %>%
  group_by(POS) %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

# create avg player by Year
PlayerAVGYear <- PointsCombine %>%
  filter(!is.na(yearID)) %>%
  group_by(yearID) %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

# create avg player by Era
PlayerAVGEra <- PointsCombine %>%
  filter(!is.na(yearIDEra)) %>%
  group_by(yearIDEra) %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

Player Position Point Exploration

 

Exploring the point characteristics and trends of a player's position relative to history. Are certain positions more likely to project higher player totals, or do they have some unseen impact on a player's potential?

 

# Position-specific subsets of PointsCombine for later use, each sorted from
# highest to lowest Points.
PointsCombineC <- arrange(filter(PointsCombine, POS == "C"), desc(Points))

PointsCombine1B <- arrange(filter(PointsCombine, POS == "1B"), desc(Points))

PointsCombine2B <- arrange(filter(PointsCombine, POS == "2B"), desc(Points))

PointsCombineSS <- arrange(filter(PointsCombine, POS == "SS"), desc(Points))

PointsCombine3B <- arrange(filter(PointsCombine, POS == "3B"), desc(Points))

PointsCombineOF <- arrange(filter(PointsCombine, POS == "OF"), desc(Points))

PointsCombineP <- arrange(filter(PointsCombine, POS == "P"), desc(Points))

# Limited-column versions of the position-specific datasets, used to display
# each position's table. Every table is sorted from highest to lowest Points;
# the pitcher table additionally carries ERA, WHIP and FIP.
PointsCombineCLim <- PointsCombine %>%
  filter(POS == "C") %>%
  select(nameFirst, nameLast, teamID, yearID, lgID, Points, AVG, OBP, SLG, OPS, ISO) %>%
  arrange(desc(Points))

PointsCombine1BLim <- PointsCombine %>%
  filter(POS == "1B") %>%
  select(nameFirst, nameLast, teamID, yearID, lgID, Points, AVG, OBP, SLG, OPS, ISO) %>%
  arrange(desc(Points))

PointsCombine2BLim <- PointsCombine %>%
  filter(POS == "2B") %>%
  select(nameFirst, nameLast, teamID, yearID, lgID, Points, AVG, OBP, SLG, OPS, ISO) %>%
  arrange(desc(Points))

PointsCombineSSLim <- PointsCombine %>%
  filter(POS == "SS") %>%
  select(nameFirst, nameLast, teamID, yearID, lgID, Points, AVG, OBP, SLG, OPS, ISO) %>%
  arrange(desc(Points))

PointsCombine3BLim <- PointsCombine %>%
  filter(POS == "3B") %>%
  select(nameFirst, nameLast, teamID, yearID, lgID, Points, AVG, OBP, SLG, OPS, ISO) %>%
  arrange(desc(Points))

PointsCombineOFLim <- PointsCombine %>%
  filter(POS == "OF") %>%
  select(nameFirst, nameLast, teamID, yearID, lgID, Points, AVG, OBP, SLG, OPS, ISO) %>%
  arrange(desc(Points))

PointsCombinePLim <- PointsCombine %>%
  filter(POS == "P") %>%
  select(
    nameFirst, nameLast, teamID, yearID, lgID, Points,
    AVG, OBP, SLG, OPS, ISO, ERA, WHIP, FIP
  ) %>%
  arrange(desc(Points))
# Dataset without pitchers (and without rows lacking a position), to look at
# hitters only.
PointsCombineWOPit <- filter(PointsCombine, POS != "P", POS != "NA")

# Dataset of every row that has a position.
PointsCombineA <- filter(PointsCombine, POS != "NA")


# Stacked bars: number of player rows per season, split by position.
ggplot(data = PointsCombineA, mapping = aes(x = yearID, fill = POS)) +
  geom_bar() +
  ggtitle("Number of Players by Position by Year") +
  xlab("Year") +
  ylab("Count")

# Same bars normalised to proportions within each season.
ggplot(data = PointsCombineA, mapping = aes(x = yearID, fill = POS)) +
  geom_bar(position = "fill") +
  ggtitle("Percentage of the number of players by Position by Year") +
  xlab("Year") +
  ylab("Percentage")

# Horizontal box plots of Points per position, filled by league, with the
# mean marked by a yellow diamond.
ggplot(data = PointsCombineA, mapping = aes(x = POS, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(aes(fill = lgID), outlier.size = 1) +
  coord_flip() +
  stat_summary(fun.y = mean, geom = "point", shape = 18, size = 2, color = "yellow") +
  ggtitle("Points by Position and League")

# Stacked bars: number of non-pitcher rows per season, split by position.
# The x axis is the season (yearID), so it is labelled "Year".
ggplot(PointsCombineWOPit, aes(yearID, fill = POS)) +
  geom_bar() +
  labs(
    title = "Number of Players by Position by Year (Not Including Pitchers)",
    x = "Year",
    y = "Count"
  )

# Same bars normalised to proportions within each season; the x axis is
# again the season.
ggplot(PointsCombineWOPit, aes(yearID, fill = POS)) +
  geom_bar(position = "fill") +
  labs(
    title = "Percentage of the number of players by Position by Year (Not Including Pitchers)",
    x = "Year",
    y = "Percentage"
  )

# A histogram of points by Position and League without Pitchers
ggplot(PointsCombineWOPit, aes(Points, fill = POS)) +
  geom_histogram(bins = 40, color = "white") +
  facet_grid(~lgID) +
  labs(
    title = "Points by Position and League (Not Including Pitchers)",
    y = "Count of Observations",
    x = "Points"
  )

Catchers Exploration

 

First we explore the History of Catchers

 

# First display the dataset
PointsCombineCLim

# Catcher rows per season, stacked by league.
ggplot(PointsCombineC, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for Catchers", x = "Year", y = "Count")

# Horizontal box plots of Points per team, filled by league, with the mean
# marked by a yellow diamond.
ggplot(PointsCombineC, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# Points over time with one linear fit per league. The smoother inherits the
# plot-level colour = lgID, so each fit line matches its league's points;
# mapping a continuous variable such as yearID to colour in this layer is
# dropped by the statistical transformation and leaves the lines uncoloured.
ggplot(PointsCombineC, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against isolated power with one linear fit per league.
ggplot(PointsCombineC, aes(x = ISO, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs ISO by League")

First Base Exploration

 

Next we explore the History of First Base

 

# First display the dataset
PointsCombine1BLim

# First-base rows per season, stacked by league.
ggplot(PointsCombine1B, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for First Baseman", x = "Year", y = "Count")

# Horizontal box plots of Points per team, filled by league, with the mean
# marked by a yellow diamond.
ggplot(PointsCombine1B, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# Points over time with one linear fit per league. The smoother inherits the
# plot-level colour = lgID, so each fit line matches its league's points;
# mapping a continuous variable such as yearID to colour in this layer is
# dropped by the statistical transformation and leaves the lines uncoloured.
ggplot(PointsCombine1B, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against isolated power with one linear fit per league.
ggplot(PointsCombine1B, aes(x = ISO, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs ISO by League")

Second Base Exploration

 

Next we explore the History of Second Base

 

# First display the dataset
PointsCombine2BLim

# Second-base rows per season, stacked by league.
ggplot(PointsCombine2B, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for Second Baseman", x = "Year", y = "Count")

# Horizontal box plots of Points per team, filled by league, with the mean
# marked by a yellow diamond.
ggplot(PointsCombine2B, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# Points over time with one linear fit per league. The smoother inherits the
# plot-level colour = lgID, so each fit line matches its league's points;
# mapping a continuous variable such as yearID to colour in this layer is
# dropped by the statistical transformation and leaves the lines uncoloured.
ggplot(PointsCombine2B, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against isolated power with one linear fit per league.
ggplot(PointsCombine2B, aes(x = ISO, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs ISO by League")

Short Stop Exploration

 

Next we explore the History of Short Stop

 

# First display the dataset
PointsCombineSSLim

# Shortstop rows per season, stacked by league.
ggplot(PointsCombineSS, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for Short Stop", x = "Year", y = "Count")

# Horizontal box plots of Points per team, filled by league, with the mean
# marked by a yellow diamond.
ggplot(PointsCombineSS, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# Points over time with one linear fit per league. The smoother inherits the
# plot-level colour = lgID, so each fit line matches its league's points;
# mapping a continuous variable such as yearID to colour in this layer is
# dropped by the statistical transformation and leaves the lines uncoloured.
ggplot(PointsCombineSS, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against isolated power with one linear fit per league.
ggplot(PointsCombineSS, aes(x = ISO, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs ISO by League")

Third Base Exploration

 

Next we explore the History of Third Base

 

# First display the dataset
PointsCombine3BLim

# Third-base rows per season, stacked by league.
ggplot(PointsCombine3B, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for Third Baseman", x = "Year", y = "Count")

# Horizontal box plots of Points per team, filled by league, with the mean
# marked by a yellow diamond.
ggplot(PointsCombine3B, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# Points over time with one linear fit per league. The smoother inherits the
# plot-level colour = lgID, so each fit line matches its league's points;
# mapping a continuous variable such as yearID to colour in this layer is
# dropped by the statistical transformation and leaves the lines uncoloured.
ggplot(PointsCombine3B, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against isolated power with one linear fit per league.
ggplot(PointsCombine3B, aes(x = ISO, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs ISO by League")

Outfield Exploration

 

Next we explore the History of Outfield

 

# First display the dataset
PointsCombineOFLim

# Outfield rows per season, stacked by league.
ggplot(PointsCombineOF, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for Outfielders", x = "Year", y = "Count")

# Horizontal box plots of Points per team, filled by league, with the mean
# marked by a yellow diamond.
ggplot(PointsCombineOF, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# Points over time with one linear fit per league. The smoother inherits the
# plot-level colour = lgID, so each fit line matches its league's points;
# mapping a continuous variable such as yearID to colour in this layer is
# dropped by the statistical transformation and leaves the lines uncoloured.
ggplot(PointsCombineOF, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against isolated power with one linear fit per league.
ggplot(PointsCombineOF, aes(x = ISO, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs ISO by League")

Pitchers Exploration

 

Next we explore the History of Pitchers

 

# First display the dataset
PointsCombinePLim

# Pitcher rows per season, stacked by league.
ggplot(PointsCombineP, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for Pitchers", x = "Year", y = "Count")

# Horizontal box plots of Points per team, filled by league, with the mean
# marked by a yellow diamond.
ggplot(PointsCombineP, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# A histogram of points, filled by team and faceted by league
ggplot(PointsCombineP, aes(Points, fill = teamID)) +
  geom_histogram(bins = 40, color = "white") +
  facet_grid(~lgID) +
  labs(title = "Points by Team", y = "Count of Observations", x = "Points")

# A histogram of ERA filled by league (the title names the league because
# lgID is the only grouping in this plot); the view is zoomed with
# coord_cartesian so no rows are removed by the axis limits.
ggplot(PointsCombineP, aes(ERA, fill = lgID)) +
  geom_histogram(bins = 300, color = "white") +
  labs(title = "ERA by League", y = "Count of Observations", x = "ERA") +
  coord_cartesian(ylim = c(-1, 10000), xlim = c(-3, 20))
## Warning: Removed 51 rows containing non-finite values (stat_bin).

# Points over time with one linear fit per league. The smoother inherits the
# plot-level colour = lgID, so each fit line matches its league's points;
# mapping a continuous variable such as yearID to colour in this layer is
# dropped by the statistical transformation and leaves the lines uncoloured.
ggplot(PointsCombineP, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against wins with one linear fit per league.
ggplot(PointsCombineP, aes(x = W, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs Wins by League")

# Average pitcher by throwing arm: drop rows with a missing throwing hand,
# group by it, and average every TeamCol column. The mean is passed as a
# function with its extra argument rather than through the deprecated
# funs() helper; the output column names are unchanged.
AVGThrow <- PointsCombineP %>%
  filter(!is.na(throws)) %>%
  group_by(throws) %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)


print("The following is the Average Pitcher by Throwing Arm")
## [1] "The following is the Average Pitcher by Throwing Arm"
AVGThrow 

Conclusion

 

After conducting the above analysis, I have concluded that further study of specific points of interest is needed. I would like to compare left- and right-handed pitchers to uncover whether one throwing arm is superior in some way. Another area of interest is the characteristics of pitching and how they have changed over time.

 

# First remove all the objects created by the data set-up
remove(
  BatTotal, PitTotal, FieldTotal, MasterTotal, BatTotalCol, PitTotalCol,
  PointsCombine, PointsCol, ColNA, EraLabel, EraBreaks, TeamCol,
  PlayerAVGEra, PlayerAVGYear, PlayerAVGpos, PlayerAVGteam, PlayerAVGlg
)

# Then remove the position-specific datasets, including PointsCombineA and
# AVGThrow, which are also created by this analysis and would otherwise be
# left behind in the workspace.
remove(
  PointsCombine1B, PointsCombine1BLim, PointsCombine2B, PointsCombine2BLim,
  PointsCombine3B, PointsCombine3BLim, PointsCombineSS, PointsCombineSSLim,
  PointsCombineOF, PointsCombineOFLim, PointsCombineP, PointsCombinePLim,
  PointsCombineC, PointsCombineCLim, PointsCombineWOPit, PointsCombineA,
  AVGThrow
)