The following synopsis analyzes and explores the distinctive qualities of the Procrastination Fantasy Point system. Its goal is to find correlations among player statistics that can improve player selection and the predictability of player performance.
# ---- Source table preparation ----
# Every stat table is restricted to AL/NL rows after 1905 with at least one
# game played; any NA cell that remains is then overwritten with 0.

# Batting
BatTotal <- filter(Batting, yearID > 1905, lgID %in% c("AL", "NL"), G > 0)
BatTotal[is.na(BatTotal)] <- 0

# Pitching
PitTotal <- filter(Pitching, yearID > 1905, lgID %in% c("AL", "NL"), G > 0)
PitTotal[is.na(PitTotal)] <- 0

# Fielding
FieldTotal <- filter(Fielding, yearID > 1905, lgID %in% c("AL", "NL"), G > 0)
FieldTotal[is.na(FieldTotal)] <- 0

# Fielding position as a factor with a fixed level order; any position code
# outside this list becomes NA.
FieldTotal$POS <- factor(
  FieldTotal$POS,
  levels = c("C", "1B", "2B", "3B", "P", "SS", "OF")
)

# The Master (player biography) table is used unfiltered.
MasterTotal <- Master
# Batting fantasy points and rate statistics, added as new columns in this
# order: PointsB, AVG, SLG, OBP, OPS, ISO.
#   PointsB = R + RBI + BB + IBB - SO + SB - CS + total bases + 2 extra per HR
#   AVG     = H / AB
#   SLG     = total bases / AB
#   OBP     = (H + BB + HBP) / (AB + BB + SF + HBP)
#   OPS     = OBP + SLG, rounded once from the unrounded components
#   ISO     = SLG - AVG
# where total bases = singles + 2 * doubles + 3 * triples + 4 * home runs.
#
# Fix: SLG used to hold raw total bases because the division by AB was
# missing; that also made ISO (SLG - AVG) essentially a total-bases count.
# Both are now per-at-bat rates, consistent with the slugging term in OPS.
#
# Rows with AB == 0 produce non-finite rate statistics (division by zero).
BatTotal <- BatTotal %>%
  mutate(
    PointsB = R + RBI + BB + IBB - SO + SB - CS +
      (H - X2B - X3B - HR) + (2 * X2B) + (3 * X3B) + (4 * HR) + (HR * 2),
    AVG = round(H / AB, 3),
    SLG = round(
      ((H - X2B - X3B - HR) + (2 * X2B) + (3 * X3B) + (4 * HR)) / AB,
      3
    ),
    OBP = round((H + BB + HBP) / (AB + BB + SF + HBP), 3),
    OPS = round(
      ((H + BB + HBP) / (AB + BB + SF + HBP)) +
        (((1 * (H - X2B - X3B - HR)) + (2 * X2B) + (3 * X3B) + (4 * HR)) / AB),
      3
    ),
    ISO = round(SLG - AVG, 3)
  )
# Batting leaderboard: attach player names from the Master table, keep only
# the identifying columns and the batting metrics, highest PointsB first.
BatTotalCol <- left_join(BatTotal, MasterTotal, by = "playerID") %>%
  select(
    nameFirst, nameLast, playerID, yearID, teamID, lgID,
    PointsB, AVG, SLG, OBP, OPS, ISO
  ) %>%
  arrange(desc(PointsB))
# Pitching fantasy points and rate statistics, added as new columns in this
# order: PointsP, ERA, WHIP, FIP. Innings pitched is IPouts / 3 throughout.
#   PointsP = 5*W - 5*L + 5*SV + 10*CG - WP - BK + SO - HBP - BB - IBB
#             - 2*ER - H + innings + 5*SHO
#   ERA     = 9 * ER / innings
#   WHIP    = (H + BB) / innings
#   FIP     = (13*HR + 3*(BB + HBP) - 2*SO) / innings + 3.10
# Rows with IPouts == 0 produce non-finite ERA/WHIP/FIP (division by zero).
PitTotal <- mutate(
  PitTotal,
  PointsP = (W * 5) - (L * 5) + (SV * 5) + (CG * 10) - WP - BK + SO - HBP -
    BB - IBB - (ER * 2) - H + (IPouts / 3) + (SHO * 5),
  ERA = round((ER / (IPouts / 3)) * 9, 2),
  WHIP = round((H + BB) / (IPouts / 3), 2),
  FIP = round(
    ((13 * HR) + (3 * (BB + HBP)) - (2 * SO)) / (IPouts / 3) + 3.10,
    2
  )
)
# Pitching leaderboard: attach player names from the Master table, keep only
# the identifying columns and the pitching metrics, highest PointsP first.
PitTotalCol <- left_join(PitTotal, MasterTotal, by = "playerID") %>%
  select(
    nameFirst, nameLast, playerID, yearID, teamID, lgID,
    PointsP, ERA, WHIP, FIP
  ) %>%
  arrange(desc(PointsP))
# Fielding fantasy points: each error costs one point, nothing else scores.
FieldTotal <- mutate(FieldTotal, PointsF = -E)
# Build the combined table: start from every Master row, then attach batting,
# pitching and fielding rows in turn.
# NOTE(review): batting is matched on playerID only, while pitching and
# fielding are matched on playerID + yearID (not stint or team), so a player
# with several rows in a season is repeated for each combination - confirm
# this fan-out is intended.
PointsCombine <- left_join(MasterTotal, BatTotal, by = "playerID")
PointsCombine <- left_join(PointsCombine, PitTotal, by = c("playerID", "yearID"))
PointsCombine <- left_join(PointsCombine, FieldTotal, by = c("playerID", "yearID"))

# Column positions whose NA cells (left behind by unmatched joins) are set
# to 0. The positions are hard-coded against the joined column layout, so
# they must be revisited whenever a column is added or removed upstream.
ColNA <- c(32:53, 57:58, 60:84, 90:101)
PointsCombine[, ColNA][is.na(PointsCombine[, ColNA])] <- 0
# Total fantasy points per row, sorted best-first, plus a numeric yearIDEra
# column that starts as a copy of yearID for later era binning.
#
# Fix: yearIDEra was previously filled from `PointsCombine$yearID`, which
# inside this pipeline refers to the global table as it was BEFORE the
# filter/arrange steps. After sorting by Points, that vector no longer lined
# up with the rows, so seasons were tagged with another row's year. The copy
# is now taken from the yearID column of the rows being mutated.
PointsCombine <- PointsCombine %>%
  mutate(Points = PointsB + PointsP + PointsF) %>%
  # Drops rows with a missing playerID: comparing NA to a string yields NA,
  # and filter() keeps only rows where the condition is TRUE.
  filter(playerID != "NA") %>%
  arrange(-Points) %>%
  mutate(yearIDEra = yearID)
# Era binning: convert the numeric yearIDEra column into a labelled factor.
# cut() uses right-closed intervals, so the bins are (1905, 1919],
# (1919, 1941], ... (2005, 2018]; years outside that range become NA.
EraLabel <- c(
  "Dead Ball Era (1901-1919)",
  "Live Ball Era (1920-1941)",
  "Integration Era (1942-1960)",
  "Expansion Era (1961-1976)",
  "Free Agency Era (1977-1993)",
  "Long Ball/Steroid Era (1994-2005)",
  "Post Steroid Era (2006-Current)"
)
EraBreaks <- c(1905, 1919, 1941, 1960, 1976, 1993, 2005, 2018)
PointsCombine$yearIDEra <- cut(
  PointsCombine$yearIDEra,
  breaks = EraBreaks,
  labels = EraLabel
)
# Compact view of the combined table (identity, points, position, a few
# headline rates and the era), highest Points first.
PointsCol <- select(
  PointsCombine,
  nameFirst, nameLast, yearID, teamID, lgID,
  Points, POS, AVG, OPS, ERA, yearIDEra
) %>%
  arrange(desc(Points))
# ---- "Average player" profiles ----
# Columns to average: the NA-filled stat columns plus column 102
# (presumably the Points total - verify against the joined column layout).
TeamCol <- c(ColNA, 102)

# Each summary below groups the combined table by one key, drops rows whose
# key is missing, and takes the NA-ignoring mean of every TeamCol column.

# Average player per league
PlayerAVGlg <- PointsCombine %>%
  group_by(lgID) %>%
  filter(lgID != "NA") %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

# Average player per team
PlayerAVGteam <- PointsCombine %>%
  group_by(teamID) %>%
  filter(teamID != "NA") %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

# Average player per fielding position
PlayerAVGpos <- PointsCombine %>%
  group_by(POS) %>%
  filter(POS != "NA") %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

# Average player per season
PlayerAVGYear <- PointsCombine %>%
  group_by(yearID) %>%
  filter(yearID != "NA") %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

# Average player per era
PlayerAVGEra <- PointsCombine %>%
  group_by(yearIDEra) %>%
  filter(yearIDEra != "NA") %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)
This section explores the point characteristics and trends of each player position over time. Are certain positions more likely to produce higher point totals, or do they have some unseen impact on a player's potential?
# Full-width per-position subsets of the combined table, each sorted by
# Points from highest to lowest. Used by the position sections below.
PointsCombineC <- filter(PointsCombine, POS == "C") %>%
  arrange(desc(Points))

PointsCombine1B <- filter(PointsCombine, POS == "1B") %>%
  arrange(desc(Points))

PointsCombine2B <- filter(PointsCombine, POS == "2B") %>%
  arrange(desc(Points))

PointsCombineSS <- filter(PointsCombine, POS == "SS") %>%
  arrange(desc(Points))

PointsCombine3B <- filter(PointsCombine, POS == "3B") %>%
  arrange(desc(Points))

PointsCombineOF <- filter(PointsCombine, POS == "OF") %>%
  arrange(desc(Points))

PointsCombineP <- filter(PointsCombine, POS == "P") %>%
  arrange(desc(Points))
# Trimmed per-position tables for display: identity columns, Points and the
# batting rates, sorted by Points from highest to lowest. The pitcher table
# additionally carries ERA, WHIP and FIP.
PointsCombineCLim <- filter(PointsCombine, POS == "C") %>%
  select(nameFirst, nameLast, teamID, yearID, lgID, Points, AVG, OBP, SLG, OPS, ISO) %>%
  arrange(desc(Points))

PointsCombine1BLim <- filter(PointsCombine, POS == "1B") %>%
  select(nameFirst, nameLast, teamID, yearID, lgID, Points, AVG, OBP, SLG, OPS, ISO) %>%
  arrange(desc(Points))

PointsCombine2BLim <- filter(PointsCombine, POS == "2B") %>%
  select(nameFirst, nameLast, teamID, yearID, lgID, Points, AVG, OBP, SLG, OPS, ISO) %>%
  arrange(desc(Points))

PointsCombineSSLim <- filter(PointsCombine, POS == "SS") %>%
  select(nameFirst, nameLast, teamID, yearID, lgID, Points, AVG, OBP, SLG, OPS, ISO) %>%
  arrange(desc(Points))

PointsCombine3BLim <- filter(PointsCombine, POS == "3B") %>%
  select(nameFirst, nameLast, teamID, yearID, lgID, Points, AVG, OBP, SLG, OPS, ISO) %>%
  arrange(desc(Points))

PointsCombineOFLim <- filter(PointsCombine, POS == "OF") %>%
  select(nameFirst, nameLast, teamID, yearID, lgID, Points, AVG, OBP, SLG, OPS, ISO) %>%
  arrange(desc(Points))

PointsCombinePLim <- filter(PointsCombine, POS == "P") %>%
  select(
    nameFirst, nameLast, teamID, yearID, lgID, Points,
    AVG, OBP, SLG, OPS, ISO, ERA, WHIP, FIP
  ) %>%
  arrange(desc(Points))
# Rows with a known position, excluding pitchers (hitters-only view).
PointsCombineWOPit <- filter(PointsCombine, POS != "P", POS != "NA")

# Rows with any known position (rows with a missing POS are dropped).
PointsCombineA <- filter(PointsCombine, POS != "NA")
# Row counts per season, stacked by position
ggplot(PointsCombineA, aes(x = yearID, fill = POS)) +
  geom_bar() +
  labs(
    title = "Number of Players by Position by Year",
    x = "Year",
    y = "Count"
  )

# Same bars rescaled so each season sums to 1 (positional share per season)
ggplot(PointsCombineA, aes(x = yearID, fill = POS)) +
  geom_bar(position = "fill") +
  labs(
    title = "Percentage of the number of players by Position by Year",
    x = "Year",
    y = "Percentage"
  )

# Points distribution per position, split by league: whisker caps drawn
# first, boxes on top, and a yellow diamond marking each position's mean.
ggplot(PointsCombineA, aes(x = POS, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(aes(fill = lgID), outlier.size = 1) +
  coord_flip() +
  stat_summary(fun.y = mean, geom = "point", color = "yellow", size = 2, shape = 18) +
  labs(title = "Points by Position and League")
# Non-pitcher views of the positional breakdown.
# Fix: both bar charts put yearID on the x axis but labelled it "Points" and
# "Ages"; the axis label is now "Year", matching the all-position charts.

# Row counts per season, stacked by position (pitchers excluded)
ggplot(PointsCombineWOPit, aes(yearID, fill = POS)) +
  geom_bar() +
  labs(
    title = "Number of Players by Position by Year (Not Including Pitchers)",
    x = "Year",
    y = "Count"
  )

# Positional share per season (pitchers excluded)
ggplot(PointsCombineWOPit, aes(yearID, fill = POS)) +
  geom_bar(position = "fill") +
  labs(
    title = "Percentage of the number of players by Position by Year (Not Including Pitchers)",
    x = "Year",
    y = "Percentage"
  )

# Histogram of Points, coloured by position, one panel per league
ggplot(PointsCombineWOPit, aes(Points, fill = POS)) +
  geom_histogram(bins = 40, color = "white") +
  facet_grid(~lgID) +
  labs(
    title = "Points by Position and League (Not Including Pitchers)",
    y = "Count of Observations",
    x = "Points"
  )
First, we explore the history of catchers.
# ---- Catchers ----
# Fix applied to the two scatter plots: geom_smooth() used to map
# colour to yearID. That continuous value is not constant within a league
# group, so the smoothing stat discarded it and the fit lines fell back to
# the default colour. The layer now inherits color = lgID from the plot, so
# each league's fit line matches its points and ribbon.

# Print the trimmed catcher table
PointsCombineCLim

# Row counts per season, stacked by league
ggplot(PointsCombineC, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for Catchers", x = "Year", y = "Count")

# Points distribution per team, filled by league; yellow diamond = team mean
ggplot(PointsCombineC, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# Points over time with one linear fit per league
ggplot(PointsCombineC, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against ISO with one linear fit per league
ggplot(PointsCombineC, aes(x = ISO, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs ISO by League")
Next, we explore the history of first basemen.
# ---- First basemen ----
# Fixes:
#  * geom_smooth() used to map colour to yearID. That continuous value is
#    not constant within a league group, so the smoothing stat discarded it
#    and the fit lines fell back to the default colour. The layer now
#    inherits color = lgID from the plot.
#  * Corrected the typo "First Basebman" in the bar chart title.

# Print the trimmed first-base table
PointsCombine1BLim

# Row counts per season, stacked by league
ggplot(PointsCombine1B, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for First Baseman", x = "Year", y = "Count")

# Points distribution per team, filled by league; yellow diamond = team mean
ggplot(PointsCombine1B, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# Points over time with one linear fit per league
ggplot(PointsCombine1B, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against ISO with one linear fit per league
ggplot(PointsCombine1B, aes(x = ISO, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs ISO by League")
Next, we explore the history of second basemen.
# ---- Second basemen ----
# Fix applied to the two scatter plots: geom_smooth() used to map
# colour to yearID. That continuous value is not constant within a league
# group, so the smoothing stat discarded it and the fit lines fell back to
# the default colour. The layer now inherits color = lgID from the plot, so
# each league's fit line matches its points and ribbon.

# Print the trimmed second-base table
PointsCombine2BLim

# Row counts per season, stacked by league
ggplot(PointsCombine2B, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for Second Baseman", x = "Year", y = "Count")

# Points distribution per team, filled by league; yellow diamond = team mean
ggplot(PointsCombine2B, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# Points over time with one linear fit per league
ggplot(PointsCombine2B, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against ISO with one linear fit per league
ggplot(PointsCombine2B, aes(x = ISO, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs ISO by League")
Next, we explore the history of shortstops.
# ---- Shortstops ----
# Fix applied to the two scatter plots: geom_smooth() used to map
# colour to yearID. That continuous value is not constant within a league
# group, so the smoothing stat discarded it and the fit lines fell back to
# the default colour. The layer now inherits color = lgID from the plot, so
# each league's fit line matches its points and ribbon.

# Print the trimmed shortstop table
PointsCombineSSLim

# Row counts per season, stacked by league
ggplot(PointsCombineSS, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for Short Stop", x = "Year", y = "Count")

# Points distribution per team, filled by league; yellow diamond = team mean
ggplot(PointsCombineSS, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# Points over time with one linear fit per league
ggplot(PointsCombineSS, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against ISO with one linear fit per league
ggplot(PointsCombineSS, aes(x = ISO, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs ISO by League")
Next, we explore the history of third basemen.
# ---- Third basemen ----
# Fix applied to the two scatter plots: geom_smooth() used to map
# colour to yearID. That continuous value is not constant within a league
# group, so the smoothing stat discarded it and the fit lines fell back to
# the default colour. The layer now inherits color = lgID from the plot, so
# each league's fit line matches its points and ribbon.

# Print the trimmed third-base table
PointsCombine3BLim

# Row counts per season, stacked by league
ggplot(PointsCombine3B, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for Third Baseman", x = "Year", y = "Count")

# Points distribution per team, filled by league; yellow diamond = team mean
ggplot(PointsCombine3B, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# Points over time with one linear fit per league
ggplot(PointsCombine3B, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against ISO with one linear fit per league
ggplot(PointsCombine3B, aes(x = ISO, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs ISO by League")
Next, we explore the history of outfielders.
# ---- Outfielders ----
# Fix applied to the two scatter plots: geom_smooth() used to map
# colour to yearID. That continuous value is not constant within a league
# group, so the smoothing stat discarded it and the fit lines fell back to
# the default colour. The layer now inherits color = lgID from the plot, so
# each league's fit line matches its points and ribbon.

# Print the trimmed outfield table
PointsCombineOFLim

# Row counts per season, stacked by league
ggplot(PointsCombineOF, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for Outfielders", x = "Year", y = "Count")

# Points distribution per team, filled by league; yellow diamond = team mean
ggplot(PointsCombineOF, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# Points over time with one linear fit per league
ggplot(PointsCombineOF, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against ISO with one linear fit per league
ggplot(PointsCombineOF, aes(x = ISO, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs ISO by League")
Next, we explore the history of pitchers.
# ---- Pitchers ----
# Fixes:
#  * geom_smooth() used to map colour to yearID. That continuous value is
#    not constant within a league group, so the smoothing stat discarded it
#    and the fit lines fell back to the default colour. The layer now
#    inherits color = lgID from the plot.
#  * The ERA histogram is filled by league, not team, so its title now reads
#    "ERA by League" instead of "ERA by Team".

# Print the trimmed pitcher table
PointsCombinePLim

# Row counts per season, stacked by league
ggplot(PointsCombineP, aes(yearID, fill = lgID)) +
  geom_bar() +
  labs(title = "Number of Players by League by Year for Pitchers", x = "Year", y = "Count")

# Points distribution per team, filled by league; yellow diamond = team mean
ggplot(PointsCombineP, aes(x = teamID, y = Points)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.size = 1, aes(fill = lgID)) +
  coord_flip() +
  stat_summary(fun.y = mean, color = "yellow", geom = "point", size = 2, shape = 18) +
  labs(title = "Points by Team and League")

# Histogram of Points, coloured by team, one panel per league
ggplot(PointsCombineP, aes(Points, fill = teamID)) +
  geom_histogram(bins = 40, color = "white") +
  facet_grid(~lgID) +
  labs(title = "Points by Team", y = "Count of Observations", x = "Points")

# Histogram of ERA, coloured by league, zoomed to ERA between -3 and 20.
# Rows with a non-finite ERA are dropped by the binning stat with a warning
# (the rendered report showed 51 such rows; likely pitchers with zero
# IPouts - verify).
ggplot(PointsCombineP, aes(ERA, fill = lgID)) +
  geom_histogram(bins = 300, color = "white") +
  labs(title = "ERA by League", y = "Count of Observations", x = "ERA") +
  coord_cartesian(ylim = c(-1, 10000), xlim = c(-3, 20))

# Points over time with one linear fit per league
ggplot(PointsCombineP, aes(x = yearID, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Point Change over the course of Time by League")

# Points against wins with one linear fit per league
ggplot(PointsCombineP, aes(x = W, y = Points, color = lgID)) +
  geom_point() +
  geom_smooth(aes(fill = lgID), method = "lm") +
  labs(title = "Points vs Wins by League")
# Average pitcher profile per throwing arm: group the pitcher rows by
# `throws`, drop rows where it is missing, and take the NA-ignoring mean of
# every TeamCol column.
AVGThrow <- PointsCombineP %>%
  group_by(throws) %>%
  filter(throws != "NA") %>%
  summarise_at(vars(TeamCol), mean, na.rm = TRUE)

# Announce and display the result
print("The following is the Average Pitcher by Throwing Arm")
AVGThrow
After conducting the above analysis, I have concluded that further study of specific points of interest is needed. I would like to compare left- and right-handed pitchers to uncover whether one throwing arm is superior in some way. Another area of interest is the characteristics of pitching and how they have changed over time.
# Workspace cleanup: drop the objects created during data set-up ...
rm(
  BatTotal, PitTotal, FieldTotal, MasterTotal,
  BatTotalCol, PitTotalCol,
  PointsCombine, PointsCol,
  ColNA, EraLabel, EraBreaks, TeamCol,
  PlayerAVGEra, PlayerAVGYear, PlayerAVGpos, PlayerAVGteam, PlayerAVGlg
)
# ... and the per-position subsets. PointsCombineA and AVGThrow are not
# listed in either call, so they remain in the workspace.
rm(
  PointsCombine1B, PointsCombine1BLim,
  PointsCombine2B, PointsCombine2BLim,
  PointsCombine3B, PointsCombine3BLim,
  PointsCombineSS, PointsCombineSSLim,
  PointsCombineOF, PointsCombineOFLim,
  PointsCombineP, PointsCombinePLim,
  PointsCombineC, PointsCombineCLim,
  PointsCombineWOPit
)