library(Lahman)
library(ggplot2)
library(dplyr)
library(ggalt)
library(ggExtra)
## Warning: package 'ggExtra' was built under R version 3.4.3
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 3.4.3
# Per-season subsets of the Lahman `Pitching` table, one data frame per year
# from 2010 through 2014. The year is matched against an integer literal
# instead of a string so the comparison does not rely on implicit coercion.
Pitch14 <- Pitching %>% filter(yearID == 2014L)
Pitch13 <- Pitching %>% filter(yearID == 2013L)
Pitch12 <- Pitching %>% filter(yearID == 2012L)
Pitch11 <- Pitching %>% filter(yearID == 2011L)
Pitch10 <- Pitching %>% filter(yearID == 2010L)
# Base 2014 scatterplot: ERA against outs recorded, with point colour mapped
# to wins (W) and point size mapped to earned runs (ER), plus a loess trend
# line drawn without its confidence band.
scatterplot <- ggplot(Pitch14, aes(IPouts, ERA)) +
  geom_point(aes(colour = W, size = ER)) +
  geom_smooth(method = "loess", se = FALSE) +
  # Rows outside these limits are dropped from both layers (hence the
  # "Removed ... rows" warnings when the plot is printed).
  xlim(0, 750) +
  ylim(0, 15) +
  labs(
    title = "MLB ERA, Wins, and Earned Runs",
    subtitle = "from Lahman data set",
    x = "Outs (Innings Pitched)",
    y = "Earned Run Average",
    caption = "for 2014 season"
  )
scatterplot
## Warning: Removed 27 rows containing non-finite values (stat_smooth).
## Warning: Removed 27 rows containing missing values (geom_point).

# Pitchers with more than 650 outs recorded and an ERA below 5; these are the
# points that get circled on the plot below.
# filter() drops rows where either condition is NA, whereas base logical
# subsetting (`df[cond, ]`) would turn each NA condition into an all-NA row.
ERA_select <- Pitch14 %>% filter(IPouts > 650, ERA < 5)

# Reuse the base scatterplot (points, loess line, axis limits, labels) and add
# a red outline around the selected pitchers, rather than repeating the whole
# plot specification.
scatterplot_enc <- scatterplot +
  geom_encircle(
    aes(x = IPouts, y = ERA),
    data = ERA_select,
    color = "red",
    size = 2,
    expand = 0.08
  )
scatterplot_enc

# Make the black-and-white theme the session default for plots drawn from
# here on.
theme_set(theme_bw())

# Plain ERA-versus-outs scatter for 2014 with a loess trend line and no
# colour or size mappings.
ggplot(Pitch14, aes(IPouts, ERA)) +
  geom_point() +
  geom_smooth(method = loess, se = FALSE) +
  xlim(0, 750) +
  ylim(0, 15) +
  labs(
    title = "MLB ERA by Innings Pitched",
    subtitle = "from Lahman data set",
    x = "Outs (Innings Pitched)",
    y = "Earned Run Average",
    caption = "for 2014 season"
  )

# Rows and columns in the 2014 subset.
dim(Pitch14)
## [1] 746 30

# Same ERA-versus-outs view, but with small points jittered horizontally by
# up to 0.5 so that overlapping observations separate slightly.
ggplot(Pitch14, aes(IPouts, ERA)) +
  geom_jitter(width = 0.5, size = 1) +
  geom_smooth(method = loess, se = FALSE) +
  xlim(0, 750) +
  ylim(0, 15) +
  labs(
    title = "MLB ERA by Innings Pitched",
    subtitle = "from Lahman data set",
    x = "Outs (Innings Pitched)",
    y = "Earned Run Average",
    caption = "for 2014 season"
  )

# ERA versus outs with point size mapped to HR, drawn in a fixed firebrick
# colour.
# NOTE(review): mapping size to HR overrides geom_count()'s default
# count-based sizing -- confirm geom_point() was not what was intended.
ggplot(Pitch14, aes(IPouts, ERA)) +
  geom_count(aes(size = HR), colour = "firebrick") +
  xlim(0, 750) +
  ylim(0, 15) +
  labs(
    title = "MLB ERA by Innings Pitched and Home Runs",
    subtitle = "from Lahman data set",
    x = "Outs (Innings Pitched)",
    y = "Earned Run Average",
    caption = "for 2014 season"
  )

# Team-level ERA for 2014: total earned runs per 27 outs, sorted from lowest
# (best) to highest.
# na.rm = TRUE has to be passed to sum(); given to summarize() it is treated
# as just another summary expression and produces a constant column named
# `na.rm` without removing any missing values.
ERA14 <- Pitch14 %>%
  group_by(teamID) %>%
  summarize(era = sum(ER, na.rm = TRUE) / sum(IPouts, na.rm = TRUE) * 27) %>%
  arrange(era)

# The data viewer is only useful in an interactive session; skip it when the
# script is run in batch mode.
if (interactive()) {
  View(ERA14)
}

# Individual pitcher rows for five hard-coded team IDs.
# NOTE(review): presumably the five lowest-ERA teams read off ERA14 --
# re-check the list if the underlying data changes.
BestTeamERA <- Pitch14[Pitch14$teamID %in% c("WAS", "SEA", "OAK", "SDN", "ATL"), ]

# One loess curve per team, with jittered points sized by HR.
ggplot(BestTeamERA, aes(x = IPouts, y = ERA)) +
  geom_smooth(aes(col = teamID), method = loess, se = FALSE) +
  geom_jitter(aes(col = teamID, size = HR)) +
  xlim(c(0, 750)) +
  ylim(c(0, 15)) +
  labs(
    title = "Five Best Team ERAs in 2014",
    subtitle = "from Lahman data set",
    x = "Outs (Innings Pitched)",
    y = "Earned Run Average",
    caption = "for 2014 season"
  )

# Team-level ERA for 2014 (earned runs per 27 outs), sorted from highest
# (worst) to lowest.
# na.rm = TRUE has to be passed to sum(); given to summarize() it only adds a
# constant column named `na.rm` and removes no missing values.
WorstERA14 <- Pitch14 %>%
  group_by(teamID) %>%
  summarize(era = sum(ER, na.rm = TRUE) / sum(IPouts, na.rm = TRUE) * 27) %>%
  arrange(desc(era))

# Individual pitcher rows for five hard-coded team IDs.
# NOTE(review): presumably the five highest-ERA teams read off WorstERA14 --
# re-check the list if the underlying data changes.
WorstTeamERA <- Pitch14[Pitch14$teamID %in% c("COL", "MIN", "TEX", "CHA", "ARI"), ]

# One loess curve per team, with jittered points sized by HR.
ggplot(WorstTeamERA, aes(x = IPouts, y = ERA)) +
  geom_smooth(aes(col = teamID), method = loess, se = FALSE) +
  geom_jitter(aes(col = teamID, size = HR)) +
  xlim(c(0, 750)) +
  ylim(c(0, 15)) +
  labs(
    title = "Five Worst Team ERAs in 2014",
    subtitle = "from Lahman data set",
    x = "Outs (Innings Pitched)",
    y = "Earned Run Average",
    caption = "for 2014 season"
  )

# Scatterplot/Histograms ----
# Set the black-and-white theme as the session default.
theme_set(theme_bw())

# ERA versus outs for 2014, split by league (lgID): one loess curve and one
# set of jittered points per league. Stored so the marginal plots can be
# attached to it afterwards.
ERAplot <- ggplot(Pitch14, aes(IPouts, ERA)) +
  geom_smooth(aes(colour = lgID), method = loess, se = FALSE) +
  geom_jitter(aes(colour = lgID)) +
  xlim(0, 750) +
  ylim(0, 15) +
  labs(
    title = "MLB ERA by Innings Pitched and League",
    subtitle = "from Lahman data set",
    x = "Outs (Innings Pitched)",
    y = "Earned Run Average",
    caption = "for 2014 season"
  )
ERAplot

# Redraw the league scatterplot three times, each with a different kind of
# marginal plot attached (histogram, density, boxplot), all unfilled.
ggMarginal(
  ERAplot,
  type = "histogram",
  fill = "transparent"
)

ggMarginal(
  ERAplot,
  type = "density",
  fill = "transparent"
)

ggMarginal(
  ERAplot,
  type = "boxplot",
  fill = "transparent"
)

# Correlogram ----
# Columns are removed by position before correlating.
# NOTE(review): the indices presumably cover the identifier columns plus a
# few stats left out on purpose -- confirm against the column order of the
# installed Lahman version.
Pitch14Corr <- Pitch14[-c(1:5, 13, 19:20, 23, 30)]

# Full-precision correlation matrix (this is what gets plotted), and a copy
# rounded to one decimal place. The rounded copy is not used by the plot.
PitchCorr2 <- cor(Pitch14Corr)
PitchCorr <- round(PitchCorr2, 1)

# Upper-triangle correlogram drawn with circles and labelled with the
# coefficient values, on a red-white-blue colour scale.
ggcorrplot(
  PitchCorr2,
  method = "circle",
  type = "upper",
  lab = TRUE,
  lab_size = 2,
  colors = c("red", "white", "blue"),
  title = "Correlations in MLB Pitching Stats, 2014",
  ggtheme = theme_bw
)
