if(!require(ggplot2)) install.packages("ggplot2")
library(ggplot2)
if(!require(dplyr)) install.packages("dplyr")
library(dplyr)
if(!require(magrittr)) install.packages("magrittr")
library(magrittr)
if(!require(Lahman)) install.packages("Lahman")
library(Lahman)
if(!require(rmarkdown)) install.packages("rmarkdown") #ignore this package if you didn't use R Markdown
library(rmarkdown)
# Store the season as a categorical variable in both Lahman tables so that
# plots which map colour to yearID get one discrete colour per season.
Batting[["yearID"]] <- as.factor(Batting[["yearID"]])
Pitching[["yearID"]] <- as.factor(Pitching[["yearID"]])

# Keep only the 2000 and 2019 pitching seasons.
Pitching0019 <- Pitching %>%
  filter(yearID %in% c(2000, 2019))
# Question 1
# Drop pitchers with fewer than 10 strikeouts in a season. Pitchers who barely
# appeared add little insight into the distribution and behave as outliers, so
# removing them makes the pattern easier to see.
Pitching0019 <- Pitching0019 %>%
  filter(SO >= 10)

# Display the filtered data as its own statement rather than piping it into
# the assignment above: paged_table() returns its input re-classed as a
# "paged_df", so assigning that result showed nothing and replaced the
# plotting data with a display wrapper. When knitted with R Markdown this
# renders as a paged table.
paged_table(Pitching0019)

# Scatter plot of strikeouts (SO) against earned run average (ERA).
Q1 <- ggplot(Pitching0019, aes(x = SO, y = ERA)) +
  geom_point()
# Your summary of the distribution pattern.
Q1

# Question 2
# Scatter plot of SO against ERA, with each point coloured by season (yearID).
Q2 <- ggplot(
  data = Pitching0019,
  mapping = aes(x = SO, y = ERA, colour = yearID)
) +
  geom_point()
# Your summary of the distribution pattern.
Q2

# Bonus Questions
# Several approaches work here. This one overlays density curves; facet_wrap()
# combined with a bar chart, scatter plot, or density plot would also work.

# Batting: keep four seasons and only rows with more than 10 hits, then derive
# the batting average BA = H / AB. `yearID %in% c(...)` is the compact form of
# chaining `yearID == ...` tests with `|`.
BQ.Batting <- Batting %>%
  filter(
    yearID %in% c(2000, 2006, 2012, 2019),
    H > 10
  ) %>%
  mutate(
    BA = H / AB,
    # Make sure the season is categorical so each year gets its own curve.
    yearID = as.factor(yearID)
  )

# One density curve of batting average per season.
plot.Batting <- ggplot(
  data = BQ.Batting,
  mapping = aes(x = BA, colour = yearID)
) +
  geom_density()
# Your summary
plot.Batting

# Pitching: compare the strikeout-to-walk ratio (SOBB = SO / BB) across four
# seasons, keeping only rows with more than 3 strikeouts.
# A row with zero walks would give SO / BB = Inf, which the density estimate
# cannot use (ggplot2 drops such rows with a warning), so at least one walk is
# required as well.
BQ.Pitching <- Pitching %>%
  filter(yearID %in% c(2000, 2006, 2012, 2019)) %>%
  filter(SO > 3, BB > 0) %>%
  mutate(SOBB = SO / BB)

# Make sure the season is a factor (i.e., a categorical variable) so each year
# gets its own density curve; this is a no-op if yearID is already a factor.
BQ.Pitching$yearID <- as.factor(BQ.Pitching$yearID)

# One density curve of the strikeout-to-walk ratio per season.
plot.Pitching <- ggplot(BQ.Pitching, aes(x = SOBB, color = yearID)) +
  geom_density()
# Your summary
plot.Pitching
