Introduction

Team Name: L1 Team Members: Conor McGrath, Tina Wang, Tiffany Tseng, Michael Mayor, Brock Gallagher

Load Data sets

library(tidyverse)
## Registered S3 methods overwritten by 'tibble':
##   method     from  
##   format.tbl pillar
##   print.tbl  pillar
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.4
## Warning: package 'tidyr' was built under R version 4.0.4
## Warning: package 'forcats' was built under R version 4.0.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read the 2019 season statistics CSV into a tibble.
Baseball2019 <- read_csv(file = "~/MSBA/Fall 2020/Fall Interterm/Data Wrangling/stats2019.csv")
## Warning: Missing column names filled in: 'X20' [20]
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   last_name = col_character(),
##   first_name = col_character(),
##   X20 = col_logical()
## )
## See spec(...) for full column specifications.
# Read the 2020 season statistics CSV into a tibble.
Baseball2020 <- read_csv(file = "~/MSBA/Fall 2020/Fall Interterm/Data Wrangling/stats2020.csv")
## Warning: Missing column names filled in: 'X20' [20]
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   last_name = col_character(),
##   first_name = col_character(),
##   X20 = col_logical()
## )
## See spec(...) for full column specifications.

Combine the first- and last-name columns into a single Name column for each table.

# Build a full player name ("first last") for each season's table.
# paste() separates its arguments with a single space by default.
Baseball2019$Name <- with(Baseball2019, paste(first_name, last_name))

Baseball2020$Name <- with(Baseball2020, paste(first_name, last_name))

Create average HR per AB column

# Home runs divided by at-bats for each player, rounded to 2 decimal places.
# NOTE(review): 2 decimals is coarse for a rate of this kind; confirm that the
# precision is intentional before relying on small year-over-year changes.
Baseball2019[["AvgHrPerAB"]] <- round(
  Baseball2019[["b_home_run"]] / Baseball2019[["b_ab"]],
  digits = 2
)

Baseball2020[["AvgHrPerAB"]] <- round(
  Baseball2020[["b_home_run"]] / Baseball2020[["b_ab"]],
  digits = 2
)

Select necessary columns

# Reduce each season's table to the player name, the HR-per-AB rate, and the
# average exit velocity and launch angle columns.
Baseball2019 <- select(
  Baseball2019,
  Name, AvgHrPerAB, exit_velocity_avg, launch_angle_avg
)

Baseball2020 <- select(
  Baseball2020,
  Name, AvgHrPerAB, exit_velocity_avg, launch_angle_avg
)

Inner join on the new player name column

# Keep only players that appear in both seasons, matching on Name.
# The year suffixes label each season's columns directly, and the final
# select() orders and renames columns by name, so the result no longer depends
# on the positional layout of the joined table.
# NOTE(review): Name is not guaranteed to be unique; two players sharing a name
# would produce extra joined rows. Confirm, or join on a player id if one exists.
BaseballFinal <- inner_join(
  Baseball2019,
  Baseball2020,
  by = "Name",
  suffix = c("2019", "2020")
) %>%
  select(
    Name,
    AvgHrPerAB2019,
    AvgHrPerAB2020,
    EV2019 = exit_velocity_avg2019,
    EV2020 = exit_velocity_avg2020,
    LA2019 = launch_angle_avg2019,
    LA2020 = launch_angle_avg2020
  )

Create columns with the differences in EV, LA, and HR per AB from 2019 to 2020

# Year-over-year change (2020 minus 2019) in exit velocity, launch angle, and
# HR per AB, each rounded to 2 decimal places.
BaseballFinal <- BaseballFinal %>%
  mutate(
    EVDiff = round(EV2020 - EV2019, 2),
    LADiff = round(LA2020 - LA2019, 2),
    HRperABDiff = round(AvgHrPerAB2020 - AvgHrPerAB2019, 2)
  )

Select only relevant columns

# Keep the player name and the three difference columns, selected by name so
# the result does not depend on how many columns precede them.
BaseballFinal <- BaseballFinal %>%
  select(Name, EVDiff, LADiff, HRperABDiff)

Create a binary variable indicating whether HR per AB decreased or increased

# Label each player by the sign of the HR-per-AB change and store the label as
# a factor.
# NOTE(review): the test is strictly `< 0`, so a change of exactly 0 is
# labelled "Increased".
BaseballFinal <- BaseballFinal %>%
  mutate(
    AvgHR = factor(if_else(HRperABDiff < 0, "Decreased", "Increased"))
  )

Create 3 scatterplots

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Scatterplot of the change in average exit velocity (x) against the change in
# HR per AB (y), coloured by the AvgHR factor, with a smoothed trend line.
# The colours are named by level so "Decreased" is always red and "Increased"
# is always green, even if only one of the two levels is present in the data.
ggplot(BaseballFinal, aes(x = EVDiff, y = HRperABDiff)) +
  geom_point(aes(color = AvgHR)) +
  geom_smooth() +
  scale_color_manual(values = c(Decreased = "red", Increased = "green")) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Scatterplot of the change in average launch angle (x) against the change in
# HR per AB (y), coloured by the AvgHR factor, with a smoothed trend line.
# The colours are named by level so "Decreased" is always red and "Increased"
# is always green, even if only one of the two levels is present in the data.
ggplot(BaseballFinal, aes(x = LADiff, y = HRperABDiff)) +
  geom_point(aes(color = AvgHR)) +
  geom_smooth() +
  scale_color_manual(values = c(Decreased = "red", Increased = "green")) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Scatterplot of the change in exit velocity (x) against the change in launch
# angle (y), coloured by the AvgHR factor. The `text` aesthetic carries the
# per-player tooltip shown by plotly; "<br>" separates the tooltip lines.
# The colours are named by level so "Decreased" is always red and "Increased"
# is always green, even if only one of the two levels is present in the data.
ggplotBB3 <- ggplot(
  BaseballFinal,
  aes(
    text = paste(
      "Player Name:", Name,
      "<br>Change in HR per AB ratio:", HRperABDiff,
      "<br>Change in Avg EV:", EVDiff,
      "<br>Change in Avg LA:", LADiff,
      "<br>Avg HR:", AvgHR
    ),
    x = EVDiff,
    y = LADiff,
    color = AvgHR
  )
) +
  geom_point() +
  scale_color_manual(values = c(Decreased = "red", Increased = "green")) +
  ggtitle("Effects of Launch Angle and Exit Velocity", subtitle = "Source: Baseball Savant") +
  theme_minimal()

# Convert the static plot to an interactive plotly widget, showing only the
# custom text in the hover tooltip.
ggplotly(ggplotBB3, tooltip = "text")