# Load the raw Moneyball training data from the user's Downloads folder.
# Run this script in a fresh R session rather than clearing the workspace
# in-script: removing ls() objects deletes the caller's variables but leaves
# attached packages and options in place, so it does not give a clean state.
moneyball.training.data <- read.csv("~/Downloads/moneyball-training-data.csv")
# Clean Data
# Report, for each column of the raw data, how many entries are NA.
vapply(moneyball.training.data, function(column) sum(is.na(column)), numeric(1))
## INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B
## 0 0 0 0
## TEAM_BATTING_3B TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO
## 0 0 0 102
## TEAM_BASERUN_SB TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H
## 131 772 2085 0
## TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E
## 0 0 102 0
## TEAM_FIELDING_DP
## 286
# Drop every row that has an NA in any column.
# NOTE(review): the NA counts printed above show TEAM_BATTING_HBP missing in
# 2085 rows, so this step discards most of the data -- confirm this is intended.
moneyball_data_clean <- na.omit(moneyball.training.data)

# Keep only the columns that contain no NA. Because the rows with NAs were
# already removed, every column passes this filter.
money_clean <- moneyball_data_clean[
  , !vapply(moneyball_data_clean, anyNA, logical(1))
]
# Summary Statistics Table
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(ggplot2)
# Print a plain-text summary table for every column of the cleaned data.
# The per-column N is omitted from the table and reported once in the notes
# line instead. The count is taken from the data actually summarised, so it
# stays correct after na.omit() has removed rows (a hard-coded total would
# report the size of the raw file, not of money_clean).
stargazer(
  money_clean,
  type = "text",
  title = "Summary Statistics",
  digits = 2,
  omit.summary.stat = "n",
  notes = paste("n =", nrow(money_clean))
)
##
## Summary Statistics
## ==============================================
## Statistic Mean St. Dev. Min Max
## ----------------------------------------------
## INDEX 1,383.59 765.24 41 2,534
## TARGET_WINS 80.93 12.12 43 116
## TEAM_BATTING_H 1,478.63 76.15 1,308 1,667
## TEAM_BATTING_2B 297.20 26.33 201 373
## TEAM_BATTING_3B 30.74 9.04 12 61
## TEAM_BATTING_HR 178.05 32.41 116 260
## TEAM_BATTING_BB 543.32 74.84 365 775
## TEAM_BATTING_SO 1,051.03 104.16 805 1,399
## TEAM_BASERUN_SB 90.91 29.92 31 177
## TEAM_BASERUN_CS 39.94 11.90 12 74
## TEAM_BATTING_HBP 59.36 12.97 29 95
## TEAM_PITCHING_H 1,479.70 75.79 1,312 1,667
## TEAM_PITCHING_HR 178.18 32.39 116 260
## TEAM_PITCHING_BB 543.72 74.92 367 775
## TEAM_PITCHING_SO 1,051.82 104.35 805 1,399
## TEAM_FIELDING_E 107.05 16.63 65 145
## TEAM_FIELDING_DP 152.34 17.61 113 204
## ----------------------------------------------
## n = 2276
# Scatter plot of TARGET_WINS (y) against TEAM_BATTING_HR (x) for the
# cleaned data.
ggplot(money_clean, aes(TEAM_BATTING_HR, TARGET_WINS)) +
  geom_point()

# Move Data to Excel
# install.packages("writexl")
library(writexl)
# Export the cleaned data frame to an Excel workbook; the relative path
# resolves against the current working directory.
write_xlsx(x = money_clean, path = "GroupProject.xlsx")