Set-Up

Install/Load Packages

# Attach the packages used by the analysis below.
# The workspace is intentionally not cleared with rm(list = ls()): that call
# deletes the caller's objects but does not detach packages or reset options,
# so it does not give a clean session. Every object used below is created
# explicitly; restart R when a truly fresh session is needed.
library(ggplot2)
library(visdat)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(reshape2)

Import Data

# Read the raw training data; the path is relative to the working directory.
dfmoneyball <- read.csv(file = "moneyball-training-data.csv")

Visualize Missing Values

# Plot the raw data frame with visdat to see where values are missing.
vis_dat(x = dfmoneyball)

Clean Data

# Drop the 11th column (selected by position), then keep only the rows that
# have no missing value in any remaining column.
# NOTE(review): presumably column 11 is the one that is mostly missing in the
# plot above; confirm, and consider dropping it by name so a change in column
# order cannot silently remove a different variable.
dfmoneyball2 <- dfmoneyball[, -11]
dfmoneyballclean <- na.omit(dfmoneyball2)

Summary Statistics

Base Money Ball Statistics

# Preview the first six rows of the cleaned data.
head(dfmoneyballclean, n = 6L)
##   INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## 2     2          70           1339             219              22
## 3     3          86           1377             232              35
## 4     4          70           1387             209              38
## 5     5          82           1297             186              27
## 6     6          75           1279             200              36
## 7     7          80           1244             179              54
##   TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## 2             190             685            1075              37
## 3             137             602             917              46
## 4              96             451             922              43
## 5             102             472             920              49
## 6              92             443             973             107
## 7             122             525            1062              80
##   TEAM_BASERUN_CS TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB
## 2              28            1347              191              689
## 3              27            1377              137              602
## 4              30            1396               97              454
## 5              39            1297              102              472
## 6              59            1279               92              443
## 7              54            1244              122              525
##   TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP
## 2             1082             193              155
## 3              917             175              153
## 4              928             164              156
## 5              920             138              168
## 6              973             123              149
## 7             1062             136              186

Summary Statistics Table

# Print a plain-text summary table (mean, standard deviation, min, max) for
# every column of the cleaned data.
# covariate.labels are applied in column order, so the vector below must list
# one label per column in the same order as names(dfmoneyballclean).
# The note takes the row count from the data itself, so it stays correct if
# the input file or the cleaning step changes.
stargazer(
  dfmoneyballclean,
  type = "text",
  title = "Money Ball Statistics",
  summary.stat = c("Mean", "sd", "Min", "Max"),
  notes = paste(nrow(dfmoneyballclean), "Trials for all Stats"),
  covariate.labels = c(
    "Index",
    "Wins",
    "Batting Base Hits ",
    "Batting Doubles",
    "Batting Triples",
    "Batting Homeruns",
    "Batting Walks",
    "Batting Strikeouts",
    "Baserunning Stolen Bases",
    "Baserunning Caught Stealing",
    "Pitching Hits Allowed",
    "Pitching Homeruns Allowed",
    "Pitching Walks Allowed",
    "Pitching Strikeouts",
    "Fielding Errors",
    "Fielding Double Plays"
  )
)
## 
## Money Ball Statistics
## ==========================================================
## Statistic                     Mean    St. Dev.  Min   Max 
## ----------------------------------------------------------
## Index                       1,273.812 725.508    2   2,534
## Wins                         80.997    12.694   41    117 
## Batting Base Hits           1,452.157 104.336  1,137 1,786
## Batting Doubles              250.970   42.026   154   377 
## Batting Triples              42.905    18.649   11    129 
## Batting Homeruns             129.842   48.609   11    264 
## Batting Walks                541.888   80.567   309   878 
## Batting Strikeouts           841.743  200.355   326  1,399
## Baserunning Stolen Bases     95.858    44.345   18    314 
## Baserunning Caught Stealing  52.963    22.851   11    201 
## Pitching Hits Allowed       1,505.122 173.472  1,137 2,394
## Pitching Homeruns Allowed    134.069   50.902   12    343 
## Pitching Walks Allowed       561.570   97.347   325  1,090
## Pitching Strikeouts          869.253  211.859   345  1,781
## Fielding Errors              143.145   38.954   65    360 
## Fielding Double Plays        153.743   20.321   87    228 
## ----------------------------------------------------------
## 1486 Trials for all Stats

Graphs

# Reshape to long format (one row per variable/value pair) so that every
# column can be drawn as its own histogram facet. Listing the measure
# variables explicitly gives the same result as melt()'s automatic guess
# (all columns become measure variables) without the
# "No id variables" message.
# NOTE(review): INDEX is melted as well, so it gets a facet of its own;
# confirm that is intended.
df_melted <- melt(dfmoneyballclean, measure.vars = names(dfmoneyballclean))

# One histogram per variable, each facet with its own x-axis range.
# bins = 30 is the value stat_bin() falls back to by default; stating it
# explicitly keeps the plot unchanged and avoids the "Pick better value"
# message.
ggplot(df_melted, aes(x = value)) +
  geom_histogram(bins = 30) +
  facet_wrap(~variable, scales = "free_x")