library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(ggplot2)
library(visdat)
library(reshape2)

Importing data

# Load the raw training data from the working directory into a data frame.
df <- read.csv(file = "moneyball-training-data.csv")

Missing values

# Draw visdat's overview plot of the raw data frame; it is used here to look
# for missing values before any rows are dropped.
vis_dat(x = df)

Clean dataset

# Listwise deletion: keep only the rows that have no NA in any column.
# NOTE(review): the summary table reports N = 191 after this step. If a few
# sparse columns account for most of the NAs (check the vis_dat plot), dropping
# or imputing those columns first may retain far more rows -- confirm.
df_clean <- na.omit(object = df)

Summary stats

# Print a plain-text table of summary statistics (N, mean, standard deviation,
# min, max) for every column of the cleaned data.
stargazer(df_clean, type = "text")
## 
## ===================================================
## Statistic         N    Mean    St. Dev.  Min   Max 
## ---------------------------------------------------
## INDEX            191 1,383.592 765.240   41   2,534
## TARGET_WINS      191  80.927    12.115   43    116 
## TEAM_BATTING_H   191 1,478.628  76.148  1,308 1,667
## TEAM_BATTING_2B  191  297.199   26.329   201   373 
## TEAM_BATTING_3B  191  30.743    9.044    12    61  
## TEAM_BATTING_HR  191  178.052   32.413   116   260 
## TEAM_BATTING_BB  191  543.319   74.842   365   775 
## TEAM_BATTING_SO  191 1,051.026 104.156   805  1,399
## TEAM_BASERUN_SB  191  90.906    29.916   31    177 
## TEAM_BASERUN_CS  191  39.942    11.898   12    74  
## TEAM_BATTING_HBP 191  59.356    12.967   29    95  
## TEAM_PITCHING_H  191 1,479.702  75.789  1,312 1,667
## TEAM_PITCHING_HR 191  178.178   32.392   116   260 
## TEAM_PITCHING_BB 191  543.717   74.917   367   775 
## TEAM_PITCHING_SO 191 1,051.817 104.347   805  1,399
## TEAM_FIELDING_E  191  107.052   16.632   65    145 
## TEAM_FIELDING_DP 191  152.335   17.612   113   204 
## ---------------------------------------------------

Graphs

# Reshape the cleaned data to long format so every column can be plotted as
# its own facet. No id variables are given, so melt() treats all columns as
# measure variables (and says so in a message).
df_melted <- melt(data = df_clean)
## No id variables; using all as measure variables
# One histogram per original column. The x-axis is freed per facet because the
# columns sit on very different scales (compare the means in the summary
# table). Binning is left at geom_histogram()'s default.
ggplot(data = df_melted, mapping = aes(x = value)) +
  geom_histogram() +
  facet_wrap(facets = ~variable, scales = "free_x")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.