library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(ggplot2)
library(visdat)
library(reshape2)
# Importing data
# Load the training set from a CSV file; the relative path is resolved
# against the current working directory.
df <- read.csv(file = "moneyball-training-data.csv")
# Missing values
# Visual overview of the raw data frame, used here to inspect missing
# values before any cleaning is applied.
vis_dat(x = df)

# Clean dataset
# Drop every row that has a missing value in any column. The summary
# table further down reports N = 191 for the rows that remain.
df_clean <- na.omit(object = df)
# Summary stats
# Print a plain-text table of summary statistics (N, mean, standard
# deviation, min, max) for each column of the cleaned data.
stargazer(
  df_clean,
  type = "text"
)
##
## ===================================================
## Statistic N Mean St. Dev. Min Max
## ---------------------------------------------------
## INDEX 191 1,383.592 765.240 41 2,534
## TARGET_WINS 191 80.927 12.115 43 116
## TEAM_BATTING_H 191 1,478.628 76.148 1,308 1,667
## TEAM_BATTING_2B 191 297.199 26.329 201 373
## TEAM_BATTING_3B 191 30.743 9.044 12 61
## TEAM_BATTING_HR 191 178.052 32.413 116 260
## TEAM_BATTING_BB 191 543.319 74.842 365 775
## TEAM_BATTING_SO 191 1,051.026 104.156 805 1,399
## TEAM_BASERUN_SB 191 90.906 29.916 31 177
## TEAM_BASERUN_CS 191 39.942 11.898 12 74
## TEAM_BATTING_HBP 191 59.356 12.967 29 95
## TEAM_PITCHING_H 191 1,479.702 75.789 1,312 1,667
## TEAM_PITCHING_HR 191 178.178 32.392 116 260
## TEAM_PITCHING_BB 191 543.717 74.917 367 775
## TEAM_PITCHING_SO 191 1,051.817 104.347 805 1,399
## TEAM_FIELDING_E 191 107.052 16.632 65 145
## TEAM_FIELDING_DP 191 152.335 17.612 113 204
## ---------------------------------------------------
# Graphs
# Reshape the cleaned data to long format (one row per variable/value
# pair) so every column can be plotted in its own facet. No id variables
# are given, so all columns are treated as measure variables.
# NOTE(review): this includes INDEX, which looks like a row identifier
# rather than a statistic -- confirm whether it should be melted.
df_melted <- melt(data = df_clean)
## No id variables; using all as measure variables
# Histogram of every melted variable, one panel per variable. Each panel
# gets its own x-axis range ("free_x"); the bin count is left at the
# geom_histogram() default.
ggplot(data = df_melted, mapping = aes(x = value)) +
  geom_histogram() +
  facet_wrap(facets = vars(variable), scales = "free_x")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
