#install.packages("tidyverse")
#install.packages("visdat")
#install.packages("stargazer")

library("psych")
library("tidyverse")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("visdat") #for visualizing the data
library("stargazer")
## 
## Please cite as: 
## 
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library("ggplot2")
library("reshape2")  #makeing data long
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

Step 1: Import the Data

# Read the moneyball training data from its CSV file into a data frame.
moneyball <- read.csv(
  file = "C:/Users/Alex Law/Documents/BC Data Analysis/moneyball-training-data (2).csv"
)

Step 2: Clean Data

# Listwise deletion: keep only the rows of `moneyball` that have no missing
# value in any column.
clean_moneyball <- stats::na.omit(object = moneyball)

Step 3: Summary Statistics

# Print a plain-text summary-statistics table (N, mean, st. dev., min, max)
# for the cleaned data.
stargazer::stargazer(clean_moneyball, type = "text")
## 
## ===================================================
## Statistic         N    Mean    St. Dev.  Min   Max 
## ---------------------------------------------------
## INDEX            191 1,383.592 765.240   41   2,534
## TARGET_WINS      191  80.927    12.115   43    116 
## TEAM_BATTING_H   191 1,478.628  76.148  1,308 1,667
## TEAM_BATTING_2B  191  297.199   26.329   201   373 
## TEAM_BATTING_3B  191  30.743    9.044    12    61  
## TEAM_BATTING_HR  191  178.052   32.413   116   260 
## TEAM_BATTING_BB  191  543.319   74.842   365   775 
## TEAM_BATTING_SO  191 1,051.026 104.156   805  1,399
## TEAM_BASERUN_SB  191  90.906    29.916   31    177 
## TEAM_BASERUN_CS  191  39.942    11.898   12    74  
## TEAM_BATTING_HBP 191  59.356    12.967   29    95  
## TEAM_PITCHING_H  191 1,479.702  75.789  1,312 1,667
## TEAM_PITCHING_HR 191  178.178   32.392   116   260 
## TEAM_PITCHING_BB 191  543.717   74.917   367   775 
## TEAM_PITCHING_SO 191 1,051.817 104.347   805  1,399
## TEAM_FIELDING_E  191  107.052   16.632   65    145 
## TEAM_FIELDING_DP 191  152.335   17.612   113   204 
## ---------------------------------------------------

Step 4: Graphs

# Reshape the cleaned data to long format (columns `variable` and `value`)
# so every column can be plotted in its own facet. No id variables are
# supplied, so melt() treats all columns as measure variables.
moneyball_melted <- reshape2::melt(data = clean_moneyball)
## No id variables; using all as measure variables
# One histogram per variable; each facet gets its own x-axis scale because
# the variables are on very different ranges.
ggplot(moneyball_melted, aes(x = value)) +
  geom_histogram() +
  facet_wrap(~ variable, scales = "free_x")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.