#install.packages("tidyverse")
#install.packages("visdat")
#install.packages("stargazer")
library("psych")
library("tidyverse")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("visdat") #for visualizing the data
library("stargazer")
##
## Please cite as:
##
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library("ggplot2")
library("reshape2") # for reshaping the data to long format
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
Step 1: Import the Data
# Resolve the location of the Moneyball training CSV.
# The original absolute path is kept as the default so existing runs behave the
# same; setting the MONEYBALL_CSV environment variable overrides it, which lets
# the script run on another machine without editing the source.
moneyball_path <- Sys.getenv("MONEYBALL_CSV", unset = NA)
if (is.na(moneyball_path) || !nzchar(moneyball_path)) {
  moneyball_path <- "C:/Users/Alex Law/Documents/BC Data Analysis/moneyball-training-data (2).csv"
}

# Stop early with a readable message rather than read.csv()'s connection error.
if (!file.exists(moneyball_path)) {
  stop("Moneyball CSV not found: ", moneyball_path, call. = FALSE)
}

# Load the raw data into a data frame.
moneyball <- read.csv(moneyball_path)
Step 2: Clean Data
# Listwise deletion: keep only the rows of `moneyball` that have no missing
# value in any column.
# NOTE(review): the summary table further down reports N = 191 for every
# column - confirm that this many dropped rows is acceptable for the analysis.
clean_moneyball <- stats::na.omit(object = moneyball)
Step 3: Summary Statistics
# Print a plain-text table of descriptive statistics (N, mean, standard
# deviation, min, max) for each column of the cleaned data.
stargazer::stargazer(
  clean_moneyball,
  type = "text"
)
##
## ===================================================
## Statistic N Mean St. Dev. Min Max
## ---------------------------------------------------
## INDEX 191 1,383.592 765.240 41 2,534
## TARGET_WINS 191 80.927 12.115 43 116
## TEAM_BATTING_H 191 1,478.628 76.148 1,308 1,667
## TEAM_BATTING_2B 191 297.199 26.329 201 373
## TEAM_BATTING_3B 191 30.743 9.044 12 61
## TEAM_BATTING_HR 191 178.052 32.413 116 260
## TEAM_BATTING_BB 191 543.319 74.842 365 775
## TEAM_BATTING_SO 191 1,051.026 104.156 805 1,399
## TEAM_BASERUN_SB 191 90.906 29.916 31 177
## TEAM_BASERUN_CS 191 39.942 11.898 12 74
## TEAM_BATTING_HBP 191 59.356 12.967 29 95
## TEAM_PITCHING_H 191 1,479.702 75.789 1,312 1,667
## TEAM_PITCHING_HR 191 178.178 32.392 116 260
## TEAM_PITCHING_BB 191 543.717 74.917 367 775
## TEAM_PITCHING_SO 191 1,051.817 104.347 805 1,399
## TEAM_FIELDING_E 191 107.052 16.632 65 145
## TEAM_FIELDING_DP 191 152.335 17.612 113 204
## ---------------------------------------------------
Step 4: Graphs
# Stack the cleaned data into long form (one `variable` / `value` pair per
# cell) so that every column can be drawn in its own panel. No id columns are
# supplied, so melt() treats all columns as measure variables.
moneyball_melted <- reshape2::melt(data = clean_moneyball)
## No id variables; using all as measure variables

# One histogram per original column. The x axis is free per panel because the
# columns are on different scales; binning is left at geom_histogram()'s
# default.
ggplot(moneyball_melted, mapping = aes(x = value)) +
  geom_histogram() +
  facet_wrap(vars(variable), scales = "free_x")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
