1. Data Setup

1.1 Package and Studio Setup

# Packages this analysis depends on. "visdat" is listed because the script
# calls visdat::vis_dat() further down; leaving it out makes that call fail on
# a machine where visdat was never installed.
# MASS is attached after dplyr, so select() resolves to MASS::select; the
# script therefore spells out dplyr::select() wherever it selects columns.
packages <- c(
  "dplyr", "stargazer", "ggplot2", "Amelia", "ggcorrplot", "lmtest",
  "e1071", "MASS", "visdat"
)

# Install each package that is not available yet, then attach it.
for (pkg in packages) {
  # requireNamespace() tests a single package instead of rebuilding the whole
  # installed.packages() table on every iteration.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    # Use the HTTPS mirror so packages are not downloaded over plain HTTP.
    install.packages(pkg, repos = "https://cran.rstudio.com/", dependencies = TRUE)
  }
  library(pkg, character.only = TRUE)
}

rm(packages, pkg)  # Drop the loop helpers


# library() stops with an error when Amelia is missing; require() would only
# warn and return FALSE, letting the script fail later at Amelia::missmap().
library("Amelia") # dependencies

rm(list = ls()) # Clear environment - remove all objects from your workspace
gc()            # Trigger garbage collection and report memory usage
##           used (Mb) gc trigger (Mb) max used (Mb)
## Ncells  919517 49.2    1758918   94  1393315 74.5
## Vcells 1607334 12.3    8388608   64  2780740 21.3
graphics.off()  # Close all open graphics devices
cat("\f")       # Send a form feed to clear the console

1.2 Data Import

# Read the training data set from the working directory
moneyball <- read.csv(file = "moneyball-training-data.csv")

1.3 Base Data Visualization

# Visual overview of the raw data frame
visdat::vis_dat(x = moneyball)

2. Data Prep

2.1 Renaming Columns For Readability

# Strip the literal text "TEAM_" from every column name so that model output
# is easier to read
names(moneyball) <- gsub(
  pattern = "TEAM_",
  replacement = "",
  x = names(moneyball),
  fixed = TRUE
)

2.2 Missing Value Visualization

# Plot which cells of the data frame are missing
Amelia::missmap(moneyball)

# Keep only the rows that contain no NA in any column
moneyball <- stats::na.omit(moneyball)

2.3 Clean Data Visualization

# Show the first six rows of the cleaned data
head(moneyball, n = 6L)
##    INDEX TARGET_WINS BATTING_H BATTING_2B BATTING_3B BATTING_HR BATTING_BB
## 38    41          82      1574        309         34        236        608
## 39    42          75      1447        275         26        158        494
## 40    43          99      1603        333         32        152        462
## 41    44          77      1473        276         33        150        476
## 42    45          92      1603        272         37        162        450
## 43    46          95      1520        278         30        147        447
##    BATTING_SO BASERUN_SB BASERUN_CS BATTING_HBP PITCHING_H PITCHING_HR
## 38       1024         93         52          47       1574         236
## 39       1001        116         52          77       1447         158
## 40        805        117         51          74       1603         152
## 41        838        129         61          56       1473         150
## 42        942        143         46          73       1603         162
## 43        848        161         57          29       1520         147
##    PITCHING_BB PITCHING_SO FIELDING_E FIELDING_DP
## 38         608        1024        134         184
## 39         494        1001        103         142
## 40         462         805         87         151
## 41         476         838        105         138
## 42         450         942         90         126
## 43         447         848         87         139
# Text table of summary statistics, rounded to whole numbers
stargazer::stargazer(moneyball, digits = 0, type = "text")
## 
## ==========================================
## Statistic    N  Mean  St. Dev.  Min   Max 
## ------------------------------------------
## INDEX       191 1,384   765     41   2,534
## TARGET_WINS 191  81      12     43    116 
## BATTING_H   191 1,479    76    1,308 1,667
## BATTING_2B  191  297     26     201   373 
## BATTING_3B  191  31      9      12    61  
## BATTING_HR  191  178     32     116   260 
## BATTING_BB  191  543     75     365   775 
## BATTING_SO  191 1,051   104     805  1,399
## BASERUN_SB  191  91      30     31    177 
## BASERUN_CS  191  40      12     12    74  
## BATTING_HBP 191  59      13     29    95  
## PITCHING_H  191 1,480    76    1,312 1,667
## PITCHING_HR 191  178     32     116   260 
## PITCHING_BB 191  544     75     367   775 
## PITCHING_SO 191 1,052   104     805  1,399
## FIELDING_E  191  107     17     65    145 
## FIELDING_DP 191  152     18     113   204 
## ------------------------------------------

3. Data Analysis

3.1 Linear Regression for Pitching and Batting Homeruns

# Fit a linear regression that predicts target wins from pitching and batting
# home runs.
moneyball_pos_model <- lm(TARGET_WINS ~ PITCHING_HR + BATTING_HR, data = moneyball)

# Summarise the fitted model (coefficients, residuals, R-squared), mirroring
# summary(moneyball_neg_model) in section 3.2. The previous call,
# summary(moneyball), summarised the raw data frame instead of the model.
# NOTE: the output echoed beneath this line was produced by the old call and
# must be regenerated by re-running the script.
summary(moneyball_pos_model)
##      INDEX         TARGET_WINS       BATTING_H      BATTING_2B   
##  Min.   :  41.0   Min.   : 43.00   Min.   :1308   Min.   :201.0  
##  1st Qu.: 824.5   1st Qu.: 71.50   1st Qu.:1426   1st Qu.:279.5  
##  Median :1380.0   Median : 82.00   Median :1477   Median :296.0  
##  Mean   :1383.6   Mean   : 80.93   Mean   :1479   Mean   :297.2  
##  3rd Qu.:2092.0   3rd Qu.: 90.00   3rd Qu.:1524   3rd Qu.:312.5  
##  Max.   :2534.0   Max.   :116.00   Max.   :1667   Max.   :373.0  
##    BATTING_3B      BATTING_HR      BATTING_BB      BATTING_SO  
##  Min.   :12.00   Min.   :116.0   Min.   :365.0   Min.   : 805  
##  1st Qu.:24.00   1st Qu.:152.5   1st Qu.:492.0   1st Qu.: 982  
##  Median :29.00   Median :175.0   Median :535.0   Median :1050  
##  Mean   :30.74   Mean   :178.1   Mean   :543.3   Mean   :1051  
##  3rd Qu.:36.00   3rd Qu.:199.5   3rd Qu.:595.0   3rd Qu.:1107  
##  Max.   :61.00   Max.   :260.0   Max.   :775.0   Max.   :1399  
##    BASERUN_SB       BASERUN_CS     BATTING_HBP      PITCHING_H  
##  Min.   : 31.00   Min.   :12.00   Min.   :29.00   Min.   :1312  
##  1st Qu.: 67.50   1st Qu.:32.00   1st Qu.:50.50   1st Qu.:1430  
##  Median : 87.00   Median :38.00   Median :58.00   Median :1480  
##  Mean   : 90.91   Mean   :39.94   Mean   :59.36   Mean   :1480  
##  3rd Qu.:110.00   3rd Qu.:48.00   3rd Qu.:67.00   3rd Qu.:1526  
##  Max.   :177.00   Max.   :74.00   Max.   :95.00   Max.   :1667  
##   PITCHING_HR     PITCHING_BB     PITCHING_SO     FIELDING_E     FIELDING_DP   
##  Min.   :116.0   Min.   :367.0   Min.   : 805   Min.   : 65.0   Min.   :113.0  
##  1st Qu.:152.5   1st Qu.:492.0   1st Qu.: 982   1st Qu.: 95.0   1st Qu.:139.0  
##  Median :175.0   Median :537.0   Median :1052   Median :106.0   Median :152.0  
##  Mean   :178.2   Mean   :543.7   Mean   :1052   Mean   :107.1   Mean   :152.3  
##  3rd Qu.:200.0   3rd Qu.:595.0   3rd Qu.:1108   3rd Qu.:118.0   3rd Qu.:165.0  
##  Max.   :260.0   Max.   :775.0   Max.   :1399   Max.   :145.0   Max.   :204.0
# Draw the diagnostic plots for the home-run model
plot(x = moneyball_pos_model)

3.1.1 Comparing Actual vs Predicted

# List the available column names
colnames(moneyball)
##  [1] "INDEX"       "TARGET_WINS" "BATTING_H"   "BATTING_2B"  "BATTING_3B" 
##  [6] "BATTING_HR"  "BATTING_BB"  "BATTING_SO"  "BASERUN_SB"  "BASERUN_CS" 
## [11] "BATTING_HBP" "PITCHING_H"  "PITCHING_HR" "PITCHING_BB" "PITCHING_SO"
## [16] "FIELDING_E"  "FIELDING_DP"
# Attach the model's predicted wins to the data ...
moneyball_pos_predictions <- dplyr::mutate(
  moneyball,
  predictions = predict(moneyball_pos_model, newdata = moneyball)
)
# ... then keep only the columns needed to compare actual and predicted wins
moneyball_pos_predictions <- dplyr::select(
  moneyball_pos_predictions,
  INDEX, TARGET_WINS, PITCHING_HR, BATTING_HR, predictions
)

3.1.2 Graph Target Win Prediction Correlation

# Line charts of actual wins (dark red) against predicted wins (dark blue),
# first over batting home runs, then over pitching home runs
ggplot(data = moneyball_pos_predictions, mapping = aes(x = BATTING_HR)) +
  geom_line(mapping = aes(y = TARGET_WINS), colour = "darkred") +
  geom_line(mapping = aes(y = predictions), colour = "darkblue") +
  labs(
    title = "Actual vs Predicted Target Wins",
    x = "Team Batting Homeruns",
    y = "Target Wins"
  )

ggplot(data = moneyball_pos_predictions, mapping = aes(x = PITCHING_HR)) +
  geom_line(mapping = aes(y = TARGET_WINS), colour = "darkred") +
  geom_line(mapping = aes(y = predictions), colour = "darkblue") +
  labs(
    title = "Actual vs Predicted Target Wins",
    x = "Team Pitching Homeruns",
    y = "Target Wins"
  )

3.2 Linear Regression For Field Errors and Hits Allowed

# Regress target wins on fielding errors and hits allowed, then print the fit
moneyball_neg_model <- lm(
  formula = TARGET_WINS ~ FIELDING_E + PITCHING_H,
  data = moneyball
)
summary(object = moneyball_neg_model)
## 
## Call:
## lm(formula = TARGET_WINS ~ FIELDING_E + PITCHING_H, data = moneyball)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.0697  -7.2835   0.7463   6.8837  25.5026 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.80236   16.85662   0.522    0.602    
## FIELDING_E  -0.20873    0.04595  -4.543 9.91e-06 ***
## PITCHING_H   0.06384    0.01008   6.332 1.74e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.2 on 188 degrees of freedom
## Multiple R-squared:  0.299,  Adjusted R-squared:  0.2916 
## F-statistic:  40.1 on 2 and 188 DF,  p-value: 3.14e-15
# Draw the diagnostic plots for the errors / hits-allowed model
plot(x = moneyball_neg_model)

3.2.1 Comparing Actual Vs Predicted

# Attach the wins predicted from fielding errors and hits allowed (the
# theoretically negative effects) ...
moneyball_neg_predictions <- dplyr::mutate(
  moneyball,
  predictions = predict(moneyball_neg_model, newdata = moneyball)
)
# ... then keep only the columns needed to compare actual and predicted wins
moneyball_neg_predictions <- dplyr::select(
  moneyball_neg_predictions,
  INDEX, TARGET_WINS, FIELDING_E, PITCHING_H, predictions
)

3.2.2 Graph Target Win Prediction Correlation

# Line chart of actual wins (dark red) against predicted wins (dark blue)
# over fielding errors
ggplot(data = moneyball_neg_predictions, mapping = aes(x = FIELDING_E)) +
  geom_line(mapping = aes(y = TARGET_WINS), colour = "darkred") +
  geom_line(mapping = aes(y = predictions), colour = "darkblue") +
  labs(
    title = "Actual vs Predicted Target Wins",
    x = "Team Fielding Errors",
    y = "Target Wins"
  )

3.3 Correlation Matrix

# Pairwise correlations of columns 2 to 17, i.e. everything except INDEX
moneyball_cor <- cor(moneyball[, 2:17])

# Lower-triangle correlogram with the coefficients printed in each square
ggcorrplot(
  moneyball_cor,
  method = "square",
  type = "lower",
  hc.order = TRUE,
  lab = TRUE,
  lab_size = 2,  # text size of the printed coefficients
  colors = c("tomato2", "white", "springgreen3"),
  title = "Correlation Matrix",
  ggtheme = ggplot2::theme_minimal() +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1, size = 1),
      axis.text.y = element_text(size = 12),     # larger y-axis labels
      plot.margin = unit(rep(1, 4), "cm")        # 1 cm margin on every side
    )
)

# The matrix is only needed for the plot
rm(moneyball_cor)

3.4 Histogram of Team Batting Homeruns

# Histogram showing how often each level of team batting home runs occurs
ggplot(data = moneyball, mapping = aes(x = BATTING_HR)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

4. Observations

  1. BATTING_HR and PITCHING_HR have a significant positive impact on TARGET_WINS, indicating that teams with higher home run statistics tend to win more games.

    • This becomes clear when the moneyball_pos_predictions line is compared against the TARGET_WINS line in the actual vs. predicted plots. The prediction line rises at a constant linear rate, and the TARGET_WINS line follows it, with higher wins on average.
  2. On average, when we look at the correlation between two negative impacting variables (field errors and pitching hits), we see the predicted target wins is significantly lower than the actual target win.

    • When you compare the moneyball_neg_predictions line to the TARGET_WINS line, you get an inconsistent prediction line that is downward sloping.

    • Overall, the final point of both lines is significantly lower than the first point. This negative relationship demonstrates the negative effect of field errors and pitching hits on the team’s overall wins.

  3. The correlation matrix reveals that BATTING_HR and PITCHING_HR have the highest positive correlation with TARGET_WINS, while BATTING_H, PITCHING_H, and FIELDING_E have a significantly lower or negative correlation with TARGET_WINS. This demonstrates the contrast between the two groups.

  4. The residual plots for the linear regressions of both the positive and negative correlation models suggest some heteroscedasticity, which is apparent in the wider spread of the residuals along the y-axis. However, eyeballing the plots isn’t enough: when the Breusch-Pagan (BP) test is used to test for homoscedasticity, both p-values are greater than .05, which means there isn’t enough evidence to reject the null hypothesis of homoscedasticity for either model.

# Breusch-Pagan test for heteroscedasticity in the home-run model
lmtest::bptest(moneyball_pos_model)
## 
##  studentized Breusch-Pagan test
## 
## data:  moneyball_pos_model
## BP = 0.93579, df = 2, p-value = 0.6263
# Same Breusch-Pagan test for the errors / hits-allowed model
lmtest::bptest(moneyball_neg_model)
## 
##  studentized Breusch-Pagan test
## 
## data:  moneyball_neg_model
## BP = 5.0905, df = 2, p-value = 0.07845
  1. The distribution of BATTING_HR is mildly right-skewed (skewness of about 0.30). This implies that team batting homeruns have a tail towards higher values, but not excessively so.

    # Skewness of team batting home runs
    e1071::skewness(moneyball[["BATTING_HR"]])
    ## [1] 0.2980673
    # Kurtosis of team batting home runs
    e1071::kurtosis(moneyball[["BATTING_HR"]])
    ## [1] -0.7172373
    • A negative kurtosis for BATTING_HR means that outliers are extremely infrequent, which adds support for using this distribution as a positive effect.

5. Additional Data + Cleanup

5.1 Regression Model Summary Statistics

# Summary statistics for the home-run model's comparison table
stargazer::stargazer(moneyball_pos_predictions, type = "text")
## 
## ================================================
## Statistic    N    Mean    St. Dev.  Min    Max  
## ------------------------------------------------
## INDEX       191 1,383.592 765.240    41   2,534 
## TARGET_WINS 191  80.927    12.115    43    116  
## PITCHING_HR 191  178.178   32.392   116    260  
## BATTING_HR  191  178.052   32.413   116    260  
## predictions 191  80.927    5.118   71.092 93.851
## ------------------------------------------------
# Summary statistics for the errors / hits-allowed model's comparison table
stargazer::stargazer(moneyball_neg_predictions, type = "text")
## 
## ================================================
## Statistic    N    Mean    St. Dev.  Min    Max  
## ------------------------------------------------
## INDEX       191 1,383.592 765.240    41   2,534 
## TARGET_WINS 191  80.927    12.115    43    116  
## FIELDING_E  191  107.052   16.632    65    145  
## PITCHING_H  191 1,479.702  75.789  1,312  1,667 
## predictions 191  80.927    6.625   63.761 98.413
## ------------------------------------------------

6. Cleanup

# Remove the two fitted two-variable models from the workspace
rm(list = c("moneyball_neg_model", "moneyball_pos_model"))

7. Kitchen Sink Model

# Columns 3 to 17 are the candidate predictors
colnames(moneyball)[3:17]
##  [1] "BATTING_H"   "BATTING_2B"  "BATTING_3B"  "BATTING_HR"  "BATTING_BB" 
##  [6] "BATTING_SO"  "BASERUN_SB"  "BASERUN_CS"  "BATTING_HBP" "PITCHING_H" 
## [11] "PITCHING_HR" "PITCHING_BB" "PITCHING_SO" "FIELDING_E"  "FIELDING_DP"
# Regress target wins on all fifteen predictor columns at once
kitchen_sink_model <- lm(
  formula = TARGET_WINS ~ BATTING_H + BATTING_2B + BATTING_3B + BATTING_HR +
    BATTING_BB + BATTING_SO + BASERUN_SB + BASERUN_CS + BATTING_HBP +
    PITCHING_H + PITCHING_HR + PITCHING_BB + PITCHING_SO + FIELDING_E +
    FIELDING_DP,
  data = moneyball
)

# Diagnostic plots for the full model
plot(x = kitchen_sink_model)

# Backward stepwise selection by AIC, starting from the full model
best_kitchen_sink_model <- MASS::stepAIC(
  object = kitchen_sink_model,
  direction = "backward"
)
## Start:  AIC=831.31
## TARGET_WINS ~ BATTING_H + BATTING_2B + BATTING_3B + BATTING_HR + 
##     BATTING_BB + BATTING_SO + BASERUN_SB + BASERUN_CS + BATTING_HBP + 
##     PITCHING_H + PITCHING_HR + PITCHING_BB + PITCHING_SO + FIELDING_E + 
##     FIELDING_DP
## 
##               Df Sum of Sq   RSS    AIC
## - BATTING_SO   1      1.24 12547 829.33
## - PITCHING_SO  1      1.48 12547 829.33
## - BASERUN_CS   1      1.71 12548 829.34
## - BATTING_HR   1     15.23 12561 829.54
## - PITCHING_HR  1     15.79 12562 829.55
## - PITCHING_H   1     33.63 12580 829.82
## - BATTING_H    1     34.42 12580 829.83
## - BATTING_2B   1     54.41 12600 830.14
## - BASERUN_SB   1     95.22 12641 830.76
## - BATTING_BB   1    107.84 12654 830.95
## - PITCHING_BB  1    110.48 12656 830.99
## - BATTING_3B   1    122.16 12668 831.16
## <none>                     12546 831.31
## - BATTING_HBP  1    198.21 12744 832.31
## - FIELDING_DP  1    628.49 13174 838.65
## - FIELDING_E   1   1237.79 13784 847.28
## 
## Step:  AIC=829.33
## TARGET_WINS ~ BATTING_H + BATTING_2B + BATTING_3B + BATTING_HR + 
##     BATTING_BB + BASERUN_SB + BASERUN_CS + BATTING_HBP + PITCHING_H + 
##     PITCHING_HR + PITCHING_BB + PITCHING_SO + FIELDING_E + FIELDING_DP
## 
##               Df Sum of Sq   RSS    AIC
## - BASERUN_CS   1      1.59 12549 827.35
## - BATTING_HR   1     15.82 12563 827.57
## - PITCHING_HR  1     16.39 12564 827.58
## - BATTING_2B   1     53.47 12601 828.14
## - PITCHING_H   1     88.45 12636 828.67
## - BATTING_H    1     90.30 12637 828.70
## - BASERUN_SB   1     94.19 12641 828.76
## - BATTING_BB   1    107.95 12655 828.97
## - PITCHING_BB  1    110.60 12658 829.01
## - BATTING_3B   1    122.20 12669 829.18
## <none>                     12547 829.33
## - BATTING_HBP  1    197.11 12744 830.31
## - FIELDING_DP  1    630.68 13178 836.70
## - FIELDING_E   1   1240.80 13788 845.34
## - PITCHING_SO  1   1312.89 13860 846.34
## 
## Step:  AIC=827.35
## TARGET_WINS ~ BATTING_H + BATTING_2B + BATTING_3B + BATTING_HR + 
##     BATTING_BB + BASERUN_SB + BATTING_HBP + PITCHING_H + PITCHING_HR + 
##     PITCHING_BB + PITCHING_SO + FIELDING_E + FIELDING_DP
## 
##               Df Sum of Sq   RSS    AIC
## - BATTING_HR   1     16.06 12565 825.60
## - PITCHING_HR  1     16.64 12565 825.61
## - BATTING_2B   1     53.05 12602 826.16
## - PITCHING_H   1     90.24 12639 826.72
## - BATTING_H    1     92.13 12641 826.75
## - BATTING_BB   1    110.31 12659 827.03
## - PITCHING_BB  1    113.00 12662 827.07
## - BASERUN_SB   1    123.42 12672 827.22
## - BATTING_3B   1    129.33 12678 827.31
## <none>                     12549 827.35
## - BATTING_HBP  1    197.23 12746 828.33
## - FIELDING_DP  1    635.62 13184 834.79
## - PITCHING_SO  1   1311.88 13861 844.35
## - FIELDING_E   1   1322.05 13871 844.49
## 
## Step:  AIC=825.6
## TARGET_WINS ~ BATTING_H + BATTING_2B + BATTING_3B + BATTING_BB + 
##     BASERUN_SB + BATTING_HBP + PITCHING_H + PITCHING_HR + PITCHING_BB + 
##     PITCHING_SO + FIELDING_E + FIELDING_DP
## 
##               Df Sum of Sq   RSS    AIC
## - BATTING_2B   1     55.48 12620 824.44
## - PITCHING_H   1     89.26 12654 824.95
## - BATTING_H    1     91.97 12657 824.99
## - BATTING_BB   1    104.58 12669 825.18
## - PITCHING_BB  1    107.19 12672 825.22
## <none>                     12565 825.60
## - BATTING_3B   1    137.48 12702 825.68
## - BASERUN_SB   1    146.90 12712 825.82
## - BATTING_HBP  1    200.36 12765 826.62
## - FIELDING_DP  1    628.95 13194 832.93
## - PITCHING_HR  1    853.54 13418 836.15
## - PITCHING_SO  1   1316.68 13882 842.63
## - FIELDING_E   1   1333.15 13898 842.86
## 
## Step:  AIC=824.44
## TARGET_WINS ~ BATTING_H + BATTING_3B + BATTING_BB + BASERUN_SB + 
##     BATTING_HBP + PITCHING_H + PITCHING_HR + PITCHING_BB + PITCHING_SO + 
##     FIELDING_E + FIELDING_DP
## 
##               Df Sum of Sq   RSS    AIC
## - PITCHING_H   1     84.47 12705 823.71
## - BATTING_H    1     87.79 12708 823.76
## - BATTING_BB   1     98.92 12719 823.93
## - PITCHING_BB  1    101.48 12722 823.97
## - BASERUN_SB   1    109.27 12730 824.09
## <none>                     12620 824.44
## - BATTING_3B   1    147.01 12767 824.65
## - BATTING_HBP  1    204.39 12825 825.51
## - FIELDING_DP  1    649.12 13269 832.02
## - PITCHING_HR  1    812.92 13433 834.36
## - PITCHING_SO  1   1262.90 13883 840.66
## - FIELDING_E   1   1379.34 14000 842.25
## 
## Step:  AIC=823.71
## TARGET_WINS ~ BATTING_H + BATTING_3B + BATTING_BB + BASERUN_SB + 
##     BATTING_HBP + PITCHING_HR + PITCHING_BB + PITCHING_SO + FIELDING_E + 
##     FIELDING_DP
## 
##               Df Sum of Sq   RSS    AIC
## - BATTING_BB   1     32.85 12738 822.21
## - PITCHING_BB  1     43.42 12748 822.37
## - BASERUN_SB   1    105.16 12810 823.29
## <none>                     12705 823.71
## - BATTING_3B   1    153.13 12858 824.00
## - BATTING_HBP  1    183.82 12888 824.46
## - BATTING_H    1    504.11 13209 829.15
## - FIELDING_DP  1    602.80 13308 830.57
## - PITCHING_HR  1    850.25 13555 834.09
## - PITCHING_SO  1   1259.72 13964 839.77
## - FIELDING_E   1   1419.39 14124 841.94
## 
## Step:  AIC=822.21
## TARGET_WINS ~ BATTING_H + BATTING_3B + BASERUN_SB + BATTING_HBP + 
##     PITCHING_HR + PITCHING_BB + PITCHING_SO + FIELDING_E + FIELDING_DP
## 
##               Df Sum of Sq   RSS    AIC
## - BASERUN_SB   1    109.99 12848 821.85
## <none>                     12738 822.21
## - BATTING_3B   1    156.45 12894 822.54
## - BATTING_HBP  1    186.58 12924 822.98
## - BATTING_H    1    485.67 13223 827.35
## - FIELDING_DP  1    623.19 13361 829.33
## - PITCHING_HR  1    843.83 13581 832.46
## - PITCHING_SO  1   1267.25 14005 838.32
## - FIELDING_E   1   1395.02 14133 840.06
## - PITCHING_BB  1   2364.81 15102 852.73
## 
## Step:  AIC=821.85
## TARGET_WINS ~ BATTING_H + BATTING_3B + BATTING_HBP + PITCHING_HR + 
##     PITCHING_BB + PITCHING_SO + FIELDING_E + FIELDING_DP
## 
##               Df Sum of Sq   RSS    AIC
## - BATTING_3B   1    133.47 12981 821.82
## <none>                     12848 821.85
## - BATTING_HBP  1    177.11 13025 822.46
## - BATTING_H    1    566.11 13414 828.09
## - FIELDING_DP  1    737.46 13585 830.51
## - PITCHING_HR  1    756.49 13604 830.78
## - PITCHING_SO  1   1257.91 14106 837.69
## - FIELDING_E   1   1330.40 14178 838.67
## - PITCHING_BB  1   2371.12 15219 852.20
## 
## Step:  AIC=821.82
## TARGET_WINS ~ BATTING_H + BATTING_HBP + PITCHING_HR + PITCHING_BB + 
##     PITCHING_SO + FIELDING_E + FIELDING_DP
## 
##               Df Sum of Sq   RSS    AIC
## <none>                     12981 821.82
## - BATTING_HBP  1    228.70 13210 823.16
## - BATTING_H    1    449.87 13431 826.33
## - FIELDING_DP  1    813.17 13794 831.43
## - PITCHING_HR  1    990.20 13971 833.86
## - PITCHING_SO  1   1316.56 14298 838.27
## - FIELDING_E   1   1334.60 14316 838.52
## - PITCHING_BB  1   2583.00 15564 854.49