1 Setup

# Make the course folder the working directory so that data files referenced
# by bare file name are looked up there.
cd <- "/Users/arvindsharma/Dropbox/WCAS/Econometrics/"
setwd(dir = cd)

# Start from a clean slate: remove every object in the current environment
# (this also removes `cd`, which is not needed once the directory is set),
# then run the garbage collector, whose memory report is printed.
rm(list = ls())
gc()
##           used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells  625681 33.5    1429844 76.4         NA   715631 38.3
## Vcells 1181919  9.1    8388608 64.0      49152  2010534 15.4
  cat("\f")       # Clear the console (emits a form-feed character)
  # Clear the charts. `dev.off` written without parentheses only printed the
  # function's source and closed nothing, while `dev.off()` stops with
  # "cannot shut down device 1 (the null device)" when no plot is open.
  # graphics.off() closes every open device and is a no-op when none exist.
  graphics.off()
# Prepare needed libraries: install any package that is missing, then attach
# each one in the order listed.
packages <- c("stargazer", "psych", "tidyverse", "MASS")

# Iterate over the names directly; `1:length(packages)` would run with the
# indices 1 and 0 if the vector were ever empty.
for (pkg in packages) {
  # system.file() returns "" when the package is not installed. This checks
  # the current library state on every pass (so packages pulled in as
  # dependencies of an earlier install are seen) without re-reading the whole
  # library the way installed.packages() does.
  if (!nzchar(system.file(package = pkg))) {
    install.packages(
      pkg,
      repos = "https://cran.rstudio.com/",  # HTTPS instead of plain HTTP
      dependencies = TRUE
    )
  }
  library(pkg, character.only = TRUE)
}
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'MASS'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     select
# The vector of package names is no longer needed; drop it from the workspace
rm(list = "packages")

# Read the training sample from the working directory
df_train <- read.csv(file = "moneyball-training-data.csv")
# df_eval     <- read.csv("moneyball-evaluation-data.csv")

# Remove the INDEX column (presumably just a row identifier -- confirm), so
# that formulas of the form `TARGET_WINS ~ .` do not pick it up as a predictor
df_train[["INDEX"]] <- NULL

# Compact overview: dimensions, column types and the first few values
glimpse(df_train)
## Rows: 2,276
## Columns: 16
## $ TARGET_WINS      <int> 39, 70, 86, 70, 82, 75, 80, 85, 86, 76, 78, 68, 72, 7…
## $ TEAM_BATTING_H   <int> 1445, 1339, 1377, 1387, 1297, 1279, 1244, 1273, 1391,…
## $ TEAM_BATTING_2B  <int> 194, 219, 232, 209, 186, 200, 179, 171, 197, 213, 179…
## $ TEAM_BATTING_3B  <int> 39, 22, 35, 38, 27, 36, 54, 37, 40, 18, 27, 31, 41, 2…
## $ TEAM_BATTING_HR  <int> 13, 190, 137, 96, 102, 92, 122, 115, 114, 96, 82, 95,…
## $ TEAM_BATTING_BB  <int> 143, 685, 602, 451, 472, 443, 525, 456, 447, 441, 374…
## $ TEAM_BATTING_SO  <int> 842, 1075, 917, 922, 920, 973, 1062, 1027, 922, 827, …
## $ TEAM_BASERUN_SB  <int> NA, 37, 46, 43, 49, 107, 80, 40, 69, 72, 60, 119, 221…
## $ TEAM_BASERUN_CS  <int> NA, 28, 27, 30, 39, 59, 54, 36, 27, 34, 39, 79, 109, …
## $ TEAM_BATTING_HBP <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ TEAM_PITCHING_H  <int> 9364, 1347, 1377, 1396, 1297, 1279, 1244, 1281, 1391,…
## $ TEAM_PITCHING_HR <int> 84, 191, 137, 97, 102, 92, 122, 116, 114, 96, 86, 95,…
## $ TEAM_PITCHING_BB <int> 927, 689, 602, 454, 472, 443, 525, 459, 447, 441, 391…
## $ TEAM_PITCHING_SO <int> 5456, 1082, 917, 928, 920, 973, 1062, 1033, 922, 827,…
## $ TEAM_FIELDING_E  <int> 1011, 193, 175, 164, 138, 123, 136, 112, 127, 131, 11…
## $ TEAM_FIELDING_DP <int> NA, 155, 153, 156, 168, 149, 186, 136, 169, 159, 141,…

2 Model

2.1 Backward Elimination

# MODEL 3: stepwise selection by AIC -- backward elimination.
# Start from the model with every predictor and, at each step, drop the
# predictor whose removal gives the lowest AIC; stop once no removal improves
# on the current model.

library(MASS)
?stepAIC

# stepAIC() must fit every candidate model to the same set of rows. Fitting
# on the raw data relies on lm() silently omitting rows with NAs, and that set
# of rows can change once a predictor with missing values is dropped, in which
# case stepAIC() stops with an error. Restrict the data to complete cases up
# front so that the row set is fixed for the whole search.
df_train_clean <- df_train[complete.cases(df_train), ]

model3_backward <- stepAIC(
  object = lm(TARGET_WINS ~ ., data = df_train_clean),
  direction = "backward"
)
## Start:  AIC=831.31
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B + 
##     TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BATTING_SO + TEAM_BASERUN_SB + 
##     TEAM_BASERUN_CS + TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_HR + 
##     TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BATTING_SO   1      1.24 12547 829.33
## - TEAM_PITCHING_SO  1      1.48 12547 829.33
## - TEAM_BASERUN_CS   1      1.71 12548 829.34
## - TEAM_BATTING_HR   1     15.23 12561 829.54
## - TEAM_PITCHING_HR  1     15.79 12562 829.55
## - TEAM_PITCHING_H   1     33.63 12580 829.82
## - TEAM_BATTING_H    1     34.42 12580 829.83
## - TEAM_BATTING_2B   1     54.41 12600 830.14
## - TEAM_BASERUN_SB   1     95.22 12641 830.76
## - TEAM_BATTING_BB   1    107.84 12654 830.95
## - TEAM_PITCHING_BB  1    110.48 12656 830.99
## - TEAM_BATTING_3B   1    122.16 12668 831.16
## <none>                          12546 831.31
## - TEAM_BATTING_HBP  1    198.21 12744 832.31
## - TEAM_FIELDING_DP  1    628.49 13174 838.65
## - TEAM_FIELDING_E   1   1237.79 13784 847.28
## 
## Step:  AIC=829.33
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B + 
##     TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BASERUN_SB + TEAM_BASERUN_CS + 
##     TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_HR + TEAM_PITCHING_BB + 
##     TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BASERUN_CS   1      1.59 12549 827.35
## - TEAM_BATTING_HR   1     15.82 12563 827.57
## - TEAM_PITCHING_HR  1     16.39 12564 827.58
## - TEAM_BATTING_2B   1     53.47 12601 828.14
## - TEAM_PITCHING_H   1     88.45 12636 828.67
## - TEAM_BATTING_H    1     90.30 12637 828.70
## - TEAM_BASERUN_SB   1     94.19 12641 828.76
## - TEAM_BATTING_BB   1    107.95 12655 828.97
## - TEAM_PITCHING_BB  1    110.60 12658 829.01
## - TEAM_BATTING_3B   1    122.20 12669 829.18
## <none>                          12547 829.33
## - TEAM_BATTING_HBP  1    197.11 12744 830.31
## - TEAM_FIELDING_DP  1    630.68 13178 836.70
## - TEAM_FIELDING_E   1   1240.80 13788 845.34
## - TEAM_PITCHING_SO  1   1312.89 13860 846.34
## 
## Step:  AIC=827.35
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B + 
##     TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BASERUN_SB + TEAM_BATTING_HBP + 
##     TEAM_PITCHING_H + TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_PITCHING_SO + 
##     TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BATTING_HR   1     16.06 12565 825.60
## - TEAM_PITCHING_HR  1     16.64 12565 825.61
## - TEAM_BATTING_2B   1     53.05 12602 826.16
## - TEAM_PITCHING_H   1     90.24 12639 826.72
## - TEAM_BATTING_H    1     92.13 12641 826.75
## - TEAM_BATTING_BB   1    110.31 12659 827.03
## - TEAM_PITCHING_BB  1    113.00 12662 827.07
## - TEAM_BASERUN_SB   1    123.42 12672 827.22
## - TEAM_BATTING_3B   1    129.33 12678 827.31
## <none>                          12549 827.35
## - TEAM_BATTING_HBP  1    197.23 12746 828.33
## - TEAM_FIELDING_DP  1    635.62 13184 834.79
## - TEAM_PITCHING_SO  1   1311.88 13861 844.35
## - TEAM_FIELDING_E   1   1322.05 13871 844.49
## 
## Step:  AIC=825.6
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B + 
##     TEAM_BATTING_BB + TEAM_BASERUN_SB + TEAM_BATTING_HBP + TEAM_PITCHING_H + 
##     TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_PITCHING_SO + 
##     TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BATTING_2B   1     55.48 12620 824.44
## - TEAM_PITCHING_H   1     89.26 12654 824.95
## - TEAM_BATTING_H    1     91.97 12657 824.99
## - TEAM_BATTING_BB   1    104.58 12669 825.18
## - TEAM_PITCHING_BB  1    107.19 12672 825.22
## <none>                          12565 825.60
## - TEAM_BATTING_3B   1    137.48 12702 825.68
## - TEAM_BASERUN_SB   1    146.90 12712 825.82
## - TEAM_BATTING_HBP  1    200.36 12765 826.62
## - TEAM_FIELDING_DP  1    628.95 13194 832.93
## - TEAM_PITCHING_HR  1    853.54 13418 836.15
## - TEAM_PITCHING_SO  1   1316.68 13882 842.63
## - TEAM_FIELDING_E   1   1333.15 13898 842.86
## 
## Step:  AIC=824.44
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_BB + 
##     TEAM_BASERUN_SB + TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_HR + 
##     TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_PITCHING_H   1     84.47 12705 823.71
## - TEAM_BATTING_H    1     87.79 12708 823.76
## - TEAM_BATTING_BB   1     98.92 12719 823.93
## - TEAM_PITCHING_BB  1    101.48 12722 823.97
## - TEAM_BASERUN_SB   1    109.27 12730 824.09
## <none>                          12620 824.44
## - TEAM_BATTING_3B   1    147.01 12767 824.65
## - TEAM_BATTING_HBP  1    204.39 12825 825.51
## - TEAM_FIELDING_DP  1    649.12 13269 832.02
## - TEAM_PITCHING_HR  1    812.92 13433 834.36
## - TEAM_PITCHING_SO  1   1262.90 13883 840.66
## - TEAM_FIELDING_E   1   1379.34 14000 842.25
## 
## Step:  AIC=823.71
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_BB + 
##     TEAM_BASERUN_SB + TEAM_BATTING_HBP + TEAM_PITCHING_HR + TEAM_PITCHING_BB + 
##     TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BATTING_BB   1     32.85 12738 822.21
## - TEAM_PITCHING_BB  1     43.42 12748 822.37
## - TEAM_BASERUN_SB   1    105.16 12810 823.29
## <none>                          12705 823.71
## - TEAM_BATTING_3B   1    153.13 12858 824.00
## - TEAM_BATTING_HBP  1    183.82 12888 824.46
## - TEAM_BATTING_H    1    504.11 13209 829.15
## - TEAM_FIELDING_DP  1    602.80 13308 830.57
## - TEAM_PITCHING_HR  1    850.25 13555 834.09
## - TEAM_PITCHING_SO  1   1259.72 13964 839.77
## - TEAM_FIELDING_E   1   1419.39 14124 841.94
## 
## Step:  AIC=822.21
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BASERUN_SB + 
##     TEAM_BATTING_HBP + TEAM_PITCHING_HR + TEAM_PITCHING_BB + 
##     TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BASERUN_SB   1    109.99 12848 821.85
## <none>                          12738 822.21
## - TEAM_BATTING_3B   1    156.45 12894 822.54
## - TEAM_BATTING_HBP  1    186.58 12924 822.98
## - TEAM_BATTING_H    1    485.67 13223 827.35
## - TEAM_FIELDING_DP  1    623.19 13361 829.33
## - TEAM_PITCHING_HR  1    843.83 13581 832.46
## - TEAM_PITCHING_SO  1   1267.25 14005 838.32
## - TEAM_FIELDING_E   1   1395.02 14133 840.06
## - TEAM_PITCHING_BB  1   2364.81 15102 852.73
## 
## Step:  AIC=821.85
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_HBP + 
##     TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_PITCHING_SO + 
##     TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BATTING_3B   1    133.47 12981 821.82
## <none>                          12848 821.85
## - TEAM_BATTING_HBP  1    177.11 13025 822.46
## - TEAM_BATTING_H    1    566.11 13414 828.09
## - TEAM_FIELDING_DP  1    737.46 13585 830.51
## - TEAM_PITCHING_HR  1    756.49 13604 830.78
## - TEAM_PITCHING_SO  1   1257.91 14106 837.69
## - TEAM_FIELDING_E   1   1330.40 14178 838.67
## - TEAM_PITCHING_BB  1   2371.12 15219 852.20
## 
## Step:  AIC=821.82
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_HBP + TEAM_PITCHING_HR + 
##     TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## <none>                          12981 821.82
## - TEAM_BATTING_HBP  1    228.70 13210 823.16
## - TEAM_BATTING_H    1    449.87 13431 826.33
## - TEAM_FIELDING_DP  1    813.17 13794 831.43
## - TEAM_PITCHING_HR  1    990.20 13971 833.86
## - TEAM_PITCHING_SO  1   1316.56 14298 838.27
## - TEAM_FIELDING_E   1   1334.60 14316 838.52
## - TEAM_PITCHING_BB  1   2583.00 15564 854.49

2.2 Forward Selection

# Keep only the rows in which the target and every predictor are observed,
# so that all candidate models are fitted to the same observations
df_train_clean <- df_train[complete.cases(df_train), , drop = FALSE]

# Two ends of the search: intercept-only model and all-predictors model
null <- lm(formula = TARGET_WINS ~ 1, data = df_train_clean)
full <- lm(formula = TARGET_WINS ~ ., data = df_train_clean)

# Forward selection: begin with the intercept-only model and, at each step,
# add the predictor from the full model's formula that lowers AIC the most;
# the search path is printed because tracing is switched on
model3_forward <- stepAIC(
  object    = null,
  direction = "forward",
  scope     = formula(full),
  trace     = TRUE
)
## Start:  AIC=953.88
## TARGET_WINS ~ 1
## 
##                    Df Sum of Sq   RSS    AIC
## + TEAM_PITCHING_H   1    6192.6 21694 907.91
## + TEAM_BATTING_H    1    6158.8 21728 908.21
## + TEAM_BATTING_BB   1    6125.9 21761 908.50
## + TEAM_PITCHING_BB  1    6118.3 21769 908.57
## + TEAM_PITCHING_HR  1    4977.2 22910 918.33
## + TEAM_BATTING_HR   1    4976.0 22911 918.34
## + TEAM_FIELDING_E   1    4169.9 23717 924.94
## + TEAM_BATTING_2B   1    2731.8 25155 936.18
## + TEAM_PITCHING_SO  1    1467.1 26420 945.55
## + TEAM_BATTING_SO   1    1461.1 26426 945.60
## + TEAM_FIELDING_DP  1    1069.8 26817 948.40
## + TEAM_BASERUN_CS   1     891.1 26996 949.67
## + TEAM_BATTING_3B   1     431.2 27456 952.90
## <none>                          27887 953.88
## + TEAM_BATTING_HBP  1     150.7 27736 954.84
## + TEAM_BASERUN_SB   1       6.1 27881 955.83
## 
## Step:  AIC=907.91
## TARGET_WINS ~ TEAM_PITCHING_H
## 
##                    Df Sum of Sq   RSS    AIC
## + TEAM_PITCHING_BB  1    4092.0 17602 869.99
## + TEAM_BATTING_BB   1    4085.8 17608 870.06
## + TEAM_FIELDING_E   1    2145.9 19548 890.02
## + TEAM_PITCHING_HR  1    1847.5 19847 892.91
## + TEAM_BATTING_HR   1    1841.9 19852 892.97
## + TEAM_BATTING_3B   1    1471.8 20223 896.50
## + TEAM_FIELDING_DP  1    1144.2 20550 899.56
## + TEAM_BASERUN_CS   1     512.2 21182 905.35
## <none>                          21694 907.91
## + TEAM_BATTING_HBP  1     209.1 21485 908.06
## + TEAM_BATTING_SO   1     145.9 21548 908.62
## + TEAM_PITCHING_SO  1     144.4 21550 908.64
## + TEAM_BATTING_2B   1      97.1 21597 909.06
## + TEAM_BATTING_H    1      14.2 21680 909.79
## + TEAM_BASERUN_SB   1      11.2 21683 909.81
## 
## Step:  AIC=869.99
## TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB
## 
##                    Df Sum of Sq   RSS    AIC
## + TEAM_FIELDING_E   1   1979.12 15623 849.21
## + TEAM_BATTING_SO   1   1129.82 16472 859.32
## + TEAM_PITCHING_SO  1   1128.21 16474 859.34
## + TEAM_FIELDING_DP  1    812.28 16790 862.97
## + TEAM_BATTING_3B   1    507.81 17095 866.40
## + TEAM_PITCHING_HR  1    316.07 17286 868.53
## + TEAM_BATTING_HR   1    314.48 17288 868.55
## <none>                          17602 869.99
## + TEAM_BATTING_HBP  1    120.92 17481 870.67
## + TEAM_BASERUN_CS   1    107.54 17495 870.82
## + TEAM_BASERUN_SB   1     11.15 17591 871.87
## + TEAM_BATTING_2B   1      9.85 17592 871.88
## + TEAM_BATTING_BB   1      6.71 17596 871.92
## + TEAM_BATTING_H    1      1.44 17601 871.98
## 
## Step:  AIC=849.21
## TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_FIELDING_E
## 
##                    Df Sum of Sq   RSS    AIC
## + TEAM_FIELDING_DP  1    708.40 14915 842.35
## + TEAM_PITCHING_HR  1    628.58 14995 843.37
## + TEAM_BATTING_HR   1    623.28 15000 843.43
## + TEAM_BATTING_3B   1    550.55 15073 844.36
## + TEAM_BATTING_SO   1    508.85 15114 844.89
## + TEAM_PITCHING_SO  1    504.24 15119 844.94
## <none>                          15623 849.21
## + TEAM_BATTING_HBP  1    160.80 15462 849.23
## + TEAM_BATTING_BB   1     38.38 15585 850.74
## + TEAM_BASERUN_SB   1     36.78 15586 850.76
## + TEAM_BATTING_H    1     25.38 15598 850.90
## + TEAM_BASERUN_CS   1      3.93 15619 851.16
## + TEAM_BATTING_2B   1      0.08 15623 851.21
## 
## Step:  AIC=842.35
## TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_FIELDING_E + 
##     TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## + TEAM_BATTING_SO   1    674.53 14240 835.51
## + TEAM_PITCHING_SO  1    672.63 14242 835.53
## + TEAM_PITCHING_HR  1    564.45 14350 836.98
## + TEAM_BATTING_HR   1    561.02 14354 837.02
## + TEAM_BATTING_3B   1    420.31 14494 838.89
## <none>                          14915 842.35
## + TEAM_BATTING_HBP  1    118.56 14796 842.82
## + TEAM_BATTING_BB   1     18.14 14897 844.11
## + TEAM_BASERUN_CS   1      7.92 14907 844.25
## + TEAM_BATTING_H    1      6.90 14908 844.26
## + TEAM_BASERUN_SB   1      4.99 14910 844.28
## + TEAM_BATTING_2B   1      0.23 14915 844.34
## 
## Step:  AIC=835.51
## TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_FIELDING_E + 
##     TEAM_FIELDING_DP + TEAM_BATTING_SO
## 
##                    Df Sum of Sq   RSS    AIC
## + TEAM_PITCHING_HR  1   1047.77 13193 822.91
## + TEAM_BATTING_HR   1   1043.39 13197 822.97
## + TEAM_BATTING_3B   1    462.05 13778 831.21
## + TEAM_BATTING_HBP  1    280.52 13960 833.71
## <none>                          14240 835.51
## + TEAM_BASERUN_CS   1     28.38 14212 837.13
## + TEAM_BATTING_2B   1     13.77 14227 837.32
## + TEAM_BATTING_BB   1     11.87 14228 837.35
## + TEAM_BATTING_H    1      3.18 14237 837.47
## + TEAM_PITCHING_SO  1      1.32 14239 837.49
## + TEAM_BASERUN_SB   1      0.53 14240 837.50
## 
## Step:  AIC=822.91
## TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_FIELDING_E + 
##     TEAM_FIELDING_DP + TEAM_BATTING_SO + TEAM_PITCHING_HR
## 
##                    Df Sum of Sq   RSS    AIC
## + TEAM_BATTING_HBP  1   227.046 12966 821.59
## + TEAM_BATTING_3B   1   185.328 13007 822.21
## <none>                          13193 822.91
## + TEAM_BASERUN_SB   1    71.924 13121 823.87
## + TEAM_BATTING_2B   1    26.924 13166 824.52
## + TEAM_BATTING_BB   1    22.120 13170 824.59
## + TEAM_BATTING_HR   1    19.591 13173 824.63
## + TEAM_BATTING_H    1    11.576 13181 824.74
## + TEAM_PITCHING_SO  1    11.572 13181 824.74
## + TEAM_BASERUN_CS   1     3.807 13189 824.86
## 
## Step:  AIC=821.59
## TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_FIELDING_E + 
##     TEAM_FIELDING_DP + TEAM_BATTING_SO + TEAM_PITCHING_HR + TEAM_BATTING_HBP
## 
##                    Df Sum of Sq   RSS    AIC
## <none>                          12966 821.59
## + TEAM_BATTING_3B   1   133.718 12832 821.61
## + TEAM_BASERUN_SB   1    85.053 12880 822.34
## + TEAM_BATTING_2B   1    21.039 12944 823.28
## + TEAM_BATTING_BB   1    19.019 12946 823.31
## + TEAM_BATTING_HR   1    14.639 12951 823.38
## + TEAM_BATTING_H    1     8.289 12957 823.47
## + TEAM_PITCHING_SO  1     7.817 12958 823.48
## + TEAM_BASERUN_CS   1     6.989 12958 823.49
# View final model: print the call, residual quantiles, coefficient table and
# fit statistics of the model chosen by forward selection
summary(model3_forward)
## 
## Call:
## lm(formula = TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB + 
##     TEAM_FIELDING_E + TEAM_FIELDING_DP + TEAM_BATTING_SO + TEAM_PITCHING_HR + 
##     TEAM_BATTING_HBP, data = df_train_clean)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.1398  -5.7083  -0.0577   5.0988  21.3042 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      60.402074  19.132094   3.157 0.001864 ** 
## TEAM_PITCHING_H   0.025768   0.010114   2.548 0.011664 *  
## TEAM_PITCHING_BB  0.056595   0.009394   6.025 9.10e-09 ***
## TEAM_FIELDING_E  -0.172831   0.039620  -4.362 2.15e-05 ***
## TEAM_FIELDING_DP -0.118319   0.035129  -3.368 0.000923 ***
## TEAM_BATTING_SO  -0.031364   0.007280  -4.308 2.68e-05 ***
## TEAM_PITCHING_HR  0.089586   0.023914   3.746 0.000240 ***
## TEAM_BATTING_HBP  0.086790   0.048482   1.790 0.075084 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.417 on 183 degrees of freedom
## Multiple R-squared:  0.5351, Adjusted R-squared:  0.5173 
## F-statistic: 30.09 on 7 and 183 DF,  p-value: < 2.2e-16

2.3 Stepwise Selection (Both Directions)

# Stepwise selection in both directions: start from the model with every
# predictor; at each step a predictor may be dropped or a previously dropped
# one added back, whichever gives the lowest AIC.
#
# stepAIC() must fit every candidate model to the same set of rows. Fitting
# on the raw data relies on lm() silently omitting rows with NAs, and that set
# of rows can change when a predictor with missing values leaves or re-enters
# the model, in which case stepAIC() stops with an error. Restrict the data to
# complete cases up front so that the row set is fixed for the whole search.
df_train_clean <- df_train[complete.cases(df_train), ]

model3_both <- stepAIC(
  object = lm(TARGET_WINS ~ ., data = df_train_clean),
  direction = "both"
)
## Start:  AIC=831.31
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B + 
##     TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BATTING_SO + TEAM_BASERUN_SB + 
##     TEAM_BASERUN_CS + TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_HR + 
##     TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BATTING_SO   1      1.24 12547 829.33
## - TEAM_PITCHING_SO  1      1.48 12547 829.33
## - TEAM_BASERUN_CS   1      1.71 12548 829.34
## - TEAM_BATTING_HR   1     15.23 12561 829.54
## - TEAM_PITCHING_HR  1     15.79 12562 829.55
## - TEAM_PITCHING_H   1     33.63 12580 829.82
## - TEAM_BATTING_H    1     34.42 12580 829.83
## - TEAM_BATTING_2B   1     54.41 12600 830.14
## - TEAM_BASERUN_SB   1     95.22 12641 830.76
## - TEAM_BATTING_BB   1    107.84 12654 830.95
## - TEAM_PITCHING_BB  1    110.48 12656 830.99
## - TEAM_BATTING_3B   1    122.16 12668 831.16
## <none>                          12546 831.31
## - TEAM_BATTING_HBP  1    198.21 12744 832.31
## - TEAM_FIELDING_DP  1    628.49 13174 838.65
## - TEAM_FIELDING_E   1   1237.79 13784 847.28
## 
## Step:  AIC=829.33
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B + 
##     TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BASERUN_SB + TEAM_BASERUN_CS + 
##     TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_HR + TEAM_PITCHING_BB + 
##     TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BASERUN_CS   1      1.59 12549 827.35
## - TEAM_BATTING_HR   1     15.82 12563 827.57
## - TEAM_PITCHING_HR  1     16.39 12564 827.58
## - TEAM_BATTING_2B   1     53.47 12601 828.14
## - TEAM_PITCHING_H   1     88.45 12636 828.67
## - TEAM_BATTING_H    1     90.30 12637 828.70
## - TEAM_BASERUN_SB   1     94.19 12641 828.76
## - TEAM_BATTING_BB   1    107.95 12655 828.97
## - TEAM_PITCHING_BB  1    110.60 12658 829.01
## - TEAM_BATTING_3B   1    122.20 12669 829.18
## <none>                          12547 829.33
## - TEAM_BATTING_HBP  1    197.11 12744 830.31
## + TEAM_BATTING_SO   1      1.24 12546 831.31
## - TEAM_FIELDING_DP  1    630.68 13178 836.70
## - TEAM_FIELDING_E   1   1240.80 13788 845.34
## - TEAM_PITCHING_SO  1   1312.89 13860 846.34
## 
## Step:  AIC=827.35
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B + 
##     TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BASERUN_SB + TEAM_BATTING_HBP + 
##     TEAM_PITCHING_H + TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_PITCHING_SO + 
##     TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BATTING_HR   1     16.06 12565 825.60
## - TEAM_PITCHING_HR  1     16.64 12565 825.61
## - TEAM_BATTING_2B   1     53.05 12602 826.16
## - TEAM_PITCHING_H   1     90.24 12639 826.72
## - TEAM_BATTING_H    1     92.13 12641 826.75
## - TEAM_BATTING_BB   1    110.31 12659 827.03
## - TEAM_PITCHING_BB  1    113.00 12662 827.07
## - TEAM_BASERUN_SB   1    123.42 12672 827.22
## - TEAM_BATTING_3B   1    129.33 12678 827.31
## <none>                          12549 827.35
## - TEAM_BATTING_HBP  1    197.23 12746 828.33
## + TEAM_BASERUN_CS   1      1.59 12547 829.33
## + TEAM_BATTING_SO   1      1.12 12548 829.34
## - TEAM_FIELDING_DP  1    635.62 13184 834.79
## - TEAM_PITCHING_SO  1   1311.88 13861 844.35
## - TEAM_FIELDING_E   1   1322.05 13871 844.49
## 
## Step:  AIC=825.6
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B + 
##     TEAM_BATTING_BB + TEAM_BASERUN_SB + TEAM_BATTING_HBP + TEAM_PITCHING_H + 
##     TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_PITCHING_SO + 
##     TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BATTING_2B   1     55.48 12620 824.44
## - TEAM_PITCHING_H   1     89.26 12654 824.95
## - TEAM_BATTING_H    1     91.97 12657 824.99
## - TEAM_BATTING_BB   1    104.58 12669 825.18
## - TEAM_PITCHING_BB  1    107.19 12672 825.22
## <none>                          12565 825.60
## - TEAM_BATTING_3B   1    137.48 12702 825.68
## - TEAM_BASERUN_SB   1    146.90 12712 825.82
## - TEAM_BATTING_HBP  1    200.36 12765 826.62
## + TEAM_BATTING_HR   1     16.06 12549 827.35
## + TEAM_BASERUN_CS   1      1.83 12563 827.57
## + TEAM_BATTING_SO   1      1.67 12563 827.57
## - TEAM_FIELDING_DP  1    628.95 13194 832.93
## - TEAM_PITCHING_HR  1    853.54 13418 836.15
## - TEAM_PITCHING_SO  1   1316.68 13882 842.63
## - TEAM_FIELDING_E   1   1333.15 13898 842.86
## 
## Step:  AIC=824.44
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_BB + 
##     TEAM_BASERUN_SB + TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_HR + 
##     TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_PITCHING_H   1     84.47 12705 823.71
## - TEAM_BATTING_H    1     87.79 12708 823.76
## - TEAM_BATTING_BB   1     98.92 12719 823.93
## - TEAM_PITCHING_BB  1    101.48 12722 823.97
## - TEAM_BASERUN_SB   1    109.27 12730 824.09
## <none>                          12620 824.44
## - TEAM_BATTING_3B   1    147.01 12767 824.65
## - TEAM_BATTING_HBP  1    204.39 12825 825.51
## + TEAM_BATTING_2B   1     55.48 12565 825.60
## + TEAM_BATTING_HR   1     18.48 12602 826.16
## + TEAM_BASERUN_CS   1      1.38 12619 826.42
## + TEAM_BATTING_SO   1      0.55 12620 826.43
## - TEAM_FIELDING_DP  1    649.12 13269 832.02
## - TEAM_PITCHING_HR  1    812.92 13433 834.36
## - TEAM_PITCHING_SO  1   1262.90 13883 840.66
## - TEAM_FIELDING_E   1   1379.34 14000 842.25
## 
## Step:  AIC=823.71
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_BB + 
##     TEAM_BASERUN_SB + TEAM_BATTING_HBP + TEAM_PITCHING_HR + TEAM_PITCHING_BB + 
##     TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BATTING_BB   1     32.85 12738 822.21
## - TEAM_PITCHING_BB  1     43.42 12748 822.37
## - TEAM_BASERUN_SB   1    105.16 12810 823.29
## <none>                          12705 823.71
## - TEAM_BATTING_3B   1    153.13 12858 824.00
## + TEAM_PITCHING_H   1     84.47 12620 824.44
## - TEAM_BATTING_HBP  1    183.82 12888 824.46
## + TEAM_BATTING_SO   1     62.04 12643 824.78
## + TEAM_BATTING_2B   1     50.69 12654 824.95
## + TEAM_BATTING_HR   1     12.25 12692 825.53
## + TEAM_BASERUN_CS   1      3.11 12702 825.67
## - TEAM_BATTING_H    1    504.11 13209 829.15
## - TEAM_FIELDING_DP  1    602.80 13308 830.57
## - TEAM_PITCHING_HR  1    850.25 13555 834.09
## - TEAM_PITCHING_SO  1   1259.72 13964 839.77
## - TEAM_FIELDING_E   1   1419.39 14124 841.94
## 
## Step:  AIC=822.21
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BASERUN_SB + 
##     TEAM_BATTING_HBP + TEAM_PITCHING_HR + TEAM_PITCHING_BB + 
##     TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BASERUN_SB   1    109.99 12848 821.85
## <none>                          12738 822.21
## - TEAM_BATTING_3B   1    156.45 12894 822.54
## - TEAM_BATTING_HBP  1    186.58 12924 822.98
## + TEAM_BATTING_2B   1     48.63 12689 823.48
## + TEAM_BATTING_BB   1     32.85 12705 823.71
## + TEAM_BATTING_HR   1     22.99 12715 823.86
## + TEAM_PITCHING_H   1     18.40 12719 823.93
## + TEAM_BATTING_SO   1     17.51 12720 823.94
## + TEAM_BASERUN_CS   1      3.86 12734 824.15
## - TEAM_BATTING_H    1    485.67 13223 827.35
## - TEAM_FIELDING_DP  1    623.19 13361 829.33
## - TEAM_PITCHING_HR  1    843.83 13581 832.46
## - TEAM_PITCHING_SO  1   1267.25 14005 838.32
## - TEAM_FIELDING_E   1   1395.02 14133 840.06
## - TEAM_PITCHING_BB  1   2364.81 15102 852.73
## 
## Step:  AIC=821.85
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_HBP + 
##     TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_PITCHING_SO + 
##     TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## - TEAM_BATTING_3B   1    133.47 12981 821.82
## <none>                          12848 821.85
## + TEAM_BASERUN_SB   1    109.99 12738 822.21
## - TEAM_BATTING_HBP  1    177.11 13025 822.46
## + TEAM_BATTING_BB   1     37.69 12810 823.29
## + TEAM_BATTING_HR   1     30.72 12817 823.39
## + TEAM_BASERUN_CS   1     23.16 12824 823.51
## + TEAM_PITCHING_H   1     22.34 12825 823.52
## + TEAM_BATTING_SO   1     21.53 12826 823.53
## + TEAM_BATTING_2B   1     14.11 12834 823.64
## - TEAM_BATTING_H    1    566.11 13414 828.09
## - TEAM_FIELDING_DP  1    737.46 13585 830.51
## - TEAM_PITCHING_HR  1    756.49 13604 830.78
## - TEAM_PITCHING_SO  1   1257.91 14106 837.69
## - TEAM_FIELDING_E   1   1330.40 14178 838.67
## - TEAM_PITCHING_BB  1   2371.12 15219 852.20
## 
## Step:  AIC=821.82
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_HBP + TEAM_PITCHING_HR + 
##     TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
## 
##                    Df Sum of Sq   RSS    AIC
## <none>                          12981 821.82
## + TEAM_BATTING_3B   1    133.47 12848 821.85
## + TEAM_BASERUN_SB   1     87.02 12894 822.54
## - TEAM_BATTING_HBP  1    228.70 13210 823.16
## + TEAM_BATTING_BB   1     40.42 12941 823.23
## + TEAM_BATTING_HR   1     33.83 12947 823.33
## + TEAM_PITCHING_H   1     23.95 12957 823.47
## + TEAM_BATTING_SO   1     23.13 12958 823.48
## + TEAM_BATTING_2B   1     21.28 12960 823.51
## + TEAM_BASERUN_CS   1      7.07 12974 823.72
## - TEAM_BATTING_H    1    449.87 13431 826.33
## - TEAM_FIELDING_DP  1    813.17 13794 831.43
## - TEAM_PITCHING_HR  1    990.20 13971 833.86
## - TEAM_PITCHING_SO  1   1316.56 14298 838.27
## - TEAM_FIELDING_E   1   1334.60 14316 838.52
## - TEAM_PITCHING_BB  1   2583.00 15564 854.49

3 Presenting Models

# Print the three selected models side by side as a plain-text regression
# table; columns (1), (2), (3) follow the order of the arguments:
# backward, forward, both
stargazer(
  model3_backward,
  model3_forward,
  model3_both,
  type = "text"
)
## 
## ============================================================
##                                     Dependent variable:     
##                                -----------------------------
##                                         TARGET_WINS         
##                                   (1)       (2)       (3)   
## ------------------------------------------------------------
## TEAM_BATTING_H                  0.025**             0.025** 
##                                 (0.010)             (0.010) 
##                                                             
## TEAM_PITCHING_H                           0.026**           
##                                           (0.010)           
##                                                             
## TEAM_BATTING_HBP                0.087*    0.087*    0.087*  
##                                 (0.049)   (0.048)   (0.049) 
##                                                             
## TEAM_PITCHING_HR               0.089***  0.090***  0.089*** 
##                                 (0.024)   (0.024)   (0.024) 
##                                                             
## TEAM_PITCHING_BB               0.057***  0.057***  0.057*** 
##                                 (0.009)   (0.009)   (0.009) 
##                                                             
## TEAM_PITCHING_SO               -0.031***           -0.031***
##                                 (0.007)             (0.007) 
##                                                             
## TEAM_FIELDING_E                -0.172*** -0.173*** -0.172***
##                                 (0.040)   (0.040)   (0.040) 
##                                                             
## TEAM_FIELDING_DP               -0.119*** -0.118*** -0.119***
##                                 (0.035)   (0.035)   (0.035) 
##                                                             
## TEAM_BATTING_SO                          -0.031***          
##                                           (0.007)           
##                                                             
## Constant                       60.955*** 60.402*** 60.955***
##                                (19.103)  (19.132)  (19.103) 
##                                                             
## ------------------------------------------------------------
## Observations                      191       191       191   
## R2                               0.535     0.535     0.535  
## Adjusted R2                      0.517     0.517     0.517  
## Residual Std. Error (df = 183)   8.422     8.417     8.422  
## F Statistic (df = 7; 183)      30.019*** 30.087*** 30.019***
## ============================================================
## Note:                            *p<0.1; **p<0.05; ***p<0.01