# Point the session at the project folder; the data file read further down
# is addressed relative to this directory
setwd("/Users/arvindsharma/Dropbox/WCAS/Econometrics/")
# Start from a clean slate: remove every object left in the workspace ...
rm(list = ls())
# ... then run the garbage collector and report memory usage
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 625681 33.5 1429844 76.4 NA 715631 38.3
## Vcells 1181919 9.1 8388608 64.0 49152 2010534 15.4
# Clear the console by emitting a form feed.
# NOTE(review): presumably only consoles such as RStudio treat "\f" as
# "clear"; other terminals may simply print the character.
cat("\f")
# Clear the charts: close every open graphics device.
# A bare `dev.off` (no parentheses) only prints the function's source and
# closes nothing. graphics.off() is used rather than dev.off() because
# dev.off() stops with "cannot shut down device 1 (the null device)" when no
# plot device is open, which is the normal state at the top of a fresh run.
graphics.off()
# Prepare needed libraries: install any package that is missing, then attach
# each one. `packages` is kept as a variable because it is removed explicitly
# once loading has finished.
packages <- c("stargazer", "psych", "tidyverse", "MASS")
for (pkg in packages) {
  # requireNamespace() probes a single package, so the full
  # installed.packages() table is not rebuilt on every iteration
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(
      pkg,
      repos = "https://cran.rstudio.com/", # HTTPS instead of plain HTTP
      dependencies = TRUE
    )
  }
  # character.only = TRUE because the package name is held in a variable
  library(pkg, character.only = TRUE)
}
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'MASS'
##
##
## The following object is masked from 'package:dplyr':
##
## select
# The package list is no longer needed; drop it from the workspace
rm(list = "packages")
# Read the training data (path is relative to the working directory)
df_train <- read.csv(file = "moneyball-training-data.csv")
# df_eval <- read.csv("moneyball-evaluation-data.csv")
# Remove the INDEX column so that `TARGET_WINS ~ .` fits do not use it
df_train[["INDEX"]] <- NULL
# Compact overview: one line per column with its type and first values
glimpse(df_train)
## Rows: 2,276
## Columns: 16
## $ TARGET_WINS <int> 39, 70, 86, 70, 82, 75, 80, 85, 86, 76, 78, 68, 72, 7…
## $ TEAM_BATTING_H <int> 1445, 1339, 1377, 1387, 1297, 1279, 1244, 1273, 1391,…
## $ TEAM_BATTING_2B <int> 194, 219, 232, 209, 186, 200, 179, 171, 197, 213, 179…
## $ TEAM_BATTING_3B <int> 39, 22, 35, 38, 27, 36, 54, 37, 40, 18, 27, 31, 41, 2…
## $ TEAM_BATTING_HR <int> 13, 190, 137, 96, 102, 92, 122, 115, 114, 96, 82, 95,…
## $ TEAM_BATTING_BB <int> 143, 685, 602, 451, 472, 443, 525, 456, 447, 441, 374…
## $ TEAM_BATTING_SO <int> 842, 1075, 917, 922, 920, 973, 1062, 1027, 922, 827, …
## $ TEAM_BASERUN_SB <int> NA, 37, 46, 43, 49, 107, 80, 40, 69, 72, 60, 119, 221…
## $ TEAM_BASERUN_CS <int> NA, 28, 27, 30, 39, 59, 54, 36, 27, 34, 39, 79, 109, …
## $ TEAM_BATTING_HBP <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ TEAM_PITCHING_H <int> 9364, 1347, 1377, 1396, 1297, 1279, 1244, 1281, 1391,…
## $ TEAM_PITCHING_HR <int> 84, 191, 137, 97, 102, 92, 122, 116, 114, 96, 86, 95,…
## $ TEAM_PITCHING_BB <int> 927, 689, 602, 454, 472, 443, 525, 459, 447, 441, 391…
## $ TEAM_PITCHING_SO <int> 5456, 1082, 917, 928, 920, 973, 1062, 1033, 922, 827,…
## $ TEAM_FIELDING_E <int> 1011, 193, 175, 164, 138, 123, 136, 112, 127, 131, 11…
## $ TEAM_FIELDING_DP <int> NA, 155, 153, 156, 168, 149, 186, 136, 169, 159, 141,…
# MODEL 3: stepwise selection by AIC (documentation: help("stepAIC", "MASS")).
# Backward selection: start from the model with every predictor and drop one
# variable at a time for as long as removing it lowers the AIC.
library(MASS)
# stepAIC() has to compare all candidate models on the same rows. Several
# predictors contain NAs, so the data are restricted to complete cases before
# fitting. Fitting on the raw data only works while every dropped column
# leaves lm()'s NA-filtered row set unchanged; otherwise stepAIC() stops with
# "number of rows in use has changed: remove missing values?".
# The full model `TARGET_WINS ~ .` uses every column, so these are exactly the
# rows lm() would have kept anyway and the selection path is unchanged.
df_train_clean <- df_train[complete.cases(df_train), ]
model3_backward <- stepAIC(
  object = lm(TARGET_WINS ~ ., data = df_train_clean),
  direction = "backward"
)
## Start: AIC=831.31
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
## TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BATTING_SO + TEAM_BASERUN_SB +
## TEAM_BASERUN_CS + TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_HR +
## TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BATTING_SO 1 1.24 12547 829.33
## - TEAM_PITCHING_SO 1 1.48 12547 829.33
## - TEAM_BASERUN_CS 1 1.71 12548 829.34
## - TEAM_BATTING_HR 1 15.23 12561 829.54
## - TEAM_PITCHING_HR 1 15.79 12562 829.55
## - TEAM_PITCHING_H 1 33.63 12580 829.82
## - TEAM_BATTING_H 1 34.42 12580 829.83
## - TEAM_BATTING_2B 1 54.41 12600 830.14
## - TEAM_BASERUN_SB 1 95.22 12641 830.76
## - TEAM_BATTING_BB 1 107.84 12654 830.95
## - TEAM_PITCHING_BB 1 110.48 12656 830.99
## - TEAM_BATTING_3B 1 122.16 12668 831.16
## <none> 12546 831.31
## - TEAM_BATTING_HBP 1 198.21 12744 832.31
## - TEAM_FIELDING_DP 1 628.49 13174 838.65
## - TEAM_FIELDING_E 1 1237.79 13784 847.28
##
## Step: AIC=829.33
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
## TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BASERUN_SB + TEAM_BASERUN_CS +
## TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_HR + TEAM_PITCHING_BB +
## TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BASERUN_CS 1 1.59 12549 827.35
## - TEAM_BATTING_HR 1 15.82 12563 827.57
## - TEAM_PITCHING_HR 1 16.39 12564 827.58
## - TEAM_BATTING_2B 1 53.47 12601 828.14
## - TEAM_PITCHING_H 1 88.45 12636 828.67
## - TEAM_BATTING_H 1 90.30 12637 828.70
## - TEAM_BASERUN_SB 1 94.19 12641 828.76
## - TEAM_BATTING_BB 1 107.95 12655 828.97
## - TEAM_PITCHING_BB 1 110.60 12658 829.01
## - TEAM_BATTING_3B 1 122.20 12669 829.18
## <none> 12547 829.33
## - TEAM_BATTING_HBP 1 197.11 12744 830.31
## - TEAM_FIELDING_DP 1 630.68 13178 836.70
## - TEAM_FIELDING_E 1 1240.80 13788 845.34
## - TEAM_PITCHING_SO 1 1312.89 13860 846.34
##
## Step: AIC=827.35
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
## TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BASERUN_SB + TEAM_BATTING_HBP +
## TEAM_PITCHING_H + TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_PITCHING_SO +
## TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BATTING_HR 1 16.06 12565 825.60
## - TEAM_PITCHING_HR 1 16.64 12565 825.61
## - TEAM_BATTING_2B 1 53.05 12602 826.16
## - TEAM_PITCHING_H 1 90.24 12639 826.72
## - TEAM_BATTING_H 1 92.13 12641 826.75
## - TEAM_BATTING_BB 1 110.31 12659 827.03
## - TEAM_PITCHING_BB 1 113.00 12662 827.07
## - TEAM_BASERUN_SB 1 123.42 12672 827.22
## - TEAM_BATTING_3B 1 129.33 12678 827.31
## <none> 12549 827.35
## - TEAM_BATTING_HBP 1 197.23 12746 828.33
## - TEAM_FIELDING_DP 1 635.62 13184 834.79
## - TEAM_PITCHING_SO 1 1311.88 13861 844.35
## - TEAM_FIELDING_E 1 1322.05 13871 844.49
##
## Step: AIC=825.6
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
## TEAM_BATTING_BB + TEAM_BASERUN_SB + TEAM_BATTING_HBP + TEAM_PITCHING_H +
## TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_PITCHING_SO +
## TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BATTING_2B 1 55.48 12620 824.44
## - TEAM_PITCHING_H 1 89.26 12654 824.95
## - TEAM_BATTING_H 1 91.97 12657 824.99
## - TEAM_BATTING_BB 1 104.58 12669 825.18
## - TEAM_PITCHING_BB 1 107.19 12672 825.22
## <none> 12565 825.60
## - TEAM_BATTING_3B 1 137.48 12702 825.68
## - TEAM_BASERUN_SB 1 146.90 12712 825.82
## - TEAM_BATTING_HBP 1 200.36 12765 826.62
## - TEAM_FIELDING_DP 1 628.95 13194 832.93
## - TEAM_PITCHING_HR 1 853.54 13418 836.15
## - TEAM_PITCHING_SO 1 1316.68 13882 842.63
## - TEAM_FIELDING_E 1 1333.15 13898 842.86
##
## Step: AIC=824.44
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_BB +
## TEAM_BASERUN_SB + TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_HR +
## TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_PITCHING_H 1 84.47 12705 823.71
## - TEAM_BATTING_H 1 87.79 12708 823.76
## - TEAM_BATTING_BB 1 98.92 12719 823.93
## - TEAM_PITCHING_BB 1 101.48 12722 823.97
## - TEAM_BASERUN_SB 1 109.27 12730 824.09
## <none> 12620 824.44
## - TEAM_BATTING_3B 1 147.01 12767 824.65
## - TEAM_BATTING_HBP 1 204.39 12825 825.51
## - TEAM_FIELDING_DP 1 649.12 13269 832.02
## - TEAM_PITCHING_HR 1 812.92 13433 834.36
## - TEAM_PITCHING_SO 1 1262.90 13883 840.66
## - TEAM_FIELDING_E 1 1379.34 14000 842.25
##
## Step: AIC=823.71
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_BB +
## TEAM_BASERUN_SB + TEAM_BATTING_HBP + TEAM_PITCHING_HR + TEAM_PITCHING_BB +
## TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BATTING_BB 1 32.85 12738 822.21
## - TEAM_PITCHING_BB 1 43.42 12748 822.37
## - TEAM_BASERUN_SB 1 105.16 12810 823.29
## <none> 12705 823.71
## - TEAM_BATTING_3B 1 153.13 12858 824.00
## - TEAM_BATTING_HBP 1 183.82 12888 824.46
## - TEAM_BATTING_H 1 504.11 13209 829.15
## - TEAM_FIELDING_DP 1 602.80 13308 830.57
## - TEAM_PITCHING_HR 1 850.25 13555 834.09
## - TEAM_PITCHING_SO 1 1259.72 13964 839.77
## - TEAM_FIELDING_E 1 1419.39 14124 841.94
##
## Step: AIC=822.21
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BASERUN_SB +
## TEAM_BATTING_HBP + TEAM_PITCHING_HR + TEAM_PITCHING_BB +
## TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BASERUN_SB 1 109.99 12848 821.85
## <none> 12738 822.21
## - TEAM_BATTING_3B 1 156.45 12894 822.54
## - TEAM_BATTING_HBP 1 186.58 12924 822.98
## - TEAM_BATTING_H 1 485.67 13223 827.35
## - TEAM_FIELDING_DP 1 623.19 13361 829.33
## - TEAM_PITCHING_HR 1 843.83 13581 832.46
## - TEAM_PITCHING_SO 1 1267.25 14005 838.32
## - TEAM_FIELDING_E 1 1395.02 14133 840.06
## - TEAM_PITCHING_BB 1 2364.81 15102 852.73
##
## Step: AIC=821.85
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_HBP +
## TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_PITCHING_SO +
## TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BATTING_3B 1 133.47 12981 821.82
## <none> 12848 821.85
## - TEAM_BATTING_HBP 1 177.11 13025 822.46
## - TEAM_BATTING_H 1 566.11 13414 828.09
## - TEAM_FIELDING_DP 1 737.46 13585 830.51
## - TEAM_PITCHING_HR 1 756.49 13604 830.78
## - TEAM_PITCHING_SO 1 1257.91 14106 837.69
## - TEAM_FIELDING_E 1 1330.40 14178 838.67
## - TEAM_PITCHING_BB 1 2371.12 15219 852.20
##
## Step: AIC=821.82
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_HBP + TEAM_PITCHING_HR +
## TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## <none> 12981 821.82
## - TEAM_BATTING_HBP 1 228.70 13210 823.16
## - TEAM_BATTING_H 1 449.87 13431 826.33
## - TEAM_FIELDING_DP 1 813.17 13794 831.43
## - TEAM_PITCHING_HR 1 990.20 13971 833.86
## - TEAM_PITCHING_SO 1 1316.56 14298 838.27
## - TEAM_FIELDING_E 1 1334.60 14316 838.52
## - TEAM_PITCHING_BB 1 2583.00 15564 854.49
# Keep only the rows that have no missing value in any column
# (predictors and target alike)
df_train_clean <- df_train[complete.cases(df_train), ]
# The two ends of the search: intercept-only model and all-predictors model
null <- lm(formula = TARGET_WINS ~ 1, data = df_train_clean)
full <- lm(formula = TARGET_WINS ~ ., data = df_train_clean)
# Forward selection: begin at the intercept-only model and add predictors
# drawn from the full model's formula; trace = TRUE prints every step
model3_forward <- stepAIC(
  null,
  direction = "forward",
  scope = formula(full),
  trace = TRUE
)
## Start: AIC=953.88
## TARGET_WINS ~ 1
##
## Df Sum of Sq RSS AIC
## + TEAM_PITCHING_H 1 6192.6 21694 907.91
## + TEAM_BATTING_H 1 6158.8 21728 908.21
## + TEAM_BATTING_BB 1 6125.9 21761 908.50
## + TEAM_PITCHING_BB 1 6118.3 21769 908.57
## + TEAM_PITCHING_HR 1 4977.2 22910 918.33
## + TEAM_BATTING_HR 1 4976.0 22911 918.34
## + TEAM_FIELDING_E 1 4169.9 23717 924.94
## + TEAM_BATTING_2B 1 2731.8 25155 936.18
## + TEAM_PITCHING_SO 1 1467.1 26420 945.55
## + TEAM_BATTING_SO 1 1461.1 26426 945.60
## + TEAM_FIELDING_DP 1 1069.8 26817 948.40
## + TEAM_BASERUN_CS 1 891.1 26996 949.67
## + TEAM_BATTING_3B 1 431.2 27456 952.90
## <none> 27887 953.88
## + TEAM_BATTING_HBP 1 150.7 27736 954.84
## + TEAM_BASERUN_SB 1 6.1 27881 955.83
##
## Step: AIC=907.91
## TARGET_WINS ~ TEAM_PITCHING_H
##
## Df Sum of Sq RSS AIC
## + TEAM_PITCHING_BB 1 4092.0 17602 869.99
## + TEAM_BATTING_BB 1 4085.8 17608 870.06
## + TEAM_FIELDING_E 1 2145.9 19548 890.02
## + TEAM_PITCHING_HR 1 1847.5 19847 892.91
## + TEAM_BATTING_HR 1 1841.9 19852 892.97
## + TEAM_BATTING_3B 1 1471.8 20223 896.50
## + TEAM_FIELDING_DP 1 1144.2 20550 899.56
## + TEAM_BASERUN_CS 1 512.2 21182 905.35
## <none> 21694 907.91
## + TEAM_BATTING_HBP 1 209.1 21485 908.06
## + TEAM_BATTING_SO 1 145.9 21548 908.62
## + TEAM_PITCHING_SO 1 144.4 21550 908.64
## + TEAM_BATTING_2B 1 97.1 21597 909.06
## + TEAM_BATTING_H 1 14.2 21680 909.79
## + TEAM_BASERUN_SB 1 11.2 21683 909.81
##
## Step: AIC=869.99
## TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB
##
## Df Sum of Sq RSS AIC
## + TEAM_FIELDING_E 1 1979.12 15623 849.21
## + TEAM_BATTING_SO 1 1129.82 16472 859.32
## + TEAM_PITCHING_SO 1 1128.21 16474 859.34
## + TEAM_FIELDING_DP 1 812.28 16790 862.97
## + TEAM_BATTING_3B 1 507.81 17095 866.40
## + TEAM_PITCHING_HR 1 316.07 17286 868.53
## + TEAM_BATTING_HR 1 314.48 17288 868.55
## <none> 17602 869.99
## + TEAM_BATTING_HBP 1 120.92 17481 870.67
## + TEAM_BASERUN_CS 1 107.54 17495 870.82
## + TEAM_BASERUN_SB 1 11.15 17591 871.87
## + TEAM_BATTING_2B 1 9.85 17592 871.88
## + TEAM_BATTING_BB 1 6.71 17596 871.92
## + TEAM_BATTING_H 1 1.44 17601 871.98
##
## Step: AIC=849.21
## TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_FIELDING_E
##
## Df Sum of Sq RSS AIC
## + TEAM_FIELDING_DP 1 708.40 14915 842.35
## + TEAM_PITCHING_HR 1 628.58 14995 843.37
## + TEAM_BATTING_HR 1 623.28 15000 843.43
## + TEAM_BATTING_3B 1 550.55 15073 844.36
## + TEAM_BATTING_SO 1 508.85 15114 844.89
## + TEAM_PITCHING_SO 1 504.24 15119 844.94
## <none> 15623 849.21
## + TEAM_BATTING_HBP 1 160.80 15462 849.23
## + TEAM_BATTING_BB 1 38.38 15585 850.74
## + TEAM_BASERUN_SB 1 36.78 15586 850.76
## + TEAM_BATTING_H 1 25.38 15598 850.90
## + TEAM_BASERUN_CS 1 3.93 15619 851.16
## + TEAM_BATTING_2B 1 0.08 15623 851.21
##
## Step: AIC=842.35
## TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_FIELDING_E +
## TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## + TEAM_BATTING_SO 1 674.53 14240 835.51
## + TEAM_PITCHING_SO 1 672.63 14242 835.53
## + TEAM_PITCHING_HR 1 564.45 14350 836.98
## + TEAM_BATTING_HR 1 561.02 14354 837.02
## + TEAM_BATTING_3B 1 420.31 14494 838.89
## <none> 14915 842.35
## + TEAM_BATTING_HBP 1 118.56 14796 842.82
## + TEAM_BATTING_BB 1 18.14 14897 844.11
## + TEAM_BASERUN_CS 1 7.92 14907 844.25
## + TEAM_BATTING_H 1 6.90 14908 844.26
## + TEAM_BASERUN_SB 1 4.99 14910 844.28
## + TEAM_BATTING_2B 1 0.23 14915 844.34
##
## Step: AIC=835.51
## TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_FIELDING_E +
## TEAM_FIELDING_DP + TEAM_BATTING_SO
##
## Df Sum of Sq RSS AIC
## + TEAM_PITCHING_HR 1 1047.77 13193 822.91
## + TEAM_BATTING_HR 1 1043.39 13197 822.97
## + TEAM_BATTING_3B 1 462.05 13778 831.21
## + TEAM_BATTING_HBP 1 280.52 13960 833.71
## <none> 14240 835.51
## + TEAM_BASERUN_CS 1 28.38 14212 837.13
## + TEAM_BATTING_2B 1 13.77 14227 837.32
## + TEAM_BATTING_BB 1 11.87 14228 837.35
## + TEAM_BATTING_H 1 3.18 14237 837.47
## + TEAM_PITCHING_SO 1 1.32 14239 837.49
## + TEAM_BASERUN_SB 1 0.53 14240 837.50
##
## Step: AIC=822.91
## TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_FIELDING_E +
## TEAM_FIELDING_DP + TEAM_BATTING_SO + TEAM_PITCHING_HR
##
## Df Sum of Sq RSS AIC
## + TEAM_BATTING_HBP 1 227.046 12966 821.59
## + TEAM_BATTING_3B 1 185.328 13007 822.21
## <none> 13193 822.91
## + TEAM_BASERUN_SB 1 71.924 13121 823.87
## + TEAM_BATTING_2B 1 26.924 13166 824.52
## + TEAM_BATTING_BB 1 22.120 13170 824.59
## + TEAM_BATTING_HR 1 19.591 13173 824.63
## + TEAM_BATTING_H 1 11.576 13181 824.74
## + TEAM_PITCHING_SO 1 11.572 13181 824.74
## + TEAM_BASERUN_CS 1 3.807 13189 824.86
##
## Step: AIC=821.59
## TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_FIELDING_E +
## TEAM_FIELDING_DP + TEAM_BATTING_SO + TEAM_PITCHING_HR + TEAM_BATTING_HBP
##
## Df Sum of Sq RSS AIC
## <none> 12966 821.59
## + TEAM_BATTING_3B 1 133.718 12832 821.61
## + TEAM_BASERUN_SB 1 85.053 12880 822.34
## + TEAM_BATTING_2B 1 21.039 12944 823.28
## + TEAM_BATTING_BB 1 19.019 12946 823.31
## + TEAM_BATTING_HR 1 14.639 12951 823.38
## + TEAM_BATTING_H 1 8.289 12957 823.47
## + TEAM_PITCHING_SO 1 7.817 12958 823.48
## + TEAM_BASERUN_CS 1 6.989 12958 823.49
# View final model: print the call, residual quantiles, coefficient table and
# fit statistics (R-squared, F-statistic) of the forward-selected model
summary(model3_forward)
##
## Call:
## lm(formula = TARGET_WINS ~ TEAM_PITCHING_H + TEAM_PITCHING_BB +
## TEAM_FIELDING_E + TEAM_FIELDING_DP + TEAM_BATTING_SO + TEAM_PITCHING_HR +
## TEAM_BATTING_HBP, data = df_train_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.1398 -5.7083 -0.0577 5.0988 21.3042
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.402074 19.132094 3.157 0.001864 **
## TEAM_PITCHING_H 0.025768 0.010114 2.548 0.011664 *
## TEAM_PITCHING_BB 0.056595 0.009394 6.025 9.10e-09 ***
## TEAM_FIELDING_E -0.172831 0.039620 -4.362 2.15e-05 ***
## TEAM_FIELDING_DP -0.118319 0.035129 -3.368 0.000923 ***
## TEAM_BATTING_SO -0.031364 0.007280 -4.308 2.68e-05 ***
## TEAM_PITCHING_HR 0.089586 0.023914 3.746 0.000240 ***
## TEAM_BATTING_HBP 0.086790 0.048482 1.790 0.075084 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.417 on 183 degrees of freedom
## Multiple R-squared: 0.5351, Adjusted R-squared: 0.5173
## F-statistic: 30.09 on 7 and 183 DF, p-value: < 2.2e-16
# Stepwise selection in both directions: start from the model with every
# predictor; at each step a variable may be dropped or a previously removed
# one added back, whichever lowers the AIC most.
# Fitted on the complete-case data (df_train_clean), like the forward search.
# stepAIC() must evaluate every candidate on the same rows; with the raw data
# it would stop with "number of rows in use has changed" as soon as a step
# altered which rows lm() discards for NAs. The full model uses every column,
# so the rows used are the same as before and the selection path is unchanged.
model3_both <- stepAIC(
  object = lm(TARGET_WINS ~ ., data = df_train_clean),
  direction = "both"
)
## Start: AIC=831.31
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
## TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BATTING_SO + TEAM_BASERUN_SB +
## TEAM_BASERUN_CS + TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_HR +
## TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BATTING_SO 1 1.24 12547 829.33
## - TEAM_PITCHING_SO 1 1.48 12547 829.33
## - TEAM_BASERUN_CS 1 1.71 12548 829.34
## - TEAM_BATTING_HR 1 15.23 12561 829.54
## - TEAM_PITCHING_HR 1 15.79 12562 829.55
## - TEAM_PITCHING_H 1 33.63 12580 829.82
## - TEAM_BATTING_H 1 34.42 12580 829.83
## - TEAM_BATTING_2B 1 54.41 12600 830.14
## - TEAM_BASERUN_SB 1 95.22 12641 830.76
## - TEAM_BATTING_BB 1 107.84 12654 830.95
## - TEAM_PITCHING_BB 1 110.48 12656 830.99
## - TEAM_BATTING_3B 1 122.16 12668 831.16
## <none> 12546 831.31
## - TEAM_BATTING_HBP 1 198.21 12744 832.31
## - TEAM_FIELDING_DP 1 628.49 13174 838.65
## - TEAM_FIELDING_E 1 1237.79 13784 847.28
##
## Step: AIC=829.33
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
## TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BASERUN_SB + TEAM_BASERUN_CS +
## TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_HR + TEAM_PITCHING_BB +
## TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BASERUN_CS 1 1.59 12549 827.35
## - TEAM_BATTING_HR 1 15.82 12563 827.57
## - TEAM_PITCHING_HR 1 16.39 12564 827.58
## - TEAM_BATTING_2B 1 53.47 12601 828.14
## - TEAM_PITCHING_H 1 88.45 12636 828.67
## - TEAM_BATTING_H 1 90.30 12637 828.70
## - TEAM_BASERUN_SB 1 94.19 12641 828.76
## - TEAM_BATTING_BB 1 107.95 12655 828.97
## - TEAM_PITCHING_BB 1 110.60 12658 829.01
## - TEAM_BATTING_3B 1 122.20 12669 829.18
## <none> 12547 829.33
## - TEAM_BATTING_HBP 1 197.11 12744 830.31
## + TEAM_BATTING_SO 1 1.24 12546 831.31
## - TEAM_FIELDING_DP 1 630.68 13178 836.70
## - TEAM_FIELDING_E 1 1240.80 13788 845.34
## - TEAM_PITCHING_SO 1 1312.89 13860 846.34
##
## Step: AIC=827.35
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
## TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BASERUN_SB + TEAM_BATTING_HBP +
## TEAM_PITCHING_H + TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_PITCHING_SO +
## TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BATTING_HR 1 16.06 12565 825.60
## - TEAM_PITCHING_HR 1 16.64 12565 825.61
## - TEAM_BATTING_2B 1 53.05 12602 826.16
## - TEAM_PITCHING_H 1 90.24 12639 826.72
## - TEAM_BATTING_H 1 92.13 12641 826.75
## - TEAM_BATTING_BB 1 110.31 12659 827.03
## - TEAM_PITCHING_BB 1 113.00 12662 827.07
## - TEAM_BASERUN_SB 1 123.42 12672 827.22
## - TEAM_BATTING_3B 1 129.33 12678 827.31
## <none> 12549 827.35
## - TEAM_BATTING_HBP 1 197.23 12746 828.33
## + TEAM_BASERUN_CS 1 1.59 12547 829.33
## + TEAM_BATTING_SO 1 1.12 12548 829.34
## - TEAM_FIELDING_DP 1 635.62 13184 834.79
## - TEAM_PITCHING_SO 1 1311.88 13861 844.35
## - TEAM_FIELDING_E 1 1322.05 13871 844.49
##
## Step: AIC=825.6
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
## TEAM_BATTING_BB + TEAM_BASERUN_SB + TEAM_BATTING_HBP + TEAM_PITCHING_H +
## TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_PITCHING_SO +
## TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BATTING_2B 1 55.48 12620 824.44
## - TEAM_PITCHING_H 1 89.26 12654 824.95
## - TEAM_BATTING_H 1 91.97 12657 824.99
## - TEAM_BATTING_BB 1 104.58 12669 825.18
## - TEAM_PITCHING_BB 1 107.19 12672 825.22
## <none> 12565 825.60
## - TEAM_BATTING_3B 1 137.48 12702 825.68
## - TEAM_BASERUN_SB 1 146.90 12712 825.82
## - TEAM_BATTING_HBP 1 200.36 12765 826.62
## + TEAM_BATTING_HR 1 16.06 12549 827.35
## + TEAM_BASERUN_CS 1 1.83 12563 827.57
## + TEAM_BATTING_SO 1 1.67 12563 827.57
## - TEAM_FIELDING_DP 1 628.95 13194 832.93
## - TEAM_PITCHING_HR 1 853.54 13418 836.15
## - TEAM_PITCHING_SO 1 1316.68 13882 842.63
## - TEAM_FIELDING_E 1 1333.15 13898 842.86
##
## Step: AIC=824.44
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_BB +
## TEAM_BASERUN_SB + TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_HR +
## TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_PITCHING_H 1 84.47 12705 823.71
## - TEAM_BATTING_H 1 87.79 12708 823.76
## - TEAM_BATTING_BB 1 98.92 12719 823.93
## - TEAM_PITCHING_BB 1 101.48 12722 823.97
## - TEAM_BASERUN_SB 1 109.27 12730 824.09
## <none> 12620 824.44
## - TEAM_BATTING_3B 1 147.01 12767 824.65
## - TEAM_BATTING_HBP 1 204.39 12825 825.51
## + TEAM_BATTING_2B 1 55.48 12565 825.60
## + TEAM_BATTING_HR 1 18.48 12602 826.16
## + TEAM_BASERUN_CS 1 1.38 12619 826.42
## + TEAM_BATTING_SO 1 0.55 12620 826.43
## - TEAM_FIELDING_DP 1 649.12 13269 832.02
## - TEAM_PITCHING_HR 1 812.92 13433 834.36
## - TEAM_PITCHING_SO 1 1262.90 13883 840.66
## - TEAM_FIELDING_E 1 1379.34 14000 842.25
##
## Step: AIC=823.71
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_BB +
## TEAM_BASERUN_SB + TEAM_BATTING_HBP + TEAM_PITCHING_HR + TEAM_PITCHING_BB +
## TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BATTING_BB 1 32.85 12738 822.21
## - TEAM_PITCHING_BB 1 43.42 12748 822.37
## - TEAM_BASERUN_SB 1 105.16 12810 823.29
## <none> 12705 823.71
## - TEAM_BATTING_3B 1 153.13 12858 824.00
## + TEAM_PITCHING_H 1 84.47 12620 824.44
## - TEAM_BATTING_HBP 1 183.82 12888 824.46
## + TEAM_BATTING_SO 1 62.04 12643 824.78
## + TEAM_BATTING_2B 1 50.69 12654 824.95
## + TEAM_BATTING_HR 1 12.25 12692 825.53
## + TEAM_BASERUN_CS 1 3.11 12702 825.67
## - TEAM_BATTING_H 1 504.11 13209 829.15
## - TEAM_FIELDING_DP 1 602.80 13308 830.57
## - TEAM_PITCHING_HR 1 850.25 13555 834.09
## - TEAM_PITCHING_SO 1 1259.72 13964 839.77
## - TEAM_FIELDING_E 1 1419.39 14124 841.94
##
## Step: AIC=822.21
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BASERUN_SB +
## TEAM_BATTING_HBP + TEAM_PITCHING_HR + TEAM_PITCHING_BB +
## TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BASERUN_SB 1 109.99 12848 821.85
## <none> 12738 822.21
## - TEAM_BATTING_3B 1 156.45 12894 822.54
## - TEAM_BATTING_HBP 1 186.58 12924 822.98
## + TEAM_BATTING_2B 1 48.63 12689 823.48
## + TEAM_BATTING_BB 1 32.85 12705 823.71
## + TEAM_BATTING_HR 1 22.99 12715 823.86
## + TEAM_PITCHING_H 1 18.40 12719 823.93
## + TEAM_BATTING_SO 1 17.51 12720 823.94
## + TEAM_BASERUN_CS 1 3.86 12734 824.15
## - TEAM_BATTING_H 1 485.67 13223 827.35
## - TEAM_FIELDING_DP 1 623.19 13361 829.33
## - TEAM_PITCHING_HR 1 843.83 13581 832.46
## - TEAM_PITCHING_SO 1 1267.25 14005 838.32
## - TEAM_FIELDING_E 1 1395.02 14133 840.06
## - TEAM_PITCHING_BB 1 2364.81 15102 852.73
##
## Step: AIC=821.85
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_HBP +
## TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_PITCHING_SO +
## TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_BATTING_3B 1 133.47 12981 821.82
## <none> 12848 821.85
## + TEAM_BASERUN_SB 1 109.99 12738 822.21
## - TEAM_BATTING_HBP 1 177.11 13025 822.46
## + TEAM_BATTING_BB 1 37.69 12810 823.29
## + TEAM_BATTING_HR 1 30.72 12817 823.39
## + TEAM_BASERUN_CS 1 23.16 12824 823.51
## + TEAM_PITCHING_H 1 22.34 12825 823.52
## + TEAM_BATTING_SO 1 21.53 12826 823.53
## + TEAM_BATTING_2B 1 14.11 12834 823.64
## - TEAM_BATTING_H 1 566.11 13414 828.09
## - TEAM_FIELDING_DP 1 737.46 13585 830.51
## - TEAM_PITCHING_HR 1 756.49 13604 830.78
## - TEAM_PITCHING_SO 1 1257.91 14106 837.69
## - TEAM_FIELDING_E 1 1330.40 14178 838.67
## - TEAM_PITCHING_BB 1 2371.12 15219 852.20
##
## Step: AIC=821.82
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_HBP + TEAM_PITCHING_HR +
## TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## <none> 12981 821.82
## + TEAM_BATTING_3B 1 133.47 12848 821.85
## + TEAM_BASERUN_SB 1 87.02 12894 822.54
## - TEAM_BATTING_HBP 1 228.70 13210 823.16
## + TEAM_BATTING_BB 1 40.42 12941 823.23
## + TEAM_BATTING_HR 1 33.83 12947 823.33
## + TEAM_PITCHING_H 1 23.95 12957 823.47
## + TEAM_BATTING_SO 1 23.13 12958 823.48
## + TEAM_BATTING_2B 1 21.28 12960 823.51
## + TEAM_BASERUN_CS 1 7.07 12974 823.72
## - TEAM_BATTING_H 1 449.87 13431 826.33
## - TEAM_FIELDING_DP 1 813.17 13794 831.43
## - TEAM_PITCHING_HR 1 990.20 13971 833.86
## - TEAM_PITCHING_SO 1 1316.56 14298 838.27
## - TEAM_FIELDING_E 1 1334.60 14316 838.52
## - TEAM_PITCHING_BB 1 2583.00 15564 854.49
# Side-by-side comparison table of the three selected models, rendered as
# plain text. Columns follow the argument order:
# (1) backward, (2) forward, (3) both directions.
stargazer(model3_backward, model3_forward, model3_both,
type = "text")
##
## ============================================================
## Dependent variable:
## -----------------------------
## TARGET_WINS
## (1) (2) (3)
## ------------------------------------------------------------
## TEAM_BATTING_H 0.025** 0.025**
## (0.010) (0.010)
##
## TEAM_PITCHING_H 0.026**
## (0.010)
##
## TEAM_BATTING_HBP 0.087* 0.087* 0.087*
## (0.049) (0.048) (0.049)
##
## TEAM_PITCHING_HR 0.089*** 0.090*** 0.089***
## (0.024) (0.024) (0.024)
##
## TEAM_PITCHING_BB 0.057*** 0.057*** 0.057***
## (0.009) (0.009) (0.009)
##
## TEAM_PITCHING_SO -0.031*** -0.031***
## (0.007) (0.007)
##
## TEAM_FIELDING_E -0.172*** -0.173*** -0.172***
## (0.040) (0.040) (0.040)
##
## TEAM_FIELDING_DP -0.119*** -0.118*** -0.119***
## (0.035) (0.035) (0.035)
##
## TEAM_BATTING_SO -0.031***
## (0.007)
##
## Constant 60.955*** 60.402*** 60.955***
## (19.103) (19.132) (19.103)
##
## ------------------------------------------------------------
## Observations 191 191 191
## R2 0.535 0.535 0.535
## Adjusted R2 0.517 0.517 0.517
## Residual Std. Error (df = 183) 8.422 8.417 8.422
## F Statistic (df = 7; 183) 30.019*** 30.087*** 30.019***
## ============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01