# load required packages
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.0.2
## corrplot 0.84 loaded

Overview of Data

##   INDEX TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B TEAM_BATTING_HR
## 1     9           1209             170              33              83
## 2    10           1221             151              29              88
## 3    14           1395             183              29              93
## 4    47           1539             309              29             159
## 5    60           1445             203              68               5
## 6    63           1431             236              53              10
##   TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB TEAM_BASERUN_CS
## 1             447            1080              62              50
## 2             516             929              54              39
## 3             509             816              59              47
## 4             486             914             148              57
## 5              95             416              NA              NA
## 6             215             377              NA              NA
##   TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB
## 1               NA            1209               83              447
## 2               NA            1221               88              516
## 3               NA            1395               93              509
## 4               42            1539              159              486
## 5               NA            3902               14              257
## 6               NA            2793               20              420
##   TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP
## 1             1080             140              156
## 2              929             135              164
## 3              816             156              153
## 4              914             124              154
## 5             1123             616              130
## 6              736             572              105
##      INDEX      TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B 
##  Min.   :   9   Min.   : 819   Min.   : 44.0   Min.   : 14.00  
##  1st Qu.: 708   1st Qu.:1387   1st Qu.:210.0   1st Qu.: 35.00  
##  Median :1249   Median :1455   Median :239.0   Median : 52.00  
##  Mean   :1264   Mean   :1469   Mean   :241.3   Mean   : 55.91  
##  3rd Qu.:1832   3rd Qu.:1548   3rd Qu.:278.5   3rd Qu.: 72.00  
##  Max.   :2525   Max.   :2170   Max.   :376.0   Max.   :155.00  
##                                                                
##  TEAM_BATTING_HR  TEAM_BATTING_BB TEAM_BATTING_SO  TEAM_BASERUN_SB
##  Min.   :  0.00   Min.   : 15.0   Min.   :   0.0   Min.   :  0.0  
##  1st Qu.: 44.50   1st Qu.:436.5   1st Qu.: 545.0   1st Qu.: 59.0  
##  Median :101.00   Median :509.0   Median : 686.0   Median : 92.0  
##  Mean   : 95.63   Mean   :499.0   Mean   : 709.3   Mean   :123.7  
##  3rd Qu.:135.50   3rd Qu.:565.5   3rd Qu.: 912.0   3rd Qu.:151.8  
##  Max.   :242.00   Max.   :792.0   Max.   :1268.0   Max.   :580.0  
##                                   NA's   :18       NA's   :13     
##  TEAM_BASERUN_CS  TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
##  Min.   :  0.00   Min.   :42.00    Min.   : 1155   Min.   :  0.0   
##  1st Qu.: 38.00   1st Qu.:53.50    1st Qu.: 1426   1st Qu.: 52.0   
##  Median : 49.50   Median :62.00    Median : 1515   Median :104.0   
##  Mean   : 52.32   Mean   :62.37    Mean   : 1813   Mean   :102.1   
##  3rd Qu.: 63.00   3rd Qu.:67.50    3rd Qu.: 1681   3rd Qu.:142.5   
##  Max.   :154.00   Max.   :96.00    Max.   :22768   Max.   :336.0   
##  NA's   :87       NA's   :240                                      
##  TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E  TEAM_FIELDING_DP
##  Min.   : 136.0   Min.   :   0.0   Min.   :  73.0   Min.   : 69.0   
##  1st Qu.: 471.0   1st Qu.: 613.0   1st Qu.: 131.0   1st Qu.:131.0   
##  Median : 526.0   Median : 745.0   Median : 163.0   Median :148.0   
##  Mean   : 552.4   Mean   : 799.7   Mean   : 249.7   Mean   :146.1   
##  3rd Qu.: 606.5   3rd Qu.: 938.0   3rd Qu.: 252.0   3rd Qu.:164.0   
##  Max.   :2008.0   Max.   :9963.0   Max.   :1568.0   Max.   :204.0   
##                   NA's   :18                        NA's   :31
##   INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## 1     1          39           1445             194              39
## 2     2          70           1339             219              22
## 3     3          86           1377             232              35
## 4     4          70           1387             209              38
## 5     5          82           1297             186              27
## 6     6          75           1279             200              36
##   TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## 1              13             143             842              NA
## 2             190             685            1075              37
## 3             137             602             917              46
## 4              96             451             922              43
## 5             102             472             920              49
## 6              92             443             973             107
##   TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
## 1              NA               NA            9364               84
## 2              28               NA            1347              191
## 3              27               NA            1377              137
## 4              30               NA            1396               97
## 5              39               NA            1297              102
## 6              59               NA            1279               92
##   TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP
## 1              927             5456            1011               NA
## 2              689             1082             193              155
## 3              602              917             175              153
## 4              454              928             164              156
## 5              472              920             138              168
## 6              443              973             123              149
##      INDEX         TARGET_WINS     TEAM_BATTING_H TEAM_BATTING_2B
##  Min.   :   1.0   Min.   :  0.00   Min.   : 891   Min.   : 69.0  
##  1st Qu.: 630.8   1st Qu.: 71.00   1st Qu.:1383   1st Qu.:208.0  
##  Median :1270.5   Median : 82.00   Median :1454   Median :238.0  
##  Mean   :1268.5   Mean   : 80.79   Mean   :1469   Mean   :241.2  
##  3rd Qu.:1915.5   3rd Qu.: 92.00   3rd Qu.:1537   3rd Qu.:273.0  
##  Max.   :2535.0   Max.   :146.00   Max.   :2554   Max.   :458.0  
##                                                                  
##  TEAM_BATTING_3B  TEAM_BATTING_HR  TEAM_BATTING_BB TEAM_BATTING_SO 
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.0   Min.   :   0.0  
##  1st Qu.: 34.00   1st Qu.: 42.00   1st Qu.:451.0   1st Qu.: 548.0  
##  Median : 47.00   Median :102.00   Median :512.0   Median : 750.0  
##  Mean   : 55.25   Mean   : 99.61   Mean   :501.6   Mean   : 735.6  
##  3rd Qu.: 72.00   3rd Qu.:147.00   3rd Qu.:580.0   3rd Qu.: 930.0  
##  Max.   :223.00   Max.   :264.00   Max.   :878.0   Max.   :1399.0  
##                                                    NA's   :102     
##  TEAM_BASERUN_SB TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H
##  Min.   :  0.0   Min.   :  0.0   Min.   :29.00    Min.   : 1137  
##  1st Qu.: 66.0   1st Qu.: 38.0   1st Qu.:50.50    1st Qu.: 1419  
##  Median :101.0   Median : 49.0   Median :58.00    Median : 1518  
##  Mean   :124.8   Mean   : 52.8   Mean   :59.36    Mean   : 1779  
##  3rd Qu.:156.0   3rd Qu.: 62.0   3rd Qu.:67.00    3rd Qu.: 1682  
##  Max.   :697.0   Max.   :201.0   Max.   :95.00    Max.   :30132  
##  NA's   :131     NA's   :772     NA's   :2085                    
##  TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO  TEAM_FIELDING_E 
##  Min.   :  0.0    Min.   :   0.0   Min.   :    0.0   Min.   :  65.0  
##  1st Qu.: 50.0    1st Qu.: 476.0   1st Qu.:  615.0   1st Qu.: 127.0  
##  Median :107.0    Median : 536.5   Median :  813.5   Median : 159.0  
##  Mean   :105.7    Mean   : 553.0   Mean   :  817.7   Mean   : 246.5  
##  3rd Qu.:150.0    3rd Qu.: 611.0   3rd Qu.:  968.0   3rd Qu.: 249.2  
##  Max.   :343.0    Max.   :3645.0   Max.   :19278.0   Max.   :1898.0  
##                                    NA's   :102                       
##  TEAM_FIELDING_DP
##  Min.   : 52.0   
##  1st Qu.:131.0   
##  Median :149.0   
##  Mean   :146.4   
##  3rd Qu.:164.0   
##  Max.   :228.0   
##  NA's   :286
## 'data.frame':    2276 obs. of  17 variables:
##  $ INDEX           : int  1 2 3 4 5 6 7 8 11 12 ...
##  $ TARGET_WINS     : int  39 70 86 70 82 75 80 85 86 76 ...
##  $ TEAM_BATTING_H  : int  1445 1339 1377 1387 1297 1279 1244 1273 1391 1271 ...
##  $ TEAM_BATTING_2B : int  194 219 232 209 186 200 179 171 197 213 ...
##  $ TEAM_BATTING_3B : int  39 22 35 38 27 36 54 37 40 18 ...
##  $ TEAM_BATTING_HR : int  13 190 137 96 102 92 122 115 114 96 ...
##  $ TEAM_BATTING_BB : int  143 685 602 451 472 443 525 456 447 441 ...
##  $ TEAM_BATTING_SO : int  842 1075 917 922 920 973 1062 1027 922 827 ...
##  $ TEAM_BASERUN_SB : int  NA 37 46 43 49 107 80 40 69 72 ...
##  $ TEAM_BASERUN_CS : int  NA 28 27 30 39 59 54 36 27 34 ...
##  $ TEAM_BATTING_HBP: int  NA NA NA NA NA NA NA NA NA NA ...
##  $ TEAM_PITCHING_H : int  9364 1347 1377 1396 1297 1279 1244 1281 1391 1271 ...
##  $ TEAM_PITCHING_HR: int  84 191 137 97 102 92 122 116 114 96 ...
##  $ TEAM_PITCHING_BB: int  927 689 602 454 472 443 525 459 447 441 ...
##  $ TEAM_PITCHING_SO: int  5456 1082 917 928 920 973 1062 1033 922 827 ...
##  $ TEAM_FIELDING_E : int  1011 193 175 164 138 123 136 112 127 131 ...
##  $ TEAM_FIELDING_DP: int  NA 155 153 156 168 149 186 136 169 159 ...

Hypothesis

Model 1

## 
## Call:
## lm(formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + 
##     TEAM_BATTING_3B + TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BATTING_SO + 
##     TEAM_BASERUN_SB + TEAM_BASERUN_CS + TEAM_BATTING_HBP + TEAM_PITCHING_H + 
##     TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_PITCHING_SO + 
##     TEAM_FIELDING_E + TEAM_FIELDING_DP, data = money_b_train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -58.041  -8.558   0.145   8.907  53.331 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      28.2930631  4.5824723   6.174 7.86e-10 ***
## TEAM_BATTING_H    0.0429317  0.0034842  12.322  < 2e-16 ***
## TEAM_BATTING_2B  -0.0042264  0.0097037  -0.436 0.663209    
## TEAM_BATTING_3B   0.0649817  0.0170796   3.805 0.000146 ***
## TEAM_BATTING_HR   0.0703822  0.0282035   2.496 0.012648 *  
## TEAM_BATTING_BB   0.0022019  0.0059003   0.373 0.709047    
## TEAM_BATTING_SO  -0.0101940  0.0020913  -4.875 1.17e-06 ***
## TEAM_BASERUN_SB   0.0042336  0.0042081   1.006 0.314494    
## TEAM_BASERUN_CS  -0.0077185  0.0109760  -0.703 0.481993    
## TEAM_BATTING_HBP -0.0542327  0.0194784  -2.784 0.005410 ** 
## TEAM_PITCHING_H  -0.0008096  0.0003800  -2.130 0.033238 *  
## TEAM_PITCHING_HR -0.0025561  0.0249180  -0.103 0.918304    
## TEAM_PITCHING_BB  0.0028645  0.0042178   0.679 0.497119    
## TEAM_PITCHING_SO  0.0019800  0.0009422   2.101 0.035710 *  
## TEAM_FIELDING_E  -0.0286814  0.0030151  -9.512  < 2e-16 ***
## TEAM_FIELDING_DP -0.0657273  0.0101980  -6.445 1.41e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.23 on 2260 degrees of freedom
## Multiple R-squared:  0.299,  Adjusted R-squared:  0.2944 
## F-statistic: 64.27 on 15 and 2260 DF,  p-value: < 2.2e-16

Backward elimination process

We use backward elimination: at each step we drop the predictor with the largest p-value above 0.05 and refit the model. We stop once every remaining predictor has a p-value below 0.05.

## 
## Call:
## lm(formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + 
##     TEAM_BATTING_3B + TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BATTING_SO + 
##     TEAM_BASERUN_SB + TEAM_BASERUN_CS + TEAM_BATTING_HBP + TEAM_PITCHING_H + 
##     TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP, 
##     data = money_b_train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -58.042  -8.547   0.145   8.864  53.323 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      28.3391588  4.5593894   6.216 6.07e-10 ***
## TEAM_BATTING_H    0.0429009  0.0034704  12.362  < 2e-16 ***
## TEAM_BATTING_2B  -0.0042423  0.0097003  -0.437 0.661908    
## TEAM_BATTING_3B   0.0647784  0.0169605   3.819 0.000137 ***
## TEAM_BATTING_HR   0.0676580  0.0094944   7.126 1.38e-12 ***
## TEAM_BATTING_BB   0.0024621  0.0053262   0.462 0.643941    
## TEAM_BATTING_SO  -0.0102303  0.0020605  -4.965 7.39e-07 ***
## TEAM_BASERUN_SB   0.0042559  0.0042016   1.013 0.311203    
## TEAM_BASERUN_CS  -0.0077347  0.0109725  -0.705 0.480933    
## TEAM_BATTING_HBP -0.0539519  0.0192808  -2.798 0.005182 ** 
## TEAM_PITCHING_H  -0.0008071  0.0003792  -2.129 0.033385 *  
## TEAM_PITCHING_BB  0.0026460  0.0036397   0.727 0.467318    
## TEAM_PITCHING_SO  0.0020109  0.0008924   2.253 0.024335 *  
## TEAM_FIELDING_E  -0.0286897  0.0030134  -9.521  < 2e-16 ***
## TEAM_FIELDING_DP -0.0657165  0.0101952  -6.446 1.40e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.23 on 2261 degrees of freedom
## Multiple R-squared:  0.299,  Adjusted R-squared:  0.2947 
## F-statistic: 68.89 on 14 and 2261 DF,  p-value: < 2.2e-16
## 
## Call:
## lm(formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + 
##     TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BATTING_SO + TEAM_BASERUN_SB + 
##     TEAM_BASERUN_CS + TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_BB + 
##     TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP, data = money_b_train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -57.881  -8.564   0.157   8.871  52.974 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      28.9383318  4.3479010   6.656 3.53e-11 ***
## TEAM_BATTING_H    0.0419016  0.0026116  16.044  < 2e-16 ***
## TEAM_BATTING_3B   0.0653117  0.0169136   3.861 0.000116 ***
## TEAM_BATTING_HR   0.0678105  0.0094863   7.148 1.18e-12 ***
## TEAM_BATTING_BB   0.0023148  0.0053146   0.436 0.663204    
## TEAM_BATTING_SO  -0.0103433  0.0020439  -5.060 4.52e-07 ***
## TEAM_BASERUN_SB   0.0043889  0.0041898   1.048 0.294972    
## TEAM_BASERUN_CS  -0.0077472  0.0109705  -0.706 0.480143    
## TEAM_BATTING_HBP -0.0563987  0.0184480  -3.057 0.002260 ** 
## TEAM_PITCHING_H  -0.0008085  0.0003791  -2.133 0.033042 *  
## TEAM_PITCHING_BB  0.0026416  0.0036391   0.726 0.467973    
## TEAM_PITCHING_SO  0.0019677  0.0008868   2.219 0.026592 *  
## TEAM_FIELDING_E  -0.0285535  0.0029967  -9.528  < 2e-16 ***
## TEAM_FIELDING_DP -0.0660580  0.0101634  -6.500 9.88e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.23 on 2262 degrees of freedom
## Multiple R-squared:  0.2989, Adjusted R-squared:  0.2949 
## F-statistic:  74.2 on 13 and 2262 DF,  p-value: < 2.2e-16
## 
## Call:
## lm(formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + 
##     TEAM_BATTING_HR + TEAM_BATTING_SO + TEAM_BASERUN_SB + TEAM_BASERUN_CS + 
##     TEAM_BATTING_HBP + TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_PITCHING_SO + 
##     TEAM_FIELDING_E + TEAM_FIELDING_DP, data = money_b_train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -57.964  -8.518   0.118   8.918  52.990 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      29.5329754  4.1272489   7.156 1.12e-12 ***
## TEAM_BATTING_H    0.0419019  0.0026111  16.047  < 2e-16 ***
## TEAM_BATTING_3B   0.0652171  0.0169092   3.857 0.000118 ***
## TEAM_BATTING_HR   0.0686535  0.0092851   7.394 2.00e-13 ***
## TEAM_BATTING_SO  -0.0101948  0.0020150  -5.060 4.54e-07 ***
## TEAM_BASERUN_SB   0.0045692  0.0041685   1.096 0.273145    
## TEAM_BASERUN_CS  -0.0075804  0.0109618  -0.692 0.489303    
## TEAM_BATTING_HBP -0.0567545  0.0184266  -3.080 0.002095 ** 
## TEAM_PITCHING_H  -0.0008720  0.0003499  -2.492 0.012759 *  
## TEAM_PITCHING_BB  0.0038763  0.0022814   1.699 0.089443 .  
## TEAM_PITCHING_SO  0.0017785  0.0007730   2.301 0.021494 *  
## TEAM_FIELDING_E  -0.0288608  0.0029119  -9.911  < 2e-16 ***
## TEAM_FIELDING_DP -0.0659700  0.0101596  -6.493 1.03e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.22 on 2263 degrees of freedom
## Multiple R-squared:  0.2989, Adjusted R-squared:  0.2952 
## F-statistic: 80.39 on 12 and 2263 DF,  p-value: < 2.2e-16
## 
## Call:
## lm(formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + 
##     TEAM_BATTING_HR + TEAM_BATTING_SO + TEAM_BASERUN_SB + TEAM_BATTING_HBP + 
##     TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + 
##     TEAM_FIELDING_DP, data = money_b_train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -57.994  -8.566   0.139   8.920  53.075 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      29.9071741  4.0911511   7.310 3.68e-13 ***
## TEAM_BATTING_H    0.0416877  0.0025924  16.081  < 2e-16 ***
## TEAM_BATTING_3B   0.0660782  0.0168613   3.919 9.16e-05 ***
## TEAM_BATTING_HR   0.0693627  0.0092272   7.517 8.02e-14 ***
## TEAM_BATTING_SO  -0.0105859  0.0019337  -5.474 4.88e-08 ***
## TEAM_BASERUN_SB   0.0041471  0.0041231   1.006  0.31461    
## TEAM_BATTING_HBP -0.0549311  0.0182349  -3.012  0.00262 ** 
## TEAM_PITCHING_H  -0.0009103  0.0003454  -2.635  0.00846 ** 
## TEAM_PITCHING_BB  0.0039810  0.0022761   1.749  0.08042 .  
## TEAM_PITCHING_SO  0.0018089  0.0007717   2.344  0.01915 *  
## TEAM_FIELDING_E  -0.0286461  0.0028950  -9.895  < 2e-16 ***
## TEAM_FIELDING_DP -0.0674800  0.0099210  -6.802 1.32e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.22 on 2264 degrees of freedom
## Multiple R-squared:  0.2987, Adjusted R-squared:  0.2953 
## F-statistic: 87.68 on 11 and 2264 DF,  p-value: < 2.2e-16
## 
## Call:
## lm(formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + 
##     TEAM_BATTING_HR + TEAM_BATTING_SO + TEAM_BATTING_HBP + TEAM_PITCHING_H + 
##     TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP, 
##     data = money_b_train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -58.356  -8.522   0.150   8.918  52.512 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      30.8812953  3.9748749   7.769 1.19e-14 ***
## TEAM_BATTING_H    0.0416515  0.0025922  16.068  < 2e-16 ***
## TEAM_BATTING_3B   0.0684400  0.0166970   4.099 4.30e-05 ***
## TEAM_BATTING_HR   0.0680528  0.0091349   7.450 1.32e-13 ***
## TEAM_BATTING_SO  -0.0103713  0.0019219  -5.396 7.51e-08 ***
## TEAM_BATTING_HBP -0.0544337  0.0182282  -2.986  0.00285 ** 
## TEAM_PITCHING_H  -0.0009219  0.0003452  -2.670  0.00763 ** 
## TEAM_PITCHING_BB  0.0046295  0.0021829   2.121  0.03405 *  
## TEAM_PITCHING_SO  0.0016404  0.0007532   2.178  0.02953 *  
## TEAM_FIELDING_E  -0.0294204  0.0027908 -10.542  < 2e-16 ***
## TEAM_FIELDING_DP -0.0721842  0.0087494  -8.250 2.65e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.22 on 2265 degrees of freedom
## Multiple R-squared:  0.2984, Adjusted R-squared:  0.2953 
## F-statistic: 96.35 on 10 and 2265 DF,  p-value: < 2.2e-16

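The intercept of the final model is 30.8812953, which is the predicted number of wins when every predictor equals 0. The multiple R-squared is 0.2984 (adjusted R-squared 0.2953), so approximately 30% of the variation in Target Wins can be explained by our model. The F-statistic tells us whether there is a relationship between the dependent variable and at least one of the independent variables we are testing. Generally, a larger F gives stronger evidence of such a relationship, and here we have 96.35 on 10 and 2265 degrees of freedom (p-value < 2.2e-16), so the model as a whole is statistically significant.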

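Our residuals look pretty symmetrical around 0 (median 0.150, first and third quartiles -8.522 and 8.918, minimum -58.356 and maximum 52.512), suggesting that the model's errors are not strongly skewed. Symmetry alone does not show that the model fits well; the residual standard error is still 13.22 wins.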

We observed that many of the points fit close to the line.

Conclusion

After backward elimination, “TEAM_BATTING_H”, “TEAM_BATTING_3B”, “TEAM_BATTING_HR”, “TEAM_BATTING_SO”, “TEAM_BATTING_HBP”, “TEAM_PITCHING_H”, “TEAM_PITCHING_BB”, “TEAM_PITCHING_SO”, “TEAM_FIELDING_E” and “TEAM_FIELDING_DP” are the significant variables (p-value below 0.05) for predicting target wins. “TEAM_PITCHING_HR”, “TEAM_BATTING_2B”, “TEAM_BATTING_BB”, “TEAM_BASERUN_CS” and “TEAM_BASERUN_SB” were removed, in that order, because they were not significant.

Prediction

Using the model we formed, we will predict the target wins for our evaluation data set.

##  [1] "INDEX"            "TEAM_BATTING_H"   "TEAM_BATTING_2B"  "TEAM_BATTING_3B" 
##  [5] "TEAM_BATTING_HR"  "TEAM_BATTING_BB"  "TEAM_BATTING_SO"  "TEAM_BASERUN_SB" 
##  [9] "TEAM_BASERUN_CS"  "TEAM_BATTING_HBP" "TEAM_PITCHING_H"  "TEAM_PITCHING_HR"
## [13] "TEAM_PITCHING_BB" "TEAM_PITCHING_SO" "TEAM_FIELDING_E"  "TEAM_FIELDING_DP"
##         1         2         3         4         5         6         7         8 
##  65.29066  67.05335  75.61094  83.58844  63.67519  67.05147  79.19122  71.82463 
##         9        10        11        12        13        14        15        16 
##  69.16859  75.83207  72.85894  85.86148  85.12034  85.10176  84.27875  79.95146 
##        17        18        19        20        21        22        23        24 
##  73.94082  77.55507  74.53160  93.23044  83.53652  86.22625  83.02278  76.22636 
##        25        26        27        28        29        30        31        32 
##  80.86718  83.07934  55.98740  77.97675  84.45316  78.42339  91.19233  86.82518 
##        33        34        35        36        37        38        39        40 
##  84.66657  86.55304  82.97853  86.90949  77.26532  91.69608  87.43666  92.88114 
##        41        42        43        44        45        46        47        48 
##  85.30480  90.80702  30.92441  94.02767  88.60586  93.81935  98.29944  73.75140 
##        49        50        51        52        53        54        55        56 
##  68.74731  78.75908  80.73946  86.98164  78.66985  76.60196  76.37108  79.80282 
##        57        58        59        60        61        62        63        64 
##  82.72422  70.91085  65.76220  79.90202  85.05061  76.37925  86.51602  83.86209 
##        65        66        67        68        69        70        71        72 
##  79.34097  96.32018  73.25331  79.75918  76.17212  79.46304  89.46515  75.15088 
##        73        74        75        76        77        78        79        80 
##  81.31482  90.27151  83.56028  85.59852  79.77346  82.02732  75.28789  78.90232 
##        81        82        83        84        85        86        87        88 
##  88.67058  91.46127  99.98607  79.50832  85.85703  82.59919  80.95092  83.43583 
##        89        90        91        92        93        94        95        96 
##  84.49936  88.60315  81.04518  83.14081  72.12435  85.15947  82.37573  82.05240 
##        97        98        99       100       101       102       103       104 
##  80.56066 100.73643  87.81227  87.32391  83.35173  76.72292  86.80641  84.59432 
##       105       106       107       108       109       110       111       112 
##  79.90443  68.64333  58.75003  76.67873  83.35499  66.96303  82.54775  82.64571 
##       113       114       115       116       117       118       119       120 
##  90.44238  90.70932  82.42676  80.10387  86.60076  77.60082  74.92640  74.46265 
##       121       122       123       124       125       126       127       128 
##  94.11966  69.79301  73.43003  71.03893  67.27271  89.39861  93.26195  77.00327 
##       129       130       131       132       133       134       135       136 
##  92.94760  95.25139  87.78804  80.46972  76.60287  82.46187  82.76507  65.95787 
##       137       138       139       140       141       142       143       144 
##  75.29018  79.95943  81.42159  79.07409  63.94306  74.73422  91.59832  77.07120 
##       145       146       147       148       149       150       151       152 
##  74.38314  76.10073  78.46125  79.81763  82.32677  84.42143  83.97449  80.39391 
##       153       154       155       156       157       158       159       160 
##  34.56344  73.39020  76.25927  70.03222  84.42043  71.77533  86.26074  74.84888 
##       161       162       163       164       165       166       167       168 
## 103.35588 103.08979  93.55366 104.01778  97.81376  92.49966  84.31036  83.72956 
##       169       170       171       172       173       174       175       176 
##  73.49488  82.14438  88.25983  82.83997  82.79442  93.14520  85.39982  76.46767 
##       177       178       179       180       181       182       183       184 
##  78.47613  75.04096  77.59930  78.70224  76.81687  85.54835  83.64993  82.72803 
##       185       186       187       188       189       190       191       192 
##  88.79406  87.21167  87.88620  59.82733  67.42666 112.68146  73.76840  82.90580 
##       193       194       195       196       197       198       199       200 
##  78.10002  80.51338  82.96158  70.59017  79.43493  84.55084  81.08498  85.39992 
##       201       202       203       204       205       206       207       208 
##  77.91625  81.13866  75.02271  86.77097  79.49386  79.66930  79.62775  78.41551 
##       209       210       211       212       213       214       215       216 
##  83.94179  75.94934 107.15578  98.85732  79.92905  68.66213  73.80250  86.78465 
##       217       218       219       220       221       222       223       224 
##  82.85373  87.33769  77.98820  77.48902  79.20992  74.20021  79.70004  72.39399 
##       225       226       227       228       229       230       231       232 
##  85.88911  75.22900  81.09733  76.31865  78.62687  76.02780  78.43372  92.01634 
##       233       234       235       236       237       238       239       240 
##  81.42248  89.05693  79.72622  74.38382  80.57963  78.15124  95.25373  71.82234 
##       241       242       243       244       245       246       247       248 
##  90.11123  89.48959  85.29521  82.99218  60.68792  87.90637  81.36588  85.17542 
##       249       250       251       252       253       254       255       256 
##  76.66661  79.72225  80.48652  54.90579  93.33442  49.92541  70.11177  76.12061 
##       257       258       259 
##  78.41965  79.00451  80.66990

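A negative predicted value for target wins would not be a valid win total, but none of the predictions above are negative: they range from about 31 to about 113 wins. Overall the predictions fall in a plausible range, although the model explains only about 30% of the variation in wins, so individual predictions can be off by a wide margin (residual standard error 13.22).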

Model 2

## 
## Call:
## lm(formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + 
##     TEAM_BATTING_3B + TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_PITCHING_H + 
##     TEAM_PITCHING_HR + TEAM_PITCHING_BB + TEAM_FIELDING_E, data = money_b_train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -54.423  -8.867   0.115   8.887  55.548 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       6.738568   3.511940   1.919 0.055140 .  
## TEAM_BATTING_H    0.048908   0.003251  15.045  < 2e-16 ***
## TEAM_BATTING_2B  -0.026239   0.009073  -2.892 0.003865 ** 
## TEAM_BATTING_3B   0.102433   0.016734   6.121 1.09e-09 ***
## TEAM_BATTING_HR   0.057039   0.026548   2.149 0.031778 *  
## TEAM_BATTING_BB  -0.001320   0.004840  -0.273 0.785147    
## TEAM_PITCHING_H  -0.001329   0.000369  -3.602 0.000323 ***
## TEAM_PITCHING_HR -0.019072   0.023835  -0.800 0.423689    
## TEAM_PITCHING_BB  0.011387   0.003085   3.691 0.000228 ***
## TEAM_FIELDING_E  -0.016523   0.002373  -6.963 4.34e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.48 on 2266 degrees of freedom
## Multiple R-squared:  0.2703, Adjusted R-squared:  0.2674 
## F-statistic: 93.24 on 9 and 2266 DF,  p-value: < 2.2e-16
## 
## Call:
## lm(formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + 
##     TEAM_BATTING_3B + TEAM_BATTING_HR + TEAM_PITCHING_H + TEAM_PITCHING_HR + 
##     TEAM_PITCHING_BB + TEAM_FIELDING_E, data = money_b_train2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -54.273  -8.832   0.127   8.886  55.587 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       6.526453   3.423988   1.906   0.0568 .  
## TEAM_BATTING_H    0.048766   0.003208  15.200  < 2e-16 ***
## TEAM_BATTING_2B  -0.026072   0.009050  -2.881   0.0040 ** 
## TEAM_BATTING_3B   0.102196   0.016708   6.116 1.12e-09 ***
## TEAM_BATTING_HR   0.054383   0.024691   2.203   0.0277 *  
## TEAM_PITCHING_H  -0.001282   0.000327  -3.922 9.05e-05 ***
## TEAM_PITCHING_HR -0.016991   0.022575  -0.753   0.4517    
## TEAM_PITCHING_BB  0.010755   0.002036   5.283 1.40e-07 ***
## TEAM_FIELDING_E  -0.016351   0.002287  -7.149 1.18e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.48 on 2267 degrees of freedom
## Multiple R-squared:  0.2702, Adjusted R-squared:  0.2677 
## F-statistic: 104.9 on 8 and 2267 DF,  p-value: < 2.2e-16
## 
## Call:
## lm(formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + 
##     TEAM_BATTING_3B + TEAM_BATTING_HR + TEAM_PITCHING_H + TEAM_PITCHING_BB + 
##     TEAM_FIELDING_E, data = money_b_train2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -54.763  -8.861   0.095   8.860  55.469 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       7.2713462  3.2775220   2.219  0.02662 *  
## TEAM_BATTING_H    0.0484775  0.0031849  15.221  < 2e-16 ***
## TEAM_BATTING_2B  -0.0258127  0.0090430  -2.854  0.00435 ** 
## TEAM_BATTING_3B   0.1010776  0.0166406   6.074 1.46e-09 ***
## TEAM_BATTING_HR   0.0366916  0.0075591   4.854 1.29e-06 ***
## TEAM_PITCHING_H  -0.0013088  0.0003251  -4.026 5.87e-05 ***
## TEAM_PITCHING_BB  0.0103207  0.0019522   5.287 1.36e-07 ***
## TEAM_FIELDING_E  -0.0166263  0.0022577  -7.364 2.48e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.48 on 2268 degrees of freedom
## Multiple R-squared:   0.27,  Adjusted R-squared:  0.2678 
## F-statistic: 119.9 on 7 and 2268 DF,  p-value: < 2.2e-16

##  [1] "INDEX"            "TEAM_BATTING_H"   "TEAM_BATTING_2B"  "TEAM_BATTING_3B" 
##  [5] "TEAM_BATTING_HR"  "TEAM_BATTING_BB"  "TEAM_BATTING_SO"  "TEAM_BASERUN_SB" 
##  [9] "TEAM_BASERUN_CS"  "TEAM_BATTING_HBP" "TEAM_PITCHING_H"  "TEAM_PITCHING_HR"
## [13] "TEAM_PITCHING_BB" "TEAM_PITCHING_SO" "TEAM_FIELDING_E"  "TEAM_FIELDING_DP"
##         1         2         3         4         5         6         7         8 
##  68.57679  70.20767  77.35107  83.60728  66.44188  67.44392  74.01699  72.52290 
##         9        10        11        12        13        14        15        16 
##  72.07908  75.86204  76.14127  85.66302  84.25863  82.11244  79.28366  80.65313 
##        17        18        19        20        21        22        23        24 
##  72.72498  80.73209  68.24429  93.15727  84.03790  86.72537  83.94422  76.45507 
##        25        26        27        28        29        30        31        32 
##  82.33443  84.46690  53.99437  77.34772  83.55037  76.54752  89.64897  87.49762 
##        33        34        35        36        37        38        39        40 
##  86.39979  88.63464  83.07959  82.97654  76.59917  90.98962  88.25264  89.93392 
##        41        42        43        44        45        46        47        48 
##  81.06430  86.65244  32.00565  93.94542  84.49850  91.12091  95.25990  72.55215 
##        49        50        51        52        53        54        55        56 
##  70.71842  77.42567  80.56279  86.18097  79.54452  75.66770  76.77920  78.91475 
##        57        58        59        60        61        62        63        64 
##  87.00232  70.24445  62.43238  76.94456  85.57690  82.32992  84.10415  84.08464 
##        65        66        67        68        69        70        71        72 
##  81.72510  88.61128  77.01994  84.45808  75.03575  84.58887  93.11545  78.11656 
##        73        74        75        76        77        78        79        80 
##  83.60987  87.48446  83.25982  87.59647  81.10361  79.45530  69.17038  75.34361 
##        81        82        83        84        85        86        87        88 
##  86.58620  91.02278  98.65784  83.24041  86.29588  81.38914  77.81345  83.29427 
##        89        90        91        92        93        94        95        96 
##  82.14307  85.78844  77.31626  90.17090  74.92238  80.27929  76.63840  76.41073 
##        97        98        99       100       101       102       103       104 
##  83.76351 101.49146  90.66066  91.80633  85.67709  75.74458  85.85636  82.51112 
##       105       106       107       108       109       110       111       112 
##  80.28514  75.74648  59.21657  80.05705  83.36447  63.89810  81.69559  80.89442 
##       113       114       115       116       117       118       119       120 
##  90.51339  88.42404  82.00004  79.88766  89.12636  79.28716  78.32773  70.56117 
##       121       122       123       124       125       126       127       128 
##  88.18073  64.83877  68.79647  62.89740  70.53486  89.14903  93.52098  77.13546 
##       129       130       131       132       133       134       135       136 
##  89.76420  96.00349  87.87496  79.55286  74.18762  83.65916  84.63120  67.92567 
##       137       138       139       140       141       142       143       144 
##  76.76088  79.31622  80.25903  79.00221  65.97271  70.88566  93.96534  80.09868 
##       145       146       147       148       149       150       151       152 
##  75.63502  76.66057  79.09194  81.58381  85.45157  81.03183  83.18578  79.69117 
##       153       154       155       156       157       158       159       160 
##  32.00533  74.74922  76.72696  73.53798  83.62346  70.38656  90.86799  71.82949 
##       161       162       163       164       165       166       167       168 
## 103.86302 102.94796  91.40787 103.43996  96.25437  92.15061  87.44536  83.28689 
##       169       170       171       172       173       174       175       176 
##  73.88550  80.44850  87.53529  83.90489  81.81791  91.73197  83.62750  78.62979 
##       177       178       179       180       181       182       183       184 
##  78.72177  78.62720  77.61974  80.23747  75.75891  82.42463  82.50687  83.40560 
##       185       186       187       188       189       190       191       192 
##  93.86719  84.08224  84.88270  59.90440  62.71131 106.61875  70.30532  79.80179 
##       193       194       195       196       197       198       199       200 
##  77.50981  80.91032  82.33698  71.26555  77.85090  81.87750  80.77272  86.39044 
##       201       202       203       204       205       206       207       208 
##  80.67028  82.42010  76.24716  85.64095  77.63218  78.86158  80.18659  76.75479 
##       209       210       211       212       213       214       215       216 
##  78.45877  74.04968 102.73424  94.95937  83.50166  71.04174  76.47425  88.75922 
##       217       218       219       220       221       222       223       224 
##  87.14607  86.32952  77.07959  76.85061  79.78128  75.26852  82.97115  79.90219 
##       225       226       227       228       229       230       231       232 
##  88.18673  76.73958  79.38705  80.08819  80.21071  76.68290  71.78995  94.28243 
##       233       234       235       236       237       238       239       240 
##  83.96099  86.59268  79.03424  74.35427  81.44309  78.25418  92.42152  75.30730 
##       241       242       243       244       245       246       247       248 
##  90.78059  88.86296  85.17144  83.49939  63.68535  86.98493  79.74425  82.77956 
##       249       250       251       252       253       254       255       256 
##  76.14821  84.12894  82.43395  59.19506  90.43189  46.23627  70.80823  77.18288 
##       257       258       259 
##  75.82183  77.87520  77.54615