##Libraries
library(caret)
## Warning: package 'caret' was built under R version 4.0.4
## Loading required package: lattice
## Loading required package: ggplot2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.4
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.0.5
## 
## Attaching package: 'Metrics'
## The following objects are masked from 'package:caret':
## 
##     precision, recall
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(graphics)
library(tinytex)
## Warning: package 'tinytex' was built under R version 4.0.5
##Importing data
# Directory that holds both CSV files. It defaults to the original hard-coded
# location; set the MONEYBALL_DATA_DIR environment variable to run the script
# on another machine without editing it.
data_dir <- Sys.getenv("MONEYBALL_DATA_DIR", unset = NA)
if (is.na(data_dir) || !nzchar(data_dir)) {
  data_dir <- "C:\\Users\\17814\\Downloads"
}

# Both files are read with a header row; `train` is the training file and
# `test` is the evaluation file.
train <- read.csv(file.path(data_dir, "moneyballtrainingdata.csv"), header = TRUE)
test <- read.csv(file.path(data_dir, "moneyballevaluationdata.csv"), header = TRUE)

# Summary statistics for columns 2 through 17 of the training data, i.e.
# everything after the first column (the printed output starts at TARGET_WINS).
summary(train[, 2:17])
##   TARGET_WINS     TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B 
##  Min.   :  0.00   Min.   : 891   Min.   : 69.0   Min.   :  0.00  
##  1st Qu.: 71.00   1st Qu.:1383   1st Qu.:208.0   1st Qu.: 34.00  
##  Median : 82.00   Median :1454   Median :238.0   Median : 47.00  
##  Mean   : 80.79   Mean   :1469   Mean   :241.2   Mean   : 55.25  
##  3rd Qu.: 92.00   3rd Qu.:1537   3rd Qu.:273.0   3rd Qu.: 72.00  
##  Max.   :146.00   Max.   :2554   Max.   :458.0   Max.   :223.00  
##                                                                  
##  TEAM_BATTING_HR  TEAM_BATTING_BB TEAM_BATTING_SO  TEAM_BASERUN_SB
##  Min.   :  0.00   Min.   :  0.0   Min.   :   0.0   Min.   :  0.0  
##  1st Qu.: 42.00   1st Qu.:451.0   1st Qu.: 548.0   1st Qu.: 66.0  
##  Median :102.00   Median :512.0   Median : 750.0   Median :101.0  
##  Mean   : 99.61   Mean   :501.6   Mean   : 735.6   Mean   :124.8  
##  3rd Qu.:147.00   3rd Qu.:580.0   3rd Qu.: 930.0   3rd Qu.:156.0  
##  Max.   :264.00   Max.   :878.0   Max.   :1399.0   Max.   :697.0  
##                                   NA's   :102      NA's   :131    
##  TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
##  Min.   :  0.0   Min.   :29.00    Min.   : 1137   Min.   :  0.0   
##  1st Qu.: 38.0   1st Qu.:50.50    1st Qu.: 1419   1st Qu.: 50.0   
##  Median : 49.0   Median :58.00    Median : 1518   Median :107.0   
##  Mean   : 52.8   Mean   :59.36    Mean   : 1779   Mean   :105.7   
##  3rd Qu.: 62.0   3rd Qu.:67.00    3rd Qu.: 1682   3rd Qu.:150.0   
##  Max.   :201.0   Max.   :95.00    Max.   :30132   Max.   :343.0   
##  NA's   :772     NA's   :2085                                     
##  TEAM_PITCHING_BB TEAM_PITCHING_SO  TEAM_FIELDING_E  TEAM_FIELDING_DP
##  Min.   :   0.0   Min.   :    0.0   Min.   :  65.0   Min.   : 52.0   
##  1st Qu.: 476.0   1st Qu.:  615.0   1st Qu.: 127.0   1st Qu.:131.0   
##  Median : 536.5   Median :  813.5   Median : 159.0   Median :149.0   
##  Mean   : 553.0   Mean   :  817.7   Mean   : 246.5   Mean   :146.4   
##  3rd Qu.: 611.0   3rd Qu.:  968.0   3rd Qu.: 249.2   3rd Qu.:164.0   
##  Max.   :3645.0   Max.   :19278.0   Max.   :1898.0   Max.   :228.0   
##                   NA's   :102                        NA's   :286
# Summary statistics for every column of the evaluation data, before any
# missing values are filled in (the NA counts appear in the printed output).
summary(test)
##      INDEX      TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B 
##  Min.   :   9   Min.   : 819   Min.   : 44.0   Min.   : 14.00  
##  1st Qu.: 708   1st Qu.:1387   1st Qu.:210.0   1st Qu.: 35.00  
##  Median :1249   Median :1455   Median :239.0   Median : 52.00  
##  Mean   :1264   Mean   :1469   Mean   :241.3   Mean   : 55.91  
##  3rd Qu.:1832   3rd Qu.:1548   3rd Qu.:278.5   3rd Qu.: 72.00  
##  Max.   :2525   Max.   :2170   Max.   :376.0   Max.   :155.00  
##                                                                
##  TEAM_BATTING_HR  TEAM_BATTING_BB TEAM_BATTING_SO  TEAM_BASERUN_SB
##  Min.   :  0.00   Min.   : 15.0   Min.   :   0.0   Min.   :  0.0  
##  1st Qu.: 44.50   1st Qu.:436.5   1st Qu.: 545.0   1st Qu.: 59.0  
##  Median :101.00   Median :509.0   Median : 686.0   Median : 92.0  
##  Mean   : 95.63   Mean   :499.0   Mean   : 709.3   Mean   :123.7  
##  3rd Qu.:135.50   3rd Qu.:565.5   3rd Qu.: 912.0   3rd Qu.:151.8  
##  Max.   :242.00   Max.   :792.0   Max.   :1268.0   Max.   :580.0  
##                                   NA's   :18       NA's   :13     
##  TEAM_BASERUN_CS  TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
##  Min.   :  0.00   Min.   :42.00    Min.   : 1155   Min.   :  0.0   
##  1st Qu.: 38.00   1st Qu.:53.50    1st Qu.: 1426   1st Qu.: 52.0   
##  Median : 49.50   Median :62.00    Median : 1515   Median :104.0   
##  Mean   : 52.32   Mean   :62.37    Mean   : 1813   Mean   :102.1   
##  3rd Qu.: 63.00   3rd Qu.:67.50    3rd Qu.: 1681   3rd Qu.:142.5   
##  Max.   :154.00   Max.   :96.00    Max.   :22768   Max.   :336.0   
##  NA's   :87       NA's   :240                                      
##  TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E  TEAM_FIELDING_DP
##  Min.   : 136.0   Min.   :   0.0   Min.   :  73.0   Min.   : 69.0   
##  1st Qu.: 471.0   1st Qu.: 613.0   1st Qu.: 131.0   1st Qu.:131.0   
##  Median : 526.0   Median : 745.0   Median : 163.0   Median :148.0   
##  Mean   : 552.4   Mean   : 799.7   Mean   : 249.7   Mean   :146.1   
##  3rd Qu.: 606.5   3rd Qu.: 938.0   3rd Qu.: 252.0   3rd Qu.:164.0   
##  Max.   :2008.0   Max.   :9963.0   Max.   :1568.0   Max.   :204.0   
##                   NA's   :18                        NA's   :31
##cleaning up missing values with median
# Columns that reported NA counts in the summaries above.
cols_with_na <- c(
  "TEAM_BATTING_SO", "TEAM_BASERUN_SB", "TEAM_BASERUN_CS",
  "TEAM_BATTING_HBP", "TEAM_PITCHING_SO", "TEAM_FIELDING_DP"
)

# Replace the NAs in one vector with the median of its non-missing values.
fill_na_with_median <- function(values) {
  replace(values, is.na(values), median(values, na.rm = TRUE))
}

# Each data set is filled from its OWN column medians.
# NOTE(review): the evaluation set is therefore imputed with evaluation-set
# medians rather than the training medians -- confirm this is intended.
# NOTE(review): TEAM_BATTING_HBP is NA in 2085 training rows according to the
# summary above, so after this step the column is mostly its median.
train[cols_with_na] <- lapply(train[cols_with_na], fill_na_with_median)
test[cols_with_na] <- lapply(test[cols_with_na], fill_na_with_median)

## New summary statistics, to confirm that no NAs remain after imputation
summary(train)
##      INDEX         TARGET_WINS     TEAM_BATTING_H TEAM_BATTING_2B
##  Min.   :   1.0   Min.   :  0.00   Min.   : 891   Min.   : 69.0  
##  1st Qu.: 630.8   1st Qu.: 71.00   1st Qu.:1383   1st Qu.:208.0  
##  Median :1270.5   Median : 82.00   Median :1454   Median :238.0  
##  Mean   :1268.5   Mean   : 80.79   Mean   :1469   Mean   :241.2  
##  3rd Qu.:1915.5   3rd Qu.: 92.00   3rd Qu.:1537   3rd Qu.:273.0  
##  Max.   :2535.0   Max.   :146.00   Max.   :2554   Max.   :458.0  
##  TEAM_BATTING_3B  TEAM_BATTING_HR  TEAM_BATTING_BB TEAM_BATTING_SO 
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.0   Min.   :   0.0  
##  1st Qu.: 34.00   1st Qu.: 42.00   1st Qu.:451.0   1st Qu.: 556.8  
##  Median : 47.00   Median :102.00   Median :512.0   Median : 750.0  
##  Mean   : 55.25   Mean   : 99.61   Mean   :501.6   Mean   : 736.3  
##  3rd Qu.: 72.00   3rd Qu.:147.00   3rd Qu.:580.0   3rd Qu.: 925.0  
##  Max.   :223.00   Max.   :264.00   Max.   :878.0   Max.   :1399.0  
##  TEAM_BASERUN_SB TEAM_BASERUN_CS  TEAM_BATTING_HBP TEAM_PITCHING_H
##  Min.   :  0.0   Min.   :  0.00   Min.   :29.00    Min.   : 1137  
##  1st Qu.: 67.0   1st Qu.: 44.00   1st Qu.:58.00    1st Qu.: 1419  
##  Median :101.0   Median : 49.00   Median :58.00    Median : 1518  
##  Mean   :123.4   Mean   : 51.51   Mean   :58.11    Mean   : 1779  
##  3rd Qu.:151.0   3rd Qu.: 54.25   3rd Qu.:58.00    3rd Qu.: 1682  
##  Max.   :697.0   Max.   :201.00   Max.   :95.00    Max.   :30132  
##  TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO  TEAM_FIELDING_E 
##  Min.   :  0.0    Min.   :   0.0   Min.   :    0.0   Min.   :  65.0  
##  1st Qu.: 50.0    1st Qu.: 476.0   1st Qu.:  626.0   1st Qu.: 127.0  
##  Median :107.0    Median : 536.5   Median :  813.5   Median : 159.0  
##  Mean   :105.7    Mean   : 553.0   Mean   :  817.5   Mean   : 246.5  
##  3rd Qu.:150.0    3rd Qu.: 611.0   3rd Qu.:  957.0   3rd Qu.: 249.2  
##  Max.   :343.0    Max.   :3645.0   Max.   :19278.0   Max.   :1898.0  
##  TEAM_FIELDING_DP
##  Min.   : 52.0   
##  1st Qu.:134.0   
##  Median :149.0   
##  Mean   :146.7   
##  3rd Qu.:161.2   
##  Max.   :228.0
# Same check for the evaluation data after its missing values were filled.
summary(test)
##      INDEX      TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B 
##  Min.   :   9   Min.   : 819   Min.   : 44.0   Min.   : 14.00  
##  1st Qu.: 708   1st Qu.:1387   1st Qu.:210.0   1st Qu.: 35.00  
##  Median :1249   Median :1455   Median :239.0   Median : 52.00  
##  Mean   :1264   Mean   :1469   Mean   :241.3   Mean   : 55.91  
##  3rd Qu.:1832   3rd Qu.:1548   3rd Qu.:278.5   3rd Qu.: 72.00  
##  Max.   :2525   Max.   :2170   Max.   :376.0   Max.   :155.00  
##  TEAM_BATTING_HR  TEAM_BATTING_BB TEAM_BATTING_SO  TEAM_BASERUN_SB
##  Min.   :  0.00   Min.   : 15.0   Min.   :   0.0   Min.   :  0.0  
##  1st Qu.: 44.50   1st Qu.:436.5   1st Qu.: 565.0   1st Qu.: 60.5  
##  Median :101.00   Median :509.0   Median : 686.0   Median : 92.0  
##  Mean   : 95.63   Mean   :499.0   Mean   : 707.7   Mean   :122.1  
##  3rd Qu.:135.50   3rd Qu.:565.5   3rd Qu.: 904.5   3rd Qu.:149.0  
##  Max.   :242.00   Max.   :792.0   Max.   :1268.0   Max.   :580.0  
##  TEAM_BASERUN_CS  TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
##  Min.   :  0.00   Min.   :42.00    Min.   : 1155   Min.   :  0.0   
##  1st Qu.: 44.00   1st Qu.:62.00    1st Qu.: 1426   1st Qu.: 52.0   
##  Median : 49.50   Median :62.00    Median : 1515   Median :104.0   
##  Mean   : 51.37   Mean   :62.03    Mean   : 1813   Mean   :102.1   
##  3rd Qu.: 56.00   3rd Qu.:62.00    3rd Qu.: 1681   3rd Qu.:142.5   
##  Max.   :154.00   Max.   :96.00    Max.   :22768   Max.   :336.0   
##  TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E  TEAM_FIELDING_DP
##  Min.   : 136.0   Min.   :   0.0   Min.   :  73.0   Min.   : 69.0   
##  1st Qu.: 471.0   1st Qu.: 622.5   1st Qu.: 131.0   1st Qu.:134.5   
##  Median : 526.0   Median : 745.0   Median : 163.0   Median :148.0   
##  Mean   : 552.4   Mean   : 795.9   Mean   : 249.7   Mean   :146.3   
##  3rd Qu.: 606.5   3rd Qu.: 927.5   3rd Qu.: 252.0   3rd Qu.:160.5   
##  Max.   :2008.0   Max.   :9963.0   Max.   :1568.0   Max.   :204.0
##Final Summary Stats
# NOTE(review): `train` is not modified between the previous summary(train)
# call and this one, so this prints the same table again.
summary(train)
##      INDEX         TARGET_WINS     TEAM_BATTING_H TEAM_BATTING_2B
##  Min.   :   1.0   Min.   :  0.00   Min.   : 891   Min.   : 69.0  
##  1st Qu.: 630.8   1st Qu.: 71.00   1st Qu.:1383   1st Qu.:208.0  
##  Median :1270.5   Median : 82.00   Median :1454   Median :238.0  
##  Mean   :1268.5   Mean   : 80.79   Mean   :1469   Mean   :241.2  
##  3rd Qu.:1915.5   3rd Qu.: 92.00   3rd Qu.:1537   3rd Qu.:273.0  
##  Max.   :2535.0   Max.   :146.00   Max.   :2554   Max.   :458.0  
##  TEAM_BATTING_3B  TEAM_BATTING_HR  TEAM_BATTING_BB TEAM_BATTING_SO 
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.0   Min.   :   0.0  
##  1st Qu.: 34.00   1st Qu.: 42.00   1st Qu.:451.0   1st Qu.: 556.8  
##  Median : 47.00   Median :102.00   Median :512.0   Median : 750.0  
##  Mean   : 55.25   Mean   : 99.61   Mean   :501.6   Mean   : 736.3  
##  3rd Qu.: 72.00   3rd Qu.:147.00   3rd Qu.:580.0   3rd Qu.: 925.0  
##  Max.   :223.00   Max.   :264.00   Max.   :878.0   Max.   :1399.0  
##  TEAM_BASERUN_SB TEAM_BASERUN_CS  TEAM_BATTING_HBP TEAM_PITCHING_H
##  Min.   :  0.0   Min.   :  0.00   Min.   :29.00    Min.   : 1137  
##  1st Qu.: 67.0   1st Qu.: 44.00   1st Qu.:58.00    1st Qu.: 1419  
##  Median :101.0   Median : 49.00   Median :58.00    Median : 1518  
##  Mean   :123.4   Mean   : 51.51   Mean   :58.11    Mean   : 1779  
##  3rd Qu.:151.0   3rd Qu.: 54.25   3rd Qu.:58.00    3rd Qu.: 1682  
##  Max.   :697.0   Max.   :201.00   Max.   :95.00    Max.   :30132  
##  TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO  TEAM_FIELDING_E 
##  Min.   :  0.0    Min.   :   0.0   Min.   :    0.0   Min.   :  65.0  
##  1st Qu.: 50.0    1st Qu.: 476.0   1st Qu.:  626.0   1st Qu.: 127.0  
##  Median :107.0    Median : 536.5   Median :  813.5   Median : 159.0  
##  Mean   :105.7    Mean   : 553.0   Mean   :  817.5   Mean   : 246.5  
##  3rd Qu.:150.0    3rd Qu.: 611.0   3rd Qu.:  957.0   3rd Qu.: 249.2  
##  Max.   :343.0    Max.   :3645.0   Max.   :19278.0   Max.   :1898.0  
##  TEAM_FIELDING_DP
##  Min.   : 52.0   
##  1st Qu.:134.0   
##  Median :149.0   
##  Mean   :146.7   
##  3rd Qu.:161.2   
##  Max.   :228.0
##correlation
# Pairwise correlation matrix over every column of the training data.
# The matrix includes the INDEX column as well as TARGET_WINS.
cor(train)
##                         INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B
## INDEX             1.000000000 -0.02105643  -0.0179202413      0.01118301
## TARGET_WINS      -0.021056435  1.00000000   0.3887675211      0.28910365
## TEAM_BATTING_H   -0.017920241  0.38876752   1.0000000000      0.56284968
## TEAM_BATTING_2B   0.011183013  0.28910365   0.5628496778      1.00000000
## TEAM_BATTING_3B  -0.005814683  0.14260841   0.4276965751     -0.10730582
## TEAM_BATTING_HR   0.051481047  0.17615320  -0.0065446845      0.43539729
## TEAM_BATTING_BB  -0.026567236  0.23255986  -0.0724640128      0.25572610
## TEAM_BATTING_SO   0.079147864 -0.03058135  -0.4526861592      0.15173438
## TEAM_BASERUN_SB   0.039365347  0.12361087   0.1078237673     -0.18340432
## TEAM_BASERUN_CS   0.001338699  0.01595982   0.0008261984     -0.04584955
## TEAM_BATTING_HBP  0.027793759  0.01651641  -0.0024521129      0.04359347
## TEAM_PITCHING_H   0.017103148 -0.10993705   0.3026937094      0.02369219
## TEAM_PITCHING_HR  0.050985897  0.18901373   0.0728531193      0.45455082
## TEAM_PITCHING_BB -0.015287513  0.12417454   0.0941930273      0.17805420
## TEAM_PITCHING_SO  0.054739946 -0.07579967  -0.2451699012      0.06213042
## TEAM_FIELDING_E  -0.009233126 -0.17648476   0.2649024778     -0.23515099
## TEAM_FIELDING_DP  0.019279238 -0.03008630   0.1248087998      0.25696798
##                  TEAM_BATTING_3B TEAM_BATTING_HR TEAM_BATTING_BB
## INDEX               -0.005814683     0.051481047     -0.02656724
## TARGET_WINS          0.142608411     0.176153200      0.23255986
## TEAM_BATTING_H       0.427696575    -0.006544685     -0.07246401
## TEAM_BATTING_2B     -0.107305824     0.435397293      0.25572610
## TEAM_BATTING_3B      1.000000000    -0.635566946     -0.28723584
## TEAM_BATTING_HR     -0.635566946     1.000000000      0.51373481
## TEAM_BATTING_BB     -0.287235841     0.513734810      1.00000000
## TEAM_BATTING_SO     -0.655709613     0.693007648      0.37148892
## TEAM_BASERUN_SB      0.485740156    -0.406889074     -0.04268402
## TEAM_BASERUN_CS      0.136181182    -0.225458666     -0.04581766
## TEAM_BATTING_HBP    -0.042734050     0.055506730      0.01861664
## TEAM_PITCHING_H      0.194879411    -0.250145481     -0.44977762
## TEAM_PITCHING_HR    -0.567836679     0.969371396      0.45955207
## TEAM_PITCHING_BB    -0.002224148     0.136927564      0.48936126
## TEAM_PITCHING_SO    -0.254238104     0.177418187     -0.02017989
## TEAM_FIELDING_E      0.509778447    -0.587339098     -0.65597081
## TEAM_FIELDING_DP    -0.227771884     0.391652434      0.32963974
##                  TEAM_BATTING_SO TEAM_BASERUN_SB TEAM_BASERUN_CS
## INDEX                 0.07914786      0.03936535    0.0013386990
## TARGET_WINS          -0.03058135      0.12361087    0.0159598172
## TEAM_BATTING_H       -0.45268616      0.10782377    0.0008261984
## TEAM_BATTING_2B       0.15173438     -0.18340432   -0.0458495544
## TEAM_BATTING_3B      -0.65570961      0.48574016    0.1361811823
## TEAM_BATTING_HR       0.69300765     -0.40688907   -0.2254586663
## TEAM_BATTING_BB       0.37148892     -0.04268402   -0.0458176601
## TEAM_BATTING_SO       1.00000000     -0.21178758   -0.1025019312
## TEAM_BASERUN_SB      -0.21178758      1.00000000    0.2332417104
## TEAM_BASERUN_CS      -0.10250193      0.23324171    1.0000000000
## TEAM_BATTING_HBP      0.06641291     -0.01794567   -0.0315285080
## TEAM_PITCHING_H      -0.37571553      0.03957227   -0.0525918342
## TEAM_PITCHING_HR      0.63286033     -0.38005624   -0.2281852483
## TEAM_PITCHING_BB      0.03498809      0.12928969   -0.0472289272
## TEAM_PITCHING_SO      0.41618159     -0.06424741   -0.0565380017
## TEAM_FIELDING_E      -0.58259305      0.32615276   -0.0291782138
## TEAM_FIELDING_DP      0.11089804     -0.27023400   -0.1020021365
##                  TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
## INDEX                 0.027793759     0.017103148       0.05098590
## TARGET_WINS           0.016516411    -0.109937054       0.18901373
## TEAM_BATTING_H       -0.002452113     0.302693709       0.07285312
## TEAM_BATTING_2B       0.043593471     0.023692188       0.45455082
## TEAM_BATTING_3B      -0.042734050     0.194879411      -0.56783668
## TEAM_BATTING_HR       0.055506730    -0.250145481       0.96937140
## TEAM_BATTING_BB       0.018616645    -0.449777625       0.45955207
## TEAM_BATTING_SO       0.066412909    -0.375715533       0.63286033
## TEAM_BASERUN_SB      -0.017945673     0.039572266      -0.38005624
## TEAM_BASERUN_CS      -0.031528508    -0.052591834      -0.22818525
## TEAM_BATTING_HBP      1.000000000    -0.006864463       0.05196343
## TEAM_PITCHING_H      -0.006864463     1.000000000      -0.14161276
## TEAM_PITCHING_HR      0.051963427    -0.141612759       1.00000000
## TEAM_PITCHING_BB      0.004508255     0.320676162       0.22193750
## TEAM_PITCHING_SO      0.025400354     0.266935871       0.19691491
## TEAM_FIELDING_E      -0.017626524     0.667759010      -0.49314447
## TEAM_FIELDING_DP     -0.007774257    -0.044647837       0.38959550
##                  TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E
## INDEX                -0.015287513      0.054739946    -0.009233126
## TARGET_WINS           0.124174536     -0.075799674    -0.176484759
## TEAM_BATTING_H        0.094193027     -0.245169901     0.264902478
## TEAM_BATTING_2B       0.178054204      0.062130422    -0.235150986
## TEAM_BATTING_3B      -0.002224148     -0.254238104     0.509778447
## TEAM_BATTING_HR       0.136927564      0.177418187    -0.587339098
## TEAM_BATTING_BB       0.489361263     -0.020179893    -0.655970815
## TEAM_BATTING_SO       0.034988093      0.416181592    -0.582593046
## TEAM_BASERUN_SB       0.129289686     -0.064247407     0.326152759
## TEAM_BASERUN_CS      -0.047228927     -0.056538002    -0.029178214
## TEAM_BATTING_HBP      0.004508255      0.025400354    -0.017626524
## TEAM_PITCHING_H       0.320676162      0.266935871     0.667759010
## TEAM_PITCHING_HR      0.221937505      0.196914911    -0.493144466
## TEAM_PITCHING_BB      1.000000000      0.482172000    -0.022837561
## TEAM_PITCHING_SO      0.482172000      1.000000000    -0.023322782
## TEAM_FIELDING_E      -0.022837561     -0.023322782     1.000000000
## TEAM_FIELDING_DP      0.192348657      0.009552324    -0.227394807
##                  TEAM_FIELDING_DP
## INDEX                 0.019279238
## TARGET_WINS          -0.030086302
## TEAM_BATTING_H        0.124808800
## TEAM_BATTING_2B       0.256967975
## TEAM_BATTING_3B      -0.227771884
## TEAM_BATTING_HR       0.391652434
## TEAM_BATTING_BB       0.329639737
## TEAM_BATTING_SO       0.110898035
## TEAM_BASERUN_SB      -0.270234003
## TEAM_BASERUN_CS      -0.102002137
## TEAM_BATTING_HBP     -0.007774257
## TEAM_PITCHING_H      -0.044647837
## TEAM_PITCHING_HR      0.389595503
## TEAM_PITCHING_BB      0.192348657
## TEAM_PITCHING_SO      0.009552324
## TEAM_FIELDING_E      -0.227394807
## TEAM_FIELDING_DP      1.000000000
# Pairwise correlation matrix over every column of the evaluation data
# (INDEX included; this data set has no TARGET_WINS column in the output).
cor(test)
##                         INDEX TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## INDEX             1.000000000   -0.003457155     0.032046194     0.025177894
## TEAM_BATTING_H   -0.003457155    1.000000000     0.624905809     0.430419749
## TEAM_BATTING_2B   0.032046194    0.624905809     1.000000000     0.003455806
## TEAM_BATTING_3B   0.025177894    0.430419749     0.003455806     1.000000000
## TEAM_BATTING_HR   0.063303334    0.097305913     0.422890506    -0.531913902
## TEAM_BATTING_BB   0.010955620    0.178479481     0.368121773    -0.002859557
## TEAM_BATTING_SO   0.158353667   -0.460559158     0.044597395    -0.595937902
## TEAM_BASERUN_SB   0.021703477    0.153725354    -0.140463351     0.507602437
## TEAM_BASERUN_CS   0.083968204    0.033611495     0.047579599     0.122876495
## TEAM_BATTING_HBP  0.050855426    0.003020941    -0.016604053    -0.015793325
## TEAM_PITCHING_H   0.033420809    0.231400887    -0.185528011     0.150439843
## TEAM_PITCHING_HR  0.073473674    0.167273848     0.412568887    -0.414772447
## TEAM_PITCHING_BB  0.058575739    0.234012526     0.156393712     0.301234789
## TEAM_PITCHING_SO  0.156509867   -0.398007572    -0.204195969    -0.147916706
## TEAM_FIELDING_E   0.005424957    0.114796481    -0.321932501     0.397313906
## TEAM_FIELDING_DP  0.029123782    0.213565179     0.344801350    -0.179655726
##                  TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO
## INDEX                 0.06330333    0.0109556197      0.15835367
## TEAM_BATTING_H        0.09730591    0.1784794814     -0.46055916
## TEAM_BATTING_2B       0.42289051    0.3681217727      0.04459739
## TEAM_BATTING_3B      -0.53191390   -0.0028595571     -0.59593790
## TEAM_BATTING_HR       1.00000000    0.4494817976      0.62874436
## TEAM_BATTING_BB       0.44948180    1.0000000000      0.20819320
## TEAM_BATTING_SO       0.62874436    0.2081932003      1.00000000
## TEAM_BASERUN_SB      -0.39459124   -0.0003214402     -0.23351680
## TEAM_BASERUN_CS      -0.17704634    0.0287176499     -0.05852908
## TEAM_BATTING_HBP      0.03701944   -0.0663518889      0.02206774
## TEAM_PITCHING_H      -0.23492162   -0.4357893854     -0.39111288
## TEAM_PITCHING_HR      0.92580412    0.3692659236      0.52347273
## TEAM_PITCHING_BB      0.07063501    0.4237480076     -0.11353606
## TEAM_PITCHING_SO      0.09423620   -0.1396864030      0.41680102
## TEAM_FIELDING_E      -0.57823629   -0.6263606352     -0.56795695
## TEAM_FIELDING_DP      0.40627970    0.2317689893      0.08612598
##                  TEAM_BASERUN_SB TEAM_BASERUN_CS TEAM_BATTING_HBP
## INDEX               0.0217034769     0.083968204      0.050855426
## TEAM_BATTING_H      0.1537253540     0.033611495      0.003020941
## TEAM_BATTING_2B    -0.1404633508     0.047579599     -0.016604053
## TEAM_BATTING_3B     0.5076024368     0.122876495     -0.015793325
## TEAM_BATTING_HR    -0.3945912364    -0.177046344      0.037019435
## TEAM_BATTING_BB    -0.0003214402     0.028717650     -0.066351889
## TEAM_BATTING_SO    -0.2335168039    -0.058529078      0.022067737
## TEAM_BASERUN_SB     1.0000000000     0.289450394     -0.021586849
## TEAM_BASERUN_CS     0.2894503944     1.000000000     -0.015157275
## TEAM_BATTING_HBP   -0.0215868488    -0.015157275      1.000000000
## TEAM_PITCHING_H     0.0515111613    -0.128806581     -0.001273268
## TEAM_PITCHING_HR   -0.3111451694    -0.184877928      0.035657719
## TEAM_PITCHING_BB    0.2165139009    -0.006344267     -0.048408821
## TEAM_PITCHING_SO   -0.0831972400    -0.043420787      0.007571710
## TEAM_FIELDING_E     0.3210643032    -0.027603684     -0.013049220
## TEAM_FIELDING_DP   -0.3521561769    -0.206196525      0.094152451
##                  TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB
## INDEX                0.033420809       0.07347367      0.058575739
## TEAM_BATTING_H       0.231400887       0.16727385      0.234012526
## TEAM_BATTING_2B     -0.185528011       0.41256889      0.156393712
## TEAM_BATTING_3B      0.150439843      -0.41477245      0.301234789
## TEAM_BATTING_HR     -0.234921623       0.92580412      0.070635012
## TEAM_BATTING_BB     -0.435789385       0.36926592      0.423748008
## TEAM_BATTING_SO     -0.391112875       0.52347273     -0.113536059
## TEAM_BASERUN_SB      0.051511161      -0.31114517      0.216513901
## TEAM_BASERUN_CS     -0.128806581      -0.18487793     -0.006344267
## TEAM_BATTING_HBP    -0.001273268       0.03565772     -0.048408821
## TEAM_PITCHING_H      1.000000000      -0.03223577      0.259412178
## TEAM_PITCHING_HR    -0.032235770       1.00000000      0.362068264
## TEAM_PITCHING_BB     0.259412178       0.36206826      1.000000000
## TEAM_PITCHING_SO     0.133016040       0.22189911      0.379054554
## TEAM_FIELDING_E      0.684568722      -0.38155409      0.126263657
## TEAM_FIELDING_DP    -0.002364655       0.40556060      0.130657231
##                  TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP
## INDEX                 0.156509867     0.005424957      0.029123782
## TEAM_BATTING_H       -0.398007572     0.114796481      0.213565179
## TEAM_BATTING_2B      -0.204195969    -0.321932501      0.344801350
## TEAM_BATTING_3B      -0.147916706     0.397313906     -0.179655726
## TEAM_BATTING_HR       0.094236196    -0.578236294      0.406279696
## TEAM_BATTING_BB      -0.139686403    -0.626360635      0.231768989
## TEAM_BATTING_SO       0.416801016    -0.567956953      0.086125982
## TEAM_BASERUN_SB      -0.083197240     0.321064303     -0.352156177
## TEAM_BASERUN_CS      -0.043420787    -0.027603684     -0.206196525
## TEAM_BATTING_HBP      0.007571710    -0.013049220      0.094152451
## TEAM_PITCHING_H       0.133016040     0.684568722     -0.002364655
## TEAM_PITCHING_HR      0.221899111    -0.381554088      0.405560595
## TEAM_PITCHING_BB      0.379054554     0.126263657      0.130657231
## TEAM_PITCHING_SO      1.000000000     0.063705823      0.008982151
## TEAM_FIELDING_E       0.063705823     1.000000000     -0.207059598
## TEAM_FIELDING_DP      0.008982151    -0.207059598      1.000000000
##Histograms
# One default histogram per listed column of `train`, drawn in this order.
hist_columns <- c(
  "TARGET_WINS",
  "TEAM_BATTING_H",
  "TEAM_BATTING_2B",
  "TEAM_BATTING_3B",
  "TEAM_BATTING_HR",
  "TEAM_BATTING_BB",
  "TEAM_BATTING_SO",
  "TEAM_BASERUN_SB",
  "TEAM_BASERUN_CS",
  "TEAM_BATTING_HBP",
  "TEAM_PITCHING_H",
  "TEAM_PITCHING_HR",
  "TEAM_PITCHING_BB",
  "TEAM_PITCHING_SO",
  "TEAM_FIELDING_E",
  "TEAM_FIELDING_DP"
)

for (column in hist_columns) {
  # hist() builds its default title and x-axis label from the text of its
  # argument; spell out the `train$<column>` label so the plots keep the same
  # titles they had when each call was written out by hand.
  label <- paste0("train$", column)
  hist(train[[column]], main = paste("Histogram of", label), xlab = label)
}

## lm

# Seed the RNG before caret resamples, so the reported metrics are repeatable.
set.seed(105)

# Modelling frame: every column of `train` except INDEX.
train_new <- dplyr::select(train, -INDEX)

# Fit TARGET_WINS on all remaining columns through caret with method "lm".
# `train(...)` still resolves to the caret function although a data frame
# named `train` exists, because R skips non-function bindings for a call.
# NOTE(review): the result is stored under the name `lm`, which shadows the
# stats::lm name for plain variable lookups; later lines rely on this name.
lm <- train(TARGET_WINS ~ ., data = train_new, method = "lm")

# Print the fitted caret object (resampling summary).
lm
## Linear Regression 
## 
## 2276 samples
##   15 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 2276, 2276, 2276, 2276, 2276, 2276, ... 
## Resampling results:
## 
##   RMSE      Rsquared   MAE     
##   13.38482  0.2828243  10.32272
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Coefficient table and fit statistics for the caret-fitted model stored in
# the variable `lm` (the fitted object, not the stats::lm function).
summary(lm) 
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.745  -8.623   0.137   8.390  58.605 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      21.0038417  6.7925780   3.092 0.002011 ** 
## TEAM_BATTING_H    0.0489011  0.0036954  13.233  < 2e-16 ***
## TEAM_BATTING_2B  -0.0210986  0.0091822  -2.298 0.021666 *  
## TEAM_BATTING_3B   0.0645246  0.0168064   3.839 0.000127 ***
## TEAM_BATTING_HR   0.0525039  0.0274974   1.909 0.056335 .  
## TEAM_BATTING_BB   0.0104483  0.0058384   1.790 0.073657 .  
## TEAM_BATTING_SO  -0.0084975  0.0025484  -3.334 0.000869 ***
## TEAM_BASERUN_SB   0.0254442  0.0043572   5.840 5.99e-09 ***
## TEAM_BASERUN_CS  -0.0108293  0.0157886  -0.686 0.492852    
## TEAM_BATTING_HBP  0.0466590  0.0730825   0.638 0.523250    
## TEAM_PITCHING_H  -0.0008451  0.0003674  -2.300 0.021540 *  
## TEAM_PITCHING_HR  0.0131780  0.0243950   0.540 0.589116    
## TEAM_PITCHING_BB  0.0007612  0.0041578   0.183 0.854747    
## TEAM_PITCHING_SO  0.0028222  0.0009221   3.061 0.002235 ** 
## TEAM_FIELDING_E  -0.0195730  0.0024620  -7.950 2.92e-15 ***
## TEAM_FIELDING_DP -0.1215789  0.0129476  -9.390  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.08 on 2260 degrees of freedom
## Multiple R-squared:  0.3155, Adjusted R-squared:  0.311 
## F-statistic: 69.45 on 15 and 2260 DF,  p-value: < 2.2e-16
# Refit the same formula directly with the lm() function so the standard
# diagnostic plots can be drawn. The call `lm(...)` finds the function even
# though a variable named `lm` holds the caret fit, because R skips
# non-function bindings when evaluating a call.
lmplot <- lm(TARGET_WINS~ ., data=train_new)
# Draw the diagnostic plots for the fitted linear model.
plot(lmplot)

# Predictions for the evaluation data from the caret fit stored in `lm`.
prediction <- predict(lm, newdata = test)

# New variables, derived identically on the training and evaluation frames.

# TEAM_BATTING_WALKS = TEAM_BATTING_BB + TEAM_BATTING_HBP
train$TEAM_BATTING_WALKS <- with(train, TEAM_BATTING_BB + TEAM_BATTING_HBP)
test$TEAM_BATTING_WALKS <- with(test, TEAM_BATTING_BB + TEAM_BATTING_HBP)

# TEAM_BATTING_1B = hits that are not doubles, triples or home runs
train$TEAM_BATTING_1B <- with(
  train,
  TEAM_BATTING_H - TEAM_BATTING_2B - TEAM_BATTING_3B - TEAM_BATTING_HR
)
test$TEAM_BATTING_1B <- with(
  test,
  TEAM_BATTING_H - TEAM_BATTING_2B - TEAM_BATTING_3B - TEAM_BATTING_HR
)

# Distributions of the two derived columns in the training data.
hist(train$TEAM_BATTING_WALKS)

hist(train$TEAM_BATTING_1B)

# Confirm the new columns are present alongside the original ones.
summary(train)
##      INDEX         TARGET_WINS     TEAM_BATTING_H TEAM_BATTING_2B
##  Min.   :   1.0   Min.   :  0.00   Min.   : 891   Min.   : 69.0  
##  1st Qu.: 630.8   1st Qu.: 71.00   1st Qu.:1383   1st Qu.:208.0  
##  Median :1270.5   Median : 82.00   Median :1454   Median :238.0  
##  Mean   :1268.5   Mean   : 80.79   Mean   :1469   Mean   :241.2  
##  3rd Qu.:1915.5   3rd Qu.: 92.00   3rd Qu.:1537   3rd Qu.:273.0  
##  Max.   :2535.0   Max.   :146.00   Max.   :2554   Max.   :458.0  
##  TEAM_BATTING_3B  TEAM_BATTING_HR  TEAM_BATTING_BB TEAM_BATTING_SO 
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.0   Min.   :   0.0  
##  1st Qu.: 34.00   1st Qu.: 42.00   1st Qu.:451.0   1st Qu.: 556.8  
##  Median : 47.00   Median :102.00   Median :512.0   Median : 750.0  
##  Mean   : 55.25   Mean   : 99.61   Mean   :501.6   Mean   : 736.3  
##  3rd Qu.: 72.00   3rd Qu.:147.00   3rd Qu.:580.0   3rd Qu.: 925.0  
##  Max.   :223.00   Max.   :264.00   Max.   :878.0   Max.   :1399.0  
##  TEAM_BASERUN_SB TEAM_BASERUN_CS  TEAM_BATTING_HBP TEAM_PITCHING_H
##  Min.   :  0.0   Min.   :  0.00   Min.   :29.00    Min.   : 1137  
##  1st Qu.: 67.0   1st Qu.: 44.00   1st Qu.:58.00    1st Qu.: 1419  
##  Median :101.0   Median : 49.00   Median :58.00    Median : 1518  
##  Mean   :123.4   Mean   : 51.51   Mean   :58.11    Mean   : 1779  
##  3rd Qu.:151.0   3rd Qu.: 54.25   3rd Qu.:58.00    3rd Qu.: 1682  
##  Max.   :697.0   Max.   :201.00   Max.   :95.00    Max.   :30132  
##  TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO  TEAM_FIELDING_E 
##  Min.   :  0.0    Min.   :   0.0   Min.   :    0.0   Min.   :  65.0  
##  1st Qu.: 50.0    1st Qu.: 476.0   1st Qu.:  626.0   1st Qu.: 127.0  
##  Median :107.0    Median : 536.5   Median :  813.5   Median : 159.0  
##  Mean   :105.7    Mean   : 553.0   Mean   :  817.5   Mean   : 246.5  
##  3rd Qu.:150.0    3rd Qu.: 611.0   3rd Qu.:  957.0   3rd Qu.: 249.2  
##  Max.   :343.0    Max.   :3645.0   Max.   :19278.0   Max.   :1898.0  
##  TEAM_FIELDING_DP TEAM_BATTING_WALKS TEAM_BATTING_1B 
##  Min.   : 52.0    Min.   : 58.0      Min.   : 709.0  
##  1st Qu.:134.0    1st Qu.:509.0      1st Qu.: 990.8  
##  Median :149.0    Median :571.0      Median :1050.0  
##  Mean   :146.7    Mean   :559.7      Mean   :1073.2  
##  3rd Qu.:161.2    3rd Qu.:638.0      3rd Qu.:1129.0  
##  Max.   :228.0    Max.   :936.0      Max.   :2112.0
## lm2

# Second model: explicit predictor list on `train`, using the derived
# TEAM_BATTING_WALKS column and leaving out TEAM_BATTING_BB,
# TEAM_BATTING_HBP and TEAM_BASERUN_CS.
# Seed the RNG first so the bootstrap resamples are reproducible; models that
# are later compared with resamples() should be fit from the same seed so they
# are all evaluated on identical resamples.
set.seed(123)
lm2 <- train(
  TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
    TEAM_BATTING_HR + TEAM_BATTING_WALKS + TEAM_BATTING_SO +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_PITCHING_HR +
    TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E +
    TEAM_FIELDING_DP,
  data = train,
  method = "lm"
)
lm2
## Linear Regression 
## 
## 2276 samples
##   13 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 2276, 2276, 2276, 2276, 2276, 2276, ... 
## Resampling results:
## 
##   RMSE      Rsquared  MAE     
##   13.43261  0.28126   10.29282
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Coefficient table and fit statistics of the linear model underlying `lm2`.
summary(lm2)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.768  -8.583   0.103   8.418  58.551 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        22.3197243  5.3902520   4.141 3.59e-05 ***
## TEAM_BATTING_H      0.0488792  0.0036939  13.232  < 2e-16 ***
## TEAM_BATTING_2B    -0.0212600  0.0091702  -2.318 0.020518 *  
## TEAM_BATTING_3B     0.0649115  0.0167887   3.866 0.000114 ***
## TEAM_BATTING_HR     0.0540036  0.0273588   1.974 0.048514 *  
## TEAM_BATTING_WALKS  0.0107842  0.0058163   1.854 0.063847 .  
## TEAM_BATTING_SO    -0.0084454  0.0025465  -3.317 0.000926 ***
## TEAM_BASERUN_SB     0.0247746  0.0042567   5.820 6.71e-09 ***
## TEAM_PITCHING_H    -0.0008541  0.0003666  -2.330 0.019910 *  
## TEAM_PITCHING_HR    0.0127981  0.0243677   0.525 0.599489    
## TEAM_PITCHING_BB    0.0007455  0.0041470   0.180 0.857347    
## TEAM_PITCHING_SO    0.0028310  0.0009213   3.073 0.002146 ** 
## TEAM_FIELDING_E    -0.0191418  0.0023992  -7.978 2.33e-15 ***
## TEAM_FIELDING_DP   -0.1219967  0.0129335  -9.433  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.07 on 2262 degrees of freedom
## Multiple R-squared:  0.3153, Adjusted R-squared:  0.3114 
## F-statistic: 80.12 on 13 and 2262 DF,  p-value: < 2.2e-16
# Refit the lm2 specification with stats::lm() to draw the standard
# regression diagnostic plots.
lmplot2 <- lm(
  formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
    TEAM_BATTING_HR + TEAM_BATTING_WALKS + TEAM_BATTING_SO +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_PITCHING_HR +
    TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E +
    TEAM_FIELDING_DP,
  data = train
)
plot(lmplot2)

# Predicted wins for the evaluation data from the caret fit `lm2`.
prediction2 <- predict(lm2, newdata = test)

## lm3 
# Third model: same predictors as lm2 except that total hits (TEAM_BATTING_H)
# is replaced by the derived singles column TEAM_BATTING_1B.
# Seed the RNG first so the bootstrap resamples are reproducible; models that
# are later compared with resamples() should be fit from the same seed so they
# are all evaluated on identical resamples.
set.seed(123)
lm3 <- train(
  TARGET_WINS ~ TEAM_BATTING_1B + TEAM_BATTING_2B + TEAM_BATTING_3B +
    TEAM_BATTING_HR + TEAM_BATTING_WALKS + TEAM_BATTING_SO +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_PITCHING_HR +
    TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E +
    TEAM_FIELDING_DP,
  data = train,
  method = "lm"
)
lm3
## Linear Regression 
## 
## 2276 samples
##   13 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 2276, 2276, 2276, 2276, 2276, 2276, ... 
## Resampling results:
## 
##   RMSE      Rsquared   MAE     
##   13.63651  0.2656024  10.39976
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Coefficient table and fit statistics of the linear model underlying `lm3`.
summary(lm3)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.768  -8.583   0.103   8.418  58.551 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        22.3197243  5.3902520   4.141 3.59e-05 ***
## TEAM_BATTING_1B     0.0488792  0.0036939  13.232  < 2e-16 ***
## TEAM_BATTING_2B     0.0276192  0.0073239   3.771 0.000167 ***
## TEAM_BATTING_3B     0.1137906  0.0159198   7.148 1.19e-12 ***
## TEAM_BATTING_HR     0.1028828  0.0274671   3.746 0.000184 ***
## TEAM_BATTING_WALKS  0.0107842  0.0058163   1.854 0.063847 .  
## TEAM_BATTING_SO    -0.0084454  0.0025465  -3.317 0.000926 ***
## TEAM_BASERUN_SB     0.0247746  0.0042567   5.820 6.71e-09 ***
## TEAM_PITCHING_H    -0.0008541  0.0003666  -2.330 0.019910 *  
## TEAM_PITCHING_HR    0.0127981  0.0243677   0.525 0.599489    
## TEAM_PITCHING_BB    0.0007455  0.0041470   0.180 0.857347    
## TEAM_PITCHING_SO    0.0028310  0.0009213   3.073 0.002146 ** 
## TEAM_FIELDING_E    -0.0191418  0.0023992  -7.978 2.33e-15 ***
## TEAM_FIELDING_DP   -0.1219967  0.0129335  -9.433  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.07 on 2262 degrees of freedom
## Multiple R-squared:  0.3153, Adjusted R-squared:  0.3114 
## F-statistic: 80.12 on 13 and 2262 DF,  p-value: < 2.2e-16
# Refit the lm3 specification with stats::lm() to draw the standard
# regression diagnostic plots.
lmplot3 <- lm(
  formula = TARGET_WINS ~ TEAM_BATTING_1B + TEAM_BATTING_2B + TEAM_BATTING_3B +
    TEAM_BATTING_HR + TEAM_BATTING_WALKS + TEAM_BATTING_SO +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_PITCHING_HR +
    TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E +
    TEAM_FIELDING_DP,
  data = train
)
plot(lmplot3)

# Predicted wins for the evaluation data from the caret fit `lm3`.
prediction3 <- predict(lm3, newdata = test)

## lm4

# Fourth model: the lm3 predictor list with TEAM_PITCHING_HR and
# TEAM_PITCHING_BB removed, leaving 11 predictors.
# Seed the RNG first so the bootstrap resamples are reproducible; models that
# are later compared with resamples() should be fit from the same seed so they
# are all evaluated on identical resamples.
set.seed(123)
lm4 <- train(
  TARGET_WINS ~ TEAM_BATTING_1B + TEAM_BATTING_2B + TEAM_BATTING_3B +
    TEAM_BATTING_HR + TEAM_BATTING_WALKS + TEAM_BATTING_SO +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_PITCHING_SO +
    TEAM_FIELDING_E + TEAM_FIELDING_DP,
  data = train,
  method = "lm"
)
lm4
## Linear Regression 
## 
## 2276 samples
##   11 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 2276, 2276, 2276, 2276, 2276, 2276, ... 
## Resampling results:
## 
##   RMSE      Rsquared   MAE    
##   13.12713  0.2989129  10.2186
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Coefficient table and fit statistics of the linear model underlying `lm4`.
summary(lm4)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.597  -8.606   0.091   8.452  58.593 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        21.6480329  5.3017083   4.083 4.60e-05 ***
## TEAM_BATTING_1B     0.0490996  0.0036697  13.380  < 2e-16 ***
## TEAM_BATTING_2B     0.0276725  0.0073202   3.780 0.000161 ***
## TEAM_BATTING_3B     0.1156645  0.0156857   7.374 2.31e-13 ***
## TEAM_BATTING_HR     0.1164379  0.0087994  13.232  < 2e-16 ***
## TEAM_BATTING_WALKS  0.0116157  0.0033693   3.448 0.000576 ***
## TEAM_BATTING_SO    -0.0085273  0.0024520  -3.478 0.000515 ***
## TEAM_BASERUN_SB     0.0248952  0.0042078   5.916 3.79e-09 ***
## TEAM_PITCHING_H    -0.0007768  0.0003209  -2.420 0.015585 *  
## TEAM_PITCHING_SO    0.0029673  0.0006719   4.416 1.05e-05 ***
## TEAM_FIELDING_E    -0.0189976  0.0023899  -7.949 2.93e-15 ***
## TEAM_FIELDING_DP   -0.1217989  0.0129254  -9.423  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.07 on 2264 degrees of freedom
## Multiple R-squared:  0.3151, Adjusted R-squared:  0.3118 
## F-statistic:  94.7 on 11 and 2264 DF,  p-value: < 2.2e-16
# Diagnostic plots for the lm4 specification. The formula must list the same
# 11 predictors as the train() call that produced lm4 (TEAM_PITCHING_HR and
# TEAM_PITCHING_BB are excluded); with those two terms left in, the plots
# would describe the lm3 model instead.
lmplot4 <- lm(
  TARGET_WINS ~ TEAM_BATTING_1B + TEAM_BATTING_2B + TEAM_BATTING_3B +
    TEAM_BATTING_HR + TEAM_BATTING_WALKS + TEAM_BATTING_SO +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_PITCHING_SO +
    TEAM_FIELDING_E + TEAM_FIELDING_DP,
  data = train
)
plot(lmplot4)

# Predict wins for the evaluation data with lm4 and write them to CSV,
# keyed by the evaluation set's INDEX column.
prediction4 <- predict(lm4, test)
s <- data.frame(Id = test$INDEX, TARGET_WINS = prediction4)
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
write.csv(s, file = "Kevin Clifford_Moneyball.csv", row.names = FALSE)


# Collect the four caret fits and summarise their resampling metrics
# (MAE, RMSE, Rsquared) side by side.
# The collection is named `models` rather than `list` so that base::list()
# is not shadowed by a data object.
# NOTE(review): resamples() lines the models up resample by resample, so the
# comparison is only meaningful when every train() call drew the same
# bootstrap samples. Confirm a common seed is set before each fit.
models <- list(lm = lm, lm2 = lm2, lm3 = lm3, lm4 = lm4)
resamps <- resamples(models)
summary(resamps)
## 
## Call:
## summary.resamples(object = resamps)
## 
## Models: lm, lm2, lm3, lm4 
## Number of resamples: 25 
## 
## MAE 
##         Min.  1st Qu.   Median     Mean  3rd Qu.     Max. NA's
## lm  9.961277 10.16730 10.25491 10.32272 10.49488 10.88444    0
## lm2 9.835041 10.08716 10.31951 10.29282 10.47781 10.99912    0
## lm3 9.952265 10.27558 10.41906 10.39976 10.51371 10.71350    0
## lm4 9.688258 10.05410 10.24484 10.21860 10.34119 10.72937    0
## 
## RMSE 
##         Min.  1st Qu.   Median     Mean  3rd Qu.     Max. NA's
## lm  12.60857 13.01832 13.29922 13.38482 13.48333 15.06184    0
## lm2 12.85879 12.97175 13.38597 13.43261 13.57570 15.79908    0
## lm3 12.83118 13.21200 13.49722 13.63651 13.80887 16.80105    0
## lm4 12.42088 12.81473 13.16864 13.12713 13.30879 13.93305    0
## 
## Rsquared 
##          Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## lm  0.1637539 0.2720980 0.2982756 0.2828243 0.3065852 0.3525814    0
## lm2 0.1646494 0.2586982 0.2934729 0.2812600 0.3161060 0.3465058    0
## lm3 0.1270404 0.2364335 0.2682614 0.2656024 0.3018795 0.3311813    0
## lm4 0.2294138 0.2667416 0.3044849 0.2989129 0.3202999 0.3567697    0
# Box-and-whisker plot of the per-resample RMSE for each model in `resamps`.
bwplot(resamps, metric = "RMSE")

## Predictions from the fourth model (lm4), one value per evaluation row
prediction4
##         1         2         3         4         5         6         7         8 
##  64.00418  65.62409  75.29557  85.65602  66.44192  69.70606  78.38523  77.51147 
##         9        10        11        12        13        14        15        16 
##  71.16935  74.21666  69.88395  82.59223  82.17443  82.18074  84.66775  77.45826 
##        17        18        19        20        21        22        23        24 
##  74.83401  78.50093  73.52244  91.49028  81.59694  83.84399  81.60889  72.50299 
##        25        26        27        28        29        30        31        32 
##  81.66746  86.58812  52.07475  75.70539  84.40925  75.98160  90.88176  85.57752 
##        33        34        35        36        37        38        39        40 
##  82.63780  85.03399  81.02759  87.04153  76.14703  90.74307  85.69786  92.98750 
##        41        42        43        44        45        46        47        48 
##  82.62593  90.54931  29.15563 100.31875  89.57609  92.85505  98.12423  77.66925 
##        49        50        51        52        53        54        55        56 
##  70.49839  79.94731  76.80872  84.82076  78.32531  74.05676  75.97034  78.74607 
##        57        58        59        60        61        62        63        64 
##  92.92158  75.56843  65.28878  80.46285  87.05764  74.48347  87.96369  85.19227 
##        65        66        67        68        69        70        71        72 
##  83.09928  94.76139  78.20840  83.42524  78.38069  89.11923  86.92432  69.56969 
##        73        74        75        76        77        78        79        80 
##  77.32890  89.30181  82.27432  86.40923  81.62466  83.25395  73.59094  77.43153 
##        81        82        83        84        85        86        87        88 
##  84.57377  89.10235  97.79658  75.10009  86.11691  79.73819  82.27755  83.51349 
##        89        90        91        92        93        94        95        96 
##  87.26894  89.58173  78.47304  84.39198  75.74736  86.22996  84.94220  84.72691 
##        97        98        99       100       101       102       103       104 
##  88.30910 104.65980  87.39215  87.05741  80.24811  74.66593  84.21093  84.27829 
##       105       106       107       108       109       110       111       112 
##  80.04728  70.39091  52.96351  77.39627  86.53134  59.56109  83.32387  83.44210 
##       113       114       115       116       117       118       119       120 
##  92.86681  91.02426  80.88640  78.04504  85.44422  80.09144  75.17375  73.79603 
##       121       122       123       124       125       126       127       128 
##  90.48837  70.65497  70.83683  69.08646  69.74598  88.54585  92.73335  77.91948 
##       129       130       131       132       133       134       135       136 
##  93.59647  92.66841  86.84226  78.58701  79.69745  85.92237  86.90648  73.01206 
##       137       138       139       140       141       142       143       144 
##  73.85382  77.47655  84.26845  80.40082  67.96089  74.49130  90.74362  74.65175 
##       145       146       147       148       149       150       151       152 
##  71.83890  72.47550  78.04253  78.67625  78.72615  82.74561  82.23046  80.05632 
##       153       154       155       156       157       158       159       160 
##  37.29047  71.39215  77.18104  70.59751  88.73258  65.04440  96.33973  75.91820 
##       161       162       163       164       165       166       167       168 
## 105.54188 107.24926  94.37640 104.60994  98.61810  89.51059  81.83453  80.44973 
##       169       170       171       172       173       174       175       176 
##  72.80115  80.09204  90.02777  88.62996  81.05232  94.21169  84.31884  73.73865 
##       177       178       179       180       181       182       183       184 
##  77.40234  71.33349  74.78871  79.37394  84.07195  88.47303  84.60065  85.33382 
##       185       186       187       188       189       190       191       192 
##  88.20775  92.27669  86.87870  54.23700  58.84117 112.16294  74.31672  82.82364 
##       193       194       195       196       197       198       199       200 
##  77.33662  77.69147  80.77480  68.79504  79.24417  84.58739  80.10226  85.41686 
##       201       202       203       204       205       206       207       208 
##  77.43499  80.55952  74.74920  87.99361  80.13479  83.45366  78.03317  77.72922 
##       209       210       211       212       213       214       215       216 
##  79.98223  73.16086 104.81745  93.42431  82.85094  65.83109  69.28941  84.50872 
##       217       218       219       220       221       222       223       224 
##  79.89789  91.71781  77.39208  78.59881  78.62456  73.86714  81.57110  74.01862 
##       225       226       227       228       229       230       231       232 
##  86.45604  74.94264  81.65661  79.67162  81.59493  70.80176  80.07643  92.78061 
##       233       234       235       236       237       238       239       240 
##  78.67870  88.92478  80.73262  75.65273  83.45927  77.42813  91.96333  73.15729 
##       241       242       243       244       245       246       247       248 
##  90.01150  85.89853  83.27760  80.76670  61.25411  86.56774  81.03800  85.02030 
##       249       250       251       252       253       254       255       256 
##  72.83782  82.84458  81.33910  62.80318  92.76028  50.22280  69.61516  76.81198 
##       257       258       259 
##  81.51005  81.96020  77.67332