##Libraries
library(caret)
## Warning: package 'caret' was built under R version 4.0.4
## Loading required package: lattice
## Loading required package: ggplot2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.4
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.0.5
##
## Attaching package: 'Metrics'
## The following objects are masked from 'package:caret':
##
## precision, recall
# NOTE(review): plyr is attached after dplyr, so plyr's arrange/count/mutate/
# rename/summarise mask the dplyr versions (see the message printed below).
# No call to a plyr function is visible in this script -- confirm it is needed,
# or attach it before dplyr.
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(graphics)
library(tinytex)
## Warning: package 'tinytex' was built under R version 4.0.5
## Importing data
# Read the training and evaluation sets; header = TRUE takes the column names
# from the first row of each CSV.
train <- read.csv(
  file = "C:\\Users\\17814\\Downloads\\moneyballtrainingdata.csv",
  header = TRUE
)
test <- read.csv(
  file = "C:\\Users\\17814\\Downloads\\moneyballevaluationdata.csv",
  header = TRUE
)
# Summarise columns 2 to 17 of the training set, i.e. everything except the
# leading INDEX column.
summary(train[, 2:17])
## TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## Min. : 0.00 Min. : 891 Min. : 69.0 Min. : 0.00
## 1st Qu.: 71.00 1st Qu.:1383 1st Qu.:208.0 1st Qu.: 34.00
## Median : 82.00 Median :1454 Median :238.0 Median : 47.00
## Mean : 80.79 Mean :1469 Mean :241.2 Mean : 55.25
## 3rd Qu.: 92.00 3rd Qu.:1537 3rd Qu.:273.0 3rd Qu.: 72.00
## Max. :146.00 Max. :2554 Max. :458.0 Max. :223.00
##
## TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 42.00 1st Qu.:451.0 1st Qu.: 548.0 1st Qu.: 66.0
## Median :102.00 Median :512.0 Median : 750.0 Median :101.0
## Mean : 99.61 Mean :501.6 Mean : 735.6 Mean :124.8
## 3rd Qu.:147.00 3rd Qu.:580.0 3rd Qu.: 930.0 3rd Qu.:156.0
## Max. :264.00 Max. :878.0 Max. :1399.0 Max. :697.0
## NA's :102 NA's :131
## TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
## Min. : 0.0 Min. :29.00 Min. : 1137 Min. : 0.0
## 1st Qu.: 38.0 1st Qu.:50.50 1st Qu.: 1419 1st Qu.: 50.0
## Median : 49.0 Median :58.00 Median : 1518 Median :107.0
## Mean : 52.8 Mean :59.36 Mean : 1779 Mean :105.7
## 3rd Qu.: 62.0 3rd Qu.:67.00 3rd Qu.: 1682 3rd Qu.:150.0
## Max. :201.0 Max. :95.00 Max. :30132 Max. :343.0
## NA's :772 NA's :2085
## TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP
## Min. : 0.0 Min. : 0.0 Min. : 65.0 Min. : 52.0
## 1st Qu.: 476.0 1st Qu.: 615.0 1st Qu.: 127.0 1st Qu.:131.0
## Median : 536.5 Median : 813.5 Median : 159.0 Median :149.0
## Mean : 553.0 Mean : 817.7 Mean : 246.5 Mean :146.4
## 3rd Qu.: 611.0 3rd Qu.: 968.0 3rd Qu.: 249.2 3rd Qu.:164.0
## Max. :3645.0 Max. :19278.0 Max. :1898.0 Max. :228.0
## NA's :102 NA's :286
# Per-column summary of the evaluation set as loaded, before any imputation.
summary(test)
## INDEX TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## Min. : 9 Min. : 819 Min. : 44.0 Min. : 14.00
## 1st Qu.: 708 1st Qu.:1387 1st Qu.:210.0 1st Qu.: 35.00
## Median :1249 Median :1455 Median :239.0 Median : 52.00
## Mean :1264 Mean :1469 Mean :241.3 Mean : 55.91
## 3rd Qu.:1832 3rd Qu.:1548 3rd Qu.:278.5 3rd Qu.: 72.00
## Max. :2525 Max. :2170 Max. :376.0 Max. :155.00
##
## TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## Min. : 0.00 Min. : 15.0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 44.50 1st Qu.:436.5 1st Qu.: 545.0 1st Qu.: 59.0
## Median :101.00 Median :509.0 Median : 686.0 Median : 92.0
## Mean : 95.63 Mean :499.0 Mean : 709.3 Mean :123.7
## 3rd Qu.:135.50 3rd Qu.:565.5 3rd Qu.: 912.0 3rd Qu.:151.8
## Max. :242.00 Max. :792.0 Max. :1268.0 Max. :580.0
## NA's :18 NA's :13
## TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
## Min. : 0.00 Min. :42.00 Min. : 1155 Min. : 0.0
## 1st Qu.: 38.00 1st Qu.:53.50 1st Qu.: 1426 1st Qu.: 52.0
## Median : 49.50 Median :62.00 Median : 1515 Median :104.0
## Mean : 52.32 Mean :62.37 Mean : 1813 Mean :102.1
## 3rd Qu.: 63.00 3rd Qu.:67.50 3rd Qu.: 1681 3rd Qu.:142.5
## Max. :154.00 Max. :96.00 Max. :22768 Max. :336.0
## NA's :87 NA's :240
## TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP
## Min. : 136.0 Min. : 0.0 Min. : 73.0 Min. : 69.0
## 1st Qu.: 471.0 1st Qu.: 613.0 1st Qu.: 131.0 1st Qu.:131.0
## Median : 526.0 Median : 745.0 Median : 163.0 Median :148.0
## Mean : 552.4 Mean : 799.7 Mean : 249.7 Mean :146.1
## 3rd Qu.: 606.5 3rd Qu.: 938.0 3rd Qu.: 252.0 3rd Qu.:164.0
## Max. :2008.0 Max. :9963.0 Max. :1568.0 Max. :204.0
## NA's :18 NA's :31
## Cleaning up missing values with the median
# Columns that report NAs in the raw summaries above.
# NOTE(review): TEAM_BATTING_HBP is missing in 2085 of the 2276 training rows,
# so after imputation it is almost constant -- consider dropping it instead.
impute_cols <- c(
  "TEAM_BATTING_SO", "TEAM_BASERUN_SB", "TEAM_BASERUN_CS",
  "TEAM_BATTING_HBP", "TEAM_PITCHING_SO", "TEAM_FIELDING_DP"
)
# The fill value is learned from the training set only and reused for the
# evaluation set, so both data sets go through the same preprocessing.
# (The evaluation-set figures pasted further down were produced with
# evaluation-set medians and will shift slightly on a re-run.)
for (col in impute_cols) {
  fill <- median(train[[col]], na.rm = TRUE)
  train[[col]][is.na(train[[col]])] <- fill
  test[[col]][is.na(test[[col]])] <- fill
}
## New summary stats to check that no NAs remain
summary(train)
## INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B
## Min. : 1.0 Min. : 0.00 Min. : 891 Min. : 69.0
## 1st Qu.: 630.8 1st Qu.: 71.00 1st Qu.:1383 1st Qu.:208.0
## Median :1270.5 Median : 82.00 Median :1454 Median :238.0
## Mean :1268.5 Mean : 80.79 Mean :1469 Mean :241.2
## 3rd Qu.:1915.5 3rd Qu.: 92.00 3rd Qu.:1537 3rd Qu.:273.0
## Max. :2535.0 Max. :146.00 Max. :2554 Max. :458.0
## TEAM_BATTING_3B TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO
## Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.0
## 1st Qu.: 34.00 1st Qu.: 42.00 1st Qu.:451.0 1st Qu.: 556.8
## Median : 47.00 Median :102.00 Median :512.0 Median : 750.0
## Mean : 55.25 Mean : 99.61 Mean :501.6 Mean : 736.3
## 3rd Qu.: 72.00 3rd Qu.:147.00 3rd Qu.:580.0 3rd Qu.: 925.0
## Max. :223.00 Max. :264.00 Max. :878.0 Max. :1399.0
## TEAM_BASERUN_SB TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H
## Min. : 0.0 Min. : 0.00 Min. :29.00 Min. : 1137
## 1st Qu.: 67.0 1st Qu.: 44.00 1st Qu.:58.00 1st Qu.: 1419
## Median :101.0 Median : 49.00 Median :58.00 Median : 1518
## Mean :123.4 Mean : 51.51 Mean :58.11 Mean : 1779
## 3rd Qu.:151.0 3rd Qu.: 54.25 3rd Qu.:58.00 3rd Qu.: 1682
## Max. :697.0 Max. :201.00 Max. :95.00 Max. :30132
## TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 65.0
## 1st Qu.: 50.0 1st Qu.: 476.0 1st Qu.: 626.0 1st Qu.: 127.0
## Median :107.0 Median : 536.5 Median : 813.5 Median : 159.0
## Mean :105.7 Mean : 553.0 Mean : 817.5 Mean : 246.5
## 3rd Qu.:150.0 3rd Qu.: 611.0 3rd Qu.: 957.0 3rd Qu.: 249.2
## Max. :343.0 Max. :3645.0 Max. :19278.0 Max. :1898.0
## TEAM_FIELDING_DP
## Min. : 52.0
## 1st Qu.:134.0
## Median :149.0
## Mean :146.7
## 3rd Qu.:161.2
## Max. :228.0
# Re-check the evaluation set after imputation; the output below no longer
# lists any NA counts.
summary(test)
## INDEX TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## Min. : 9 Min. : 819 Min. : 44.0 Min. : 14.00
## 1st Qu.: 708 1st Qu.:1387 1st Qu.:210.0 1st Qu.: 35.00
## Median :1249 Median :1455 Median :239.0 Median : 52.00
## Mean :1264 Mean :1469 Mean :241.3 Mean : 55.91
## 3rd Qu.:1832 3rd Qu.:1548 3rd Qu.:278.5 3rd Qu.: 72.00
## Max. :2525 Max. :2170 Max. :376.0 Max. :155.00
## TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## Min. : 0.00 Min. : 15.0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 44.50 1st Qu.:436.5 1st Qu.: 565.0 1st Qu.: 60.5
## Median :101.00 Median :509.0 Median : 686.0 Median : 92.0
## Mean : 95.63 Mean :499.0 Mean : 707.7 Mean :122.1
## 3rd Qu.:135.50 3rd Qu.:565.5 3rd Qu.: 904.5 3rd Qu.:149.0
## Max. :242.00 Max. :792.0 Max. :1268.0 Max. :580.0
## TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
## Min. : 0.00 Min. :42.00 Min. : 1155 Min. : 0.0
## 1st Qu.: 44.00 1st Qu.:62.00 1st Qu.: 1426 1st Qu.: 52.0
## Median : 49.50 Median :62.00 Median : 1515 Median :104.0
## Mean : 51.37 Mean :62.03 Mean : 1813 Mean :102.1
## 3rd Qu.: 56.00 3rd Qu.:62.00 3rd Qu.: 1681 3rd Qu.:142.5
## Max. :154.00 Max. :96.00 Max. :22768 Max. :336.0
## TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP
## Min. : 136.0 Min. : 0.0 Min. : 73.0 Min. : 69.0
## 1st Qu.: 471.0 1st Qu.: 622.5 1st Qu.: 131.0 1st Qu.:134.5
## Median : 526.0 Median : 745.0 Median : 163.0 Median :148.0
## Mean : 552.4 Mean : 795.9 Mean : 249.7 Mean :146.3
## 3rd Qu.: 606.5 3rd Qu.: 927.5 3rd Qu.: 252.0 3rd Qu.:160.5
## Max. :2008.0 Max. :9963.0 Max. :1568.0 Max. :204.0
##Final Summary Stats
# Nothing modifies `train` between the previous summary(train) call and this
# one, so this prints the same table again.
summary(train)
## INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B
## Min. : 1.0 Min. : 0.00 Min. : 891 Min. : 69.0
## 1st Qu.: 630.8 1st Qu.: 71.00 1st Qu.:1383 1st Qu.:208.0
## Median :1270.5 Median : 82.00 Median :1454 Median :238.0
## Mean :1268.5 Mean : 80.79 Mean :1469 Mean :241.2
## 3rd Qu.:1915.5 3rd Qu.: 92.00 3rd Qu.:1537 3rd Qu.:273.0
## Max. :2535.0 Max. :146.00 Max. :2554 Max. :458.0
## TEAM_BATTING_3B TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO
## Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.0
## 1st Qu.: 34.00 1st Qu.: 42.00 1st Qu.:451.0 1st Qu.: 556.8
## Median : 47.00 Median :102.00 Median :512.0 Median : 750.0
## Mean : 55.25 Mean : 99.61 Mean :501.6 Mean : 736.3
## 3rd Qu.: 72.00 3rd Qu.:147.00 3rd Qu.:580.0 3rd Qu.: 925.0
## Max. :223.00 Max. :264.00 Max. :878.0 Max. :1399.0
## TEAM_BASERUN_SB TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H
## Min. : 0.0 Min. : 0.00 Min. :29.00 Min. : 1137
## 1st Qu.: 67.0 1st Qu.: 44.00 1st Qu.:58.00 1st Qu.: 1419
## Median :101.0 Median : 49.00 Median :58.00 Median : 1518
## Mean :123.4 Mean : 51.51 Mean :58.11 Mean : 1779
## 3rd Qu.:151.0 3rd Qu.: 54.25 3rd Qu.:58.00 3rd Qu.: 1682
## Max. :697.0 Max. :201.00 Max. :95.00 Max. :30132
## TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 65.0
## 1st Qu.: 50.0 1st Qu.: 476.0 1st Qu.: 626.0 1st Qu.: 127.0
## Median :107.0 Median : 536.5 Median : 813.5 Median : 159.0
## Mean :105.7 Mean : 553.0 Mean : 817.5 Mean : 246.5
## 3rd Qu.:150.0 3rd Qu.: 611.0 3rd Qu.: 957.0 3rd Qu.: 249.2
## Max. :343.0 Max. :3645.0 Max. :19278.0 Max. :1898.0
## TEAM_FIELDING_DP
## Min. : 52.0
## 1st Qu.:134.0
## Median :149.0
## Mean :146.7
## 3rd Qu.:161.2
## Max. :228.0
##correlation
# Pairwise correlation matrix over every column of `train`, including INDEX
# and TARGET_WINS.
cor(train)
## INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B
## INDEX 1.000000000 -0.02105643 -0.0179202413 0.01118301
## TARGET_WINS -0.021056435 1.00000000 0.3887675211 0.28910365
## TEAM_BATTING_H -0.017920241 0.38876752 1.0000000000 0.56284968
## TEAM_BATTING_2B 0.011183013 0.28910365 0.5628496778 1.00000000
## TEAM_BATTING_3B -0.005814683 0.14260841 0.4276965751 -0.10730582
## TEAM_BATTING_HR 0.051481047 0.17615320 -0.0065446845 0.43539729
## TEAM_BATTING_BB -0.026567236 0.23255986 -0.0724640128 0.25572610
## TEAM_BATTING_SO 0.079147864 -0.03058135 -0.4526861592 0.15173438
## TEAM_BASERUN_SB 0.039365347 0.12361087 0.1078237673 -0.18340432
## TEAM_BASERUN_CS 0.001338699 0.01595982 0.0008261984 -0.04584955
## TEAM_BATTING_HBP 0.027793759 0.01651641 -0.0024521129 0.04359347
## TEAM_PITCHING_H 0.017103148 -0.10993705 0.3026937094 0.02369219
## TEAM_PITCHING_HR 0.050985897 0.18901373 0.0728531193 0.45455082
## TEAM_PITCHING_BB -0.015287513 0.12417454 0.0941930273 0.17805420
## TEAM_PITCHING_SO 0.054739946 -0.07579967 -0.2451699012 0.06213042
## TEAM_FIELDING_E -0.009233126 -0.17648476 0.2649024778 -0.23515099
## TEAM_FIELDING_DP 0.019279238 -0.03008630 0.1248087998 0.25696798
## TEAM_BATTING_3B TEAM_BATTING_HR TEAM_BATTING_BB
## INDEX -0.005814683 0.051481047 -0.02656724
## TARGET_WINS 0.142608411 0.176153200 0.23255986
## TEAM_BATTING_H 0.427696575 -0.006544685 -0.07246401
## TEAM_BATTING_2B -0.107305824 0.435397293 0.25572610
## TEAM_BATTING_3B 1.000000000 -0.635566946 -0.28723584
## TEAM_BATTING_HR -0.635566946 1.000000000 0.51373481
## TEAM_BATTING_BB -0.287235841 0.513734810 1.00000000
## TEAM_BATTING_SO -0.655709613 0.693007648 0.37148892
## TEAM_BASERUN_SB 0.485740156 -0.406889074 -0.04268402
## TEAM_BASERUN_CS 0.136181182 -0.225458666 -0.04581766
## TEAM_BATTING_HBP -0.042734050 0.055506730 0.01861664
## TEAM_PITCHING_H 0.194879411 -0.250145481 -0.44977762
## TEAM_PITCHING_HR -0.567836679 0.969371396 0.45955207
## TEAM_PITCHING_BB -0.002224148 0.136927564 0.48936126
## TEAM_PITCHING_SO -0.254238104 0.177418187 -0.02017989
## TEAM_FIELDING_E 0.509778447 -0.587339098 -0.65597081
## TEAM_FIELDING_DP -0.227771884 0.391652434 0.32963974
## TEAM_BATTING_SO TEAM_BASERUN_SB TEAM_BASERUN_CS
## INDEX 0.07914786 0.03936535 0.0013386990
## TARGET_WINS -0.03058135 0.12361087 0.0159598172
## TEAM_BATTING_H -0.45268616 0.10782377 0.0008261984
## TEAM_BATTING_2B 0.15173438 -0.18340432 -0.0458495544
## TEAM_BATTING_3B -0.65570961 0.48574016 0.1361811823
## TEAM_BATTING_HR 0.69300765 -0.40688907 -0.2254586663
## TEAM_BATTING_BB 0.37148892 -0.04268402 -0.0458176601
## TEAM_BATTING_SO 1.00000000 -0.21178758 -0.1025019312
## TEAM_BASERUN_SB -0.21178758 1.00000000 0.2332417104
## TEAM_BASERUN_CS -0.10250193 0.23324171 1.0000000000
## TEAM_BATTING_HBP 0.06641291 -0.01794567 -0.0315285080
## TEAM_PITCHING_H -0.37571553 0.03957227 -0.0525918342
## TEAM_PITCHING_HR 0.63286033 -0.38005624 -0.2281852483
## TEAM_PITCHING_BB 0.03498809 0.12928969 -0.0472289272
## TEAM_PITCHING_SO 0.41618159 -0.06424741 -0.0565380017
## TEAM_FIELDING_E -0.58259305 0.32615276 -0.0291782138
## TEAM_FIELDING_DP 0.11089804 -0.27023400 -0.1020021365
## TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
## INDEX 0.027793759 0.017103148 0.05098590
## TARGET_WINS 0.016516411 -0.109937054 0.18901373
## TEAM_BATTING_H -0.002452113 0.302693709 0.07285312
## TEAM_BATTING_2B 0.043593471 0.023692188 0.45455082
## TEAM_BATTING_3B -0.042734050 0.194879411 -0.56783668
## TEAM_BATTING_HR 0.055506730 -0.250145481 0.96937140
## TEAM_BATTING_BB 0.018616645 -0.449777625 0.45955207
## TEAM_BATTING_SO 0.066412909 -0.375715533 0.63286033
## TEAM_BASERUN_SB -0.017945673 0.039572266 -0.38005624
## TEAM_BASERUN_CS -0.031528508 -0.052591834 -0.22818525
## TEAM_BATTING_HBP 1.000000000 -0.006864463 0.05196343
## TEAM_PITCHING_H -0.006864463 1.000000000 -0.14161276
## TEAM_PITCHING_HR 0.051963427 -0.141612759 1.00000000
## TEAM_PITCHING_BB 0.004508255 0.320676162 0.22193750
## TEAM_PITCHING_SO 0.025400354 0.266935871 0.19691491
## TEAM_FIELDING_E -0.017626524 0.667759010 -0.49314447
## TEAM_FIELDING_DP -0.007774257 -0.044647837 0.38959550
## TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E
## INDEX -0.015287513 0.054739946 -0.009233126
## TARGET_WINS 0.124174536 -0.075799674 -0.176484759
## TEAM_BATTING_H 0.094193027 -0.245169901 0.264902478
## TEAM_BATTING_2B 0.178054204 0.062130422 -0.235150986
## TEAM_BATTING_3B -0.002224148 -0.254238104 0.509778447
## TEAM_BATTING_HR 0.136927564 0.177418187 -0.587339098
## TEAM_BATTING_BB 0.489361263 -0.020179893 -0.655970815
## TEAM_BATTING_SO 0.034988093 0.416181592 -0.582593046
## TEAM_BASERUN_SB 0.129289686 -0.064247407 0.326152759
## TEAM_BASERUN_CS -0.047228927 -0.056538002 -0.029178214
## TEAM_BATTING_HBP 0.004508255 0.025400354 -0.017626524
## TEAM_PITCHING_H 0.320676162 0.266935871 0.667759010
## TEAM_PITCHING_HR 0.221937505 0.196914911 -0.493144466
## TEAM_PITCHING_BB 1.000000000 0.482172000 -0.022837561
## TEAM_PITCHING_SO 0.482172000 1.000000000 -0.023322782
## TEAM_FIELDING_E -0.022837561 -0.023322782 1.000000000
## TEAM_FIELDING_DP 0.192348657 0.009552324 -0.227394807
## TEAM_FIELDING_DP
## INDEX 0.019279238
## TARGET_WINS -0.030086302
## TEAM_BATTING_H 0.124808800
## TEAM_BATTING_2B 0.256967975
## TEAM_BATTING_3B -0.227771884
## TEAM_BATTING_HR 0.391652434
## TEAM_BATTING_BB 0.329639737
## TEAM_BATTING_SO 0.110898035
## TEAM_BASERUN_SB -0.270234003
## TEAM_BASERUN_CS -0.102002137
## TEAM_BATTING_HBP -0.007774257
## TEAM_PITCHING_H -0.044647837
## TEAM_PITCHING_HR 0.389595503
## TEAM_PITCHING_BB 0.192348657
## TEAM_PITCHING_SO 0.009552324
## TEAM_FIELDING_E -0.227394807
## TEAM_FIELDING_DP 1.000000000
# Same correlation matrix for the evaluation set (its output has no
# TARGET_WINS row or column).
cor(test)
## INDEX TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## INDEX 1.000000000 -0.003457155 0.032046194 0.025177894
## TEAM_BATTING_H -0.003457155 1.000000000 0.624905809 0.430419749
## TEAM_BATTING_2B 0.032046194 0.624905809 1.000000000 0.003455806
## TEAM_BATTING_3B 0.025177894 0.430419749 0.003455806 1.000000000
## TEAM_BATTING_HR 0.063303334 0.097305913 0.422890506 -0.531913902
## TEAM_BATTING_BB 0.010955620 0.178479481 0.368121773 -0.002859557
## TEAM_BATTING_SO 0.158353667 -0.460559158 0.044597395 -0.595937902
## TEAM_BASERUN_SB 0.021703477 0.153725354 -0.140463351 0.507602437
## TEAM_BASERUN_CS 0.083968204 0.033611495 0.047579599 0.122876495
## TEAM_BATTING_HBP 0.050855426 0.003020941 -0.016604053 -0.015793325
## TEAM_PITCHING_H 0.033420809 0.231400887 -0.185528011 0.150439843
## TEAM_PITCHING_HR 0.073473674 0.167273848 0.412568887 -0.414772447
## TEAM_PITCHING_BB 0.058575739 0.234012526 0.156393712 0.301234789
## TEAM_PITCHING_SO 0.156509867 -0.398007572 -0.204195969 -0.147916706
## TEAM_FIELDING_E 0.005424957 0.114796481 -0.321932501 0.397313906
## TEAM_FIELDING_DP 0.029123782 0.213565179 0.344801350 -0.179655726
## TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO
## INDEX 0.06330333 0.0109556197 0.15835367
## TEAM_BATTING_H 0.09730591 0.1784794814 -0.46055916
## TEAM_BATTING_2B 0.42289051 0.3681217727 0.04459739
## TEAM_BATTING_3B -0.53191390 -0.0028595571 -0.59593790
## TEAM_BATTING_HR 1.00000000 0.4494817976 0.62874436
## TEAM_BATTING_BB 0.44948180 1.0000000000 0.20819320
## TEAM_BATTING_SO 0.62874436 0.2081932003 1.00000000
## TEAM_BASERUN_SB -0.39459124 -0.0003214402 -0.23351680
## TEAM_BASERUN_CS -0.17704634 0.0287176499 -0.05852908
## TEAM_BATTING_HBP 0.03701944 -0.0663518889 0.02206774
## TEAM_PITCHING_H -0.23492162 -0.4357893854 -0.39111288
## TEAM_PITCHING_HR 0.92580412 0.3692659236 0.52347273
## TEAM_PITCHING_BB 0.07063501 0.4237480076 -0.11353606
## TEAM_PITCHING_SO 0.09423620 -0.1396864030 0.41680102
## TEAM_FIELDING_E -0.57823629 -0.6263606352 -0.56795695
## TEAM_FIELDING_DP 0.40627970 0.2317689893 0.08612598
## TEAM_BASERUN_SB TEAM_BASERUN_CS TEAM_BATTING_HBP
## INDEX 0.0217034769 0.083968204 0.050855426
## TEAM_BATTING_H 0.1537253540 0.033611495 0.003020941
## TEAM_BATTING_2B -0.1404633508 0.047579599 -0.016604053
## TEAM_BATTING_3B 0.5076024368 0.122876495 -0.015793325
## TEAM_BATTING_HR -0.3945912364 -0.177046344 0.037019435
## TEAM_BATTING_BB -0.0003214402 0.028717650 -0.066351889
## TEAM_BATTING_SO -0.2335168039 -0.058529078 0.022067737
## TEAM_BASERUN_SB 1.0000000000 0.289450394 -0.021586849
## TEAM_BASERUN_CS 0.2894503944 1.000000000 -0.015157275
## TEAM_BATTING_HBP -0.0215868488 -0.015157275 1.000000000
## TEAM_PITCHING_H 0.0515111613 -0.128806581 -0.001273268
## TEAM_PITCHING_HR -0.3111451694 -0.184877928 0.035657719
## TEAM_PITCHING_BB 0.2165139009 -0.006344267 -0.048408821
## TEAM_PITCHING_SO -0.0831972400 -0.043420787 0.007571710
## TEAM_FIELDING_E 0.3210643032 -0.027603684 -0.013049220
## TEAM_FIELDING_DP -0.3521561769 -0.206196525 0.094152451
## TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB
## INDEX 0.033420809 0.07347367 0.058575739
## TEAM_BATTING_H 0.231400887 0.16727385 0.234012526
## TEAM_BATTING_2B -0.185528011 0.41256889 0.156393712
## TEAM_BATTING_3B 0.150439843 -0.41477245 0.301234789
## TEAM_BATTING_HR -0.234921623 0.92580412 0.070635012
## TEAM_BATTING_BB -0.435789385 0.36926592 0.423748008
## TEAM_BATTING_SO -0.391112875 0.52347273 -0.113536059
## TEAM_BASERUN_SB 0.051511161 -0.31114517 0.216513901
## TEAM_BASERUN_CS -0.128806581 -0.18487793 -0.006344267
## TEAM_BATTING_HBP -0.001273268 0.03565772 -0.048408821
## TEAM_PITCHING_H 1.000000000 -0.03223577 0.259412178
## TEAM_PITCHING_HR -0.032235770 1.00000000 0.362068264
## TEAM_PITCHING_BB 0.259412178 0.36206826 1.000000000
## TEAM_PITCHING_SO 0.133016040 0.22189911 0.379054554
## TEAM_FIELDING_E 0.684568722 -0.38155409 0.126263657
## TEAM_FIELDING_DP -0.002364655 0.40556060 0.130657231
## TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP
## INDEX 0.156509867 0.005424957 0.029123782
## TEAM_BATTING_H -0.398007572 0.114796481 0.213565179
## TEAM_BATTING_2B -0.204195969 -0.321932501 0.344801350
## TEAM_BATTING_3B -0.147916706 0.397313906 -0.179655726
## TEAM_BATTING_HR 0.094236196 -0.578236294 0.406279696
## TEAM_BATTING_BB -0.139686403 -0.626360635 0.231768989
## TEAM_BATTING_SO 0.416801016 -0.567956953 0.086125982
## TEAM_BASERUN_SB -0.083197240 0.321064303 -0.352156177
## TEAM_BASERUN_CS -0.043420787 -0.027603684 -0.206196525
## TEAM_BATTING_HBP 0.007571710 -0.013049220 0.094152451
## TEAM_PITCHING_H 0.133016040 0.684568722 -0.002364655
## TEAM_PITCHING_HR 0.221899111 -0.381554088 0.405560595
## TEAM_PITCHING_BB 0.379054554 0.126263657 0.130657231
## TEAM_PITCHING_SO 1.000000000 0.063705823 0.008982151
## TEAM_FIELDING_E 0.063705823 1.000000000 -0.207059598
## TEAM_FIELDING_DP 0.008982151 -0.207059598 1.000000000
## Histograms
# One histogram per modelling column: the target first, then the predictors.
# The title and x-axis label are spelled out so each plot is labelled exactly
# as a direct hist(train$<column>) call would label it.
hist_cols <- c(
  "TARGET_WINS", "TEAM_BATTING_H", "TEAM_BATTING_2B", "TEAM_BATTING_3B",
  "TEAM_BATTING_HR", "TEAM_BATTING_BB", "TEAM_BATTING_SO", "TEAM_BASERUN_SB",
  "TEAM_BASERUN_CS", "TEAM_BATTING_HBP", "TEAM_PITCHING_H", "TEAM_PITCHING_HR",
  "TEAM_PITCHING_BB", "TEAM_PITCHING_SO", "TEAM_FIELDING_E", "TEAM_FIELDING_DP"
)
for (col in hist_cols) {
  label <- paste0("train$", col)
  hist(train[[col]], main = paste("Histogram of", label), xlab = label)
}

## lm
# Baseline model: regress TARGET_WINS on every other column.
# The seed fixes the random bootstrap resampling that train() performs.
set.seed(105)
# INDEX is removed so the `.` in the formula does not pick it up as a predictor.
train_new <- dplyr::select(train, -INDEX)
lm <- train(TARGET_WINS ~ ., data = train_new, method = "lm")
print(lm)
## Linear Regression
##
## 2276 samples
## 15 predictor
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 2276, 2276, 2276, 2276, 2276, 2276, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 13.38482 0.2828243 10.32272
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Coefficient table and fit statistics of the linear model fitted by train().
summary(lm)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.745 -8.623 0.137 8.390 58.605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.0038417 6.7925780 3.092 0.002011 **
## TEAM_BATTING_H 0.0489011 0.0036954 13.233 < 2e-16 ***
## TEAM_BATTING_2B -0.0210986 0.0091822 -2.298 0.021666 *
## TEAM_BATTING_3B 0.0645246 0.0168064 3.839 0.000127 ***
## TEAM_BATTING_HR 0.0525039 0.0274974 1.909 0.056335 .
## TEAM_BATTING_BB 0.0104483 0.0058384 1.790 0.073657 .
## TEAM_BATTING_SO -0.0084975 0.0025484 -3.334 0.000869 ***
## TEAM_BASERUN_SB 0.0254442 0.0043572 5.840 5.99e-09 ***
## TEAM_BASERUN_CS -0.0108293 0.0157886 -0.686 0.492852
## TEAM_BATTING_HBP 0.0466590 0.0730825 0.638 0.523250
## TEAM_PITCHING_H -0.0008451 0.0003674 -2.300 0.021540 *
## TEAM_PITCHING_HR 0.0131780 0.0243950 0.540 0.589116
## TEAM_PITCHING_BB 0.0007612 0.0041578 0.183 0.854747
## TEAM_PITCHING_SO 0.0028222 0.0009221 3.061 0.002235 **
## TEAM_FIELDING_E -0.0195730 0.0024620 -7.950 2.92e-15 ***
## TEAM_FIELDING_DP -0.1215789 0.0129476 -9.390 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.08 on 2260 degrees of freedom
## Multiple R-squared: 0.3155, Adjusted R-squared: 0.311
## F-statistic: 69.45 on 15 and 2260 DF, p-value: < 2.2e-16
# Refit the same formula as a plain lm object so it can be handed to plot()
# for the regression diagnostic plots. `lm` also names the caret fit above;
# R skips non-function objects when resolving a call, so this still reaches
# the lm() function.
lmplot <- lm(
  formula = TARGET_WINS ~ .,
  data = train_new
)
plot(lmplot)
# Predictions for the evaluation set from the caret model.
prediction <- predict(lm, newdata = test)
# New Variables
# TEAM_BATTING_WALKS: sum of the TEAM_BATTING_BB and TEAM_BATTING_HBP columns.
train[["TEAM_BATTING_WALKS"]] <- with(
  train, TEAM_BATTING_BB + TEAM_BATTING_HBP
)
test[["TEAM_BATTING_WALKS"]] <- with(
  test, TEAM_BATTING_BB + TEAM_BATTING_HBP
)
# TEAM_BATTING_1B: TEAM_BATTING_H minus the 2B, 3B and HR columns
# (presumably singles, going by the 1B name).
train[["TEAM_BATTING_1B"]] <- with(
  train,
  TEAM_BATTING_H - TEAM_BATTING_2B - TEAM_BATTING_3B - TEAM_BATTING_HR
)
test[["TEAM_BATTING_1B"]] <- with(
  test,
  TEAM_BATTING_H - TEAM_BATTING_2B - TEAM_BATTING_3B - TEAM_BATTING_HR
)
# Inspect the two derived columns, then the full training set.
hist(train$TEAM_BATTING_WALKS)
hist(train$TEAM_BATTING_1B)
summary(train)
## INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B
## Min. : 1.0 Min. : 0.00 Min. : 891 Min. : 69.0
## 1st Qu.: 630.8 1st Qu.: 71.00 1st Qu.:1383 1st Qu.:208.0
## Median :1270.5 Median : 82.00 Median :1454 Median :238.0
## Mean :1268.5 Mean : 80.79 Mean :1469 Mean :241.2
## 3rd Qu.:1915.5 3rd Qu.: 92.00 3rd Qu.:1537 3rd Qu.:273.0
## Max. :2535.0 Max. :146.00 Max. :2554 Max. :458.0
## TEAM_BATTING_3B TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO
## Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.0
## 1st Qu.: 34.00 1st Qu.: 42.00 1st Qu.:451.0 1st Qu.: 556.8
## Median : 47.00 Median :102.00 Median :512.0 Median : 750.0
## Mean : 55.25 Mean : 99.61 Mean :501.6 Mean : 736.3
## 3rd Qu.: 72.00 3rd Qu.:147.00 3rd Qu.:580.0 3rd Qu.: 925.0
## Max. :223.00 Max. :264.00 Max. :878.0 Max. :1399.0
## TEAM_BASERUN_SB TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H
## Min. : 0.0 Min. : 0.00 Min. :29.00 Min. : 1137
## 1st Qu.: 67.0 1st Qu.: 44.00 1st Qu.:58.00 1st Qu.: 1419
## Median :101.0 Median : 49.00 Median :58.00 Median : 1518
## Mean :123.4 Mean : 51.51 Mean :58.11 Mean : 1779
## 3rd Qu.:151.0 3rd Qu.: 54.25 3rd Qu.:58.00 3rd Qu.: 1682
## Max. :697.0 Max. :201.00 Max. :95.00 Max. :30132
## TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 65.0
## 1st Qu.: 50.0 1st Qu.: 476.0 1st Qu.: 626.0 1st Qu.: 127.0
## Median :107.0 Median : 536.5 Median : 813.5 Median : 159.0
## Mean :105.7 Mean : 553.0 Mean : 817.5 Mean : 246.5
## 3rd Qu.:150.0 3rd Qu.: 611.0 3rd Qu.: 957.0 3rd Qu.: 249.2
## Max. :343.0 Max. :3645.0 Max. :19278.0 Max. :1898.0
## TEAM_FIELDING_DP TEAM_BATTING_WALKS TEAM_BATTING_1B
## Min. : 52.0 Min. : 58.0 Min. : 709.0
## 1st Qu.:134.0 1st Qu.:509.0 1st Qu.: 990.8
## Median :149.0 Median :571.0 Median :1050.0
## Mean :146.7 Mean :559.7 Mean :1073.2
## 3rd Qu.:161.2 3rd Qu.:638.0 3rd Qu.:1129.0
## Max. :228.0 Max. :936.0 Max. :2112.0
## lm2
# Second model: replaces TEAM_BATTING_BB and TEAM_BATTING_HBP with the combined
# TEAM_BATTING_WALKS column and leaves out TEAM_BASERUN_CS.
# Re-seed with the value used for the first model so train() draws the same
# bootstrap resamples; otherwise the resamples() comparison further down mixes
# model differences with resampling noise. (The resampling figures pasted
# below come from an unseeded run and will change on a re-run.)
set.seed(105)
lm2 <- train(
  TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
    TEAM_BATTING_HR + TEAM_BATTING_WALKS + TEAM_BATTING_SO +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_PITCHING_HR +
    TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E +
    TEAM_FIELDING_DP,
  data = train,
  method = "lm"
)
lm2
## Linear Regression
##
## 2276 samples
## 13 predictor
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 2276, 2276, 2276, 2276, 2276, 2276, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 13.43261 0.28126 10.29282
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Coefficient table and fit statistics of the second model.
summary(lm2)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.768 -8.583 0.103 8.418 58.551
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.3197243 5.3902520 4.141 3.59e-05 ***
## TEAM_BATTING_H 0.0488792 0.0036939 13.232 < 2e-16 ***
## TEAM_BATTING_2B -0.0212600 0.0091702 -2.318 0.020518 *
## TEAM_BATTING_3B 0.0649115 0.0167887 3.866 0.000114 ***
## TEAM_BATTING_HR 0.0540036 0.0273588 1.974 0.048514 *
## TEAM_BATTING_WALKS 0.0107842 0.0058163 1.854 0.063847 .
## TEAM_BATTING_SO -0.0084454 0.0025465 -3.317 0.000926 ***
## TEAM_BASERUN_SB 0.0247746 0.0042567 5.820 6.71e-09 ***
## TEAM_PITCHING_H -0.0008541 0.0003666 -2.330 0.019910 *
## TEAM_PITCHING_HR 0.0127981 0.0243677 0.525 0.599489
## TEAM_PITCHING_BB 0.0007455 0.0041470 0.180 0.857347
## TEAM_PITCHING_SO 0.0028310 0.0009213 3.073 0.002146 **
## TEAM_FIELDING_E -0.0191418 0.0023992 -7.978 2.33e-15 ***
## TEAM_FIELDING_DP -0.1219967 0.0129335 -9.433 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.07 on 2262 degrees of freedom
## Multiple R-squared: 0.3153, Adjusted R-squared: 0.3114
## F-statistic: 80.12 on 13 and 2262 DF, p-value: < 2.2e-16
# Plain lm refit of the second model's formula, used only for the diagnostic
# plots drawn by plot().
lmplot2 <- lm(
  formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
    TEAM_BATTING_HR + TEAM_BATTING_WALKS + TEAM_BATTING_SO +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_PITCHING_HR +
    TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E +
    TEAM_FIELDING_DP,
  data = train
)
plot(lmplot2)
# Predictions for the evaluation set from the caret fit of the second model.
prediction2 <- predict(lm2, newdata = test)
## lm3
# Third model: same as the second, with TEAM_BATTING_H swapped for
# TEAM_BATTING_1B. Because 1B is H minus 2B, 3B and HR, this is a
# re-parameterisation: the residuals and R-squared printed below match the
# second model's exactly, only the batting coefficients change.
# Re-seed with the value used for the first model so train() draws the same
# bootstrap resamples and the resamples() comparison further down is paired.
# (The resampling figures pasted below come from an unseeded run and will
# change on a re-run.)
set.seed(105)
lm3 <- train(
  TARGET_WINS ~ TEAM_BATTING_1B + TEAM_BATTING_2B + TEAM_BATTING_3B +
    TEAM_BATTING_HR + TEAM_BATTING_WALKS + TEAM_BATTING_SO +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_PITCHING_HR +
    TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E +
    TEAM_FIELDING_DP,
  data = train,
  method = "lm"
)
lm3
## Linear Regression
##
## 2276 samples
## 13 predictor
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 2276, 2276, 2276, 2276, 2276, 2276, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 13.63651 0.2656024 10.39976
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Coefficient table and fit statistics of the third model.
summary(lm3)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.768 -8.583 0.103 8.418 58.551
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.3197243 5.3902520 4.141 3.59e-05 ***
## TEAM_BATTING_1B 0.0488792 0.0036939 13.232 < 2e-16 ***
## TEAM_BATTING_2B 0.0276192 0.0073239 3.771 0.000167 ***
## TEAM_BATTING_3B 0.1137906 0.0159198 7.148 1.19e-12 ***
## TEAM_BATTING_HR 0.1028828 0.0274671 3.746 0.000184 ***
## TEAM_BATTING_WALKS 0.0107842 0.0058163 1.854 0.063847 .
## TEAM_BATTING_SO -0.0084454 0.0025465 -3.317 0.000926 ***
## TEAM_BASERUN_SB 0.0247746 0.0042567 5.820 6.71e-09 ***
## TEAM_PITCHING_H -0.0008541 0.0003666 -2.330 0.019910 *
## TEAM_PITCHING_HR 0.0127981 0.0243677 0.525 0.599489
## TEAM_PITCHING_BB 0.0007455 0.0041470 0.180 0.857347
## TEAM_PITCHING_SO 0.0028310 0.0009213 3.073 0.002146 **
## TEAM_FIELDING_E -0.0191418 0.0023992 -7.978 2.33e-15 ***
## TEAM_FIELDING_DP -0.1219967 0.0129335 -9.433 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.07 on 2262 degrees of freedom
## Multiple R-squared: 0.3153, Adjusted R-squared: 0.3114
## F-statistic: 80.12 on 13 and 2262 DF, p-value: < 2.2e-16
# Plain lm refit of the third model's formula, used only for the diagnostic
# plots drawn by plot().
lmplot3 <- lm(
  formula = TARGET_WINS ~ TEAM_BATTING_1B + TEAM_BATTING_2B + TEAM_BATTING_3B +
    TEAM_BATTING_HR + TEAM_BATTING_WALKS + TEAM_BATTING_SO +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_PITCHING_HR +
    TEAM_PITCHING_BB + TEAM_PITCHING_SO + TEAM_FIELDING_E +
    TEAM_FIELDING_DP,
  data = train
)
plot(lmplot3)
# Predictions for the evaluation set from the caret fit of the third model.
prediction3 <- predict(lm3, newdata = test)
## lm4
# Fourth model: the third model without TEAM_PITCHING_HR and TEAM_PITCHING_BB.
# Re-seed with the value used for the first model so train() draws the same
# bootstrap resamples and the resamples() comparison further down is paired.
# (The resampling figures pasted below come from an unseeded run and will
# change on a re-run.)
set.seed(105)
lm4 <- train(
  TARGET_WINS ~ TEAM_BATTING_1B + TEAM_BATTING_2B + TEAM_BATTING_3B +
    TEAM_BATTING_HR + TEAM_BATTING_WALKS + TEAM_BATTING_SO +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_PITCHING_SO +
    TEAM_FIELDING_E + TEAM_FIELDING_DP,
  data = train,
  method = "lm"
)
lm4
## Linear Regression
##
## 2276 samples
## 11 predictor
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 2276, 2276, 2276, 2276, 2276, 2276, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 13.12713 0.2989129 10.2186
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Coefficient table and fit statistics of the fourth model.
summary(lm4)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.597 -8.606 0.091 8.452 58.593
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.6480329 5.3017083 4.083 4.60e-05 ***
## TEAM_BATTING_1B 0.0490996 0.0036697 13.380 < 2e-16 ***
## TEAM_BATTING_2B 0.0276725 0.0073202 3.780 0.000161 ***
## TEAM_BATTING_3B 0.1156645 0.0156857 7.374 2.31e-13 ***
## TEAM_BATTING_HR 0.1164379 0.0087994 13.232 < 2e-16 ***
## TEAM_BATTING_WALKS 0.0116157 0.0033693 3.448 0.000576 ***
## TEAM_BATTING_SO -0.0085273 0.0024520 -3.478 0.000515 ***
## TEAM_BASERUN_SB 0.0248952 0.0042078 5.916 3.79e-09 ***
## TEAM_PITCHING_H -0.0007768 0.0003209 -2.420 0.015585 *
## TEAM_PITCHING_SO 0.0029673 0.0006719 4.416 1.05e-05 ***
## TEAM_FIELDING_E -0.0189976 0.0023899 -7.949 2.93e-15 ***
## TEAM_FIELDING_DP -0.1217989 0.0129254 -9.423 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.07 on 2264 degrees of freedom
## Multiple R-squared: 0.3151, Adjusted R-squared: 0.3118
## F-statistic: 94.7 on 11 and 2264 DF, p-value: < 2.2e-16
## Refit the lm4 specification with lm() so that the base-R regression
## diagnostic plots can be drawn for it. The formula has to match lm4 exactly:
## lm4 omits TEAM_PITCHING_HR and TEAM_PITCHING_BB, so they must not appear
## here either, otherwise the plots describe the lm3 model instead of lm4.
lmplot4 <- lm(
  TARGET_WINS ~ TEAM_BATTING_1B + TEAM_BATTING_2B + TEAM_BATTING_3B +
    TEAM_BATTING_HR + TEAM_BATTING_WALKS + TEAM_BATTING_SO +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_PITCHING_SO +
    TEAM_FIELDING_E + TEAM_FIELDING_DP,
  data = train
)
plot(lmplot4)




## Score the evaluation data with lm4 and write the submission file:
## one row per evaluation record, keyed by its INDEX value.
prediction4 <- predict(lm4, test)
s <- data.frame(Id = test$INDEX, TARGET_WINS = prediction4)
## Use FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv(s, file = "Kevin Clifford_Moneyball.csv", row.names = FALSE)
## Collect the four fitted caret models and compare their resampling
## distributions (MAE, RMSE, R-squared). The collection is named model_list
## rather than `list` so that it does not shadow base::list.
model_list <- list(lm = lm, lm2 = lm2, lm3 = lm3, lm4 = lm4)
resamps <- resamples(model_list)
summary(resamps)
##
## Call:
## summary.resamples(object = resamps)
##
## Models: lm, lm2, lm3, lm4
## Number of resamples: 25
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## lm 9.961277 10.16730 10.25491 10.32272 10.49488 10.88444 0
## lm2 9.835041 10.08716 10.31951 10.29282 10.47781 10.99912 0
## lm3 9.952265 10.27558 10.41906 10.39976 10.51371 10.71350 0
## lm4 9.688258 10.05410 10.24484 10.21860 10.34119 10.72937 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## lm 12.60857 13.01832 13.29922 13.38482 13.48333 15.06184 0
## lm2 12.85879 12.97175 13.38597 13.43261 13.57570 15.79908 0
## lm3 12.83118 13.21200 13.49722 13.63651 13.80887 16.80105 0
## lm4 12.42088 12.81473 13.16864 13.12713 13.30879 13.93305 0
##
## Rsquared
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## lm 0.1637539 0.2720980 0.2982756 0.2828243 0.3065852 0.3525814 0
## lm2 0.1646494 0.2586982 0.2934729 0.2812600 0.3161060 0.3465058 0
## lm3 0.1270404 0.2364335 0.2682614 0.2656024 0.3018795 0.3311813 0
## lm4 0.2294138 0.2667416 0.3044849 0.2989129 0.3202999 0.3567697 0
## Box-and-whisker plot of the resampled RMSE for each model.
bwplot(resamps, metric = "RMSE")

## Predictions from the fourth model (lm4)
print(prediction4)
## 1 2 3 4 5 6 7 8
## 64.00418 65.62409 75.29557 85.65602 66.44192 69.70606 78.38523 77.51147
## 9 10 11 12 13 14 15 16
## 71.16935 74.21666 69.88395 82.59223 82.17443 82.18074 84.66775 77.45826
## 17 18 19 20 21 22 23 24
## 74.83401 78.50093 73.52244 91.49028 81.59694 83.84399 81.60889 72.50299
## 25 26 27 28 29 30 31 32
## 81.66746 86.58812 52.07475 75.70539 84.40925 75.98160 90.88176 85.57752
## 33 34 35 36 37 38 39 40
## 82.63780 85.03399 81.02759 87.04153 76.14703 90.74307 85.69786 92.98750
## 41 42 43 44 45 46 47 48
## 82.62593 90.54931 29.15563 100.31875 89.57609 92.85505 98.12423 77.66925
## 49 50 51 52 53 54 55 56
## 70.49839 79.94731 76.80872 84.82076 78.32531 74.05676 75.97034 78.74607
## 57 58 59 60 61 62 63 64
## 92.92158 75.56843 65.28878 80.46285 87.05764 74.48347 87.96369 85.19227
## 65 66 67 68 69 70 71 72
## 83.09928 94.76139 78.20840 83.42524 78.38069 89.11923 86.92432 69.56969
## 73 74 75 76 77 78 79 80
## 77.32890 89.30181 82.27432 86.40923 81.62466 83.25395 73.59094 77.43153
## 81 82 83 84 85 86 87 88
## 84.57377 89.10235 97.79658 75.10009 86.11691 79.73819 82.27755 83.51349
## 89 90 91 92 93 94 95 96
## 87.26894 89.58173 78.47304 84.39198 75.74736 86.22996 84.94220 84.72691
## 97 98 99 100 101 102 103 104
## 88.30910 104.65980 87.39215 87.05741 80.24811 74.66593 84.21093 84.27829
## 105 106 107 108 109 110 111 112
## 80.04728 70.39091 52.96351 77.39627 86.53134 59.56109 83.32387 83.44210
## 113 114 115 116 117 118 119 120
## 92.86681 91.02426 80.88640 78.04504 85.44422 80.09144 75.17375 73.79603
## 121 122 123 124 125 126 127 128
## 90.48837 70.65497 70.83683 69.08646 69.74598 88.54585 92.73335 77.91948
## 129 130 131 132 133 134 135 136
## 93.59647 92.66841 86.84226 78.58701 79.69745 85.92237 86.90648 73.01206
## 137 138 139 140 141 142 143 144
## 73.85382 77.47655 84.26845 80.40082 67.96089 74.49130 90.74362 74.65175
## 145 146 147 148 149 150 151 152
## 71.83890 72.47550 78.04253 78.67625 78.72615 82.74561 82.23046 80.05632
## 153 154 155 156 157 158 159 160
## 37.29047 71.39215 77.18104 70.59751 88.73258 65.04440 96.33973 75.91820
## 161 162 163 164 165 166 167 168
## 105.54188 107.24926 94.37640 104.60994 98.61810 89.51059 81.83453 80.44973
## 169 170 171 172 173 174 175 176
## 72.80115 80.09204 90.02777 88.62996 81.05232 94.21169 84.31884 73.73865
## 177 178 179 180 181 182 183 184
## 77.40234 71.33349 74.78871 79.37394 84.07195 88.47303 84.60065 85.33382
## 185 186 187 188 189 190 191 192
## 88.20775 92.27669 86.87870 54.23700 58.84117 112.16294 74.31672 82.82364
## 193 194 195 196 197 198 199 200
## 77.33662 77.69147 80.77480 68.79504 79.24417 84.58739 80.10226 85.41686
## 201 202 203 204 205 206 207 208
## 77.43499 80.55952 74.74920 87.99361 80.13479 83.45366 78.03317 77.72922
## 209 210 211 212 213 214 215 216
## 79.98223 73.16086 104.81745 93.42431 82.85094 65.83109 69.28941 84.50872
## 217 218 219 220 221 222 223 224
## 79.89789 91.71781 77.39208 78.59881 78.62456 73.86714 81.57110 74.01862
## 225 226 227 228 229 230 231 232
## 86.45604 74.94264 81.65661 79.67162 81.59493 70.80176 80.07643 92.78061
## 233 234 235 236 237 238 239 240
## 78.67870 88.92478 80.73262 75.65273 83.45927 77.42813 91.96333 73.15729
## 241 242 243 244 245 246 247 248
## 90.01150 85.89853 83.27760 80.76670 61.25411 86.56774 81.03800 85.02030
## 249 250 251 252 253 254 255 256
## 72.83782 82.84458 81.33910 62.80318 92.76028 50.22280 69.61516 76.81198
## 257 258 259
## 81.51005 81.96020 77.67332