0 which are impossible## Warning: package 'stringr' was built under R version 3.4.4
## Warning: package 'corrplot' was built under R version 3.4.4
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| INDEX | 1 | 2276 | 1268.46353 | 736.34904 | 1270.5 | 1268.56970 | 952.5705 | 1 | 2535 | 2534 | 0.0042149 | -1.2167564 | 15.4346788 |
| TARGET_WINS | 2 | 2276 | 80.79086 | 15.75215 | 82.0 | 81.31229 | 14.8260 | 0 | 146 | 146 | -0.3987232 | 1.0274757 | 0.3301823 |
| TEAM_BATTING_H | 3 | 2276 | 1469.26977 | 144.59120 | 1454.0 | 1459.04116 | 114.1602 | 891 | 2554 | 1663 | 1.5713335 | 7.2785261 | 3.0307891 |
| TEAM_BATTING_2B | 4 | 2276 | 241.24692 | 46.80141 | 238.0 | 240.39627 | 47.4432 | 69 | 458 | 389 | 0.2151018 | 0.0061609 | 0.9810087 |
| TEAM_BATTING_3B | 5 | 2276 | 55.25000 | 27.93856 | 47.0 | 52.17563 | 23.7216 | 0 | 223 | 223 | 1.1094652 | 1.5032418 | 0.5856226 |
| TEAM_BATTING_HR | 6 | 2276 | 99.61204 | 60.54687 | 102.0 | 97.38529 | 78.5778 | 0 | 264 | 264 | 0.1860421 | -0.9631189 | 1.2691285 |
| TEAM_BATTING_BB | 7 | 2276 | 501.55888 | 122.67086 | 512.0 | 512.18331 | 94.8864 | 0 | 878 | 878 | -1.0257599 | 2.1828544 | 2.5713150 |
| TEAM_BATTING_SO | 8 | 2174 | 735.60534 | 248.52642 | 750.0 | 742.31322 | 284.6592 | 0 | 1399 | 1399 | -0.2978001 | -0.3207992 | 5.3301912 |
| TEAM_BASERUN_SB | 9 | 2145 | 124.76177 | 87.79117 | 101.0 | 110.81188 | 60.7866 | 0 | 697 | 697 | 1.9724140 | 5.4896754 | 1.8955584 |
| TEAM_BASERUN_CS | 10 | 1504 | 52.80386 | 22.95634 | 49.0 | 50.35963 | 17.7912 | 0 | 201 | 201 | 1.9762180 | 7.6203818 | 0.5919414 |
| TEAM_BATTING_HBP | 11 | 191 | 59.35602 | 12.96712 | 58.0 | 58.86275 | 11.8608 | 29 | 95 | 66 | 0.3185754 | -0.1119828 | 0.9382681 |
| TEAM_PITCHING_H | 12 | 2276 | 1779.21046 | 1406.84293 | 1518.0 | 1555.89517 | 174.9468 | 1137 | 30132 | 28995 | 10.3295111 | 141.8396985 | 29.4889618 |
| TEAM_PITCHING_HR | 13 | 2276 | 105.69859 | 61.29875 | 107.0 | 103.15697 | 74.1300 | 0 | 343 | 343 | 0.2877877 | -0.6046311 | 1.2848886 |
| TEAM_PITCHING_BB | 14 | 2276 | 553.00791 | 166.35736 | 536.5 | 542.62459 | 98.5929 | 0 | 3645 | 3645 | 6.7438995 | 96.9676398 | 3.4870317 |
| TEAM_PITCHING_SO | 15 | 2174 | 817.73045 | 553.08503 | 813.5 | 796.93391 | 257.2311 | 0 | 19278 | 19278 | 22.1745535 | 671.1891292 | 11.8621151 |
| TEAM_FIELDING_E | 16 | 2276 | 246.48067 | 227.77097 | 159.0 | 193.43798 | 62.2692 | 65 | 1898 | 1833 | 2.9904656 | 10.9702717 | 4.7743279 |
| TEAM_FIELDING_DP | 17 | 1990 | 146.38794 | 26.22639 | 149.0 | 147.57789 | 23.7216 | 52 | 228 | 176 | -0.3889390 | 0.1817397 | 0.5879114 |
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 1 | 2276 | 80.79086 | 15.75215 | 82 | 81.31229 | 14.826 | 0 | 146 | 146 | -0.3987232 | 1.027476 | 0.3301823 |
Although not exact as seasons since 1800’s havent all been 162 games, but my interval test shows a left tail interval of 80.35-81 which our 80.79 mean falls within confidently
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 1 | 2255 | 80.58537 | 15.05826 | 82 | 81.18726 | 14.826 | 21 | 116 | 95 | -0.4395385 | 0.3152447 | 0.3171039 |
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| TEAM_BATTING_H | 1 | 2255 | 1466.81685 | 135.92839 | 1454 | 1458.24377 | 114.1602 | 992 | 2496 | 1504 | 1.2098420 | 4.8627441 | 2.8624434 |
| TEAM_BATTING_2B | 2 | 2255 | 241.09490 | 46.28571 | 238 | 240.29695 | 47.4432 | 69 | 458 | 389 | 0.2012387 | -0.0427041 | 0.9747061 |
| TEAM_BATTING_3B | 3 | 2255 | 54.93348 | 27.63678 | 47 | 51.89695 | 23.7216 | 0 | 223 | 223 | 1.1205339 | 1.5849716 | 0.5819881 |
| TEAM_BATTING_HR | 4 | 2255 | 100.22395 | 60.39496 | 103 | 98.07867 | 77.0952 | 0 | 264 | 264 | 0.1771368 | -0.9584953 | 1.2718252 |
| TEAM_BATTING_BB | 5 | 2255 | 503.80089 | 119.41435 | 513 | 513.29640 | 93.4038 | 29 | 878 | 849 | -0.9614819 | 2.1131941 | 2.5146829 |
| TEAM_BATTING_SO | 6 | 2155 | 739.51323 | 244.66509 | 753 | 744.96464 | 284.6592 | 0 | 1399 | 1399 | -0.2526910 | -0.4140827 | 5.2704582 |
| TEAM_BATTING_HBP | 7 | 191 | 59.35602 | 12.96712 | 58 | 58.86275 | 11.8608 | 29 | 95 | 66 | 0.3185754 | -0.1119828 | 0.9382681 |
| Singles | 8 | 2255 | 1070.56452 | 121.79442 | 1049 | 1058.05319 | 97.8516 | 709 | 2112 | 1403 | 1.7574832 | 6.7989398 | 2.5648036 |
## TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## breaks Integer,17 Integer,10 Numeric,13
## counts Integer,16 Integer,9 Integer,12
## density Numeric,16 Numeric,9 Numeric,12
## mids Numeric,16 Numeric,9 Numeric,12
## xname "dots[[1L]][[1L]]" "dots[[1L]][[2L]]" "dots[[1L]][[3L]]"
## equidist TRUE TRUE TRUE
## TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO
## breaks Numeric,15 Numeric,19 Numeric,15
## counts Integer,14 Integer,18 Integer,14
## density Numeric,14 Numeric,18 Numeric,14
## mids Numeric,14 Numeric,18 Numeric,14
## xname "dots[[1L]][[4L]]" "dots[[1L]][[5L]]" "dots[[1L]][[6L]]"
## equidist TRUE TRUE TRUE
## TEAM_BATTING_HBP Singles
## breaks Integer,9 Integer,16
## counts Integer,8 Integer,15
## density Numeric,8 Numeric,15
## mids Numeric,8 Numeric,15
## xname "dots[[1L]][[7L]]" "dots[[1L]][[8L]]"
## equidist TRUE TRUE
## TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B TEAM_BATTING_HR
## stats Numeric,5 Numeric,5 Numeric,5 Numeric,5
## n 2255 2255 2255 2255
## conf Numeric,2 Numeric,2 Numeric,2 Numeric,2
## out Numeric,61 Numeric,11 Numeric,26 Numeric,0
## group Numeric,61 Numeric,11 Numeric,26 Numeric,0
## names "" "" "" ""
## TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BATTING_HBP Singles
## stats Numeric,5 Numeric,5 Numeric,5 Numeric,5
## n 2255 2155 191 2255
## conf Numeric,2 Numeric,2 Numeric,2 Numeric,2
## out Numeric,120 Numeric,0 95 Numeric,70
## group Numeric,120 Numeric,0 1 Numeric,70
## names "" "" "" ""
0's per category. Given that the other categories in these observations seem in line, some sort of data entry mistake was likely made again. In this case, lets replace the 0’s with NA.## Warning in `[<-.data.frame`(`*tmp*`, , c(3:8, 11), value = structure(list(:
## provided 8 variables to replace 7 variables
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| TEAM_BATTING_H | 1 | 2255 | 1466.81685 | 135.92839 | 1454.0 | 1458.24377 | 114.1602 | 992 | 2496 | 1504 | 1.2098420 | 4.8627441 | 2.8624434 |
| TEAM_BATTING_2B | 2 | 2255 | 241.09490 | 46.28571 | 238.0 | 240.29695 | 47.4432 | 69 | 458 | 389 | 0.2012387 | -0.0427041 | 0.9747061 |
| TEAM_BATTING_3B | 3 | 2254 | 54.95785 | 27.61866 | 47.0 | 51.91075 | 23.7216 | 8 | 223 | 215 | 1.1240833 | 1.5881365 | 0.5817357 |
| TEAM_BATTING_HR | 4 | 2244 | 100.71524 | 60.13266 | 104.0 | 98.55679 | 75.6126 | 3 | 264 | 261 | 0.1785408 | -0.9565209 | 1.2694014 |
| TEAM_BATTING_BB | 5 | 2255 | 503.80089 | 119.41435 | 513.0 | 513.29640 | 93.4038 | 29 | 878 | 849 | -0.9614819 | 2.1131941 | 2.5146829 |
| TEAM_BATTING_SO | 6 | 2140 | 744.69673 | 237.52652 | 756.5 | 747.73598 | 283.9179 | 67 | 1399 | 1332 | -0.1320162 | -0.7184334 | 5.1345834 |
| TEAM_BATTING_HBP | 7 | 191 | 59.35602 | 12.96712 | 58.0 | 58.86275 | 11.8608 | 29 | 95 | 66 | 0.3185754 | -0.1119828 | 0.9382681 |
| Singles | 8 | 2255 | 1070.56452 | 121.79442 | 1049.0 | 1058.05319 | 97.8516 | 709 | 2112 | 1403 | 1.7574832 | 6.7989398 | 2.5648036 |
| na_count | |
|---|---|
| INDEX | 0 |
| TARGET_WINS | 0 |
| TEAM_BATTING_H | 0 |
| TEAM_BATTING_2B | 0 |
| TEAM_BATTING_3B | 1 |
| TEAM_BATTING_HR | 11 |
| TEAM_BATTING_BB | 0 |
| TEAM_BATTING_SO | 115 |
| TEAM_BASERUN_SB | 121 |
| TEAM_BASERUN_CS | 757 |
| TEAM_BATTING_HBP | 2064 |
| TEAM_PITCHING_H | 0 |
| TEAM_PITCHING_HR | 0 |
| TEAM_PITCHING_BB | 0 |
| TEAM_PITCHING_SO | 100 |
| TEAM_FIELDING_E | 0 |
| TEAM_FIELDING_DP | 271 |
| Singles | 0 |
## Change in batting dataset
batting <- batting %>%
filter(TEAM_BATTING_3B < 154 &
TEAM_BATTING_3B > 10 &
TEAM_BATTING_2B < 377 &
TEAM_BATTING_2B > 109 &
TEAM_BATTING_H < 1784 &
TEAM_BATTING_BB > 281 &
TEAM_BATTING_BB < 836 &
Singles < 1339 &
Singles > 810 )
# FILL Strikeout NA's with median value
batting$TEAM_BATTING_SO[is.na(batting$TEAM_BATTING_SO)] <- median(batting$TEAM_BATTING_SO, na.rm=TRUE)
## Change in overall dataset's
money_ball_train_2 <- money_ball_train_2 %>%
filter(TEAM_BATTING_3B < 154 &
TEAM_BATTING_3B > 10 &
TEAM_BATTING_2B < 377 &
TEAM_BATTING_2B > 109 &
TEAM_BATTING_H < 1784 &
TEAM_BATTING_BB > 281 &
TEAM_BATTING_BB < 836 &
Singles < 1339 &
Singles > 810 )
# FILL Strikeout NA's with median value
money_ball_train_2$TEAM_BATTING_SO[is.na(money_ball_train_2$TEAM_BATTING_SO)] <- median(money_ball_train_2$TEAM_BATTING_SO, na.rm=TRUE)
money_eval <- money_eval %>%
filter(TEAM_BATTING_3B < 154 &
TEAM_BATTING_3B > 10 &
TEAM_BATTING_2B < 377 &
TEAM_BATTING_2B > 109 &
TEAM_BATTING_H < 1784 &
TEAM_BATTING_BB > 281 &
TEAM_BATTING_BB < 836 &
Singles < 1339 &
Singles > 810 )
# FILL Strikeout NA's with median value
money_eval$TEAM_BATTING_SO[is.na(money_eval$TEAM_BATTING_SO)] <- median(money_eval$TEAM_BATTING_SO, na.rm=TRUE)## TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B TEAM_BATTING_HR
## stats Numeric,5 Numeric,5 Numeric,5 Numeric,5
## n 2092 2092 2092 2092
## conf Numeric,2 Numeric,2 Numeric,2 Numeric,2
## out Numeric,15 Numeric,6 Numeric,29 Numeric,0
## group Numeric,15 Numeric,6 Numeric,29 Numeric,0
## names "" "" "" ""
## TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BATTING_HBP Singles
## stats Numeric,5 Numeric,5 Numeric,5 Numeric,5
## n 2092 2092 191 2092
## conf Numeric,2 Numeric,2 Numeric,2 Numeric,2
## out Numeric,12 Numeric,0 95 Numeric,15
## group Numeric,12 Numeric,0 1 Numeric,15
## names "" "" "" ""
| na_count | |
|---|---|
| INDEX | 0 |
| TARGET_WINS | 0 |
| TEAM_BATTING_H | 0 |
| TEAM_BATTING_2B | 0 |
| TEAM_BATTING_3B | 0 |
| TEAM_BATTING_HR | 0 |
| TEAM_BATTING_BB | 0 |
| TEAM_BATTING_SO | 0 |
| TEAM_BASERUN_SB | 0 |
| TEAM_BASERUN_CS | 0 |
| TEAM_BATTING_HBP | 1853 |
| TEAM_PITCHING_H | 0 |
| TEAM_PITCHING_HR | 0 |
| TEAM_PITCHING_BB | 0 |
| TEAM_PITCHING_SO | 100 |
| TEAM_FIELDING_E | 0 |
| TEAM_FIELDING_DP | 128 |
| Singles | 0 |
pitching <- money_ball_train_2 %>%
select(2,12:17)
kable(describe(pitching))| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| TARGET_WINS | 1 | 2044 | 80.52789 | 13.89309 | 81.0 | 80.96638 | 14.8260 | 21 | 116 | 95 | -0.3090862 | -0.0162643 | 0.3072970 |
| TEAM_PITCHING_H | 2 | 2044 | 1539.55479 | 220.16288 | 1498.0 | 1513.97127 | 146.7774 | 1137 | 6723 | 5586 | 7.2180667 | 150.1985045 | 4.8697161 |
| TEAM_PITCHING_HR | 3 | 2044 | 111.71771 | 59.92485 | 114.0 | 110.10391 | 68.1996 | 3 | 343 | 340 | 0.1784958 | -0.5918350 | 1.3254597 |
| TEAM_PITCHING_BB | 4 | 2044 | 553.66830 | 109.40697 | 541.0 | 546.10452 | 91.9212 | 312 | 2169 | 1857 | 2.2520497 | 23.8773902 | 2.4199397 |
| TEAM_PITCHING_SO | 5 | 1944 | 807.61060 | 228.05424 | 818.0 | 803.06427 | 250.5594 | 301 | 2309 | 2008 | 0.3567090 | 0.6392627 | 5.1723752 |
| TEAM_FIELDING_E | 6 | 2044 | 188.89922 | 109.36885 | 150.5 | 166.38570 | 49.6671 | 65 | 765 | 700 | 2.3312285 | 5.9463732 | 2.4190964 |
| TEAM_FIELDING_DP | 7 | 1916 | 147.77192 | 24.99992 | 149.0 | 148.74967 | 23.7216 | 68 | 228 | 160 | -0.3456466 | 0.2527489 | 0.5711377 |
par(mfrow=c(2, 4))
mapply(hist,pitching,main=colnames(pitching),xlab="FIG-5",col="blue")## TARGET_WINS TEAM_PITCHING_H TEAM_PITCHING_HR
## breaks Integer,11 Integer,13 Numeric,8
## counts Integer,10 Integer,12 Integer,7
## density Numeric,10 Numeric,12 Numeric,7
## mids Numeric,10 Numeric,12 Numeric,7
## xname "dots[[1L]][[1L]]" "dots[[1L]][[2L]]" "dots[[1L]][[3L]]"
## equidist TRUE TRUE TRUE
## TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E
## breaks Integer,11 Integer,12 Integer,16
## counts Integer,10 Integer,11 Integer,15
## density Numeric,10 Numeric,11 Numeric,15
## mids Numeric,10 Numeric,11 Numeric,15
## xname "dots[[1L]][[4L]]" "dots[[1L]][[5L]]" "dots[[1L]][[6L]]"
## equidist TRUE TRUE TRUE
## TEAM_FIELDING_DP
## breaks Integer,18
## counts Integer,17
## density Numeric,17
## mids Numeric,17
## xname "dots[[1L]][[7L]]"
## equidist TRUE
mtext("Fig 5", SOUTH<-1, line=2, adj=5.0,
col="blue")
par(mfrow=c(2, 4)) mapply(boxplot,pitching,main=colnames(pitching),xlab=" FIG-6")## TARGET_WINS TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB
## stats Numeric,5 Numeric,5 Numeric,5 Numeric,5
## n 2044 2044 2044 2044
## conf Numeric,2 Numeric,2 Numeric,2 Numeric,2
## out Numeric,7 Numeric,99 Numeric,3 Numeric,49
## group Numeric,7 Numeric,99 Numeric,3 Numeric,49
## names "" "" "" ""
## TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP
## stats Numeric,5 Numeric,5 Numeric,5
## n 1944 2044 1916
## conf Numeric,2 Numeric,2 Numeric,2
## out Numeric,9 Numeric,178 Numeric,36
## group Numeric,9 Numeric,178 Numeric,36
## names "" "" ""
M <- cor(money_ball_train_2,use = "pairwise.complete.obs")
corrplot(M)## total_equal_categories
## 1 785
{r, ref.label=knitr::all_labels(),echo=TRUE,eval=FALSE}