I. Data Exploration

library(ggplot2)
library(reshape)
library(reshape2)
library(corrplot)
#library(Hmisc)
library(psych)
#library(PerformanceAnalytics)
library(MASS)
require(readr)

#get moneyball training dataset from the GitHub repository

moneyball <- read.csv(file = "moneyball-training-data.csv", header = TRUE)

# Preview the first ten rows to see which columns and values the dataset holds
head(moneyball, n = 10)
##    INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## 1      1          39           1445             194              39
## 2      2          70           1339             219              22
## 3      3          86           1377             232              35
## 4      4          70           1387             209              38
## 5      5          82           1297             186              27
## 6      6          75           1279             200              36
## 7      7          80           1244             179              54
## 8      8          85           1273             171              37
## 9     11          86           1391             197              40
## 10    12          76           1271             213              18
##    TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## 1               13             143             842              NA
## 2              190             685            1075              37
## 3              137             602             917              46
## 4               96             451             922              43
## 5              102             472             920              49
## 6               92             443             973             107
## 7              122             525            1062              80
## 8              115             456            1027              40
## 9              114             447             922              69
## 10              96             441             827              72
##    TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
## 1               NA               NA            9364               84
## 2               28               NA            1347              191
## 3               27               NA            1377              137
## 4               30               NA            1396               97
## 5               39               NA            1297              102
## 6               59               NA            1279               92
## 7               54               NA            1244              122
## 8               36               NA            1281              116
## 9               27               NA            1391              114
## 10              34               NA            1271               96
##    TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP
## 1               927             5456            1011               NA
## 2               689             1082             193              155
## 3               602              917             175              153
## 4               454              928             164              156
## 5               472              920             138              168
## 6               443              973             123              149
## 7               525             1062             136              186
## 8               459             1033             112              136
## 9               447              922             127              169
## 10              441              827             131              159
# Size of the moneyball data frame: row count first, then column count
nrow(moneyball)
## [1] 2276
ncol(moneyball)
## [1] 17
# Names of the columns in the moneyball data frame
names(moneyball)
##  [1] "INDEX"            "TARGET_WINS"      "TEAM_BATTING_H"  
##  [4] "TEAM_BATTING_2B"  "TEAM_BATTING_3B"  "TEAM_BATTING_HR" 
##  [7] "TEAM_BATTING_BB"  "TEAM_BATTING_SO"  "TEAM_BASERUN_SB" 
## [10] "TEAM_BASERUN_CS"  "TEAM_BATTING_HBP" "TEAM_PITCHING_H" 
## [13] "TEAM_PITCHING_HR" "TEAM_PITCHING_BB" "TEAM_PITCHING_SO"
## [16] "TEAM_FIELDING_E"  "TEAM_FIELDING_DP"
# Count the missing (NA) values in each column of the moneyball data frame
# and show the counts as a one-column data frame, one row per input column
na_count <- data.frame(
  na_count = vapply(moneyball, function(column) sum(is.na(column)), integer(1))
)
na_count
##                  na_count
## INDEX                   0
## TARGET_WINS             0
## TEAM_BATTING_H          0
## TEAM_BATTING_2B         0
## TEAM_BATTING_3B         0
## TEAM_BATTING_HR         0
## TEAM_BATTING_BB         0
## TEAM_BATTING_SO       102
## TEAM_BASERUN_SB       131
## TEAM_BASERUN_CS       772
## TEAM_BATTING_HBP     2085
## TEAM_PITCHING_H         0
## TEAM_PITCHING_HR        0
## TEAM_PITCHING_BB        0
## TEAM_PITCHING_SO      102
## TEAM_FIELDING_E         0
## TEAM_FIELDING_DP      286
summary(moneyball)
##      INDEX         TARGET_WINS     TEAM_BATTING_H TEAM_BATTING_2B
##  Min.   :   1.0   Min.   :  0.00   Min.   : 891   Min.   : 69.0  
##  1st Qu.: 630.8   1st Qu.: 71.00   1st Qu.:1383   1st Qu.:208.0  
##  Median :1270.5   Median : 82.00   Median :1454   Median :238.0  
##  Mean   :1268.5   Mean   : 80.79   Mean   :1469   Mean   :241.2  
##  3rd Qu.:1915.5   3rd Qu.: 92.00   3rd Qu.:1537   3rd Qu.:273.0  
##  Max.   :2535.0   Max.   :146.00   Max.   :2554   Max.   :458.0  
##                                                                  
##  TEAM_BATTING_3B  TEAM_BATTING_HR  TEAM_BATTING_BB TEAM_BATTING_SO 
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.0   Min.   :   0.0  
##  1st Qu.: 34.00   1st Qu.: 42.00   1st Qu.:451.0   1st Qu.: 548.0  
##  Median : 47.00   Median :102.00   Median :512.0   Median : 750.0  
##  Mean   : 55.25   Mean   : 99.61   Mean   :501.6   Mean   : 735.6  
##  3rd Qu.: 72.00   3rd Qu.:147.00   3rd Qu.:580.0   3rd Qu.: 930.0  
##  Max.   :223.00   Max.   :264.00   Max.   :878.0   Max.   :1399.0  
##                                                    NA's   :102     
##  TEAM_BASERUN_SB TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H
##  Min.   :  0.0   Min.   :  0.0   Min.   :29.00    Min.   : 1137  
##  1st Qu.: 66.0   1st Qu.: 38.0   1st Qu.:50.50    1st Qu.: 1419  
##  Median :101.0   Median : 49.0   Median :58.00    Median : 1518  
##  Mean   :124.8   Mean   : 52.8   Mean   :59.36    Mean   : 1779  
##  3rd Qu.:156.0   3rd Qu.: 62.0   3rd Qu.:67.00    3rd Qu.: 1682  
##  Max.   :697.0   Max.   :201.0   Max.   :95.00    Max.   :30132  
##  NA's   :131     NA's   :772     NA's   :2085                    
##  TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO  TEAM_FIELDING_E 
##  Min.   :  0.0    Min.   :   0.0   Min.   :    0.0   Min.   :  65.0  
##  1st Qu.: 50.0    1st Qu.: 476.0   1st Qu.:  615.0   1st Qu.: 127.0  
##  Median :107.0    Median : 536.5   Median :  813.5   Median : 159.0  
##  Mean   :105.7    Mean   : 553.0   Mean   :  817.7   Mean   : 246.5  
##  3rd Qu.:150.0    3rd Qu.: 611.0   3rd Qu.:  968.0   3rd Qu.: 249.2  
##  Max.   :343.0    Max.   :3645.0   Max.   :19278.0   Max.   :1898.0  
##                                    NA's   :102                       
##  TEAM_FIELDING_DP
##  Min.   : 52.0   
##  1st Qu.:131.0   
##  Median :149.0   
##  Mean   :146.4   
##  3rd Qu.:164.0   
##  Max.   :228.0   
##  NA's   :286
describe(moneyball)
##                  vars    n    mean      sd median trimmed    mad  min
## INDEX               1 2276 1268.46  736.35 1270.5 1268.57 952.57    1
## TARGET_WINS         2 2276   80.79   15.75   82.0   81.31  14.83    0
## TEAM_BATTING_H      3 2276 1469.27  144.59 1454.0 1459.04 114.16  891
## TEAM_BATTING_2B     4 2276  241.25   46.80  238.0  240.40  47.44   69
## TEAM_BATTING_3B     5 2276   55.25   27.94   47.0   52.18  23.72    0
## TEAM_BATTING_HR     6 2276   99.61   60.55  102.0   97.39  78.58    0
## TEAM_BATTING_BB     7 2276  501.56  122.67  512.0  512.18  94.89    0
## TEAM_BATTING_SO     8 2174  735.61  248.53  750.0  742.31 284.66    0
## TEAM_BASERUN_SB     9 2145  124.76   87.79  101.0  110.81  60.79    0
## TEAM_BASERUN_CS    10 1504   52.80   22.96   49.0   50.36  17.79    0
## TEAM_BATTING_HBP   11  191   59.36   12.97   58.0   58.86  11.86   29
## TEAM_PITCHING_H    12 2276 1779.21 1406.84 1518.0 1555.90 174.95 1137
## TEAM_PITCHING_HR   13 2276  105.70   61.30  107.0  103.16  74.13    0
## TEAM_PITCHING_BB   14 2276  553.01  166.36  536.5  542.62  98.59    0
## TEAM_PITCHING_SO   15 2174  817.73  553.09  813.5  796.93 257.23    0
## TEAM_FIELDING_E    16 2276  246.48  227.77  159.0  193.44  62.27   65
## TEAM_FIELDING_DP   17 1990  146.39   26.23  149.0  147.58  23.72   52
##                    max range  skew kurtosis    se
## INDEX             2535  2534  0.00    -1.22 15.43
## TARGET_WINS        146   146 -0.40     1.03  0.33
## TEAM_BATTING_H    2554  1663  1.57     7.28  3.03
## TEAM_BATTING_2B    458   389  0.22     0.01  0.98
## TEAM_BATTING_3B    223   223  1.11     1.50  0.59
## TEAM_BATTING_HR    264   264  0.19    -0.96  1.27
## TEAM_BATTING_BB    878   878 -1.03     2.18  2.57
## TEAM_BATTING_SO   1399  1399 -0.30    -0.32  5.33
## TEAM_BASERUN_SB    697   697  1.97     5.49  1.90
## TEAM_BASERUN_CS    201   201  1.98     7.62  0.59
## TEAM_BATTING_HBP    95    66  0.32    -0.11  0.94
## TEAM_PITCHING_H  30132 28995 10.33   141.84 29.49
## TEAM_PITCHING_HR   343   343  0.29    -0.60  1.28
## TEAM_PITCHING_BB  3645  3645  6.74    96.97  3.49
## TEAM_PITCHING_SO 19278 19278 22.17   671.19 11.86
## TEAM_FIELDING_E   1898  1833  2.99    10.97  4.77
## TEAM_FIELDING_DP   228   176 -0.39     0.18  0.59
# Class of every column in the moneyball data frame
col_class <- sapply(X = moneyball, FUN = class)
print(col_class)
##            INDEX      TARGET_WINS   TEAM_BATTING_H  TEAM_BATTING_2B 
##        "integer"        "integer"        "integer"        "integer" 
##  TEAM_BATTING_3B  TEAM_BATTING_HR  TEAM_BATTING_BB  TEAM_BATTING_SO 
##        "integer"        "integer"        "integer"        "integer" 
##  TEAM_BASERUN_SB  TEAM_BASERUN_CS TEAM_BATTING_HBP  TEAM_PITCHING_H 
##        "integer"        "integer"        "integer"        "integer" 
## TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO  TEAM_FIELDING_E 
##        "integer"        "integer"        "integer"        "integer" 
## TEAM_FIELDING_DP 
##        "integer"
# Reshape the data to long format (one row per variable/value pair) so that
# every column can be drawn in its own facet.  melt() is called with an
# explicit namespace because reshape and reshape2 are both attached and each
# provides a melt(); the packages themselves are already loaded at the top of
# the script, so no further require() calls are needed here.
meltMoneyball <- reshape2::melt(moneyball)

# One boxplot per variable, each facet with its own axis ranges.  The
# `scales` argument is written out in full instead of relying on partial
# matching of `scale`.
bp <- ggplot(meltMoneyball, aes(variable, value)) +
  geom_boxplot() +
  facet_wrap(~variable, scales = "free")
bp

# Correlation matrix of all columns of moneyball.  With its default settings
# cor() returns NA for every pair that involves a column containing missing
# values, which is why several columns in the output below are entirely NA.
# NOTE(review): cor(moneyball, use = "pairwise.complete.obs") would give
# values for those columns as well - confirm whether that is wanted here.
M<-cor(moneyball)
# Show the first six rows of the matrix, rounded to two decimals
head(round(M,2))
##                 INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B
## INDEX            1.00       -0.02          -0.02            0.01
## TARGET_WINS     -0.02        1.00           0.39            0.29
## TEAM_BATTING_H  -0.02        0.39           1.00            0.56
## TEAM_BATTING_2B  0.01        0.29           0.56            1.00
## TEAM_BATTING_3B -0.01        0.14           0.43           -0.11
## TEAM_BATTING_HR  0.05        0.18          -0.01            0.44
##                 TEAM_BATTING_3B TEAM_BATTING_HR TEAM_BATTING_BB
## INDEX                     -0.01            0.05           -0.03
## TARGET_WINS                0.14            0.18            0.23
## TEAM_BATTING_H             0.43           -0.01           -0.07
## TEAM_BATTING_2B           -0.11            0.44            0.26
## TEAM_BATTING_3B            1.00           -0.64           -0.29
## TEAM_BATTING_HR           -0.64            1.00            0.51
##                 TEAM_BATTING_SO TEAM_BASERUN_SB TEAM_BASERUN_CS
## INDEX                        NA              NA              NA
## TARGET_WINS                  NA              NA              NA
## TEAM_BATTING_H               NA              NA              NA
## TEAM_BATTING_2B              NA              NA              NA
## TEAM_BATTING_3B              NA              NA              NA
## TEAM_BATTING_HR              NA              NA              NA
##                 TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
## INDEX                         NA            0.02             0.05
## TARGET_WINS                   NA           -0.11             0.19
## TEAM_BATTING_H                NA            0.30             0.07
## TEAM_BATTING_2B               NA            0.02             0.45
## TEAM_BATTING_3B               NA            0.19            -0.57
## TEAM_BATTING_HR               NA           -0.25             0.97
##                 TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E
## INDEX                      -0.02               NA           -0.01
## TARGET_WINS                 0.12               NA           -0.18
## TEAM_BATTING_H              0.09               NA            0.26
## TEAM_BATTING_2B             0.18               NA           -0.24
## TEAM_BATTING_3B             0.00               NA            0.51
## TEAM_BATTING_HR             0.14               NA           -0.59
##                 TEAM_FIELDING_DP
## INDEX                         NA
## TARGET_WINS                   NA
## TEAM_BATTING_H                NA
## TEAM_BATTING_2B               NA
## TEAM_BATTING_3B               NA
## TEAM_BATTING_HR               NA
corrplot(M, method="circle")

knitr::opts_chunk$set(echo = TRUE)

II. Data Preparation

# Drop INDEX and TEAM_BATTING_HBP (the latter is missing in 2085 of the 2276
# rows).  The columns are removed by name rather than by position (1 and 11)
# so this step still removes the right columns if the column order of the
# input file ever changes.
drop_cols <- c("INDEX", "TEAM_BATTING_HBP")
moneyball_rev1 <- moneyball[setdiff(names(moneyball), drop_cols)]
head(moneyball_rev1, 10)
##    TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## 1           39           1445             194              39
## 2           70           1339             219              22
## 3           86           1377             232              35
## 4           70           1387             209              38
## 5           82           1297             186              27
## 6           75           1279             200              36
## 7           80           1244             179              54
## 8           85           1273             171              37
## 9           86           1391             197              40
## 10          76           1271             213              18
##    TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## 1               13             143             842              NA
## 2              190             685            1075              37
## 3              137             602             917              46
## 4               96             451             922              43
## 5              102             472             920              49
## 6               92             443             973             107
## 7              122             525            1062              80
## 8              115             456            1027              40
## 9              114             447             922              69
## 10              96             441             827              72
##    TEAM_BASERUN_CS TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB
## 1               NA            9364               84              927
## 2               28            1347              191              689
## 3               27            1377              137              602
## 4               30            1396               97              454
## 5               39            1297              102              472
## 6               59            1279               92              443
## 7               54            1244              122              525
## 8               36            1281              116              459
## 9               27            1391              114              447
## 10              34            1271               96              441
##    TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP
## 1              5456            1011               NA
## 2              1082             193              155
## 3               917             175              153
## 4               928             164              156
## 5               920             138              168
## 6               973             123              149
## 7              1062             136              186
## 8              1033             112              136
## 9               922             127              169
## 10              827             131              159
describe(moneyball_rev1)
##                  vars    n    mean      sd median trimmed    mad  min
## TARGET_WINS         1 2276   80.79   15.75   82.0   81.31  14.83    0
## TEAM_BATTING_H      2 2276 1469.27  144.59 1454.0 1459.04 114.16  891
## TEAM_BATTING_2B     3 2276  241.25   46.80  238.0  240.40  47.44   69
## TEAM_BATTING_3B     4 2276   55.25   27.94   47.0   52.18  23.72    0
## TEAM_BATTING_HR     5 2276   99.61   60.55  102.0   97.39  78.58    0
## TEAM_BATTING_BB     6 2276  501.56  122.67  512.0  512.18  94.89    0
## TEAM_BATTING_SO     7 2174  735.61  248.53  750.0  742.31 284.66    0
## TEAM_BASERUN_SB     8 2145  124.76   87.79  101.0  110.81  60.79    0
## TEAM_BASERUN_CS     9 1504   52.80   22.96   49.0   50.36  17.79    0
## TEAM_PITCHING_H    10 2276 1779.21 1406.84 1518.0 1555.90 174.95 1137
## TEAM_PITCHING_HR   11 2276  105.70   61.30  107.0  103.16  74.13    0
## TEAM_PITCHING_BB   12 2276  553.01  166.36  536.5  542.62  98.59    0
## TEAM_PITCHING_SO   13 2174  817.73  553.09  813.5  796.93 257.23    0
## TEAM_FIELDING_E    14 2276  246.48  227.77  159.0  193.44  62.27   65
## TEAM_FIELDING_DP   15 1990  146.39   26.23  149.0  147.58  23.72   52
##                    max range  skew kurtosis    se
## TARGET_WINS        146   146 -0.40     1.03  0.33
## TEAM_BATTING_H    2554  1663  1.57     7.28  3.03
## TEAM_BATTING_2B    458   389  0.22     0.01  0.98
## TEAM_BATTING_3B    223   223  1.11     1.50  0.59
## TEAM_BATTING_HR    264   264  0.19    -0.96  1.27
## TEAM_BATTING_BB    878   878 -1.03     2.18  2.57
## TEAM_BATTING_SO   1399  1399 -0.30    -0.32  5.33
## TEAM_BASERUN_SB    697   697  1.97     5.49  1.90
## TEAM_BASERUN_CS    201   201  1.98     7.62  0.59
## TEAM_PITCHING_H  30132 28995 10.33   141.84 29.49
## TEAM_PITCHING_HR   343   343  0.29    -0.60  1.28
## TEAM_PITCHING_BB  3645  3645  6.74    96.97  3.49
## TEAM_PITCHING_SO 19278 19278 22.17   671.19 11.86
## TEAM_FIELDING_E   1898  1833  2.99    10.97  4.77
## TEAM_FIELDING_DP   228   176 -0.39     0.18  0.59
# Median imputation: every missing value is replaced by the median of the
# observed values in the same column.
# seq_along() is used instead of 1:ncol() so that the loop body is skipped,
# rather than run for i = 1 and i = 0, when the data frame has no columns.
for (i in seq_along(moneyball_rev1)) {
  missing_rows <- is.na(moneyball_rev1[, i])
  moneyball_rev1[missing_rows, i] <- median(moneyball_rev1[, i], na.rm = TRUE)
}

describe(moneyball_rev1)
##                  vars    n    mean      sd median trimmed    mad  min
## TARGET_WINS         1 2276   80.79   15.75   82.0   81.31  14.83    0
## TEAM_BATTING_H      2 2276 1469.27  144.59 1454.0 1459.04 114.16  891
## TEAM_BATTING_2B     3 2276  241.25   46.80  238.0  240.40  47.44   69
## TEAM_BATTING_3B     4 2276   55.25   27.94   47.0   52.18  23.72    0
## TEAM_BATTING_HR     5 2276   99.61   60.55  102.0   97.39  78.58    0
## TEAM_BATTING_BB     6 2276  501.56  122.67  512.0  512.18  94.89    0
## TEAM_BATTING_SO     7 2276  736.25  242.91  750.0  742.82 272.80    0
## TEAM_BASERUN_SB     8 2276  123.39   85.41  101.0  109.73  57.82    0
## TEAM_BASERUN_CS     9 2276   51.51   18.75   49.0   49.58   7.41    0
## TEAM_PITCHING_H    10 2276 1779.21 1406.84 1518.0 1555.90 174.95 1137
## TEAM_PITCHING_HR   11 2276  105.70   61.30  107.0  103.16  74.13    0
## TEAM_PITCHING_BB   12 2276  553.01  166.36  536.5  542.62  98.59    0
## TEAM_PITCHING_SO   13 2276  817.54  540.54  813.5  797.90 245.37    0
## TEAM_FIELDING_E    14 2276  246.48  227.77  159.0  193.44  62.27   65
## TEAM_FIELDING_DP   15 2276  146.72   24.54  149.0  147.91  19.27   52
##                    max range  skew kurtosis    se
## TARGET_WINS        146   146 -0.40     1.03  0.33
## TEAM_BATTING_H    2554  1663  1.57     7.28  3.03
## TEAM_BATTING_2B    458   389  0.22     0.01  0.98
## TEAM_BATTING_3B    223   223  1.11     1.50  0.59
## TEAM_BATTING_HR    264   264  0.19    -0.96  1.27
## TEAM_BATTING_BB    878   878 -1.03     2.18  2.57
## TEAM_BATTING_SO   1399  1399 -0.31    -0.19  5.09
## TEAM_BASERUN_SB    697   697  2.07     6.06  1.79
## TEAM_BASERUN_CS    201   201  2.60    13.47  0.39
## TEAM_PITCHING_H  30132 28995 10.33   141.84 29.49
## TEAM_PITCHING_HR   343   343  0.29    -0.60  1.28
## TEAM_PITCHING_BB  3645  3645  6.74    96.97  3.49
## TEAM_PITCHING_SO 19278 19278 22.69   702.88 11.33
## TEAM_FIELDING_E   1898  1833  2.99    10.97  4.77
## TEAM_FIELDING_DP   228   176 -0.46     0.65  0.51
# Long format of the imputed data, one row per variable/value pair.  melt()
# is namespace-qualified because reshape and reshape2 both provide one.
moneyball_rev1m1 <- reshape2::melt(moneyball_rev1)
## No id variables; using all as measure variables
# Histogram of every variable, each facet with its own axis ranges.  The
# `scales` argument is written out in full instead of relying on partial
# matching of `scale`.
ggplot(moneyball_rev1m1, aes(value)) +
  geom_histogram() +
  facet_wrap(~variable, scales = "free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Replace the outliers of a numeric vector with the vector's median.
#
# A value counts as an outlier when it lies more than 1.5 * IQR below the
# first quartile or more than 1.5 * IQR above the third quartile.
#
# Args:
#   x: numeric vector; may contain NA.
#
# Returns:
#   A vector of the same length as `x` in which every outlier has been
#   replaced by the median of `x`.  Missing values are left out when the
#   quartiles and the median are computed and are returned unchanged.
outlier <- function(x) {
  # Both quartiles come from one quantile() call; na.rm = TRUE keeps the call
  # from stopping with an error when x contains missing values.
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  fence_width <- 1.5 * (quartiles[2] - quartiles[1])
  lower <- quartiles[1] - fence_width
  upper <- quartiles[2] + fence_width

  # which() discards NA comparisons, so missing values are never overwritten
  is_outlier <- which(x < lower | x > upper)
  x[is_outlier] <- median(x, na.rm = TRUE)
  x
}

# Run outlier() on every column.  Assigning into `moneyball_rev1[]` replaces
# the columns while keeping the object a data frame.
# NOTE(review): the response TARGET_WINS is processed as well (row 1 changes
# from 39 to 82 wins) - confirm that altering the response is intended.
moneyball_rev1[] <- lapply(moneyball_rev1, outlier)
# Summary statistics after the outlier replacement
describe(moneyball_rev1)
##                  vars    n    mean     sd median trimmed    mad  min  max
## TARGET_WINS         1 2276   81.23  14.45   82.0   81.53  14.83   40  123
## TEAM_BATTING_H      2 2276 1457.19 112.01 1454.0 1454.15 105.26 1156 1768
## TEAM_BATTING_2B     3 2276  240.42  45.24  238.0  239.93  46.70  112  367
## TEAM_BATTING_3B     4 2276   53.91  25.55   47.0   51.41  23.72    0  129
## TEAM_BATTING_HR     5 2276   99.61  60.55  102.0   97.39  78.58    0  264
## TEAM_BATTING_BB     6 2276  518.53  88.72  512.0  519.54  80.06  258  770
## TEAM_BATTING_SO     7 2276  742.84 232.80  750.0  746.34 266.87   66 1399
## TEAM_BASERUN_SB     8 2276  107.22  54.19  101.0  101.82  50.41    0  277
## TEAM_BASERUN_CS     9 2276   48.53   7.40   49.0   48.53   0.00   29   69
## TEAM_PITCHING_H    10 2276 1530.35 162.52 1518.0 1513.96 137.88 1137 2074
## TEAM_PITCHING_HR   11 2276  105.32  60.63  107.0  102.98  74.13    0  297
## TEAM_PITCHING_BB   12 2276  542.31  96.28  536.5  539.62  89.70  277  810
## TEAM_PITCHING_SO   13 2276  798.73 218.81  813.5  797.59 237.96  181 1436
## TEAM_FIELDING_E    14 2276  169.51  65.65  159.0  158.82  45.96   65  432
## TEAM_FIELDING_DP   15 2276  148.28  20.76  149.0  148.92  17.79   94  202
##                  range  skew kurtosis   se
## TARGET_WINS         83 -0.17    -0.17 0.30
## TEAM_BATTING_H     612  0.22    -0.09 2.35
## TEAM_BATTING_2B    255  0.10    -0.39 0.95
## TEAM_BATTING_3B    129  0.78    -0.22 0.54
## TEAM_BATTING_HR    264  0.19    -0.96 1.27
## TEAM_BATTING_BB    512 -0.10     0.22 1.86
## TEAM_BATTING_SO   1333 -0.16    -0.54 4.88
## TEAM_BASERUN_SB    277  0.86     0.33 1.14
## TEAM_BASERUN_CS     40 -0.01     1.13 0.16
## TEAM_PITCHING_H    937  1.01     1.15 3.41
## TEAM_PITCHING_HR   297  0.24    -0.78 1.27
## TEAM_PITCHING_BB   533  0.26     0.03 2.02
## TEAM_PITCHING_SO  1255  0.06    -0.39 4.59
## TEAM_FIELDING_E    367  1.64     2.66 1.38
## TEAM_FIELDING_DP   108 -0.27     0.17 0.44
# Long format of the data after the outlier replacement.  melt() is
# namespace-qualified because reshape and reshape2 both provide one.
moneyball_rev1m1 <- reshape2::melt(moneyball_rev1)
## No id variables; using all as measure variables
# Histograms of the cleaned variables with red bar outlines.  The colour is
# set on the geom: given to ggplot() as `Color` it had no effect on the plot.
# `scales` is written out in full instead of the partially matched `scale`.
ggplot(moneyball_rev1m1, aes(value)) +
  geom_histogram(colour = "red") +
  facet_wrap(~variable, scales = "free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Bin TARGET_WINS into four equal-width categories.  cut() assigns the labels
# in increasing order of the bins, so they are listed from the fewest wins
# ("Losers") to the most wins ("Winners"); listed the other way round, the
# teams with the fewest wins would be labelled "Winners".
Wins <- cut(
  moneyball_rev1[["TARGET_WINS"]],
  breaks = 4,
  labels = c("Losers", "Strugglers", "Contenders", "Winners")
)
head(cbind(moneyball_rev1, Wins), 5)
##   TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## 1          82           1445             194              39
## 2          70           1339             219              22
## 3          86           1377             232              35
## 4          70           1387             209              38
## 5          82           1297             186              27
##   TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## 1              13             512             842             101
## 2             190             685            1075              37
## 3             137             602             917              46
## 4              96             451             922              43
## 5             102             472             920              49
##   TEAM_BASERUN_CS TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB
## 1              49            1518               84            536.5
## 2              49            1347              191            689.0
## 3              49            1377              137            602.0
## 4              30            1396               97            454.0
## 5              39            1297              102            472.0
##   TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP       Wins
## 1            813.5             159              149 Contenders
## 2           1082.0             193              155 Strugglers
## 3            917.0             175              153 Contenders
## 4            928.0             164              156 Strugglers
## 5            920.0             138              168 Contenders
# Bin TEAM_BATTING_H into four equal-width categories.  cut() assigns the
# labels in increasing order of the bins, so they are listed from the fewest
# hits ("Light") to the most hits ("Heavy"); listed the other way round, the
# teams with the fewest hits would be labelled "Heavy".
Hits <- cut(
  moneyball_rev1[["TEAM_BATTING_H"]],
  breaks = 4,
  labels = c("Light", "Average", "Above Average", "Heavy")
)
head(cbind(moneyball_rev1, Hits), 5)
##   TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## 1          82           1445             194              39
## 2          70           1339             219              22
## 3          86           1377             232              35
## 4          70           1387             209              38
## 5          82           1297             186              27
##   TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## 1              13             512             842             101
## 2             190             685            1075              37
## 3             137             602             917              46
## 4              96             451             922              43
## 5             102             472             920              49
##   TEAM_BASERUN_CS TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB
## 1              49            1518               84            536.5
## 2              49            1347              191            689.0
## 3              49            1377              137            602.0
## 4              30            1396               97            454.0
## 5              39            1297              102            472.0
##   TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP    Hits
## 1            813.5             159              149 Average
## 2           1082.0             193              155 Average
## 3            917.0             175              153 Average
## 4            928.0             164              156 Average
## 5            920.0             138              168   Light
# Derived features.  The columns are evaluated inside with() instead of
# between attach()/detach(), so moneyball_rev1 is never placed on the search
# path and there is no attached copy that could be left behind.
# Pitch_Strength: pitching strikeouts minus pitching hits and pitching walks.
Pitch_Strength <- with(
  moneyball_rev1,
  TEAM_PITCHING_SO - TEAM_PITCHING_H - TEAM_PITCHING_BB
)
# Bat_Strength: (hits that are not doubles, triples or home runs)
# + 2 * doubles + 3 * triples + 4 * home runs + walks - strikeouts.
Bat_Strength <- with(
  moneyball_rev1,
  (TEAM_BATTING_H - TEAM_BATTING_2B - TEAM_BATTING_3B - TEAM_BATTING_HR) +
    2 * TEAM_BATTING_2B + 3 * TEAM_BATTING_3B + 4 * TEAM_BATTING_HR +
    TEAM_BATTING_BB - TEAM_BATTING_SO
)
# moneyball_rev2 = cleaned data plus the two derived columns
moneyball_rev2 <- cbind(moneyball_rev1, Pitch_Strength, Bat_Strength)
head(moneyball_rev2, 10)
##    TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## 1           82           1445             194              39
## 2           70           1339             219              22
## 3           86           1377             232              35
## 4           70           1387             209              38
## 5           82           1297             186              27
## 6           75           1279             200              36
## 7           80           1244             179              54
## 8           85           1273             171              37
## 9           86           1391             197              40
## 10          76           1271             213              18
##    TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## 1               13             512             842             101
## 2              190             685            1075              37
## 3              137             602             917              46
## 4               96             451             922              43
## 5              102             472             920              49
## 6               92             443             973             107
## 7              122             525            1062              80
## 8              115             456            1027              40
## 9              114             447             922              69
## 10              96             441             827              72
##    TEAM_BASERUN_CS TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB
## 1               49            1518               84            536.5
## 2               49            1347              191            689.0
## 3               49            1377              137            602.0
## 4               30            1396               97            454.0
## 5               39            1297              102            472.0
## 6               59            1279               92            443.0
## 7               54            1244              122            525.0
## 8               36            1281              116            459.0
## 9               49            1391              114            447.0
## 10              34            1271               96            441.0
##    TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP Pitch_Strength
## 1             813.5             159              149          -1241
## 2            1082.0             193              155           -954
## 3             917.0             175              153          -1062
## 4             928.0             164              156           -922
## 5             920.0             138              168           -849
## 6             973.0             123              149           -749
## 7            1062.0             136              186           -707
## 8            1033.0             112              136           -707
## 9             922.0             127              169           -916
## 10            827.0             131              159           -885
##    Bat_Strength
## 1          1426
## 2          1782
## 3          1775
## 4          1489
## 5          1395
## 6          1297
## 7          1360
## 8          1292
## 9          1535
## 10         1422


# Scatter plots of wins against hits and against the two derived strength
# measures, each with a fitted linear trend line.
# The y aesthetic is the numeric TARGET_WINS column, which matches the
# "Number of Wins" axis label; the four-level factor `Wins` is not a numeric
# response for the lm smoother.  The two strength plots read from
# moneyball_rev2, the data frame that holds the Pitch_Strength and
# Bat_Strength columns, instead of falling back on same-named global vectors.
ggplot(moneyball_rev1, aes(x = TEAM_BATTING_H, y = TARGET_WINS)) +
  geom_jitter() +
  labs(x = "Hits (1B, 2B, 3B, HR)", y = "Number of Wins") +
  geom_smooth(method = "lm")

ggplot(moneyball_rev2, aes(x = Pitch_Strength, y = TARGET_WINS)) +
  geom_jitter() +
  labs(x = "Pitching Strength", y = "Number of Wins") +
  geom_smooth(method = "lm")

ggplot(moneyball_rev2, aes(x = Bat_Strength, y = TARGET_WINS)) +
  geom_jitter() +
  labs(x = "Batting Strength", y = "Number of Wins") +
  geom_smooth(method = "lm")

# Simple regression of wins on hits; its diagnostic plots are drawn in a
# 2 x 2 grid of panels
lm_samp <- lm(formula = TARGET_WINS ~ TEAM_BATTING_H, data = moneyball_rev1)
par(mfrow = c(2, 2))
plot(lm_samp)

# Back to a single plotting panel, then plot the correlation matrix of the
# cleaned data
par(mfrow = c(1, 1))
M <- cor(x = moneyball_rev1)
corrplot(corr = M, method = "circle")

III. Build Models

names(moneyball_rev2)
##  [1] "TARGET_WINS"      "TEAM_BATTING_H"   "TEAM_BATTING_2B" 
##  [4] "TEAM_BATTING_3B"  "TEAM_BATTING_HR"  "TEAM_BATTING_BB" 
##  [7] "TEAM_BATTING_SO"  "TEAM_BASERUN_SB"  "TEAM_BASERUN_CS" 
## [10] "TEAM_PITCHING_H"  "TEAM_PITCHING_HR" "TEAM_PITCHING_BB"
## [13] "TEAM_PITCHING_SO" "TEAM_FIELDING_E"  "TEAM_FIELDING_DP"
## [16] "Pitch_Strength"   "Bat_Strength"
# Model 1: regress TARGET_WINS on every other column of moneyball_rev1
model1 <- lm(formula = TARGET_WINS ~ ., data = moneyball_rev1)
summary(model1)
## 
## Call:
## lm(formula = TARGET_WINS ~ ., data = moneyball_rev1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -51.847  -8.223   0.183   8.309  55.948 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      48.369198   6.140945   7.877 5.18e-15 ***
## TEAM_BATTING_H    0.033212   0.004230   7.852 6.29e-15 ***
## TEAM_BATTING_2B  -0.002898   0.008380  -0.346 0.729503    
## TEAM_BATTING_3B   0.113426   0.016979   6.681 2.99e-11 ***
## TEAM_BATTING_HR   0.039798   0.024142   1.648 0.099399 .  
## TEAM_BATTING_BB   0.019476   0.005551   3.508 0.000460 ***
## TEAM_BATTING_SO  -0.009944   0.003385  -2.937 0.003344 ** 
## TEAM_BASERUN_SB   0.040590   0.006019   6.743 1.96e-11 ***
## TEAM_BASERUN_CS  -0.033590   0.037831  -0.888 0.374680    
## TEAM_PITCHING_H  -0.008647   0.002482  -3.483 0.000504 ***
## TEAM_PITCHING_HR  0.029648   0.021851   1.357 0.174977    
## TEAM_PITCHING_BB  0.006737   0.004971   1.355 0.175483    
## TEAM_PITCHING_SO -0.001993   0.002895  -0.688 0.491366    
## TEAM_FIELDING_E  -0.038773   0.005920  -6.549 7.14e-11 ***
## TEAM_FIELDING_DP -0.106103   0.014641  -7.247 5.83e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.74 on 2261 degrees of freedom
## Multiple R-squared:  0.2269, Adjusted R-squared:  0.2221 
## F-statistic: 47.39 on 14 and 2261 DF,  p-value: < 2.2e-16
# Model 2: reduced model keeping the eight predictors whose p-value was
# below 0.01 in the model1 summary.
model2 <- lm(
  TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_BB +
    TEAM_BATTING_SO + TEAM_BASERUN_SB + TEAM_PITCHING_H +
    TEAM_FIELDING_E + TEAM_FIELDING_DP,
  data = moneyball_rev2
)
summary(model2)
## 
## Call:
## lm(formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + 
##     TEAM_BATTING_BB + TEAM_BATTING_SO + TEAM_BASERUN_SB + TEAM_PITCHING_H + 
##     TEAM_FIELDING_E + TEAM_FIELDING_DP, data = moneyball_rev2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -51.617  -8.227   0.225   8.421  50.724 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      28.565232   5.289333   5.401 7.34e-08 ***
## TEAM_BATTING_H    0.043769   0.003331  13.141  < 2e-16 ***
## TEAM_BATTING_3B   0.073133   0.016301   4.486 7.60e-06 ***
## TEAM_BATTING_BB   0.032244   0.003329   9.687  < 2e-16 ***
## TEAM_BATTING_SO  -0.002642   0.001711  -1.544  0.12263    
## TEAM_BASERUN_SB   0.030480   0.005763   5.289 1.35e-07 ***
## TEAM_PITCHING_H  -0.007793   0.002376  -3.279  0.00106 ** 
## TEAM_FIELDING_E  -0.045720   0.005653  -8.088 9.75e-16 ***
## TEAM_FIELDING_DP -0.090430   0.014615  -6.188 7.23e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.93 on 2267 degrees of freedom
## Multiple R-squared:  0.2016, Adjusted R-squared:  0.1988 
## F-statistic: 71.58 on 8 and 2267 DF,  p-value: < 2.2e-16
# Model 3: model2 without TEAM_BATTING_SO, the only model2 term that was
# not significant (p = 0.12 in the model2 summary).
# The formula previously contained a doubled "+ +" where that term was
# removed; R parsed it as a unary plus, so the fitted model is the same,
# but the stray operator is dropped here.
model3 <- lm(
  TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_BB +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_FIELDING_E +
    TEAM_FIELDING_DP,
  data = moneyball_rev2
)
summary(model3)
## 
## Call:
## lm(formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + 
##     TEAM_BATTING_BB + +TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_FIELDING_E + 
##     TEAM_FIELDING_DP, data = moneyball_rev2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -50.940  -8.096   0.033   8.300  50.913 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      24.168812   4.459350   5.420 6.60e-08 ***
## TEAM_BATTING_H    0.044299   0.003314  13.367  < 2e-16 ***
## TEAM_BATTING_3B   0.084627   0.014507   5.834 6.20e-09 ***
## TEAM_BATTING_BB   0.032004   0.003326   9.622  < 2e-16 ***
## TEAM_BASERUN_SB   0.028658   0.005643   5.079 4.11e-07 ***
## TEAM_PITCHING_H  -0.007301   0.002356  -3.099  0.00196 ** 
## TEAM_FIELDING_E  -0.043195   0.005413  -7.980 2.29e-15 ***
## TEAM_FIELDING_DP -0.089217   0.014598  -6.111 1.16e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.93 on 2268 degrees of freedom
## Multiple R-squared:  0.2008, Adjusted R-squared:  0.1983 
## F-statistic: 81.41 on 7 and 2268 DF,  p-value: < 2.2e-16
# Box-Cox transformation
# Plot the Box-Cox profile log-likelihood for model1 over the default lambda
# grid, then again restricted to lambda in [1.2, 1.5] in steps of 0.05.
boxcox(model1, plotit = TRUE)

boxcox(model1, plotit = TRUE, lambda = seq(1.2, 1.5, by = 0.05))

# Transform a copy so moneyball_rev2 keeps the untransformed response.
# NOTE(review): 1.42 is presumably the lambda read off the zoomed plot
# above -- confirm.
moneyball_rev3 <- moneyball_rev2
moneyball_rev3$TARGET_WINS <- moneyball_rev3$TARGET_WINS^1.42

# Model 4: transformed wins regressed on every other column of
# moneyball_rev3. The recorded summary reports the Pitch_Strength and
# Bat_Strength coefficients as NA ("not defined because of singularities").
model4 <- lm(formula = TARGET_WINS ~ ., data = moneyball_rev3)
summary(model4)
## 
## Call:
## lm(formula = TARGET_WINS ~ ., data = moneyball_rev3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -437.25  -75.29   -1.54   72.46  513.47 
## 
## Coefficients: (2 not defined because of singularities)
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      229.89338   54.85226   4.191 2.88e-05 ***
## TEAM_BATTING_H     0.29128    0.03778   7.709 1.88e-14 ***
## TEAM_BATTING_2B   -0.02500    0.07485  -0.334 0.738355    
## TEAM_BATTING_3B    1.03443    0.15166   6.821 1.16e-11 ***
## TEAM_BATTING_HR    0.34311    0.21565   1.591 0.111731    
## TEAM_BATTING_BB    0.17520    0.04959   3.533 0.000419 ***
## TEAM_BATTING_SO   -0.09605    0.03024  -3.176 0.001512 ** 
## TEAM_BASERUN_SB    0.35338    0.05377   6.572 6.13e-11 ***
## TEAM_BASERUN_CS   -0.30543    0.33791  -0.904 0.366155    
## TEAM_PITCHING_H   -0.07550    0.02217  -3.405 0.000673 ***
## TEAM_PITCHING_HR   0.27330    0.19518   1.400 0.161569    
## TEAM_PITCHING_BB   0.06032    0.04440   1.359 0.174414    
## TEAM_PITCHING_SO  -0.01204    0.02586  -0.466 0.641483    
## TEAM_FIELDING_E   -0.33621    0.05288  -6.358 2.47e-10 ***
## TEAM_FIELDING_DP  -0.94631    0.13078  -7.236 6.30e-13 ***
## Pitch_Strength          NA         NA      NA       NA    
## Bat_Strength            NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 113.8 on 2261 degrees of freedom
## Multiple R-squared:  0.2252, Adjusted R-squared:  0.2204 
## F-statistic: 46.95 on 14 and 2261 DF,  p-value: < 2.2e-16

IV. Select Model

# Selected model: model1.
# Use a 2 x 3 plotting grid for the lm diagnostic plots plus a histogram of
# the residuals, then restore the single-panel layout.
par(mfrow = c(2, 3))
plot(model1)
hist(resid(model1), main = "Histogram of Residuals")
par(mfrow = c(1, 1))

# Load the evaluation data set from the working directory.
moneyball_eval <- read.csv("moneyball-evaluation-data.csv", header = TRUE)

# Drop columns 1 and 10 by position.
# NOTE(review): presumably INDEX and TEAM_BATTING_HBP, judging by the columns
# that remain in the printed head -- confirm against the CSV header.
moneyball_eval1 <- moneyball_eval[-c(1, 10)]
head(moneyball_eval1, n = 10)
##    TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B TEAM_BATTING_HR
## 1            1209             170              33              83
## 2            1221             151              29              88
## 3            1395             183              29              93
## 4            1539             309              29             159
## 5            1445             203              68               5
## 6            1431             236              53              10
## 7            1430             219              55              37
## 8            1385             158              42              33
## 9            1259             177              78              23
## 10           1397             212              42              58
##    TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB TEAM_BASERUN_CS
## 1              447            1080              62              50
## 2              516             929              54              39
## 3              509             816              59              47
## 4              486             914             148              57
## 5               95             416              NA              NA
## 6              215             377              NA              NA
## 7              568             527             365              NA
## 8              356             609             185              NA
## 9              466             689             150              NA
## 10             452             584              52              NA
##    TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO
## 1             1209               83              447             1080
## 2             1221               88              516              929
## 3             1395               93              509              816
## 4             1539              159              486              914
## 5             3902               14              257             1123
## 6             2793               20              420              736
## 7             1544               40              613              569
## 8             1626               39              418              715
## 9             1342               25              497              734
## 10            1489               62              482              622
##    TEAM_FIELDING_E TEAM_FIELDING_DP
## 1              140              156
## 2              135              164
## 3              156              153
## 4              124              154
## 5              616              130
## 6              572              105
## 7              490               NA
## 8              328              104
## 9              226              132
## 10             184              145
# Summary statistics for the evaluation predictors before imputation; the
# n column shows how many non-missing values each variable has.
psych::describe(moneyball_eval1)
##                  vars   n    mean      sd median trimmed    mad  min   max
## TEAM_BATTING_H      1 259 1469.39  150.66 1455.0 1463.68 114.16  819  2170
## TEAM_BATTING_2B     2 259  241.32   49.52  239.0  242.33  48.93   44   376
## TEAM_BATTING_3B     3 259   55.91   27.14   52.0   52.95  26.69   14   155
## TEAM_BATTING_HR     4 259   95.63   56.33  101.0   93.68  66.72    0   242
## TEAM_BATTING_BB     5 259  498.96  120.59  509.0  505.98  94.89   15   792
## TEAM_BATTING_SO     6 241  709.34  243.11  686.0  715.56 271.32    0  1268
## TEAM_BASERUN_SB     7 246  123.70   93.39   92.0  108.07  60.05    0   580
## TEAM_BASERUN_CS     8 172   52.32   23.10   49.5   50.13  18.53    0   154
## TEAM_PITCHING_H     9 259 1813.46 1662.91 1515.0 1554.25 173.46 1155 22768
## TEAM_PITCHING_HR   10 259  102.15   57.65  104.0  100.08  63.75    0   336
## TEAM_PITCHING_BB   11 259  552.42  172.95  526.0  536.46  97.85  136  2008
## TEAM_PITCHING_SO   12 241  799.67  634.31  745.0  765.41 249.08    0  9963
## TEAM_FIELDING_E    13 259  249.75  230.90  163.0  197.36  59.30   73  1568
## TEAM_FIELDING_DP   14 228  146.06   25.88  148.0  147.24  23.72   69   204
##                  range  skew kurtosis     se
## TEAM_BATTING_H    1351  0.59     3.66   9.36
## TEAM_BATTING_2B    332 -0.33     0.67   3.08
## TEAM_BATTING_3B    141  0.98     0.70   1.69
## TEAM_BATTING_HR    242  0.17    -0.90   3.50
## TEAM_BATTING_BB    777 -0.92     2.53   7.49
## TEAM_BATTING_SO   1268 -0.26    -0.23  15.66
## TEAM_BASERUN_SB    580  1.79     3.85   5.95
## TEAM_BASERUN_CS    154  1.41     3.58   1.76
## TEAM_PITCHING_H  21613  9.28   102.07 103.33
## TEAM_PITCHING_HR   336  0.34    -0.10   3.58
## TEAM_PITCHING_BB  1872  4.11    29.21  10.75
## TEAM_PITCHING_SO  9963 12.48   177.77  40.86
## TEAM_FIELDING_E   1495  3.09    10.87  14.35
## TEAM_FIELDING_DP   135 -0.42     0.16   1.71
# Replace every missing value with the median of the non-missing values in
# the same column.
# seq_along() is used instead of 1:ncol(): for a data frame with zero
# columns, 1:ncol() would iterate over c(1, 0) and fail.
# NOTE(review): the medians are computed from the evaluation set itself, not
# from the training data -- confirm this is the intended imputation.
for (i in seq_along(moneyball_eval1)) {
  missing_rows <- is.na(moneyball_eval1[[i]])
  moneyball_eval1[missing_rows, i] <- median(moneyball_eval1[[i]], na.rm = TRUE)
}

# Re-run the summary to confirm every column is now fully populated.
describe(moneyball_eval1)
##                  vars   n    mean      sd median trimmed    mad  min   max
## TEAM_BATTING_H      1 259 1469.39  150.66 1455.0 1463.68 114.16  819  2170
## TEAM_BATTING_2B     2 259  241.32   49.52  239.0  242.33  48.93   44   376
## TEAM_BATTING_3B     3 259   55.91   27.14   52.0   52.95  26.69   14   155
## TEAM_BATTING_HR     4 259   95.63   56.33  101.0   93.68  66.72    0   242
## TEAM_BATTING_BB     5 259  498.96  120.59  509.0  505.98  94.89   15   792
## TEAM_BATTING_SO     6 259  707.71  234.55  686.0  712.96 253.52    0  1268
## TEAM_BASERUN_SB     7 259  122.11   91.27   92.0  106.65  57.82    0   580
## TEAM_BASERUN_CS     8 259   51.37   18.86   49.5   49.70   9.64    0   154
## TEAM_PITCHING_H     9 259 1813.46 1662.91 1515.0 1554.25 173.46 1155 22768
## TEAM_PITCHING_HR   10 259  102.15   57.65  104.0  100.08  63.75    0   336
## TEAM_PITCHING_BB   11 259  552.42  172.95  526.0  536.46  97.85  136  2008
## TEAM_PITCHING_SO   12 259  795.87  611.94  745.0  763.71 232.77    0  9963
## TEAM_FIELDING_E    13 259  249.75  230.90  163.0  197.36  59.30   73  1568
## TEAM_FIELDING_DP   14 259  146.29   24.29  148.0  147.49  19.27   69   204
##                  range  skew kurtosis     se
## TEAM_BATTING_H    1351  0.59     3.66   9.36
## TEAM_BATTING_2B    332 -0.33     0.67   3.08
## TEAM_BATTING_3B    141  0.98     0.70   1.69
## TEAM_BATTING_HR    242  0.17    -0.90   3.50
## TEAM_BATTING_BB    777 -0.92     2.53   7.49
## TEAM_BATTING_SO   1268 -0.25    -0.04  14.57
## TEAM_BASERUN_SB    580  1.87     4.27   5.67
## TEAM_BASERUN_CS    154  1.87     7.21   1.17
## TEAM_PITCHING_H  21613  9.28   102.07 103.33
## TEAM_PITCHING_HR   336  0.34    -0.10   3.58
## TEAM_PITCHING_BB  1872  4.11    29.21  10.75
## TEAM_PITCHING_SO  9963 12.95   191.51  38.02
## TEAM_FIELDING_E   1495  3.09    10.87  14.35
## TEAM_FIELDING_DP   135 -0.48     0.61   1.51
# Predict wins for the evaluation set with model1; interval = "prediction"
# adds lower (lwr) and upper (upr) prediction bounds next to each fit.
# NOTE(review): several recorded fits are negative (e.g. rows 43, 153, 254),
# which is not a valid win count -- those rows deserve a closer look.
predict(model1, newdata = moneyball_eval1, interval = "prediction")
##             fit          lwr       upr
## 1     64.763721   39.7158157  89.81163
## 2     68.008045   42.9613982  93.05469
## 3     73.989277   48.9682398  99.01031
## 4     84.383341   59.3547412 109.41194
## 5     31.958117    4.1699025  59.74633
## 6     48.615522   22.6024612  74.62858
## 7     78.040522   52.7114066 103.36964
## 8     71.445084   46.3539437  96.53622
## 9     74.335907   49.3097653  99.36205
## 10    73.118087   48.1015521  98.13462
## 11    69.385248   44.3482068  94.42229
## 12    82.154614   57.0859226 107.22331
## 13    83.699001   58.5938590 108.80414
## 14    82.065583   56.8721395 107.25903
## 15    83.754452   58.6813527 108.82755
## 16    75.534463   50.4882189 100.58071
## 17    74.156664   49.1313912  99.18194
## 18    78.845238   53.8275587 103.86292
## 19    71.053118   45.9798767  96.12636
## 20    87.766366   62.5893488 112.94338
## 21    84.669897   59.6309859 109.70881
## 22    83.386368   58.3476986 108.42504
## 23    84.179009   59.1506690 109.20735
## 24    73.130402   48.0951997  98.16560
## 25    79.545677   54.5048880 104.58647
## 26    84.688019   59.6233406 109.75270
## 27    -1.012063  -32.9250524  30.90093
## 28    73.126813   48.0576427  98.19598
## 29    85.314340   60.2205342 110.40815
## 30    72.885257   47.8120491  97.95846
## 31    92.271439   67.2220277 117.32085
## 32    86.160277   61.1155522 111.20500
## 33    85.632349   60.5869127 110.67778
## 34    87.093212   61.9644070 112.22202
## 35    81.368373   56.3403476 106.39640
## 36    86.363808   61.3249158 111.40270
## 37    78.106863   53.0781278 103.13560
## 38    88.818206   63.7805146 113.85590
## 39    78.475809   53.0906805 103.86094
## 40    89.178591   64.0885859 114.26860
## 41    83.729882   58.6730621 108.78670
## 42    91.460028   66.3966358 116.52342
## 43   -18.434216  -50.3238203  13.45539
## 44   103.113648   77.4885074 128.73879
## 45    88.402614   63.0875823 113.71765
## 46    88.581223   63.2357223 113.92672
## 47    89.965145   64.6212304 115.30906
## 48    75.390335   49.6242577 101.15641
## 49    71.609626   46.5811225  96.63813
## 50    77.679234   51.8436987 103.51477
## 51    75.668991   50.6397922 100.69819
## 52    82.477296   57.4374205 107.51717
## 53    77.557166   52.5408159 102.57352
## 54    75.003735   49.9467729 100.06070
## 55    74.231523   49.1995846  99.26346
## 56    77.598603   52.5813266 102.61588
## 57    92.153459   66.9795080 117.32741
## 58    78.131290   53.0684730 103.19411
## 59    67.509038   42.4197057  92.59837
## 60    79.646008   54.6089351 104.68308
## 61    86.882234   61.8528867 111.91158
## 62    79.934141   54.8305057 105.03778
## 63    86.711384   61.6988128 111.72396
## 64    83.528308   58.4874386 108.56918
## 65    81.246377   56.1707323 106.32202
## 66    94.853352   69.5116072 120.19510
## 67    71.636928   46.5298091  96.74405
## 68    74.810110   49.7231540  99.89707
## 69    79.916511   54.8315310 105.00149
## 70    92.273473   67.1603863 117.38656
## 71    87.348029   62.2712426 112.42482
## 72    71.355695   46.2881880  96.42320
## 73    79.940106   54.8725867 105.00762
## 74    88.943401   63.8208416 114.06596
## 75    76.062088   51.0025247 101.12165
## 76    80.664240   55.6077037 105.72078
## 77    84.254475   59.2053701 109.30358
## 78    82.142197   57.1241820 107.16021
## 79    73.700534   48.6568177  98.74425
## 80    75.697691   50.6730469 100.72233
## 81    84.551650   59.4655721 109.63773
## 82    86.584038   61.5337977 111.63428
## 83    94.447787   69.3712731 119.52430
## 84    75.021043   49.9683077 100.07378
## 85    86.759160   61.7307012 111.78762
## 86    78.638374   53.5447744 103.73197
## 87    82.607833   57.5734077 107.64226
## 88    82.182371   57.0069662 107.35778
## 89    89.723502   64.6403922 114.80661
## 90    90.019371   64.9736212 115.06512
## 91    75.890824   50.6962233 101.08542
## 92     9.660513  -38.9504036  58.27143
## 93    72.047324   46.9498646  97.14478
## 94    80.980015   55.9075801 106.05245
## 95    83.392767   58.3014752 108.48406
## 96    85.093387   60.0200666 110.16671
## 97    90.447051   64.2414741 116.65263
## 98    98.874581   73.5489889 124.20017
## 99    87.920341   62.7950157 113.04567
## 100   89.076792   63.9318634 114.22172
## 101   80.459777   55.4249758 105.49458
## 102   75.860332   50.8287851 100.89188
## 103   82.962078   57.9414929 107.98266
## 104   85.527034   60.4123405 110.64173
## 105   78.017917   52.9871480 103.04869
## 106   49.590854   23.4329662  75.74874
## 107   37.344646   11.2528599  63.43643
## 108   76.712059   51.6609410 101.76318
## 109   88.200754   63.1523392 113.24917
## 110   49.178645   23.6192586  74.73803
## 111   86.706847   61.6173692 111.79633
## 112   86.330726   61.1628274 111.49863
## 113   91.925305   66.8820136 116.96860
## 114   89.181779   64.1554849 114.20807
## 115   79.186483   54.1448602 104.22811
## 116   78.545890   53.4995878 103.59219
## 117   84.818305   59.7680242 109.86859
## 118   81.754495   56.7163982 106.79259
## 119   73.616863   48.5654589  98.66827
## 120   71.425295   45.9913934  96.85920
## 121   91.198985   66.0779389 116.32003
## 122   71.039734   45.9418814  96.13759
## 123   69.906136   44.8576998  94.95457
## 124   70.456030   45.3082752  95.60378
## 125   72.761950   47.7280984  97.79580
## 126   83.449440   58.3783574 108.52052
## 127   87.985341   62.9092128 113.06147
## 128   77.518820   52.4981625 102.53948
## 129   91.932444   66.8969272 116.96796
## 130   88.860283   63.8177586 113.90281
## 131   84.549211   59.5215275 109.57689
## 132   80.991830   55.9534693 106.03019
## 133   77.618438   52.5555845 102.68129
## 134   82.048984   56.9878996 107.11007
## 135   86.395157   61.3623457 111.42797
## 136   46.092452   19.4848267  72.70008
## 137   75.682557   50.5873547 100.77776
## 138   75.765857   50.7404065 100.79131
## 139   85.208963   59.7396255 110.67830
## 140   79.336494   54.1281721 104.54482
## 141   64.685102   39.6091661  89.76104
## 142   74.256616   49.2028612  99.31037
## 143   91.344438   66.2997056 116.38917
## 144   74.674225   49.6390091  99.70944
## 145   76.144382   51.1012760 101.18749
## 146   74.570342   49.5487957  99.59189
## 147   79.244354   54.2164462 104.27226
## 148   80.803287   55.7834928 105.82308
## 149   78.498590   53.4583883 103.53879
## 150   83.427154   58.4133466 108.44096
## 151   81.487749   56.4467771 106.52872
## 152   79.376901   54.3380681 104.41573
## 153 -166.767191 -271.9356287 -61.59875
## 154   67.947050   42.8782682  93.01583
## 155   77.810549   52.7875999 102.83350
## 156   69.571993   44.5083334  94.63565
## 157   90.430106   65.3645857 115.49563
## 158   49.599773   23.8599538  75.33959
## 159   89.009075   63.7975173 114.22063
## 160   73.697205   48.6337475  98.76066
## 161  103.961870   78.8210093 129.10273
## 162  106.973452   81.8110139 132.13589
## 163   95.424718   70.3784852 120.47095
## 164  104.503809   79.4111109 129.59651
## 165  100.217427   75.1374370 125.29742
## 166   93.187668   68.0957822 118.27955
## 167   82.905728   57.8641239 107.94733
## 168   81.766807   56.7301194 106.80349
## 169   72.569927   47.5433217  97.59653
## 170   80.173714   55.1524244 105.19500
## 171   90.389867   65.3123964 115.46734
## 172   90.886668   65.8444049 115.92893
## 173   83.033080   58.0062307 108.05993
## 174   92.355277   67.2832595 117.42729
## 175   83.695365   58.6583091 108.73242
## 176   78.154119   53.1052778 103.20296
## 177   82.143182   57.0929245 107.19344
## 178   71.759363   46.7011287  96.81760
## 179   76.451897   51.3198195 101.58397
## 180   80.650161   55.6298987 105.67042
## 181   88.031769   62.3839152 113.67962
## 182   88.400534   63.3442791 113.45679
## 183   84.657693   59.6452749 109.67011
## 184   86.483869   61.3965032 111.57124
## 185    3.225685  -44.9967744  51.44814
## 186   85.857741   60.0191903 111.69629
## 187   81.081181   55.6271166 106.53524
## 188   28.550084    1.3655171  55.73465
## 189   45.665403   19.9656513  71.36515
## 190  100.397808   74.9081129 125.88750
## 191   69.178553   44.0547680  94.30234
## 192   80.463527   55.4311379 105.49592
## 193   72.088164   46.8798051  97.29652
## 194   74.204476   49.1711567  99.23780
## 195   75.270877   50.2056305 100.33612
## 196   68.085300   43.0368777  93.13372
## 197   77.120270   52.1039177 102.13662
## 198   89.455207   64.3793227 114.53109
## 199   83.424354   58.3567167 108.49199
## 200   85.250634   60.2189637 110.28230
## 201   76.178451   51.1228717 101.23403
## 202   79.657481   54.6375511 104.67741
## 203   76.393645   51.3165320 101.47076
## 204   84.006288   58.6496297 109.36295
## 205   79.733269   54.6486215 104.81792
## 206   85.698008   60.5898696 110.80615
## 207   77.963276   52.8769942 103.04956
## 208   78.548438   53.5155268 103.58135
## 209   77.089161   51.8555313 102.32279
## 210   68.771078   43.4354874  94.10667
## 211  100.868109   75.5829643 126.15325
## 212   87.688923   62.5647432 112.81310
## 213   86.638681   61.5878761 111.68949
## 214   67.528714   42.4922812  92.56515
## 215   72.457045   47.3605685  97.55352
## 216   82.537735   57.4695590 107.60591
## 217   78.819055   53.7436370 103.89447
## 218   89.648273   64.1158916 115.18065
## 219   76.142560   51.1141559 101.17096
## 220   80.885861   55.8556752 105.91605
## 221   75.738831   50.6820661 100.79560
## 222   73.129949   48.0890067  98.17089
## 223   81.181433   56.1543287 106.20854
## 224   72.371137   47.3160933  97.42618
## 225   10.978747  -24.2879213  46.24542
## 226   77.903733   52.8812243 102.92624
## 227   79.173536   54.1233013 104.22377
## 228   80.723945   55.6964505 105.75144
## 229   83.773563   58.7605319 108.78659
## 230   51.961548   26.0246530  77.89844
## 231   82.013360   56.9282605 107.09846
## 232   89.506309   63.9322474 115.08037
## 233   81.917893   56.8747281 106.96106
## 234   86.419527   61.3815181 111.45754
## 235   81.038545   56.0179689 106.05912
## 236   76.788052   51.7646692 101.81143
## 237   79.187518   54.0817180 104.29332
## 238   78.485255   53.4586733 103.51184
## 239   87.616938   62.3242686 112.90961
## 240   71.987017   46.9491795  97.02485
## 241   88.072829   63.0594144 113.08624
## 242   86.739320   61.7170327 111.76161
## 243   82.154481   57.1280465 107.18091
## 244   80.167403   55.1369972 105.19781
## 245   63.696362   38.5576347  88.83509
## 246   83.136024   58.0906354 108.18141
## 247   78.157533   53.1398510 103.17521
## 248   83.523533   58.4704942 108.57657
## 249   73.548253   48.5192163  98.57729
## 250   82.200518   57.1452561 107.25578
## 251   81.881196   56.7694721 106.99292
## 252   26.835261   -0.7139863  54.38451
## 253   88.739619   63.4291436 114.05009
## 254  -39.616446 -100.9910062  21.75812
## 255   69.916054   44.8375069  94.99460
## 256   79.059002   53.9686138 104.14939
## 257   82.338469   57.2816846 107.39525
## 258   84.649267   59.6044201 109.69411
## 259   76.716970   51.6559752 101.77796