I. Data Exploration
library(ggplot2)
library(reshape)
library(reshape2)
library(corrplot)
#library(Hmisc)
library(psych)
#library(PerformanceAnalytics)
library(MASS)
require(readr)
# Load the Moneyball training dataset from a local CSV file
# (per the original note, the file comes from a GitHub library).
moneyball <- read.csv(file = "moneyball-training-data.csv", header = TRUE)
# Preview the first 10 rows to see the columns the dataset contains.
head(x = moneyball, n = 10)
## INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## 1 1 39 1445 194 39
## 2 2 70 1339 219 22
## 3 3 86 1377 232 35
## 4 4 70 1387 209 38
## 5 5 82 1297 186 27
## 6 6 75 1279 200 36
## 7 7 80 1244 179 54
## 8 8 85 1273 171 37
## 9 11 86 1391 197 40
## 10 12 76 1271 213 18
## TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## 1 13 143 842 NA
## 2 190 685 1075 37
## 3 137 602 917 46
## 4 96 451 922 43
## 5 102 472 920 49
## 6 92 443 973 107
## 7 122 525 1062 80
## 8 115 456 1027 40
## 9 114 447 922 69
## 10 96 441 827 72
## TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
## 1 NA NA 9364 84
## 2 28 NA 1347 191
## 3 27 NA 1377 137
## 4 30 NA 1396 97
## 5 39 NA 1297 102
## 6 59 NA 1279 92
## 7 54 NA 1244 122
## 8 36 NA 1281 116
## 9 27 NA 1391 114
## 10 34 NA 1271 96
## TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP
## 1 927 5456 1011 NA
## 2 689 1082 193 155
## 3 602 917 175 153
## 4 454 928 164 156
## 5 472 920 138 168
## 6 443 973 123 149
## 7 525 1062 136 186
## 8 459 1033 112 136
## 9 447 922 127 169
## 10 441 827 131 159
# Size of the moneyball data frame: row count first, then column count.
nrow(moneyball)
## [1] 2276
ncol(moneyball)
## [1] 17
# Names of the columns in the moneyball data frame.
colnames(moneyball)
## [1] "INDEX" "TARGET_WINS" "TEAM_BATTING_H"
## [4] "TEAM_BATTING_2B" "TEAM_BATTING_3B" "TEAM_BATTING_HR"
## [7] "TEAM_BATTING_BB" "TEAM_BATTING_SO" "TEAM_BASERUN_SB"
## [10] "TEAM_BASERUN_CS" "TEAM_BATTING_HBP" "TEAM_PITCHING_H"
## [13] "TEAM_PITCHING_HR" "TEAM_PITCHING_BB" "TEAM_PITCHING_SO"
## [16] "TEAM_FIELDING_E" "TEAM_FIELDING_DP"
# Count the missing (NA) values in each column of the moneyball data frame.
# is.na() yields one logical per row and sum() counts the TRUEs; vapply()
# guarantees exactly one integer per column regardless of the input.
na_count <- vapply(moneyball, function(column) sum(is.na(column)), integer(1))
# Wrap the named counts in a one-column data frame (row names are the column
# names of moneyball) so they print as a table.
na_count <- data.frame(na_count)
na_count
## na_count
## INDEX 0
## TARGET_WINS 0
## TEAM_BATTING_H 0
## TEAM_BATTING_2B 0
## TEAM_BATTING_3B 0
## TEAM_BATTING_HR 0
## TEAM_BATTING_BB 0
## TEAM_BATTING_SO 102
## TEAM_BASERUN_SB 131
## TEAM_BASERUN_CS 772
## TEAM_BATTING_HBP 2085
## TEAM_PITCHING_H 0
## TEAM_PITCHING_HR 0
## TEAM_PITCHING_BB 0
## TEAM_PITCHING_SO 102
## TEAM_FIELDING_E 0
## TEAM_FIELDING_DP 286
# Five-number summary, mean and NA count for every column.
summary(object = moneyball)
## INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B
## Min. : 1.0 Min. : 0.00 Min. : 891 Min. : 69.0
## 1st Qu.: 630.8 1st Qu.: 71.00 1st Qu.:1383 1st Qu.:208.0
## Median :1270.5 Median : 82.00 Median :1454 Median :238.0
## Mean :1268.5 Mean : 80.79 Mean :1469 Mean :241.2
## 3rd Qu.:1915.5 3rd Qu.: 92.00 3rd Qu.:1537 3rd Qu.:273.0
## Max. :2535.0 Max. :146.00 Max. :2554 Max. :458.0
##
## TEAM_BATTING_3B TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO
## Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.0
## 1st Qu.: 34.00 1st Qu.: 42.00 1st Qu.:451.0 1st Qu.: 548.0
## Median : 47.00 Median :102.00 Median :512.0 Median : 750.0
## Mean : 55.25 Mean : 99.61 Mean :501.6 Mean : 735.6
## 3rd Qu.: 72.00 3rd Qu.:147.00 3rd Qu.:580.0 3rd Qu.: 930.0
## Max. :223.00 Max. :264.00 Max. :878.0 Max. :1399.0
## NA's :102
## TEAM_BASERUN_SB TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H
## Min. : 0.0 Min. : 0.0 Min. :29.00 Min. : 1137
## 1st Qu.: 66.0 1st Qu.: 38.0 1st Qu.:50.50 1st Qu.: 1419
## Median :101.0 Median : 49.0 Median :58.00 Median : 1518
## Mean :124.8 Mean : 52.8 Mean :59.36 Mean : 1779
## 3rd Qu.:156.0 3rd Qu.: 62.0 3rd Qu.:67.00 3rd Qu.: 1682
## Max. :697.0 Max. :201.0 Max. :95.00 Max. :30132
## NA's :131 NA's :772 NA's :2085
## TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 65.0
## 1st Qu.: 50.0 1st Qu.: 476.0 1st Qu.: 615.0 1st Qu.: 127.0
## Median :107.0 Median : 536.5 Median : 813.5 Median : 159.0
## Mean :105.7 Mean : 553.0 Mean : 817.7 Mean : 246.5
## 3rd Qu.:150.0 3rd Qu.: 611.0 3rd Qu.: 968.0 3rd Qu.: 249.2
## Max. :343.0 Max. :3645.0 Max. :19278.0 Max. :1898.0
## NA's :102
## TEAM_FIELDING_DP
## Min. : 52.0
## 1st Qu.:131.0
## Median :149.0
## Mean :146.4
## 3rd Qu.:164.0
## Max. :228.0
## NA's :286
# Extended descriptive statistics (n, mean, sd, median, trimmed mean, mad,
# min, max, range, skew, kurtosis, se) for every column.
psych::describe(moneyball)
## vars n mean sd median trimmed mad min
## INDEX 1 2276 1268.46 736.35 1270.5 1268.57 952.57 1
## TARGET_WINS 2 2276 80.79 15.75 82.0 81.31 14.83 0
## TEAM_BATTING_H 3 2276 1469.27 144.59 1454.0 1459.04 114.16 891
## TEAM_BATTING_2B 4 2276 241.25 46.80 238.0 240.40 47.44 69
## TEAM_BATTING_3B 5 2276 55.25 27.94 47.0 52.18 23.72 0
## TEAM_BATTING_HR 6 2276 99.61 60.55 102.0 97.39 78.58 0
## TEAM_BATTING_BB 7 2276 501.56 122.67 512.0 512.18 94.89 0
## TEAM_BATTING_SO 8 2174 735.61 248.53 750.0 742.31 284.66 0
## TEAM_BASERUN_SB 9 2145 124.76 87.79 101.0 110.81 60.79 0
## TEAM_BASERUN_CS 10 1504 52.80 22.96 49.0 50.36 17.79 0
## TEAM_BATTING_HBP 11 191 59.36 12.97 58.0 58.86 11.86 29
## TEAM_PITCHING_H 12 2276 1779.21 1406.84 1518.0 1555.90 174.95 1137
## TEAM_PITCHING_HR 13 2276 105.70 61.30 107.0 103.16 74.13 0
## TEAM_PITCHING_BB 14 2276 553.01 166.36 536.5 542.62 98.59 0
## TEAM_PITCHING_SO 15 2174 817.73 553.09 813.5 796.93 257.23 0
## TEAM_FIELDING_E 16 2276 246.48 227.77 159.0 193.44 62.27 65
## TEAM_FIELDING_DP 17 1990 146.39 26.23 149.0 147.58 23.72 52
## max range skew kurtosis se
## INDEX 2535 2534 0.00 -1.22 15.43
## TARGET_WINS 146 146 -0.40 1.03 0.33
## TEAM_BATTING_H 2554 1663 1.57 7.28 3.03
## TEAM_BATTING_2B 458 389 0.22 0.01 0.98
## TEAM_BATTING_3B 223 223 1.11 1.50 0.59
## TEAM_BATTING_HR 264 264 0.19 -0.96 1.27
## TEAM_BATTING_BB 878 878 -1.03 2.18 2.57
## TEAM_BATTING_SO 1399 1399 -0.30 -0.32 5.33
## TEAM_BASERUN_SB 697 697 1.97 5.49 1.90
## TEAM_BASERUN_CS 201 201 1.98 7.62 0.59
## TEAM_BATTING_HBP 95 66 0.32 -0.11 0.94
## TEAM_PITCHING_H 30132 28995 10.33 141.84 29.49
## TEAM_PITCHING_HR 343 343 0.29 -0.60 1.28
## TEAM_PITCHING_BB 3645 3645 6.74 96.97 3.49
## TEAM_PITCHING_SO 19278 19278 22.17 671.19 11.86
## TEAM_FIELDING_E 1898 1833 2.99 10.97 4.77
## TEAM_FIELDING_DP 228 176 -0.39 0.18 0.59
# Record the class of every column; the listing below reports them all as
# "integer".
col_class <- sapply(X = moneyball, FUN = class)
col_class
## INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B
## "integer" "integer" "integer" "integer"
## TEAM_BATTING_3B TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO
## "integer" "integer" "integer" "integer"
## TEAM_BASERUN_SB TEAM_BASERUN_CS TEAM_BATTING_HBP TEAM_PITCHING_H
## "integer" "integer" "integer" "integer"
## TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E
## "integer" "integer" "integer" "integer"
## TEAM_FIELDING_DP
## "integer"
# Reshape the data to long format (one row per variable/value pair) so that
# every column can be drawn in its own facet.
# library() is used instead of require() so a missing package stops the
# script instead of merely returning FALSE.
library(reshape)
meltMoneyball <- melt(moneyball)
library(ggplot2)
# One boxplot per variable. The facet_wrap() argument is spelled out in full
# (`scales`) rather than relying on partial matching of `scale`; "free" lets
# each facet pick its own axis ranges.
bp <- ggplot(meltMoneyball, aes(variable, value)) +
  geom_boxplot() +
  facet_wrap(~variable, scales = "free")
bp

# Correlation matrix of all columns, shown rounded to two decimals for the
# first six rows. cor() runs with its default missing-value handling, so
# every column that contains NA is reported as NA in the matrix below.
# NOTE(review): use = "pairwise.complete.obs" would give usable values for
# those columns -- confirm before changing, since it alters the reported
# output and the plot drawn from M.
M <- cor(x = moneyball)
head(round(M, digits = 2))
## INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B
## INDEX 1.00 -0.02 -0.02 0.01
## TARGET_WINS -0.02 1.00 0.39 0.29
## TEAM_BATTING_H -0.02 0.39 1.00 0.56
## TEAM_BATTING_2B 0.01 0.29 0.56 1.00
## TEAM_BATTING_3B -0.01 0.14 0.43 -0.11
## TEAM_BATTING_HR 0.05 0.18 -0.01 0.44
## TEAM_BATTING_3B TEAM_BATTING_HR TEAM_BATTING_BB
## INDEX -0.01 0.05 -0.03
## TARGET_WINS 0.14 0.18 0.23
## TEAM_BATTING_H 0.43 -0.01 -0.07
## TEAM_BATTING_2B -0.11 0.44 0.26
## TEAM_BATTING_3B 1.00 -0.64 -0.29
## TEAM_BATTING_HR -0.64 1.00 0.51
## TEAM_BATTING_SO TEAM_BASERUN_SB TEAM_BASERUN_CS
## INDEX NA NA NA
## TARGET_WINS NA NA NA
## TEAM_BATTING_H NA NA NA
## TEAM_BATTING_2B NA NA NA
## TEAM_BATTING_3B NA NA NA
## TEAM_BATTING_HR NA NA NA
## TEAM_BATTING_HBP TEAM_PITCHING_H TEAM_PITCHING_HR
## INDEX NA 0.02 0.05
## TARGET_WINS NA -0.11 0.19
## TEAM_BATTING_H NA 0.30 0.07
## TEAM_BATTING_2B NA 0.02 0.45
## TEAM_BATTING_3B NA 0.19 -0.57
## TEAM_BATTING_HR NA -0.25 0.97
## TEAM_PITCHING_BB TEAM_PITCHING_SO TEAM_FIELDING_E
## INDEX -0.02 NA -0.01
## TARGET_WINS 0.12 NA -0.18
## TEAM_BATTING_H 0.09 NA 0.26
## TEAM_BATTING_2B 0.18 NA -0.24
## TEAM_BATTING_3B 0.00 NA 0.51
## TEAM_BATTING_HR 0.14 NA -0.59
## TEAM_FIELDING_DP
## INDEX NA
## TARGET_WINS NA
## TEAM_BATTING_H NA
## TEAM_BATTING_2B NA
## TEAM_BATTING_3B NA
## TEAM_BATTING_HR NA
# Draw the correlation matrix M as a grid of circles.
corrplot(corr = M, method = "circle")

# Set the knitr chunk default so that chunk source code is echoed.
knitr::opts_chunk$set(echo = TRUE)
II. Data Preparation
# Drop the row identifier (INDEX, column 1) and TEAM_BATTING_HBP (column 11),
# which is NA in 2085 of the 2276 rows. The columns are removed by name
# rather than by position so the step stays correct if the column order of
# the CSV changes.
moneyball_rev1 <- moneyball[
  !(names(moneyball) %in% c("INDEX", "TEAM_BATTING_HBP"))
]
head(moneyball_rev1, 10)
## TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## 1 39 1445 194 39
## 2 70 1339 219 22
## 3 86 1377 232 35
## 4 70 1387 209 38
## 5 82 1297 186 27
## 6 75 1279 200 36
## 7 80 1244 179 54
## 8 85 1273 171 37
## 9 86 1391 197 40
## 10 76 1271 213 18
## TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## 1 13 143 842 NA
## 2 190 685 1075 37
## 3 137 602 917 46
## 4 96 451 922 43
## 5 102 472 920 49
## 6 92 443 973 107
## 7 122 525 1062 80
## 8 115 456 1027 40
## 9 114 447 922 69
## 10 96 441 827 72
## TEAM_BASERUN_CS TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB
## 1 NA 9364 84 927
## 2 28 1347 191 689
## 3 27 1377 137 602
## 4 30 1396 97 454
## 5 39 1297 102 472
## 6 59 1279 92 443
## 7 54 1244 122 525
## 8 36 1281 116 459
## 9 27 1391 114 447
## 10 34 1271 96 441
## TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP
## 1 5456 1011 NA
## 2 1082 193 155
## 3 917 175 153
## 4 928 164 156
## 5 920 138 168
## 6 973 123 149
## 7 1062 136 186
## 8 1033 112 136
## 9 922 127 169
## 10 827 131 159
# Descriptive statistics of the reduced data frame before imputation; the `n`
# column shows which variables still have missing values.
psych::describe(moneyball_rev1)
## vars n mean sd median trimmed mad min
## TARGET_WINS 1 2276 80.79 15.75 82.0 81.31 14.83 0
## TEAM_BATTING_H 2 2276 1469.27 144.59 1454.0 1459.04 114.16 891
## TEAM_BATTING_2B 3 2276 241.25 46.80 238.0 240.40 47.44 69
## TEAM_BATTING_3B 4 2276 55.25 27.94 47.0 52.18 23.72 0
## TEAM_BATTING_HR 5 2276 99.61 60.55 102.0 97.39 78.58 0
## TEAM_BATTING_BB 6 2276 501.56 122.67 512.0 512.18 94.89 0
## TEAM_BATTING_SO 7 2174 735.61 248.53 750.0 742.31 284.66 0
## TEAM_BASERUN_SB 8 2145 124.76 87.79 101.0 110.81 60.79 0
## TEAM_BASERUN_CS 9 1504 52.80 22.96 49.0 50.36 17.79 0
## TEAM_PITCHING_H 10 2276 1779.21 1406.84 1518.0 1555.90 174.95 1137
## TEAM_PITCHING_HR 11 2276 105.70 61.30 107.0 103.16 74.13 0
## TEAM_PITCHING_BB 12 2276 553.01 166.36 536.5 542.62 98.59 0
## TEAM_PITCHING_SO 13 2174 817.73 553.09 813.5 796.93 257.23 0
## TEAM_FIELDING_E 14 2276 246.48 227.77 159.0 193.44 62.27 65
## TEAM_FIELDING_DP 15 1990 146.39 26.23 149.0 147.58 23.72 52
## max range skew kurtosis se
## TARGET_WINS 146 146 -0.40 1.03 0.33
## TEAM_BATTING_H 2554 1663 1.57 7.28 3.03
## TEAM_BATTING_2B 458 389 0.22 0.01 0.98
## TEAM_BATTING_3B 223 223 1.11 1.50 0.59
## TEAM_BATTING_HR 264 264 0.19 -0.96 1.27
## TEAM_BATTING_BB 878 878 -1.03 2.18 2.57
## TEAM_BATTING_SO 1399 1399 -0.30 -0.32 5.33
## TEAM_BASERUN_SB 697 697 1.97 5.49 1.90
## TEAM_BASERUN_CS 201 201 1.98 7.62 0.59
## TEAM_PITCHING_H 30132 28995 10.33 141.84 29.49
## TEAM_PITCHING_HR 343 343 0.29 -0.60 1.28
## TEAM_PITCHING_BB 3645 3645 6.74 96.97 3.49
## TEAM_PITCHING_SO 19278 19278 22.17 671.19 11.86
## TEAM_FIELDING_E 1898 1833 2.99 10.97 4.77
## TEAM_FIELDING_DP 228 176 -0.39 0.18 0.59
# Impute missing values column by column with that column's median.
# seq_len() is used instead of 1:ncol(), which would iterate over c(1, 0)
# if the data frame ever had zero columns.
for (col_idx in seq_len(ncol(moneyball_rev1))) {
  missing_rows <- is.na(moneyball_rev1[, col_idx])
  moneyball_rev1[missing_rows, col_idx] <-
    median(moneyball_rev1[, col_idx], na.rm = TRUE)
}
# Re-check the statistics: `n` is now 2276 for every column.
describe(moneyball_rev1)
## vars n mean sd median trimmed mad min
## TARGET_WINS 1 2276 80.79 15.75 82.0 81.31 14.83 0
## TEAM_BATTING_H 2 2276 1469.27 144.59 1454.0 1459.04 114.16 891
## TEAM_BATTING_2B 3 2276 241.25 46.80 238.0 240.40 47.44 69
## TEAM_BATTING_3B 4 2276 55.25 27.94 47.0 52.18 23.72 0
## TEAM_BATTING_HR 5 2276 99.61 60.55 102.0 97.39 78.58 0
## TEAM_BATTING_BB 6 2276 501.56 122.67 512.0 512.18 94.89 0
## TEAM_BATTING_SO 7 2276 736.25 242.91 750.0 742.82 272.80 0
## TEAM_BASERUN_SB 8 2276 123.39 85.41 101.0 109.73 57.82 0
## TEAM_BASERUN_CS 9 2276 51.51 18.75 49.0 49.58 7.41 0
## TEAM_PITCHING_H 10 2276 1779.21 1406.84 1518.0 1555.90 174.95 1137
## TEAM_PITCHING_HR 11 2276 105.70 61.30 107.0 103.16 74.13 0
## TEAM_PITCHING_BB 12 2276 553.01 166.36 536.5 542.62 98.59 0
## TEAM_PITCHING_SO 13 2276 817.54 540.54 813.5 797.90 245.37 0
## TEAM_FIELDING_E 14 2276 246.48 227.77 159.0 193.44 62.27 65
## TEAM_FIELDING_DP 15 2276 146.72 24.54 149.0 147.91 19.27 52
## max range skew kurtosis se
## TARGET_WINS 146 146 -0.40 1.03 0.33
## TEAM_BATTING_H 2554 1663 1.57 7.28 3.03
## TEAM_BATTING_2B 458 389 0.22 0.01 0.98
## TEAM_BATTING_3B 223 223 1.11 1.50 0.59
## TEAM_BATTING_HR 264 264 0.19 -0.96 1.27
## TEAM_BATTING_BB 878 878 -1.03 2.18 2.57
## TEAM_BATTING_SO 1399 1399 -0.31 -0.19 5.09
## TEAM_BASERUN_SB 697 697 2.07 6.06 1.79
## TEAM_BASERUN_CS 201 201 2.60 13.47 0.39
## TEAM_PITCHING_H 30132 28995 10.33 141.84 29.49
## TEAM_PITCHING_HR 343 343 0.29 -0.60 1.28
## TEAM_PITCHING_BB 3645 3645 6.74 96.97 3.49
## TEAM_PITCHING_SO 19278 19278 22.69 702.88 11.33
## TEAM_FIELDING_E 1898 1833 2.99 10.97 4.77
## TEAM_FIELDING_DP 228 176 -0.46 0.65 0.51
# Long-format copy of the imputed data, used for faceted plotting.
moneyball_rev1m1 <- melt(data = moneyball_rev1)
## No id variables; using all as measure variables
# Histogram of every variable, each facet on its own axis scales.
ggplot(data = moneyball_rev1m1, mapping = aes(x = value)) +
  geom_histogram() +
  facet_wrap(~variable, scales = "free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Replace outliers in a numeric vector with the vector's median.
#
# A value is treated as an outlier when it lies more than 1.5 * IQR below the
# first quartile or above the third quartile (quartiles use quantile()'s
# default method).
#
# Args:
#   x: A vector. Missing values are ignored when computing the quartiles and
#      the median, and are left as NA in the result.
#
# Returns:
#   A vector of the same length as `x` with every outlier replaced by the
#   median of the non-missing values. Non-numeric input, and input with no
#   non-missing values, is returned unchanged.
outlier <- function(x) {
  # Nothing sensible can be computed for these inputs; pass them through
  # instead of failing inside quantile().
  if (!is.numeric(x) || all(is.na(x))) {
    return(x)
  }
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  # 1.5 * interquartile range, computed once and reused for both fences.
  fence <- 1.5 * (quartiles[2] - quartiles[1])
  # NA entries are never flagged, so they are neither replaced nor do they
  # produce NA subscripts.
  is_outlier <- !is.na(x) &
    (x < quartiles[1] - fence | x > quartiles[2] + fence)
  x[is_outlier] <- median(x, na.rm = TRUE)
  x
}
# Run the outlier replacement over every column. Assigning into
# moneyball_rev1[] keeps the object a data frame with its column names.
# TARGET_WINS is one of those columns, so the response is altered as well
# (its min/max go from 0/146 to 40/123 in the statistics below).
moneyball_rev1[] <- lapply(X = moneyball_rev1, FUN = outlier)
psych::describe(moneyball_rev1)
## vars n mean sd median trimmed mad min max
## TARGET_WINS 1 2276 81.23 14.45 82.0 81.53 14.83 40 123
## TEAM_BATTING_H 2 2276 1457.19 112.01 1454.0 1454.15 105.26 1156 1768
## TEAM_BATTING_2B 3 2276 240.42 45.24 238.0 239.93 46.70 112 367
## TEAM_BATTING_3B 4 2276 53.91 25.55 47.0 51.41 23.72 0 129
## TEAM_BATTING_HR 5 2276 99.61 60.55 102.0 97.39 78.58 0 264
## TEAM_BATTING_BB 6 2276 518.53 88.72 512.0 519.54 80.06 258 770
## TEAM_BATTING_SO 7 2276 742.84 232.80 750.0 746.34 266.87 66 1399
## TEAM_BASERUN_SB 8 2276 107.22 54.19 101.0 101.82 50.41 0 277
## TEAM_BASERUN_CS 9 2276 48.53 7.40 49.0 48.53 0.00 29 69
## TEAM_PITCHING_H 10 2276 1530.35 162.52 1518.0 1513.96 137.88 1137 2074
## TEAM_PITCHING_HR 11 2276 105.32 60.63 107.0 102.98 74.13 0 297
## TEAM_PITCHING_BB 12 2276 542.31 96.28 536.5 539.62 89.70 277 810
## TEAM_PITCHING_SO 13 2276 798.73 218.81 813.5 797.59 237.96 181 1436
## TEAM_FIELDING_E 14 2276 169.51 65.65 159.0 158.82 45.96 65 432
## TEAM_FIELDING_DP 15 2276 148.28 20.76 149.0 148.92 17.79 94 202
## range skew kurtosis se
## TARGET_WINS 83 -0.17 -0.17 0.30
## TEAM_BATTING_H 612 0.22 -0.09 2.35
## TEAM_BATTING_2B 255 0.10 -0.39 0.95
## TEAM_BATTING_3B 129 0.78 -0.22 0.54
## TEAM_BATTING_HR 264 0.19 -0.96 1.27
## TEAM_BATTING_BB 512 -0.10 0.22 1.86
## TEAM_BATTING_SO 1333 -0.16 -0.54 4.88
## TEAM_BASERUN_SB 277 0.86 0.33 1.14
## TEAM_BASERUN_CS 40 -0.01 1.13 0.16
## TEAM_PITCHING_H 937 1.01 1.15 3.41
## TEAM_PITCHING_HR 297 0.24 -0.78 1.27
## TEAM_PITCHING_BB 533 0.26 0.03 2.02
## TEAM_PITCHING_SO 1255 0.06 -0.39 4.59
## TEAM_FIELDING_E 367 1.64 2.66 1.38
## TEAM_FIELDING_DP 108 -0.27 0.17 0.44
# Re-melt the data after outlier replacement and redraw the histograms.
moneyball_rev1m1 <- melt(moneyball_rev1)
## No id variables; using all as measure variables
# The red colour is set on the histogram layer: an extra `Color` argument
# given to ggplot() itself is not used by it. `scales` is spelled out in full
# rather than relying on partial matching of `scale`.
ggplot(moneyball_rev1m1, aes(value)) +
  geom_histogram(colour = "red") +
  facet_wrap(~variable, scales = "free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Bin TARGET_WINS (column 1) into four equal-width categories.
# cut() assigns labels to intervals in increasing order, so the labels are
# listed from the lowest win totals ("Losers") to the highest ("Winners").
Wins <- cut(
  moneyball_rev1[["TARGET_WINS"]],
  breaks = 4,
  labels = c("Losers", "Strugglers", "Contenders", "Winners")
)
# Preview the category next to the data; the combined frame is only printed,
# not stored.
head(cbind(moneyball_rev1, Wins), 5)
## TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## 1 82 1445 194 39
## 2 70 1339 219 22
## 3 86 1377 232 35
## 4 70 1387 209 38
## 5 82 1297 186 27
## TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## 1 13 512 842 101
## 2 190 685 1075 37
## 3 137 602 917 46
## 4 96 451 922 43
## 5 102 472 920 49
## TEAM_BASERUN_CS TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB
## 1 49 1518 84 536.5
## 2 49 1347 191 689.0
## 3 49 1377 137 602.0
## 4 30 1396 97 454.0
## 5 39 1297 102 472.0
## TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP Wins
## 1 813.5 159 149 Contenders
## 2 1082.0 193 155 Strugglers
## 3 917.0 175 153 Contenders
## 4 928.0 164 156 Strugglers
## 5 920.0 138 168 Contenders
# Bin TEAM_BATTING_H (column 2) into four equal-width categories.
# cut() assigns labels to intervals in increasing order, so the labels are
# listed from the fewest hits ("Light") to the most ("Heavy").
Hits <- cut(
  moneyball_rev1[["TEAM_BATTING_H"]],
  breaks = 4,
  labels = c("Light", "Average", "Above Average", "Heavy")
)
# Preview the category next to the data; the combined frame is only printed,
# not stored.
head(cbind(moneyball_rev1, Hits), 5)
## TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## 1 82 1445 194 39
## 2 70 1339 219 22
## 3 86 1377 232 35
## 4 70 1387 209 38
## 5 82 1297 186 27
## TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## 1 13 512 842 101
## 2 190 685 1075 37
## 3 137 602 917 46
## 4 96 451 922 43
## 5 102 472 920 49
## TEAM_BASERUN_CS TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB
## 1 49 1518 84 536.5
## 2 49 1347 191 689.0
## 3 49 1377 137 602.0
## 4 30 1396 97 454.0
## 5 39 1297 102 472.0
## TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP Hits
## 1 813.5 159 149 Average
## 2 1082.0 193 155 Average
## 3 917.0 175 153 Average
## 4 928.0 164 156 Average
## 5 920.0 138 168 Light
# Derived features. with() evaluates each expression inside moneyball_rev1,
# so no attach()/detach() pair is needed and the search path is untouched.
# Pitch_Strength: TEAM_PITCHING_SO minus TEAM_PITCHING_H and TEAM_PITCHING_BB.
Pitch_Strength <- with(
  moneyball_rev1,
  TEAM_PITCHING_SO - TEAM_PITCHING_H - TEAM_PITCHING_BB
)
# Bat_Strength: hits that are not 2B/3B/HR counted once, 2B/3B/HR weighted
# 2/3/4, plus TEAM_BATTING_BB, minus TEAM_BATTING_SO.
Bat_Strength <- with(
  moneyball_rev1,
  (TEAM_BATTING_H - TEAM_BATTING_2B - TEAM_BATTING_3B - TEAM_BATTING_HR) +
    2 * TEAM_BATTING_2B + 3 * TEAM_BATTING_3B + 4 * TEAM_BATTING_HR +
    TEAM_BATTING_BB - TEAM_BATTING_SO
)
# Both features are exact linear combinations of columns already present.
moneyball_rev2 <- cbind(moneyball_rev1, Pitch_Strength, Bat_Strength)
head(moneyball_rev2, 10)
## TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B
## 1 82 1445 194 39
## 2 70 1339 219 22
## 3 86 1377 232 35
## 4 70 1387 209 38
## 5 82 1297 186 27
## 6 75 1279 200 36
## 7 80 1244 179 54
## 8 85 1273 171 37
## 9 86 1391 197 40
## 10 76 1271 213 18
## TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB
## 1 13 512 842 101
## 2 190 685 1075 37
## 3 137 602 917 46
## 4 96 451 922 43
## 5 102 472 920 49
## 6 92 443 973 107
## 7 122 525 1062 80
## 8 115 456 1027 40
## 9 114 447 922 69
## 10 96 441 827 72
## TEAM_BASERUN_CS TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB
## 1 49 1518 84 536.5
## 2 49 1347 191 689.0
## 3 49 1377 137 602.0
## 4 30 1396 97 454.0
## 5 39 1297 102 472.0
## 6 59 1279 92 443.0
## 7 54 1244 122 525.0
## 8 36 1281 116 459.0
## 9 49 1391 114 447.0
## 10 34 1271 96 441.0
## TEAM_PITCHING_SO TEAM_FIELDING_E TEAM_FIELDING_DP Pitch_Strength
## 1 813.5 159 149 -1241
## 2 1082.0 193 155 -954
## 3 917.0 175 153 -1062
## 4 928.0 164 156 -922
## 5 920.0 138 168 -849
## 6 973.0 123 149 -749
## 7 1062.0 136 186 -707
## 8 1033.0 112 136 -707
## 9 922.0 127 169 -916
## 10 827.0 131 159 -885
## Bat_Strength
## 1 1426
## 2 1782
## 3 1775
## 4 1489
## 5 1395
## 6 1297
## 7 1360
## 8 1292
## 9 1535
## 10 1422
# Scatter plots of the number of wins against hits and the two derived
# strength features, each with a fitted linear trend line.
# The plots read from moneyball_rev2, the data frame that holds
# Pitch_Strength and Bat_Strength, and use the numeric TARGET_WINS column on
# the y axis to match the "Number of Wins" label and the lm smoother.
ggplot(moneyball_rev2, aes(x = TEAM_BATTING_H, y = TARGET_WINS)) +
  geom_jitter() +
  labs(x = "Hits (1B, 2B, 3B, HR)", y = "Number of Wins") +
  geom_smooth(method = "lm")

ggplot(moneyball_rev2, aes(x = Pitch_Strength, y = TARGET_WINS)) +
  geom_jitter() +
  labs(x = "Pitching Strength", y = "Number of Wins") +
  geom_smooth(method = "lm")

ggplot(moneyball_rev2, aes(x = Bat_Strength, y = TARGET_WINS)) +
  geom_jitter() +
  labs(x = "Batting Strength", y = "Number of Wins") +
  geom_smooth(method = "lm")

# Single-predictor regression of wins on hits, with lm's diagnostic plots
# laid out in a 2 x 2 grid.
lm_samp <- lm(formula = TARGET_WINS ~ TEAM_BATTING_H, data = moneyball_rev1)
par(mfrow = c(2, 2))
plot(x = lm_samp)

# Back to one plot per page.
par(mfrow = c(1, 1))
# Correlation matrix of the imputed, outlier-treated data, drawn as circles.
M <- cor(x = moneyball_rev1)
corrplot(corr = M, method = "circle")

III. Build Models
# Columns available for modelling, including the two derived features.
colnames(moneyball_rev2)
## [1] "TARGET_WINS" "TEAM_BATTING_H" "TEAM_BATTING_2B"
## [4] "TEAM_BATTING_3B" "TEAM_BATTING_HR" "TEAM_BATTING_BB"
## [7] "TEAM_BATTING_SO" "TEAM_BASERUN_SB" "TEAM_BASERUN_CS"
## [10] "TEAM_PITCHING_H" "TEAM_PITCHING_HR" "TEAM_PITCHING_BB"
## [13] "TEAM_PITCHING_SO" "TEAM_FIELDING_E" "TEAM_FIELDING_DP"
## [16] "Pitch_Strength" "Bat_Strength"
# Model 1: regress TARGET_WINS on every other column of moneyball_rev1
# (the data frame without the derived strength features).
model1 <- lm(formula = TARGET_WINS ~ ., data = moneyball_rev1)
summary(object = model1)
##
## Call:
## lm(formula = TARGET_WINS ~ ., data = moneyball_rev1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.847 -8.223 0.183 8.309 55.948
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.369198 6.140945 7.877 5.18e-15 ***
## TEAM_BATTING_H 0.033212 0.004230 7.852 6.29e-15 ***
## TEAM_BATTING_2B -0.002898 0.008380 -0.346 0.729503
## TEAM_BATTING_3B 0.113426 0.016979 6.681 2.99e-11 ***
## TEAM_BATTING_HR 0.039798 0.024142 1.648 0.099399 .
## TEAM_BATTING_BB 0.019476 0.005551 3.508 0.000460 ***
## TEAM_BATTING_SO -0.009944 0.003385 -2.937 0.003344 **
## TEAM_BASERUN_SB 0.040590 0.006019 6.743 1.96e-11 ***
## TEAM_BASERUN_CS -0.033590 0.037831 -0.888 0.374680
## TEAM_PITCHING_H -0.008647 0.002482 -3.483 0.000504 ***
## TEAM_PITCHING_HR 0.029648 0.021851 1.357 0.174977
## TEAM_PITCHING_BB 0.006737 0.004971 1.355 0.175483
## TEAM_PITCHING_SO -0.001993 0.002895 -0.688 0.491366
## TEAM_FIELDING_E -0.038773 0.005920 -6.549 7.14e-11 ***
## TEAM_FIELDING_DP -0.106103 0.014641 -7.247 5.83e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.74 on 2261 degrees of freedom
## Multiple R-squared: 0.2269, Adjusted R-squared: 0.2221
## F-statistic: 47.39 on 14 and 2261 DF, p-value: < 2.2e-16
# Model 2: keep only the eight predictors that model 1 reports as significant
# at the 0.01 level.
model2 <- lm(
  formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_BB +
    TEAM_BATTING_SO + TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_FIELDING_E +
    TEAM_FIELDING_DP,
  data = moneyball_rev2
)
summary(object = model2)
##
## Call:
## lm(formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B +
## TEAM_BATTING_BB + TEAM_BATTING_SO + TEAM_BASERUN_SB + TEAM_PITCHING_H +
## TEAM_FIELDING_E + TEAM_FIELDING_DP, data = moneyball_rev2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.617 -8.227 0.225 8.421 50.724
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.565232 5.289333 5.401 7.34e-08 ***
## TEAM_BATTING_H 0.043769 0.003331 13.141 < 2e-16 ***
## TEAM_BATTING_3B 0.073133 0.016301 4.486 7.60e-06 ***
## TEAM_BATTING_BB 0.032244 0.003329 9.687 < 2e-16 ***
## TEAM_BATTING_SO -0.002642 0.001711 -1.544 0.12263
## TEAM_BASERUN_SB 0.030480 0.005763 5.289 1.35e-07 ***
## TEAM_PITCHING_H -0.007793 0.002376 -3.279 0.00106 **
## TEAM_FIELDING_E -0.045720 0.005653 -8.088 9.75e-16 ***
## TEAM_FIELDING_DP -0.090430 0.014615 -6.188 7.23e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.93 on 2267 degrees of freedom
## Multiple R-squared: 0.2016, Adjusted R-squared: 0.1988
## F-statistic: 71.58 on 8 and 2267 DF, p-value: < 2.2e-16
# Model 3: model 2 without TEAM_BATTING_SO, which was not significant there
# (p = 0.12). The formula lists each remaining predictor once, separated by a
# single `+`.
model3 <- lm(
  TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B + TEAM_BATTING_BB +
    TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_FIELDING_E + TEAM_FIELDING_DP,
  data = moneyball_rev2
)
summary(model3)
##
## Call:
## lm(formula = TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_3B +
## TEAM_BATTING_BB + TEAM_BASERUN_SB + TEAM_PITCHING_H + TEAM_FIELDING_E +
## TEAM_FIELDING_DP, data = moneyball_rev2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.940 -8.096 0.033 8.300 50.913
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.168812 4.459350 5.420 6.60e-08 ***
## TEAM_BATTING_H 0.044299 0.003314 13.367 < 2e-16 ***
## TEAM_BATTING_3B 0.084627 0.014507 5.834 6.20e-09 ***
## TEAM_BATTING_BB 0.032004 0.003326 9.622 < 2e-16 ***
## TEAM_BASERUN_SB 0.028658 0.005643 5.079 4.11e-07 ***
## TEAM_PITCHING_H -0.007301 0.002356 -3.099 0.00196 **
## TEAM_FIELDING_E -0.043195 0.005413 -7.980 2.29e-15 ***
## TEAM_FIELDING_DP -0.089217 0.014598 -6.111 1.16e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.93 on 2268 degrees of freedom
## Multiple R-squared: 0.2008, Adjusted R-squared: 0.1983
## F-statistic: 81.41 on 7 and 2268 DF, p-value: < 2.2e-16
# Box-Cox transformation
# Plot the Box-Cox profile log-likelihood for model1 over the default lambda
# grid.
boxcox(object = model1, plotit = TRUE)

# Same plot restricted to lambda between 1.2 and 1.5 in steps of 0.05.
boxcox(object = model1, plotit = TRUE, lambda = seq(from = 1.2, to = 1.5, by = 0.05))

# Work on a copy that includes the derived features and raise the response to
# the power 1.42 (presumably the lambda read off the plot above -- confirm).
moneyball_rev3 <- moneyball_rev2
moneyball_rev3$TARGET_WINS <- moneyball_rev3$TARGET_WINS^1.42
# Model 4: transformed response on all columns. Pitch_Strength and
# Bat_Strength get NA coefficients in the summary below ("not defined because
# of singularities").
model4 <- lm(formula = TARGET_WINS ~ ., data = moneyball_rev3)
summary(object = model4)
##
## Call:
## lm(formula = TARGET_WINS ~ ., data = moneyball_rev3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -437.25 -75.29 -1.54 72.46 513.47
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 229.89338 54.85226 4.191 2.88e-05 ***
## TEAM_BATTING_H 0.29128 0.03778 7.709 1.88e-14 ***
## TEAM_BATTING_2B -0.02500 0.07485 -0.334 0.738355
## TEAM_BATTING_3B 1.03443 0.15166 6.821 1.16e-11 ***
## TEAM_BATTING_HR 0.34311 0.21565 1.591 0.111731
## TEAM_BATTING_BB 0.17520 0.04959 3.533 0.000419 ***
## TEAM_BATTING_SO -0.09605 0.03024 -3.176 0.001512 **
## TEAM_BASERUN_SB 0.35338 0.05377 6.572 6.13e-11 ***
## TEAM_BASERUN_CS -0.30543 0.33791 -0.904 0.366155
## TEAM_PITCHING_H -0.07550 0.02217 -3.405 0.000673 ***
## TEAM_PITCHING_HR 0.27330 0.19518 1.400 0.161569
## TEAM_PITCHING_BB 0.06032 0.04440 1.359 0.174414
## TEAM_PITCHING_SO -0.01204 0.02586 -0.466 0.641483
## TEAM_FIELDING_E -0.33621 0.05288 -6.358 2.47e-10 ***
## TEAM_FIELDING_DP -0.94631 0.13078 -7.236 6.30e-13 ***
## Pitch_Strength NA NA NA NA
## Bat_Strength NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 113.8 on 2261 degrees of freedom
## Multiple R-squared: 0.2252, Adjusted R-squared: 0.2204
## F-statistic: 46.95 on 14 and 2261 DF, p-value: < 2.2e-16
IV. Select Model
# Model 1 is the selected model.
# Show its diagnostic plots plus a histogram of its residuals in a 2 x 3 grid.
par(mfrow = c(2, 3))
plot(x = model1)
hist(x = resid(model1), main = "Histogram of Residuals")
# Back to one plot per page.
par(mfrow = c(1, 1))

# Load the evaluation dataset from a local CSV file.
moneyball_eval <- read.csv(file = "moneyball-evaluation-data.csv", header = TRUE)
# Drop columns 1 and 10 by position.
# NOTE(review): presumably INDEX and TEAM_BATTING_HBP, mirroring the training
# data -- verify against the evaluation file's header.
moneyball_eval1 <- moneyball_eval[-c(1, 10)]
head(x = moneyball_eval1, n = 10)
## TEAM_BATTING_H TEAM_BATTING_2B TEAM_BATTING_3B TEAM_BATTING_HR
## 1 1209 170 33 83
## 2 1221 151 29 88
## 3 1395 183 29 93
## 4 1539 309 29 159
## 5 1445 203 68 5
## 6 1431 236 53 10
## 7 1430 219 55 37
## 8 1385 158 42 33
## 9 1259 177 78 23
## 10 1397 212 42 58
## TEAM_BATTING_BB TEAM_BATTING_SO TEAM_BASERUN_SB TEAM_BASERUN_CS
## 1 447 1080 62 50
## 2 516 929 54 39
## 3 509 816 59 47
## 4 486 914 148 57
## 5 95 416 NA NA
## 6 215 377 NA NA
## 7 568 527 365 NA
## 8 356 609 185 NA
## 9 466 689 150 NA
## 10 452 584 52 NA
## TEAM_PITCHING_H TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO
## 1 1209 83 447 1080
## 2 1221 88 516 929
## 3 1395 93 509 816
## 4 1539 159 486 914
## 5 3902 14 257 1123
## 6 2793 20 420 736
## 7 1544 40 613 569
## 8 1626 39 418 715
## 9 1342 25 497 734
## 10 1489 62 482 622
## TEAM_FIELDING_E TEAM_FIELDING_DP
## 1 140 156
## 2 135 164
## 3 156 153
## 4 124 154
## 5 616 130
## 6 572 105
## 7 490 NA
## 8 328 104
## 9 226 132
## 10 184 145
# Descriptive statistics of the evaluation data before imputation; the `n`
# column shows which variables have missing values.
psych::describe(moneyball_eval1)
## vars n mean sd median trimmed mad min max
## TEAM_BATTING_H 1 259 1469.39 150.66 1455.0 1463.68 114.16 819 2170
## TEAM_BATTING_2B 2 259 241.32 49.52 239.0 242.33 48.93 44 376
## TEAM_BATTING_3B 3 259 55.91 27.14 52.0 52.95 26.69 14 155
## TEAM_BATTING_HR 4 259 95.63 56.33 101.0 93.68 66.72 0 242
## TEAM_BATTING_BB 5 259 498.96 120.59 509.0 505.98 94.89 15 792
## TEAM_BATTING_SO 6 241 709.34 243.11 686.0 715.56 271.32 0 1268
## TEAM_BASERUN_SB 7 246 123.70 93.39 92.0 108.07 60.05 0 580
## TEAM_BASERUN_CS 8 172 52.32 23.10 49.5 50.13 18.53 0 154
## TEAM_PITCHING_H 9 259 1813.46 1662.91 1515.0 1554.25 173.46 1155 22768
## TEAM_PITCHING_HR 10 259 102.15 57.65 104.0 100.08 63.75 0 336
## TEAM_PITCHING_BB 11 259 552.42 172.95 526.0 536.46 97.85 136 2008
## TEAM_PITCHING_SO 12 241 799.67 634.31 745.0 765.41 249.08 0 9963
## TEAM_FIELDING_E 13 259 249.75 230.90 163.0 197.36 59.30 73 1568
## TEAM_FIELDING_DP 14 228 146.06 25.88 148.0 147.24 23.72 69 204
## range skew kurtosis se
## TEAM_BATTING_H 1351 0.59 3.66 9.36
## TEAM_BATTING_2B 332 -0.33 0.67 3.08
## TEAM_BATTING_3B 141 0.98 0.70 1.69
## TEAM_BATTING_HR 242 0.17 -0.90 3.50
## TEAM_BATTING_BB 777 -0.92 2.53 7.49
## TEAM_BATTING_SO 1268 -0.26 -0.23 15.66
## TEAM_BASERUN_SB 580 1.79 3.85 5.95
## TEAM_BASERUN_CS 154 1.41 3.58 1.76
## TEAM_PITCHING_H 21613 9.28 102.07 103.33
## TEAM_PITCHING_HR 336 0.34 -0.10 3.58
## TEAM_PITCHING_BB 1872 4.11 29.21 10.75
## TEAM_PITCHING_SO 9963 12.48 177.77 40.86
## TEAM_FIELDING_E 1495 3.09 10.87 14.35
## TEAM_FIELDING_DP 135 -0.42 0.16 1.71
# Impute missing values column by column with that column's median.
# seq_len() is used instead of 1:ncol(), which would iterate over c(1, 0)
# if the data frame ever had zero columns.
# NOTE(review): the medians come from the evaluation data itself, not from
# the training data -- confirm this is intended.
for (col_idx in seq_len(ncol(moneyball_eval1))) {
  missing_rows <- is.na(moneyball_eval1[, col_idx])
  moneyball_eval1[missing_rows, col_idx] <-
    median(moneyball_eval1[, col_idx], na.rm = TRUE)
}
# Re-check the statistics: `n` is now 259 for every column.
describe(moneyball_eval1)
## vars n mean sd median trimmed mad min max
## TEAM_BATTING_H 1 259 1469.39 150.66 1455.0 1463.68 114.16 819 2170
## TEAM_BATTING_2B 2 259 241.32 49.52 239.0 242.33 48.93 44 376
## TEAM_BATTING_3B 3 259 55.91 27.14 52.0 52.95 26.69 14 155
## TEAM_BATTING_HR 4 259 95.63 56.33 101.0 93.68 66.72 0 242
## TEAM_BATTING_BB 5 259 498.96 120.59 509.0 505.98 94.89 15 792
## TEAM_BATTING_SO 6 259 707.71 234.55 686.0 712.96 253.52 0 1268
## TEAM_BASERUN_SB 7 259 122.11 91.27 92.0 106.65 57.82 0 580
## TEAM_BASERUN_CS 8 259 51.37 18.86 49.5 49.70 9.64 0 154
## TEAM_PITCHING_H 9 259 1813.46 1662.91 1515.0 1554.25 173.46 1155 22768
## TEAM_PITCHING_HR 10 259 102.15 57.65 104.0 100.08 63.75 0 336
## TEAM_PITCHING_BB 11 259 552.42 172.95 526.0 536.46 97.85 136 2008
## TEAM_PITCHING_SO 12 259 795.87 611.94 745.0 763.71 232.77 0 9963
## TEAM_FIELDING_E 13 259 249.75 230.90 163.0 197.36 59.30 73 1568
## TEAM_FIELDING_DP 14 259 146.29 24.29 148.0 147.49 19.27 69 204
## range skew kurtosis se
## TEAM_BATTING_H 1351 0.59 3.66 9.36
## TEAM_BATTING_2B 332 -0.33 0.67 3.08
## TEAM_BATTING_3B 141 0.98 0.70 1.69
## TEAM_BATTING_HR 242 0.17 -0.90 3.50
## TEAM_BATTING_BB 777 -0.92 2.53 7.49
## TEAM_BATTING_SO 1268 -0.25 -0.04 14.57
## TEAM_BASERUN_SB 580 1.87 4.27 5.67
## TEAM_BASERUN_CS 154 1.87 7.21 1.17
## TEAM_PITCHING_H 21613 9.28 102.07 103.33
## TEAM_PITCHING_HR 336 0.34 -0.10 3.58
## TEAM_PITCHING_BB 1872 4.11 29.21 10.75
## TEAM_PITCHING_SO 9963 12.95 191.51 38.02
## TEAM_FIELDING_E 1495 3.09 10.87 14.35
## TEAM_FIELDING_DP 135 -0.48 0.61 1.51
# Predict wins for the evaluation rows with model1; `lwr`/`upr` are the
# bounds of the prediction interval.
# NOTE(review): the evaluation data is median-imputed but not passed through
# outlier() as the training data was; some fitted values below are negative
# (rows 27 and 43) -- confirm whether the same treatment should be applied.
predict(object = model1, newdata = moneyball_eval1, interval = "prediction")
## fit lwr upr
## 1 64.763721 39.7158157 89.81163
## 2 68.008045 42.9613982 93.05469
## 3 73.989277 48.9682398 99.01031
## 4 84.383341 59.3547412 109.41194
## 5 31.958117 4.1699025 59.74633
## 6 48.615522 22.6024612 74.62858
## 7 78.040522 52.7114066 103.36964
## 8 71.445084 46.3539437 96.53622
## 9 74.335907 49.3097653 99.36205
## 10 73.118087 48.1015521 98.13462
## 11 69.385248 44.3482068 94.42229
## 12 82.154614 57.0859226 107.22331
## 13 83.699001 58.5938590 108.80414
## 14 82.065583 56.8721395 107.25903
## 15 83.754452 58.6813527 108.82755
## 16 75.534463 50.4882189 100.58071
## 17 74.156664 49.1313912 99.18194
## 18 78.845238 53.8275587 103.86292
## 19 71.053118 45.9798767 96.12636
## 20 87.766366 62.5893488 112.94338
## 21 84.669897 59.6309859 109.70881
## 22 83.386368 58.3476986 108.42504
## 23 84.179009 59.1506690 109.20735
## 24 73.130402 48.0951997 98.16560
## 25 79.545677 54.5048880 104.58647
## 26 84.688019 59.6233406 109.75270
## 27 -1.012063 -32.9250524 30.90093
## 28 73.126813 48.0576427 98.19598
## 29 85.314340 60.2205342 110.40815
## 30 72.885257 47.8120491 97.95846
## 31 92.271439 67.2220277 117.32085
## 32 86.160277 61.1155522 111.20500
## 33 85.632349 60.5869127 110.67778
## 34 87.093212 61.9644070 112.22202
## 35 81.368373 56.3403476 106.39640
## 36 86.363808 61.3249158 111.40270
## 37 78.106863 53.0781278 103.13560
## 38 88.818206 63.7805146 113.85590
## 39 78.475809 53.0906805 103.86094
## 40 89.178591 64.0885859 114.26860
## 41 83.729882 58.6730621 108.78670
## 42 91.460028 66.3966358 116.52342
## 43 -18.434216 -50.3238203 13.45539
## 44 103.113648 77.4885074 128.73879
## 45 88.402614 63.0875823 113.71765
## 46 88.581223 63.2357223 113.92672
## 47 89.965145 64.6212304 115.30906
## 48 75.390335 49.6242577 101.15641
## 49 71.609626 46.5811225 96.63813
## 50 77.679234 51.8436987 103.51477
## 51 75.668991 50.6397922 100.69819
## 52 82.477296 57.4374205 107.51717
## 53 77.557166 52.5408159 102.57352
## 54 75.003735 49.9467729 100.06070
## 55 74.231523 49.1995846 99.26346
## 56 77.598603 52.5813266 102.61588
## 57 92.153459 66.9795080 117.32741
## 58 78.131290 53.0684730 103.19411
## 59 67.509038 42.4197057 92.59837
## 60 79.646008 54.6089351 104.68308
## 61 86.882234 61.8528867 111.91158
## 62 79.934141 54.8305057 105.03778
## 63 86.711384 61.6988128 111.72396
## 64 83.528308 58.4874386 108.56918
## 65 81.246377 56.1707323 106.32202
## 66 94.853352 69.5116072 120.19510
## 67 71.636928 46.5298091 96.74405
## 68 74.810110 49.7231540 99.89707
## 69 79.916511 54.8315310 105.00149
## 70 92.273473 67.1603863 117.38656
## 71 87.348029 62.2712426 112.42482
## 72 71.355695 46.2881880 96.42320
## 73 79.940106 54.8725867 105.00762
## 74 88.943401 63.8208416 114.06596
## 75 76.062088 51.0025247 101.12165
## 76 80.664240 55.6077037 105.72078
## 77 84.254475 59.2053701 109.30358
## 78 82.142197 57.1241820 107.16021
## 79 73.700534 48.6568177 98.74425
## 80 75.697691 50.6730469 100.72233
## 81 84.551650 59.4655721 109.63773
## 82 86.584038 61.5337977 111.63428
## 83 94.447787 69.3712731 119.52430
## 84 75.021043 49.9683077 100.07378
## 85 86.759160 61.7307012 111.78762
## 86 78.638374 53.5447744 103.73197
## 87 82.607833 57.5734077 107.64226
## 88 82.182371 57.0069662 107.35778
## 89 89.723502 64.6403922 114.80661
## 90 90.019371 64.9736212 115.06512
## 91 75.890824 50.6962233 101.08542
## 92 9.660513 -38.9504036 58.27143
## 93 72.047324 46.9498646 97.14478
## 94 80.980015 55.9075801 106.05245
## 95 83.392767 58.3014752 108.48406
## 96 85.093387 60.0200666 110.16671
## 97 90.447051 64.2414741 116.65263
## 98 98.874581 73.5489889 124.20017
## 99 87.920341 62.7950157 113.04567
## 100 89.076792 63.9318634 114.22172
## 101 80.459777 55.4249758 105.49458
## 102 75.860332 50.8287851 100.89188
## 103 82.962078 57.9414929 107.98266
## 104 85.527034 60.4123405 110.64173
## 105 78.017917 52.9871480 103.04869
## 106 49.590854 23.4329662 75.74874
## 107 37.344646 11.2528599 63.43643
## 108 76.712059 51.6609410 101.76318
## 109 88.200754 63.1523392 113.24917
## 110 49.178645 23.6192586 74.73803
## 111 86.706847 61.6173692 111.79633
## 112 86.330726 61.1628274 111.49863
## 113 91.925305 66.8820136 116.96860
## 114 89.181779 64.1554849 114.20807
## 115 79.186483 54.1448602 104.22811
## 116 78.545890 53.4995878 103.59219
## 117 84.818305 59.7680242 109.86859
## 118 81.754495 56.7163982 106.79259
## 119 73.616863 48.5654589 98.66827
## 120 71.425295 45.9913934 96.85920
## 121 91.198985 66.0779389 116.32003
## 122 71.039734 45.9418814 96.13759
## 123 69.906136 44.8576998 94.95457
## 124 70.456030 45.3082752 95.60378
## 125 72.761950 47.7280984 97.79580
## 126 83.449440 58.3783574 108.52052
## 127 87.985341 62.9092128 113.06147
## 128 77.518820 52.4981625 102.53948
## 129 91.932444 66.8969272 116.96796
## 130 88.860283 63.8177586 113.90281
## 131 84.549211 59.5215275 109.57689
## 132 80.991830 55.9534693 106.03019
## 133 77.618438 52.5555845 102.68129
## 134 82.048984 56.9878996 107.11007
## 135 86.395157 61.3623457 111.42797
## 136 46.092452 19.4848267 72.70008
## 137 75.682557 50.5873547 100.77776
## 138 75.765857 50.7404065 100.79131
## 139 85.208963 59.7396255 110.67830
## 140 79.336494 54.1281721 104.54482
## 141 64.685102 39.6091661 89.76104
## 142 74.256616 49.2028612 99.31037
## 143 91.344438 66.2997056 116.38917
## 144 74.674225 49.6390091 99.70944
## 145 76.144382 51.1012760 101.18749
## 146 74.570342 49.5487957 99.59189
## 147 79.244354 54.2164462 104.27226
## 148 80.803287 55.7834928 105.82308
## 149 78.498590 53.4583883 103.53879
## 150 83.427154 58.4133466 108.44096
## 151 81.487749 56.4467771 106.52872
## 152 79.376901 54.3380681 104.41573
## 153 -166.767191 -271.9356287 -61.59875
## 154 67.947050 42.8782682 93.01583
## 155 77.810549 52.7875999 102.83350
## 156 69.571993 44.5083334 94.63565
## 157 90.430106 65.3645857 115.49563
## 158 49.599773 23.8599538 75.33959
## 159 89.009075 63.7975173 114.22063
## 160 73.697205 48.6337475 98.76066
## 161 103.961870 78.8210093 129.10273
## 162 106.973452 81.8110139 132.13589
## 163 95.424718 70.3784852 120.47095
## 164 104.503809 79.4111109 129.59651
## 165 100.217427 75.1374370 125.29742
## 166 93.187668 68.0957822 118.27955
## 167 82.905728 57.8641239 107.94733
## 168 81.766807 56.7301194 106.80349
## 169 72.569927 47.5433217 97.59653
## 170 80.173714 55.1524244 105.19500
## 171 90.389867 65.3123964 115.46734
## 172 90.886668 65.8444049 115.92893
## 173 83.033080 58.0062307 108.05993
## 174 92.355277 67.2832595 117.42729
## 175 83.695365 58.6583091 108.73242
## 176 78.154119 53.1052778 103.20296
## 177 82.143182 57.0929245 107.19344
## 178 71.759363 46.7011287 96.81760
## 179 76.451897 51.3198195 101.58397
## 180 80.650161 55.6298987 105.67042
## 181 88.031769 62.3839152 113.67962
## 182 88.400534 63.3442791 113.45679
## 183 84.657693 59.6452749 109.67011
## 184 86.483869 61.3965032 111.57124
## 185 3.225685 -44.9967744 51.44814
## 186 85.857741 60.0191903 111.69629
## 187 81.081181 55.6271166 106.53524
## 188 28.550084 1.3655171 55.73465
## 189 45.665403 19.9656513 71.36515
## 190 100.397808 74.9081129 125.88750
## 191 69.178553 44.0547680 94.30234
## 192 80.463527 55.4311379 105.49592
## 193 72.088164 46.8798051 97.29652
## 194 74.204476 49.1711567 99.23780
## 195 75.270877 50.2056305 100.33612
## 196 68.085300 43.0368777 93.13372
## 197 77.120270 52.1039177 102.13662
## 198 89.455207 64.3793227 114.53109
## 199 83.424354 58.3567167 108.49199
## 200 85.250634 60.2189637 110.28230
## 201 76.178451 51.1228717 101.23403
## 202 79.657481 54.6375511 104.67741
## 203 76.393645 51.3165320 101.47076
## 204 84.006288 58.6496297 109.36295
## 205 79.733269 54.6486215 104.81792
## 206 85.698008 60.5898696 110.80615
## 207 77.963276 52.8769942 103.04956
## 208 78.548438 53.5155268 103.58135
## 209 77.089161 51.8555313 102.32279
## 210 68.771078 43.4354874 94.10667
## 211 100.868109 75.5829643 126.15325
## 212 87.688923 62.5647432 112.81310
## 213 86.638681 61.5878761 111.68949
## 214 67.528714 42.4922812 92.56515
## 215 72.457045 47.3605685 97.55352
## 216 82.537735 57.4695590 107.60591
## 217 78.819055 53.7436370 103.89447
## 218 89.648273 64.1158916 115.18065
## 219 76.142560 51.1141559 101.17096
## 220 80.885861 55.8556752 105.91605
## 221 75.738831 50.6820661 100.79560
## 222 73.129949 48.0890067 98.17089
## 223 81.181433 56.1543287 106.20854
## 224 72.371137 47.3160933 97.42618
## 225 10.978747 -24.2879213 46.24542
## 226 77.903733 52.8812243 102.92624
## 227 79.173536 54.1233013 104.22377
## 228 80.723945 55.6964505 105.75144
## 229 83.773563 58.7605319 108.78659
## 230 51.961548 26.0246530 77.89844
## 231 82.013360 56.9282605 107.09846
## 232 89.506309 63.9322474 115.08037
## 233 81.917893 56.8747281 106.96106
## 234 86.419527 61.3815181 111.45754
## 235 81.038545 56.0179689 106.05912
## 236 76.788052 51.7646692 101.81143
## 237 79.187518 54.0817180 104.29332
## 238 78.485255 53.4586733 103.51184
## 239 87.616938 62.3242686 112.90961
## 240 71.987017 46.9491795 97.02485
## 241 88.072829 63.0594144 113.08624
## 242 86.739320 61.7170327 111.76161
## 243 82.154481 57.1280465 107.18091
## 244 80.167403 55.1369972 105.19781
## 245 63.696362 38.5576347 88.83509
## 246 83.136024 58.0906354 108.18141
## 247 78.157533 53.1398510 103.17521
## 248 83.523533 58.4704942 108.57657
## 249 73.548253 48.5192163 98.57729
## 250 82.200518 57.1452561 107.25578
## 251 81.881196 56.7694721 106.99292
## 252 26.835261 -0.7139863 54.38451
## 253 88.739619 63.4291436 114.05009
## 254 -39.616446 -100.9910062 21.75812
## 255 69.916054 44.8375069 94.99460
## 256 79.059002 53.9686138 104.14939
## 257 82.338469 57.2816846 107.39525
## 258 84.649267 59.6044201 109.69411
## 259 76.716970 51.6559752 101.77796