# Show the current working directory; the relative path "baseball.csv"
# passed to read.csv() below is resolved against it.
getwd()
[1] "/cloud/project"
# Load the team-season data and inspect its column types.
baseball <- read.csv(file = "baseball.csv")
str(object = baseball)
'data.frame': 1232 obs. of 15 variables:
$ Team : chr "ARI" "ATL" "BAL" "BOS" ...
$ League : chr "NL" "NL" "AL" "AL" ...
$ Year : int 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 ...
$ RS : int 734 700 712 734 613 748 669 667 758 726 ...
$ RA : int 688 600 705 806 759 676 588 845 890 670 ...
$ W : int 81 94 93 69 61 85 97 68 64 88 ...
$ OBP : num 0.328 0.32 0.311 0.315 0.302 0.318 0.315 0.324 0.33 0.335 ...
$ SLG : num 0.418 0.389 0.417 0.415 0.378 0.422 0.411 0.381 0.436 0.422 ...
$ BA : num 0.259 0.247 0.247 0.26 0.24 0.255 0.251 0.251 0.274 0.268 ...
$ Playoffs : int 0 1 1 0 0 0 1 0 0 1 ...
$ RankSeason : int NA 4 5 NA NA NA 2 NA NA 6 ...
$ RankPlayoffs: int NA 5 4 NA NA NA 4 NA NA 2 ...
$ G : int 162 162 162 162 162 162 162 162 162 162 ...
$ OOBP : num 0.317 0.306 0.315 0.331 0.335 0.319 0.305 0.336 0.357 0.314 ...
$ OSLG : num 0.415 0.378 0.403 0.428 0.424 0.405 0.39 0.43 0.47 0.402 ...
# Keep only the seasons before 2002 for model fitting, then inspect the result.
moneyball <- subset(x = baseball, subset = Year < 2002)
str(object = moneyball)
'data.frame': 902 obs. of 15 variables:
$ Team : chr "ANA" "ARI" "ATL" "BAL" ...
$ League : chr "AL" "NL" "NL" "AL" ...
$ Year : int 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
$ RS : int 691 818 729 687 772 777 798 735 897 923 ...
$ RA : int 730 677 643 829 745 701 795 850 821 906 ...
$ W : int 75 92 88 63 82 88 83 66 91 73 ...
$ OBP : num 0.327 0.341 0.324 0.319 0.334 0.336 0.334 0.324 0.35 0.354 ...
$ SLG : num 0.405 0.442 0.412 0.38 0.439 0.43 0.451 0.419 0.458 0.483 ...
$ BA : num 0.261 0.267 0.26 0.248 0.266 0.261 0.268 0.262 0.278 0.292 ...
$ Playoffs : int 0 1 1 0 0 0 0 0 1 0 ...
$ RankSeason : int NA 5 7 NA NA NA NA NA 6 NA ...
$ RankPlayoffs: int NA 1 3 NA NA NA NA NA 4 NA ...
$ G : int 162 162 162 162 161 162 162 162 162 162 ...
$ OOBP : num 0.331 0.311 0.314 0.337 0.329 0.321 0.334 0.341 0.341 0.35 ...
$ OSLG : num 0.412 0.404 0.384 0.439 0.393 0.398 0.427 0.455 0.417 0.48 ...
# Derive the run difference (runs scored minus runs allowed) as a new column.
moneyball$RD <- with(moneyball, RS - RA)
str(object = moneyball)
'data.frame': 902 obs. of 16 variables:
$ Team : chr "ANA" "ARI" "ATL" "BAL" ...
$ League : chr "AL" "NL" "NL" "AL" ...
$ Year : int 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
$ RS : int 691 818 729 687 772 777 798 735 897 923 ...
$ RA : int 730 677 643 829 745 701 795 850 821 906 ...
$ W : int 75 92 88 63 82 88 83 66 91 73 ...
$ OBP : num 0.327 0.341 0.324 0.319 0.334 0.336 0.334 0.324 0.35 0.354 ...
$ SLG : num 0.405 0.442 0.412 0.38 0.439 0.43 0.451 0.419 0.458 0.483 ...
$ BA : num 0.261 0.267 0.26 0.248 0.266 0.261 0.268 0.262 0.278 0.292 ...
$ Playoffs : int 0 1 1 0 0 0 0 0 1 0 ...
$ RankSeason : int NA 5 7 NA NA NA NA NA 6 NA ...
$ RankPlayoffs: int NA 1 3 NA NA NA NA NA 4 NA ...
$ G : int 162 162 162 162 161 162 162 162 162 162 ...
$ OOBP : num 0.331 0.311 0.314 0.337 0.329 0.321 0.334 0.341 0.341 0.35 ...
$ OSLG : num 0.412 0.404 0.384 0.439 0.393 0.398 0.427 0.455 0.417 0.48 ...
$ RD : int -39 141 86 -142 27 76 3 -115 76 17 ...
# Scatter plot of wins against run difference, to eyeball the relationship.
plot(x = moneyball$RD, y = moneyball$W)

# Simple linear regression of wins on run difference.
WinsReg <- lm(formula = W ~ RD, data = moneyball)
summary(object = WinsReg)
Call:
lm(formula = W ~ RD, data = moneyball)
Residuals:
Min 1Q Median 3Q Max
-14.2662 -2.6509 0.1234 2.9364 11.6570
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 80.881375 0.131157 616.67 <2e-16 ***
RD 0.105766 0.001297 81.55 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.939 on 900 degrees of freedom
Multiple R-squared: 0.8808, Adjusted R-squared: 0.8807
F-statistic: 6651 on 1 and 900 DF, p-value: < 2.2e-16
In-class activity 7
# If a baseball team scores 763 runs and allows 614 runs, how many games do we
# expect the team to win?
# Uses the WinsReg coefficients from summary(WinsReg), rounded:
#   W = 80.88 + 0.106 * RD, with RD = runs scored - runs allowed.
NumberofWins <- 80.88 + 0.106 * (763 - 614)
NumberofWins
[1] 96.674
A team with a run difference of 149 (763 runs scored minus 614 runs allowed) is expected to win about 97 games.
# Regress runs scored (RS) on OBP, SLG and batting average (BA).
RunsReg <- lm(formula = RS ~ OBP + SLG + BA, data = moneyball)
summary(object = RunsReg)
Call:
lm(formula = RS ~ OBP + SLG + BA, data = moneyball)
Residuals:
Min 1Q Median 3Q Max
-70.941 -17.247 -0.621 16.754 90.998
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -788.46 19.70 -40.029 < 2e-16 ***
OBP 2917.42 110.47 26.410 < 2e-16 ***
SLG 1637.93 45.99 35.612 < 2e-16 ***
BA -368.97 130.58 -2.826 0.00482 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 24.69 on 898 degrees of freedom
Multiple R-squared: 0.9302, Adjusted R-squared: 0.93
F-statistic: 3989 on 3 and 898 DF, p-value: < 2.2e-16
# Pairwise correlation between BA and OBP (checks how strongly the two
# predictors move together).
with(moneyball, cor(x = BA, y = OBP))
[1] 0.8540549
# Refit the runs-scored model without batting average (BA), keeping only
# OBP and SLG as predictors. This overwrites the earlier RunsReg.
RunsReg <- lm(formula = RS ~ OBP + SLG, data = moneyball)
summary(object = RunsReg)
Call:
lm(formula = RS ~ OBP + SLG, data = moneyball)
Residuals:
Min 1Q Median 3Q Max
-70.838 -17.174 -1.108 16.770 90.036
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -804.63 18.92 -42.53 <2e-16 ***
OBP 2737.77 90.68 30.19 <2e-16 ***
SLG 1584.91 42.16 37.60 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 24.79 on 899 degrees of freedom
Multiple R-squared: 0.9296, Adjusted R-squared: 0.9294
F-statistic: 5934 on 2 and 899 DF, p-value: < 2.2e-16
In-class activity 8: Part 1
If a baseball team’s OBP is 0.361 and SLG is 0.409, how many runs do we expect the team to score?
# Predicted runs scored from the two-predictor model RS ~ OBP + SLG, using the
# coefficients printed by summary(RunsReg): intercept, OBP slope, SLG slope.
ExpectedRuns <- -804.63 +
  2737.77 * 0.361 +  # OBP term
  1584.91 * 0.409    # SLG term
ExpectedRuns
[1] 831.9332
Based on the calculations, we expect the team to score around 832 runs.
# Install car (it provides vif()) only when it is not already available, so
# re-running this script does not re-download car and all of its dependencies.
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car")
}
Installing package into ‘/cloud/lib/x86_64-pc-linux-gnu-library/4.4’
(as ‘lib’ is unspecified)
also installing the dependencies ‘fansi’, ‘pkgconfig’, ‘cpp11’, ‘utf8’, ‘withr’, ‘gtable’, ‘isoband’, ‘rbibutils’, ‘backports’, ‘generics’, ‘purrr’, ‘tibble’, ‘tidyr’, ‘pillar’, ‘tidyselect’, ‘cowplot’, ‘Deriv’, ‘ggplot2’, ‘modelr’, ‘microbenchmark’, ‘Rdpack’, ‘colorspace’, ‘broom’, ‘dplyr’, ‘numDeriv’, ‘doBy’, ‘SparseM’, ‘MatrixModels’, ‘minqa’, ‘nloptr’, ‘reformulas’, ‘Rcpp’, ‘RcppEigen’, ‘farver’, ‘labeling’, ‘munsell’, ‘RColorBrewer’, ‘viridisLite’, ‘carData’, ‘abind’, ‘Formula’, ‘pbkrtest’, ‘quantreg’, ‘lme4’, ‘scales’
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/fansi_1.0.6.tar.gz'
Content type 'application/x-gzip' length 303572 bytes (296 KB)
==================================================
downloaded 296 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/pkgconfig_2.0.3.tar.gz'
Content type 'application/x-gzip' length 17998 bytes (17 KB)
==================================================
downloaded 17 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/cpp11_0.5.2.tar.gz'
Content type 'application/x-gzip' length 289097 bytes (282 KB)
==================================================
downloaded 282 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/utf8_1.2.4.tar.gz'
Content type 'application/x-gzip' length 145987 bytes (142 KB)
==================================================
downloaded 142 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/withr_3.0.2.tar.gz'
Content type 'application/x-gzip' length 217740 bytes (212 KB)
==================================================
downloaded 212 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/gtable_0.3.6.tar.gz'
Content type 'application/x-gzip' length 219376 bytes (214 KB)
==================================================
downloaded 214 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/isoband_0.2.7.tar.gz'
Content type 'application/x-gzip' length 1642543 bytes (1.6 MB)
==================================================
downloaded 1.6 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/rbibutils_2.3.tar.gz'
Content type 'application/x-gzip' length 1137759 bytes (1.1 MB)
==================================================
downloaded 1.1 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/backports_1.5.0.tar.gz'
Content type 'application/x-gzip' length 116627 bytes (113 KB)
==================================================
downloaded 113 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/generics_0.1.3.tar.gz'
Content type 'application/x-gzip' length 77807 bytes (75 KB)
==================================================
downloaded 75 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/purrr_1.0.4.tar.gz'
Content type 'application/x-gzip' length 519404 bytes (507 KB)
==================================================
downloaded 507 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/tibble_3.2.1.tar.gz'
Content type 'application/x-gzip' length 676334 bytes (660 KB)
==================================================
downloaded 660 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/tidyr_1.3.1.tar.gz'
Content type 'application/x-gzip' length 1180945 bytes (1.1 MB)
==================================================
downloaded 1.1 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/pillar_1.10.1.tar.gz'
Content type 'application/x-gzip' length 654756 bytes (639 KB)
==================================================
downloaded 639 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/tidyselect_1.2.1.tar.gz'
Content type 'application/x-gzip' length 221715 bytes (216 KB)
==================================================
downloaded 216 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/cowplot_1.1.3.tar.gz'
Content type 'application/x-gzip' length 1377427 bytes (1.3 MB)
==================================================
downloaded 1.3 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/Deriv_4.1.6.tar.gz'
Content type 'application/x-gzip' length 149823 bytes (146 KB)
==================================================
downloaded 146 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/ggplot2_3.5.1.tar.gz'
Content type 'application/x-gzip' length 4957008 bytes (4.7 MB)
==================================================
downloaded 4.7 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/modelr_0.1.11.tar.gz'
Content type 'application/x-gzip' length 201200 bytes (196 KB)
==================================================
downloaded 196 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/microbenchmark_1.5.0.tar.gz'
Content type 'application/x-gzip' length 65220 bytes (63 KB)
==================================================
downloaded 63 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/Rdpack_2.6.3.tar.gz'
Content type 'application/x-gzip' length 626756 bytes (612 KB)
==================================================
downloaded 612 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/colorspace_2.1-1.tar.gz'
Content type 'application/x-gzip' length 2629335 bytes (2.5 MB)
==================================================
downloaded 2.5 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/broom_1.0.7.tar.gz'
Content type 'application/x-gzip' length 1853567 bytes (1.8 MB)
==================================================
downloaded 1.8 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/dplyr_1.1.4.tar.gz'
Content type 'application/x-gzip' length 1475398 bytes (1.4 MB)
==================================================
downloaded 1.4 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/numDeriv_2016.8-1.1.tar.gz'
Content type 'application/x-gzip' length 112835 bytes (110 KB)
==================================================
downloaded 110 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/doBy_4.6.25.tar.gz'
Content type 'application/x-gzip' length 4827686 bytes (4.6 MB)
==================================================
downloaded 4.6 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/SparseM_1.84-2.tar.gz'
Content type 'application/x-gzip' length 883858 bytes (863 KB)
==================================================
downloaded 863 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/MatrixModels_0.5-4.tar.gz'
Content type 'application/x-gzip' length 408103 bytes (398 KB)
==================================================
downloaded 398 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/minqa_1.2.8.tar.gz'
Content type 'application/x-gzip' length 121524 bytes (118 KB)
==================================================
downloaded 118 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/nloptr_2.2.1.tar.gz'
Content type 'application/x-gzip' length 557968 bytes (544 KB)
==================================================
downloaded 544 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/reformulas_0.4.0.tar.gz'
Content type 'application/x-gzip' length 90631 bytes (88 KB)
==================================================
downloaded 88 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/Rcpp_1.0.14.tar.gz'
Content type 'application/x-gzip' length 2178844 bytes (2.1 MB)
==================================================
downloaded 2.1 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/RcppEigen_0.3.4.0.2.tar.gz'
Content type 'application/x-gzip' length 1845890 bytes (1.8 MB)
==================================================
downloaded 1.8 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/farver_2.1.2.tar.gz'
Content type 'application/x-gzip' length 1485495 bytes (1.4 MB)
==================================================
downloaded 1.4 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/labeling_0.4.3.tar.gz'
Content type 'application/x-gzip' length 59707 bytes (58 KB)
==================================================
downloaded 58 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/munsell_0.5.1.tar.gz'
Content type 'application/x-gzip' length 242364 bytes (236 KB)
==================================================
downloaded 236 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/RColorBrewer_1.1-3.tar.gz'
Content type 'application/x-gzip' length 53281 bytes (52 KB)
==================================================
downloaded 52 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/viridisLite_0.4.2.tar.gz'
Content type 'application/x-gzip' length 1296920 bytes (1.2 MB)
==================================================
downloaded 1.2 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/carData_3.0-5.tar.gz'
Content type 'application/x-gzip' length 1821260 bytes (1.7 MB)
==================================================
downloaded 1.7 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/abind_1.4-8.tar.gz'
Content type 'application/x-gzip' length 63828 bytes (62 KB)
==================================================
downloaded 62 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/Formula_1.2-5.tar.gz'
Content type 'application/x-gzip' length 158504 bytes (154 KB)
==================================================
downloaded 154 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/pbkrtest_0.5.3.tar.gz'
Content type 'application/x-gzip' length 176405 bytes (172 KB)
==================================================
downloaded 172 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/quantreg_6.1.tar.gz'
Content type 'application/x-gzip' length 1447246 bytes (1.4 MB)
==================================================
downloaded 1.4 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/lme4_1.1-37.tar.gz'
Content type 'application/x-gzip' length 4249630 bytes (4.1 MB)
==================================================
downloaded 4.1 MB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/scales_1.3.0.tar.gz'
Content type 'application/x-gzip' length 703254 bytes (686 KB)
==================================================
downloaded 686 KB
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/car_3.1-3.tar.gz'
Content type 'application/x-gzip' length 1510494 bytes (1.4 MB)
==================================================
downloaded 1.4 MB
* installing *binary* package ‘fansi’ ...
* DONE (fansi)
* installing *binary* package ‘pkgconfig’ ...
* DONE (pkgconfig)
* installing *binary* package ‘cpp11’ ...
* DONE (cpp11)
* installing *binary* package ‘utf8’ ...
* DONE (utf8)
* installing *binary* package ‘withr’ ...
* DONE (withr)
* installing *binary* package ‘gtable’ ...
* DONE (gtable)
* installing *binary* package ‘isoband’ ...
* DONE (isoband)
* installing *binary* package ‘rbibutils’ ...
* DONE (rbibutils)
* installing *binary* package ‘backports’ ...
* DONE (backports)
* installing *binary* package ‘generics’ ...
* DONE (generics)
* installing *binary* package ‘purrr’ ...
* DONE (purrr)
* installing *binary* package ‘Deriv’ ...
* DONE (Deriv)
* installing *binary* package ‘microbenchmark’ ...
* DONE (microbenchmark)
* installing *binary* package ‘colorspace’ ...
* DONE (colorspace)
* installing *binary* package ‘numDeriv’ ...
* DONE (numDeriv)
* installing *binary* package ‘SparseM’ ...
* DONE (SparseM)
* installing *binary* package ‘MatrixModels’ ...
* DONE (MatrixModels)
* installing *binary* package ‘nloptr’ ...
* DONE (nloptr)
* installing *binary* package ‘Rcpp’ ...
* DONE (Rcpp)
* installing *binary* package ‘farver’ ...
* DONE (farver)
* installing *binary* package ‘labeling’ ...
* DONE (labeling)
* installing *binary* package ‘RColorBrewer’ ...
* DONE (RColorBrewer)
* installing *binary* package ‘viridisLite’ ...
* DONE (viridisLite)
* installing *binary* package ‘carData’ ...
* DONE (carData)
* installing *binary* package ‘abind’ ...
* DONE (abind)
* installing *binary* package ‘Formula’ ...
* DONE (Formula)
* installing *binary* package ‘pillar’ ...
* DONE (pillar)
* installing *binary* package ‘tidyselect’ ...
* DONE (tidyselect)
* installing *binary* package ‘Rdpack’ ...
* DONE (Rdpack)
* installing *binary* package ‘minqa’ ...
* DONE (minqa)
* installing *binary* package ‘RcppEigen’ ...
* DONE (RcppEigen)
* installing *binary* package ‘munsell’ ...
* DONE (munsell)
* installing *binary* package ‘quantreg’ ...
* DONE (quantreg)
* installing *binary* package ‘tibble’ ...
* DONE (tibble)
* installing *binary* package ‘reformulas’ ...
* DONE (reformulas)
* installing *binary* package ‘scales’ ...
* DONE (scales)
* installing *binary* package ‘ggplot2’ ...
* DONE (ggplot2)
* installing *binary* package ‘dplyr’ ...
* DONE (dplyr)
* installing *binary* package ‘lme4’ ...
* DONE (lme4)
* installing *binary* package ‘tidyr’ ...
* DONE (tidyr)
* installing *binary* package ‘cowplot’ ...
* DONE (cowplot)
* installing *binary* package ‘broom’ ...
* DONE (broom)
* installing *binary* package ‘modelr’ ...
* DONE (modelr)
* installing *binary* package ‘doBy’ ...
* DONE (doBy)
* installing *binary* package ‘pbkrtest’ ...
* DONE (pbkrtest)
* installing *binary* package ‘car’ ...
* DONE (car)
The downloaded source packages are in
‘/tmp/RtmpfeObDJ/downloaded_packages’
# install.packages("carData")  # not needed: carData is installed as a dependency of car
library(car)

# Variance inflation factors for the full three-predictor runs model.
# RunsReg was refit above as RS ~ OBP + SLG, so vif(RunsReg) would only report
# OBP and SLG; fit the OBP + SLG + BA model explicitly to get a VIF for BA too.
vif(lm(formula = RS ~ OBP + SLG + BA, data = moneyball))
OBP SLG BA
4.271126 3.426472 4.433501
# Regress runs allowed (RA) on the opponents' OBP and SLG (OOBP, OSLG).
# Rows with missing values in these columns are dropped by lm(); the summary
# reports how many observations were deleted.
RunsAllowedReg <- lm(formula = RA ~ OOBP + OSLG, data = moneyball)
summary(object = RunsAllowedReg)
Call:
lm(formula = RA ~ OOBP + OSLG, data = moneyball)
Residuals:
Min 1Q Median 3Q Max
-82.397 -15.178 -0.129 17.679 60.955
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -837.38 60.26 -13.897 < 2e-16 ***
OOBP 2913.60 291.97 9.979 4.46e-16 ***
OSLG 1514.29 175.43 8.632 2.55e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 25.67 on 87 degrees of freedom
(812 observations deleted due to missingness)
Multiple R-squared: 0.9073, Adjusted R-squared: 0.9052
F-statistic: 425.8 on 2 and 87 DF, p-value: < 2.2e-16
In-class activity 8: Part 2
If a baseball team’s opponents’ OBP (OOBP) is 0.267 and opponents’ SLG (OSLG) is 0.392, how many runs do we expect the team to allow?
# Predicted runs allowed from RA ~ OOBP + OSLG, using the coefficients printed
# by summary(RunsAllowedReg): intercept, OOBP slope, OSLG slope.
ExpectedRunsAllowed <- -837.38 +
  2913.60 * 0.267 +  # OOBP term
  1514.29 * 0.392    # OSLG term
ExpectedRunsAllowed
[1] 534.1529
The team is expected to allow around 534 runs.
# Variance inflation factors for the two predictors of the runs-allowed model.
car::vif(mod = RunsAllowedReg)
OOBP OSLG
2.735629 2.735629