# Show the current working directory; the relative path "baseball.csv" read
# below is resolved against it.
getwd()
[1] "C:/Users/orgac/OneDrive/Documents"
# Load the team-season table from the working directory and list its columns.
baseball <- read.csv(file = "baseball.csv")
str(object = baseball)
'data.frame': 1232 obs. of 15 variables:
$ Team : chr "ARI" "ATL" "BAL" "BOS" ...
$ League : chr "NL" "NL" "AL" "AL" ...
$ Year : int 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 ...
$ RS : int 734 700 712 734 613 748 669 667 758 726 ...
$ RA : int 688 600 705 806 759 676 588 845 890 670 ...
$ W : int 81 94 93 69 61 85 97 68 64 88 ...
$ OBP : num 0.328 0.32 0.311 0.315 0.302 0.318 0.315 0.324 0.33 0.335 ...
$ SLG : num 0.418 0.389 0.417 0.415 0.378 0.422 0.411 0.381 0.436 0.422 ...
$ BA : num 0.259 0.247 0.247 0.26 0.24 0.255 0.251 0.251 0.274 0.268 ...
$ Playoffs : int 0 1 1 0 0 0 1 0 0 1 ...
$ RankSeason : int NA 4 5 NA NA NA 2 NA NA 6 ...
$ RankPlayoffs: int NA 5 4 NA NA NA 4 NA NA 2 ...
$ G : int 162 162 162 162 162 162 162 162 162 162 ...
$ OOBP : num 0.317 0.306 0.315 0.331 0.335 0.319 0.305 0.336 0.357 0.314 ...
$ OSLG : num 0.415 0.378 0.403 0.428 0.424 0.405 0.39 0.43 0.47 0.402 ...
# Restrict the data to seasons before 2002 and inspect the result.
moneyball <- subset(baseball, subset = Year < 2002)
str(object = moneyball)
'data.frame': 902 obs. of 15 variables:
$ Team : chr "ANA" "ARI" "ATL" "BAL" ...
$ League : chr "AL" "NL" "NL" "AL" ...
$ Year : int 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
$ RS : int 691 818 729 687 772 777 798 735 897 923 ...
$ RA : int 730 677 643 829 745 701 795 850 821 906 ...
$ W : int 75 92 88 63 82 88 83 66 91 73 ...
$ OBP : num 0.327 0.341 0.324 0.319 0.334 0.336 0.334 0.324 0.35 0.354 ...
$ SLG : num 0.405 0.442 0.412 0.38 0.439 0.43 0.451 0.419 0.458 0.483 ...
$ BA : num 0.261 0.267 0.26 0.248 0.266 0.261 0.268 0.262 0.278 0.292 ...
$ Playoffs : int 0 1 1 0 0 0 0 0 1 0 ...
$ RankSeason : int NA 5 7 NA NA NA NA NA 6 NA ...
$ RankPlayoffs: int NA 1 3 NA NA NA NA NA 4 NA ...
$ G : int 162 162 162 162 161 162 162 162 162 162 ...
$ OOBP : num 0.331 0.311 0.314 0.337 0.329 0.321 0.334 0.341 0.341 0.35 ...
$ OSLG : num 0.412 0.404 0.384 0.439 0.393 0.398 0.427 0.455 0.417 0.48 ...
# Append RD = RS - RA as a new column, then confirm it shows up in the
# structure listing.
moneyball[["RD"]] <- moneyball[["RS"]] - moneyball[["RA"]]
str(object = moneyball)
'data.frame': 902 obs. of 16 variables:
$ Team : chr "ANA" "ARI" "ATL" "BAL" ...
$ League : chr "AL" "NL" "NL" "AL" ...
$ Year : int 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
$ RS : int 691 818 729 687 772 777 798 735 897 923 ...
$ RA : int 730 677 643 829 745 701 795 850 821 906 ...
$ W : int 75 92 88 63 82 88 83 66 91 73 ...
$ OBP : num 0.327 0.341 0.324 0.319 0.334 0.336 0.334 0.324 0.35 0.354 ...
$ SLG : num 0.405 0.442 0.412 0.38 0.439 0.43 0.451 0.419 0.458 0.483 ...
$ BA : num 0.261 0.267 0.26 0.248 0.266 0.261 0.268 0.262 0.278 0.292 ...
$ Playoffs : int 0 1 1 0 0 0 0 0 1 0 ...
$ RankSeason : int NA 5 7 NA NA NA NA NA 6 NA ...
$ RankPlayoffs: int NA 1 3 NA NA NA NA NA 4 NA ...
$ G : int 162 162 162 162 161 162 162 162 162 162 ...
$ OOBP : num 0.331 0.311 0.314 0.337 0.329 0.321 0.334 0.341 0.341 0.35 ...
$ OSLG : num 0.412 0.404 0.384 0.439 0.393 0.398 0.427 0.455 0.417 0.48 ...
$ RD : int -39 141 86 -142 27 76 3 -115 76 17 ...
# Scatter W against RD, then fit and summarise a one-predictor linear model
# of W on RD.
plot(x = moneyball$RD, y = moneyball$W)
WinsReg <- lm(formula = W ~ RD, data = moneyball)
summary(object = WinsReg)
Call:
lm(formula = W ~ RD, data = moneyball)
Residuals:
Min 1Q Median 3Q Max
-14.2662 -2.6509 0.1234 2.9364 11.6570
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 80.881375 0.131157 616.67 <2e-16 ***
RD 0.105766 0.001297 81.55 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.939 on 900 degrees of freedom
Multiple R-squared: 0.8808, Adjusted R-squared: 0.8807
F-statistic: 6651 on 1 and 900 DF, p-value: < 2.2e-16
# Linear model of RS on the three batting columns OBP, SLG and BA.
RunsReg <- lm(formula = RS ~ OBP + SLG + BA, data = moneyball)
summary(object = RunsReg)
Call:
lm(formula = RS ~ OBP + SLG + BA, data = moneyball)
Residuals:
Min 1Q Median 3Q Max
-70.941 -17.247 -0.621 16.754 90.998
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -788.46 19.70 -40.029 < 2e-16 ***
OBP 2917.42 110.47 26.410 < 2e-16 ***
SLG 1637.93 45.99 35.612 < 2e-16 ***
BA -368.97 130.58 -2.826 0.00482 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 24.69 on 898 degrees of freedom
Multiple R-squared: 0.9302, Adjusted R-squared: 0.93
F-statistic: 3989 on 3 and 898 DF, p-value: < 2.2e-16
# Pairwise correlation between BA and OBP, two of the RunsReg predictors.
cor(x = moneyball$BA, y = moneyball$OBP)
[1] 0.8540549
# Install car only when it is not already available, so re-running these
# commands does not download the package and its dependencies again.
# NOTE(review): car is never loaded in the visible part of this file; add
# library(car) before calling any of its functions.
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car")
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/orgac/AppData/Local/R/win-library/4.4’
(as ‘lib’ is unspecified)
also installing the dependencies ‘fansi’, ‘pkgconfig’, ‘cpp11’, ‘utf8’, ‘withr’, ‘gtable’, ‘isoband’, ‘rbibutils’, ‘backports’, ‘generics’, ‘purrr’, ‘tibble’, ‘tidyr’, ‘pillar’, ‘tidyselect’, ‘cowplot’, ‘Deriv’, ‘ggplot2’, ‘modelr’, ‘microbenchmark’, ‘Rdpack’, ‘colorspace’, ‘broom’, ‘dplyr’, ‘numDeriv’, ‘doBy’, ‘SparseM’, ‘MatrixModels’, ‘minqa’, ‘nloptr’, ‘reformulas’, ‘Rcpp’, ‘RcppEigen’, ‘farver’, ‘labeling’, ‘munsell’, ‘RColorBrewer’, ‘viridisLite’, ‘carData’, ‘abind’, ‘Formula’, ‘pbkrtest’, ‘quantreg’, ‘lme4’, ‘scales’
There is a binary version available but the source version
is later:
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/fansi_1.0.6.zip'
Content type 'application/zip' length 324453 bytes (316 KB)
downloaded 316 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/pkgconfig_2.0.3.zip'
Content type 'application/zip' length 23193 bytes (22 KB)
downloaded 22 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/cpp11_0.5.2.zip'
Content type 'application/zip' length 310779 bytes (303 KB)
downloaded 303 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/utf8_1.2.4.zip'
Content type 'application/zip' length 151326 bytes (147 KB)
downloaded 147 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/withr_3.0.2.zip'
Content type 'application/zip' length 232619 bytes (227 KB)
downloaded 227 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/gtable_0.3.6.zip'
Content type 'application/zip' length 251029 bytes (245 KB)
downloaded 245 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/isoband_0.2.7.zip'
Content type 'application/zip' length 1929529 bytes (1.8 MB)
downloaded 1.8 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/rbibutils_2.3.zip'
Content type 'application/zip' length 1032568 bytes (1008 KB)
downloaded 1008 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/backports_1.5.0.zip'
Content type 'application/zip' length 122682 bytes (119 KB)
downloaded 119 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/generics_0.1.3.zip'
Content type 'application/zip' length 85897 bytes (83 KB)
downloaded 83 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/purrr_1.0.4.zip'
Content type 'application/zip' length 551187 bytes (538 KB)
downloaded 538 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/tibble_3.2.1.zip'
Content type 'application/zip' length 696681 bytes (680 KB)
downloaded 680 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/tidyr_1.3.1.zip'
Content type 'application/zip' length 1273643 bytes (1.2 MB)
downloaded 1.2 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/pillar_1.10.1.zip'
Content type 'application/zip' length 671775 bytes (656 KB)
downloaded 656 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/tidyselect_1.2.1.zip'
Content type 'application/zip' length 229108 bytes (223 KB)
downloaded 223 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/cowplot_1.1.3.zip'
Content type 'application/zip' length 1381124 bytes (1.3 MB)
downloaded 1.3 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/Deriv_4.1.6.zip'
Content type 'application/zip' length 152060 bytes (148 KB)
downloaded 148 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/ggplot2_3.5.1.zip'
Content type 'application/zip' length 5023039 bytes (4.8 MB)
downloaded 4.8 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/modelr_0.1.11.zip'
Content type 'application/zip' length 204160 bytes (199 KB)
downloaded 199 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/microbenchmark_1.5.0.zip'
Content type 'application/zip' length 73654 bytes (71 KB)
downloaded 71 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/Rdpack_2.6.3.zip'
Content type 'application/zip' length 641276 bytes (626 KB)
downloaded 626 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/colorspace_2.1-1.zip'
Content type 'application/zip' length 2668016 bytes (2.5 MB)
downloaded 2.5 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/dplyr_1.1.4.zip'
Content type 'application/zip' length 1590624 bytes (1.5 MB)
downloaded 1.5 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/numDeriv_2016.8-1.1.zip'
Content type 'application/zip' length 117304 bytes (114 KB)
downloaded 114 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/doBy_4.6.25.zip'
Content type 'application/zip' length 4848423 bytes (4.6 MB)
downloaded 4.6 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/SparseM_1.84-2.zip'
Content type 'application/zip' length 887185 bytes (866 KB)
downloaded 866 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/MatrixModels_0.5-4.zip'
Content type 'application/zip' length 408264 bytes (398 KB)
downloaded 398 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/minqa_1.2.8.zip'
Content type 'application/zip' length 441915 bytes (431 KB)
downloaded 431 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/nloptr_2.2.1.zip'
Content type 'application/zip' length 886339 bytes (865 KB)
downloaded 865 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/reformulas_0.4.0.zip'
Content type 'application/zip' length 94721 bytes (92 KB)
downloaded 92 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/Rcpp_1.0.14.zip'
Content type 'application/zip' length 2901347 bytes (2.8 MB)
downloaded 2.8 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/RcppEigen_0.3.4.0.2.zip'
Content type 'application/zip' length 2592467 bytes (2.5 MB)
downloaded 2.5 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/farver_2.1.2.zip'
Content type 'application/zip' length 1520093 bytes (1.4 MB)
downloaded 1.4 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/labeling_0.4.3.zip'
Content type 'application/zip' length 63169 bytes (61 KB)
downloaded 61 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/munsell_0.5.1.zip'
Content type 'application/zip' length 245505 bytes (239 KB)
downloaded 239 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/RColorBrewer_1.1-3.zip'
Content type 'application/zip' length 54471 bytes (53 KB)
downloaded 53 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/viridisLite_0.4.2.zip'
Content type 'application/zip' length 1300906 bytes (1.2 MB)
downloaded 1.2 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/carData_3.0-5.zip'
Content type 'application/zip' length 1708392 bytes (1.6 MB)
downloaded 1.6 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/abind_1.4-8.zip'
Content type 'application/zip' length 67211 bytes (65 KB)
downloaded 65 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/Formula_1.2-5.zip'
Content type 'application/zip' length 161370 bytes (157 KB)
downloaded 157 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/pbkrtest_0.5.3.zip'
Content type 'application/zip' length 186216 bytes (181 KB)
downloaded 181 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/quantreg_6.1.zip'
Content type 'application/zip' length 1473921 bytes (1.4 MB)
downloaded 1.4 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/lme4_1.1-37.zip'
Content type 'application/zip' length 4566862 bytes (4.4 MB)
downloaded 4.4 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/scales_1.3.0.zip'
Content type 'application/zip' length 721231 bytes (704 KB)
downloaded 704 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.4/car_3.1-3.zip'
Content type 'application/zip' length 1542543 bytes (1.5 MB)
downloaded 1.5 MB
package ‘fansi’ successfully unpacked and MD5 sums checked
package ‘pkgconfig’ successfully unpacked and MD5 sums checked
package ‘cpp11’ successfully unpacked and MD5 sums checked
package ‘utf8’ successfully unpacked and MD5 sums checked
package ‘withr’ successfully unpacked and MD5 sums checked
package ‘gtable’ successfully unpacked and MD5 sums checked
package ‘isoband’ successfully unpacked and MD5 sums checked
package ‘rbibutils’ successfully unpacked and MD5 sums checked
package ‘backports’ successfully unpacked and MD5 sums checked
package ‘generics’ successfully unpacked and MD5 sums checked
package ‘purrr’ successfully unpacked and MD5 sums checked
package ‘tibble’ successfully unpacked and MD5 sums checked
package ‘tidyr’ successfully unpacked and MD5 sums checked
package ‘pillar’ successfully unpacked and MD5 sums checked
package ‘tidyselect’ successfully unpacked and MD5 sums checked
package ‘cowplot’ successfully unpacked and MD5 sums checked
package ‘Deriv’ successfully unpacked and MD5 sums checked
package ‘ggplot2’ successfully unpacked and MD5 sums checked
package ‘modelr’ successfully unpacked and MD5 sums checked
package ‘microbenchmark’ successfully unpacked and MD5 sums checked
package ‘Rdpack’ successfully unpacked and MD5 sums checked
package ‘colorspace’ successfully unpacked and MD5 sums checked
package ‘dplyr’ successfully unpacked and MD5 sums checked
package ‘numDeriv’ successfully unpacked and MD5 sums checked
package ‘doBy’ successfully unpacked and MD5 sums checked
package ‘SparseM’ successfully unpacked and MD5 sums checked
package ‘MatrixModels’ successfully unpacked and MD5 sums checked
package ‘minqa’ successfully unpacked and MD5 sums checked
package ‘nloptr’ successfully unpacked and MD5 sums checked
package ‘reformulas’ successfully unpacked and MD5 sums checked
package ‘Rcpp’ successfully unpacked and MD5 sums checked
package ‘RcppEigen’ successfully unpacked and MD5 sums checked
package ‘farver’ successfully unpacked and MD5 sums checked
package ‘labeling’ successfully unpacked and MD5 sums checked
package ‘munsell’ successfully unpacked and MD5 sums checked
package ‘RColorBrewer’ successfully unpacked and MD5 sums checked
package ‘viridisLite’ successfully unpacked and MD5 sums checked
package ‘carData’ successfully unpacked and MD5 sums checked
package ‘abind’ successfully unpacked and MD5 sums checked
package ‘Formula’ successfully unpacked and MD5 sums checked
package ‘pbkrtest’ successfully unpacked and MD5 sums checked
package ‘quantreg’ successfully unpacked and MD5 sums checked
package ‘lme4’ successfully unpacked and MD5 sums checked
package ‘scales’ successfully unpacked and MD5 sums checked
package ‘car’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\orgac\AppData\Local\Temp\RtmpYPHcA0\downloaded_packages
installing the source package ‘broom’
trying URL 'https://cran.rstudio.com/src/contrib/broom_1.0.8.tar.gz'
Content type 'application/x-gzip' length 651236 bytes (635 KB)
downloaded 635 KB
* installing *source* package 'broom' ...
** package 'broom' successfully unpacked and MD5 sums checked
** using staged installation
** R
** inst
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
*** copying figures
** building package indices
** installing vignettes
** testing if installed package can be loaded from temporary location
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (broom)
The downloaded source packages are in
‘C:\Users\orgac\AppData\Local\Temp\RtmpYPHcA0\downloaded_packages’
# Linear model of RA on OOBP and OSLG. Rows with missing values in these
# columns are dropped by lm(), so the fit uses far fewer rows than moneyball
# holds (see the "observations deleted due to missingness" line in the summary).
RunsAllowedReg <- lm(formula = RA ~ OOBP + OSLG, data = moneyball)
summary(object = RunsAllowedReg)
Call:
lm(formula = RA ~ OOBP + OSLG, data = moneyball)
Residuals:
Min 1Q Median 3Q Max
-82.397 -15.178 -0.129 17.679 60.955
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -837.38 60.26 -13.897 < 2e-16 ***
OOBP 2913.60 291.97 9.979 4.46e-16 ***
OSLG 1514.29 175.43 8.632 2.55e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 25.67 on 87 degrees of freedom
(812 observations deleted due to missingness)
Multiple R-squared: 0.9073, Adjusted R-squared: 0.9052
F-statistic: 425.8 on 2 and 87 DF, p-value: < 2.2e-16
In-class activity 7
# Predicted wins from the rounded WinsReg coefficients (intercept 80.88,
# slope 0.106) for a team scoring 763 runs and allowing 614.
NumberofWins <- 80.88 + 0.106 * (763 - 614)
print(NumberofWins)
[1] 96.674
# A team with a run differential of 149 (763 runs scored minus 614 allowed) is expected to win about 97 games.