# Load the training data from the working directory and inspect its structure
# (column names, types, and the first few values of each column).
firstbase <- read.csv(file = "firstbasestats.csv")
str(firstbase)
'data.frame': 23 obs. of 15 variables:
$ Player : chr "Freddie Freeman" "Jose Abreu" "Nate Lowe" "Paul Goldschmidt" ...
$ Pos : chr "1B" "1B" "1B" "1B" ...
$ Team : chr "LAD" "CHW" "TEX" "STL" ...
$ GP : int 159 157 157 151 160 140 160 145 146 143 ...
$ AB : int 612 601 593 561 638 551 583 555 545 519 ...
$ H : int 199 183 179 178 175 152 141 139 132 124 ...
$ X2B : int 47 40 26 41 35 27 25 28 40 23 ...
$ HR : int 21 15 27 35 32 20 36 22 8 18 ...
$ RBI : int 100 75 76 115 97 84 94 85 53 63 ...
$ AVG : num 0.325 0.305 0.302 0.317 0.274 0.276 0.242 0.251 0.242 0.239 ...
$ OBP : num 0.407 0.379 0.358 0.404 0.339 0.34 0.327 0.305 0.288 0.319 ...
$ SLG : num 0.511 0.446 0.492 0.578 0.48 0.437 0.477 0.423 0.36 0.391 ...
$ OPS : num 0.918 0.824 0.851 0.981 0.818 0.777 0.804 0.729 0.647 0.71 ...
$ WAR : num 5.77 4.19 3.21 7.86 3.85 3.07 5.05 1.32 -0.33 1.87 ...
$ Payroll.Salary2023: num 27000000 19500000 4050000 26000000 14500000 ...
# Per-column summary: length/class for character columns, five-number summary
# plus mean for numeric columns.
summary(object = firstbase)
Player Pos Team GP AB H X2B HR
Length:23 Length:23 Length:23 Min. : 5.0 Min. : 14.0 Min. : 3.0 Min. : 1.00 Min. : 0.00
Class :character Class :character Class :character 1st Qu.:105.5 1st Qu.:309.0 1st Qu.: 74.5 1st Qu.:13.50 1st Qu.: 8.00
Mode :character Mode :character Mode :character Median :131.0 Median :465.0 Median :115.0 Median :23.00 Median :18.00
Mean :120.2 Mean :426.9 Mean :110.0 Mean :22.39 Mean :17.09
3rd Qu.:152.0 3rd Qu.:558.0 3rd Qu.:146.5 3rd Qu.:28.00 3rd Qu.:24.50
Max. :160.0 Max. :638.0 Max. :199.0 Max. :47.00 Max. :36.00
RBI AVG OBP SLG OPS WAR Payroll.Salary2023
Min. : 1.00 Min. :0.2020 Min. :0.2140 Min. :0.2860 Min. :0.5000 Min. :-1.470 Min. : 720000
1st Qu.: 27.00 1st Qu.:0.2180 1st Qu.:0.3030 1st Qu.:0.3505 1st Qu.:0.6445 1st Qu.: 0.190 1st Qu.: 739200
Median : 63.00 Median :0.2420 Median :0.3210 Median :0.4230 Median :0.7290 Median : 1.310 Median : 4050000
Mean : 59.43 Mean :0.2499 Mean :0.3242 Mean :0.4106 Mean :0.7346 Mean : 1.788 Mean : 6972743
3rd Qu.: 84.50 3rd Qu.:0.2750 3rd Qu.:0.3395 3rd Qu.:0.4690 3rd Qu.:0.8175 3rd Qu.: 3.140 3rd Qu.: 8150000
Max. :115.00 Max. :0.3250 Max. :0.4070 Max. :0.5780 Max. :0.9810 Max. : 7.860 Max. :27000000
# Model 1: linear regression of Payroll.Salary2023 on RBI alone.
model1 <- lm(formula = Payroll.Salary2023 ~ RBI, data = firstbase)
summary(model1)
Call:
lm(formula = Payroll.Salary2023 ~ RBI, data = firstbase)
Residuals:
Min 1Q Median 3Q Max
-10250331 -5220790 -843455 2386848 13654950
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2363744 2866320 -0.825 0.41883
RBI 157088 42465 3.699 0.00133 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 6516000 on 21 degrees of freedom
Multiple R-squared: 0.3945, Adjusted R-squared: 0.3657
F-statistic: 13.68 on 1 and 21 DF, p-value: 0.001331
# Residuals (observed minus fitted Payroll.Salary2023) for each training row.
residuals(model1)
1 2 3 4 5 6 7 8 9 10 11 12
13654950.2 10082148.6 -5524939.3 10298631.2 1626214.0 -6731642.8 -5902522.2 -10250330.7 -4711916.8 -532796.1 -6667082.5 -6696203.1
13 14 15 16 17 18 19 20 21 22 23
7582148.6 -4916640.9 -1898125.3 -336532.3 -995042.5 -1311618.3 -843454.5 8050721.3 1250336.9 1847040.4 2926656.0
# Sum of squared errors of model1 on the training data.
SSE <- sum(residuals(model1)^2)
SSE
[1] 8.914926e+14
# Model 2: add AVG as a second predictor alongside RBI.
model2 <- lm(formula = Payroll.Salary2023 ~ AVG + RBI, data = firstbase)
summary(model2)
Call:
lm(formula = Payroll.Salary2023 ~ AVG + RBI, data = firstbase)
Residuals:
Min 1Q Median 3Q Max
-9097952 -4621582 -33233 3016541 10260245
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -18083756 9479037 -1.908 0.0709 .
AVG 74374031 42934155 1.732 0.0986 .
RBI 108850 49212 2.212 0.0388 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 6226000 on 20 degrees of freedom
Multiple R-squared: 0.4735, Adjusted R-squared: 0.4209
F-statistic: 8.994 on 2 and 20 DF, p-value: 0.001636
# Sum of squared errors of model2 on the training data (overwrites the
# previous value of SSE).
SSE <- sum(residuals(model2)^2)
SSE
[1] 7.751841e+14
# Model 3: five predictors -- HR, RBI, AVG, OBP and OPS.
model3 <- lm(
  formula = Payroll.Salary2023 ~ HR + RBI + AVG + OBP + OPS,
  data = firstbase
)
summary(model3)
Call:
lm(formula = Payroll.Salary2023 ~ HR + RBI + AVG + OBP + OPS,
data = firstbase)
Residuals:
Min 1Q Median 3Q Max
-9611440 -3338119 64016 4472451 9490309
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -31107859 11738494 -2.650 0.0168 *
HR -341069 552069 -0.618 0.5449
RBI 115786 113932 1.016 0.3237
AVG -63824769 104544645 -0.611 0.5496
OBP 27054948 131210166 0.206 0.8391
OPS 60181012 95415131 0.631 0.5366
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 6023000 on 17 degrees of freedom
Multiple R-squared: 0.5811, Adjusted R-squared: 0.4579
F-statistic: 4.717 on 5 and 17 DF, p-value: 0.006951
# Sum of squared errors of model3 on the training data (overwrites the
# previous value of SSE).
SSE <- sum(residuals(model3)^2)
SSE
[1] 6.167793e+14
# Model 4: same as model3 but without HR.
model4 <- lm(
  formula = Payroll.Salary2023 ~ RBI + AVG + OBP + OPS,
  data = firstbase
)
summary(model4)
Call:
lm(formula = Payroll.Salary2023 ~ RBI + AVG + OBP + OPS, data = firstbase)
Residuals:
Min 1Q Median 3Q Max
-9399551 -3573842 98921 3979339 9263512
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -29466887 11235931 -2.623 0.0173 *
RBI 71495 87015 0.822 0.4220
AVG -11035457 59192453 -0.186 0.8542
OBP 86360720 87899074 0.982 0.3389
OPS 9464546 47788458 0.198 0.8452
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5919000 on 18 degrees of freedom
Multiple R-squared: 0.5717, Adjusted R-squared: 0.4765
F-statistic: 6.007 on 4 and 18 DF, p-value: 0.00298
# Drop the three character identifier columns (Player, Pos, Team) so that only
# the numeric statistics remain.
# The columns are removed by name rather than by position (-(1:3)): a
# positional drop is not idempotent, so re-running this chunk would silently
# strip GP/AB/H and, on a further run, predictors such as HR and RBI.
# setdiff() keeps the remaining columns in their original order; drop = FALSE
# guarantees the result stays a data frame.
firstbase <- firstbase[
  ,
  setdiff(names(firstbase), c("Player", "Pos", "Team")),
  drop = FALSE
]
# Correlation (cor() default method) between RBI and Payroll.Salary2023.
with(firstbase, cor(RBI, Payroll.Salary2023))
[1] 0.6281239
# Correlation between the two predictors AVG and OBP; a high value signals
# that they carry overlapping information when used in the same model.
with(firstbase, cor(AVG, OBP))
[1] 0.8028894
# Model 5: same as model4 but without AVG.
model5 <- lm(formula = Payroll.Salary2023 ~ RBI + OBP + OPS, data = firstbase)
summary(model5)
Call:
lm(formula = Payroll.Salary2023 ~ RBI + OBP + OPS, data = firstbase)
Residuals:
Min 1Q Median 3Q Max
-9465449 -3411234 259746 4102864 8876798
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -29737007 10855411 -2.739 0.013 *
RBI 72393 84646 0.855 0.403
OBP 82751360 83534224 0.991 0.334
OPS 7598051 45525575 0.167 0.869
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5767000 on 19 degrees of freedom
Multiple R-squared: 0.5709, Adjusted R-squared: 0.5031
F-statistic: 8.426 on 3 and 19 DF, p-value: 0.000913
# Model 6: same as model5 but without OPS, leaving RBI and OBP only.
# This is the model used for the test-set predictions.
model6 <- lm(formula = Payroll.Salary2023 ~ RBI + OBP, data = firstbase)
summary(model6)
Call:
lm(formula = Payroll.Salary2023 ~ RBI + OBP, data = firstbase)
Residuals:
Min 1Q Median 3Q Max
-9045497 -3487008 139497 4084739 9190185
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -28984802 9632560 -3.009 0.00693 **
RBI 84278 44634 1.888 0.07360 .
OBP 95468873 33385182 2.860 0.00969 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5625000 on 20 degrees of freedom
Multiple R-squared: 0.5703, Adjusted R-squared: 0.5273
F-statistic: 13.27 on 2 and 20 DF, p-value: 0.0002149
# Load the held-out test data from the working directory and inspect its
# structure.
firstbaseTest <- read.csv(file = "firstbasestats_test.csv")
str(firstbaseTest)
'data.frame': 2 obs. of 15 variables:
$ Player : chr "Matt Olson" "Josh Bell"
$ Pos : chr "1B" "1B"
$ Team : chr "ATL" "SD"
$ GP : int 162 156
$ AB : int 616 552
$ H : int 148 147
$ X2B : int 44 29
$ HR : int 34 17
$ RBI : int 103 71
$ AVG : num 0.24 0.266
$ OBP : num 0.325 0.362
$ SLG : num 0.477 0.422
$ OPS : num 0.802 0.784
$ WAR : num 3.29 3.5
$ Payroll.Salary2023: num 21000000 16500000
# Predict Payroll.Salary2023 for the test rows using model6.
predictTest <- predict(object = model6, newdata = firstbaseTest)
predictTest
1 2
10723186 11558647
# Out-of-sample R^2 for model6.
# SSE: squared error of the model's predictions on the test rows.
# SST: squared error of a baseline that always predicts the mean
#      Payroll.Salary2023 of the training data (firstbase).
SSE <- sum((firstbaseTest[["Payroll.Salary2023"]] - predictTest)^2)
SST <- sum(
  (firstbaseTest[["Payroll.Salary2023"]] -
    mean(firstbase[["Payroll.Salary2023"]]))^2
)
1 - SSE / SST
[1] 0.5477734
LS0tDQp0aXRsZTogIkluLUNsYXNzIEFjdGl2aXR5IDYiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCmBgYHtyfQ0KZmlyc3RiYXNlID0gcmVhZC5jc3YoImZpcnN0YmFzZXN0YXRzLmNzdiIpDQpzdHIoZmlyc3RiYXNlKQ0KYGBgDQoNCg0KYGBge3J9DQpzdW1tYXJ5KGZpcnN0YmFzZSkNCmBgYA0KDQoNCmBgYHtyfQ0KbW9kZWwxID0gbG0oUGF5cm9sbC5TYWxhcnkyMDIzIH4gUkJJLCBkYXRhPWZpcnN0YmFzZSkNCnN1bW1hcnkobW9kZWwxKQ0KYGBgDQoNCg0KYGBge3J9DQptb2RlbDEkcmVzaWR1YWxzDQpgYGANCmBgYHtyfQ0KU1NFID0gc3VtKG1vZGVsMSRyZXNpZHVhbHNeMikNClNTRQ0KYGBgDQoNCmBgYHtyfQ0KbW9kZWwyID0gbG0oUGF5cm9sbC5TYWxhcnkyMDIzIH4gQVZHICsgUkJJLCBkYXRhPWZpcnN0YmFzZSkNCnN1bW1hcnkobW9kZWwyKQ0KYGBgDQpgYGB7cn0NClNTRSA9IHN1bShtb2RlbDIkcmVzaWR1YWxzXjIpDQpTU0UNCmBgYA0KDQpgYGB7cn0NCm1vZGVsMyA9IGxtKFBheXJvbGwuU2FsYXJ5MjAyMyB+IEhSICsgUkJJICsgQVZHICsgT0JQKyBPUFMsIGRhdGE9Zmlyc3RiYXNlKQ0Kc3VtbWFyeShtb2RlbDMpDQpgYGANCg0KYGBge3J9DQpTU0UgPSBzdW0obW9kZWwzJHJlc2lkdWFsc14yKQ0KU1NFDQpgYGANCg0KYGBge3J9DQptb2RlbDQgPSBsbShQYXlyb2xsLlNhbGFyeTIwMjMgfiBSQkkgKyBBVkcgKyBPQlArT1BTLCBkYXRhPWZpcnN0YmFzZSkNCnN1bW1hcnkobW9kZWw0KQ0KYGBgDQpgYGB7cn0NCmZpcnN0YmFzZTwtZmlyc3RiYXNlWywtKDE6MyldDQpgYGANCg0KDQpgYGB7cn0NCmNvcihmaXJzdGJhc2UkUkJJLCBmaXJzdGJhc2UkUGF5cm9sbC5TYWxhcnkyMDIzKQ0KYGBgDQoNCg0KYGBge3J9DQpjb3IoZmlyc3RiYXNlJEFWRywgZmlyc3RiYXNlJE9CUCkNCmBgYA0KDQpgYGB7cn0NCm1vZGVsNSA9IGxtKFBheXJvbGwuU2FsYXJ5MjAyMyB+IFJCSSArIE9CUCtPUFMsIGRhdGE9Zmlyc3RiYXNlKQ0Kc3VtbWFyeShtb2RlbDUpDQpgYGANCg0KDQpgYGB7cn0NCm1vZGVsNiA9IGxtKFBheXJvbGwuU2FsYXJ5MjAyMyB+IFJCSSArIE9CUCwgZGF0YT1maXJzdGJhc2UpDQpzdW1tYXJ5KG1vZGVsNikNCmBgYA0KDQoNCg0KYGBge3J9DQpmaXJzdGJhc2VUZXN0ID0gcmVhZC5jc3YoImZpcnN0YmFzZXN0YXRzX3Rlc3QuY3N2IikNCnN0cihmaXJzdGJhc2VUZXN0KQ0KYGBgDQoNCmBgYHtyfQ0KcHJlZGljdFRlc3QgPSBwcmVkaWN0KG1vZGVsNiwgbmV3ZGF0YT1maXJzdGJhc2VUZXN0KQ0KcHJlZGljdFRlc3QNCmBgYA0KDQoNCmBgYHtyfQ0KU1NFID0gc3VtKChmaXJzdGJhc2VUZXN0JFBheXJvbGwuU2FsYXJ5MjAyMyAtIHByZWRpY3RUZXN0KV4yKQ0KU1NUID0gc3VtKChmaXJzdGJhc2VUZXN0JFBheXJvbGwuU2FsYXJ5MjAyMyAtIG1lYW4oZmlyc3RiYXNlJFBheXJvbGwuU2FsYXJ5MjAyMykpXjIpDQoxIC0gU1NFL1NTVA0KYGBgDQoNCg0KDQoNCg0KDQoNCg==