# Load the first-base statistics, then show the column structure and the
# per-column summaries.
firstbase <- read.csv(file = "firstbasestats.csv")
str(firstbase)
summary(firstbase)

# Single-predictor linear regression: Payroll.Salary2023 is the response
# (dependent / target variable) and RBI is the only explanatory variable.
model1 <- lm(formula = Payroll.Salary2023 ~ RBI, data = firstbase)
summary(model1)

# Per-observation residuals of model1.
model1[["residuals"]]

# Sum of squared errors (SSE) of model1.
SSE <- sum(model1[["residuals"]]^2)
SSE
[1] 8.914926e+14
# Linear Regression (two variables)
# model2 was referenced here without ever being fitted, so this step failed
# with "object 'model2' not found". Fit it before computing its SSE.
# NOTE(review): the predictor pair (AVG + RBI) is assumed -- confirm that this
# is the intended two-variable model.
model2 <- lm(Payroll.Salary2023 ~ AVG + RBI, data = firstbase)
summary(model2)

# Sum of squared errors (SSE) of model2.
SSE <- sum(model2$residuals^2)
SSE
# Multi-predictor linear regression: Payroll.Salary2023 modelled on
# HR, RBI, AVG, OBP and OPS together.
model3 <- lm(
  formula = Payroll.Salary2023 ~ HR + RBI + AVG + OBP + OPS,
  data = firstbase
)
summary(model3)
Call:
lm(formula = Payroll.Salary2023 ~ HR + RBI + AVG + OBP + OPS,
data = firstbase)
Residuals:
Min 1Q Median 3Q Max
-9611440 -3338119 64016 4472451 9490309
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -31107858 11738494 -2.650 0.0168
HR -341069 552069 -0.618 0.5449
RBI 115786 113932 1.016 0.3237
AVG -63824769 104544645 -0.611 0.5496
OBP 27054948 131210166 0.206 0.8391
OPS 60181012 95415131 0.631 0.5366
(Intercept) *
HR
RBI
AVG
OBP
OPS
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 6023000 on 17 degrees of freedom
Multiple R-squared: 0.5811, Adjusted R-squared: 0.4579
F-statistic: 4.717 on 5 and 17 DF, p-value: 0.006951
# Sum of squared errors (SSE) of model3.
SSE <- sum(model3[["residuals"]]^2)
SSE
[1] 6.167793e+14
# Refit without HR: Payroll.Salary2023 on RBI, AVG, OBP and OPS.
model4 <- lm(
  formula = Payroll.Salary2023 ~ RBI + AVG + OBP + OPS,
  data = firstbase
)
summary(model4)
# Keep only the numeric columns so that cor() can be applied to the whole
# data frame. Selecting by type instead of by position (-(1:3)) makes this
# step safe to re-run: dropping columns 1-3 a second time would silently
# remove three more columns.
# NOTE(review): assumes the first three columns are the only non-numeric
# ones, as in the test file's str() output (Player, Pos, Team) -- confirm
# for the training file.
firstbase <- firstbase[vapply(firstbase, is.numeric, logical(1))]
# Pairwise correlations: RBI against salary, then AVG against OBP.
with(firstbase, cor(RBI, Payroll.Salary2023))
with(firstbase, cor(AVG, OBP))
[1] 0.8028894
# Correlation matrix across every column of firstbase.
cor(firstbase)

# Three-predictor model: Payroll.Salary2023 on RBI, OBP and OPS.
model5 <- lm(formula = Payroll.Salary2023 ~ RBI + OBP + OPS, data = firstbase)
summary(model5)

# Two-predictor model: Payroll.Salary2023 on RBI and OBP only.
model6 <- lm(formula = Payroll.Salary2023 ~ RBI + OBP, data = firstbase)
summary(model6)
Call:
lm(formula = Payroll.Salary2023 ~ RBI + OBP, data = firstbase)
Residuals:
Min 1Q Median 3Q Max
-9045497 -3487008 139497 4084739 9190185
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -28984802 9632560 -3.009 0.00693
RBI 84278 44634 1.888 0.07360
OBP 95468873 33385182 2.860 0.00969
(Intercept) **
RBI .
OBP **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5625000 on 20 degrees of freedom
Multiple R-squared: 0.5703, Adjusted R-squared: 0.5273
F-statistic: 13.27 on 2 and 20 DF, p-value: 0.0002149
# Load the test observations and show their column structure.
firstbaseTest <- read.csv(file = "firstbasestats_test.csv")
str(firstbaseTest)
'data.frame': 2 obs. of 15 variables:
$ Player : chr "Matt Olson" "Josh Bell"
$ Pos : chr "1B" "1B"
$ Team : chr "ATL" "SD"
$ GP : int 162 156
$ AB : int 616 552
$ H : int 148 147
$ X2B : int 44 29
$ HR : int 34 17
$ RBI : int 103 71
$ AVG : num 0.24 0.266
$ OBP : num 0.325 0.362
$ SLG : num 0.477 0.422
$ OPS : num 0.802 0.784
$ WAR : num 3.29 3.5
$ Payroll.Salary2023: num 21000000 16500000
# Predict Payroll.Salary2023 for the test rows using model6.
predictTest <- predict(object = model6, newdata = firstbaseTest)
predictTest
1 2
10723186 11558647
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmZpcnN0YmFzZSA9IHJlYWQuY3N2KCJmaXJzdGJhc2VzdGF0cy5jc3YiKQpzdHIoZmlyc3RiYXNlKQpgYGAKCmBgYHtyfQpzdW1tYXJ5KGZpcnN0YmFzZSkKYGBgCmBgYHtyfQojIExpbmVhciBSZWdyZXNzaW9uIChvbmUgdmFyaWFibGUpCm1vZGVsMSA9IGxtKFBheXJvbGwuU2FsYXJ5MjAyMyB+IFJCSSwgZGF0YT1maXJzdGJhc2UpCnN1bW1hcnkobW9kZWwxKQojcGF5cm9sbC5zYWxhcnkgaXMgb3VyIGRlcGVuZGVudCB2YXJpYWJsZSh0YXJnZXQsIHJlc3BvbnNlIHZhcmlhYmxlKQpgYGAKCgpgYGB7cn0KbW9kZWwxJHJlc2lkdWFscwpgYGAKCmBgYHtyfQpTU0UgPSBzdW0obW9kZWwxJHJlc2lkdWFsc14yKQpTU0UKYGBgCmBgYHtyfQpTU0UgPSBzdW0obW9kZWwyJHJlc2lkdWFsc14yKQpTU0UKYGBgCgpgYGB7cn0KIyBMaW5lYXIgUmVncmVzc2lvbiAoYWxsIHZhcmlhYmxlcykKbW9kZWwzID0gbG0oUGF5cm9sbC5TYWxhcnkyMDIzIH4gSFIgKyBSQkkgKyBBVkcgKyBPQlArIE9QUywgZGF0YT1maXJzdGJhc2UpCnN1bW1hcnkobW9kZWwzKQpgYGAKCmBgYHtyfQpTU0UgPSBzdW0obW9kZWwzJHJlc2lkdWFsc14yKQpTU0UKYGBgCgpgYGB7cn0KbW9kZWw0ID0gbG0oUGF5cm9sbC5TYWxhcnkyMDIzIH4gUkJJICsgQVZHICsgT0JQK09QUywgZGF0YT1maXJzdGJhc2UpCnN1bW1hcnkobW9kZWw0KQpgYGAKCmBgYHtyfQpmaXJzdGJhc2U8LWZpcnN0YmFzZVssLSgxOjMpXQpgYGAKCmBgYHtyfQpjb3IoZmlyc3RiYXNlJFJCSSwgZmlyc3RiYXNlJFBheXJvbGwuU2FsYXJ5MjAyMykKYGBgCgpgYGB7cn0KY29yKGZpcnN0YmFzZSRBVkcsIGZpcnN0YmFzZSRPQlApCmBgYAoKYGBge3J9CmNvcihmaXJzdGJhc2UpCmBgYAoKYGBge3J9Cm1vZGVsNSA9IGxtKFBheXJvbGwuU2FsYXJ5MjAyMyB+IFJCSSArIE9CUCtPUFMsIGRhdGE9Zmlyc3RiYXNlKQpzdW1tYXJ5KG1vZGVsNSkKYGBgCgpgYGB7cn0KbW9kZWw2ID0gbG0oUGF5cm9sbC5TYWxhcnkyMDIzIH4gUkJJICsgT0JQLCBkYXRhPWZpcnN0YmFzZSkKc3VtbWFyeShtb2RlbDYpCmBgYAoKYGBge3J9CmZpcnN0YmFzZVRlc3QgPSByZWFkLmNzdigiZmlyc3RiYXNlc3RhdHNfdGVzdC5jc3YiKQpzdHIoZmlyc3RiYXNlVGVzdCkKYGBgCgoKYGBge3J9CiMgTWFrZSB0ZXN0IHNldCBwcmVkaWN0aW9ucwpwcmVkaWN0VGVzdCA9IHByZWRpY3QobW9kZWw2LCBuZXdkYXRhPWZpcnN0YmFzZVRlc3QpCnByZWRpY3RUZXN0CmBgYAoKCg==