# Read the training data from firstbasestats.csv and inspect it.
firstbase <- read.csv(file = "firstbasestats.csv")
str(object = firstbase)
summary(object = firstbase)

# Linear regression with a single predictor.
# Payroll.Salary2023 is the response (dependent / target) variable and RBI is
# the only explanatory variable.
model1 <- lm(formula = Payroll.Salary2023 ~ RBI, data = firstbase)
summary(object = model1)

# Residuals of the fitted model, one per row used in the fit.
residuals(model1)

# Sum of squared errors (SSE) of model1.
SSE <- sum(residuals(model1)^2)
print(SSE)
[1] 8.914926e+14
# Linear regression with two predictors.
# model2 was used here without ever being fitted, so this chunk stopped with
# "object 'model2' not found". Fit it before computing its SSE.
# NOTE(review): the predictors RBI + AVG are an assumption (a step between the
# one-variable model1 and the larger models that follow) -- confirm the
# intended formula.
model2 <- lm(Payroll.Salary2023 ~ RBI + AVG, data = firstbase)
summary(model2)

# Sum of squared errors (SSE) of model2.
SSE <- sum(model2$residuals^2)
SSE
# Multiple linear regression: salary explained by HR, RBI, AVG, OBP and OPS
# together.
model3 <- lm(
  formula = Payroll.Salary2023 ~ HR + RBI + AVG + OBP + OPS,
  data = firstbase
)
summary(object = model3)

Call:
lm(formula = Payroll.Salary2023 ~ HR + RBI + AVG + OBP + OPS, 
    data = firstbase)

Residuals:
     Min       1Q   Median       3Q      Max 
-9611440 -3338119    64016  4472451  9490309 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept) -31107858   11738494  -2.650   0.0168
HR            -341069     552069  -0.618   0.5449
RBI            115786     113932   1.016   0.3237
AVG         -63824769  104544645  -0.611   0.5496
OBP          27054948  131210166   0.206   0.8391
OPS          60181012   95415131   0.631   0.5366
             
(Intercept) *
HR           
RBI          
AVG          
OBP          
OPS          
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6023000 on 17 degrees of freedom
Multiple R-squared:  0.5811,    Adjusted R-squared:  0.4579 
F-statistic: 4.717 on 5 and 17 DF,  p-value: 0.006951
# Sum of squared errors (SSE) of model3; overwrites the previous SSE value.
SSE <- sum(residuals(model3)^2)
print(SSE)
[1] 6.167793e+14
# Same as model3 but without HR as a predictor.
model4 <- lm(
  formula = Payroll.Salary2023 ~ RBI + AVG + OBP + OPS,
  data = firstbase
)
summary(object = model4)
# Keep only the numeric columns so that cor() can later be applied to the
# whole data frame. Selecting by type rather than by position (the original
# dropped columns 1:3) makes this chunk safe to re-run: executing it a second
# time no longer strips three more columns.
# NOTE(review): assumes the three leading columns are the only non-numeric
# ones (Player, Pos, Team in the test file) -- confirm for the training file.
firstbase <- firstbase[
  ,
  vapply(firstbase, is.numeric, logical(1)),
  drop = FALSE
]

# Pairwise correlations: predictor vs. response, then predictor vs. predictor.
cor(firstbase$RBI, firstbase$Payroll.Salary2023)
cor(firstbase$AVG, firstbase$OBP)
[1] 0.8028894
# Correlation matrix across every column remaining in firstbase.
cor(x = firstbase)

# Three-predictor model: RBI, OBP and OPS.
model5 <- lm(formula = Payroll.Salary2023 ~ RBI + OBP + OPS, data = firstbase)
summary(object = model5)

# Two-predictor model: RBI and OBP only.
model6 <- lm(formula = Payroll.Salary2023 ~ RBI + OBP, data = firstbase)
summary(object = model6)

Call:
lm(formula = Payroll.Salary2023 ~ RBI + OBP, data = firstbase)

Residuals:
     Min       1Q   Median       3Q      Max 
-9045497 -3487008   139497  4084739  9190185 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept) -28984802    9632560  -3.009  0.00693
RBI             84278      44634   1.888  0.07360
OBP          95468873   33385182   2.860  0.00969
              
(Intercept) **
RBI         . 
OBP         **
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5625000 on 20 degrees of freedom
Multiple R-squared:  0.5703,    Adjusted R-squared:  0.5273 
F-statistic: 13.27 on 2 and 20 DF,  p-value: 0.0002149
# Read the held-out test data from firstbasestats_test.csv and show its
# structure.
firstbaseTest <- read.csv(file = "firstbasestats_test.csv")
str(object = firstbaseTest)
'data.frame':   2 obs. of  15 variables:
 $ Player            : chr  "Matt Olson" "Josh Bell"
 $ Pos               : chr  "1B" "1B"
 $ Team              : chr  "ATL" "SD"
 $ GP                : int  162 156
 $ AB                : int  616 552
 $ H                 : int  148 147
 $ X2B               : int  44 29
 $ HR                : int  34 17
 $ RBI               : int  103 71
 $ AVG               : num  0.24 0.266
 $ OBP               : num  0.325 0.362
 $ SLG               : num  0.477 0.422
 $ OPS               : num  0.802 0.784
 $ WAR               : num  3.29 3.5
 $ Payroll.Salary2023: num  21000000 16500000
# Predict Payroll.Salary2023 for the test rows using model6 (RBI + OBP).
predictTest <- predict(object = model6, newdata = firstbaseTest)
print(predictTest)
       1        2 
10723186 11558647 
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmZpcnN0YmFzZSA9IHJlYWQuY3N2KCJmaXJzdGJhc2VzdGF0cy5jc3YiKQpzdHIoZmlyc3RiYXNlKQpgYGAKCmBgYHtyfQpzdW1tYXJ5KGZpcnN0YmFzZSkKYGBgCmBgYHtyfQojIExpbmVhciBSZWdyZXNzaW9uIChvbmUgdmFyaWFibGUpCm1vZGVsMSA9IGxtKFBheXJvbGwuU2FsYXJ5MjAyMyB+IFJCSSwgZGF0YT1maXJzdGJhc2UpCnN1bW1hcnkobW9kZWwxKQojcGF5cm9sbC5zYWxhcnkgaXMgb3VyIGRlcGVuZGVudCB2YXJpYWJsZSh0YXJnZXQsIHJlc3BvbnNlIHZhcmlhYmxlKQpgYGAKCgpgYGB7cn0KbW9kZWwxJHJlc2lkdWFscwpgYGAKCmBgYHtyfQpTU0UgPSBzdW0obW9kZWwxJHJlc2lkdWFsc14yKQpTU0UKYGBgCmBgYHtyfQpTU0UgPSBzdW0obW9kZWwyJHJlc2lkdWFsc14yKQpTU0UKYGBgCgpgYGB7cn0KIyBMaW5lYXIgUmVncmVzc2lvbiAoYWxsIHZhcmlhYmxlcykKbW9kZWwzID0gbG0oUGF5cm9sbC5TYWxhcnkyMDIzIH4gSFIgKyBSQkkgKyBBVkcgKyBPQlArIE9QUywgZGF0YT1maXJzdGJhc2UpCnN1bW1hcnkobW9kZWwzKQpgYGAKCmBgYHtyfQpTU0UgPSBzdW0obW9kZWwzJHJlc2lkdWFsc14yKQpTU0UKYGBgCgpgYGB7cn0KbW9kZWw0ID0gbG0oUGF5cm9sbC5TYWxhcnkyMDIzIH4gUkJJICsgQVZHICsgT0JQK09QUywgZGF0YT1maXJzdGJhc2UpCnN1bW1hcnkobW9kZWw0KQpgYGAKCmBgYHtyfQpmaXJzdGJhc2U8LWZpcnN0YmFzZVssLSgxOjMpXQpgYGAKCmBgYHtyfQpjb3IoZmlyc3RiYXNlJFJCSSwgZmlyc3RiYXNlJFBheXJvbGwuU2FsYXJ5MjAyMykKYGBgCgpgYGB7cn0KY29yKGZpcnN0YmFzZSRBVkcsIGZpcnN0YmFzZSRPQlApCmBgYAoKYGBge3J9CmNvcihmaXJzdGJhc2UpCmBgYAoKYGBge3J9Cm1vZGVsNSA9IGxtKFBheXJvbGwuU2FsYXJ5MjAyMyB+IFJCSSArIE9CUCtPUFMsIGRhdGE9Zmlyc3RiYXNlKQpzdW1tYXJ5KG1vZGVsNSkKYGBgCgpgYGB7cn0KbW9kZWw2ID0gbG0oUGF5cm9sbC5TYWxhcnkyMDIzIH4gUkJJICsgT0JQLCBkYXRhPWZpcnN0YmFzZSkKc3VtbWFyeShtb2RlbDYpCmBgYAoKYGBge3J9CmZpcnN0YmFzZVRlc3QgPSByZWFkLmNzdigiZmlyc3RiYXNlc3RhdHNfdGVzdC5jc3YiKQpzdHIoZmlyc3RiYXNlVGVzdCkKYGBgCgoKYGBge3J9CiMgTWFrZSB0ZXN0IHNldCBwcmVkaWN0aW9ucwpwcmVkaWN0VGVzdCA9IHByZWRpY3QobW9kZWw2LCBuZXdkYXRhPWZpcnN0YmFzZVRlc3QpCnByZWRpY3RUZXN0CmBgYAoKCg==