# Load the first-base player statistics and inspect the column types.
firstbase <- read.csv(file = "firstbasestats.csv")
str(firstbase)
'data.frame':   23 obs. of  15 variables:
 $ Player            : chr  "Freddie Freeman" "Jose Abreu" "Nate Lowe" "Paul Goldschmidt" ...
 $ Pos               : chr  "1B" "1B" "1B" "1B" ...
 $ Team              : chr  "LAD" "CHW" "TEX" "STL" ...
 $ GP                : int  159 157 157 151 160 140 160 145 146 143 ...
 $ AB                : int  612 601 593 561 638 551 583 555 545 519 ...
 $ H                 : int  199 183 179 178 175 152 141 139 132 124 ...
 $ X2B               : int  47 40 26 41 35 27 25 28 40 23 ...
 $ HR                : int  21 15 27 35 32 20 36 22 8 18 ...
 $ RBI               : int  100 75 76 115 97 84 94 85 53 63 ...
 $ AVG               : num  0.325 0.305 0.302 0.317 0.274 0.276 0.242 0.251 0.242 0.239 ...
 $ OBP               : num  0.407 0.379 0.358 0.404 0.339 0.34 0.327 0.305 0.288 0.319 ...
 $ SLG               : num  0.511 0.446 0.492 0.578 0.48 0.437 0.477 0.423 0.36 0.391 ...
 $ OPS               : num  0.918 0.824 0.851 0.981 0.818 0.777 0.804 0.729 0.647 0.71 ...
 $ WAR               : num  5.77 4.19 3.21 7.86 3.85 3.07 5.05 1.32 -0.33 1.87 ...
 $ Payroll.Salary2023: num  27000000 19500000 4050000 26000000 14500000 ...
# Per-column summary of the loaded data (five-number summary and mean for the
# numeric columns, length/class/mode for the character columns).
summary(firstbase)
    Player              Pos                Team          
 Length:23          Length:23          Length:23         
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
       GP              AB              H              X2B       
 Min.   :  5.0   Min.   : 14.0   Min.   :  3.0   Min.   : 1.00  
 1st Qu.:105.5   1st Qu.:309.0   1st Qu.: 74.5   1st Qu.:13.50  
 Median :131.0   Median :465.0   Median :115.0   Median :23.00  
 Mean   :120.2   Mean   :426.9   Mean   :110.0   Mean   :22.39  
 3rd Qu.:152.0   3rd Qu.:558.0   3rd Qu.:146.5   3rd Qu.:28.00  
 Max.   :160.0   Max.   :638.0   Max.   :199.0   Max.   :47.00  
       HR             RBI              AVG              OBP        
 Min.   : 0.00   Min.   :  1.00   Min.   :0.2020   Min.   :0.2140  
 1st Qu.: 8.00   1st Qu.: 27.00   1st Qu.:0.2180   1st Qu.:0.3030  
 Median :18.00   Median : 63.00   Median :0.2420   Median :0.3210  
 Mean   :17.09   Mean   : 59.43   Mean   :0.2499   Mean   :0.3242  
 3rd Qu.:24.50   3rd Qu.: 84.50   3rd Qu.:0.2750   3rd Qu.:0.3395  
 Max.   :36.00   Max.   :115.00   Max.   :0.3250   Max.   :0.4070  
      SLG              OPS              WAR        
 Min.   :0.2860   Min.   :0.5000   Min.   :-1.470  
 1st Qu.:0.3505   1st Qu.:0.6445   1st Qu.: 0.190  
 Median :0.4230   Median :0.7290   Median : 1.310  
 Mean   :0.4106   Mean   :0.7346   Mean   : 1.788  
 3rd Qu.:0.4690   3rd Qu.:0.8175   3rd Qu.: 3.140  
 Max.   :0.5780   Max.   :0.9810   Max.   : 7.860  
 Payroll.Salary2023
 Min.   :  720000  
 1st Qu.:  739200  
 Median : 4050000  
 Mean   : 6972743  
 3rd Qu.: 8150000  
 Max.   :27000000  
# Model 1: simple linear regression of 2023 salary on RBI alone.
model1 <- lm(Payroll.Salary2023 ~ RBI, data = firstbase)
summary(model1)

Call:
lm(formula = Payroll.Salary2023 ~ RBI, data = firstbase)

Residuals:
      Min        1Q    Median        3Q       Max 
-10250331  -5220790   -843455   2386848  13654950 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) -2363744    2866320  -0.825  0.41883   
RBI           157088      42465   3.699  0.00133 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6516000 on 21 degrees of freedom
Multiple R-squared:  0.3945,    Adjusted R-squared:  0.3657 
F-statistic: 13.68 on 1 and 21 DF,  p-value: 0.001331
# Per-observation residuals (actual salary minus fitted salary) for model1.
residuals(model1)
          1           2           3           4           5 
 13654950.2  10082148.6  -5524939.3  10298631.2   1626214.0 
          6           7           8           9          10 
 -6731642.8  -5902522.2 -10250330.7  -4711916.8   -532796.1 
         11          12          13          14          15 
 -6667082.5  -6696203.1   7582148.6  -4916640.9  -1898125.3 
         16          17          18          19          20 
  -336532.3   -995042.5  -1311618.3   -843454.5   8050721.3 
         21          22          23 
  1250336.9   1847040.4   2926656.0 
# Sum of squared errors of model1 on the data it was fitted to.
SSE <- sum(residuals(model1)^2)
SSE
[1] 8.914926e+14
# Model 2: add batting average (AVG) alongside RBI.
model2 <- lm(Payroll.Salary2023 ~ AVG + RBI, data = firstbase)
summary(model2)

Call:
lm(formula = Payroll.Salary2023 ~ AVG + RBI, data = firstbase)

Residuals:
     Min       1Q   Median       3Q      Max 
-9097952 -4621582   -33233  3016541 10260245 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept) -18083756    9479037  -1.908   0.0709 .
AVG          74374031   42934155   1.732   0.0986 .
RBI            108850      49212   2.212   0.0388 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6226000 on 20 degrees of freedom
Multiple R-squared:  0.4735,    Adjusted R-squared:  0.4209 
F-statistic: 8.994 on 2 and 20 DF,  p-value: 0.001636
# Sum of squared errors of model2; overwrites the SSE value from model1.
SSE <- sum(residuals(model2)^2)
SSE
[1] 7.751841e+14
# Model 3: five predictors (HR, RBI, AVG, OBP, OPS).
model3 <- lm(
  Payroll.Salary2023 ~ HR + RBI + AVG + OBP + OPS,
  data = firstbase
)
summary(model3)

Call:
lm(formula = Payroll.Salary2023 ~ HR + RBI + AVG + OBP + OPS, 
    data = firstbase)

Residuals:
     Min       1Q   Median       3Q      Max 
-9611440 -3338119    64016  4472451  9490309 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept) -31107859   11738494  -2.650   0.0168 *
HR            -341069     552069  -0.618   0.5449  
RBI            115786     113932   1.016   0.3237  
AVG         -63824769  104544645  -0.611   0.5496  
OBP          27054948  131210166   0.206   0.8391  
OPS          60181012   95415131   0.631   0.5366  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6023000 on 17 degrees of freedom
Multiple R-squared:  0.5811,    Adjusted R-squared:  0.4579 
F-statistic: 4.717 on 5 and 17 DF,  p-value: 0.006951
# Sum of squared errors of model3; overwrites the previous SSE value.
SSE <- sum(residuals(model3)^2)
SSE
[1] 6.167793e+14
# Model 4: same predictors as model3 with HR removed.
model4 <- lm(
  Payroll.Salary2023 ~ RBI + AVG + OBP + OPS,
  data = firstbase
)
summary(model4)

Call:
lm(formula = Payroll.Salary2023 ~ RBI + AVG + OBP + OPS, data = firstbase)

Residuals:
     Min       1Q   Median       3Q      Max 
-9399551 -3573842    98921  3979339  9263512 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept) -29466887   11235931  -2.623   0.0173 *
RBI             71495      87015   0.822   0.4220  
AVG         -11035457   59192453  -0.186   0.8542  
OBP          86360720   87899074   0.982   0.3389  
OPS           9464546   47788458   0.198   0.8452  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5919000 on 18 degrees of freedom
Multiple R-squared:  0.5717,    Adjusted R-squared:  0.4765 
F-statistic: 6.007 on 4 and 18 DF,  p-value: 0.00298
# Keep only the numeric columns so the whole data frame can be passed to
# cor(). The character columns (Player, Pos, Team) are the ones removed.
# Selecting by type rather than by position (-(1:3)) makes this chunk safe to
# re-run: a positional drop would strip three more columns on every rerun.
firstbase <- firstbase[
  , vapply(firstbase, is.numeric, logical(1)),
  drop = FALSE
]
# Pearson correlation between RBI and 2023 salary.
with(firstbase, cor(RBI, Payroll.Salary2023))
[1] 0.6281239
# Pearson correlation between two of the predictors (AVG and OBP); a high
# value here signals multicollinearity between them.
with(firstbase, cor(AVG, OBP))
[1] 0.8028894
# Full pairwise correlation matrix of the remaining columns.
cor(firstbase)

# Model 5: model4 with AVG removed.
model5 <- lm(Payroll.Salary2023 ~ RBI + OBP + OPS, data = firstbase)
summary(model5)

Call:
lm(formula = Payroll.Salary2023 ~ RBI + OBP + OPS, data = firstbase)

Residuals:
     Min       1Q   Median       3Q      Max 
-9465449 -3411234   259746  4102864  8876798 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept) -29737007   10855411  -2.739    0.013 *
RBI             72393      84646   0.855    0.403  
OBP          82751360   83534224   0.991    0.334  
OPS           7598051   45525575   0.167    0.869  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5767000 on 19 degrees of freedom
Multiple R-squared:  0.5709,    Adjusted R-squared:  0.5031 
F-statistic: 8.426 on 3 and 19 DF,  p-value: 0.000913
# Model 6: model5 with OPS removed, leaving RBI and OBP.
model6 <- lm(Payroll.Salary2023 ~ RBI + OBP, data = firstbase)
summary(model6)

Call:
lm(formula = Payroll.Salary2023 ~ RBI + OBP, data = firstbase)

Residuals:
     Min       1Q   Median       3Q      Max 
-9045497 -3487008   139497  4084739  9190185 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept) -28984802    9632560  -3.009  0.00693 **
RBI             84278      44634   1.888  0.07360 . 
OBP          95468873   33385182   2.860  0.00969 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5625000 on 20 degrees of freedom
Multiple R-squared:  0.5703,    Adjusted R-squared:  0.5273 
F-statistic: 13.27 on 2 and 20 DF,  p-value: 0.0002149
# Load the held-out players used to evaluate the model and inspect them.
firstbaseTest <- read.csv(file = "firstbasestats_test.csv")
str(firstbaseTest)
'data.frame':   2 obs. of  15 variables:
 $ Player            : chr  "Matt Olson" "Josh Bell"
 $ Pos               : chr  "1B" "1B"
 $ Team              : chr  "ATL" "SD"
 $ GP                : int  162 156
 $ AB                : int  616 552
 $ H                 : int  148 147
 $ X2B               : int  44 29
 $ HR                : int  34 17
 $ RBI               : int  103 71
 $ AVG               : num  0.24 0.266
 $ OBP               : num  0.325 0.362
 $ SLG               : num  0.477 0.422
 $ OPS               : num  0.802 0.784
 $ WAR               : num  3.29 3.5
 $ Payroll.Salary2023: num  21000000 16500000
# Predicted 2023 salaries for the held-out players from model6.
predictTest <- predict(object = model6, newdata = firstbaseTest)
predictTest
       1        2 
10723186 11558647 
# Out-of-sample R^2 of model6 on the held-out players.
# SSE: squared prediction error on the test rows.
SSE <- sum((firstbaseTest$Payroll.Salary2023 - predictTest)^2)
# SST: squared error of the baseline that always predicts the mean salary of
# `firstbase` (the data the models were fitted on), not the test-set mean.
SST <- sum(
  (firstbaseTest$Payroll.Salary2023 - mean(firstbase$Payroll.Salary2023))^2
)
1 - SSE / SST
[1] 0.5477734
LS0tDQp0aXRsZTogIkluLUNsYXNzIEFjdGl2aXR5IDYiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KYGBge3J9DQpmaXJzdGJhc2UgPSByZWFkLmNzdigiZmlyc3RiYXNlc3RhdHMuY3N2IikNCnN0cihmaXJzdGJhc2UpDQpgYGANCmBgYHtyfQ0Kc3VtbWFyeShmaXJzdGJhc2UpDQpgYGANCg0KYGBge3J9DQptb2RlbDEgPSBsbShQYXlyb2xsLlNhbGFyeTIwMjMgfiBSQkksIGRhdGE9Zmlyc3RiYXNlKQ0Kc3VtbWFyeShtb2RlbDEpDQpgYGANCmBgYHtyfQ0KbW9kZWwxJHJlc2lkdWFscw0KYGBgDQoNCmBgYHtyfQ0KU1NFID0gc3VtKG1vZGVsMSRyZXNpZHVhbHNeMikNClNTRQ0KYGBgDQpgYGB7cn0NCm1vZGVsMiA9IGxtKFBheXJvbGwuU2FsYXJ5MjAyMyB+IEFWRyArIFJCSSwgZGF0YT1maXJzdGJhc2UpDQpzdW1tYXJ5KG1vZGVsMikNCmBgYA0KYGBge3J9DQpTU0UgPSBzdW0obW9kZWwyJHJlc2lkdWFsc14yKQ0KU1NFDQpgYGANCg0KYGBge3J9DQptb2RlbDMgPSBsbShQYXlyb2xsLlNhbGFyeTIwMjMgfiBIUiArIFJCSSArIEFWRyArIE9CUCsgT1BTLCBkYXRhPWZpcnN0YmFzZSkNCnN1bW1hcnkobW9kZWwzKQ0KYGBgDQoNCmBgYHtyfQ0KU1NFID0gc3VtKG1vZGVsMyRyZXNpZHVhbHNeMikNClNTRQ0KYGBgDQoNCmBgYHtyfQ0KbW9kZWw0ID0gbG0oUGF5cm9sbC5TYWxhcnkyMDIzIH4gUkJJICsgQVZHICsgT0JQK09QUywgZGF0YT1maXJzdGJhc2UpDQpzdW1tYXJ5KG1vZGVsNCkNCmBgYA0KYGBge3J9DQpmaXJzdGJhc2U8LWZpcnN0YmFzZVssLSgxOjMpXQ0KYGBgDQoNCmBgYHtyfQ0KY29yKGZpcnN0YmFzZSRSQkksIGZpcnN0YmFzZSRQYXlyb2xsLlNhbGFyeTIwMjMpDQpgYGANCg0KYGBge3J9DQpjb3IoZmlyc3RiYXNlJEFWRywgZmlyc3RiYXNlJE9CUCkNCmBgYA0KDQpgYGB7cn0NCmNvcihmaXJzdGJhc2UpDQpgYGANCg0KYGBge3J9DQptb2RlbDUgPSBsbShQYXlyb2xsLlNhbGFyeTIwMjMgfiBSQkkgKyBPQlArT1BTLCBkYXRhPWZpcnN0YmFzZSkNCnN1bW1hcnkobW9kZWw1KQ0KYGBgDQoNCmBgYHtyfQ0KbW9kZWw2ID0gbG0oUGF5cm9sbC5TYWxhcnkyMDIzIH4gUkJJICsgT0JQLCBkYXRhPWZpcnN0YmFzZSkNCnN1bW1hcnkobW9kZWw2KQ0KYGBgDQpgYGB7cn0NCmZpcnN0YmFzZVRlc3QgPSByZWFkLmNzdigiZmlyc3RiYXNlc3RhdHNfdGVzdC5jc3YiKQ0Kc3RyKGZpcnN0YmFzZVRlc3QpDQpgYGANCmBgYHtyfQ0KcHJlZGljdFRlc3QgPSBwcmVkaWN0KG1vZGVsNiwgbmV3ZGF0YT1maXJzdGJhc2VUZXN0KQ0KcHJlZGljdFRlc3QNCmBgYA0KDQpgYGB7cn0NClNTRSA9IHN1bSgoZmlyc3RiYXNlVGVzdCRQYXlyb2xsLlNhbGFyeTIwMjMgLSBwcmVkaWN0VGVzdCleMikNClNTVCA9IHN1bSgoZmlyc3RiYXNlVGVzdCRQYXlyb2xsLlNhbGFyeTIwMjMgLSBtZWFuKGZpcnN0YmFzZSRQYXlyb2xsLlNhbGFyeTIwMjMpKV4yKQ0KMSAtIFNTRS9TU1QNCmBgYA0KDQo=