# Load the first-base hitting statistics and inspect the column types
firstbase <- read.csv("firstbasestats.csv")
str(firstbase)
'data.frame':   23 obs. of  15 variables:
 $ Player            : chr  "Freddie Freeman" "Jose Abreu" "Nate Lowe" "Paul Goldschmidt" ...
 $ Pos               : chr  "1B" "1B" "1B" "1B" ...
 $ Team              : chr  "LAD" "CHW" "TEX" "STL" ...
 $ GP                : int  159 157 157 151 160 140 160 145 146 143 ...
 $ AB                : int  612 601 593 561 638 551 583 555 545 519 ...
 $ H                 : int  199 183 179 178 175 152 141 139 132 124 ...
 $ X2B               : int  47 40 26 41 35 27 25 28 40 23 ...
 $ HR                : int  21 15 27 35 32 20 36 22 8 18 ...
 $ RBI               : int  100 75 76 115 97 84 94 85 53 63 ...
 $ AVG               : num  0.325 0.305 0.302 0.317 0.274 0.276 0.242 0.251 0.242 0.239 ...
 $ OBP               : num  0.407 0.379 0.358 0.404 0.339 0.34 0.327 0.305 0.288 0.319 ...
 $ SLG               : num  0.511 0.446 0.492 0.578 0.48 0.437 0.477 0.423 0.36 0.391 ...
 $ OPS               : num  0.918 0.824 0.851 0.981 0.818 0.777 0.804 0.729 0.647 0.71 ...
 $ WAR               : num  5.77 4.19 3.21 7.86 3.85 3.07 5.05 1.32 -0.33 1.87 ...
 $ Payroll.Salary2023: num  27000000 19500000 4050000 26000000 14500000 ...
# Per-column summary: min/quartiles/mean/max for numeric columns,
# length/class/mode for character columns
summary(firstbase)
    Player              Pos                Team                 GP              AB       
 Length:23          Length:23          Length:23          Min.   :  5.0   Min.   : 14.0  
 Class :character   Class :character   Class :character   1st Qu.:105.5   1st Qu.:309.0  
 Mode  :character   Mode  :character   Mode  :character   Median :131.0   Median :465.0  
                                                          Mean   :120.2   Mean   :426.9  
                                                          3rd Qu.:152.0   3rd Qu.:558.0  
                                                          Max.   :160.0   Max.   :638.0  
       H              X2B              HR             RBI              AVG        
 Min.   :  3.0   Min.   : 1.00   Min.   : 0.00   Min.   :  1.00   Min.   :0.2020  
 1st Qu.: 74.5   1st Qu.:13.50   1st Qu.: 8.00   1st Qu.: 27.00   1st Qu.:0.2180  
 Median :115.0   Median :23.00   Median :18.00   Median : 63.00   Median :0.2420  
 Mean   :110.0   Mean   :22.39   Mean   :17.09   Mean   : 59.43   Mean   :0.2499  
 3rd Qu.:146.5   3rd Qu.:28.00   3rd Qu.:24.50   3rd Qu.: 84.50   3rd Qu.:0.2750  
 Max.   :199.0   Max.   :47.00   Max.   :36.00   Max.   :115.00   Max.   :0.3250  
      OBP              SLG              OPS              WAR         Payroll.Salary2023
 Min.   :0.2140   Min.   :0.2860   Min.   :0.5000   Min.   :-1.470   Min.   :  720000  
 1st Qu.:0.3030   1st Qu.:0.3505   1st Qu.:0.6445   1st Qu.: 0.190   1st Qu.:  739200  
 Median :0.3210   Median :0.4230   Median :0.7290   Median : 1.310   Median : 4050000  
 Mean   :0.3242   Mean   :0.4106   Mean   :0.7346   Mean   : 1.788   Mean   : 6972743  
 3rd Qu.:0.3395   3rd Qu.:0.4690   3rd Qu.:0.8175   3rd Qu.: 3.140   3rd Qu.: 8150000  
 Max.   :0.4070   Max.   :0.5780   Max.   :0.9810   Max.   : 7.860   Max.   :27000000  
# Simple linear regression: salary explained by a single predictor
#   independent variable (feature, explanatory variable): RBI
#   dependent variable (target, response variable):       Payroll.Salary2023
model1 <- lm(Payroll.Salary2023 ~ RBI, data = firstbase)
summary(model1)

Call:
lm(formula = Payroll.Salary2023 ~ RBI, data = firstbase)

Residuals:
      Min        1Q    Median        3Q       Max 
-10250331  -5220790   -843455   2386848  13654950 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) -2363744    2866320  -0.825  0.41883   
RBI           157088      42465   3.699  0.00133 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6516000 on 21 degrees of freedom
Multiple R-squared:  0.3945,    Adjusted R-squared:  0.3657 
F-statistic: 13.68 on 1 and 21 DF,  p-value: 0.001331

Since the absolute value of the t statistic (3.699) is greater than 2, the RBI independent variable is significant at the 5% significance level. You may use the p-value as well; in this case p = 0.00133 <= 0.05.

Rule of thumb: if either |t| >= 2 or p < 0.05, the corresponding feature is significant at the 5% significance level.

For each additional RBI, the model predicts that a first baseman's salary is about $157,088 higher.

RBI explains about 39.45% of the variance in salary (multiple R-squared = 0.3945; adjusted R-squared = 0.3657).

# Sum of squared errors (residual sum of squares) for model1
SSE <- sum(residuals(model1)^2)
SSE
[1] 8.914926e+14
# Multiple linear regression: salary explained by batting average and RBI
model2 <- lm(Payroll.Salary2023 ~ AVG + RBI, data = firstbase)
summary(model2)

Call:
lm(formula = Payroll.Salary2023 ~ AVG + RBI, data = firstbase)

Residuals:
     Min       1Q   Median       3Q      Max 
-9097952 -4621582   -33233  3016541 10260245 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept) -18083756    9479036  -1.908   0.0709 .
AVG          74374031   42934155   1.732   0.0986 .
RBI            108850      49212   2.212   0.0388 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6226000 on 20 degrees of freedom
Multiple R-squared:  0.4735,    Adjusted R-squared:  0.4209 
F-statistic: 8.994 on 2 and 20 DF,  p-value: 0.001636

AVG is not significant at the 5% significance level (p = 0.0986), whereas RBI is significant at the 5% significance level (p = 0.0388). Adjusted R-squared went up, from 0.3657 to 0.4209.

The overall model is significant at the 1% significance level (F-test p-value = 0.001636).

# Sum of squared errors (residual sum of squares) for model2
SSE <- sum(residuals(model2)^2)
SSE
[1] 7.751841e+14
# Multiple linear regression on five hitting statistics
# (HR, RBI, AVG, OBP, OPS) -- not every column in the data set
model3 <- lm(
  Payroll.Salary2023 ~ HR + RBI + AVG + OBP + OPS,
  data = firstbase
)
summary(model3)

Call:
lm(formula = Payroll.Salary2023 ~ HR + RBI + AVG + OBP + OPS, 
    data = firstbase)

Residuals:
     Min       1Q   Median       3Q      Max 
-9611440 -3338119    64016  4472451  9490309 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept) -31107858   11738494  -2.650   0.0168 *
HR            -341069     552069  -0.618   0.5449  
RBI            115786     113932   1.016   0.3237  
AVG         -63824769  104544645  -0.611   0.5496  
OBP          27054948  131210166   0.206   0.8391  
OPS          60181012   95415131   0.631   0.5366  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6023000 on 17 degrees of freedom
Multiple R-squared:  0.5811,    Adjusted R-squared:  0.4579 
F-statistic: 4.717 on 5 and 17 DF,  p-value: 0.006951
# Sum of squared errors (residual sum of squares) for model3
SSE <- sum(residuals(model3)^2)
SSE
[1] 6.167793e+14
# Refit without HR, keeping RBI, AVG, OBP and OPS as predictors
model4 <- lm(Payroll.Salary2023 ~ RBI + AVG + OBP + OPS, data = firstbase)
summary(model4)

Call:
lm(formula = Payroll.Salary2023 ~ RBI + AVG + OBP + OPS, data = firstbase)

Residuals:
     Min       1Q   Median       3Q      Max 
-9399551 -3573842    98921  3979339  9263512 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept) -29466887   11235931  -2.623   0.0173 *
RBI             71495      87015   0.822   0.4220  
AVG         -11035457   59192453  -0.186   0.8542  
OBP          86360720   87899074   0.982   0.3389  
OPS           9464546   47788458   0.198   0.8452  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5919000 on 18 degrees of freedom
Multiple R-squared:  0.5717,    Adjusted R-squared:  0.4765 
F-statistic: 6.007 on 4 and 18 DF,  p-value: 0.00298
# Drop the character identifier columns so that cor() receives only numeric
# data. The columns are removed by name rather than by position: a positional
# drop such as -(1:3) is not idempotent (re-running the chunk would silently
# discard GP, AB and H as well) and breaks if the CSV column order changes.
firstbase <- firstbase[
  , setdiff(names(firstbase), c("Player", "Pos", "Team")),
  drop = FALSE
]

# Pairwise correlation matrix of all remaining columns
cor(firstbase)
                          GP        AB         H       X2B        HR       RBI       AVG
GP                 1.0000000 0.9779421 0.9056508 0.8446267 0.7432552 0.8813917 0.4430808
AB                 0.9779421 1.0000000 0.9516701 0.8924632 0.7721339 0.9125839 0.5126292
H                  0.9056508 0.9516701 1.0000000 0.9308318 0.7155225 0.9068893 0.7393167
X2B                0.8446267 0.8924632 0.9308318 1.0000000 0.5889699 0.8485911 0.6613085
HR                 0.7432552 0.7721339 0.7155225 0.5889699 1.0000000 0.8929048 0.3444242
RBI                0.8813917 0.9125839 0.9068893 0.8485911 0.8929048 1.0000000 0.5658479
AVG                0.4430808 0.5126292 0.7393167 0.6613085 0.3444242 0.5658479 1.0000000
OBP                0.4841583 0.5026125 0.6560021 0.5466537 0.4603408 0.5704463 0.8028894
SLG                0.6875270 0.7471949 0.8211406 0.7211259 0.8681501 0.8824090 0.7254274
OPS                0.6504483 0.6980141 0.8069779 0.6966830 0.7638721 0.8156612 0.7989005
WAR                0.5645243 0.6211558 0.7688712 0.6757470 0.6897677 0.7885666 0.7855945
Payroll.Salary2023 0.4614889 0.5018820 0.6249911 0.6450730 0.5317619 0.6281239 0.5871543
                         OBP       SLG       OPS       WAR Payroll.Salary2023
GP                 0.4841583 0.6875270 0.6504483 0.5645243          0.4614889
AB                 0.5026125 0.7471949 0.6980141 0.6211558          0.5018820
H                  0.6560021 0.8211406 0.8069779 0.7688712          0.6249911
X2B                0.5466537 0.7211259 0.6966830 0.6757470          0.6450730
HR                 0.4603408 0.8681501 0.7638721 0.6897677          0.5317619
RBI                0.5704463 0.8824090 0.8156612 0.7885666          0.6281239
AVG                0.8028894 0.7254274 0.7989005 0.7855945          0.5871543
OBP                1.0000000 0.7617499 0.8987390 0.7766375          0.7025979
SLG                0.7617499 1.0000000 0.9686752 0.8611140          0.6974086
OPS                0.8987390 0.9686752 1.0000000 0.8799893          0.7394981
WAR                0.7766375 0.8611140 0.8799893 1.0000000          0.8086359
Payroll.Salary2023 0.7025979 0.6974086 0.7394981 0.8086359          1.0000000
# Refit without AVG, keeping RBI, OBP and OPS as predictors
model5 <- lm(Payroll.Salary2023 ~ RBI + OBP + OPS, data = firstbase)
summary(model5)

Call:
lm(formula = Payroll.Salary2023 ~ RBI + OBP + OPS, data = firstbase)

Residuals:
     Min       1Q   Median       3Q      Max 
-9465449 -3411234   259746  4102864  8876798 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept) -29737007   10855411  -2.739    0.013 *
RBI             72393      84646   0.855    0.403  
OBP          82751360   83534224   0.991    0.334  
OPS           7598051   45525575   0.167    0.869  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5767000 on 19 degrees of freedom
Multiple R-squared:  0.5709,    Adjusted R-squared:  0.5031 
F-statistic: 8.426 on 3 and 19 DF,  p-value: 0.000913
# Composite offensive score: RBI plus a weighted sum of OBP (x2) and OPS (x3).
# NOTE(review): RBI is a count while OBP and OPS are rates below 1, so the
# weighted rate terms add only a few units and the score is presumably
# dominated by RBI -- confirm whether the inputs should be rescaled first.
firstbase$offensivemetric <- with(firstbase, RBI + (2 * OBP + 3 * OPS))
# Two-predictor model: salary explained by RBI and OBP
model6 <- lm(Payroll.Salary2023 ~ RBI + OBP, data = firstbase)
summary(model6)

Call:
lm(formula = Payroll.Salary2023 ~ RBI + OBP, data = firstbase)

Residuals:
     Min       1Q   Median       3Q      Max 
-9045497 -3487008   139497  4084739  9190185 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept) -28984802    9632560  -3.009  0.00693 **
RBI             84278      44634   1.888  0.07360 . 
OBP          95468873   33385182   2.860  0.00969 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5625000 on 20 degrees of freedom
Multiple R-squared:  0.5703,    Adjusted R-squared:  0.5273 
F-statistic: 13.27 on 2 and 20 DF,  p-value: 0.0002149
# Single-predictor model using the composite offensivemetric column
model7 <- lm(Payroll.Salary2023 ~ offensivemetric, data = firstbase)
summary(model7)

Call:
lm(formula = Payroll.Salary2023 ~ offensivemetric, data = firstbase)

Residuals:
      Min        1Q    Median        3Q       Max 
-10222164  -5227286   -819148   2439707  13574070 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)   
(Intercept)     -2764104    2940102  -0.940  0.35784   
offensivemetric   156323      41899   3.731  0.00123 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6494000 on 21 degrees of freedom
Multiple R-squared:  0.3986,    Adjusted R-squared:   0.37 
F-statistic: 13.92 on 1 and 21 DF,  p-value: 0.001234
# Load the held-out test players and inspect the column types
firstbaseTest <- read.csv("firstbasestats_test.csv")
str(firstbaseTest)
'data.frame':   2 obs. of  15 variables:
 $ Player            : chr  "Matt Olson" "Josh Bell"
 $ Pos               : chr  "1B" "1B"
 $ Team              : chr  "ATL" "SD"
 $ GP                : int  162 156
 $ AB                : int  616 552
 $ H                 : int  148 147
 $ X2B               : int  44 29
 $ HR                : int  34 17
 $ RBI               : int  103 71
 $ AVG               : num  0.24 0.266
 $ OBP               : num  0.325 0.362
 $ SLG               : num  0.477 0.422
 $ OPS               : num  0.802 0.784
 $ WAR               : num  3.29 3.5
 $ Payroll.Salary2023: num  21000000 16500000
# Predict salaries for the test players with the RBI + OBP model (model6)
predictTest <- predict(model6, newdata = firstbaseTest)
predictTest
       1        2 
10723186 11558647 
LS0tCnRpdGxlOiAiSW50cm8gdG8gTGluZWFyIFJlZ3Jlc3Npb246IEZpcnN0IEJhc2UgaGl0dGluZyBzdGF0cyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIApgYGB7cn0KIyBSZWFkIGluIGRhdGEKZmlyc3RiYXNlID0gcmVhZC5jc3YoImZpcnN0YmFzZXN0YXRzLmNzdiIpCnN0cihmaXJzdGJhc2UpCmBgYAoKCgpgYGB7cn0Kc3VtbWFyeShmaXJzdGJhc2UpCmBgYAoKCmBgYHtyfQojIExpbmVhciBSZWdyZXNzaW9uIChvbmUgdmFyaWFibGUpCm1vZGVsMSA9IGxtKFBheXJvbGwuU2FsYXJ5MjAyMyB+IFJCSSwgZGF0YT1maXJzdGJhc2UpCiNSQkdJIGlzIG91ciBpbmRlcGVuZGVudCB2YXJpYWJsZShmZWF0dXJlLCBleHBsYW5hdG9yeSB2YXJpYWJsZSkKI1BheXJvbGwuU2FsYXJ5IGlzIG91ciBkZXBlbmRlbnQgdmFyaWFibGUodGFyZ2V0LCByZXNwb25zZSB2YXJpYWJsZSkKc3VtbWFyeShtb2RlbDEpCmBgYAoKU2luY2UgdGhlIGFic29sdXRlIHZhbHVlIG9mIHQgaXMgZ3JlYXRlciB0aGFuIDIsIHRoZSBSQkkgaW5kZXBlbmRlbnQgdmFyaWFibGUgaXMgc2lnbmlmaWNhbnQgYXQgYSA1JSBzaWduaWZpY2FuY2UgbGV2ZWwuIFlvdSBtYXkgdXNlIHAgdmFsdWUgYXMgd2VsbCwgaW4gdGhpcyBjYXNlIHA8PTAuMDUuCgpFaXRoZXIgfHR8Pj0yIG9yIHA8MC4wNSB0aGUgY29ycmVzcG9uZGluZyBmZWF0dXJlIGlzIHNpZ25pZmljYW50IGF0IGEgNSUgc2lnbmlmaWNhbmNlIGxldmVsCgpGb3IgZWFjaCBhZGRpdGlvbmFsIFJCSSBhIDFzdEJhc2UgcGxheWVyIGdldHMgJDE1NzA4OCBtb3JlLgoKClJCSSBleHBsYWlucyAzNi41NyAlIG9mIHRoZSBtb2RlbAoKCgpgYGB7cn0KU1NFID0gc3VtKG1vZGVsMSRyZXNpZHVhbHNeMikKU1NFCmBgYAoKCgpgYGB7cn0KIyBMaW5lYXIgUmVncmVzc2lvbiAodHdvIHZhcmlhYmxlcykKbW9kZWwyID0gbG0oUGF5cm9sbC5TYWxhcnkyMDIzIH4gQVZHICsgUkJJLCBkYXRhPWZpcnN0YmFzZSkKc3VtbWFyeShtb2RlbDIpCmBgYAoKCkFWRyBpcyBub3Qgc2lnbmlmaWNhbnQgYXQgYSA1JSBzaWduaWZpY2FuY2UgbGV2ZWwKUkJJIGlzIHNpZ25pZmljYW50IGF0IGEgNSUgc2lnbmlmaWNhbmNlIGxldmVsCkFkanVzdGVkIFIgU3F1YXJlZCB3ZW50IHVwCgoKVGhlIG1vZGVsIGlzIHNpZ25pZmljYW50IGF0IDElIHNpZ25pZmljYW5jZSBsZXZlbAoKCgpgYGB7cn0KU1NFID0gc3VtKG1vZGVsMiRyZXNpZHVhbHNeMikKU1NFCmBgYAoKCgoKYGBge3J9CiMgTGluZWFyIFJlZ3Jlc3Npb24gKGFsbCB2YXJpYWJsZXMpCm1vZGVsMyA9IGxtKFBheXJvbGwuU2FsYXJ5MjAyMyB+IEhSICsgUkJJICsgQVZHICsgT0JQKyBPUFMsIGRhdGE9Zmlyc3RiYXNlKQpzdW1tYXJ5KG1vZGVsMykKYGBgCgoKYGBge3J9CiMgU3VtIG9mIFNxdWFyZWQgRXJyb3JzClNTRSA9IHN1bShtb2RlbDMkcmVzaWR1YWxzXjIpClNTRQpgYGAKCgpgYGB7cn0KIyBSZW1vdmUgSFIKbW9kZWw0ID0gbG0oUGF5cm9sbC5TYWxhcnkyMDIzIH4gUkJJICsg
QVZHICsgT0JQK09QUywgZGF0YT1maXJzdGJhc2UpCnN1bW1hcnkobW9kZWw0KQpgYGAKCgpgYGB7cn0KZmlyc3RiYXNlPC1maXJzdGJhc2VbLC0oMTozKV0KYGBgCgoKYGBge3J9CmNvcihmaXJzdGJhc2UpCmBgYAoKCgpgYGB7cn0KI1JlbW92aW5nIEFWRwptb2RlbDUgPSBsbShQYXlyb2xsLlNhbGFyeTIwMjMgfiBSQkkgKyBPQlArT1BTLCBkYXRhPWZpcnN0YmFzZSkKc3VtbWFyeShtb2RlbDUpCmBgYAoKCgpgYGB7cn0KZmlyc3RiYXNlJG9mZmVuc2l2ZW1ldHJpYzwtZmlyc3RiYXNlJFJCSSsoMipmaXJzdGJhc2UkT0JQKzMqZmlyc3RiYXNlJE9QUykKYGBgCgoKYGBge3J9Cm1vZGVsNiA9IGxtKFBheXJvbGwuU2FsYXJ5MjAyMyB+IFJCSSArIE9CUCwgZGF0YT1maXJzdGJhc2UpCnN1bW1hcnkobW9kZWw2KQpgYGAKCgoKCmBgYHtyfQptb2RlbDc8LWxtKFBheXJvbGwuU2FsYXJ5MjAyM35vZmZlbnNpdmVtZXRyaWMsZGF0YSA9IGZpcnN0YmFzZSkKc3VtbWFyeShtb2RlbDcpCmBgYAoKYGBge3J9CiMgUmVhZCBpbiB0ZXN0IHNldApmaXJzdGJhc2VUZXN0ID0gcmVhZC5jc3YoImZpcnN0YmFzZXN0YXRzX3Rlc3QuY3N2IikKc3RyKGZpcnN0YmFzZVRlc3QpCmBgYAoKCgoKCmBgYHtyfQojIE1ha2UgdGVzdCBzZXQgcHJlZGljdGlvbnMKcHJlZGljdFRlc3QgPSBwcmVkaWN0KG1vZGVsNiwgbmV3ZGF0YT1maXJzdGJhc2VUZXN0KQpwcmVkaWN0VGVzdApgYGAKCg==