#Intro to Linear Regression: First Base hitting stats
firstbase = read.csv("firstbasestats.csv")
str(firstbase)
## 'data.frame': 23 obs. of 15 variables:
## $ Player : chr "Freddie Freeman" "Jose Abreu" "Nate Lowe" "Paul Goldschmidt" ...
## $ Pos : chr "1B" "1B" "1B" "1B" ...
## $ Team : chr "LAD" "CHW" "TEX" "STL" ...
## $ GP : int 159 157 157 151 160 140 160 145 146 143 ...
## $ AB : int 612 601 593 561 638 551 583 555 545 519 ...
## $ H : int 199 183 179 178 175 152 141 139 132 124 ...
## $ X2B : int 47 40 26 41 35 27 25 28 40 23 ...
## $ HR : int 21 15 27 35 32 20 36 22 8 18 ...
## $ RBI : int 100 75 76 115 97 84 94 85 53 63 ...
## $ AVG : num 0.325 0.305 0.302 0.317 0.274 0.276 0.242 0.251 0.242 0.239 ...
## $ OBP : num 0.407 0.379 0.358 0.404 0.339 0.34 0.327 0.305 0.288 0.319 ...
## $ SLG : num 0.511 0.446 0.492 0.578 0.48 0.437 0.477 0.423 0.36 0.391 ...
## $ OPS : num 0.918 0.824 0.851 0.981 0.818 0.777 0.804 0.729 0.647 0.71 ...
## $ WAR : num 5.77 4.19 3.21 7.86 3.85 3.07 5.05 1.32 -0.33 1.87 ...
## $ Payroll.Salary2023: num 27000000 19500000 4050000 26000000 14500000 ...
##‘data.frame’: 23 obs. of 15 variables: #$ Player : chr “Freddie
Freeman” “Jose Abreu” “Nate Lowe” “Paul Goldschmidt” … #$ Pos : chr “1B”
“1B” “1B” “1B” … #$ Team : chr “LAD” “CHW” “TEX” “STL” … #$ GP : int 159
157 157 151 160 140 160 145 146 143 … #$ AB : int 612 601 593 561 638
551 583 555 545 519 … #$ H : int 199 183 179 178 175 152 141 139 132 124
… #$ X2B : int 47 40 26 41 35 27 25 28 40 23 … #$ HR : int 21 15 27 35
32 20 36 22 8 18 … #$ RBI : int 100 75 76 115 97 84 94 85 53 63 … #$ AVG
: num 0.325 0.305 0.302 0.317 0.274 0.276 0.242 0.251 0.242 0.239 … #$
OBP : num 0.407 0.379 0.358 0.404 0.339 0.34 0.327 0.305 0.288 0.319 …
#$ SLG : num 0.511 0.446 0.492 0.578 0.48 0.437 0.477 0.423 0.36 0.391 …
#$ OPS : num 0.918 0.824 0.851 0.981 0.818 0.777 0.804 0.729 0.647 0.71
… #$ WAR : num 5.77 4.19 3.21 7.86 3.85 3.07 5.05 1.32 -0.33 1.87 … #$
Payroll.Salary2023: num 27000000 19500000 4050000 26000000 14500000 …
summary(firstbase) ##Player Pos Team GP AB H
#Length:23 Length:23 Length:23 Min. : 5.0 Min. : 14.0 Min. : 3.0
#Class :character Class :character Class :character 1st Qu.:105.5 1st
Qu.:309.0 1st Qu.: 74.5
#Mode :character Mode :character Mode :character Median :131.0 Median
:465.0 Median :115.0
#Mean :120.2 Mean :426.9 Mean :110.0
#3rd Qu.:152.0 3rd Qu.:558.0 3rd Qu.:146.5
#Max. :160.0 Max. :638.0 Max. :199.0
#X2B HR RBI AVG OBP SLG
#Min. : 1.00 Min. : 0.00 Min. : 1.00 Min. :0.2020 Min. :0.2140 Min.
:0.2860
#1st Qu.:13.50 1st Qu.: 8.00 1st Qu.: 27.00 1st Qu.:0.2180 1st
Qu.:0.3030 1st Qu.:0.3505
#Median :23.00 Median :18.00 Median : 63.00 Median :0.2420 Median
:0.3210 Median :0.4230
#Mean :22.39 Mean :17.09 Mean : 59.43 Mean :0.2499 Mean :0.3242 Mean
:0.4106
#3rd Qu.:28.00 3rd Qu.:24.50 3rd Qu.: 84.50 3rd Qu.:0.2750 3rd
Qu.:0.3395 3rd Qu.:0.4690
#Max. :47.00 Max. :36.00 Max. :115.00 Max. :0.3250 Max. :0.4070 Max.
:0.5780
#OPS WAR Payroll.Salary2023 #Min. :0.5000 Min. :-1.470 Min. :
720000
#1st Qu.:0.6445 1st Qu.: 0.190 1st Qu.: 739200
#Median :0.7290 Median : 1.310 Median : 4050000
#Mean :0.7346 Mean : 1.788 Mean : 6972743
#3rd Qu.:0.8175 3rd Qu.: 3.140 3rd Qu.: 8150000
#Max. :0.9810 Max. : 7.860 Max. :27000000
#Linear Regression (one variable)
model1 = lm(Payroll.Salary2023~RBI, data=firstbase)
summary(model1)
##
## Call:
## lm(formula = Payroll.Salary2023 ~ RBI, data = firstbase)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10250331 -5220790 -843455 2386848 13654950
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2363744 2866320 -0.825 0.41883
## RBI 157088 42465 3.699 0.00133 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6516000 on 21 degrees of freedom
## Multiple R-squared: 0.3945, Adjusted R-squared: 0.3657
## F-statistic: 13.68 on 1 and 21 DF, p-value: 0.001331
#Call: #lm(formula = Payroll.Salary2023 ~ RBI, data = firstbase)
#Residuals: # Min 1Q Median 3Q Max #-10250331 -5220790 -843455 2386848 13654950
#Coefficients: # Estimate Std. Error t value Pr(>|t|)
#(Intercept) -2363744 2866320 -0.825 0.41883
#RBI 157088 42465 3.699 0.00133 ** # — #Signif. codes: 0 ‘’
0.001 ‘’ 0.01 ‘’ 0.05 ‘.’ 0.1 ‘ ’ 1
#Residual standard error: 6516000 on 21 degrees of freedom #Multiple R-squared: 0.3945, Adjusted R-squared: 0.3657 #F-statistic: 13.68 on 1 and 21 DF, p-value: 0.001331 #Sum of Squared Errors model1\(residuals #1 2 3 4 5 6 7 8 9 #13654950.2 10082148.6 -5524939.3 10298631.2 1626214.0 -6731642.8 -5902522.2 -10250330.7 -4711916.8 #10 11 12 13 14 15 16 17 18 #-532796.1 -6667082.5 -6696203.1 7582148.6 -4916640.9 -1898125.3 -336532.3 -995042.5 -1311618.3 #19 20 21 22 23 #-843454.5 8050721.3 1250336.9 1847040.4 2926656.0 SSE = sum(model1\)residuals^2) SSE #[1] 8.914926e+14 #Linear Regression (two variables)
model2 = lm(Payroll.Salary2023~AVG+RBI,data=firstbase)
summary(model2)
##
## Call:
## lm(formula = Payroll.Salary2023 ~ AVG + RBI, data = firstbase)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9097952 -4621582 -33233 3016541 10260245
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -18083756 9479036 -1.908 0.0709 .
## AVG 74374031 42934155 1.732 0.0986 .
## RBI 108850 49212 2.212 0.0388 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6226000 on 20 degrees of freedom
## Multiple R-squared: 0.4735, Adjusted R-squared: 0.4209
## F-statistic: 8.994 on 2 and 20 DF, p-value: 0.001636
#Call: #lm(formula = Payroll.Salary2023 ~ AVG + RBI, data = firstbase)
#Residuals: # Min 1Q Median 3Q Max #-9097952 -4621582 -33233 3016541 10260245
#Coefficients: # Estimate Std. Error t value Pr(>|t|)
#(Intercept) -18083756 9479036 -1.908 0.0709 . #AVG 74374031 42934155
1.732 0.0986 . #RBI 108850 49212 2.212 0.0388 * — # Signif. codes: 0
‘’ 0.001 ‘’ 0.01 ‘’ 0.05 ‘.’ 0.1 ‘ ’ 1
#Residual standard error: 6226000 on 20 degrees of freedom #Multiple R-squared: 0.4735, Adjusted R-squared: 0.4209 #F-statistic: 8.994 on 2 and 20 DF, p-value: 0.001636 #Sum of Squared Errors
SSE=sum(model2$residuals^2)
SSE
## [1] 7.751841e+14
#[1] 7.751841e+14 #Linear Regression (all variables)
model3=lm(Payroll.Salary2023~HR+RBI+AVG+OBP+OPS,data=firstbase)
summary(model3)
##
## Call:
## lm(formula = Payroll.Salary2023 ~ HR + RBI + AVG + OBP + OPS,
## data = firstbase)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9611440 -3338119 64016 4472451 9490309
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -31107858 11738494 -2.650 0.0168 *
## HR -341069 552069 -0.618 0.5449
## RBI 115786 113932 1.016 0.3237
## AVG -63824769 104544645 -0.611 0.5496
## OBP 27054948 131210166 0.206 0.8391
## OPS 60181012 95415131 0.631 0.5366
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6023000 on 17 degrees of freedom
## Multiple R-squared: 0.5811, Adjusted R-squared: 0.4579
## F-statistic: 4.717 on 5 and 17 DF, p-value: 0.006951
#Call: # lm(formula = Payroll.Salary2023 ~ HR + RBI + AVG + OBP + OPS, # data = firstbase)
#Residuals: # Min 1Q Median 3Q Max #-9611440 -3338119 64016 4472451 9490309
#Coefficients: # Estimate Std. Error t value Pr(>|t|)
#(Intercept) -31107858 11738494 -2.650 0.0168 * # HR -341069 552069
-0.618 0.5449
#RBI 115786 113932 1.016 0.3237
#AVG -63824769 104544645 -0.611 0.5496
#OBP 27054948 131210166 0.206 0.8391
#OPS 60181012 95415131 0.631 0.5366
— # Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05
‘.’ 0.1 ‘ ’ 1
#Residual standard error: 6023000 on 17 degrees of freedom #Multiple R-squared: 0.5811, Adjusted R-squared: 0.4579 #F-statistic: 4.717 on 5 and 17 DF, p-value: 0.006951 #Call: # lm(formula = Payroll.Salary2023 ~ HR + RBI + AVG + OBP + OPS, # data = firstbase)
#Residuals: # Min 1Q Median 3Q Max #-9611440 -3338119 64016 4472451 9490309
#Coefficients: # Estimate Std. Error t value Pr(>|t|)
#(Intercept) -31107858 11738494 -2.650 0.0168 * # HR -341069 552069
-0.618 0.5449
#RBI 115786 113932 1.016 0.3237
#AVG -63824769 104544645 -0.611 0.5496
#OBP 27054948 131210166 0.206 0.8391
#OPS 60181012 95415131 0.631 0.5366
— #Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05
‘.’ 0.1 ‘ ’ 1
#Residual standard error: 6023000 on 17 degrees of freedom #Multiple R-squared: 0.5811, Adjusted R-squared: 0.4579 #F-statistic: 4.717 on 5 and 17 DF, p-value: 0.006951 #Sum of Squared Errors
SSE=sum(model3$residuals^2)
SSE
## [1] 6.167793e+14
#[1] 6.167793e+14 #Remove HR
model4=lm(Payroll.Salary2023~RBI+AVG+OBP+OPS,data=firstbase)
summary(model4)
##
## Call:
## lm(formula = Payroll.Salary2023 ~ RBI + AVG + OBP + OPS, data = firstbase)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9399551 -3573842 98921 3979339 9263512
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -29466887 11235931 -2.623 0.0173 *
## RBI 71495 87015 0.822 0.4220
## AVG -11035457 59192453 -0.186 0.8542
## OBP 86360720 87899074 0.982 0.3389
## OPS 9464546 47788458 0.198 0.8452
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5919000 on 18 degrees of freedom
## Multiple R-squared: 0.5717, Adjusted R-squared: 0.4765
## F-statistic: 6.007 on 4 and 18 DF, p-value: 0.00298
#Call: # lm(formula = Payroll.Salary2023 ~ RBI + AVG + OBP + OPS, data = firstbase)
#Residuals: # Min 1Q Median 3Q Max #-9399551 -3573842 98921 3979339 9263512
#Coefficients: # Estimate Std. Error t value Pr(>|t|)
#(Intercept) -29466887 11235931 -2.623 0.0173 * # RBI 71495 87015 0.822
0.4220
#AVG -11035457 59192453 -0.186 0.8542
#OBP 86360720 87899074 0.982 0.3389
#OPS 9464546 47788458 0.198 0.8452
— # Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05
‘.’ 0.1 ‘ ’ 1
#Residual standard error: 5919000 on 18 degrees of freedom #Multiple R-squared: 0.5717, Adjusted R-squared: 0.4765 #F-statistic: 6.007 on 4 and 18 DF, p-value: 0.00298
firstbase<-firstbase[,-(1:3)]
#Correlations
cor(firstbase$RBI,firstbase$Payroll.Salary2023)
## [1] 0.6281239
#[1] 0.6281239
cor(firstbase$AVG, firstbase$OBP)
## [1] 0.8028894
#[1] 0.8028894
cor(firstbase)
## GP AB H X2B HR RBI
## GP 1.0000000 0.9779421 0.9056508 0.8446267 0.7432552 0.8813917
## AB 0.9779421 1.0000000 0.9516701 0.8924632 0.7721339 0.9125839
## H 0.9056508 0.9516701 1.0000000 0.9308318 0.7155225 0.9068893
## X2B 0.8446267 0.8924632 0.9308318 1.0000000 0.5889699 0.8485911
## HR 0.7432552 0.7721339 0.7155225 0.5889699 1.0000000 0.8929048
## RBI 0.8813917 0.9125839 0.9068893 0.8485911 0.8929048 1.0000000
## AVG 0.4430808 0.5126292 0.7393167 0.6613085 0.3444242 0.5658479
## OBP 0.4841583 0.5026125 0.6560021 0.5466537 0.4603408 0.5704463
## SLG 0.6875270 0.7471949 0.8211406 0.7211259 0.8681501 0.8824090
## OPS 0.6504483 0.6980141 0.8069779 0.6966830 0.7638721 0.8156612
## WAR 0.5645243 0.6211558 0.7688712 0.6757470 0.6897677 0.7885666
## Payroll.Salary2023 0.4614889 0.5018820 0.6249911 0.6450730 0.5317619 0.6281239
## AVG OBP SLG OPS WAR
## GP 0.4430808 0.4841583 0.6875270 0.6504483 0.5645243
## AB 0.5126292 0.5026125 0.7471949 0.6980141 0.6211558
## H 0.7393167 0.6560021 0.8211406 0.8069779 0.7688712
## X2B 0.6613085 0.5466537 0.7211259 0.6966830 0.6757470
## HR 0.3444242 0.4603408 0.8681501 0.7638721 0.6897677
## RBI 0.5658479 0.5704463 0.8824090 0.8156612 0.7885666
## AVG 1.0000000 0.8028894 0.7254274 0.7989005 0.7855945
## OBP 0.8028894 1.0000000 0.7617499 0.8987390 0.7766375
## SLG 0.7254274 0.7617499 1.0000000 0.9686752 0.8611140
## OPS 0.7989005 0.8987390 0.9686752 1.0000000 0.8799893
## WAR 0.7855945 0.7766375 0.8611140 0.8799893 1.0000000
## Payroll.Salary2023 0.5871543 0.7025979 0.6974086 0.7394981 0.8086359
## Payroll.Salary2023
## GP 0.4614889
## AB 0.5018820
## H 0.6249911
## X2B 0.6450730
## HR 0.5317619
## RBI 0.6281239
## AVG 0.5871543
## OBP 0.7025979
## SLG 0.6974086
## OPS 0.7394981
## WAR 0.8086359
## Payroll.Salary2023 1.0000000
#GP AB H X2B HR RBI AVG OBP SLG #GP 1.0000000 0.9779421 0.9056508 0.8446267 0.7432552 0.8813917 0.4430808 0.4841583 0.6875270 #AB 0.9779421 1.0000000 0.9516701 0.8924632 0.7721339 0.9125839 0.5126292 0.5026125 0.7471949 #H 0.9056508 0.9516701 1.0000000 0.9308318 0.7155225 0.9068893 0.7393167 0.6560021 0.8211406 #X2B 0.8446267 0.8924632 0.9308318 1.0000000 0.5889699 0.8485911 0.6613085 0.5466537 0.7211259 #HR 0.7432552 0.7721339 0.7155225 0.5889699 1.0000000 0.8929048 0.3444242 0.4603408 0.8681501 #RBI 0.8813917 0.9125839 0.9068893 0.8485911 0.8929048 1.0000000 0.5658479 0.5704463 0.8824090 #AVG 0.4430808 0.5126292 0.7393167 0.6613085 0.3444242 0.5658479 1.0000000 0.8028894 0.7254274 #SLG 0.6875270 0.7471949 0.8211406 0.7211259 0.8681501 0.8824090 0.7254274 0.7617499 1.0000000 #OPS 0.6504483 0.6980141 0.8069779 0.6966830 0.7638721 0.8156612 0.7989005 0.8987390 0.9686752 #WAR 0.5645243 0.6211558 0.7688712 0.6757470 0.6897677 0.7885666 0.7855945 0.7766375 0.8611140 #Payroll.Salary2023 0.4614889 0.5018820 0.6249911 0.6450730 0.5317619 0.6281239 0.5871543 0.7025979 0.6974086 #OPS WAR Payroll.Salary2023 #GP 0.6504483 0.5645243 0.4614889 #AB 0.6980141 0.6211558 0.5018820 #H 0.8069779 0.7688712 0.6249911 #X2B 0.6966830 0.6757470 0.6450730 #HR 0.7638721 0.6897677 0.5317619 #RBI 0.8156612 0.7885666 0.6281239 #AVG 0.7989005 0.7855945 0.5871543 #OBP 0.8987390 0.7766375 0.7025979 #SLG 0.9686752 0.8611140 0.6974086 #OPS 1.0000000 0.8799893 0.7394981 #WAR 0.8799893 1.0000000 0.8086359 #Payroll.Salary2023 0.7394981 0.8086359 1.0000000
#Removing AVG
model5=lm(Payroll.Salary2023~RBI+OBP+OPS,data=firstbase)
summary(model5)
##
## Call:
## lm(formula = Payroll.Salary2023 ~ RBI + OBP + OPS, data = firstbase)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9465449 -3411234 259746 4102864 8876798
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -29737007 10855411 -2.739 0.013 *
## RBI 72393 84646 0.855 0.403
## OBP 82751360 83534224 0.991 0.334
## OPS 7598051 45525575 0.167 0.869
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5767000 on 19 degrees of freedom
## Multiple R-squared: 0.5709, Adjusted R-squared: 0.5031
## F-statistic: 8.426 on 3 and 19 DF, p-value: 0.000913
#Call: # lm(formula = Payroll.Salary2023 ~ RBI + OBP + OPS, data = firstbase)
#Residuals: # Min 1Q Median 3Q Max #-9465449 -3411234 259746 4102864 8876798
#Coefficients: # Estimate Std. Error t value Pr(>|t|)
#(Intercept) -29737007 10855411 -2.739 0.013 * # RBI 72393 84646 0.855
0.403
#OBP 82751360 83534224 0.991 0.334
#OPS 7598051 45525575 0.167 0.869
— # Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05
‘.’ 0.1 ‘ ’ 1
#Residual standard error: 5767000 on 19 degrees of freedom #Multiple R-squared: 0.5709, Adjusted R-squared: 0.5031 #F-statistic: 8.426 on 3 and 19 DF, p-value: 0.000913
model6=lm(Payroll.Salary2023~RBI+OBP,data=firstbase)
summary(model6)
##
## Call:
## lm(formula = Payroll.Salary2023 ~ RBI + OBP, data = firstbase)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9045497 -3487008 139497 4084739 9190185
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -28984802 9632560 -3.009 0.00693 **
## RBI 84278 44634 1.888 0.07360 .
## OBP 95468873 33385182 2.860 0.00969 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5625000 on 20 degrees of freedom
## Multiple R-squared: 0.5703, Adjusted R-squared: 0.5273
## F-statistic: 13.27 on 2 and 20 DF, p-value: 0.0002149
#Call: #lm(formula = Payroll.Salary2023 ~ RBI + OBP, data = firstbase)
#Residuals: # Min 1Q Median 3Q Max #-9045497 -3487008 139497 4084739 9190185
#Coefficients: # Estimate Std. Error t value Pr(>|t|)
#(Intercept) -28984802 9632560 -3.009 0.00693 # RBI 84278 44634
1.888 0.07360 . #OBP 95468873 33385182 2.860 0.00969 —
#Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05 ‘.’
0.1 ‘ ’ 1
#Residual standard error: 5625000 on 20 degrees of freedom #Multiple R-squared: 0.5703, Adjusted R-squared: 0.5273 #F-statistic: 13.27 on 2 and 20 DF, p-value: 0.0002149
#Read in test set
firstbaseTest=read.csv("firstbasestats_test.csv")
str(firstbaseTest)
## 'data.frame': 2 obs. of 15 variables:
## $ Player : chr "Matt Olson" "Josh Bell"
## $ Pos : chr "1B" "1B"
## $ Team : chr "ATL" "SD"
## $ GP : int 162 156
## $ AB : int 616 552
## $ H : int 148 147
## $ X2B : int 44 29
## $ HR : int 34 17
## $ RBI : int 103 71
## $ AVG : num 0.24 0.266
## $ OBP : num 0.325 0.362
## $ SLG : num 0.477 0.422
## $ OPS : num 0.802 0.784
## $ WAR : num 3.29 3.5
## $ Payroll.Salary2023: num 21000000 16500000
#‘data.frame’: 2 obs. of 15 variables: # $ Player : chr “Matt Olson” “Josh Bell” #$ Pos : chr “1B” “1B” #$ Team : chr “ATL” “SD” #$ GP : int 162 156 #$ AB : int 616 552 #$ H : int 148 147 #$ X2B : int 44 29 #$ HR : int 34 17 #$ RBI : int 103 71 #$ AVG : num 0.24 0.266 #$ OBP : num 0.325 0.362 #$ SLG : num 0.477 0.422 #$ OPS : num 0.802 0.784 #$ WAR : num 3.29 3.5 #$ Payroll.Salary2023: num 21000000 16500000
#Make test set predicitions
predictTest=predict(model6,newdata = firstbaseTest)
predictTest
## 1 2
## 10723186 11558647
#1 2 #10723186 11558647
#Compute R-squared
SSE=sum((firstbaseTest$Payroll.Salary2023-predictTest)^2)
SST=sum((firstbaseTest$Payroll.Salary2023-mean(firstbase$Payroll.Salary2023))^2)
1-SSE/SST
## [1] 0.5477734
#[1] 0.5477734 ```