if(!require(tidyverse)) install.packages("tidyverse")
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tidyverse)
if(!require(leaps)) install.packages("leaps")
## Loading required package: leaps
library(leaps)
if(!require(ggplot2)) install.packages("ggplot2")
library(ggplot2)
if(!require(dplyr)) install.packages("dplyr")
library(dplyr)
if(!require(knitr)) install.packages("knitr")
## Loading required package: knitr
library(knitr)
if(!require(tidymodels)) install.packages("tidymodels")
## Loading required package: tidymodels
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom 1.0.1 ✔ rsample 1.1.0
## ✔ dials 1.1.0 ✔ tune 1.0.1
## ✔ infer 1.0.3 ✔ workflows 1.1.0
## ✔ modeldata 1.0.1 ✔ workflowsets 1.0.0
## ✔ parsnip 1.0.2 ✔ yardstick 1.1.0
## ✔ recipes 1.0.3
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ recipes::fixed() masks stringr::fixed()
## ✖ dplyr::lag() masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step() masks stats::step()
## • Search for functions across packages at https://www.tidymodels.org/find/
library(tidymodels)
if(!require(GGally)) install.packages("GGally")
## Loading required package: GGally
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(GGally)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(ggfortify)
## Registered S3 method overwritten by 'ggfortify':
## method from
## autoplot.glmnet parsnip
# Load the player salary / performance table and preview its first six rows.
# NOTE(review): the path is specific to one machine; the file must exist at
# this location for the script to run.
nba.salary <- read.csv(
  file = "~/Desktop/SLM 418/NBA_salary_performance.csv"
)
head(nba.salary, n = 6L)
## X namePlayer salary yearSeason slugPosition agePlayer slugTeamBREF
## 1 1 Aaron Gordon 18136364 2021 PF 25 TOT
## 2 2 Aaron Holiday 2345640 2021 PG 24 IND
## 3 3 Aaron Nesmith 3458400 2021 SF 21 BOS
## 4 4 Abdel Nader 1752950 2021 SF 27 PHO
## 5 5 Al Horford 27500000 2021 C 34 OKC
## 6 6 Al-Farouq Aminu 9720900 2021 PF 30 TOT
## countGames minutes ratioPER pctTrueShooting pct3PRate pctFTRate pctORB pctDRB
## 1 49 1372 14.7 0.549 0.352 0.301 0.059 0.164
## 2 66 1176 9.3 0.503 0.417 0.190 0.014 0.068
## 3 44 627 9.9 0.587 0.628 0.171 0.046 0.169
## 4 24 355 13.4 0.605 0.371 0.319 0.023 0.173
## 5 28 782 17.4 0.538 0.422 0.061 0.038 0.215
## 6 23 434 8.9 0.469 0.374 0.222 0.051 0.219
## pctTRB pctAST pctSTL pctBLK pctTOV pctUSG ratioOWS ratioDWS ratioWS
## 1 0.111 0.179 0.012 0.023 0.148 0.208 0.7 1.2 2.0
## 2 0.041 0.140 0.018 0.900 0.123 0.195 -0.6 0.8 0.2
## 3 0.107 0.043 0.012 0.013 0.106 0.136 0.6 0.5 1.1
## 4 0.099 0.074 0.014 0.023 0.126 0.190 0.3 0.4 0.7
## 5 0.125 0.202 0.015 0.030 0.073 0.216 0.7 0.9 1.6
## 6 0.133 0.101 0.021 0.019 0.205 0.136 -0.4 0.5 0.1
## ratioWSPer48 ratioOBPM ratioDBPM ratioBPM ratioVORP countGamesStarted pctFG
## 1 0.069 0.3 -0.1 0.2 0.8 49 0.465
## 2 0.010 -3.2 -1.0 -4.1 -0.6 8 0.390
## 3 0.083 -2.5 -0.3 -2.8 -0.1 1 0.445
## 4 0.095 -1.2 0.8 -0.4 0.2 0 0.491
## 5 0.100 1.9 1.4 3.3 1.0 28 0.450
## 6 0.011 -5.0 1.0 -4.0 -0.2 14 0.384
## pctFG3 pctFG2 pctEFG pctFT minutesPerGame fgmPerGame fgaPerGame fg3mPerGame
## 1 0.339 0.533 0.524 0.651 28.0 4.7 10.1 1.2
## 2 0.368 0.406 0.467 0.819 17.8 2.6 6.6 1.0
## 3 0.379 0.557 0.564 0.786 14.3 1.7 3.7 0.9
## 4 0.419 0.534 0.569 0.757 14.8 2.4 4.8 0.8
## 5 0.368 0.510 0.528 0.818 27.9 5.8 12.9 2.0
## 6 0.216 0.484 0.424 0.818 18.9 1.7 4.3 0.3
## fg3aPerGame fg2mPerGame fg2aPerGame ftmPerGame ftaPerGame orbPerGame
## 1 3.6 3.5 6.6 2.0 3.0 1.6
## 2 2.8 1.6 3.8 1.0 1.3 0.2
## 3 2.3 0.8 1.4 0.5 0.6 0.6
## 4 1.8 1.6 3.0 1.2 1.5 0.3
## 5 5.4 3.8 7.4 0.6 0.8 1.0
## 6 1.6 1.3 2.7 0.8 1.0 1.0
## drbPerGame trbPerGame astPerGame stlPerGame blkPerGame tovPerGame pfPerGame
## 1 4.2 5.8 3.3 0.7 0.7 2.0 1.8
## 2 1.1 1.3 1.9 0.7 0.2 1.0 1.4
## 3 2.1 2.7 0.5 0.3 0.2 0.5 1.8
## 4 2.3 2.6 0.8 0.4 0.4 0.8 1.4
## 5 5.7 6.7 3.4 0.9 0.9 1.0 1.7
## 6 3.8 4.8 1.3 0.8 0.4 1.2 1.3
## ptsPerGame isAllNBA1 isAllNBA2 isAllNBA3
## 1 12.6 0 0 0
## 2 7.2 0 0 0
## 3 4.7 0 0 0
## 4 6.7 0 0 0
## 5 14.2 0 0 0
## 6 4.4 0 0 0
Full Model
# Full main-effects model: salary regressed on age, position, and every
# performance metric used in this analysis. slugPosition enters as a
# categorical predictor (one dummy coefficient per non-reference position).
# countGamesStarted was listed twice in the original formula; the duplicate is
# dropped here. The fit is unchanged because lm() keeps a repeated term once.
full.model <- lm(
  salary ~ agePlayer + ratioPER + slugPosition + countGames +
    countGamesStarted + minutes + pctTrueShooting + pct3PRate + pctFTRate +
    pctORB + pctDRB + pctTRB + pctAST + pctSTL + pctBLK + pctTOV + pctUSG +
    ratioOWS + ratioDWS + ratioWS + ratioWSPer48 + ratioOBPM + ratioDBPM +
    ratioBPM + ratioVORP + pctFG + pctFG3 + pctFG2 + pctEFG + pctFT +
    minutesPerGame + fgmPerGame + fgaPerGame + fg3mPerGame + fg3aPerGame +
    fg2mPerGame + fg2aPerGame + ftmPerGame + ftaPerGame + orbPerGame +
    drbPerGame + trbPerGame + astPerGame + stlPerGame + blkPerGame +
    tovPerGame + pfPerGame + ptsPerGame,
  data = nba.salary
)
summary(full.model)
##
## Call:
## lm(formula = salary ~ agePlayer + ratioPER + slugPosition + countGames +
## countGamesStarted + minutes + pctTrueShooting + pct3PRate +
## pctFTRate + pctORB + pctDRB + pctTRB + pctAST + pctSTL +
## pctBLK + pctTOV + pctUSG + ratioOWS + ratioDWS + ratioWS +
## ratioWSPer48 + ratioOBPM + ratioDBPM + ratioBPM + ratioVORP +
## countGamesStarted + pctFG + pctFG3 + pctFG2 + pctEFG + pctFT +
## minutesPerGame + fgmPerGame + fgaPerGame + fg3mPerGame +
## fg3aPerGame + fg2mPerGame + fg2aPerGame + ftmPerGame + ftaPerGame +
## orbPerGame + drbPerGame + trbPerGame + astPerGame + stlPerGame +
## blkPerGame + tovPerGame + pfPerGame + ptsPerGame, data = nba.salary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19297020 -2799667 -321819 2156852 18775585
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13793136 12840844 1.074 0.283540
## agePlayer 726933 79144 9.185 < 2e-16 ***
## ratioPER -1406129 638184 -2.203 0.028266 *
## slugPositionC-PF -49838 4132635 -0.012 0.990385
## slugPositionPF 882641 1218093 0.725 0.469208
## slugPositionPF-C -1712144 5713817 -0.300 0.764634
## slugPositionPG -1079397 1705975 -0.633 0.527360
## slugPositionPG-SG 1623624 6380593 0.254 0.799297
## slugPositionSF -1251433 1437793 -0.870 0.384727
## slugPositionSF-PF 805949 5930210 0.136 0.891979
## slugPositionSF-SG 1795471 6229642 0.288 0.773365
## slugPositionSG -1683793 1497242 -1.125 0.261582
## slugPositionSG-PG -4611140 4189569 -1.101 0.271867
## slugPositionSG-SF -806364 5764101 -0.140 0.888829
## countGames -37107 69874 -0.531 0.595739
## countGamesStarted 81906 30797 2.660 0.008209 **
## minutes -5979 3526 -1.696 0.090913 .
## pctTrueShooting -8316728 38093846 -0.218 0.827314
## pct3PRate 17400847 14120603 1.232 0.218719
## pctFTRate 3378143 6620281 0.510 0.610204
## pctORB -1300958 2923606 -0.445 0.656625
## pctDRB -80572906 45565775 -1.768 0.077944 .
## pctTRB 89469496 74836686 1.196 0.232744
## pctAST -34161783 16837100 -2.029 0.043270 *
## pctSTL -1638592 1254653 -1.306 0.192464
## pctBLK 217529 1186609 0.183 0.854660
## pctTOV 2256666 18215068 0.124 0.901478
## pctUSG -2044897 32591281 -0.063 0.950009
## ratioOWS -4794630 6342555 -0.756 0.450224
## ratioDWS -2840493 6403123 -0.444 0.657617
## ratioWS 6747935 6391512 1.056 0.291853
## ratioWSPer48 -60441246 40666646 -1.486 0.138171
## ratioOBPM -266273 6104953 -0.044 0.965237
## ratioDBPM -2590451 6149684 -0.421 0.673861
## ratioBPM 3118502 6090660 0.512 0.608986
## ratioVORP -4199494 1707071 -2.460 0.014407 *
## pctFG 133886300 77081246 1.737 0.083335 .
## pctFG3 -6320909 4585212 -1.379 0.168975
## pctFG2 -7819221 9076987 -0.861 0.389629
## pctEFG -101005368 77170618 -1.309 0.191499
## pctFT -491192 2019759 -0.243 0.808008
## minutesPerGame -1045674 291576 -3.586 0.000386 ***
## fgmPerGame -3538307 9377368 -0.377 0.706177
## fgaPerGame 2965454 5655110 0.524 0.600365
## fg3mPerGame 8306604 8340868 0.996 0.320037
## fg3aPerGame -5924367 5863504 -1.010 0.313058
## fg2mPerGame -878363 6313786 -0.139 0.889442
## fg2aPerGame -1705927 5778572 -0.295 0.768016
## ftmPerGame 1869622 4722640 0.396 0.692447
## ftaPerGame -2634068 2100138 -1.254 0.210650
## orbPerGame -1426464 6328796 -0.225 0.821814
## drbPerGame 3238657 6228534 0.520 0.603435
## trbPerGame -306019 6170589 -0.050 0.960477
## astPerGame 3462166 1015140 3.411 0.000729 ***
## stlPerGame 5899763 2235723 2.639 0.008715 **
## blkPerGame 3241413 1514024 2.141 0.033017 *
## tovPerGame -1726613 2101184 -0.822 0.411826
## pfPerGame -242322 902179 -0.269 0.788409
## ptsPerGame 2521802 4210088 0.599 0.549593
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5352000 on 328 degrees of freedom
## Multiple R-squared: 0.7371, Adjusted R-squared: 0.6906
## F-statistic: 15.86 on 58 and 328 DF, p-value: < 2.2e-16
Backwards Model
# Backward elimination: begin with the full model and let step() drop terms
# one at a time by AIC. The per-step trace is suppressed.
step.pick.backward <- stats::step(
  full.model,
  direction = "backward",
  trace = FALSE
)
summary(step.pick.backward)
##
## Call:
## lm(formula = salary ~ agePlayer + ratioPER + countGamesStarted +
## minutes + pctDRB + pctAST + pctSTL + ratioOWS + ratioWS +
## ratioWSPer48 + ratioDBPM + ratioBPM + ratioVORP + pctFG +
## pctFG3 + pctEFG + minutesPerGame + fgmPerGame + fg3mPerGame +
## fg3aPerGame + ftaPerGame + drbPerGame + astPerGame + stlPerGame +
## blkPerGame + ptsPerGame, data = nba.salary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18052761 -2945580 -418173 2573254 20418248
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21931341 7657306 2.864 0.004427 **
## agePlayer 735641 73742 9.976 < 2e-16 ***
## ratioPER -1369142 407245 -3.362 0.000857 ***
## countGamesStarted 96854 26121 3.708 0.000242 ***
## minutes -6531 1767 -3.695 0.000254 ***
## pctDRB -20304324 14410190 -1.409 0.159691
## pctAST -43284066 13190884 -3.281 0.001134 **
## pctSTL -1800123 1178915 -1.527 0.127656
## ratioOWS -1757473 1056041 -1.664 0.096941 .
## ratioWS 2909910 1162716 2.503 0.012768 *
## ratioWSPer48 -33147804 22558348 -1.469 0.142591
## ratioDBPM -2311028 677789 -3.410 0.000724 ***
## ratioBPM 2648626 608644 4.352 1.76e-05 ***
## ratioVORP -3040642 1368276 -2.222 0.026888 *
## pctFG 55172643 18096254 3.049 0.002467 **
## pctFG3 -7832828 3886727 -2.015 0.044619 *
## pctEFG -57367117 18535725 -3.095 0.002123 **
## minutesPerGame -953594 206757 -4.612 5.55e-06 ***
## fgmPerGame -6056858 3047454 -1.988 0.047623 *
## fg3mPerGame 4104065 2866338 1.432 0.153064
## fg3aPerGame -2376308 1017503 -2.335 0.020070 *
## ftaPerGame -2542082 1203476 -2.112 0.035350 *
## drbPerGame 2276726 679968 3.348 0.000899 ***
## astPerGame 3529492 714720 4.938 1.21e-06 ***
## stlPerGame 4567942 1870838 2.442 0.015101 *
## blkPerGame 3124701 1202903 2.598 0.009772 **
## ptsPerGame 4156735 1484251 2.801 0.005377 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5234000 on 360 degrees of freedom
## Multiple R-squared: 0.7241, Adjusted R-squared: 0.7041
## F-statistic: 36.33 on 26 and 360 DF, p-value: < 2.2e-16
The most significant predictors here (those with p < 0.001) are agePlayer, ratioPER, countGamesStarted, minutes, ratioDBPM, ratioBPM, minutesPerGame, drbPerGame, and astPerGame.
# Reduced model keeping only the predictors from the backward-selected fit
# that were chosen by hand in the note above.
best.backward <- lm(
  salary ~ agePlayer + ratioPER + countGamesStarted + minutes + ratioDBPM +
    ratioBPM + minutesPerGame + drbPerGame + astPerGame,
  data = nba.salary
)
summary(best.backward)
##
## Call:
## lm(formula = salary ~ agePlayer + ratioPER + countGamesStarted +
## minutes + ratioDBPM + ratioBPM + minutesPerGame + drbPerGame +
## astPerGame, data = nba.salary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17813901 -3583270 -386265 3340718 18774037
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15495396 3743638 -4.139 4.30e-05 ***
## agePlayer 631454 78486 8.045 1.13e-14 ***
## ratioPER -34659 159467 -0.217 0.8281
## countGamesStarted 112148 27924 4.016 7.14e-05 ***
## minutes -4404 1120 -3.933 0.0001 ***
## ratioDBPM -1203707 380876 -3.160 0.0017 **
## ratioBPM 698999 292871 2.387 0.0175 *
## minutesPerGame 144489 95167 1.518 0.1298
## drbPerGame 1257082 296050 4.246 2.74e-05 ***
## astPerGame 1564403 232040 6.742 5.88e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5857000 on 377 degrees of freedom
## Multiple R-squared: 0.6381, Adjusted R-squared: 0.6295
## F-statistic: 73.86 on 9 and 377 DF, p-value: < 2.2e-16
Forwards Model
# Forward selection: start from the intercept-only model and let step() add
# one term at a time by AIC, with every predictor of the full model available
# as a candidate. Starting from full.model itself leaves nothing to add, so
# step() would just hand the full model back unchanged.
# The trace is suppressed, as in the backward run, because a forward search
# over this many candidates prints a long table at every step.
# NOTE: re-knit after this change; console output shown after this call was
# produced by the earlier version and will differ.
null.model <- lm(salary ~ 1, data = nba.salary)
step.pick.forward <- stats::step(
  null.model,
  scope = list(lower = formula(null.model), upper = formula(full.model)),
  direction = "forward",
  trace = FALSE
)
## Start: AIC=12045.57
## salary ~ agePlayer + ratioPER + slugPosition + countGames + countGamesStarted +
## minutes + pctTrueShooting + pct3PRate + pctFTRate + pctORB +
## pctDRB + pctTRB + pctAST + pctSTL + pctBLK + pctTOV + pctUSG +
## ratioOWS + ratioDWS + ratioWS + ratioWSPer48 + ratioOBPM +
## ratioDBPM + ratioBPM + ratioVORP + countGamesStarted + pctFG +
## pctFG3 + pctFG2 + pctEFG + pctFT + minutesPerGame + fgmPerGame +
## fgaPerGame + fg3mPerGame + fg3aPerGame + fg2mPerGame + fg2aPerGame +
## ftmPerGame + ftaPerGame + orbPerGame + drbPerGame + trbPerGame +
## astPerGame + stlPerGame + blkPerGame + tovPerGame + pfPerGame +
## ptsPerGame
# Print the coefficient table and fit statistics of the forward-selected model.
summary(step.pick.forward)
##
## Call:
## lm(formula = salary ~ agePlayer + ratioPER + slugPosition + countGames +
## countGamesStarted + minutes + pctTrueShooting + pct3PRate +
## pctFTRate + pctORB + pctDRB + pctTRB + pctAST + pctSTL +
## pctBLK + pctTOV + pctUSG + ratioOWS + ratioDWS + ratioWS +
## ratioWSPer48 + ratioOBPM + ratioDBPM + ratioBPM + ratioVORP +
## countGamesStarted + pctFG + pctFG3 + pctFG2 + pctEFG + pctFT +
## minutesPerGame + fgmPerGame + fgaPerGame + fg3mPerGame +
## fg3aPerGame + fg2mPerGame + fg2aPerGame + ftmPerGame + ftaPerGame +
## orbPerGame + drbPerGame + trbPerGame + astPerGame + stlPerGame +
## blkPerGame + tovPerGame + pfPerGame + ptsPerGame, data = nba.salary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19297020 -2799667 -321819 2156852 18775585
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13793136 12840844 1.074 0.283540
## agePlayer 726933 79144 9.185 < 2e-16 ***
## ratioPER -1406129 638184 -2.203 0.028266 *
## slugPositionC-PF -49838 4132635 -0.012 0.990385
## slugPositionPF 882641 1218093 0.725 0.469208
## slugPositionPF-C -1712144 5713817 -0.300 0.764634
## slugPositionPG -1079397 1705975 -0.633 0.527360
## slugPositionPG-SG 1623624 6380593 0.254 0.799297
## slugPositionSF -1251433 1437793 -0.870 0.384727
## slugPositionSF-PF 805949 5930210 0.136 0.891979
## slugPositionSF-SG 1795471 6229642 0.288 0.773365
## slugPositionSG -1683793 1497242 -1.125 0.261582
## slugPositionSG-PG -4611140 4189569 -1.101 0.271867
## slugPositionSG-SF -806364 5764101 -0.140 0.888829
## countGames -37107 69874 -0.531 0.595739
## countGamesStarted 81906 30797 2.660 0.008209 **
## minutes -5979 3526 -1.696 0.090913 .
## pctTrueShooting -8316728 38093846 -0.218 0.827314
## pct3PRate 17400847 14120603 1.232 0.218719
## pctFTRate 3378143 6620281 0.510 0.610204
## pctORB -1300958 2923606 -0.445 0.656625
## pctDRB -80572906 45565775 -1.768 0.077944 .
## pctTRB 89469496 74836686 1.196 0.232744
## pctAST -34161783 16837100 -2.029 0.043270 *
## pctSTL -1638592 1254653 -1.306 0.192464
## pctBLK 217529 1186609 0.183 0.854660
## pctTOV 2256666 18215068 0.124 0.901478
## pctUSG -2044897 32591281 -0.063 0.950009
## ratioOWS -4794630 6342555 -0.756 0.450224
## ratioDWS -2840493 6403123 -0.444 0.657617
## ratioWS 6747935 6391512 1.056 0.291853
## ratioWSPer48 -60441246 40666646 -1.486 0.138171
## ratioOBPM -266273 6104953 -0.044 0.965237
## ratioDBPM -2590451 6149684 -0.421 0.673861
## ratioBPM 3118502 6090660 0.512 0.608986
## ratioVORP -4199494 1707071 -2.460 0.014407 *
## pctFG 133886300 77081246 1.737 0.083335 .
## pctFG3 -6320909 4585212 -1.379 0.168975
## pctFG2 -7819221 9076987 -0.861 0.389629
## pctEFG -101005368 77170618 -1.309 0.191499
## pctFT -491192 2019759 -0.243 0.808008
## minutesPerGame -1045674 291576 -3.586 0.000386 ***
## fgmPerGame -3538307 9377368 -0.377 0.706177
## fgaPerGame 2965454 5655110 0.524 0.600365
## fg3mPerGame 8306604 8340868 0.996 0.320037
## fg3aPerGame -5924367 5863504 -1.010 0.313058
## fg2mPerGame -878363 6313786 -0.139 0.889442
## fg2aPerGame -1705927 5778572 -0.295 0.768016
## ftmPerGame 1869622 4722640 0.396 0.692447
## ftaPerGame -2634068 2100138 -1.254 0.210650
## orbPerGame -1426464 6328796 -0.225 0.821814
## drbPerGame 3238657 6228534 0.520 0.603435
## trbPerGame -306019 6170589 -0.050 0.960477
## astPerGame 3462166 1015140 3.411 0.000729 ***
## stlPerGame 5899763 2235723 2.639 0.008715 **
## blkPerGame 3241413 1514024 2.141 0.033017 *
## tovPerGame -1726613 2101184 -0.822 0.411826
## pfPerGame -242322 902179 -0.269 0.788409
## ptsPerGame 2521802 4210088 0.599 0.549593
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5352000 on 328 degrees of freedom
## Multiple R-squared: 0.7371, Adjusted R-squared: 0.6906
## F-statistic: 15.86 on 58 and 328 DF, p-value: < 2.2e-16
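The significant variables here (p < 0.05) are agePlayer, ratioPER, countGamesStarted, pctAST, ratioVORP, minutesPerGame, astPerGame, stlPerGame, and blkPerGame. Note that the forward procedure returned the full model unchanged, so these are the significant terms of the full model.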
# Reduced model keeping only the predictors singled out as significant in the
# forward-selection summary.
best.forward <- lm(
  salary ~ agePlayer + ratioPER + countGamesStarted + pctAST + ratioVORP +
    minutesPerGame + astPerGame + stlPerGame + blkPerGame,
  data = nba.salary
)
summary(best.forward)
##
## Call:
## lm(formula = salary ~ agePlayer + ratioPER + countGamesStarted +
## pctAST + ratioVORP + minutesPerGame + astPerGame + stlPerGame +
## blkPerGame, data = nba.salary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18423853 -3643818 -397587 3062970 20789177
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17686026 2665407 -6.635 1.13e-10 ***
## agePlayer 635039 77496 8.194 3.97e-15 ***
## ratioPER 153020 108615 1.409 0.159709
## countGamesStarted 35013 25244 1.387 0.166273
## pctAST -21171663 12142285 -1.744 0.082039 .
## ratioVORP 1496021 515542 2.902 0.003928 **
## minutesPerGame 141284 93116 1.517 0.130033
## astPerGame 2528452 707219 3.575 0.000395 ***
## stlPerGame -867201 1309981 -0.662 0.508379
## blkPerGame 1335449 954453 1.399 0.162582
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6025000 on 377 degrees of freedom
## Multiple R-squared: 0.6171, Adjusted R-squared: 0.6079
## F-statistic: 67.5 on 9 and 377 DF, p-value: < 2.2e-16
Best Subset Model
# Best-subsets search over the predictors kept by backward elimination:
# keep the single best model of each size (nbest = 1), up to nvmax terms.
best.subsets.model <- regsubsets(
  formula(step.pick.backward),
  data = nba.salary,
  nbest = 1,
  nvmax = 27
)
# Plot adjusted R-squared against subset size; the legend is switched off and
# the variable abbreviations are printed to the console instead.
car::subsets(best.subsets.model, statistic = "adjr2", legend = FALSE)
## Abbreviation
## agePlayer agP
## ratioPER rP
## countGamesStarted c
## minutes mn
## pctDRB pD
## pctAST pA
## pctSTL pS
## ratioOWS rO
## ratioWS rtWS
## ratioWSPer48 rWSP
## ratioDBPM rD
## ratioBPM rB
## ratioVORP rV
## pctFG pcFG
## pctFG3 pFG3
## pctEFG pE
## minutesPerGame mP
## fgmPerGame fgPG
## fg3mPerGame fg3mPG
## fg3aPerGame fg3PrG
## ftaPerGame ftPG
## drbPerGame d
## astPerGame aPG
## stlPerGame s
## blkPerGame b
## ptsPerGame pP
# Show which predictors are included in the best model of each size.
summary(best.subsets.model)
## Subset selection object
## Call: regsubsets.formula(formula(step.pick.backward), data = nba.salary,
## nbest = 1, nvmax = 27)
## 26 Variables (and intercept)
## Forced in Forced out
## agePlayer FALSE FALSE
## ratioPER FALSE FALSE
## countGamesStarted FALSE FALSE
## minutes FALSE FALSE
## pctDRB FALSE FALSE
## pctAST FALSE FALSE
## pctSTL FALSE FALSE
## ratioOWS FALSE FALSE
## ratioWS FALSE FALSE
## ratioWSPer48 FALSE FALSE
## ratioDBPM FALSE FALSE
## ratioBPM FALSE FALSE
## ratioVORP FALSE FALSE
## pctFG FALSE FALSE
## pctFG3 FALSE FALSE
## pctEFG FALSE FALSE
## minutesPerGame FALSE FALSE
## fgmPerGame FALSE FALSE
## fg3mPerGame FALSE FALSE
## fg3aPerGame FALSE FALSE
## ftaPerGame FALSE FALSE
## drbPerGame FALSE FALSE
## astPerGame FALSE FALSE
## stlPerGame FALSE FALSE
## blkPerGame FALSE FALSE
## ptsPerGame FALSE FALSE
## 1 subsets of each size up to 26
## Selection Algorithm: exhaustive
## agePlayer ratioPER countGamesStarted minutes pctDRB pctAST pctSTL
## 1 ( 1 ) " " " " " " " " " " " " " "
## 2 ( 1 ) "*" " " " " " " " " " " " "
## 3 ( 1 ) "*" " " " " " " " " " " " "
## 4 ( 1 ) "*" " " " " " " " " " " " "
## 5 ( 1 ) "*" " " " " "*" " " " " " "
## 6 ( 1 ) "*" " " "*" "*" " " " " " "
## 7 ( 1 ) "*" " " "*" "*" " " "*" " "
## 8 ( 1 ) "*" " " "*" "*" " " "*" " "
## 9 ( 1 ) "*" " " "*" "*" "*" "*" " "
## 10 ( 1 ) "*" "*" "*" "*" " " "*" " "
## 11 ( 1 ) "*" "*" "*" "*" " " "*" " "
## 12 ( 1 ) "*" "*" "*" "*" "*" "*" " "
## 13 ( 1 ) "*" "*" "*" "*" "*" "*" " "
## 14 ( 1 ) "*" "*" "*" "*" " " "*" " "
## 15 ( 1 ) "*" "*" "*" "*" " " "*" " "
## 16 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 17 ( 1 ) "*" "*" "*" "*" " " "*" " "
## 18 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 19 ( 1 ) "*" "*" "*" "*" " " "*" " "
## 20 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 21 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 22 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 23 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 24 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 25 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 26 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## ratioOWS ratioWS ratioWSPer48 ratioDBPM ratioBPM ratioVORP pctFG
## 1 ( 1 ) " " " " " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " " " " " " "
## 4 ( 1 ) " " " " " " " " " " " " " "
## 5 ( 1 ) " " " " " " " " " " " " " "
## 6 ( 1 ) " " " " " " " " " " " " " "
## 7 ( 1 ) " " " " " " " " " " " " " "
## 8 ( 1 ) " " " " " " " " " " " " " "
## 9 ( 1 ) " " " " " " " " " " " " " "
## 10 ( 1 ) " " " " " " " " " " " " " "
## 11 ( 1 ) " " " " " " " " " " " " " "
## 12 ( 1 ) " " " " " " " " " " " " " "
## 13 ( 1 ) " " " " " " " " "*" " " " "
## 14 ( 1 ) " " " " " " "*" "*" " " " "
## 15 ( 1 ) " " " " " " "*" "*" " " "*"
## 16 ( 1 ) " " " " " " "*" "*" " " "*"
## 17 ( 1 ) " " " " " " "*" "*" " " "*"
## 18 ( 1 ) " " " " " " "*" "*" " " "*"
## 19 ( 1 ) " " "*" " " "*" "*" "*" "*"
## 20 ( 1 ) " " "*" " " "*" "*" "*" "*"
## 21 ( 1 ) " " "*" " " "*" "*" "*" "*"
## 22 ( 1 ) "*" "*" " " "*" "*" "*" "*"
## 23 ( 1 ) "*" "*" " " "*" "*" "*" "*"
## 24 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 25 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 26 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## pctFG3 pctEFG minutesPerGame fgmPerGame fg3mPerGame fg3aPerGame
## 1 ( 1 ) " " " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " " " " "
## 4 ( 1 ) " " " " " " " " " " " "
## 5 ( 1 ) " " " " " " " " " " " "
## 6 ( 1 ) " " " " " " " " " " " "
## 7 ( 1 ) " " " " " " " " " " " "
## 8 ( 1 ) " " " " "*" " " " " " "
## 9 ( 1 ) " " " " "*" " " " " " "
## 10 ( 1 ) " " " " "*" " " " " " "
## 11 ( 1 ) " " " " "*" " " " " " "
## 12 ( 1 ) " " " " "*" " " " " " "
## 13 ( 1 ) "*" " " "*" " " " " " "
## 14 ( 1 ) "*" " " "*" " " " " " "
## 15 ( 1 ) " " "*" "*" " " " " " "
## 16 ( 1 ) " " "*" "*" " " " " " "
## 17 ( 1 ) " " "*" "*" " " "*" "*"
## 18 ( 1 ) " " "*" "*" " " "*" "*"
## 19 ( 1 ) " " "*" "*" " " "*" "*"
## 20 ( 1 ) " " "*" "*" " " "*" "*"
## 21 ( 1 ) "*" "*" "*" " " "*" "*"
## 22 ( 1 ) "*" "*" "*" " " "*" "*"
## 23 ( 1 ) "*" "*" "*" "*" " " "*"
## 24 ( 1 ) "*" "*" "*" "*" " " "*"
## 25 ( 1 ) "*" "*" "*" "*" "*" "*"
## 26 ( 1 ) "*" "*" "*" "*" "*" "*"
## ftaPerGame drbPerGame astPerGame stlPerGame blkPerGame ptsPerGame
## 1 ( 1 ) " " " " " " " " " " "*"
## 2 ( 1 ) " " " " " " " " " " "*"
## 3 ( 1 ) " " " " "*" " " " " "*"
## 4 ( 1 ) " " "*" "*" " " " " "*"
## 5 ( 1 ) " " "*" "*" " " " " "*"
## 6 ( 1 ) " " "*" "*" " " " " "*"
## 7 ( 1 ) " " "*" "*" " " " " "*"
## 8 ( 1 ) " " "*" "*" " " " " "*"
## 9 ( 1 ) " " "*" "*" " " " " "*"
## 10 ( 1 ) " " "*" "*" "*" " " "*"
## 11 ( 1 ) " " "*" "*" "*" "*" "*"
## 12 ( 1 ) " " "*" "*" "*" "*" "*"
## 13 ( 1 ) " " "*" "*" "*" " " "*"
## 14 ( 1 ) " " "*" "*" "*" "*" "*"
## 15 ( 1 ) " " "*" "*" "*" "*" "*"
## 16 ( 1 ) " " "*" "*" "*" "*" "*"
## 17 ( 1 ) " " "*" "*" "*" "*" "*"
## 18 ( 1 ) " " "*" "*" "*" "*" "*"
## 19 ( 1 ) " " "*" "*" "*" "*" "*"
## 20 ( 1 ) " " "*" "*" "*" "*" "*"
## 21 ( 1 ) " " "*" "*" "*" "*" "*"
## 22 ( 1 ) " " "*" "*" "*" "*" "*"
## 23 ( 1 ) "*" "*" "*" "*" "*" "*"
## 24 ( 1 ) "*" "*" "*" "*" "*" "*"
## 25 ( 1 ) "*" "*" "*" "*" "*" "*"
## 26 ( 1 ) "*" "*" "*" "*" "*" "*"
# Candidate "best subset" model with 11 hand-picked predictors.
# The formula must be one uninterrupted expression. A comma inside it ends the
# formula and passes the remaining terms to lm()'s next positional argument
# (`subset`), which silently drops those predictors and changes which rows are
# used for the fit. The stray comma after pctDRB is removed here.
# NOTE(review): the 11-variable row of the regsubsets summary marks stlPerGame
# rather than pctDRB; confirm this is the intended predictor set.
# NOTE: re-knit after this change; the summary output printed after this call
# came from the earlier, mis-specified fit.
new.best <- lm(
  salary ~ agePlayer + ptsPerGame + astPerGame + drbPerGame + minutes +
    countGamesStarted + pctAST + minutesPerGame + pctDRB + ratioPER +
    blkPerGame,
  data = nba.salary
)
summary(new.best)
##
## Call:
## lm(formula = salary ~ agePlayer + ptsPerGame + astPerGame + drbPerGame +
## minutes + countGamesStarted + pctAST + minutesPerGame + pctDRB,
## data = nba.salary, subset = +ratioPER + blkPerGame)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22077007 -1931704 897512 1560478 14803983
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.913e+07 2.338e+06 -8.180 4.42e-15 ***
## agePlayer 9.612e+05 5.820e+04 16.514 < 2e-16 ***
## ptsPerGame 1.296e+06 1.044e+05 12.419 < 2e-16 ***
## astPerGame 1.217e+06 1.225e+06 0.994 0.321
## drbPerGame 4.840e+06 4.355e+05 11.113 < 2e-16 ***
## minutes 9.783e+02 1.064e+03 0.920 0.358
## countGamesStarted 4.925e+03 3.007e+04 0.164 0.870
## pctAST -1.668e+07 2.546e+07 -0.655 0.513
## minutesPerGame -6.915e+05 1.084e+05 -6.381 5.18e-10 ***
## pctDRB -6.219e+07 7.157e+06 -8.690 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4196000 on 376 degrees of freedom
## Multiple R-squared: 0.8712, Adjusted R-squared: 0.8681
## F-statistic: 282.5 on 9 and 376 DF, p-value: < 2.2e-16
These are the seemingly most significant variables to predict salary. I ran the subsets function to see how many variables would be best to predict salary. According to the plot it is about 10-12 because there is a slight inflection point there. I then ran a summary to see what the best combination of variables is with 10, 11, and 12 variables. The model with the best AdjR2 has 11 predictors and they are the ones listed above.
# Comparison table: one row per fitted model with its adjusted R-squared, AIC
# and BIC. The list names become the Model column via bind_rows(.id = ...).
list(
  "Full main effects" = full.model,
  "Backward Stepwise Regression" = step.pick.backward,
  "Forward Stepwise Regression" = step.pick.forward,
  "Best Selected from Backward Regression" = best.backward,
  "Best Selected from Forward Regression" = best.forward,
  "Best Subset Regression" = new.best
) %>%
  lapply(glance) %>%
  bind_rows(.id = "Model") %>%
  select(Model, Adj.R.Squared = adj.r.squared, AIC, BIC) %>%
  kable()
| Model | Adj.R.Squared | AIC | BIC |
|---|---|---|---|
| Full main effects | 0.6906293 | 13145.83 | 13383.33 |
| Backward Stepwise Regression | 0.7041413 | 13100.57 | 13211.41 |
| Forward Stepwise Regression | 0.6906293 | 13145.83 | 13383.33 |
| Best Selected from Backward Regression | 0.6294817 | 13171.51 | 13215.05 |
| Best Selected from Forward Regression | 0.6079106 | 13193.41 | 13236.95 |
| Best Subset Regression | 0.8680964 | 12879.98 | 12923.49 |
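The model with the highest adjusted R-squared and the lowest AIC and BIC is, by far, the Best Subset Regression model, which would suggest that its variables are the best predictors of salary in the NBA. Caveat: the Call printed for that model shows `subset = +ratioPER + blkPerGame`, meaning those two terms were passed to lm() as a row subset rather than as predictors. The model in this table was therefore fit with only 9 predictors and on a different set of rows (386 observations instead of 387), so its adjusted R-squared, AIC, and BIC are not directly comparable with those of the other models.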