if(!require(tidyverse)) install.packages("tidyverse")
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(tidyverse)
if(!require(leaps)) install.packages("leaps")
## Loading required package: leaps
library(leaps)
if(!require(ggplot2)) install.packages("ggplot2")
library(ggplot2)
if(!require(dplyr)) install.packages("dplyr")
library(dplyr)
if(!require(knitr)) install.packages("knitr")
## Loading required package: knitr
library(knitr)
if(!require(tidymodels)) install.packages("tidymodels")
## Loading required package: tidymodels
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom        1.0.1     ✔ rsample      1.1.0
## ✔ dials        1.1.0     ✔ tune         1.0.1
## ✔ infer        1.0.3     ✔ workflows    1.1.0
## ✔ modeldata    1.0.1     ✔ workflowsets 1.0.0
## ✔ parsnip      1.0.2     ✔ yardstick    1.1.0
## ✔ recipes      1.0.3     
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ recipes::fixed()  masks stringr::fixed()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step()   masks stats::step()
## • Search for functions across packages at https://www.tidymodels.org/find/
library(tidymodels)
if(!require(GGally)) install.packages("GGally")
## Loading required package: GGally
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(GGally)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(ggfortify)
## Registered S3 method overwritten by 'ggfortify':
##   method          from   
##   autoplot.glmnet parsnip
# Import the NBA salary/performance table and preview its first six rows.
nba.salary <- read.csv(
  file = "~/Desktop/SLM 418/NBA_salary_performance.csv"
)
head(x = nba.salary, n = 6L)
##   X      namePlayer   salary yearSeason slugPosition agePlayer slugTeamBREF
## 1 1    Aaron Gordon 18136364       2021           PF        25          TOT
## 2 2   Aaron Holiday  2345640       2021           PG        24          IND
## 3 3   Aaron Nesmith  3458400       2021           SF        21          BOS
## 4 4     Abdel Nader  1752950       2021           SF        27          PHO
## 5 5      Al Horford 27500000       2021            C        34          OKC
## 6 6 Al-Farouq Aminu  9720900       2021           PF        30          TOT
##   countGames minutes ratioPER pctTrueShooting pct3PRate pctFTRate pctORB pctDRB
## 1         49    1372     14.7           0.549     0.352     0.301  0.059  0.164
## 2         66    1176      9.3           0.503     0.417     0.190  0.014  0.068
## 3         44     627      9.9           0.587     0.628     0.171  0.046  0.169
## 4         24     355     13.4           0.605     0.371     0.319  0.023  0.173
## 5         28     782     17.4           0.538     0.422     0.061  0.038  0.215
## 6         23     434      8.9           0.469     0.374     0.222  0.051  0.219
##   pctTRB pctAST pctSTL pctBLK pctTOV pctUSG ratioOWS ratioDWS ratioWS
## 1  0.111  0.179  0.012  0.023  0.148  0.208      0.7      1.2     2.0
## 2  0.041  0.140  0.018  0.900  0.123  0.195     -0.6      0.8     0.2
## 3  0.107  0.043  0.012  0.013  0.106  0.136      0.6      0.5     1.1
## 4  0.099  0.074  0.014  0.023  0.126  0.190      0.3      0.4     0.7
## 5  0.125  0.202  0.015  0.030  0.073  0.216      0.7      0.9     1.6
## 6  0.133  0.101  0.021  0.019  0.205  0.136     -0.4      0.5     0.1
##   ratioWSPer48 ratioOBPM ratioDBPM ratioBPM ratioVORP countGamesStarted pctFG
## 1        0.069       0.3      -0.1      0.2       0.8                49 0.465
## 2        0.010      -3.2      -1.0     -4.1      -0.6                 8 0.390
## 3        0.083      -2.5      -0.3     -2.8      -0.1                 1 0.445
## 4        0.095      -1.2       0.8     -0.4       0.2                 0 0.491
## 5        0.100       1.9       1.4      3.3       1.0                28 0.450
## 6        0.011      -5.0       1.0     -4.0      -0.2                14 0.384
##   pctFG3 pctFG2 pctEFG pctFT minutesPerGame fgmPerGame fgaPerGame fg3mPerGame
## 1  0.339  0.533  0.524 0.651           28.0        4.7       10.1         1.2
## 2  0.368  0.406  0.467 0.819           17.8        2.6        6.6         1.0
## 3  0.379  0.557  0.564 0.786           14.3        1.7        3.7         0.9
## 4  0.419  0.534  0.569 0.757           14.8        2.4        4.8         0.8
## 5  0.368  0.510  0.528 0.818           27.9        5.8       12.9         2.0
## 6  0.216  0.484  0.424 0.818           18.9        1.7        4.3         0.3
##   fg3aPerGame fg2mPerGame fg2aPerGame ftmPerGame ftaPerGame orbPerGame
## 1         3.6         3.5         6.6        2.0        3.0        1.6
## 2         2.8         1.6         3.8        1.0        1.3        0.2
## 3         2.3         0.8         1.4        0.5        0.6        0.6
## 4         1.8         1.6         3.0        1.2        1.5        0.3
## 5         5.4         3.8         7.4        0.6        0.8        1.0
## 6         1.6         1.3         2.7        0.8        1.0        1.0
##   drbPerGame trbPerGame astPerGame stlPerGame blkPerGame tovPerGame pfPerGame
## 1        4.2        5.8        3.3        0.7        0.7        2.0       1.8
## 2        1.1        1.3        1.9        0.7        0.2        1.0       1.4
## 3        2.1        2.7        0.5        0.3        0.2        0.5       1.8
## 4        2.3        2.6        0.8        0.4        0.4        0.8       1.4
## 5        5.7        6.7        3.4        0.9        0.9        1.0       1.7
## 6        3.8        4.8        1.3        0.8        0.4        1.2       1.3
##   ptsPerGame isAllNBA1 isAllNBA2 isAllNBA3
## 1       12.6         0         0         0
## 2        7.2         0         0         0
## 3        4.7         0         0         0
## 4        6.7         0         0         0
## 5       14.2         0         0         0
## 6        4.4         0         0         0

Full Model

# Fit the full linear model: salary regressed on player age, position and
# every performance statistic column named in the formula below.
# Each predictor is listed exactly once. R collapses repeated terms in a
# formula, so a duplicated name adds nothing to the fit and only clutters
# the call.
full.model <- lm(
  salary ~ agePlayer + ratioPER + slugPosition + countGames +
    countGamesStarted + minutes + pctTrueShooting + pct3PRate + pctFTRate +
    pctORB + pctDRB + pctTRB + pctAST + pctSTL + pctBLK + pctTOV + pctUSG +
    ratioOWS + ratioDWS + ratioWS + ratioWSPer48 + ratioOBPM + ratioDBPM +
    ratioBPM + ratioVORP + pctFG + pctFG3 + pctFG2 + pctEFG + pctFT +
    minutesPerGame + fgmPerGame + fgaPerGame + fg3mPerGame + fg3aPerGame +
    fg2mPerGame + fg2aPerGame + ftmPerGame + ftaPerGame + orbPerGame +
    drbPerGame + trbPerGame + astPerGame + stlPerGame + blkPerGame +
    tovPerGame + pfPerGame + ptsPerGame,
  data = nba.salary
)

# Coefficient table, residual summary and overall fit statistics.
summary(full.model)
## 
## Call:
## lm(formula = salary ~ agePlayer + ratioPER + slugPosition + countGames + 
##     countGamesStarted + minutes + pctTrueShooting + pct3PRate + 
##     pctFTRate + pctORB + pctDRB + pctTRB + pctAST + pctSTL + 
##     pctBLK + pctTOV + pctUSG + ratioOWS + ratioDWS + ratioWS + 
##     ratioWSPer48 + ratioOBPM + ratioDBPM + ratioBPM + ratioVORP + 
##     countGamesStarted + pctFG + pctFG3 + pctFG2 + pctEFG + pctFT + 
##     minutesPerGame + fgmPerGame + fgaPerGame + fg3mPerGame + 
##     fg3aPerGame + fg2mPerGame + fg2aPerGame + ftmPerGame + ftaPerGame + 
##     orbPerGame + drbPerGame + trbPerGame + astPerGame + stlPerGame + 
##     blkPerGame + tovPerGame + pfPerGame + ptsPerGame, data = nba.salary)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -19297020  -2799667   -321819   2156852  18775585 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         13793136   12840844   1.074 0.283540    
## agePlayer             726933      79144   9.185  < 2e-16 ***
## ratioPER            -1406129     638184  -2.203 0.028266 *  
## slugPositionC-PF      -49838    4132635  -0.012 0.990385    
## slugPositionPF        882641    1218093   0.725 0.469208    
## slugPositionPF-C    -1712144    5713817  -0.300 0.764634    
## slugPositionPG      -1079397    1705975  -0.633 0.527360    
## slugPositionPG-SG    1623624    6380593   0.254 0.799297    
## slugPositionSF      -1251433    1437793  -0.870 0.384727    
## slugPositionSF-PF     805949    5930210   0.136 0.891979    
## slugPositionSF-SG    1795471    6229642   0.288 0.773365    
## slugPositionSG      -1683793    1497242  -1.125 0.261582    
## slugPositionSG-PG   -4611140    4189569  -1.101 0.271867    
## slugPositionSG-SF    -806364    5764101  -0.140 0.888829    
## countGames            -37107      69874  -0.531 0.595739    
## countGamesStarted      81906      30797   2.660 0.008209 ** 
## minutes                -5979       3526  -1.696 0.090913 .  
## pctTrueShooting     -8316728   38093846  -0.218 0.827314    
## pct3PRate           17400847   14120603   1.232 0.218719    
## pctFTRate            3378143    6620281   0.510 0.610204    
## pctORB              -1300958    2923606  -0.445 0.656625    
## pctDRB             -80572906   45565775  -1.768 0.077944 .  
## pctTRB              89469496   74836686   1.196 0.232744    
## pctAST             -34161783   16837100  -2.029 0.043270 *  
## pctSTL              -1638592    1254653  -1.306 0.192464    
## pctBLK                217529    1186609   0.183 0.854660    
## pctTOV               2256666   18215068   0.124 0.901478    
## pctUSG              -2044897   32591281  -0.063 0.950009    
## ratioOWS            -4794630    6342555  -0.756 0.450224    
## ratioDWS            -2840493    6403123  -0.444 0.657617    
## ratioWS              6747935    6391512   1.056 0.291853    
## ratioWSPer48       -60441246   40666646  -1.486 0.138171    
## ratioOBPM            -266273    6104953  -0.044 0.965237    
## ratioDBPM           -2590451    6149684  -0.421 0.673861    
## ratioBPM             3118502    6090660   0.512 0.608986    
## ratioVORP           -4199494    1707071  -2.460 0.014407 *  
## pctFG              133886300   77081246   1.737 0.083335 .  
## pctFG3              -6320909    4585212  -1.379 0.168975    
## pctFG2              -7819221    9076987  -0.861 0.389629    
## pctEFG            -101005368   77170618  -1.309 0.191499    
## pctFT                -491192    2019759  -0.243 0.808008    
## minutesPerGame      -1045674     291576  -3.586 0.000386 ***
## fgmPerGame          -3538307    9377368  -0.377 0.706177    
## fgaPerGame           2965454    5655110   0.524 0.600365    
## fg3mPerGame          8306604    8340868   0.996 0.320037    
## fg3aPerGame         -5924367    5863504  -1.010 0.313058    
## fg2mPerGame          -878363    6313786  -0.139 0.889442    
## fg2aPerGame         -1705927    5778572  -0.295 0.768016    
## ftmPerGame           1869622    4722640   0.396 0.692447    
## ftaPerGame          -2634068    2100138  -1.254 0.210650    
## orbPerGame          -1426464    6328796  -0.225 0.821814    
## drbPerGame           3238657    6228534   0.520 0.603435    
## trbPerGame           -306019    6170589  -0.050 0.960477    
## astPerGame           3462166    1015140   3.411 0.000729 ***
## stlPerGame           5899763    2235723   2.639 0.008715 ** 
## blkPerGame           3241413    1514024   2.141 0.033017 *  
## tovPerGame          -1726613    2101184  -0.822 0.411826    
## pfPerGame            -242322     902179  -0.269 0.788409    
## ptsPerGame           2521802    4210088   0.599 0.549593    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5352000 on 328 degrees of freedom
## Multiple R-squared:  0.7371, Adjusted R-squared:  0.6906 
## F-statistic: 15.86 on 58 and 328 DF,  p-value: < 2.2e-16

Backward Selection Model

# Backward elimination starting from the full model. step() is called through
# its stats:: namespace because recipes::step() masks it once tidymodels is
# attached; trace = FALSE suppresses the per-step log.
step.pick.backward <- stats::step(
  object = full.model,
  direction = "backward",
  trace = FALSE
)
# Coefficient table and fit statistics for the model that elimination kept.
summary(object = step.pick.backward)
## 
## Call:
## lm(formula = salary ~ agePlayer + ratioPER + countGamesStarted + 
##     minutes + pctDRB + pctAST + pctSTL + ratioOWS + ratioWS + 
##     ratioWSPer48 + ratioDBPM + ratioBPM + ratioVORP + pctFG + 
##     pctFG3 + pctEFG + minutesPerGame + fgmPerGame + fg3mPerGame + 
##     fg3aPerGame + ftaPerGame + drbPerGame + astPerGame + stlPerGame + 
##     blkPerGame + ptsPerGame, data = nba.salary)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -18052761  -2945580   -418173   2573254  20418248 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        21931341    7657306   2.864 0.004427 ** 
## agePlayer            735641      73742   9.976  < 2e-16 ***
## ratioPER           -1369142     407245  -3.362 0.000857 ***
## countGamesStarted     96854      26121   3.708 0.000242 ***
## minutes               -6531       1767  -3.695 0.000254 ***
## pctDRB            -20304324   14410190  -1.409 0.159691    
## pctAST            -43284066   13190884  -3.281 0.001134 ** 
## pctSTL             -1800123    1178915  -1.527 0.127656    
## ratioOWS           -1757473    1056041  -1.664 0.096941 .  
## ratioWS             2909910    1162716   2.503 0.012768 *  
## ratioWSPer48      -33147804   22558348  -1.469 0.142591    
## ratioDBPM          -2311028     677789  -3.410 0.000724 ***
## ratioBPM            2648626     608644   4.352 1.76e-05 ***
## ratioVORP          -3040642    1368276  -2.222 0.026888 *  
## pctFG              55172643   18096254   3.049 0.002467 ** 
## pctFG3             -7832828    3886727  -2.015 0.044619 *  
## pctEFG            -57367117   18535725  -3.095 0.002123 ** 
## minutesPerGame      -953594     206757  -4.612 5.55e-06 ***
## fgmPerGame         -6056858    3047454  -1.988 0.047623 *  
## fg3mPerGame         4104065    2866338   1.432 0.153064    
## fg3aPerGame        -2376308    1017503  -2.335 0.020070 *  
## ftaPerGame         -2542082    1203476  -2.112 0.035350 *  
## drbPerGame          2276726     679968   3.348 0.000899 ***
## astPerGame          3529492     714720   4.938 1.21e-06 ***
## stlPerGame          4567942    1870838   2.442 0.015101 *  
## blkPerGame          3124701    1202903   2.598 0.009772 ** 
## ptsPerGame          4156735    1484251   2.801 0.005377 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5234000 on 360 degrees of freedom
## Multiple R-squared:  0.7241, Adjusted R-squared:  0.7041 
## F-statistic: 36.33 on 26 and 360 DF,  p-value: < 2.2e-16

The most significant predictors here (p < 0.001) are agePlayer, ratioPER, countGamesStarted, minutes, ratioDBPM, ratioBPM, minutesPerGame, drbPerGame, and astPerGame.

# Refit salary on a hand-picked set of nine predictors taken from the
# backward-selection model, then print the summary of this reduced model.
best.backward <- lm(
  formula = salary ~ agePlayer + ratioPER + countGamesStarted + minutes +
    ratioDBPM + ratioBPM + minutesPerGame + drbPerGame + astPerGame,
  data = nba.salary
)
summary(object = best.backward)
## 
## Call:
## lm(formula = salary ~ agePlayer + ratioPER + countGamesStarted + 
##     minutes + ratioDBPM + ratioBPM + minutesPerGame + drbPerGame + 
##     astPerGame, data = nba.salary)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -17813901  -3583270   -386265   3340718  18774037 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -15495396    3743638  -4.139 4.30e-05 ***
## agePlayer            631454      78486   8.045 1.13e-14 ***
## ratioPER             -34659     159467  -0.217   0.8281    
## countGamesStarted    112148      27924   4.016 7.14e-05 ***
## minutes               -4404       1120  -3.933   0.0001 ***
## ratioDBPM          -1203707     380876  -3.160   0.0017 ** 
## ratioBPM             698999     292871   2.387   0.0175 *  
## minutesPerGame       144489      95167   1.518   0.1298    
## drbPerGame          1257082     296050   4.246 2.74e-05 ***
## astPerGame          1564403     232040   6.742 5.88e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5857000 on 377 degrees of freedom
## Multiple R-squared:  0.6381, Adjusted R-squared:  0.6295 
## F-statistic: 73.86 on 9 and 377 DF,  p-value: < 2.2e-16

Forward Selection Model

# Forward selection must start from the intercept-only model and grow toward
# the full formula. Starting from full.model with the full formula as scope
# leaves no term to add, so step() simply hands the full model back.
# trace = FALSE matches the backward-selection call and keeps the log quiet.
# NOTE: re-knit after this change; printed output captured from a run that
# started at the full model no longer reflects this call.
null.model <- lm(salary ~ 1, data = nba.salary)
step.pick.forward <- stats::step(
  null.model,
  scope = list(lower = formula(null.model), upper = formula(full.model)),
  direction = "forward",
  trace = FALSE
)
## Start:  AIC=12045.57
## salary ~ agePlayer + ratioPER + slugPosition + countGames + countGamesStarted + 
##     minutes + pctTrueShooting + pct3PRate + pctFTRate + pctORB + 
##     pctDRB + pctTRB + pctAST + pctSTL + pctBLK + pctTOV + pctUSG + 
##     ratioOWS + ratioDWS + ratioWS + ratioWSPer48 + ratioOBPM + 
##     ratioDBPM + ratioBPM + ratioVORP + countGamesStarted + pctFG + 
##     pctFG3 + pctFG2 + pctEFG + pctFT + minutesPerGame + fgmPerGame + 
##     fgaPerGame + fg3mPerGame + fg3aPerGame + fg2mPerGame + fg2aPerGame + 
##     ftmPerGame + ftaPerGame + orbPerGame + drbPerGame + trbPerGame + 
##     astPerGame + stlPerGame + blkPerGame + tovPerGame + pfPerGame + 
##     ptsPerGame
# Coefficient table and fit statistics for the model returned by the
# forward step() call.
summary(step.pick.forward)
## 
## Call:
## lm(formula = salary ~ agePlayer + ratioPER + slugPosition + countGames + 
##     countGamesStarted + minutes + pctTrueShooting + pct3PRate + 
##     pctFTRate + pctORB + pctDRB + pctTRB + pctAST + pctSTL + 
##     pctBLK + pctTOV + pctUSG + ratioOWS + ratioDWS + ratioWS + 
##     ratioWSPer48 + ratioOBPM + ratioDBPM + ratioBPM + ratioVORP + 
##     countGamesStarted + pctFG + pctFG3 + pctFG2 + pctEFG + pctFT + 
##     minutesPerGame + fgmPerGame + fgaPerGame + fg3mPerGame + 
##     fg3aPerGame + fg2mPerGame + fg2aPerGame + ftmPerGame + ftaPerGame + 
##     orbPerGame + drbPerGame + trbPerGame + astPerGame + stlPerGame + 
##     blkPerGame + tovPerGame + pfPerGame + ptsPerGame, data = nba.salary)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -19297020  -2799667   -321819   2156852  18775585 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         13793136   12840844   1.074 0.283540    
## agePlayer             726933      79144   9.185  < 2e-16 ***
## ratioPER            -1406129     638184  -2.203 0.028266 *  
## slugPositionC-PF      -49838    4132635  -0.012 0.990385    
## slugPositionPF        882641    1218093   0.725 0.469208    
## slugPositionPF-C    -1712144    5713817  -0.300 0.764634    
## slugPositionPG      -1079397    1705975  -0.633 0.527360    
## slugPositionPG-SG    1623624    6380593   0.254 0.799297    
## slugPositionSF      -1251433    1437793  -0.870 0.384727    
## slugPositionSF-PF     805949    5930210   0.136 0.891979    
## slugPositionSF-SG    1795471    6229642   0.288 0.773365    
## slugPositionSG      -1683793    1497242  -1.125 0.261582    
## slugPositionSG-PG   -4611140    4189569  -1.101 0.271867    
## slugPositionSG-SF    -806364    5764101  -0.140 0.888829    
## countGames            -37107      69874  -0.531 0.595739    
## countGamesStarted      81906      30797   2.660 0.008209 ** 
## minutes                -5979       3526  -1.696 0.090913 .  
## pctTrueShooting     -8316728   38093846  -0.218 0.827314    
## pct3PRate           17400847   14120603   1.232 0.218719    
## pctFTRate            3378143    6620281   0.510 0.610204    
## pctORB              -1300958    2923606  -0.445 0.656625    
## pctDRB             -80572906   45565775  -1.768 0.077944 .  
## pctTRB              89469496   74836686   1.196 0.232744    
## pctAST             -34161783   16837100  -2.029 0.043270 *  
## pctSTL              -1638592    1254653  -1.306 0.192464    
## pctBLK                217529    1186609   0.183 0.854660    
## pctTOV               2256666   18215068   0.124 0.901478    
## pctUSG              -2044897   32591281  -0.063 0.950009    
## ratioOWS            -4794630    6342555  -0.756 0.450224    
## ratioDWS            -2840493    6403123  -0.444 0.657617    
## ratioWS              6747935    6391512   1.056 0.291853    
## ratioWSPer48       -60441246   40666646  -1.486 0.138171    
## ratioOBPM            -266273    6104953  -0.044 0.965237    
## ratioDBPM           -2590451    6149684  -0.421 0.673861    
## ratioBPM             3118502    6090660   0.512 0.608986    
## ratioVORP           -4199494    1707071  -2.460 0.014407 *  
## pctFG              133886300   77081246   1.737 0.083335 .  
## pctFG3              -6320909    4585212  -1.379 0.168975    
## pctFG2              -7819221    9076987  -0.861 0.389629    
## pctEFG            -101005368   77170618  -1.309 0.191499    
## pctFT                -491192    2019759  -0.243 0.808008    
## minutesPerGame      -1045674     291576  -3.586 0.000386 ***
## fgmPerGame          -3538307    9377368  -0.377 0.706177    
## fgaPerGame           2965454    5655110   0.524 0.600365    
## fg3mPerGame          8306604    8340868   0.996 0.320037    
## fg3aPerGame         -5924367    5863504  -1.010 0.313058    
## fg2mPerGame          -878363    6313786  -0.139 0.889442    
## fg2aPerGame         -1705927    5778572  -0.295 0.768016    
## ftmPerGame           1869622    4722640   0.396 0.692447    
## ftaPerGame          -2634068    2100138  -1.254 0.210650    
## orbPerGame          -1426464    6328796  -0.225 0.821814    
## drbPerGame           3238657    6228534   0.520 0.603435    
## trbPerGame           -306019    6170589  -0.050 0.960477    
## astPerGame           3462166    1015140   3.411 0.000729 ***
## stlPerGame           5899763    2235723   2.639 0.008715 ** 
## blkPerGame           3241413    1514024   2.141 0.033017 *  
## tovPerGame          -1726613    2101184  -0.822 0.411826    
## pfPerGame            -242322     902179  -0.269 0.788409    
## ptsPerGame           2521802    4210088   0.599 0.549593    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5352000 on 328 degrees of freedom
## Multiple R-squared:  0.7371, Adjusted R-squared:  0.6906 
## F-statistic: 15.86 on 58 and 328 DF,  p-value: < 2.2e-16

The variables significant at the 0.05 level here are agePlayer, ratioPER, countGamesStarted, pctAST, ratioVORP, minutesPerGame, astPerGame, stlPerGame, and blkPerGame.

# Refit salary on the nine predictors listed as significant in the summary of
# step.pick.forward, then print the summary of this reduced model.
best.forward <- lm(
  formula = salary ~ agePlayer + ratioPER + countGamesStarted + pctAST +
    ratioVORP + minutesPerGame + astPerGame + stlPerGame + blkPerGame,
  data = nba.salary
)
summary(object = best.forward)
## 
## Call:
## lm(formula = salary ~ agePlayer + ratioPER + countGamesStarted + 
##     pctAST + ratioVORP + minutesPerGame + astPerGame + stlPerGame + 
##     blkPerGame, data = nba.salary)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -18423853  -3643818   -397587   3062970  20789177 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -17686026    2665407  -6.635 1.13e-10 ***
## agePlayer            635039      77496   8.194 3.97e-15 ***
## ratioPER             153020     108615   1.409 0.159709    
## countGamesStarted     35013      25244   1.387 0.166273    
## pctAST            -21171663   12142285  -1.744 0.082039 .  
## ratioVORP           1496021     515542   2.902 0.003928 ** 
## minutesPerGame       141284      93116   1.517 0.130033    
## astPerGame          2528452     707219   3.575 0.000395 ***
## stlPerGame          -867201    1309981  -0.662 0.508379    
## blkPerGame          1335449     954453   1.399 0.162582    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6025000 on 377 degrees of freedom
## Multiple R-squared:  0.6171, Adjusted R-squared:  0.6079 
## F-statistic:  67.5 on 9 and 377 DF,  p-value: < 2.2e-16

Best Subset Model

# Best-subset search over the predictors kept by backward elimination,
# retaining the single best model of each size (nbest = 1) up to nvmax terms.
best.subsets.model <- regsubsets(
  formula(step.pick.backward),
  data = nba.salary,
  nbest = 1,
  nvmax = 27
)
# Plot adjusted R-squared for the best subset of each size. The legend is
# switched off; car::subsets() prints the variable abbreviation key instead.
subsets(
  best.subsets.model,
  statistic = "adjr2",
  legend = FALSE
)

##                   Abbreviation
## agePlayer                  agP
## ratioPER                    rP
## countGamesStarted            c
## minutes                     mn
## pctDRB                      pD
## pctAST                      pA
## pctSTL                      pS
## ratioOWS                    rO
## ratioWS                   rtWS
## ratioWSPer48              rWSP
## ratioDBPM                   rD
## ratioBPM                    rB
## ratioVORP                   rV
## pctFG                     pcFG
## pctFG3                    pFG3
## pctEFG                      pE
## minutesPerGame              mP
## fgmPerGame                fgPG
## fg3mPerGame             fg3mPG
## fg3aPerGame             fg3PrG
## ftaPerGame                ftPG
## drbPerGame                   d
## astPerGame                 aPG
## stlPerGame                   s
## blkPerGame                   b
## ptsPerGame                  pP
# Print the selection table: one row per subset size, with "*" marking the
# predictors included in the best model of that size.
summary(best.subsets.model)
## Subset selection object
## Call: regsubsets.formula(formula(step.pick.backward), data = nba.salary, 
##     nbest = 1, nvmax = 27)
## 26 Variables  (and intercept)
##                   Forced in Forced out
## agePlayer             FALSE      FALSE
## ratioPER              FALSE      FALSE
## countGamesStarted     FALSE      FALSE
## minutes               FALSE      FALSE
## pctDRB                FALSE      FALSE
## pctAST                FALSE      FALSE
## pctSTL                FALSE      FALSE
## ratioOWS              FALSE      FALSE
## ratioWS               FALSE      FALSE
## ratioWSPer48          FALSE      FALSE
## ratioDBPM             FALSE      FALSE
## ratioBPM              FALSE      FALSE
## ratioVORP             FALSE      FALSE
## pctFG                 FALSE      FALSE
## pctFG3                FALSE      FALSE
## pctEFG                FALSE      FALSE
## minutesPerGame        FALSE      FALSE
## fgmPerGame            FALSE      FALSE
## fg3mPerGame           FALSE      FALSE
## fg3aPerGame           FALSE      FALSE
## ftaPerGame            FALSE      FALSE
## drbPerGame            FALSE      FALSE
## astPerGame            FALSE      FALSE
## stlPerGame            FALSE      FALSE
## blkPerGame            FALSE      FALSE
## ptsPerGame            FALSE      FALSE
## 1 subsets of each size up to 26
## Selection Algorithm: exhaustive
##           agePlayer ratioPER countGamesStarted minutes pctDRB pctAST pctSTL
## 1  ( 1 )  " "       " "      " "               " "     " "    " "    " "   
## 2  ( 1 )  "*"       " "      " "               " "     " "    " "    " "   
## 3  ( 1 )  "*"       " "      " "               " "     " "    " "    " "   
## 4  ( 1 )  "*"       " "      " "               " "     " "    " "    " "   
## 5  ( 1 )  "*"       " "      " "               "*"     " "    " "    " "   
## 6  ( 1 )  "*"       " "      "*"               "*"     " "    " "    " "   
## 7  ( 1 )  "*"       " "      "*"               "*"     " "    "*"    " "   
## 8  ( 1 )  "*"       " "      "*"               "*"     " "    "*"    " "   
## 9  ( 1 )  "*"       " "      "*"               "*"     "*"    "*"    " "   
## 10  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    " "   
## 11  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    " "   
## 12  ( 1 ) "*"       "*"      "*"               "*"     "*"    "*"    " "   
## 13  ( 1 ) "*"       "*"      "*"               "*"     "*"    "*"    " "   
## 14  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    " "   
## 15  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    " "   
## 16  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    "*"   
## 17  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    " "   
## 18  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    "*"   
## 19  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    " "   
## 20  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    "*"   
## 21  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    "*"   
## 22  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    "*"   
## 23  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    "*"   
## 24  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    "*"   
## 25  ( 1 ) "*"       "*"      "*"               "*"     " "    "*"    "*"   
## 26  ( 1 ) "*"       "*"      "*"               "*"     "*"    "*"    "*"   
##           ratioOWS ratioWS ratioWSPer48 ratioDBPM ratioBPM ratioVORP pctFG
## 1  ( 1 )  " "      " "     " "          " "       " "      " "       " "  
## 2  ( 1 )  " "      " "     " "          " "       " "      " "       " "  
## 3  ( 1 )  " "      " "     " "          " "       " "      " "       " "  
## 4  ( 1 )  " "      " "     " "          " "       " "      " "       " "  
## 5  ( 1 )  " "      " "     " "          " "       " "      " "       " "  
## 6  ( 1 )  " "      " "     " "          " "       " "      " "       " "  
## 7  ( 1 )  " "      " "     " "          " "       " "      " "       " "  
## 8  ( 1 )  " "      " "     " "          " "       " "      " "       " "  
## 9  ( 1 )  " "      " "     " "          " "       " "      " "       " "  
## 10  ( 1 ) " "      " "     " "          " "       " "      " "       " "  
## 11  ( 1 ) " "      " "     " "          " "       " "      " "       " "  
## 12  ( 1 ) " "      " "     " "          " "       " "      " "       " "  
## 13  ( 1 ) " "      " "     " "          " "       "*"      " "       " "  
## 14  ( 1 ) " "      " "     " "          "*"       "*"      " "       " "  
## 15  ( 1 ) " "      " "     " "          "*"       "*"      " "       "*"  
## 16  ( 1 ) " "      " "     " "          "*"       "*"      " "       "*"  
## 17  ( 1 ) " "      " "     " "          "*"       "*"      " "       "*"  
## 18  ( 1 ) " "      " "     " "          "*"       "*"      " "       "*"  
## 19  ( 1 ) " "      "*"     " "          "*"       "*"      "*"       "*"  
## 20  ( 1 ) " "      "*"     " "          "*"       "*"      "*"       "*"  
## 21  ( 1 ) " "      "*"     " "          "*"       "*"      "*"       "*"  
## 22  ( 1 ) "*"      "*"     " "          "*"       "*"      "*"       "*"  
## 23  ( 1 ) "*"      "*"     " "          "*"       "*"      "*"       "*"  
## 24  ( 1 ) "*"      "*"     "*"          "*"       "*"      "*"       "*"  
## 25  ( 1 ) "*"      "*"     "*"          "*"       "*"      "*"       "*"  
## 26  ( 1 ) "*"      "*"     "*"          "*"       "*"      "*"       "*"  
##           pctFG3 pctEFG minutesPerGame fgmPerGame fg3mPerGame fg3aPerGame
## 1  ( 1 )  " "    " "    " "            " "        " "         " "        
## 2  ( 1 )  " "    " "    " "            " "        " "         " "        
## 3  ( 1 )  " "    " "    " "            " "        " "         " "        
## 4  ( 1 )  " "    " "    " "            " "        " "         " "        
## 5  ( 1 )  " "    " "    " "            " "        " "         " "        
## 6  ( 1 )  " "    " "    " "            " "        " "         " "        
## 7  ( 1 )  " "    " "    " "            " "        " "         " "        
## 8  ( 1 )  " "    " "    "*"            " "        " "         " "        
## 9  ( 1 )  " "    " "    "*"            " "        " "         " "        
## 10  ( 1 ) " "    " "    "*"            " "        " "         " "        
## 11  ( 1 ) " "    " "    "*"            " "        " "         " "        
## 12  ( 1 ) " "    " "    "*"            " "        " "         " "        
## 13  ( 1 ) "*"    " "    "*"            " "        " "         " "        
## 14  ( 1 ) "*"    " "    "*"            " "        " "         " "        
## 15  ( 1 ) " "    "*"    "*"            " "        " "         " "        
## 16  ( 1 ) " "    "*"    "*"            " "        " "         " "        
## 17  ( 1 ) " "    "*"    "*"            " "        "*"         "*"        
## 18  ( 1 ) " "    "*"    "*"            " "        "*"         "*"        
## 19  ( 1 ) " "    "*"    "*"            " "        "*"         "*"        
## 20  ( 1 ) " "    "*"    "*"            " "        "*"         "*"        
## 21  ( 1 ) "*"    "*"    "*"            " "        "*"         "*"        
## 22  ( 1 ) "*"    "*"    "*"            " "        "*"         "*"        
## 23  ( 1 ) "*"    "*"    "*"            "*"        " "         "*"        
## 24  ( 1 ) "*"    "*"    "*"            "*"        " "         "*"        
## 25  ( 1 ) "*"    "*"    "*"            "*"        "*"         "*"        
## 26  ( 1 ) "*"    "*"    "*"            "*"        "*"         "*"        
##           ftaPerGame drbPerGame astPerGame stlPerGame blkPerGame ptsPerGame
## 1  ( 1 )  " "        " "        " "        " "        " "        "*"       
## 2  ( 1 )  " "        " "        " "        " "        " "        "*"       
## 3  ( 1 )  " "        " "        "*"        " "        " "        "*"       
## 4  ( 1 )  " "        "*"        "*"        " "        " "        "*"       
## 5  ( 1 )  " "        "*"        "*"        " "        " "        "*"       
## 6  ( 1 )  " "        "*"        "*"        " "        " "        "*"       
## 7  ( 1 )  " "        "*"        "*"        " "        " "        "*"       
## 8  ( 1 )  " "        "*"        "*"        " "        " "        "*"       
## 9  ( 1 )  " "        "*"        "*"        " "        " "        "*"       
## 10  ( 1 ) " "        "*"        "*"        "*"        " "        "*"       
## 11  ( 1 ) " "        "*"        "*"        "*"        "*"        "*"       
## 12  ( 1 ) " "        "*"        "*"        "*"        "*"        "*"       
## 13  ( 1 ) " "        "*"        "*"        "*"        " "        "*"       
## 14  ( 1 ) " "        "*"        "*"        "*"        "*"        "*"       
## 15  ( 1 ) " "        "*"        "*"        "*"        "*"        "*"       
## 16  ( 1 ) " "        "*"        "*"        "*"        "*"        "*"       
## 17  ( 1 ) " "        "*"        "*"        "*"        "*"        "*"       
## 18  ( 1 ) " "        "*"        "*"        "*"        "*"        "*"       
## 19  ( 1 ) " "        "*"        "*"        "*"        "*"        "*"       
## 20  ( 1 ) " "        "*"        "*"        "*"        "*"        "*"       
## 21  ( 1 ) " "        "*"        "*"        "*"        "*"        "*"       
## 22  ( 1 ) " "        "*"        "*"        "*"        "*"        "*"       
## 23  ( 1 ) "*"        "*"        "*"        "*"        "*"        "*"       
## 24  ( 1 ) "*"        "*"        "*"        "*"        "*"        "*"       
## 25  ( 1 ) "*"        "*"        "*"        "*"        "*"        "*"       
## 26  ( 1 ) "*"        "*"        "*"        "*"        "*"        "*"
# Fit the 11-predictor model chosen from the best-subsets search and print
# its coefficient table and fit statistics.
#
# NOTE: an earlier version of this call had a comma after `pctDRB`. That
# ended the formula early, and `+ ratioPER + blkPerGame` was matched
# positionally to lm()'s `subset` argument, so the two terms were dropped from
# the model and were used to select rows instead. All eleven terms now
# live in the formula; any printed output produced by the old call must be
# regenerated by re-running/re-knitting.
new.best <- lm(
  salary ~ agePlayer + ptsPerGame + astPerGame + drbPerGame + minutes +
    countGamesStarted + pctAST + minutesPerGame + pctDRB + ratioPER +
    blkPerGame,
  data = nba.salary
)
summary(new.best)
## 
## Call:
## lm(formula = salary ~ agePlayer + ptsPerGame + astPerGame + drbPerGame + 
##     minutes + countGamesStarted + pctAST + minutesPerGame + pctDRB, 
##     data = nba.salary, subset = +ratioPER + blkPerGame)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -22077007  -1931704    897512   1560478  14803983 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -1.913e+07  2.338e+06  -8.180 4.42e-15 ***
## agePlayer          9.612e+05  5.820e+04  16.514  < 2e-16 ***
## ptsPerGame         1.296e+06  1.044e+05  12.419  < 2e-16 ***
## astPerGame         1.217e+06  1.225e+06   0.994    0.321    
## drbPerGame         4.840e+06  4.355e+05  11.113  < 2e-16 ***
## minutes            9.783e+02  1.064e+03   0.920    0.358    
## countGamesStarted  4.925e+03  3.007e+04   0.164    0.870    
## pctAST            -1.668e+07  2.546e+07  -0.655    0.513    
## minutesPerGame    -6.915e+05  1.084e+05  -6.381 5.18e-10 ***
## pctDRB            -6.219e+07  7.157e+06  -8.690  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4196000 on 376 degrees of freedom
## Multiple R-squared:  0.8712, Adjusted R-squared:  0.8681 
## F-statistic: 282.5 on 9 and 376 DF,  p-value: < 2.2e-16

These appear to be the most significant variables for predicting salary. I ran the subsets function to see how many variables would be best for predicting salary. According to the plot, the answer is about 10–12, because there is a slight inflection point there. I then ran a summary to see what the best combination of variables is with 10, 11, and 12 variables. The model with the best adjusted R² has 11 predictors, and they are the ones listed above.

# Side-by-side fit comparison of every candidate model.
# Each list name becomes that model's label in the `Model` column; glance()
# supplies the one-row fit summary from which adjusted R^2, AIC and BIC are
# kept for the rendered table.
list(
  "Full main effects" = full.model,
  "Backward Stepwise Regression" = step.pick.backward,
  "Forward Stepwise Regression" = step.pick.forward,
  "Best Selected from Backward Regression" = best.backward,
  "Best Selected from Forward Regression" = best.forward,
  "Best Subset Regression" = new.best
) %>%
  lapply(glance) %>%
  bind_rows(.id = "Model") %>%
  select(Model, Adj.R.Squared = adj.r.squared, AIC, BIC) %>%
  kable()
Model Adj.R.Squared AIC BIC
Full main effects 0.6906293 13145.83 13383.33
Backward Stepwise Regression 0.7041413 13100.57 13211.41
Forward Stepwise Regression 0.6906293 13145.83 13383.33
Best Selected from Backward Regression 0.6294817 13171.51 13215.05
Best Selected from Forward Regression 0.6079106 13193.41 13236.95
Best Subset Regression 0.8680964 12879.98 12923.49

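The model with the highest adjusted R² and the lowest AIC and BIC is, by far, the Best Subset Regression model, which suggests that its variables are the best predictors of salary in the NBA among the models considered. Note that AIC and BIC are only comparable across models that were fit to exactly the same observations.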