library(tidyverse)
## ── Attaching packages ────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.3
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ───────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
library(ggplot2)

# Import the player-level data from the Excel workbook. The sheet's first
# column has no header, so readxl assigns it the name `...1` (see message).
bb <- read_excel(path = "basketball.xlsx")
## New names:
## * `` -> ...1
# Compact overview of each column's type and leading values.
str(object = bb)
## Classes 'tbl_df', 'tbl' and 'data.frame':    559 obs. of  29 variables:
##  $ ...1                             : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ Player                           : chr  "Kay Felder" "Vince Hunter" "Josh Smith" "Jeremy Evans" ...
##  $ Country                          : chr  "USA" "USA" "USA" "USA" ...
##  $ Country_Dummy                    : num  1 1 1 1 1 0 0 1 0 0 ...
##  $ Salary                           : num  1312611 50000 5400000 104059 950000 ...
##  $ Guaranteed                       : num  456529 50000 16200000 50000 1640000 ...
##  $ Position                         : chr  "Point Guard" "Power Forward" "Power Forward" "Small Forward" ...
##  $ Age                              : num  22 23 32 30 19 29 24 22 23 24 ...
##  $ Team                             : chr  "Detroit Pistons" "Memphis Grizzlies" "New Orleans Pride" "Atlanta Hawks" ...
##  $ Player_Efficiency_Rating         : num  -31.6 35.9 3.5 10.4 23.2 30.3 11.7 8.8 25 20.8 ...
##  $ True_Shooting_Percentage         : num  0 0.6 0.25 1 0.558 0.635 0.521 0.465 0.589 0.628 ...
##  $ Three_Point_Field_Goal_Percentage: num  50 0 0 0 0 0 16.7 0 9.2 0.3 ...
##  $ Free_Throw_Percentage            : num  0 0 0 0 66.7 71.1 100 44.4 81.5 39.7 ...
##  $ Offensive_Rebound_Percentage     : num  35.9 32.3 28.1 22.4 20.4 20.4 18.7 17.9 17.2 16.9 ...
##  $ Defensive_Rebound_Percentage     : num  0 17 8.8 0 16.1 32.9 12.4 20.8 29.8 13.9 ...
##  $ Total_Rebound_Percentage         : num  18.4 24.8 18.1 11.2 18.2 26.7 15.5 19.4 23.4 15.5 ...
##  $ Assist_Percentage                : num  0 0 0 0 0 9.7 0 5.9 7.1 5.5 ...
##  $ Steal_Percentage                 : num  0 0 0 0 1.8 2.1 2.6 0 2 1.9 ...
##  $ Block_Percentage                 : num  0 13.4 0 0 9.5 3.6 0 6.7 3 2.9 ...
##  $ Turnover_Percentage              : num  33.3 16.7 0 50 14.7 11.9 18.8 21.8 7.3 13.3 ...
##  $ Usage_Percentage                 : num  44.2 38.5 14.4 17.5 21.8 25.2 12.7 23.5 21 16.7 ...
##  $ Offensive_Win_Shares             : num  -0.1 0 0 0 0.1 0.5 0 0 0.7 6.4 ...
##  $ Defensive_Win_Shares             : num  0 0 0 0 0 0.2 0 0 0.3 2.9 ...
##  $ Win_Shares                       : num  -0.1 0 0 0 0.1 0.7 0.1 0 1.1 9.3 ...
##  $ Win_Shares_Per_48_Minutes        : num  -1.005 0.099 -0.015 -0.03 0.177 ...
##  $ Offense_Box_Plus_Minus           : num  -29.5 -3.1 -6 -2.3 -4.1 2.5 -3.1 -7.5 1.4 2.2 ...
##  $ Defense_Box_Plus_Minus           : num  -11.9 -5.8 -7.4 -6.1 -1.4 0.8 -4.6 0.6 0.4 1.2 ...
##  $ Box_Plus_Minus                   : num  -41.4 -9 -13.4 -8.3 -5.4 3.2 -7.7 -6.9 1.8 3.4 ...
##  $ Value_Over_Replacement_Player    : num  0 0 0 0 0 0.2 -0.1 0 0.2 3.3 ...
# List every column name in `bb` with its position.
names(bb)
##  [1] "...1"                              "Player"                           
##  [3] "Country"                           "Country_Dummy"                    
##  [5] "Salary"                            "Guaranteed"                       
##  [7] "Position"                          "Age"                              
##  [9] "Team"                              "Player_Efficiency_Rating"         
## [11] "True_Shooting_Percentage"          "Three_Point_Field_Goal_Percentage"
## [13] "Free_Throw_Percentage"             "Offensive_Rebound_Percentage"     
## [15] "Defensive_Rebound_Percentage"      "Total_Rebound_Percentage"         
## [17] "Assist_Percentage"                 "Steal_Percentage"                 
## [19] "Block_Percentage"                  "Turnover_Percentage"              
## [21] "Usage_Percentage"                  "Offensive_Win_Shares"             
## [23] "Defensive_Win_Shares"              "Win_Shares"                       
## [25] "Win_Shares_Per_48_Minutes"         "Offense_Box_Plus_Minus"           
## [27] "Defense_Box_Plus_Minus"            "Box_Plus_Minus"                   
## [29] "Value_Over_Replacement_Player"
# Simple linear regression: Salary explained by Age alone.
modbb <- lm(formula = Salary ~ Age, data = bb)
# Auto-print the model call and its coefficient estimates.
modbb
## 
## Call:
## lm(formula = Salary ~ Age, data = bb)
## 
## Coefficients:
## (Intercept)          Age  
##    -8117036       563483
# Residual quantiles, coefficient table and fit statistics for the
# age-only model.
summary(modbb)
## 
## Call:
## lm(formula = Salary ~ Age, data = bb)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -12093644  -4265093  -2186556   2409749  26458570 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8117036    1824186  -4.450 1.04e-05 ***
## Age           563483      68059   8.279 9.23e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6826000 on 557 degrees of freedom
## Multiple R-squared:  0.1096, Adjusted R-squared:  0.108 
## F-statistic: 68.55 on 1 and 557 DF,  p-value: 9.228e-16
# Salary against Age, overlaying the fitted line from `modbb` (blue, dashed)
# and a smoothed trend (orange) for comparison.
ggplot(data = bb, mapping = aes(x = Age, y = Salary)) +
  geom_jitter() +
  geom_abline(
    intercept = coef(modbb)["(Intercept)"],
    slope = coef(modbb)["Age"],
    colour = "blue",
    linetype = 2,
    lwd = 1
  ) +
  geom_smooth(colour = "orange")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Base-graphics diagnostic plots for the age-only model.
plot(modbb)

# Distribution of the response variable.
hist(bb$Salary)

# Scatterplot matrices of Salary against the numeric predictors, drawn in
# several smaller groups rather than one very large matrix.
pairs(
  ~ Salary + Guaranteed + Age + Player_Efficiency_Rating +
    True_Shooting_Percentage + Three_Point_Field_Goal_Percentage,
  data = bb
)

pairs(
  ~ Salary + Free_Throw_Percentage + Offensive_Rebound_Percentage +
    Defensive_Rebound_Percentage + Total_Rebound_Percentage +
    Assist_Percentage,
  data = bb
)

pairs(
  ~ Salary + Steal_Percentage + Block_Percentage + Turnover_Percentage +
    Usage_Percentage + Offensive_Win_Shares,
  data = bb
)

pairs(
  ~ Salary + Defensive_Win_Shares + Win_Shares + Win_Shares_Per_48_Minutes +
    Offense_Box_Plus_Minus,
  data = bb
)

pairs(
  ~ Salary + Defense_Box_Plus_Minus + Box_Plus_Minus +
    Value_Over_Replacement_Player,
  data = bb
)

# Analysis-of-variance table for the age-only model.
anova(modbb)
## Analysis of Variance Table
## 
## Response: Salary
##            Df     Sum Sq    Mean Sq F value    Pr(>F)    
## Age         1 3.1939e+15 3.1939e+15  68.547 9.228e-16 ***
## Residuals 557 2.5953e+16 4.6594e+13                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(ggplot2)

# Multiple regression of Salary on Age, Guaranteed and the per-player
# performance statistics (22 predictors in total).
modbb2 <- lm(
  formula = Salary ~ Age + Guaranteed + Player_Efficiency_Rating +
    True_Shooting_Percentage + Three_Point_Field_Goal_Percentage +
    Free_Throw_Percentage + Offensive_Rebound_Percentage +
    Defensive_Rebound_Percentage + Total_Rebound_Percentage +
    Assist_Percentage + Steal_Percentage + Block_Percentage +
    Turnover_Percentage + Usage_Percentage + Offensive_Win_Shares +
    Defensive_Win_Shares + Win_Shares + Win_Shares_Per_48_Minutes +
    Offense_Box_Plus_Minus + Defense_Box_Plus_Minus + Box_Plus_Minus +
    Value_Over_Replacement_Player,
  data = bb
)
# Auto-print the model call and its coefficient estimates.
modbb2
## 
## Call:
## lm(formula = Salary ~ Age + Guaranteed + Player_Efficiency_Rating + 
##     True_Shooting_Percentage + Three_Point_Field_Goal_Percentage + 
##     Free_Throw_Percentage + Offensive_Rebound_Percentage + Defensive_Rebound_Percentage + 
##     Total_Rebound_Percentage + Assist_Percentage + Steal_Percentage + 
##     Block_Percentage + Turnover_Percentage + Usage_Percentage + 
##     Offensive_Win_Shares + Defensive_Win_Shares + Win_Shares + 
##     Win_Shares_Per_48_Minutes + Offense_Box_Plus_Minus + Defense_Box_Plus_Minus + 
##     Box_Plus_Minus + Value_Over_Replacement_Player, data = bb)
## 
## Coefficients:
##                       (Intercept)                                Age  
##                        -7.300e+06                          4.011e+05  
##                        Guaranteed           Player_Efficiency_Rating  
##                         1.774e-01                          1.384e+05  
##          True_Shooting_Percentage  Three_Point_Field_Goal_Percentage  
##                        -2.283e+06                         -1.546e+04  
##             Free_Throw_Percentage       Offensive_Rebound_Percentage  
##                        -3.037e+03                         -1.045e+06  
##      Defensive_Rebound_Percentage           Total_Rebound_Percentage  
##                        -8.493e+05                          1.911e+06  
##                 Assist_Percentage                   Steal_Percentage  
##                        -5.664e+04                         -2.103e+05  
##                  Block_Percentage                Turnover_Percentage  
##                        -1.739e+05                          1.448e+04  
##                  Usage_Percentage               Offensive_Win_Shares  
##                         5.667e+04                          6.708e+04  
##              Defensive_Win_Shares                         Win_Shares  
##                         4.394e+05                          2.403e+05  
##         Win_Shares_Per_48_Minutes             Offense_Box_Plus_Minus  
##                        -1.330e+07                         -2.668e+06  
##            Defense_Box_Plus_Minus                     Box_Plus_Minus  
##                        -2.804e+06                          2.954e+06  
##     Value_Over_Replacement_Player  
##                        -1.550e+05
# Residual quantiles, coefficient table and fit statistics for the
# multiple-regression model.
summary(modbb2)
## 
## Call:
## lm(formula = Salary ~ Age + Guaranteed + Player_Efficiency_Rating + 
##     True_Shooting_Percentage + Three_Point_Field_Goal_Percentage + 
##     Free_Throw_Percentage + Offensive_Rebound_Percentage + Defensive_Rebound_Percentage + 
##     Total_Rebound_Percentage + Assist_Percentage + Steal_Percentage + 
##     Block_Percentage + Turnover_Percentage + Usage_Percentage + 
##     Offensive_Win_Shares + Defensive_Win_Shares + Win_Shares + 
##     Win_Shares_Per_48_Minutes + Offense_Box_Plus_Minus + Defense_Box_Plus_Minus + 
##     Box_Plus_Minus + Value_Over_Replacement_Player, data = bb)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -22609397  -1963642   -455650   1548429  14898466 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       -7.300e+06  2.849e+06  -2.562   0.0107 *  
## Age                                4.011e+05  3.873e+04  10.356   <2e-16 ***
## Guaranteed                         1.774e-01  7.314e-03  24.255   <2e-16 ***
## Player_Efficiency_Rating           1.384e+05  1.887e+05   0.734   0.4635    
## True_Shooting_Percentage          -2.283e+06  3.018e+06  -0.756   0.4498    
## Three_Point_Field_Goal_Percentage -1.546e+04  1.491e+04  -1.037   0.3002    
## Free_Throw_Percentage             -3.037e+03  6.738e+03  -0.451   0.6524    
## Offensive_Rebound_Percentage      -1.045e+06  6.002e+05  -1.741   0.0823 .  
## Defensive_Rebound_Percentage      -8.493e+05  5.921e+05  -1.434   0.1520    
## Total_Rebound_Percentage           1.911e+06  1.186e+06   1.611   0.1078    
## Assist_Percentage                 -5.664e+04  2.986e+04  -1.897   0.0584 .  
## Steal_Percentage                  -2.103e+05  2.870e+05  -0.733   0.4640    
## Block_Percentage                  -1.739e+05  2.128e+05  -0.817   0.4143    
## Turnover_Percentage                1.448e+04  3.311e+04   0.437   0.6620    
## Usage_Percentage                   5.667e+04  7.048e+04   0.804   0.4217    
## Offensive_Win_Shares               6.708e+04  3.083e+06   0.022   0.9827    
## Defensive_Win_Shares               4.394e+05  3.083e+06   0.143   0.8867    
## Win_Shares                         2.403e+05  3.086e+06   0.078   0.9380    
## Win_Shares_Per_48_Minutes         -1.330e+07  6.777e+06  -1.962   0.0503 .  
## Offense_Box_Plus_Minus            -2.668e+06  3.304e+06  -0.807   0.4199    
## Defense_Box_Plus_Minus            -2.804e+06  3.257e+06  -0.861   0.3896    
## Box_Plus_Minus                     2.954e+06  3.268e+06   0.904   0.3665    
## Value_Over_Replacement_Player     -1.550e+05  3.718e+05  -0.417   0.6770    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3721000 on 536 degrees of freedom
## Multiple R-squared:  0.7453, Adjusted R-squared:  0.7349 
## F-statistic: 71.31 on 22 and 536 DF,  p-value: < 2.2e-16
# Observed-versus-fitted plot for the multiple regression `modbb2`.
#
# A model with 22 predictors cannot be drawn against its predictors on a
# single axis. Mapping x to the arithmetic sum of the predictor columns is not
# a quantity the model uses (the sum mixes dollars, percentages and ratings),
# so a line fitted to that sum is not the `modbb2` regression. Instead, plot
# the observed Salary against the model's fitted values.
#
# `model.frame(modbb2)` supplies exactly the rows used in the fit, so the
# fitted values and the observed response always line up.
ggplot(
  data.frame(
    Fitted = fitted(modbb2),
    Salary = model.response(model.frame(modbb2))
  ),
  aes(x = Fitted, y = Salary)
) +
  geom_point() + # plain points: jitter would only add random noise here
  geom_smooth(col = "orange") + # loess trend of observed vs. fitted
  geom_smooth(method = "lm", se = FALSE) # least-squares line of observed on fitted
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Base-graphics diagnostic plots for the multiple-regression model.
plot(modbb2)

  1. Introduction The purpose of this paper is to analyze player statistics from the 2017 to 2018 NBA season and their relationship with players’ salaries. Our dataset contains 559 observations and 28 variables (not counting the unnamed row-index column); 4 of these variables are character variables. The central question of this analysis is whether player statistics affect players’ salaries. The response variable is Salary, and the explanatory variables are a dummy variable for country (1 for players from the USA and 0 for players from outside the USA), Guaranteed, Age, Player_Efficiency_Rating, True_Shooting_Percentage, Three_Point_Field_Goal_Percentage, Free_Throw_Percentage, Offensive_Rebound_Percentage, Defensive_Rebound_Percentage, Total_Rebound_Percentage, Assist_Percentage, Steal_Percentage, Block_Percentage, Turnover_Percentage, Usage_Percentage, Offensive_Win_Shares, Defensive_Win_Shares, Win_Shares, Win_Shares_Per_48_Minutes, Offense_Box_Plus_Minus, Defense_Box_Plus_Minus, Box_Plus_Minus, and Value_Over_Replacement_Player. The residuals-versus-fitted plot does not show a non-linear relationship. From the normal Q-Q plot, we can acknowledge that the residuals are normally distributed. Outliers are not influential in this linear model because no Cook’s distance contours are visible in the residuals-versus-leverage plot.