library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.3
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(readxl)
# Load the player data set; readxl renames the unnamed first column to `...1`.
bb <- readxl::read_excel(path = "basketball.xlsx")
## New names:
## * `` -> ...1
# Inspect the type and first values of the Country column.
str(bb[["Country"]])
##  chr [1:559] "USA" "USA" "USA" "USA" "USA" "Serbia" "Ukraine" "USA" "Spain" ...
# Inspect the type and first values of the Position column.
str(bb[["Position"]])
##  chr [1:559] "Point Guard" "Power Forward" "Power Forward" "Small Forward" ...
The categorical variables that might be related to the response variable \(Y_{Salary}\) are Country and Position. The categorical Country variable enters the model as a dummy variable, and the levels of the Position variable are “Center”, “Point Guard”, “Power Forward”, “Shooting Guard”, and “Small Forward”.
# Multiple linear regression of Salary on all numeric predictors in bb.
modbb2 <- lm(
  Salary ~ Age + Guaranteed + Player_Efficiency_Rating +
    True_Shooting_Percentage + Three_Point_Field_Goal_Percentage +
    Free_Throw_Percentage + Offensive_Rebound_Percentage +
    Defensive_Rebound_Percentage + Total_Rebound_Percentage +
    Assist_Percentage + Steal_Percentage + Block_Percentage +
    Turnover_Percentage + Usage_Percentage + Offensive_Win_Shares +
    Defensive_Win_Shares + Win_Shares + Win_Shares_Per_48_Minutes +
    Offense_Box_Plus_Minus + Defense_Box_Plus_Minus + Box_Plus_Minus +
    Value_Over_Replacement_Player,
  data = bb
)
# Show the call and the estimated coefficients.
print(modbb2)
## 
## Call:
## lm(formula = Salary ~ Age + Guaranteed + Player_Efficiency_Rating + 
##     True_Shooting_Percentage + Three_Point_Field_Goal_Percentage + 
##     Free_Throw_Percentage + Offensive_Rebound_Percentage + Defensive_Rebound_Percentage + 
##     Total_Rebound_Percentage + Assist_Percentage + Steal_Percentage + 
##     Block_Percentage + Turnover_Percentage + Usage_Percentage + 
##     Offensive_Win_Shares + Defensive_Win_Shares + Win_Shares + 
##     Win_Shares_Per_48_Minutes + Offense_Box_Plus_Minus + Defense_Box_Plus_Minus + 
##     Box_Plus_Minus + Value_Over_Replacement_Player, data = bb)
## 
## Coefficients:
##                       (Intercept)                                Age  
##                        -7.300e+06                          4.011e+05  
##                        Guaranteed           Player_Efficiency_Rating  
##                         1.774e-01                          1.384e+05  
##          True_Shooting_Percentage  Three_Point_Field_Goal_Percentage  
##                        -2.283e+06                         -1.546e+04  
##             Free_Throw_Percentage       Offensive_Rebound_Percentage  
##                        -3.037e+03                         -1.045e+06  
##      Defensive_Rebound_Percentage           Total_Rebound_Percentage  
##                        -8.493e+05                          1.911e+06  
##                 Assist_Percentage                   Steal_Percentage  
##                        -5.664e+04                         -2.103e+05  
##                  Block_Percentage                Turnover_Percentage  
##                        -1.739e+05                          1.448e+04  
##                  Usage_Percentage               Offensive_Win_Shares  
##                         5.667e+04                          6.708e+04  
##              Defensive_Win_Shares                         Win_Shares  
##                         4.394e+05                          2.403e+05  
##         Win_Shares_Per_48_Minutes             Offense_Box_Plus_Minus  
##                        -1.330e+07                         -2.668e+06  
##            Defense_Box_Plus_Minus                     Box_Plus_Minus  
##                        -2.804e+06                          2.954e+06  
##     Value_Over_Replacement_Player  
##                        -1.550e+05
# Coefficient table (estimate, std. error, t-test), residual quantiles,
# R-squared and overall F-test for the numeric-predictor model.
summary(modbb2)
## 
## Call:
## lm(formula = Salary ~ Age + Guaranteed + Player_Efficiency_Rating + 
##     True_Shooting_Percentage + Three_Point_Field_Goal_Percentage + 
##     Free_Throw_Percentage + Offensive_Rebound_Percentage + Defensive_Rebound_Percentage + 
##     Total_Rebound_Percentage + Assist_Percentage + Steal_Percentage + 
##     Block_Percentage + Turnover_Percentage + Usage_Percentage + 
##     Offensive_Win_Shares + Defensive_Win_Shares + Win_Shares + 
##     Win_Shares_Per_48_Minutes + Offense_Box_Plus_Minus + Defense_Box_Plus_Minus + 
##     Box_Plus_Minus + Value_Over_Replacement_Player, data = bb)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -22609397  -1963642   -455650   1548429  14898466 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       -7.300e+06  2.849e+06  -2.562   0.0107 *  
## Age                                4.011e+05  3.873e+04  10.356   <2e-16 ***
## Guaranteed                         1.774e-01  7.314e-03  24.255   <2e-16 ***
## Player_Efficiency_Rating           1.384e+05  1.887e+05   0.734   0.4635    
## True_Shooting_Percentage          -2.283e+06  3.018e+06  -0.756   0.4498    
## Three_Point_Field_Goal_Percentage -1.546e+04  1.491e+04  -1.037   0.3002    
## Free_Throw_Percentage             -3.037e+03  6.738e+03  -0.451   0.6524    
## Offensive_Rebound_Percentage      -1.045e+06  6.002e+05  -1.741   0.0823 .  
## Defensive_Rebound_Percentage      -8.493e+05  5.921e+05  -1.434   0.1520    
## Total_Rebound_Percentage           1.911e+06  1.186e+06   1.611   0.1078    
## Assist_Percentage                 -5.664e+04  2.986e+04  -1.897   0.0584 .  
## Steal_Percentage                  -2.103e+05  2.870e+05  -0.733   0.4640    
## Block_Percentage                  -1.739e+05  2.128e+05  -0.817   0.4143    
## Turnover_Percentage                1.448e+04  3.311e+04   0.437   0.6620    
## Usage_Percentage                   5.667e+04  7.048e+04   0.804   0.4217    
## Offensive_Win_Shares               6.708e+04  3.083e+06   0.022   0.9827    
## Defensive_Win_Shares               4.394e+05  3.083e+06   0.143   0.8867    
## Win_Shares                         2.403e+05  3.086e+06   0.078   0.9380    
## Win_Shares_Per_48_Minutes         -1.330e+07  6.777e+06  -1.962   0.0503 .  
## Offense_Box_Plus_Minus            -2.668e+06  3.304e+06  -0.807   0.4199    
## Defense_Box_Plus_Minus            -2.804e+06  3.257e+06  -0.861   0.3896    
## Box_Plus_Minus                     2.954e+06  3.268e+06   0.904   0.3665    
## Value_Over_Replacement_Player     -1.550e+05  3.718e+05  -0.417   0.6770    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3721000 on 536 degrees of freedom
## Multiple R-squared:  0.7453, Adjusted R-squared:  0.7349 
## F-statistic: 71.31 on 22 and 536 DF,  p-value: < 2.2e-16
# Sequential analysis-of-variance table for modbb2: each term is tested after
# the terms listed before it, so these p-values depend on term order and can
# differ from the per-coefficient t-tests reported by summary().
anova(modbb2)
## Analysis of Variance Table
## 
## Response: Salary
##                                    Df     Sum Sq    Mean Sq   F value    Pr(>F)
## Age                                 1 3.1939e+15 3.1939e+15  230.6436 < 2.2e-16
## Guaranteed                          1 1.7116e+16 1.7116e+16 1235.9803 < 2.2e-16
## Player_Efficiency_Rating            1 2.1420e+14 2.1420e+14   15.4684 9.492e-05
## True_Shooting_Percentage            1 5.9988e+12 5.9988e+12    0.4332  0.510708
## Three_Point_Field_Goal_Percentage   1 4.1604e+13 4.1604e+13    3.0044  0.083614
## Free_Throw_Percentage               1 9.5711e+11 9.5711e+11    0.0691  0.792728
## Offensive_Rebound_Percentage        1 3.6157e+11 3.6157e+11    0.0261  0.871692
## Defensive_Rebound_Percentage        1 3.4278e+14 3.4278e+14   24.7534 8.790e-07
## Total_Rebound_Percentage            1 6.2076e+13 6.2076e+13    4.4827  0.034699
## Assist_Percentage                   1 2.1969e+13 2.1969e+13    1.5865  0.208376
## Steal_Percentage                    1 5.4734e+10 5.4734e+10    0.0040  0.949894
## Block_Percentage                    1 5.8237e+12 5.8237e+12    0.4206  0.516938
## Turnover_Percentage                 1 5.7151e+12 5.7151e+12    0.4127  0.520871
## Usage_Percentage                    1 1.3529e+14 1.3529e+14    9.7696  0.001870
## Offensive_Win_Shares                1 3.2995e+14 3.2995e+14   23.8270 1.393e-06
## Defensive_Win_Shares                1 1.4424e+14 1.4424e+14   10.4164  0.001325
## Win_Shares                          1 1.5392e+10 1.5392e+10    0.0011  0.973417
## Win_Shares_Per_48_Minutes           1 4.2972e+13 4.2972e+13    3.1032  0.078708
## Offense_Box_Plus_Minus              1 4.1327e+13 4.1327e+13    2.9844  0.084649
## Defense_Box_Plus_Minus              1 5.8863e+12 5.8863e+12    0.4251  0.514697
## Box_Plus_Minus                      1 1.1415e+13 1.1415e+13    0.8243  0.364325
## Value_Over_Replacement_Player       1 2.4060e+12 2.4060e+12    0.1737  0.676970
## Residuals                         536 7.4224e+15 1.3848e+13                    
##                                      
## Age                               ***
## Guaranteed                        ***
## Player_Efficiency_Rating          ***
## True_Shooting_Percentage             
## Three_Point_Field_Goal_Percentage .  
## Free_Throw_Percentage                
## Offensive_Rebound_Percentage         
## Defensive_Rebound_Percentage      ***
## Total_Rebound_Percentage          *  
## Assist_Percentage                    
## Steal_Percentage                     
## Block_Percentage                     
## Turnover_Percentage                  
## Usage_Percentage                  ** 
## Offensive_Win_Shares              ***
## Defensive_Win_Shares              ** 
## Win_Shares                           
## Win_Shares_Per_48_Minutes         .  
## Offense_Box_Plus_Minus            .  
## Defense_Box_Plus_Minus               
## Box_Plus_Minus                       
## Value_Over_Replacement_Player        
## Residuals                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Observed Salary against the salary predicted by modbb2.
# The x-axis uses the model's predictions rather than the raw arithmetic sum
# of the predictors: that sum mixes variables on very different scales and is
# not the linear predictor the regression actually fits.
# predict(..., newdata = bb) returns one value per row of bb, so it always
# lines up with Salary.
ggplot(bb, aes(x = predict(modbb2, newdata = bb), y = Salary)) +
  geom_jitter() +
  geom_smooth(col = "orange") + # loess smoother (see the geom_smooth() message)
  geom_smooth(method = "lm", se = FALSE) + # least-squares line
  labs(x = "Predicted Salary (modbb2)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Encode Position as a factor and display its treatment-contrast coding
# (rows are levels, columns are the dummy variables used in a model).
positionvector <- factor(bb[["Position"]])
print(contrasts(positionvector))
##                Point Guard Power Forward Shooting Guard Small Forward
## Center                   0             0              0             0
## Point Guard              1             0              0             0
## Power Forward            0             1              0             0
## Shooting Guard           0             0              1             0
## Small Forward            0             0              0             1
# Factor version of the numeric 0/1 Country_Dummy column, plus its contrast coding.
Countrydummy <- factor(bb[["Country_Dummy"]])
print(contrasts(Countrydummy))
##   1
## 0 0
## 1 1
# Original Country column (character), for comparison with its dummy encoding.
str(bb[["Country"]])
##  chr [1:559] "USA" "USA" "USA" "USA" "USA" "Serbia" "Ukraine" "USA" "Spain" ...
# Numeric 0/1 encoding of Country stored in the data set.
str(bb[["Country_Dummy"]])
##  num [1:559] 1 1 1 1 1 0 0 1 0 0 ...
# Regression of Salary on the categorical predictors only:
# the country dummy and playing position.
categoricalmodbb <- lm(
  Salary ~ Country_Dummy + positionvector,
  data = bb
)
print(categoricalmodbb)
## 
## Call:
## lm(formula = Salary ~ Country_Dummy + positionvector, data = bb)
## 
## Coefficients:
##                  (Intercept)                 Country_Dummy  
##                      7632245                        100392  
##    positionvectorPoint Guard   positionvectorPower Forward  
##                     -1610166                        -65319  
## positionvectorShooting Guard   positionvectorSmall Forward  
##                     -1176955                      -1790813
# Coefficient tests and fit statistics for the categorical-only model.
summary(categoricalmodbb)
## 
## Call:
## lm(formula = Salary ~ Country_Dummy + positionvector, data = bb)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7632637 -5051122 -2913414  3027783 28560079 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   7632245     814911   9.366   <2e-16 ***
## Country_Dummy                  100392     748992   0.134   0.8934    
## positionvectorPoint Guard    -1610166     960240  -1.677   0.0941 .  
## positionvectorPower Forward    -65319     971811  -0.067   0.9464    
## positionvectorShooting Guard -1176955     952244  -1.236   0.2170    
## positionvectorSmall Forward  -1790813     997190  -1.796   0.0731 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7221000 on 553 degrees of freedom
## Multiple R-squared:  0.01064,    Adjusted R-squared:  0.001694 
## F-statistic: 1.189 on 5 and 553 DF,  p-value: 0.313
# Salary by playing position, coloured by the country dummy.
# `Country_Dummy + positionvector` cannot be used as an x aesthetic: `+` is
# not defined for factors, so it evaluates to NA for every row (with the
# "'+' not meaningful for factors" warning) and nothing useful is drawn.
# The two categorical predictors are mapped to separate aesthetics instead,
# and smoothers are replaced by group means, which is what a model with only
# categorical predictors describes.
ggplot(bb, aes(x = positionvector, y = Salary, color = Countrydummy)) +
  geom_jitter(width = 0.2, height = 0) + # spread points horizontally only
  stat_summary(
    fun.data = mean_se, # group mean +/- one standard error
    geom = "pointrange",
    position = position_dodge(width = 0.5)
  )
## Warning in Ops.factor(Country_Dummy, positionvector): '+' not meaningful for
## factors
## Warning in Ops.factor(Country_Dummy, positionvector): '+' not meaningful for
## factors
## Warning in Ops.factor(Country_Dummy, positionvector): '+' not meaningful for
## factors
## Warning in Ops.factor(Country_Dummy, positionvector): '+' not meaningful for
## factors
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Salary distribution for each level of the country dummy.
ggplot(data = bb, mapping = aes(x = Countrydummy, y = Salary, fill = Countrydummy)) +
  geom_boxplot()
# Salary distribution for each playing position.
ggplot(data = bb, mapping = aes(x = positionvector, y = Salary, fill = positionvector)) +
  geom_boxplot()
# Observed vs. predicted Salary (modbb2), coloured by playing position.
# The x-axis uses the model's predictions instead of the raw sum of the
# predictors, which is not what the regression fits.
# Because colour is mapped to a factor, each smoother is fitted per position.
ggplot(
  bb,
  aes(x = predict(modbb2, newdata = bb), y = Salary, color = positionvector)
) +
  geom_jitter() +
  geom_smooth(col = "orange") + # loess smoother (see the geom_smooth() message)
  geom_smooth(method = "lm", se = FALSE) + # least-squares line
  labs(x = "Predicted Salary (modbb2)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#categorical 
# Observed vs. predicted Salary (modbb2), coloured by the country dummy.
# The x-axis uses the model's predictions instead of the raw sum of the
# predictors (plus the 0/1 dummy), which is not what the regression fits.
# Because colour is mapped to a factor, each smoother is fitted per group.
ggplot(
  bb,
  aes(x = predict(modbb2, newdata = bb), y = Salary, color = Countrydummy)
) +
  geom_jitter() +
  geom_smooth(col = "orange") + # loess smoother (see the geom_smooth() message)
  geom_smooth(method = "lm", se = FALSE) + # least-squares line
  labs(x = "Predicted Salary (modbb2)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#Dummy
# Full model: all numeric predictors plus the country dummy and position factor.
modbb3 <- lm(
  Salary ~ Age + Guaranteed + Player_Efficiency_Rating +
    True_Shooting_Percentage + Three_Point_Field_Goal_Percentage +
    Free_Throw_Percentage + Offensive_Rebound_Percentage +
    Defensive_Rebound_Percentage + Total_Rebound_Percentage +
    Assist_Percentage + Steal_Percentage + Block_Percentage +
    Turnover_Percentage + Usage_Percentage + Offensive_Win_Shares +
    Defensive_Win_Shares + Win_Shares + Win_Shares_Per_48_Minutes +
    Offense_Box_Plus_Minus + Defense_Box_Plus_Minus + Box_Plus_Minus +
    Value_Over_Replacement_Player + Country_Dummy + positionvector,
  data = bb
)
# Show the call and the estimated coefficients.
print(modbb3)
## 
## Call:
## lm(formula = Salary ~ Age + Guaranteed + Player_Efficiency_Rating + 
##     True_Shooting_Percentage + Three_Point_Field_Goal_Percentage + 
##     Free_Throw_Percentage + Offensive_Rebound_Percentage + Defensive_Rebound_Percentage + 
##     Total_Rebound_Percentage + Assist_Percentage + Steal_Percentage + 
##     Block_Percentage + Turnover_Percentage + Usage_Percentage + 
##     Offensive_Win_Shares + Defensive_Win_Shares + Win_Shares + 
##     Win_Shares_Per_48_Minutes + Offense_Box_Plus_Minus + Defense_Box_Plus_Minus + 
##     Box_Plus_Minus + Value_Over_Replacement_Player + Country_Dummy + 
##     positionvector, data = bb)
## 
## Coefficients:
##                       (Intercept)                                Age  
##                        -6.382e+06                          3.914e+05  
##                        Guaranteed           Player_Efficiency_Rating  
##                         1.770e-01                          1.944e+05  
##          True_Shooting_Percentage  Three_Point_Field_Goal_Percentage  
##                        -3.044e+06                         -1.198e+04  
##             Free_Throw_Percentage       Offensive_Rebound_Percentage  
##                        -2.272e+03                         -1.128e+06  
##      Defensive_Rebound_Percentage           Total_Rebound_Percentage  
##                        -9.413e+05                          2.019e+06  
##                 Assist_Percentage                   Steal_Percentage  
##                        -3.155e+04                         -1.703e+05  
##                  Block_Percentage                Turnover_Percentage  
##                        -2.463e+05                          2.502e+04  
##                  Usage_Percentage               Offensive_Win_Shares  
##                         3.278e+04                          3.190e+05  
##              Defensive_Win_Shares                         Win_Shares  
##                         8.323e+05                         -2.524e+04  
##         Win_Shares_Per_48_Minutes             Offense_Box_Plus_Minus  
##                        -1.419e+07                         -2.561e+06  
##            Defense_Box_Plus_Minus                     Box_Plus_Minus  
##                        -2.701e+06                          2.791e+06  
##     Value_Over_Replacement_Player                      Country_Dummy  
##                        -1.972e+05                          4.632e+05  
##         positionvectorPoint Guard        positionvectorPower Forward  
##                        -1.746e+06                         -1.050e+05  
##      positionvectorShooting Guard        positionvectorSmall Forward  
##                        -1.102e+06                         -1.288e+06
# Coefficient tests and fit statistics for the model with numeric and
# categorical predictors.
summary(modbb3)
## 
## Call:
## lm(formula = Salary ~ Age + Guaranteed + Player_Efficiency_Rating + 
##     True_Shooting_Percentage + Three_Point_Field_Goal_Percentage + 
##     Free_Throw_Percentage + Offensive_Rebound_Percentage + Defensive_Rebound_Percentage + 
##     Total_Rebound_Percentage + Assist_Percentage + Steal_Percentage + 
##     Block_Percentage + Turnover_Percentage + Usage_Percentage + 
##     Offensive_Win_Shares + Defensive_Win_Shares + Win_Shares + 
##     Win_Shares_Per_48_Minutes + Offense_Box_Plus_Minus + Defense_Box_Plus_Minus + 
##     Box_Plus_Minus + Value_Over_Replacement_Player + Country_Dummy + 
##     positionvector, data = bb)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -21928752  -1875774   -342348   1524878  14084130 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       -6.382e+06  3.016e+06  -2.116   0.0348 *  
## Age                                3.914e+05  3.914e+04  10.000   <2e-16 ***
## Guaranteed                         1.770e-01  7.333e-03  24.141   <2e-16 ***
## Player_Efficiency_Rating           1.944e+05  1.919e+05   1.013   0.3115    
## True_Shooting_Percentage          -3.044e+06  3.045e+06  -1.000   0.3179    
## Three_Point_Field_Goal_Percentage -1.198e+04  1.505e+04  -0.796   0.4263    
## Free_Throw_Percentage             -2.272e+03  6.756e+03  -0.336   0.7368    
## Offensive_Rebound_Percentage      -1.128e+06  6.007e+05  -1.878   0.0610 .  
## Defensive_Rebound_Percentage      -9.413e+05  5.926e+05  -1.588   0.1128    
## Total_Rebound_Percentage           2.019e+06  1.186e+06   1.703   0.0892 .  
## Assist_Percentage                 -3.155e+04  3.622e+04  -0.871   0.3842    
## Steal_Percentage                  -1.703e+05  2.882e+05  -0.591   0.5548    
## Block_Percentage                  -2.463e+05  2.177e+05  -1.132   0.2582    
## Turnover_Percentage                2.502e+04  3.377e+04   0.741   0.4592    
## Usage_Percentage                   3.278e+04  7.160e+04   0.458   0.6473    
## Offensive_Win_Shares               3.190e+05  3.084e+06   0.103   0.9177    
## Defensive_Win_Shares               8.323e+05  3.086e+06   0.270   0.7875    
## Win_Shares                        -2.524e+04  3.087e+06  -0.008   0.9935    
## Win_Shares_Per_48_Minutes         -1.419e+07  6.941e+06  -2.045   0.0413 *  
## Offense_Box_Plus_Minus            -2.561e+06  3.300e+06  -0.776   0.4380    
## Defense_Box_Plus_Minus            -2.701e+06  3.251e+06  -0.831   0.4065    
## Box_Plus_Minus                     2.791e+06  3.264e+06   0.855   0.3928    
## Value_Over_Replacement_Player     -1.972e+05  3.757e+05  -0.525   0.5999    
## Country_Dummy                      4.632e+05  3.969e+05   1.167   0.2438    
## positionvectorPoint Guard         -1.746e+06  8.793e+05  -1.986   0.0476 *  
## positionvectorPower Forward       -1.050e+05  5.717e+05  -0.184   0.8544    
## positionvectorShooting Guard      -1.102e+06  7.658e+05  -1.439   0.1506    
## positionvectorSmall Forward       -1.288e+06  7.124e+05  -1.808   0.0712 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3712000 on 531 degrees of freedom
## Multiple R-squared:  0.749,  Adjusted R-squared:  0.7362 
## F-statistic: 58.68 on 27 and 531 DF,  p-value: < 2.2e-16
# Observed vs. predicted Salary (modbb3), coloured by playing position.
# The x-axis uses the model's predictions instead of the raw sum of the
# predictors, which is not what the regression fits.
# Because colour is mapped to a factor, each smoother is fitted per position.
ggplot(
  bb,
  aes(x = predict(modbb3, newdata = bb), y = Salary, color = positionvector)
) +
  geom_jitter() +
  geom_smooth(col = "orange") + # loess smoother (see the geom_smooth() message)
  geom_smooth(method = "lm", se = FALSE) + # least-squares line
  labs(x = "Predicted Salary (modbb3)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Observed vs. predicted Salary (modbb3), coloured by the country dummy.
# The x-axis uses the model's predictions instead of the raw sum of the
# predictors, which is not what the regression fits.
# Because colour is mapped to a factor, each smoother is fitted per group.
ggplot(
  bb,
  aes(x = predict(modbb3, newdata = bb), y = Salary, color = Countrydummy)
) +
  geom_jitter() +
  geom_smooth(col = "orange") + # loess smoother (see the geom_smooth() message)
  geom_smooth(method = "lm", se = FALSE) + # least-squares line
  labs(x = "Predicted Salary (modbb3)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
library(ISLR)
# Load the Carseats data set and inspect its columns and types.
data("Carseats")
str(Carseats)
## 'data.frame':    400 obs. of  11 variables:
##  $ Sales      : num  9.5 11.22 10.06 7.4 4.15 ...
##  $ CompPrice  : num  138 111 113 117 141 124 115 136 132 132 ...
##  $ Income     : num  73 48 35 100 64 113 105 81 110 113 ...
##  $ Advertising: num  11 16 10 4 3 13 0 15 0 0 ...
##  $ Population : num  276 260 269 466 340 501 45 425 108 131 ...
##  $ Price      : num  120 83 80 97 128 72 108 120 124 124 ...
##  $ ShelveLoc  : Factor w/ 3 levels "Bad","Good","Medium": 1 2 3 3 1 1 3 2 3 3 ...
##  $ Age        : num  42 65 59 55 38 78 71 67 76 76 ...
##  $ Education  : num  17 10 12 14 13 16 15 10 10 17 ...
##  $ Urban      : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 2 2 1 1 ...
##  $ US         : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 1 2 1 2 ...
# Sales and Price are numeric variables; Urban and US are categorical ("Yes" or "No").
library(ggplot2)
# Sales distribution for each level of US.
ggplot(data = Carseats, mapping = aes(x = US, y = Sales, fill = US)) +
  geom_boxplot()
# Sales distribution for each level of Urban.
ggplot(data = Carseats, mapping = aes(x = Urban, y = Sales, fill = Urban)) +
  geom_boxplot()
# Regress Sales on Price and the two Yes/No factors Urban and US.
modcarseats1 <- lm(
  Sales ~ Price + Urban + US,
  data = Carseats
)
print(modcarseats1)
## 
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
## 
## Coefficients:
## (Intercept)        Price     UrbanYes        USYes  
##    13.04347     -0.05446     -0.02192      1.20057
# Coefficient tests and fit statistics for the Carseats model.
summary(modcarseats1)
## 
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9206 -1.6220 -0.0564  1.5786  7.0581 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.043469   0.651012  20.036  < 2e-16 ***
## Price       -0.054459   0.005242 -10.389  < 2e-16 ***
## UrbanYes    -0.021916   0.271650  -0.081    0.936    
## USYes        1.200573   0.259042   4.635 4.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2335 
## F-statistic: 41.52 on 3 and 396 DF,  p-value: < 2.2e-16
# Sales vs. Price, coloured by Urban, with the parallel fitted lines implied
# by modcarseats1.
# Fixes relative to the previous version:
#   * x is Price alone; `Price + Urban + US` applies `+` to factors and
#     evaluates to NA for every row.
#   * every line uses the Price coefficient as its slope (not USYes).
#   * intercept shifts use the matching dummy coefficients (UrbanYes / USYes).
# Coefficients are looked up by name so the lines cannot drift out of sync
# with the coefficient order.
ggplot(data = Carseats, aes(x = Price, y = Sales, color = Urban)) +
  geom_jitter() +
  geom_abline(
    intercept = coef(modcarseats1)[["(Intercept)"]],
    slope = coef(modcarseats1)[["Price"]],
    color = "red", lwd = 1
  ) + # Urban = No, US = No (baseline)
  geom_abline(
    intercept = coef(modcarseats1)[["(Intercept)"]] +
      coef(modcarseats1)[["UrbanYes"]],
    slope = coef(modcarseats1)[["Price"]],
    color = "forestgreen", lwd = 1
  ) + # Urban = Yes, US = No
  geom_abline(
    intercept = coef(modcarseats1)[["(Intercept)"]] +
      coef(modcarseats1)[["USYes"]],
    slope = coef(modcarseats1)[["Price"]],
    color = "blue", lwd = 1
  ) # Urban = No, US = Yes
## Warning in Ops.factor(Price, Urban): '+' not meaningful for factors
## Warning in Ops.factor(Price + Urban, US): '+' not meaningful for factors
## Warning in Ops.factor(Price, Urban): '+' not meaningful for factors
## Warning in Ops.factor(Price + Urban, US): '+' not meaningful for factors
# Sales vs. Price, coloured by US, with the parallel fitted lines implied by
# modcarseats1.
# Fixes relative to the previous version:
#   * x is Price alone; `Price + Urban + US` applies `+` to factors and
#     evaluates to NA for every row.
#   * every line uses the Price coefficient as its slope (not USYes).
#   * intercept shifts use the matching dummy coefficients (UrbanYes / USYes).
# Coefficients are looked up by name so the lines cannot drift out of sync
# with the coefficient order.
ggplot(data = Carseats, aes(x = Price, y = Sales, color = US)) +
  geom_jitter() +
  geom_abline(
    intercept = coef(modcarseats1)[["(Intercept)"]],
    slope = coef(modcarseats1)[["Price"]],
    color = "red", lwd = 1
  ) + # Urban = No, US = No (baseline)
  geom_abline(
    intercept = coef(modcarseats1)[["(Intercept)"]] +
      coef(modcarseats1)[["UrbanYes"]],
    slope = coef(modcarseats1)[["Price"]],
    color = "forestgreen", lwd = 1
  ) + # Urban = Yes, US = No
  geom_abline(
    intercept = coef(modcarseats1)[["(Intercept)"]] +
      coef(modcarseats1)[["USYes"]],
    slope = coef(modcarseats1)[["Price"]],
    color = "blue", lwd = 1
  ) # Urban = No, US = Yes
## Warning in Ops.factor(Price, Urban): '+' not meaningful for factors
## Warning in Ops.factor(Price, Urban): '+' not meaningful for factors
## Warning in Ops.factor(Price, Urban): '+' not meaningful for factors
## Warning in Ops.factor(Price + Urban, US): '+' not meaningful for factors
\(Y_{Sales}\) = \(\beta_0\) + \(\beta_1\) \(X_{Price}\) + \(\beta_2\) \(X_{Urban}\), where \(\beta_1\) is the estimated coefficient of Price and \(X_{Urban}\) is equal to 1 if Urban is “Yes” and equal to 0 if Urban is “No”.
\(Y_{Sales}\) = \(\beta_0\) + \(\beta_1\) \(X_{Price}\) + \(\beta_2\) \(X_{US}\), where \(\beta_1\) is the estimated coefficient of Price and \(X_{US}\) is equal to 1 if US is “Yes” and equal to 0 if US is “No”.