library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.3
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(readxl)
# Read the basketball workbook into `bb`, then inspect the two text columns
# (Country and Position) that are candidates for categorical predictors.
bb <- read_excel(path = "basketball.xlsx")
## New names:
## * `` -> ...1
str(bb[["Country"]])
## chr [1:559] "USA" "USA" "USA" "USA" "USA" "Serbia" "Ukraine" "USA" "Spain" ...
str(bb[["Position"]])
## chr [1:559] "Point Guard" "Power Forward" "Power Forward" "Small Forward" ...
Categorical variables that might be related to the response variable \(Y_{Salary}\) are Country and Position. Country is represented in the model by a 0/1 dummy variable (Country_Dummy), and the levels of Position are “Center”, “Point Guard”, “Power Forward”, “Shooting Guard”, and “Small Forward”.
# Multiple linear regression of Salary on the 22 numeric player statistics.
# The formula is laid out one or two terms per line purely for readability;
# the fitted model and its stored call are the same as the one-line form.
modbb2 <- lm(
  formula = Salary ~ Age + Guaranteed + Player_Efficiency_Rating +
    True_Shooting_Percentage + Three_Point_Field_Goal_Percentage +
    Free_Throw_Percentage +
    Offensive_Rebound_Percentage + Defensive_Rebound_Percentage +
    Total_Rebound_Percentage +
    Assist_Percentage + Steal_Percentage + Block_Percentage +
    Turnover_Percentage + Usage_Percentage +
    Offensive_Win_Shares + Defensive_Win_Shares + Win_Shares +
    Win_Shares_Per_48_Minutes +
    Offense_Box_Plus_Minus + Defense_Box_Plus_Minus + Box_Plus_Minus +
    Value_Over_Replacement_Player,
  data = bb
)
print(modbb2)
##
## Call:
## lm(formula = Salary ~ Age + Guaranteed + Player_Efficiency_Rating +
## True_Shooting_Percentage + Three_Point_Field_Goal_Percentage +
## Free_Throw_Percentage + Offensive_Rebound_Percentage + Defensive_Rebound_Percentage +
## Total_Rebound_Percentage + Assist_Percentage + Steal_Percentage +
## Block_Percentage + Turnover_Percentage + Usage_Percentage +
## Offensive_Win_Shares + Defensive_Win_Shares + Win_Shares +
## Win_Shares_Per_48_Minutes + Offense_Box_Plus_Minus + Defense_Box_Plus_Minus +
## Box_Plus_Minus + Value_Over_Replacement_Player, data = bb)
##
## Coefficients:
## (Intercept) Age
## -7.300e+06 4.011e+05
## Guaranteed Player_Efficiency_Rating
## 1.774e-01 1.384e+05
## True_Shooting_Percentage Three_Point_Field_Goal_Percentage
## -2.283e+06 -1.546e+04
## Free_Throw_Percentage Offensive_Rebound_Percentage
## -3.037e+03 -1.045e+06
## Defensive_Rebound_Percentage Total_Rebound_Percentage
## -8.493e+05 1.911e+06
## Assist_Percentage Steal_Percentage
## -5.664e+04 -2.103e+05
## Block_Percentage Turnover_Percentage
## -1.739e+05 1.448e+04
## Usage_Percentage Offensive_Win_Shares
## 5.667e+04 6.708e+04
## Defensive_Win_Shares Win_Shares
## 4.394e+05 2.403e+05
## Win_Shares_Per_48_Minutes Offense_Box_Plus_Minus
## -1.330e+07 -2.668e+06
## Defense_Box_Plus_Minus Box_Plus_Minus
## -2.804e+06 2.954e+06
## Value_Over_Replacement_Player
## -1.550e+05
# Coefficient table for modbb2: estimates, standard errors, per-term t-tests,
# plus residual standard error, R-squared and the overall F-test.
summary(modbb2)
##
## Call:
## lm(formula = Salary ~ Age + Guaranteed + Player_Efficiency_Rating +
## True_Shooting_Percentage + Three_Point_Field_Goal_Percentage +
## Free_Throw_Percentage + Offensive_Rebound_Percentage + Defensive_Rebound_Percentage +
## Total_Rebound_Percentage + Assist_Percentage + Steal_Percentage +
## Block_Percentage + Turnover_Percentage + Usage_Percentage +
## Offensive_Win_Shares + Defensive_Win_Shares + Win_Shares +
## Win_Shares_Per_48_Minutes + Offense_Box_Plus_Minus + Defense_Box_Plus_Minus +
## Box_Plus_Minus + Value_Over_Replacement_Player, data = bb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22609397 -1963642 -455650 1548429 14898466
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.300e+06 2.849e+06 -2.562 0.0107 *
## Age 4.011e+05 3.873e+04 10.356 <2e-16 ***
## Guaranteed 1.774e-01 7.314e-03 24.255 <2e-16 ***
## Player_Efficiency_Rating 1.384e+05 1.887e+05 0.734 0.4635
## True_Shooting_Percentage -2.283e+06 3.018e+06 -0.756 0.4498
## Three_Point_Field_Goal_Percentage -1.546e+04 1.491e+04 -1.037 0.3002
## Free_Throw_Percentage -3.037e+03 6.738e+03 -0.451 0.6524
## Offensive_Rebound_Percentage -1.045e+06 6.002e+05 -1.741 0.0823 .
## Defensive_Rebound_Percentage -8.493e+05 5.921e+05 -1.434 0.1520
## Total_Rebound_Percentage 1.911e+06 1.186e+06 1.611 0.1078
## Assist_Percentage -5.664e+04 2.986e+04 -1.897 0.0584 .
## Steal_Percentage -2.103e+05 2.870e+05 -0.733 0.4640
## Block_Percentage -1.739e+05 2.128e+05 -0.817 0.4143
## Turnover_Percentage 1.448e+04 3.311e+04 0.437 0.6620
## Usage_Percentage 5.667e+04 7.048e+04 0.804 0.4217
## Offensive_Win_Shares 6.708e+04 3.083e+06 0.022 0.9827
## Defensive_Win_Shares 4.394e+05 3.083e+06 0.143 0.8867
## Win_Shares 2.403e+05 3.086e+06 0.078 0.9380
## Win_Shares_Per_48_Minutes -1.330e+07 6.777e+06 -1.962 0.0503 .
## Offense_Box_Plus_Minus -2.668e+06 3.304e+06 -0.807 0.4199
## Defense_Box_Plus_Minus -2.804e+06 3.257e+06 -0.861 0.3896
## Box_Plus_Minus 2.954e+06 3.268e+06 0.904 0.3665
## Value_Over_Replacement_Player -1.550e+05 3.718e+05 -0.417 0.6770
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3721000 on 536 degrees of freedom
## Multiple R-squared: 0.7453, Adjusted R-squared: 0.7349
## F-statistic: 71.31 on 22 and 536 DF, p-value: < 2.2e-16
# ANOVA table for modbb2, one row per term in formula order. These F-tests are
# not the same as the t-tests from summary(): e.g. Player_Efficiency_Rating is
# highly significant here but not in the coefficient table, because each row
# is assessed after only the terms listed above it (sequential sums of squares).
anova(modbb2)
## Analysis of Variance Table
##
## Response: Salary
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 1 3.1939e+15 3.1939e+15 230.6436 < 2.2e-16
## Guaranteed 1 1.7116e+16 1.7116e+16 1235.9803 < 2.2e-16
## Player_Efficiency_Rating 1 2.1420e+14 2.1420e+14 15.4684 9.492e-05
## True_Shooting_Percentage 1 5.9988e+12 5.9988e+12 0.4332 0.510708
## Three_Point_Field_Goal_Percentage 1 4.1604e+13 4.1604e+13 3.0044 0.083614
## Free_Throw_Percentage 1 9.5711e+11 9.5711e+11 0.0691 0.792728
## Offensive_Rebound_Percentage 1 3.6157e+11 3.6157e+11 0.0261 0.871692
## Defensive_Rebound_Percentage 1 3.4278e+14 3.4278e+14 24.7534 8.790e-07
## Total_Rebound_Percentage 1 6.2076e+13 6.2076e+13 4.4827 0.034699
## Assist_Percentage 1 2.1969e+13 2.1969e+13 1.5865 0.208376
## Steal_Percentage 1 5.4734e+10 5.4734e+10 0.0040 0.949894
## Block_Percentage 1 5.8237e+12 5.8237e+12 0.4206 0.516938
## Turnover_Percentage 1 5.7151e+12 5.7151e+12 0.4127 0.520871
## Usage_Percentage 1 1.3529e+14 1.3529e+14 9.7696 0.001870
## Offensive_Win_Shares 1 3.2995e+14 3.2995e+14 23.8270 1.393e-06
## Defensive_Win_Shares 1 1.4424e+14 1.4424e+14 10.4164 0.001325
## Win_Shares 1 1.5392e+10 1.5392e+10 0.0011 0.973417
## Win_Shares_Per_48_Minutes 1 4.2972e+13 4.2972e+13 3.1032 0.078708
## Offense_Box_Plus_Minus 1 4.1327e+13 4.1327e+13 2.9844 0.084649
## Defense_Box_Plus_Minus 1 5.8863e+12 5.8863e+12 0.4251 0.514697
## Box_Plus_Minus 1 1.1415e+13 1.1415e+13 0.8243 0.364325
## Value_Over_Replacement_Player 1 2.4060e+12 2.4060e+12 0.1737 0.676970
## Residuals 536 7.4224e+15 1.3848e+13
##
## Age ***
## Guaranteed ***
## Player_Efficiency_Rating ***
## True_Shooting_Percentage
## Three_Point_Field_Goal_Percentage .
## Free_Throw_Percentage
## Offensive_Rebound_Percentage
## Defensive_Rebound_Percentage ***
## Total_Rebound_Percentage *
## Assist_Percentage
## Steal_Percentage
## Block_Percentage
## Turnover_Percentage
## Usage_Percentage **
## Offensive_Win_Shares ***
## Defensive_Win_Shares **
## Win_Shares
## Win_Shares_Per_48_Minutes .
## Offense_Box_Plus_Minus .
## Defense_Box_Plus_Minus
## Box_Plus_Minus
## Value_Over_Replacement_Player
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Diagnostic plot for modbb2: observed Salary against the salary predicted by
# the model. Writing `Age + Guaranteed + ...` inside aes() does not draw a
# multiple regression; it arithmetically adds the raw column values (which are
# on very different scales) into a single meaningless x value. The model's
# predictions are the one-dimensional summary of all predictors that can be
# plotted against the response.
# predict(..., newdata = bb) returns one value per row of bb, so it stays
# aligned with Salary even if some rows have missing predictors.
ggplot(bb, aes(x = predict(modbb2, newdata = bb), y = Salary)) +
  geom_jitter() +
  geom_smooth(col = "orange") + # default smoother (loess for this data): local trend
  geom_smooth(method = "lm", se = FALSE) + # least-squares line
  labs(x = "Predicted salary (modbb2)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Encode Position as a factor and show the dummy (treatment) coding that lm()
# will use for it: one indicator column per non-reference level.
positionvector <- factor(bb[["Position"]])
contrasts(positionvector)
## Point Guard Power Forward Shooting Guard Small Forward
## Center 0 0 0 0
## Point Guard 1 0 0 0
## Power Forward 0 1 0 0
## Shooting Guard 0 0 1 0
## Small Forward 0 0 0 1
# Factor version of the 0/1 country indicator (used for colouring/filling
# plots), its contrast coding, and a side-by-side look at the raw Country
# strings and the numeric indicator derived from them.
Countrydummy <- factor(bb[["Country_Dummy"]])
contrasts(Countrydummy)
## 1
## 0 0
## 1 1
str(bb[["Country"]])
## chr [1:559] "USA" "USA" "USA" "USA" "USA" "Serbia" "Ukraine" "USA" "Spain" ...
str(bb[["Country_Dummy"]])
## num [1:559] 1 1 1 1 1 0 0 1 0 0 ...
# Salary modelled on the categorical predictors only: the numeric country
# indicator column of bb and the Position factor built above.
categoricalmodbb <- lm(
  formula = Salary ~ Country_Dummy + positionvector,
  data = bb
)
print(categoricalmodbb)
##
## Call:
## lm(formula = Salary ~ Country_Dummy + positionvector, data = bb)
##
## Coefficients:
## (Intercept) Country_Dummy
## 7632245 100392
## positionvectorPoint Guard positionvectorPower Forward
## -1610166 -65319
## positionvectorShooting Guard positionvectorSmall Forward
## -1176955 -1790813
# Coefficient table and fit statistics for the categorical-only model.
summary(categoricalmodbb)
##
## Call:
## lm(formula = Salary ~ Country_Dummy + positionvector, data = bb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7632637 -5051122 -2913414 3027783 28560079
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7632245 814911 9.366 <2e-16 ***
## Country_Dummy 100392 748992 0.134 0.8934
## positionvectorPoint Guard -1610166 960240 -1.677 0.0941 .
## positionvectorPower Forward -65319 971811 -0.067 0.9464
## positionvectorShooting Guard -1176955 952244 -1.236 0.2170
## positionvectorSmall Forward -1790813 997190 -1.796 0.0731 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7221000 on 553 degrees of freedom
## Multiple R-squared: 0.01064, Adjusted R-squared: 0.001694
## F-statistic: 1.189 on 5 and 553 DF, p-value: 0.313
# Salary by position, coloured by the 0/1 country indicator, for the
# categorical-only model. Both predictors are categorical, so they cannot be
# added together to form an x axis: `Country_Dummy + positionvector` is
# arithmetic on a factor, which yields NA and "'+' not meaningful for factors"
# warnings. Smoothers over such an axis have no meaning either, so they are
# dropped. The black diamonds mark the values predicted by categoricalmodbb
# for each observation's position/country combination.
ggplot(bb, aes(x = positionvector, y = Salary, color = Countrydummy)) +
  geom_jitter(width = 0.2, height = 0) + # spread points sideways only
  geom_point(
    aes(y = predict(categoricalmodbb, newdata = bb)),
    color = "black", shape = 18, size = 3
  )
# Salary distribution for each value of the country indicator.
ggplot(
  data = bb,
  mapping = aes(x = Countrydummy, y = Salary, fill = Countrydummy)
) +
  geom_boxplot()
# Salary distribution for each playing position.
ggplot(
  data = bb,
  mapping = aes(x = positionvector, y = Salary, fill = positionvector)
) +
  geom_boxplot()
# Observed Salary against the salary predicted by modbb2, coloured by playing
# position. Summing the predictor columns inside aes() only adds raw values on
# unrelated scales, so the model's predictions are used as the x axis instead.
# predict(..., newdata = bb) returns one value per row of bb, keeping it
# aligned with Salary and positionvector.
ggplot(
  bb,
  aes(x = predict(modbb2, newdata = bb), y = Salary, color = positionvector)
) +
  geom_jitter() +
  geom_smooth(col = "orange") + # default smoother (loess for this data), one per position
  geom_smooth(method = "lm", se = FALSE) + # least-squares line per position
  labs(x = "Predicted salary (modbb2)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#categorical
# Observed Salary against the salary predicted by modbb2, coloured by the
# country indicator. Summing the predictor columns (and Country_Dummy) inside
# aes() only adds raw values on unrelated scales, so the model's predictions
# are used as the x axis; the country information is carried by the colour.
# predict(..., newdata = bb) returns one value per row of bb, keeping it
# aligned with Salary and Countrydummy.
ggplot(
  bb,
  aes(x = predict(modbb2, newdata = bb), y = Salary, color = Countrydummy)
) +
  geom_jitter() +
  geom_smooth(col = "orange") + # default smoother (loess for this data), one per group
  geom_smooth(method = "lm", se = FALSE) + # least-squares line per group
  labs(x = "Predicted salary (modbb2)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#Dummy
# Full model: the 22 numeric player statistics plus both categorical
# predictors (the 0/1 country indicator and the Position factor).
# The formula is laid out over several lines purely for readability.
modbb3 <- lm(
  formula = Salary ~ Age + Guaranteed + Player_Efficiency_Rating +
    True_Shooting_Percentage + Three_Point_Field_Goal_Percentage +
    Free_Throw_Percentage +
    Offensive_Rebound_Percentage + Defensive_Rebound_Percentage +
    Total_Rebound_Percentage +
    Assist_Percentage + Steal_Percentage + Block_Percentage +
    Turnover_Percentage + Usage_Percentage +
    Offensive_Win_Shares + Defensive_Win_Shares + Win_Shares +
    Win_Shares_Per_48_Minutes +
    Offense_Box_Plus_Minus + Defense_Box_Plus_Minus + Box_Plus_Minus +
    Value_Over_Replacement_Player +
    Country_Dummy + positionvector,
  data = bb
)
print(modbb3)
##
## Call:
## lm(formula = Salary ~ Age + Guaranteed + Player_Efficiency_Rating +
## True_Shooting_Percentage + Three_Point_Field_Goal_Percentage +
## Free_Throw_Percentage + Offensive_Rebound_Percentage + Defensive_Rebound_Percentage +
## Total_Rebound_Percentage + Assist_Percentage + Steal_Percentage +
## Block_Percentage + Turnover_Percentage + Usage_Percentage +
## Offensive_Win_Shares + Defensive_Win_Shares + Win_Shares +
## Win_Shares_Per_48_Minutes + Offense_Box_Plus_Minus + Defense_Box_Plus_Minus +
## Box_Plus_Minus + Value_Over_Replacement_Player + Country_Dummy +
## positionvector, data = bb)
##
## Coefficients:
## (Intercept) Age
## -6.382e+06 3.914e+05
## Guaranteed Player_Efficiency_Rating
## 1.770e-01 1.944e+05
## True_Shooting_Percentage Three_Point_Field_Goal_Percentage
## -3.044e+06 -1.198e+04
## Free_Throw_Percentage Offensive_Rebound_Percentage
## -2.272e+03 -1.128e+06
## Defensive_Rebound_Percentage Total_Rebound_Percentage
## -9.413e+05 2.019e+06
## Assist_Percentage Steal_Percentage
## -3.155e+04 -1.703e+05
## Block_Percentage Turnover_Percentage
## -2.463e+05 2.502e+04
## Usage_Percentage Offensive_Win_Shares
## 3.278e+04 3.190e+05
## Defensive_Win_Shares Win_Shares
## 8.323e+05 -2.524e+04
## Win_Shares_Per_48_Minutes Offense_Box_Plus_Minus
## -1.419e+07 -2.561e+06
## Defense_Box_Plus_Minus Box_Plus_Minus
## -2.701e+06 2.791e+06
## Value_Over_Replacement_Player Country_Dummy
## -1.972e+05 4.632e+05
## positionvectorPoint Guard positionvectorPower Forward
## -1.746e+06 -1.050e+05
## positionvectorShooting Guard positionvectorSmall Forward
## -1.102e+06 -1.288e+06
# Coefficient table and fit statistics for the full model (numeric statistics
# plus the country indicator and position dummies).
summary(modbb3)
##
## Call:
## lm(formula = Salary ~ Age + Guaranteed + Player_Efficiency_Rating +
## True_Shooting_Percentage + Three_Point_Field_Goal_Percentage +
## Free_Throw_Percentage + Offensive_Rebound_Percentage + Defensive_Rebound_Percentage +
## Total_Rebound_Percentage + Assist_Percentage + Steal_Percentage +
## Block_Percentage + Turnover_Percentage + Usage_Percentage +
## Offensive_Win_Shares + Defensive_Win_Shares + Win_Shares +
## Win_Shares_Per_48_Minutes + Offense_Box_Plus_Minus + Defense_Box_Plus_Minus +
## Box_Plus_Minus + Value_Over_Replacement_Player + Country_Dummy +
## positionvector, data = bb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21928752 -1875774 -342348 1524878 14084130
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.382e+06 3.016e+06 -2.116 0.0348 *
## Age 3.914e+05 3.914e+04 10.000 <2e-16 ***
## Guaranteed 1.770e-01 7.333e-03 24.141 <2e-16 ***
## Player_Efficiency_Rating 1.944e+05 1.919e+05 1.013 0.3115
## True_Shooting_Percentage -3.044e+06 3.045e+06 -1.000 0.3179
## Three_Point_Field_Goal_Percentage -1.198e+04 1.505e+04 -0.796 0.4263
## Free_Throw_Percentage -2.272e+03 6.756e+03 -0.336 0.7368
## Offensive_Rebound_Percentage -1.128e+06 6.007e+05 -1.878 0.0610 .
## Defensive_Rebound_Percentage -9.413e+05 5.926e+05 -1.588 0.1128
## Total_Rebound_Percentage 2.019e+06 1.186e+06 1.703 0.0892 .
## Assist_Percentage -3.155e+04 3.622e+04 -0.871 0.3842
## Steal_Percentage -1.703e+05 2.882e+05 -0.591 0.5548
## Block_Percentage -2.463e+05 2.177e+05 -1.132 0.2582
## Turnover_Percentage 2.502e+04 3.377e+04 0.741 0.4592
## Usage_Percentage 3.278e+04 7.160e+04 0.458 0.6473
## Offensive_Win_Shares 3.190e+05 3.084e+06 0.103 0.9177
## Defensive_Win_Shares 8.323e+05 3.086e+06 0.270 0.7875
## Win_Shares -2.524e+04 3.087e+06 -0.008 0.9935
## Win_Shares_Per_48_Minutes -1.419e+07 6.941e+06 -2.045 0.0413 *
## Offense_Box_Plus_Minus -2.561e+06 3.300e+06 -0.776 0.4380
## Defense_Box_Plus_Minus -2.701e+06 3.251e+06 -0.831 0.4065
## Box_Plus_Minus 2.791e+06 3.264e+06 0.855 0.3928
## Value_Over_Replacement_Player -1.972e+05 3.757e+05 -0.525 0.5999
## Country_Dummy 4.632e+05 3.969e+05 1.167 0.2438
## positionvectorPoint Guard -1.746e+06 8.793e+05 -1.986 0.0476 *
## positionvectorPower Forward -1.050e+05 5.717e+05 -0.184 0.8544
## positionvectorShooting Guard -1.102e+06 7.658e+05 -1.439 0.1506
## positionvectorSmall Forward -1.288e+06 7.124e+05 -1.808 0.0712 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3712000 on 531 degrees of freedom
## Multiple R-squared: 0.749, Adjusted R-squared: 0.7362
## F-statistic: 58.68 on 27 and 531 DF, p-value: < 2.2e-16
# Observed Salary against the salary predicted by the full model modbb3,
# coloured by playing position. Summing the predictor columns inside aes()
# only adds raw values on unrelated scales (and cannot include the Position
# factor at all), so the model's predictions are used as the x axis.
# predict(..., newdata = bb) returns one value per row of bb, keeping it
# aligned with Salary and positionvector.
ggplot(
  bb,
  aes(x = predict(modbb3, newdata = bb), y = Salary, color = positionvector)
) +
  geom_jitter() +
  geom_smooth(col = "orange") + # default smoother (loess for this data), one per position
  geom_smooth(method = "lm", se = FALSE) + # least-squares line per position
  labs(x = "Predicted salary (modbb3)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Observed Salary against the salary predicted by the full model modbb3,
# coloured by the country indicator. Summing the predictor columns inside
# aes() only adds raw values on unrelated scales, so the model's predictions
# are used as the x axis.
# predict(..., newdata = bb) returns one value per row of bb, keeping it
# aligned with Salary and Countrydummy.
ggplot(
  bb,
  aes(x = predict(modbb3, newdata = bb), y = Salary, color = Countrydummy)
) +
  geom_jitter() +
  geom_smooth(col = "orange") + # default smoother (loess for this data), one per group
  geom_smooth(method = "lm", se = FALSE) + # least-squares line per group
  labs(x = "Predicted salary (modbb3)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
library(ISLR)
# Load the Carseats data set shipped with ISLR and list its columns and types.
data("Carseats", package = "ISLR")
str(Carseats)
## 'data.frame': 400 obs. of 11 variables:
## $ Sales : num 9.5 11.22 10.06 7.4 4.15 ...
## $ CompPrice : num 138 111 113 117 141 124 115 136 132 132 ...
## $ Income : num 73 48 35 100 64 113 105 81 110 113 ...
## $ Advertising: num 11 16 10 4 3 13 0 15 0 0 ...
## $ Population : num 276 260 269 466 340 501 45 425 108 131 ...
## $ Price : num 120 83 80 97 128 72 108 120 124 124 ...
## $ ShelveLoc : Factor w/ 3 levels "Bad","Good","Medium": 1 2 3 3 1 1 3 2 3 3 ...
## $ Age : num 42 65 59 55 38 78 71 67 76 76 ...
## $ Education : num 17 10 12 14 13 16 15 10 10 17 ...
## $ Urban : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 2 2 1 1 ...
## $ US : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 1 2 1 2 ...
# Sales and Price are numeric; Urban and US are categorical ("Yes" or "No").
library(ggplot2)
# Sales distribution for US = No vs. US = Yes.
ggplot(data = Carseats, mapping = aes(x = US, y = Sales, fill = US)) +
  geom_boxplot()
# Sales distribution for Urban = No vs. Urban = Yes.
ggplot(data = Carseats, mapping = aes(x = Urban, y = Sales, fill = Urban)) +
  geom_boxplot()
# Sales regressed on Price plus the two Yes/No factors Urban and US.
modcarseats1 <- lm(
  formula = Sales ~ Price + Urban + US,
  data = Carseats
)
print(modcarseats1)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
##
## Coefficients:
## (Intercept) Price UrbanYes USYes
## 13.04347 -0.05446 -0.02192 1.20057
# Coefficient table and fit statistics for the Carseats model.
summary(modcarseats1)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
# Sales against Price, points coloured by Urban, with the parallel fitted
# lines implied by modcarseats1 (Sales ~ Price + Urban + US).
# Price is the only numeric predictor, so it alone goes on the x axis; adding
# the Urban/US factors to it is factor arithmetic and yields NA.
# Every line shares the Price slope; the factor coefficients only shift the
# intercept. Coefficients are looked up by name so the slope cannot be
# confused with a dummy coefficient.
ggplot(data = Carseats, aes(x = Price, y = Sales, color = Urban)) +
  geom_jitter() +
  geom_abline(
    intercept = coef(modcarseats1)[["(Intercept)"]],
    slope = coef(modcarseats1)[["Price"]],
    color = "red", lwd = 1
  ) + # baseline: Urban = No, US = No
  geom_abline(
    intercept = coef(modcarseats1)[["(Intercept)"]] +
      coef(modcarseats1)[["UrbanYes"]],
    slope = coef(modcarseats1)[["Price"]],
    color = "forestgreen", lwd = 1
  ) + # Urban = Yes, US = No
  geom_abline(
    intercept = coef(modcarseats1)[["(Intercept)"]] +
      coef(modcarseats1)[["USYes"]],
    slope = coef(modcarseats1)[["Price"]],
    color = "blue", lwd = 1
  ) # Urban = No, US = Yes
# Sales against Price, points coloured by US, with the parallel fitted lines
# implied by modcarseats1 (Sales ~ Price + Urban + US).
# Price is the only numeric predictor, so it alone goes on the x axis; adding
# the Urban/US factors to it is factor arithmetic and yields NA.
# Every line shares the Price slope; the factor coefficients only shift the
# intercept. Coefficients are looked up by name so the slope cannot be
# confused with a dummy coefficient.
ggplot(data = Carseats, aes(x = Price, y = Sales, color = US)) +
  geom_jitter() +
  geom_abline(
    intercept = coef(modcarseats1)[["(Intercept)"]],
    slope = coef(modcarseats1)[["Price"]],
    color = "red", lwd = 1
  ) + # baseline: Urban = No, US = No
  geom_abline(
    intercept = coef(modcarseats1)[["(Intercept)"]] +
      coef(modcarseats1)[["UrbanYes"]],
    slope = coef(modcarseats1)[["Price"]],
    color = "forestgreen", lwd = 1
  ) + # Urban = Yes, US = No
  geom_abline(
    intercept = coef(modcarseats1)[["(Intercept)"]] +
      coef(modcarseats1)[["USYes"]],
    slope = coef(modcarseats1)[["Price"]],
    color = "blue", lwd = 1
  ) # Urban = No, US = Yes
\(Y_{Sales}\) = \(\beta_0\) + \(\beta_1\) \(X_{Price}\) + \(\beta_2\) \(X_{Urban}\), where \(\beta_1\) is the coefficient of Price and \(X_{Urban}\) is equal to 1 if Urban is "Yes" and equal to 0 if Urban is "No".
\(Y_{Sales}\) = \(\beta_0\) + \(\beta_1\) \(X_{Price}\) + \(\beta_2\) \(X_{US}\), where \(\beta_1\) is the coefficient of Price and \(X_{US}\) is equal to 1 if US is "Yes" and equal to 0 if US is "No".