## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Rows: 2455 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): TEAM, CONF, POSTSEASON
## dbl (21): G, W, ADJOE, ADJDE, BARTHAG, EFG_O, EFG_D, TOR, TORD, ORB, DRB, FT...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Goal: fit linear models to the cbb.csv dataset.
# Response variable: WAB (Wins Above Bubble)
# Numeric predictor: ADJOE (Adjusted Offensive Efficiency)
# Categorical predictor: CONF (conference the team plays in)
# Simple linear model: response variable (WAB) on the numeric predictor (ADJOE)
# Fit the simple linear regression of WAB on ADJOE and print its summary.
model <- lm(formula = WAB ~ ADJOE, data = bball)
summary(model)
##
## Call:
## lm(formula = WAB ~ ADJOE, data = bball)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.1204 -2.5203 -0.0016 2.5731 13.3365
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -89.66782 1.07363 -83.52 <2e-16 ***
## ADJOE 0.79247 0.01037 76.44 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.788 on 2453 degrees of freedom
## Multiple R-squared: 0.7043, Adjusted R-squared: 0.7042
## F-statistic: 5844 on 1 and 2453 DF, p-value: < 2.2e-16
# Scatterplot of WAB against ADJOE with the fitted simple-regression line.
# The point colour is a fixed appearance setting, so it is passed outside
# aes(). Inside aes() the string 'b' was treated as a one-level data value,
# which added a meaningless legend entry and used ggplot2's default hue
# instead of a chosen colour.
# NOTE(review): "blue" is presumed to be what 'b' stood for -- confirm.
ggplot(data = bball, aes(x = ADJOE, y = WAB)) +
  geom_point(color = "blue") +
  theme_bw() +
  # Look the coefficients up by name rather than by position.
  geom_abline(
    slope = coef(model)[["ADJOE"]],
    intercept = coef(model)[["(Intercept)"]]
  )
# Additive model: one common ADJOE slope plus a separate intercept shift for
# each conference (CONF enters as a categorical predictor).
# NOTE(review): the fitted coefficients include both CONFind and CONFInd,
# which looks like one conference label entered with inconsistent
# capitalisation -- confirm against the data and recode if so.
model2 <- lm(formula = WAB ~ ADJOE + CONF, data = bball)
summary(model2)
##
## Call:
## lm(formula = WAB ~ ADJOE + CONF, data = bball)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.3517 -2.4588 -0.0399 2.3880 11.9321
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -79.14577 1.41347 -55.994 < 2e-16 ***
## ADJOE 0.69682 0.01294 53.844 < 2e-16 ***
## CONFACC 1.71177 0.50866 3.365 0.000777 ***
## CONFAE -0.87170 0.58186 -1.498 0.134230
## CONFAmer 0.76070 0.56353 1.350 0.177179
## CONFASun -2.28530 0.58696 -3.893 0.000101 ***
## CONFB10 2.04760 0.51762 3.956 7.85e-05 ***
## CONFB12 2.50653 0.56254 4.456 8.74e-06 ***
## CONFBE 1.86619 0.54967 3.395 0.000697 ***
## CONFBSky -3.02633 0.53520 -5.655 1.75e-08 ***
## CONFBSth -2.02893 0.54409 -3.729 0.000197 ***
## CONFBW -1.42402 0.57415 -2.480 0.013198 *
## CONFCAA -2.77109 0.55684 -4.976 6.93e-07 ***
## CONFCUSA -1.10570 0.50945 -2.170 0.030077 *
## CONFGWC 2.56804 1.64308 1.563 0.118197
## CONFHorz -1.97606 0.56494 -3.498 0.000478 ***
## CONFind 2.16984 2.09413 1.036 0.300234
## CONFInd 0.96441 3.57952 0.269 0.787626
## CONFIvy -0.54373 0.59729 -0.910 0.362743
## CONFMAAC -2.53103 0.54647 -4.632 3.82e-06 ***
## CONFMAC -1.10802 0.52918 -2.094 0.036379 *
## CONFMEAC -0.88244 0.54000 -1.634 0.102358
## CONFMVC -0.06123 0.55670 -0.110 0.912428
## CONFMWC -0.12969 0.54524 -0.238 0.812007
## CONFNEC -2.21580 0.55979 -3.958 7.77e-05 ***
## CONFOVC -2.05922 0.53093 -3.879 0.000108 ***
## CONFP12 0.93011 0.53014 1.754 0.079480 .
## CONFPat -2.05743 0.56475 -3.643 0.000275 ***
## CONFSB -1.35167 0.53972 -2.504 0.012331 *
## CONFSC -1.86951 0.55227 -3.385 0.000723 ***
## CONFSEC 1.56812 0.50980 3.076 0.002122 **
## CONFSlnd -0.84237 0.52829 -1.595 0.110950
## CONFSum -2.12089 0.58370 -3.634 0.000285 ***
## CONFSWAC -0.36271 0.57938 -0.626 0.531353
## CONFWAC -0.41297 0.58747 -0.703 0.482139
## CONFWCC -0.84082 0.55861 -1.505 0.132403
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.561 on 2419 degrees of freedom
## Multiple R-squared: 0.7423, Adjusted R-squared: 0.7386
## F-statistic: 199.1 on 35 and 2419 DF, p-value: < 2.2e-16
# Scatterplot of WAB against ADJOE, with points coloured by conference.
ggplot(data = bball, mapping = aes(x = ADJOE, y = WAB, color = CONF)) +
  geom_point()
# ANOVA table for the additive model. Terms are tested sequentially in
# formula order: ADJOE first, then CONF given ADJOE.
anova(model2)
## Analysis of Variance Table
##
## Response: WAB
## Df Sum Sq Mean Sq F value Pr(>F)
## ADJOE 1 83867 83867 6612.122 < 2.2e-16 ***
## CONF 34 4522 133 10.485 < 2.2e-16 ***
## Residuals 2419 30682 13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
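# What do the F values mean? Each F value is that term's mean square divided
# by the residual mean square (about 13 here). Terms are tested in the order
# listed, so the CONF row asks whether conference explains additional
# variation in WAB once ADJOE is already in the model; its very small
# p-value says that it does.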
# Overlay two of the parallel lines fitted by model2 on the raw data.
# Both lines share the common ADJOE slope:
#   colour 2 -- the reference conference (intercept only)
#   colour 4 -- the ACC (intercept shifted by the CONFACC coefficient)
# Coefficients are looked up by name; "ADJOE" and "CONFACC" are the second
# and third entries of the model2 coefficient table.
ggplot(data = bball, mapping = aes(x = ADJOE, y = WAB)) +
  geom_point() +
  theme_bw() +
  ggtitle("Scatterplot of Adjusted Offensive Efficiency vs Wins Above The Bubble") +
  geom_abline(
    slope = coef(model2)[["ADJOE"]],
    intercept = coef(model2)[["(Intercept)"]],
    color = 2
  ) +
  geom_abline(
    slope = coef(model2)[["ADJOE"]],
    intercept = coef(model2)[["CONFACC"]] + coef(model2)[["(Intercept)"]],
    color = 4
  )
# Interaction model: each conference gets its own intercept and its own
# ADJOE slope.
# NOTE(review): the summary reports one coefficient (ADJOE:CONFInd) as not
# defined because of singularities; presumably the "Ind" level has too few
# rows to estimate a separate slope -- check the CONF labels in the data.
model3 <- lm(formula = WAB ~ ADJOE * CONF, data = bball)
summary(model3)
##
## Call:
## lm(formula = WAB ~ ADJOE * CONF, data = bball)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.3156 -2.4041 -0.0089 2.3665 11.5342
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -82.018323 7.221096 -11.358 <2e-16 ***
## ADJOE 0.724005 0.068259 10.607 <2e-16 ***
## CONFACC 6.068268 9.346498 0.649 0.5162
## CONFAE -6.974903 10.080773 -0.692 0.4891
## CONFAmer -10.941786 10.417594 -1.050 0.2937
## CONFASun -1.847280 12.039290 -0.153 0.8781
## CONFB10 17.331514 9.412000 1.841 0.0657 .
## CONFB12 12.912298 10.519566 1.227 0.2198
## CONFBE 9.705786 10.454621 0.928 0.3533
## CONFBSky -0.955272 10.850328 -0.088 0.9299
## CONFBSth -9.227923 10.749870 -0.858 0.3907
## CONFBW -6.103588 11.886298 -0.513 0.6077
## CONFCAA -3.427877 10.934397 -0.313 0.7539
## CONFCUSA -10.372357 10.577559 -0.981 0.3269
## CONFGWC 71.069930 42.111222 1.688 0.0916 .
## CONFHorz 2.481224 11.943285 0.208 0.8354
## CONFind 31.665391 46.605967 0.679 0.4969
## CONFInd 1.028562 3.581122 0.287 0.7740
## CONFIvy 1.765546 12.932415 0.137 0.8914
## CONFMAAC 4.847081 10.213517 0.475 0.6351
## CONFMAC 14.103543 10.282817 1.372 0.1703
## CONFMEAC 16.568757 9.939833 1.667 0.0957 .
## CONFMVC -5.884603 10.802551 -0.545 0.5860
## CONFMWC -1.907273 10.272396 -0.186 0.8527
## CONFNEC 8.994926 11.700282 0.769 0.4421
## CONFOVC 0.966899 9.769045 0.099 0.9212
## CONFP12 -0.518546 10.680655 -0.049 0.9613
## CONFPat 9.931839 11.748675 0.845 0.3980
## CONFSB 11.109319 10.920912 1.017 0.3091
## CONFSC 0.558951 10.386910 0.054 0.9571
## CONFSEC 0.852904 9.867988 0.086 0.9311
## CONFSlnd -1.301050 10.215035 -0.127 0.8987
## CONFSum 6.319563 11.373219 0.556 0.5785
## CONFSWAC 6.144921 10.177358 0.604 0.5460
## CONFWAC -6.466563 10.805073 -0.598 0.5496
## CONFWCC -4.967886 9.711311 -0.512 0.6090
## ADJOE:CONFACC -0.040463 0.086416 -0.468 0.6397
## ADJOE:CONFAE 0.064136 0.098795 0.649 0.5163
## ADJOE:CONFAmer 0.109614 0.098029 1.118 0.2636
## ADJOE:CONFASun -0.002889 0.117823 -0.025 0.9804
## ADJOE:CONFB10 -0.138945 0.087196 -1.593 0.1112
## ADJOE:CONFB12 -0.094337 0.096408 -0.979 0.3279
## ADJOE:CONFBE -0.071908 0.096326 -0.747 0.4554
## ADJOE:CONFBSky -0.019251 0.105232 -0.183 0.8549
## ADJOE:CONFBSth 0.073496 0.104788 0.701 0.4831
## ADJOE:CONFBW 0.047447 0.115524 0.411 0.6813
## ADJOE:CONFCAA 0.006898 0.104548 0.066 0.9474
## ADJOE:CONFCUSA 0.091584 0.101812 0.900 0.3685
## ADJOE:CONFGWC -0.746491 0.459471 -1.625 0.1044
## ADJOE:CONFHorz -0.042656 0.115169 -0.370 0.7111
## ADJOE:CONFind -0.315211 0.502286 -0.628 0.5304
## ADJOE:CONFInd NA NA NA NA
## ADJOE:CONFIvy -0.021669 0.125226 -0.173 0.8626
## ADJOE:CONFMAAC -0.071792 0.098774 -0.727 0.4674
## ADJOE:CONFMAC -0.146612 0.098313 -1.491 0.1360
## ADJOE:CONFMEAC -0.182185 0.099585 -1.829 0.0675 .
## ADJOE:CONFMVC 0.056612 0.103135 0.549 0.5831
## ADJOE:CONFMWC 0.017036 0.097321 0.175 0.8611
## ADJOE:CONFNEC -0.112012 0.115811 -0.967 0.3335
## ADJOE:CONFOVC -0.028698 0.094014 -0.305 0.7602
## ADJOE:CONFP12 0.012436 0.099267 0.125 0.9003
## ADJOE:CONFPat -0.117816 0.114579 -1.028 0.3039
## ADJOE:CONFSB -0.121463 0.105511 -1.151 0.2498
## ADJOE:CONFSC -0.022804 0.100353 -0.227 0.8203
## ADJOE:CONFSEC 0.005607 0.091802 0.061 0.9513
## ADJOE:CONFSlnd 0.006614 0.100095 0.066 0.9473
## ADJOE:CONFSum -0.081246 0.109166 -0.744 0.4568
## ADJOE:CONFSWAC -0.066226 0.102815 -0.644 0.5196
## ADJOE:CONFWAC 0.062183 0.105425 0.590 0.5554
## ADJOE:CONFWCC 0.038576 0.091460 0.422 0.6732
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.56 on 2386 degrees of freedom
## Multiple R-squared: 0.7461, Adjusted R-squared: 0.7389
## F-statistic: 103.1 on 68 and 2386 DF, p-value: < 2.2e-16
# ANOVA table for the interaction model. Terms are tested sequentially, so
# the ADJOE:CONF row tests whether conference-specific slopes add anything
# beyond the additive model (ADJOE + CONF).
anova(model3)
## Analysis of Variance Table
##
## Response: WAB
## Df Sum Sq Mean Sq F value Pr(>F)
## ADJOE 1 83867 83867 6619.1116 <2e-16 ***
## CONF 34 4522 133 10.4965 <2e-16 ***
## ADJOE:CONF 33 451 14 1.0775 0.3502
## Residuals 2386 30232 13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Conclusion: