Introduction
Variable Meanings:
G: Number of games played,
W: Number of games won,
ADJOE: Adjusted Offensive Efficiency (An estimate of the offensive efficiency (points scored per 100 possessions) a team would have against the average Division I defense),
ADJDE: Adjusted Defensive Efficiency (An estimate of the defensive efficiency (points allowed per 100 possessions) a team would have against the average Division I offense),
BARTHAG: Power rating (Chance of beating an average Division I team),
EFG_O: Effective Field Goal Percentage Shot,
EFG_D: Effective Field Goal Percentage Allowed,
TOR: Turnover Rate,
TORD: Turnover Rate Forced on Opponents (Steal Rate),
ORB: Offensive Rebound Percentage,
DRB: Defensive Rebound Percentage,
FTR: Free Throw Rate,
FTRD: Free Throw Rate Allowed,
2P_O: Two-Point Shooting Percentage,
2P_D: Two-Point Shooting Percentage Allowed,
3P_O: Three-Point Shooting Percentage,
3P_D: Three-Point Shooting Percentage Allowed,
ADJ_T: Adjusted Tempo (An estimate of the tempo (possessions per 40 minutes) a team would have against the team that wants to play at an average Division I tempo),
WAB: Wins Above Bubble (The bubble refers to the cutoff between making the NCAA March Madness Tournament and not making it).
Team Analysis
# Add a winning-percentage column (games won divided by games played).
CBB <- mutate(CBB, Winpct = W / G)
# Drop the 23rd column by position.
# NOTE(review): positional drop -- confirm which variable sits in column 23.
CBB <- CBB[, -23]
# Sort teams from most to fewest wins and keep the first ten rows.
CBB <- arrange(CBB, desc(W))
Top10 <- CBB[seq_len(10), ]
# Bar chart of the ten teams in Top10: bars ordered by descending wins and
# filled by conference.
TopTeams <- ggplot(data = Top10, mapping = aes(x = reorder(TEAM, -W), y = W)) +
  geom_col(mapping = aes(fill = CONF)) +
  scale_fill_brewer(palette = "Set2", name = "Conference") +
  labs(x = "Team", y = "Wins", title = "Top 10 Teams") +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.55))
# Render the ggplot object as an interactive plotly chart.
ggplotly(TopTeams)
# Take note of how you use summarize
# Per-team sums of wins, three-point percentage and two-point percentage
# for the rows in Top10.
Conference <- summarise(
  group_by(Top10, TEAM),
  Wins = sum(W),
  Threepct = sum(X3P_O),
  Twopct = sum(X2P_O)
)
# Stacked bar chart: one bar per team, with wins, three-point percentage and
# two-point percentage stacked as separate traces.
Pct <- plot_ly(
  data = Conference,
  x = ~as.character(TEAM),
  y = ~Wins,
  type = "bar",
  name = "Wins"
) %>%
  add_trace(y = ~Threepct, name = "3 Point Pct ", type = "bar") %>%
  add_trace(y = ~Twopct, name = "2 Point Pct", type = "bar") %>%
  layout(
    title = "Wins - 3 Point Percentage - 2 Point Percentage ",
    xaxis = list(title = "Team", tickangle = 90),
    barmode = "stack"
  )
Pct
#Check for multicollinearity
# Create the correlation matrix on the numeric columns only.
# vapply() always yields a logical vector, whereas sapply() can return a list
# (e.g. for a data frame with no columns) and break the column subsetting.
CORR <- cor(CBB[vapply(CBB, is.numeric, logical(1))])
# Print the matrix; the echoed console output follows as `##` lines.
CORR
## G W ADJOE ADJDE BARTHAG EFG_O
## G 1.00000000 0.6687878 0.64043697 -0.63429117 0.70724258 0.31740838
## W 0.66878780 1.0000000 0.74763110 -0.68440029 0.79146271 0.58754992
## ADJOE 0.64043697 0.7476311 1.00000000 -0.56874376 0.87575569 0.69859632
## ADJDE -0.63429117 -0.6844003 -0.56874376 1.00000000 -0.86874229 -0.23153786
## BARTHAG 0.70724258 0.7914627 0.87575569 -0.86874229 1.00000000 0.51354109
## EFG_O 0.31740838 0.5875499 0.69859632 -0.23153786 0.51354109 1.00000000
## EFG_D -0.49878613 -0.6475087 -0.39177984 0.82266256 -0.66465946 -0.18106981
## TOR -0.31618564 -0.4573094 -0.57555088 0.22404201 -0.45166906 -0.35924875
## TORD 0.17851368 0.2338684 -0.02937972 -0.26341305 0.12586897 -0.12365993
## ORB 0.28576831 0.3197058 0.29743173 -0.26351623 0.30952541 -0.11222911
## DRB -0.13527063 -0.2918946 -0.20688698 0.30799495 -0.27803348 -0.27755839
## FTR 0.20488462 0.1507355 0.06553578 -0.11471795 0.10295019 -0.05442689
## FTRD -0.21369926 -0.2609696 -0.39567478 0.17652413 -0.33078770 -0.42118803
## X2P_O 0.32445676 0.5691137 0.63768215 -0.29721597 0.52190198 0.87599726
## X2P_D -0.48075944 -0.5678749 -0.35758643 0.74178549 -0.59591895 -0.15816102
## X3P_O 0.16447683 0.3608993 0.47138440 -0.03103733 0.27041502 0.73129752
## X3P_D -0.33707360 -0.5113620 -0.28556206 0.62248233 -0.50458872 -0.14128891
## ADJ_T -0.05490806 -0.0322420 0.02083955 0.14741318 -0.06707993 0.02351906
## WAB 0.69540045 0.9015331 0.85247526 -0.83208405 0.94202190 0.51433121
## Winpct 0.50741156 0.9774137 0.69390853 -0.62233061 0.73150154 0.59931611
## EFG_D TOR TORD ORB DRB
## G -0.49878613 -0.31618564 0.178513676 0.285768309 -0.13527063
## W -0.64750869 -0.45730938 0.233868447 0.319705840 -0.29189457
## ADJOE -0.39177984 -0.57555088 -0.029379721 0.297431734 -0.20688698
## ADJDE 0.82266256 0.22404201 -0.263413045 -0.263516228 0.30799495
## BARTHAG -0.66465946 -0.45166906 0.125868967 0.309525411 -0.27803348
## EFG_O -0.18106981 -0.35924875 -0.123659932 -0.112229109 -0.27755839
## EFG_D 1.00000000 0.12732851 -0.080294648 -0.254626589 0.20750493
## TOR 0.12732851 1.00000000 0.072927459 0.195408596 0.17720302
## TORD -0.08029465 0.07292746 1.000000000 0.181558577 0.35363956
## ORB -0.25462659 0.19540860 0.181558577 1.000000000 0.04743562
## DRB 0.20750493 0.17720302 0.353639563 0.047435622 1.00000000
## FTR -0.15481813 0.20181671 0.159904795 0.252701680 0.08585746
## FTRD 0.11804645 0.39002134 0.390805698 0.174410610 0.19701887
## X2P_O -0.22970140 -0.27534514 -0.042459227 -0.023534072 -0.24688765
## X2P_D 0.89269729 0.10707538 -0.028444512 -0.246106820 0.23962181
## X3P_O -0.03433789 -0.32198299 -0.176486267 -0.182026313 -0.18482034
## X3P_D 0.76680248 0.10435917 -0.136812071 -0.167964761 0.08305652
## ADJ_T 0.12448737 -0.02587471 0.001098851 -0.006564624 0.18579006
## WAB -0.68645001 -0.43965170 0.173040673 0.358144733 -0.24925372
## Winpct -0.61512938 -0.44909479 0.226335291 0.291929631 -0.30645510
## FTR FTRD X2P_O X2P_D X3P_O X3P_D
## G 0.20488462 -0.21369926 0.32445676 -0.48075944 0.16447683 -0.33707360
## W 0.15073549 -0.26096955 0.56911366 -0.56787488 0.36089932 -0.51136197
## ADJOE 0.06553578 -0.39567478 0.63768215 -0.35758643 0.47138440 -0.28556206
## ADJDE -0.11471795 0.17652413 -0.29721597 0.74178549 -0.03103733 0.62248233
## BARTHAG 0.10295019 -0.33078770 0.52190198 -0.59591895 0.27041502 -0.50458872
## EFG_O -0.05442689 -0.42118803 0.87599726 -0.15816102 0.73129752 -0.14128891
## EFG_D -0.15481813 0.11804645 -0.22970140 0.89269729 -0.03433789 0.76680248
## TOR 0.20181671 0.39002134 -0.27534514 0.10707538 -0.32198299 0.10435917
## TORD 0.15990479 0.39080570 -0.04245923 -0.02844451 -0.17648627 -0.13681207
## ORB 0.25270168 0.17441061 -0.02353407 -0.24610682 -0.18202631 -0.16796476
## DRB 0.08585746 0.19701887 -0.24688765 0.23962181 -0.18482034 0.08305652
## FTR 1.00000000 0.15330714 0.02122398 -0.14403079 -0.13336136 -0.10476175
## FTRD 0.15330714 1.00000000 -0.40376176 0.14402242 -0.26103516 0.03503524
## X2P_O 0.02122398 -0.40376176 1.00000000 -0.20697996 0.32235125 -0.17186528
## X2P_D -0.14403079 0.14402242 -0.20697996 1.00000000 -0.02884244 0.39907658
## X3P_O -0.13336136 -0.26103516 0.32235125 -0.02884244 1.00000000 -0.02586062
## X3P_D -0.10476175 0.03503524 -0.17186528 0.39907658 -0.02586062 1.00000000
## ADJ_T 0.24607106 0.05079104 0.08290551 0.14894231 -0.04684675 0.04937992
## WAB 0.14541106 -0.30952251 0.52283210 -0.61359549 0.27742910 -0.52356246
## Winpct 0.12975456 -0.24655103 0.57774562 -0.52791693 0.37397952 -0.50154907
## ADJ_T WAB Winpct
## G -0.054908057 0.69540045 0.50741156
## W -0.032241997 0.90153311 0.97741367
## ADJOE 0.020839554 0.85247526 0.69390853
## ADJDE 0.147413183 -0.83208405 -0.62233061
## BARTHAG -0.067079932 0.94202190 0.73150154
## EFG_O 0.023519060 0.51433121 0.59931611
## EFG_D 0.124487372 -0.68645001 -0.61512938
## TOR -0.025874706 -0.43965170 -0.44909479
## TORD 0.001098851 0.17304067 0.22633529
## ORB -0.006564624 0.35814473 0.29192963
## DRB 0.185790063 -0.24925372 -0.30645510
## FTR 0.246071058 0.14541106 0.12975456
## FTRD 0.050791042 -0.30952251 -0.24655103
## X2P_O 0.082905507 0.52283210 0.57774562
## X2P_D 0.148942313 -0.61359549 -0.52791693
## X3P_O -0.046846753 0.27742910 0.37397952
## X3P_D 0.049379923 -0.52356246 -0.50154907
## ADJ_T 1.000000000 -0.03405397 -0.01866466
## WAB -0.034053973 1.00000000 0.85527659
## Winpct -0.018664661 0.85527659 1.00000000
# Plot the full correlation matrix in CORR as coloured squares with the
# coefficient printed in each cell.
ggcorrplot(
  corr = CORR,
  method = "square",
  type = "full",
  ggtheme = theme_bw,
  colors = c("firebrick2", "white", "springgreen3"),
  lab = TRUE,
  lab_size = 2.25
)
##
## Call:
## lm(formula = W ~ X3P_O + X2P_O + X2P_D + X3P_D + ORB + DRB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5153 -2.4369 0.1219 2.0955 10.5086
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.40971 6.21958 1.352 0.1772
## X3P_O 0.65774 0.07812 8.420 1.02e-15 ***
## X2P_O 0.69187 0.05997 11.538 < 2e-16 ***
## X2P_D -0.59914 0.06782 -8.835 < 2e-16 ***
## X3P_D -0.75016 0.08635 -8.687 < 2e-16 ***
## ORB 0.42248 0.04928 8.573 3.42e-16 ***
## DRB -0.15842 0.06677 -2.373 0.0182 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.439 on 346 degrees of freedom
## Multiple R-squared: 0.714, Adjusted R-squared: 0.7091
## F-statistic: 144 on 6 and 346 DF, p-value: < 2.2e-16
Variance Inflation Factor Check for multicollinearity
## X3P_O X2P_O X2P_D X3P_D ORB DRB
## 1.169395 1.210628 1.334807 1.211396 1.121524 1.134320