In the modern NBA, floor spacing is a major key to the success of an offensive system. We no longer see offenses that “pack” all five players within the three-point line; today, offensive systems utilize the three-point line to create more spacing to produce easier shots. Floor spacing can be manufactured, but it is heavily dependent on the design of an offensive system. Consequently, shot selection carries equal importance on the success of an offensive system. As Brian Skinner (2012) mentioned in his article The Problem of Shot Selection in Basketball, “the purpose of an offensive set is to generate a high-quality shot opportunity.” Thus, it is important for teams to be aware of a player’s ability to make shots and their shot selection. This study will examine the probability of an NBA player making a shot and what factors impact shot success. I develop a predictive shot model using logistic regression to predict whether a shot will be successful using five concepts:
Shot data from the 2021-2022 NBA season was used to train and test the model. Results showed that zone location and the nearest defender distance were the most important factors to shot success. Shot success probabilities were the highest in the restricted area, mid-range shots right outside the paint, and corner threes. This model could be used to help teams with game planning, player development, and roster construction.
df <- read_excel("C:\\Users\\danni\\OneDrive\\Documents\\cap_shots (1).xlsx")
nba <- df %>%
filter(zoneBasic != 'Backcourt')%>%
filter(period != 5 & period != 6 &period != 7) %>%
mutate(off_dribble = recode(dribble_range,
"0 Dribbles" = "0",
"1 Dribble" = "1",
"2 Dribbles" = "1",
"3-6 Dribbles" = "1",
"7+ Dribbles" = "1")) %>%
mutate(across(c(close_def_dist,off_dribble,touch_time,shot_clock,zoneBasic,period),
factor))
nba$zoneBasic <- relevel(nba$zoneBasic, ref='Restricted Area')
## Creating contingency table
tab<- table(nba$zoneBasic, nba$close_def_dist, nba$touch_time, nba$period, dnn=c('Zone','Closest Defender Distance','Touch Time','Period'))
#converting from table to data frame that features row and column names like a matrix
tab_to_df <- function(x){
mat <- as.matrix.data.frame(x)
df <- data.frame(mat)
rownames(df) <- c('Restricted Area','Above the Break 3','In The Paint (Non-RA)','Left Corner 3','Mid-Range','Right Corner 3')
colnames(df) <- c('Very Tight (0-2ft)','Tight (2-4ft)','Open (4-6ft)','Wide Open (6ft+)')
return(df)
}
#gt function for stratified tables in presentation format
gt_contin <- function(tbl,tbl.num,t_time,period){
gt.tab <- tbl %>%
gt::gt(rownames_to_stub = TRUE) %>%
tab_header(
title = glue('Stratified Table #{tbl.num}'),
subtitle = glue('Touch Time < {t_time}, Period = {period}')
) %>%
tab_spanner(
label = 'Closest Defender Distance',
columns = c('Very Tight (0-2ft)','Tight (2-4ft)','Open (4-6ft)','Wide Open (6ft+)')) %>%
tab_stubhead(
label = 'Zone'
) %>%
gtExtras::gt_theme_nytimes()
}
# Placing stratified tables on one plot
tab1 <- tab_to_df(tab[,,1,1])
gt1 <- gt_contin(tab1,1,'2 seconds',1)
#gt1 %>%
# gtsave("p1.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
tab2 <- tab_to_df(tab[,,2,1])
gt2 <- gt_contin(tab2,2,'2-6 seconds',1)
#gt2 %>%
# gtsave("p2.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
tab3 <- tab_to_df(tab[,,3,1])
gt3 <- gt_contin(tab3,3,'6ft+ seconds',1)
#gt3 %>%
# gtsave("p3.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
tab4 <- tab_to_df(tab[,,1,2])
gt4 <- gt_contin(tab4,4,'2 seconds',2)
#gt4 %>%
# gtsave("p4.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
tab5 <- tab_to_df(tab[,,2,2])
gt5 <- gt_contin(tab5,5,'2-6 seconds',2)
#gt5 %>%
# gtsave("p5.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
tab6 <- tab_to_df(tab[,,3,2])
gt6 <- gt_contin(tab6,6,'6ft+ seconds',2)
#gt6 %>%
# gtsave("p6.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
tab7 <- tab_to_df(tab[,,1,3])
gt7 <- gt_contin(tab7,7,'2 seconds',3)
#gt7 %>%
# gtsave("p7.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
tab8 <- tab_to_df(tab[,,2,3])
gt8 <- gt_contin(tab7,8,'2-6 seconds',3)
#gt8 %>%
# gtsave("p8.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
tab9 <- tab_to_df(tab[,,3,3])
gt9 <- gt_contin(tab7,9,'6ft+ seconds',3)
#gt9 %>%
#gtsave("p9.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
tab10 <- tab_to_df(tab[,,1,4])
gt10 <- gt_contin(tab10,10,'2 seconds',4)
#gt10 %>%
#gtsave("p10.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
tab11 <- tab_to_df(tab[,,2,4])
gt11 <- gt_contin(tab11,11,'2-6 seconds',4)
#gt11 %>%
#gtsave("p11.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
tab12 <- tab_to_df(tab[,,3,4])
gt12 <- gt_contin(tab12,12,'6ft+ seconds',4)
#gt12 %>%
#gtsave("p12.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
p1 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p1.png", scale = 0.8)
p2 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p2.png", scale = 0.8)
p3 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p3.png", scale = 0.8)
p4 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p4.png", scale = 0.8)
p5 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p5.png", scale =0.8)
p6 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p6.png", scale = 0.8)
p7 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p7.png", scale = 0.8)
p8 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p8.png", scale = 0.8)
p9 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p9.png", scale = 0.8)
p10 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p10.png", scale =0.8)
p11 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p11.png", scale = 0.8)
p12 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p12.png", scale = 0.8)
pg1 <- plot_grid(
p1,p2,p3,p4,
labels= c(1:4),
align = 'hv')
pg2 <- plot_grid(
p5,p6,p7,p8,
labels= c(5:8),
align = 'hv')
pg3 <-plot_grid(
p9,p10,p11,p12,
labels= c(9:12),
align = 'hv')
######## Cramer's V Table ######
#getting Cramer's V for each stratified table
stats <-assocstats(tab)
v<-c()
for (i in stats) {
v <-c(v,round(i[[5]],3))
}
tab.order <- c(1:12)
c.df <- data.frame(tab.order, v )
t <- c.df %>%
gt() %>%
cols_label(
tab.order = "Table #", v = "Cramer's V"
) %>%
gtExtras::gt_color_rows(columns = v, palette = "ggsci::blue_material" ) %>%
gt::tab_header(title = paste("Cramer's V for Each Contingency Table")) %>%
tab_options(
table.font.size = px(20),
data_row.padding = px(1),
) %>%
tab_style(
style = list(
cell_text(weight = "bold")
),
locations = cells_body(
columns = c(1)
)
) %>%
gtExtras::gt_theme_nytimes() %>%
gtExtras::fmt_symbol_first(c(v), decimals =3)
pg1
pg2
pg3
t
| Cramer's V for Each Contingency Table | |
| Table # | Cramer's V |
|---|---|
| 1 | 0.188 |
| 2 | 0.107 |
| 3 | 0.099 |
| 4 | 0.211 |
| 5 | 0.128 |
| 6 | 0.136 |
| 7 | 0.193 |
| 8 | 0.110 |
| 9 | 0.096 |
| 10 | 0.209 |
| 11 | 0.122 |
| 12 | 0.120 |
#t %>%
#gtsave("C:\\Users\\danni\\OneDrive\\Pictures\\cramers.png", expand = 20)
‘Off Dribble’ is not included in the correlations since–as we’ll see in the next section–the ‘Off Dribble’ predictor is insignificant to our model, therefore, it can be removed from the model. Only factors that were significant to the final model are included in the correlations test.
set.seed(1234)
split = initial_split(data=nba,prop=0.75)
Train = training(split)
Test = testing(split)
nba.m<- glm(isShotMade ~ zoneBasic + close_def_dist + touch_time + period + off_dribble, data=Train, family = 'binomial')
summary(nba.m)
##
## Call:
## glm(formula = isShotMade ~ zoneBasic + close_def_dist + touch_time +
## period + off_dribble, family = "binomial", data = Train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.511 -1.020 -0.910 1.278 1.584
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.785e-01 2.313e-02 20.689 < 2e-16 ***
## zoneBasicAbove the Break 3 -1.278e+00 1.448e-02 -88.259 < 2e-16 ***
## zoneBasicIn The Paint (Non-RA) -9.119e-01 1.574e-02 -57.918 < 2e-16 ***
## zoneBasicLeft Corner 3 -1.112e+00 2.564e-02 -43.377 < 2e-16 ***
## zoneBasicMid-Range -1.022e+00 1.794e-02 -56.983 < 2e-16 ***
## zoneBasicRight Corner 3 -1.134e+00 2.685e-02 -42.225 < 2e-16 ***
## close_def_dist2-4 Feet - Tight 2.136e-01 2.084e-02 10.250 < 2e-16 ***
## close_def_dist4-6 Feet - Open 2.774e-01 2.168e-02 12.798 < 2e-16 ***
## close_def_dist6+ Feet - Wide Open 1.970e-01 2.310e-02 8.529 < 2e-16 ***
## touch_timeTouch 2-6 Seconds -5.303e-02 1.896e-02 -2.796 0.00517 **
## touch_timeTouch 6+ Seconds -2.606e-03 2.216e-02 -0.118 0.90640
## period2 -2.532e-02 1.482e-02 -1.708 0.08759 .
## period3 -1.501e-02 1.489e-02 -1.008 0.31354
## period4 -6.539e-02 1.507e-02 -4.340 1.43e-05 ***
## off_dribble1 -6.987e-05 1.879e-02 -0.004 0.99703
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 210131 on 152112 degrees of freedom
## Residual deviance: 200343 on 152098 degrees of freedom
## AIC: 200373
##
## Number of Fisher Scoring iterations: 4
All levels for the Zone and Closest Defender Distance categories are significant. However, for both Touch Time and Period, only one level is significant and Off Dribble is insignificant as well. We can make a decision to include these variables by building models without those categories and then checking a likelihood ratio test.
#without touch time
nba.m2 <-glm(isShotMade ~ zoneBasic + close_def_dist + period + off_dribble, data=Train, family = 'binomial')
summary(nba.m2)
##
## Call:
## glm(formula = isShotMade ~ zoneBasic + close_def_dist + period +
## off_dribble, family = "binomial", data = Train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5094 -1.0218 -0.9101 1.2801 1.5736
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.47528 0.02311 20.566 < 2e-16 ***
## zoneBasicAbove the Break 3 -1.27720 0.01447 -88.254 < 2e-16 ***
## zoneBasicIn The Paint (Non-RA) -0.91197 0.01573 -57.971 < 2e-16 ***
## zoneBasicLeft Corner 3 -1.11187 0.02564 -43.363 < 2e-16 ***
## zoneBasicMid-Range -1.02221 0.01791 -57.059 < 2e-16 ***
## zoneBasicRight Corner 3 -1.13315 0.02684 -42.214 < 2e-16 ***
## close_def_dist2-4 Feet - Tight 0.21313 0.02084 10.227 < 2e-16 ***
## close_def_dist4-6 Feet - Open 0.27816 0.02167 12.836 < 2e-16 ***
## close_def_dist6+ Feet - Wide Open 0.19899 0.02309 8.618 < 2e-16 ***
## period2 -0.02483 0.01482 -1.675 0.0938 .
## period3 -0.01465 0.01489 -0.983 0.3254
## period4 -0.06454 0.01506 -4.285 1.83e-05 ***
## off_dribble1 -0.02925 0.01155 -2.533 0.0113 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 210131 on 152112 degrees of freedom
## Residual deviance: 200356 on 152100 degrees of freedom
## AIC: 200382
##
## Number of Fisher Scoring iterations: 4
#without period
nba.m3 <-glm(isShotMade ~ zoneBasic + close_def_dist + touch_time + off_dribble, data=Train, family = 'binomial')
summary(nba.m3)
##
## Call:
## glm(formula = isShotMade ~ zoneBasic + close_def_dist + touch_time +
## off_dribble, family = "binomial", data = Train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4989 -1.0200 -0.9111 1.2830 1.5657
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.452597 0.021288 21.260 <2e-16 ***
## zoneBasicAbove the Break 3 -1.277820 0.014477 -88.264 <2e-16 ***
## zoneBasicIn The Paint (Non-RA) -0.911078 0.015742 -57.874 <2e-16 ***
## zoneBasicLeft Corner 3 -1.112178 0.025641 -43.376 <2e-16 ***
## zoneBasicMid-Range -1.020960 0.017937 -56.921 <2e-16 ***
## zoneBasicRight Corner 3 -1.133379 0.026844 -42.221 <2e-16 ***
## close_def_dist2-4 Feet - Tight 0.213715 0.020844 10.253 <2e-16 ***
## close_def_dist4-6 Feet - Open 0.277435 0.021675 12.800 <2e-16 ***
## close_def_dist6+ Feet - Wide Open 0.197349 0.023100 8.543 <2e-16 ***
## touch_timeTouch 2-6 Seconds -0.052988 0.018961 -2.795 0.0052 **
## touch_timeTouch 6+ Seconds -0.004185 0.022156 -0.189 0.8502
## off_dribble1 -0.000115 0.018787 -0.006 0.9951
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 210131 on 152112 degrees of freedom
## Residual deviance: 200363 on 152101 degrees of freedom
## AIC: 200387
##
## Number of Fisher Scoring iterations: 4
#without off_dribble
nba.m4<-glm(isShotMade ~ zoneBasic + close_def_dist + touch_time + period, data=Train, family = 'binomial')
summary(nba.m4)
##
## Call:
## glm(formula = isShotMade ~ zoneBasic + close_def_dist + touch_time +
## period, family = "binomial", data = Train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.511 -1.020 -0.910 1.278 1.584
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.478525 0.022826 20.964 < 2e-16 ***
## zoneBasicAbove the Break 3 -1.277857 0.014477 -88.267 < 2e-16 ***
## zoneBasicIn The Paint (Non-RA) -0.911915 0.015734 -57.959 < 2e-16 ***
## zoneBasicLeft Corner 3 -1.112284 0.025639 -43.382 < 2e-16 ***
## zoneBasicMid-Range -1.022460 0.017929 -57.029 < 2e-16 ***
## zoneBasicRight Corner 3 -1.133558 0.026843 -42.229 < 2e-16 ***
## close_def_dist2-4 Feet - Tight 0.213639 0.020843 10.250 < 2e-16 ***
## close_def_dist4-6 Feet - Open 0.277403 0.021656 12.810 < 2e-16 ***
## close_def_dist6+ Feet - Wide Open 0.197034 0.022985 8.572 < 2e-16 ***
## touch_timeTouch 2-6 Seconds -0.053082 0.012433 -4.269 1.96e-05 ***
## touch_timeTouch 6+ Seconds -0.002662 0.016204 -0.164 0.8695
## period2 -0.025320 0.014822 -1.708 0.0876 .
## period3 -0.015009 0.014893 -1.008 0.3135
## period4 -0.065391 0.015067 -4.340 1.43e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 210131 on 152112 degrees of freedom
## Residual deviance: 200343 on 152099 degrees of freedom
## AIC: 200371
##
## Number of Fisher Scoring iterations: 4
#2nd model anova
m2 <- anova(nba.m,nba.m2, test='LRT')#should be included since p-value less than 0.05
#3rd model anova
m3 <- anova(nba.m,nba.m3, test='LRT')#should be included since p-value less than 0.05
#4th model anova
m4 <- anova(nba.m,nba.m4, test='LRT') #should NOT be included since p-value greater than 0.05
anova2 <- m2 %>%
gt() %>%
tab_header(
title = 'ANOVA for Model Without Touch Time'
) %>%
gtExtras::gt_highlight_cols(`Pr(>Chi)`, fill='green', alpha = 0.5) %>%
gtExtras::gt_theme_nytimes()
anova2 %>%
gtsave("anova2.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
anova3 <- m3 %>%
gt() %>%
tab_header(
title = 'ANOVA for Model Without Period'
) %>%
gtExtras::gt_highlight_cols(`Pr(>Chi)`, fill='green', alpha = 0.5) %>%
gtExtras::gt_theme_nytimes()
anova3 %>%
gtsave("anova3.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
anova4 <- m4 %>%
gt() %>%
tab_header(
title = 'ANOVA for Model Without Off Dribble'
) %>%
gtExtras::gt_highlight_cols(`Pr(>Chi)`, fill='red', alpha = 0.5) %>%
gtExtras::gt_theme_nytimes()
anova4 %>%
gtsave("anova4.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
anova.p2 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\anova2.png", scale = 0.8)
anova.p3 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\anova3.png", scale = 0.8)
anova.p4 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\anova4.png", scale = 0.8)
an.pg <- plot_grid(
anova.p2,anova.p3,anova.p4,
labels= c(1:4),
align = 'hv')
an.pg
tbl_reg_nba <- nba.m4 %>%
tbl_regression() %>%
as_gt() %>%
gt::tab_header("Summary Table of NBA Shot Model Results ") %>%
tab_options(
table.font.size = px(20),
data_row.padding = px(1),
) %>%
tab_style(
style = list(
cell_text(weight = "bold")
),
locations = cells_body(
columns = c(1)
)
) %>%
gtExtras::gt_theme_nytimes()
tbl_reg_nba
| Summary Table of NBA Shot Model Results | |||
| Characteristic | log(OR)1 | 95% CI1 | p-value |
|---|---|---|---|
| zoneBasic | |||
| Restricted Area | — | — | |
| Above the Break 3 | -1.3 | -1.3, -1.2 | <0.001 |
| In The Paint (Non-RA) | -0.91 | -0.94, -0.88 | <0.001 |
| Left Corner 3 | -1.1 | -1.2, -1.1 | <0.001 |
| Mid-Range | -1.0 | -1.1, -0.99 | <0.001 |
| Right Corner 3 | -1.1 | -1.2, -1.1 | <0.001 |
| close_def_dist | |||
| 0-2 Feet - Very Tight | — | — | |
| 2-4 Feet - Tight | 0.21 | 0.17, 0.25 | <0.001 |
| 4-6 Feet - Open | 0.28 | 0.23, 0.32 | <0.001 |
| 6+ Feet - Wide Open | 0.20 | 0.15, 0.24 | <0.001 |
| touch_time | |||
| Touch < 2 Seconds | — | — | |
| Touch 2-6 Seconds | -0.05 | -0.08, -0.03 | <0.001 |
| Touch 6+ Seconds | 0.00 | -0.03, 0.03 | 0.9 |
| period | |||
| 1 | — | — | |
| 2 | -0.03 | -0.05, 0.00 | 0.088 |
| 3 | -0.02 | -0.04, 0.01 | 0.3 |
| 4 | -0.07 | -0.09, -0.04 | <0.001 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
#tbl_reg_nba %>%
#gtsave("C:\\Users\\danni\\OneDrive\\Pictures\\NBA log results2.png", expand = 20)
predict_reg3 <- predict(nba.m4,
Test, type = "response")
Test$prob <- c(predict_reg3) #adding probabilities to dataframe
predict_reg4 <- ifelse(predict_reg3 >0.5, 1, 0) ##changing probabilities
#confusion matrix
table(Test$isShotMade, predict_reg4)
## predict_reg4
## 0 1
## FALSE 22267 5105
## TRUE 13742 9591
#prediction accuracy
missing_classerr <- mean(predict_reg4 != Test$isShotMade)
print(paste('Accuracy =', 1 - missing_classerr))
## [1] "Accuracy = 0.628300956513164"
ROCPred <- prediction(predict_reg4, Test$isShotMade)
ROCPer <- performance(ROCPred, measure = "tpr",
x.measure = "fpr")
#area under curve
auc <- performance(ROCPred, measure = "auc")
auc <- auc@y.values[[1]]
auc
## [1] 0.6122721
#Plotting Curve
plot(ROCPer, colorize = TRUE,
print.cutoffs.at = seq(0.1, by = 0.1),
main = "ROC CURVE")
abline(a = 0, b = 1)
auc <- round(auc, 4)
legend(.6, .4, auc, title = "AUC", cex = 1)
#Variable Importance
##variable importance
vi(nba.m4) #variable importance table
## # A tibble: 13 × 3
## Variable Importance Sign
## <chr> <dbl> <chr>
## 1 zoneBasicAbove the Break 3 88.3 NEG
## 2 zoneBasicIn The Paint (Non-RA) 58.0 NEG
## 3 zoneBasicMid-Range 57.0 NEG
## 4 zoneBasicLeft Corner 3 43.4 NEG
## 5 zoneBasicRight Corner 3 42.2 NEG
## 6 close_def_dist4-6 Feet - Open 12.8 POS
## 7 close_def_dist2-4 Feet - Tight 10.2 POS
## 8 close_def_dist6+ Feet - Wide Open 8.57 POS
## 9 period4 4.34 NEG
## 10 touch_timeTouch 2-6 Seconds 4.27 NEG
## 11 period2 1.71 NEG
## 12 period3 1.01 NEG
## 13 touch_timeTouch 6+ Seconds 0.164 NEG
vip(nba.m4) #variable importance plot (only includes varaibles that were significant to the model)
prob_charts <- function(df,prob_col,title){
mid<- mean(prob_col)
nba_plot <- ggplot(df, aes(x=locationX, y=locationY, color = prob)) + geom_point() +
scale_colour_gradient2('Probability',low='blue', mid = 'white', high = 'red',
midpoint = mid,space='Lab') +
theme(plot.title = element_text( hjust=.4,size = 20, family = "Comic Sans MS", face = "bold", vjust = 0, colour = "lightgrey"),
plot.margin=unit(c(1,2,1,1),"cm"),
plot.background = element_rect(fill = 'gray15', color = 'gray15'),
panel.background = element_rect(fill = 'gray15', color = 'gray15'),
panel.grid.major = element_line(colour= 'gray20'),
panel.grid.minor = element_blank(),
axis.line = element_line(colour="lightgrey"),
axis.text = element_text(family = "Comic Sans MS", face = "bold",colour="lightgrey"),
axis.title = element_text(family = "Comic Sans MS", face = "bold",colour="lightgrey"),
legend.background = element_blank(),
legend.text = element_text(hjust = .70, size = 10, family = "Comic Sans MS", face = "bold", colour = "white"),
legend.title = element_text(family = "Comic Sans MS", face = "bold",colour='lightgrey')
) +
ggtitle(title)
ggdraw(nba_plot) +
draw_image('https://cdn.nba.com/manage/2021/07/NBA_75-690x588.jpg',
x=.80,y=.73 ,height=.12, width=.12)
}
nba_plot <- prob_charts(df=Test, prob_col=Test$prob,title="2021-2022 NBA Shot Probabilities")
nba_plot
##3 pointers##
nba_3 <- subset(Test, zoneBasic == "Above the Break 3" |
zoneBasic =='Right Corner 3' |
zoneBasic =='Left Corner 3')
nba_plot_3 <- prob_charts(df=nba_3,prob_col= nba_3$prob,title="2021-2022 NBA 3 Point Shot Probabilities")
nba_plot_3
nba_mid_range <- subset(Test, zoneBasic == "Mid-Range")
nba_plot_2 <- prob_charts(df=nba_mid_range,prob_col= nba_mid_range$prob,title="2021-2022 NBA Mid-Range Shot Probabilities")
nba_plot_2