---
title: "Basketball"
author: "Laura Werner"
date: "2025-10-28"
output: html_document
---


library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(purrr)
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.4.3
library(ppcor)
## Warning: package 'ppcor' was built under R version 4.4.3
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(dplyr)
#' Read one worksheet of the workbook and add derived columns
#'
#' @param sheet_name Name of the worksheet to read; the same value is stored
#'   in the `Team` column of the result.
#' @param file_path Path to the Excel workbook.
#' @return The rows of the worksheet with four added columns: `Team`,
#'   `Won_award` (0 when `Awards` is missing, 1 otherwise),
#'   `PRA` (`PTS + TRB + AST`) and `STOCKS` (`STL + BLK`).
load_team_data <- function(sheet_name, file_path = "NBA Team Total Data 2024-2025.xlsx") {
  read_excel(file_path, sheet = sheet_name) %>%
    mutate(
      Team = sheet_name,
      # Indicator: 1 when the Awards cell holds a value, 0 when it is NA
      Won_award = ifelse(is.na(Awards), 0, 1),
      PRA = PTS + TRB + AST,
      STOCKS = STL + BLK
    )
}


# Workbook to read; each worksheet is loaded separately and tagged with its
# sheet name by load_team_data()
file_path <- "NBA Team Total Data 2024-2025.xlsx"

# Load every worksheet, then stack the per-sheet tables into one
team_sheets <- excel_sheets(file_path)
all_teams_list <- map(team_sheets, load_team_data, file_path = file_path)
nba_data <- bind_rows(all_teams_list)

# Preview the first rows of the combined table
head(nba_data)
## # A tibble: 6 × 35
##      Rk Player   Age     G    GS    MP    FG   FGA `FG%`  `3P` `3PA` `3P%`  `2P`
##   <dbl> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     1 Jalen…    24    79    22  2031   246   620 0.397   122   362 0.337   124
## 2     2 Keon …    22    79    56  1925   303   779 0.389   126   401 0.314   177
## 3     3 Nic C…    25    70    62  1882   320   568 0.563     5    21 0.238   315
## 4     4 Camer…    28    57    57  1800   355   747 0.475   159   408 0.39    196
## 5     5 Ziair…    23    63    45  1541   214   520 0.412   103   302 0.341   111
## 6     6 Tyres…    25    60    11  1315   189   465 0.406    99   282 0.351    90
## # ℹ 22 more variables: `2PA` <dbl>, `2P%` <dbl>, `eFG%` <dbl>, FT <dbl>,
## #   FTA <dbl>, `FT%` <dbl>, ORB <dbl>, DRB <dbl>, TRB <dbl>, AST <dbl>,
## #   STL <dbl>, BLK <dbl>, TOV <dbl>, PF <dbl>, PTS <dbl>, `Trp-Dbl` <dbl>,
## #   Awards <chr>, Team <chr>, Won_award <dbl>, PRA <dbl>, STOCKS <dbl>,
## #   Pos <chr>
# Lookup table that is joined to the player data on the Team column
conference_lookup <- read_excel("Team Conferences.xlsx")

# Step 1: attach the lookup columns to every row, matching on Team
nba_data <- left_join(nba_data, conference_lookup, by = "Team")

# Step 2: numeric conference code, 1 for "East" and 0 for any other
# non-missing value (a missing Conference stays NA)
nba_data <- mutate(
  nba_data,
  Conference_binary = ifelse(Conference == "East", 1, 0)
)

head(nba_data)
## # A tibble: 6 × 37
##      Rk Player   Age     G    GS    MP    FG   FGA `FG%`  `3P` `3PA` `3P%`  `2P`
##   <dbl> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     1 Jalen…    24    79    22  2031   246   620 0.397   122   362 0.337   124
## 2     2 Keon …    22    79    56  1925   303   779 0.389   126   401 0.314   177
## 3     3 Nic C…    25    70    62  1882   320   568 0.563     5    21 0.238   315
## 4     4 Camer…    28    57    57  1800   355   747 0.475   159   408 0.39    196
## 5     5 Ziair…    23    63    45  1541   214   520 0.412   103   302 0.341   111
## 6     6 Tyres…    25    60    11  1315   189   465 0.406    99   282 0.351    90
## # ℹ 24 more variables: `2PA` <dbl>, `2P%` <dbl>, `eFG%` <dbl>, FT <dbl>,
## #   FTA <dbl>, `FT%` <dbl>, ORB <dbl>, DRB <dbl>, TRB <dbl>, AST <dbl>,
## #   STL <dbl>, BLK <dbl>, TOV <dbl>, PF <dbl>, PTS <dbl>, `Trp-Dbl` <dbl>,
## #   Awards <chr>, Team <chr>, Won_award <dbl>, PRA <dbl>, STOCKS <dbl>,
## #   Pos <chr>, Conference <chr>, Conference_binary <dbl>
# Scatter plot of STOCKS against PRA, one point per row of nba_data,
# coloured by the 0/1 conference code
ggplot(
  data = nba_data,
  mapping = aes(x = PRA, y = STOCKS, color = factor(Conference_binary))
) +
  geom_point(alpha = 0.7, size = 3) +
  theme_minimal() +
  labs(
    title = "Offensive vs Defensive Performance by Conference",
    x = "PRA (Points + Rebounds + Assists)",
    y = "STOCKS (Steals + Blocks)",
    color = "Conference (1=East, 0=West)"
  )

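The scatterplot shows a positive relationship between the offensive (PRA) and defensive (STOCKS) metrics: players with higher PRA also tend to have higher STOCKS. With ggplot2's default colours, Western Conference players (coded 0) are drawn in red and Eastern Conference players (coded 1) in blue-green; Western players sit slightly higher on the defensive metric than Eastern players.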

# Histogram of PRA with side-by-side bars for the two conference codes
ggplot(
  data = nba_data,
  mapping = aes(x = PRA, fill = factor(Conference_binary))
) +
  geom_histogram(bins = 15, alpha = 0.7, position = "dodge") +
  theme_minimal() +
  labs(
    title = "Distribution of PRA by Conference",
    x = "PRA",
    y = "Number of Players",
    fill = "Conference (1=East, 0=West)"
  )

# Pearson correlation tests of the 0/1 conference code (1 = East) against
# each performance metric; a negative estimate means rows coded 0 have
# higher values on that metric
cor_pra <- cor.test(nba_data$Conference_binary, nba_data$PRA)
cor_stocks <- cor.test(nba_data$Conference_binary, nba_data$STOCKS)

# Print the test result for PRA
cor_pra
## 
##  Pearson's product-moment correlation
## 
## data:  nba_data$Conference_binary and nba_data$PRA
## t = -1.8195, df = 650, p-value = 0.0693
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.147164250  0.005629906
## sample estimates:
##         cor 
## -0.07118475
# Print the test result for STOCKS
cor_stocks
## 
##  Pearson's product-moment correlation
## 
## data:  nba_data$Conference_binary and nba_data$STOCKS
## t = -2.094, df = 650, p-value = 0.03665
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.157650363 -0.005105577
## sample estimates:
##         cor 
## -0.08185737

The correlation matrix indicates that PRA and STOCKS are positively correlated: players who produce more offensively also tend to produce more defensively. Age shows little to no relationship with either PRA or STOCKS. The correlation tests show that conference (East = 1, West = 0) has almost no association with the offensive metric (PRA; r ≈ -0.07, p ≈ 0.07) and a small negative association with the defensive metric (STOCKS; r ≈ -0.08, p ≈ 0.04). This suggests that Western Conference players are slightly stronger defensively.

# Correlation matrix of Age, PRA and STOCKS.
# select() is called as dplyr::select() because MASS (attached when ppcor is
# loaded) masks dplyr's select().
# use = "pairwise.complete.obs" computes each correlation from the rows where
# both variables of that pair are present.
cor_matrix <- nba_data %>%
  dplyr::select(Age, PRA, STOCKS) %>%
  cor(use = "pairwise.complete.obs")

# Heat map of the matrix with the coefficients printed in the cells
ggcorrplot(cor_matrix, lab = TRUE, title = "Correlation Matrix: Age, PRA, STOCKS")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggcorrplot package.
##   Please report the issue at <https://github.com/kassambara/ggcorrplot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

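Age does not appear to affect the link between offensive and defensive performance, so the relationship looks much the same across player ages: the partial correlation between PRA and STOCKS, which controls for age, remains strong at about 0.84.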

# Partial correlation between PRA and STOCKS with Age as the control variable
partial_res <- with(nba_data, pcor.test(PRA, STOCKS, Age))
partial_res
##    estimate       p.value statistic   n gp  Method
## 1 0.8395996 3.657553e-174  39.37587 652  1 pearson

Dear Commissioner Silver,

After reviewing the 2024–2025 NBA team data, we found a strong positive link between offensive performance (PRA) and defensive performance (STOCKS), which holds after controlling for age (partial correlation of about 0.84). In other words, players who excel offensively generally also contribute defensively. There are small differences between the Eastern and Western Conferences, with Western Conference players showing slightly stronger defensive performance, but the gap is minor. Age does not appear to have a meaningful effect on this relationship.

One limitation of this analysis is that it only considers a single season. Looking at multiple seasons in future studies could provide a clearer picture of trends over time. Additionally, incorporating advanced metrics like Player Efficiency Rating could offer deeper insight into overall team performance.