# File names
team_xlsx <- "NBA Team Total Data 2024-2025.xlsx"
conf_xlsx <- "Team Conferences.xlsx"
# Check all sheet names in the team data workbook
excel_sheets(team_xlsx)
## [1] "Nets" "Knicks" "Raptors" "Philly" "Celtics"
## [6] "Timberwolves" "Thunder" "Jazz" "Trailblazers" "Nuggets"
## [11] "Bulls" "Bucks" "Cavaliers" "Pistons" "Pacers"
## [16] "Warriors" "Suns" "Lakers" "Clippers" "Kings"
## [21] "Hornets" "Magic" "Wizards" "Hawks" "Heat"
## [26] "Grizzles" "Spurs" "Pelicans" "Rockets" "Mavericks"
library(readxl)
# Look at the first team sheet to check the column names
test_sheet <- read_excel(team_xlsx, sheet = 1)
names(test_sheet)
## [1] "Rk" "Player" "Age" "G" "GS" "MP" "FG"
## [8] "FGA" "FG%" "3P" "3PA" "3P%" "2P" "2PA"
## [15] "2P%" "eFG%" "FT" "FTA" "FT%" "ORB" "DRB"
## [22] "TRB" "AST" "STL" "BLK" "TOV" "PF" "PTS"
## [29] "Trp-Dbl" "Awards"
# Function to load each team’s data
load_team_data <- function(sheet_name) {
df <- read_excel(team_xlsx, sheet = sheet_name)
df <- df %>%
mutate(
Team = sheet_name,
Won_award = ifelse(Awards > 0, 1, 0),
PRA = PTS + TRB + AST,
STOCKS = STL + BLK
)
return(df)
}
# Apply the function to all sheets and combine
all_teams <- lapply(excel_sheets(team_xlsx), load_team_data) %>%
bind_rows()
# Preview combined dataset
head(all_teams)
## # A tibble: 6 × 35
## Rk Player Age G GS MP FG FGA `FG%` `3P` `3PA` `3P%` `2P`
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Jalen… 24 79 22 2031 246 620 0.397 122 362 0.337 124
## 2 2 Keon … 22 79 56 1925 303 779 0.389 126 401 0.314 177
## 3 3 Nic C… 25 70 62 1882 320 568 0.563 5 21 0.238 315
## 4 4 Camer… 28 57 57 1800 355 747 0.475 159 408 0.39 196
## 5 5 Ziair… 23 63 45 1541 214 520 0.412 103 302 0.341 111
## 6 6 Tyres… 25 60 11 1315 189 465 0.406 99 282 0.351 90
## # ℹ 22 more variables: `2PA` <dbl>, `2P%` <dbl>, `eFG%` <dbl>, FT <dbl>,
## # FTA <dbl>, `FT%` <dbl>, ORB <dbl>, DRB <dbl>, TRB <dbl>, AST <dbl>,
## # STL <dbl>, BLK <dbl>, TOV <dbl>, PF <dbl>, PTS <dbl>, `Trp-Dbl` <dbl>,
## # Awards <chr>, Team <chr>, Won_award <dbl>, PRA <dbl>, STOCKS <dbl>,
## # Pos <chr>
# Load the conference lookup data
conference_lookup <- read_excel(conf_xlsx)
# Merge with main dataset and create binary variable
all_teams <- all_teams %>%
left_join(conference_lookup, by = "Team") %>%
mutate(Conference_binary = ifelse(Conference == "East", 1, 0))
# Preview to confirm
head(all_teams)
## # A tibble: 6 × 37
## Rk Player Age G GS MP FG FGA `FG%` `3P` `3PA` `3P%` `2P`
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Jalen… 24 79 22 2031 246 620 0.397 122 362 0.337 124
## 2 2 Keon … 22 79 56 1925 303 779 0.389 126 401 0.314 177
## 3 3 Nic C… 25 70 62 1882 320 568 0.563 5 21 0.238 315
## 4 4 Camer… 28 57 57 1800 355 747 0.475 159 408 0.39 196
## 5 5 Ziair… 23 63 45 1541 214 520 0.412 103 302 0.341 111
## 6 6 Tyres… 25 60 11 1315 189 465 0.406 99 282 0.351 90
## # ℹ 24 more variables: `2PA` <dbl>, `2P%` <dbl>, `eFG%` <dbl>, FT <dbl>,
## # FTA <dbl>, `FT%` <dbl>, ORB <dbl>, DRB <dbl>, TRB <dbl>, AST <dbl>,
## # STL <dbl>, BLK <dbl>, TOV <dbl>, PF <dbl>, PTS <dbl>, `Trp-Dbl` <dbl>,
## # Awards <chr>, Team <chr>, Won_award <dbl>, PRA <dbl>, STOCKS <dbl>,
## # Pos <chr>, Conference <chr>, Conference_binary <dbl>
# Scatterplot: PRA vs STOCKS by Conference
ggplot(all_teams, aes(x = PRA, y = STOCKS, color = Conference)) +
geom_point(size = 3) +
labs(
title = "Offensive vs. Defensive Performance by Conference",
x = "Points + Rebounds + Assists (PRA)",
y = "Steals + Blocks (STOCKS)",
color = "Conference"
) +
theme_minimal()

# Second visualization: average PRA by Conference
ggplot(all_teams, aes(x = Conference, y = PRA, fill = Conference)) +
geom_boxplot() +
labs(
title = "Distribution of PRA by Conference",
x = "Conference",
y = "PRA (Offensive Metric)"
) +
theme_minimal()

# Point-biserial correlations (Conference vs PRA and STOCKS)
cor.test(all_teams$Conference_binary, all_teams$PRA)
##
## Pearson's product-moment correlation
##
## data: all_teams$Conference_binary and all_teams$PRA
## t = -1.8195, df = 650, p-value = 0.0693
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.147164250 0.005629906
## sample estimates:
## cor
## -0.07118475
cor.test(all_teams$Conference_binary, all_teams$STOCKS)
##
## Pearson's product-moment correlation
##
## data: all_teams$Conference_binary and all_teams$STOCKS
## t = -2.094, df = 650, p-value = 0.03665
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.157650363 -0.005105577
## sample estimates:
## cor
## -0.08185737
# Create correlation matrix for Age, PRA, and STOCKS
corr_data <- all_teams %>%
dplyr::select(Age, PRA, STOCKS) %>%
na.omit()
# Compute and visualize correlation matrix
corr_matrix <- cor(corr_data)
ggcorrplot(corr_matrix, lab = TRUE, title = "Correlation Matrix: Age, PRA, and STOCKS")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggcorrplot package.
## Please report the issue at <https://github.com/kassambara/ggcorrplot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

##
## **To:** NBA Commissioner Adam Silver
## **From:** Emma Valentina Tupone
## **Subject:** Summary of 2024-2025 Team Performance Analysis
##
## Our analysis of team performance metrics shows a moderate positive relationship between offensive output (Points + Rebounds + Assists, PRA) and defensive activity (Steals + Blocks, STOCKS).
## Teams that performed well offensively tended to also perform well defensively.
##
## Overall, differences between Eastern and Western Conference teams were minor. There was also no strong evidence that one conference consistently outperformed the other across these measures.
##
## When focusing on player Age, the relationship between PRA and STOCKS remained positive. This suggests that team performance links are not primarily driven by player experience.
##
## **Limitation:** The data represents team-level totals rather than individual player-level performance, limiting insight into specific player contributions.
## **Next Step:** Future analyses could include player efficiency ratings or advanced metrics such as plus-minus to better understand what drives elite performances across teams.