Loading and Preparing the Data

library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the league workbook into NBA_teams. No sheet is named here, so readxl
# falls back to its default sheet. Then open the result in the data viewer.
NBA_teams <- read_xlsx(path = "NBA Team Total Data 2024-2025.xlsx")
View(x = NBA_teams)

#' Load one team's worksheet and add derived per-player columns.
#'
#' @param file_name Path to the Excel workbook.
#' @param team_name Label stored in the new `Team` column.
#' @param sheet_name Worksheet to read; also stored in the new `Sheet` column.
#' @param PRA,Stocks Unused. Kept (now optional) so existing calls that pass
#'   them positionally or by name keep working.
#' @return The worksheet's data with five added columns: `Team`, `Sheet`,
#'   `PRA` (PTS + TRB + AST), `Stocks` (STL + BLK) and `Won_award`
#'   ("1" when `Awards` is not missing, otherwise "0").
loading_teams <- function(file_name, team_name, sheet_name,
                          PRA = NULL, Stocks = NULL) {
  team_data <- read_xlsx(file_name, sheet = sheet_name)

  team_data$Team <- team_name
  team_data$Sheet <- sheet_name

  # PRA is points + rebounds + assists, so it must use total rebounds (TRB);
  # ORB would count offensive rebounds only. Missing values are treated as 0.
  team_data$PRA <- rowSums(team_data[, c("PTS", "TRB", "AST")], na.rm = TRUE)
  team_data$Stocks <- rowSums(team_data[, c("STL", "BLK")], na.rm = TRUE)

  # Character flag: "1" if the player has any entry in Awards, else "0".
  team_data$Won_award <- ifelse(is.na(team_data$Awards), "0", "1")

  team_data
}
# Load the Warriors worksheet as a single-team check of loading_teams().
# The PRA and Stocks arguments are passed as before; the function body in this
# file does not read them.
team_warrior <- loading_teams(
  file_name = "NBA Team Total Data 2024-2025.xlsx",
  team_name = "Warriors",
  sheet_name = "Warriors",
  PRA = "PRA",
  Stocks = "Stocks"
)
print(team_warrior)
## # A tibble: 23 × 35
##       Rk Player        Age     G    GS    MP    FG   FGA `FG%`  `3P` `3PA` `3P%`
##    <dbl> <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1     1 Stephen Cu…    36    70    70  2252   564  1258 0.448   311   784 0.397
##  2     2 Draymond G…    34    68    66  1983   216   509 0.424    80   246 0.325
##  3     3 Buddy Hield    32    82    22  1863   328   786 0.417   203   549 0.37 
##  4     4 Brandin Po…    21    64    33  1716   280   629 0.445   115   309 0.372
##  5     5 Moses Moody    22    74    34  1649   246   568 0.433   126   337 0.374
##  6     6 Andrew Wig…    29    43    43  1296   261   588 0.444    94   248 0.379
##  7     7 Jonathan K…    22    47    10  1144   258   568 0.454    46   151 0.305
##  8     8 Kevon Loon…    28    76     6  1142   143   278 0.514     2     5 0.4  
##  9     9 Jimmy Butl…    35    30    30   980   159   334 0.476    19    68 0.279
## 10    10 Trayce Jac…    24    62    37   967   174   302 0.576     0     3 0    
## # ℹ 13 more rows
## # ℹ 23 more variables: `2P` <dbl>, `2PA` <dbl>, `2P%` <dbl>, `eFG%` <dbl>,
## #   FT <dbl>, FTA <dbl>, `FT%` <dbl>, ORB <dbl>, DRB <dbl>, TRB <dbl>,
## #   AST <dbl>, STL <dbl>, BLK <dbl>, TOV <dbl>, PF <dbl>, PTS <dbl>,
## #   `Trp-Dbl` <dbl>, Awards <chr>, Team <chr>, Sheet <chr>, PRA <dbl>,
## #   Stocks <dbl>, Won_award <chr>
getwd()
## [1] "/Users/crystaladote/Downloads/Reproducible Psyc Fall 2025"
# Build the workbook path from the working directory instead of hard-coding an
# absolute user-specific path, so the report runs on any machine whose working
# directory contains the workbook.
path <- file.path(getwd(), "NBA Team Total Data 2024-2025.xlsx")
# Confirm the workbook is where we expect before reading from it.
file.exists(path)
## [1] TRUE
# List every worksheet name in the workbook (one sheet per team).
team_sheets <- excel_sheets(path)
team_sheets
##  [1] "Nets"         "Knicks"       "Raptors"      "Philly"       "Celtics"     
##  [6] "Timberwolves" "Thunder"      "Jazz"         "Trailblazers" "Nuggets"     
## [11] "Bulls"        "Bucks"        "Cavaliers"    "Pistons"      "Pacers"      
## [16] "Warriors"     "Suns"         "Lakers"       "Clippers"     "Kings"       
## [21] "Hornets"      "Magic"        "Wizards"      "Hawks"        "Heat"        
## [26] "Grizzles"     "Spurs"        "Pelicans"     "Rockets"      "Mavericks"
# Locate the workbook relative to the working directory rather than through a
# hard-coded absolute path, so the script is portable across machines.
file_name <- file.path(getwd(), "NBA Team Total Data 2024-2025.xlsx")
team_sheets <- excel_sheets(file_name)

# Read every worksheet with loading_teams(), using the sheet name as the team
# label, and stack the per-team tables into one data set.
all_teams <- bind_rows(
  lapply(team_sheets, function(sheet_name) {
    loading_teams(
      file_name = file_name,
      team_name = sheet_name,
      sheet_name = sheet_name
    )
  })
)
View(all_teams)

Adding Conference Information

# Read the team-to-conference lookup table and inspect it.
team_conference <- read_excel(path = "Team Conferences data.xlsx")
View(team_conference)

# Attach conference information to the player rows. merge() is called without
# `by`, so it joins on whatever column names the two tables have in common.
full_team_data <- merge(x = all_teams, y = team_conference)
View(full_team_data)

# Recode Conference as a numeric indicator (East = 1, West = 0) so it can be
# used in the correlation tests; any other value has no matching branch.
full_team_data <- mutate(
  full_team_data,
  Conference = case_when(
    Conference == "West" ~ 0,
    Conference == "East" ~ 1
  )
)

Visual Exploration

library(ggplot2)
# Scatter plot of PRA against Stocks, one point per row of full_team_data.
# Conference is stored as 0/1 (West/East) at this point; mapping the raw number
# to colour would give a continuous gradient, so it is converted to a labelled
# factor to get two discrete colours and a readable legend.
ggplot(
  full_team_data,
  aes(
    x = PRA,
    y = Stocks,
    color = factor(Conference, levels = c(0, 1), labels = c("West", "East"))
  )
) +
  geom_point() +
  labs(
    title = "Relationship between PRA and Stocks",
    x = "PRA",
    y = "Stocks",
    color = "Conference"
  )

This scatter plot shows us that for both conferences (East and West), there is a positive relationship between PRA and Stocks.

# Scatter plot of defensive rebounds against steals, one point per row.
# Conference is stored as 0/1 (West/East) at this point; it is converted to a
# labelled factor so the two conferences get discrete colours instead of a
# continuous colour gradient.
ggplot(
  full_team_data,
  aes(
    x = DRB,
    y = STL,
    color = factor(Conference, levels = c(0, 1), labels = c("West", "East"))
  )
) +
  geom_point() +
  labs(
    title = "Relationship between Defensive Rebounds and Steals",
    x = "Defensive Rebounds",
    y = "Steals",
    color = "Conference"
  )

The scatter plot shows us that there is a positive relationship between defensive rebounds and steals for both the East and the West.

Correlation Analysis

# Pearson correlation between PRA and the 0/1 Conference indicator
# (East = 1, West = 0), so a negative r means lower PRA in the East.
cor.test(x = full_team_data$PRA, y = full_team_data$Conference)
## 
##  Pearson's product-moment correlation
## 
## data:  full_team_data$PRA and full_team_data$Conference
## t = -1.7941, df = 650, p-value = 0.07325
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.146194514  0.006620927
## sample estimates:
##         cor 
## -0.07019864

There is a negative, weak correlation between PRA and Conference (-0.070). This correlation is not statistically significant, given that it has a p-value of 0.073.

# Pearson correlation between Stocks and the 0/1 Conference indicator
# (East = 1, West = 0), so a negative r means lower Stocks in the East.
cor.test(x = full_team_data$Stocks, y = full_team_data$Conference)
## 
##  Pearson's product-moment correlation
## 
## data:  full_team_data$Stocks and full_team_data$Conference
## t = -2.094, df = 650, p-value = 0.03665
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.157650363 -0.005105577
## sample estimates:
##         cor 
## -0.08185737

There is a negative, weak correlation between Stocks and Conference (-0.082). However, this correlation is statistically significant, with a p-value of 0.037.

library(ggcorrplot)
# Keep only the three variables of interest. select() is namespaced because
# MASS (attached later via ppcor) also exports a select().
stats_matrix <- dplyr::select(full_team_data, Age, PRA, Stocks)
# Pairwise Pearson correlations, using all complete pairs for each cell.
APS_matrix <- cor(x = stats_matrix, use = "pairwise.complete.obs")
print(APS_matrix)
##               Age       PRA     Stocks
## Age    1.00000000 0.1246811 0.07734898
## PRA    0.12468112 1.0000000 0.81779753
## Stocks 0.07734898 0.8177975 1.00000000
# Draw the lower triangle of the correlation matrix with the coefficients
# printed in each cell.
ggcorrplot(corr = APS_matrix, type = "lower", lab = TRUE) +
  labs(title = "Correlation Matrix: Age, PRA, and Stocks")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggcorrplot package.
##   Please report the issue at <https://github.com/kassambara/ggcorrplot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

The relationship between PRA and Stocks is the strongest, with a positive correlation of 0.82. It is the closest out of all 3 correlations to 1.

library(ppcor)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# Partial correlation between PRA and Stocks while holding Age constant.
with(full_team_data, pcor.test(x = PRA, y = Stocks, z = Age))
##    estimate       p.value statistic   n gp  Method
## 1 0.8169568 2.748291e-157   36.0888 652  1 pearson

Controlling for the variable Age, there is still a strong positive correlation between PRA and Stocks (0.817), meaning that age doesn’t have much effect on the relationship between those 2 variables. The very small p-value (2.75e-157) also shows that this correlation is statistically significant.

Findings

To Mr. Silver,

According to the findings and correlations, there doesn’t appear to be a meaningful difference between the East and West teams, especially regarding PRA (points, rebounds, and assists) and Stocks (steals and blocks): both correlations with Conference were weak, even though the one for Stocks was statistically significant. We also saw that there is a strong, positive correlation between PRA and Stocks when Age was controlled, conveying that age has little effect on the relationship between the two variables. One scatter plot showed the relationship between defensive rebounds and steals, conveying that these two defensive statistics tend to move together. The scatter plot conveys a positive, somewhat strong correlation between the two variables. One potential next step when analyzing this data could be to look at the relationship between the 2-point and 3-point averages and possibly the field goal average. One limitation of my analysis is that the weak correlations that were observed could be due to other factors that aren’t in the data.