library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the workbook using a path relative to the working directory.
# No `sheet` is given, so read_xlsx() falls back to its default sheet.
NBA_teams <- read_xlsx("NBA Team Total Data 2024-2025.xlsx")
# View() opens an interactive data viewer; only call it in an interactive
# session so the script can also be knitted or run with Rscript.
if (interactive()) {
  View(NBA_teams)
}
#' Load one team's sheet and add identifying and derived columns
#'
#' Reads `sheet_name` from the Excel workbook `file_name` and appends:
#' `Team`, `Sheet`, `PRA` (PTS + TRB + AST), `Stocks` (STL + BLK) and
#' `Won_award` ("1" when `Awards` is not NA, otherwise "0").
#'
#' @param file_name Path to the Excel workbook.
#' @param team_name Value stored in the new `Team` column.
#' @param sheet_name Sheet to read; also stored in the new `Sheet` column.
#' @param PRA Unused. Kept (with a default) so existing calls that pass it
#'   positionally keep working.
#' @param Stocks Unused. Kept for the same reason as `PRA`.
#' @return The sheet's data with the five columns above added.
loading_teams <- function(file_name, team_name, sheet_name,
                          PRA = NULL, Stocks = NULL) {
  team_data <- read_xlsx(file_name, sheet = sheet_name)
  team_data$Team <- team_name
  team_data$Sheet <- sheet_name
  # PRA is points + rebounds + assists, so it must use total rebounds (TRB).
  # The previous version summed ORB (offensive rebounds only), which
  # understated every player's PRA by their defensive rebounds.
  team_data$PRA <- rowSums(team_data[, c("PTS", "TRB", "AST")], na.rm = TRUE)
  team_data$Stocks <- rowSums(team_data[, c("STL", "BLK")], na.rm = TRUE)
  # Stored as character "0"/"1" to keep the column type callers already see.
  team_data$Won_award <- ifelse(is.na(team_data$Awards), "0", "1")
  team_data
}
# Load the "Warriors" sheet on its own and print the resulting table.
team_warrior <- loading_teams(
  file_name = "NBA Team Total Data 2024-2025.xlsx",
  team_name = "Warriors",
  sheet_name = "Warriors",
  PRA = "PRA",
  Stocks = "Stocks"
)
print(team_warrior)
## # A tibble: 23 × 35
## Rk Player Age G GS MP FG FGA `FG%` `3P` `3PA` `3P%`
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Stephen Cu… 36 70 70 2252 564 1258 0.448 311 784 0.397
## 2 2 Draymond G… 34 68 66 1983 216 509 0.424 80 246 0.325
## 3 3 Buddy Hield 32 82 22 1863 328 786 0.417 203 549 0.37
## 4 4 Brandin Po… 21 64 33 1716 280 629 0.445 115 309 0.372
## 5 5 Moses Moody 22 74 34 1649 246 568 0.433 126 337 0.374
## 6 6 Andrew Wig… 29 43 43 1296 261 588 0.444 94 248 0.379
## 7 7 Jonathan K… 22 47 10 1144 258 568 0.454 46 151 0.305
## 8 8 Kevon Loon… 28 76 6 1142 143 278 0.514 2 5 0.4
## 9 9 Jimmy Butl… 35 30 30 980 159 334 0.476 19 68 0.279
## 10 10 Trayce Jac… 24 62 37 967 174 302 0.576 0 3 0
## # ℹ 13 more rows
## # ℹ 23 more variables: `2P` <dbl>, `2PA` <dbl>, `2P%` <dbl>, `eFG%` <dbl>,
## # FT <dbl>, FTA <dbl>, `FT%` <dbl>, ORB <dbl>, DRB <dbl>, TRB <dbl>,
## # AST <dbl>, STL <dbl>, BLK <dbl>, TOV <dbl>, PF <dbl>, PTS <dbl>,
## # `Trp-Dbl` <dbl>, Awards <chr>, Team <chr>, Sheet <chr>, PRA <dbl>,
## # Stocks <dbl>, Won_award <chr>
# Show the directory that relative file names are resolved against.
getwd()
## [1] "/Users/crystaladote/Downloads/Reproducible Psyc Fall 2025"
# Build the workbook path from the working directory rather than hard-coding
# a user-specific absolute path, so the script also runs on other machines.
path <- file.path(getwd(), "NBA Team Total Data 2024-2025.xlsx")
file.exists(path)
## [1] TRUE
# List the sheet names contained in the workbook.
team_sheets <- excel_sheets(path)
team_sheets
## [1] "Nets" "Knicks" "Raptors" "Philly" "Celtics"
## [6] "Timberwolves" "Thunder" "Jazz" "Trailblazers" "Nuggets"
## [11] "Bulls" "Bucks" "Cavaliers" "Pistons" "Pacers"
## [16] "Warriors" "Suns" "Lakers" "Clippers" "Kings"
## [21] "Hornets" "Magic" "Wizards" "Hawks" "Heat"
## [26] "Grizzles" "Spurs" "Pelicans" "Rockets" "Mavericks"
# Resolve the workbook relative to the working directory instead of using a
# user-specific absolute path, and fail early with a clear error if missing.
file_name <- file.path(getwd(), "NBA Team Total Data 2024-2025.xlsx")
stopifnot(file.exists(file_name))
team_sheets <- excel_sheets(file_name)
# Load every sheet with loading_teams() (the sheet name doubles as the team
# name) and stack the per-sheet tables into one data frame.
all_teams <- bind_rows(
  lapply(team_sheets, function(sheet_name) {
    loading_teams(
      file_name = file_name,
      team_name = sheet_name,
      sheet_name = sheet_name
    )
  })
)
# View() is an interactive viewer; skip it when knitting or using Rscript.
if (interactive()) {
  View(all_teams)
}
# Read the team-to-conference lookup table.
team_conference <- read_excel("Team Conferences data.xlsx")
# View() is an interactive viewer; skip it when knitting or using Rscript.
if (interactive()) {
  View(team_conference)
}
# merge() without `by` joins on every column name the two tables share and
# keeps only matching rows.
# NOTE(review): presumably the shared key is `Team` -- confirm, and check that
# no team is dropped because its sheet name differs from the lookup spelling.
full_team_data <- merge(all_teams, team_conference)
if (interactive()) {
  View(full_team_data)
}
# Recode Conference to a numeric indicator (East = 1, West = 0) so it can be
# passed to cor.test(); any other value becomes NA.
full_team_data <- full_team_data %>%
  mutate(
    Conference = case_when(
      Conference == "East" ~ 1,
      Conference == "West" ~ 0
    )
  )
library(ggplot2)
# Scatter plot of PRA against Stocks, coloured by conference.
# Conference is stored as numeric 0/1 here, which ggplot would draw as a
# continuous colour gradient; convert it to a labelled factor so the legend
# shows two discrete groups (0 = West, 1 = East).
ggplot(
  full_team_data,
  aes(
    x = PRA,
    y = Stocks,
    color = factor(Conference, levels = c(0, 1), labels = c("West", "East"))
  )
) +
  geom_point() +
  labs(
    title = "Relationship between PRA and Stocks",
    x = "PRA",
    y = "Stocks",
    color = "Conference"
  )
This scatter plot shows us that for both conferences (East and West), there is a positive relationship between PRA and Stocks.
# Scatter plot of defensive rebounds against steals, coloured by conference.
# Conference is stored as numeric 0/1 here, which ggplot would draw as a
# continuous colour gradient; convert it to a labelled factor so the legend
# shows two discrete groups (0 = West, 1 = East).
ggplot(
  full_team_data,
  aes(
    x = DRB,
    y = STL,
    color = factor(Conference, levels = c(0, 1), labels = c("West", "East"))
  )
) +
  geom_point() +
  labs(
    title = "Relationship between Defensive Rebounds and Steals",
    x = "Defensive Rebounds",
    y = "Steals",
    color = "Conference"
  )
The scatter plot shows us that there is a positive relationship between defensive rebounds and steals for both the East and the West.
# Pearson correlation between PRA and the 0/1 Conference indicator.
cor.test(
  x = full_team_data$PRA,
  y = full_team_data$Conference
)
##
## Pearson's product-moment correlation
##
## data: full_team_data$PRA and full_team_data$Conference
## t = -1.7941, df = 650, p-value = 0.07325
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.146194514 0.006620927
## sample estimates:
## cor
## -0.07019864
There is a negative, weak correlation between PRA and Conference (-0.070). This correlation is not statistically significant, given that it has a p-value of 0.073.
# Pearson correlation between Stocks and the 0/1 Conference indicator.
cor.test(
  x = full_team_data$Stocks,
  y = full_team_data$Conference
)
##
## Pearson's product-moment correlation
##
## data: full_team_data$Stocks and full_team_data$Conference
## t = -2.094, df = 650, p-value = 0.03665
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.157650363 -0.005105577
## sample estimates:
## cor
## -0.08185737
There is a negative, weak correlation between Stocks and Conference (-0.082). However, this correlation is statistically significant, with a p-value of 0.037.
library(ggcorrplot)
# Keep only the three variables of interest, then compute their pairwise
# correlations, using every pair of non-missing values for each cell.
stats_matrix <- full_team_data[, c("Age", "PRA", "Stocks")]
APS_matrix <- cor(
  stats_matrix,
  use = "pairwise.complete.obs"
)
APS_matrix
## Age PRA Stocks
## Age 1.00000000 0.1246811 0.07734898
## PRA 0.12468112 1.0000000 0.81779753
## Stocks 0.07734898 0.8177975 1.00000000
# Draw the lower triangle of the correlation matrix with the coefficients
# printed in each cell.
ggcorrplot(
  corr = APS_matrix,
  type = "lower",
  lab = TRUE
) +
  ggtitle("Correlation Matrix: Age, PRA, and Stocks")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggcorrplot package.
## Please report the issue at <https://github.com/kassambara/ggcorrplot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
The relationship between PRA and Stocks is the strongest, with a positive correlation of 0.82. It is the closest out of all 3 correlations to 1.
library(ppcor)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Partial correlation between PRA and Stocks, controlling for Age.
pcor.test(
  x = full_team_data$PRA,
  y = full_team_data$Stocks,
  z = full_team_data$Age
)
## estimate p.value statistic n gp Method
## 1 0.8169568 2.748291e-157 36.0888 652 1 pearson
After controlling for Age, there is still a strong positive correlation between PRA and Stocks (0.817). This means that age doesn’t have much effect on the relationship between those 2 variables. The correlation is also statistically significant, given its very small p-value (p < .001).
To Mr. Silver,
According to the findings and correlations, there doesn’t appear to be a meaningful difference between the East and West teams, especially regarding PRA (points, rebounds, and assists) and Stocks (steals and blocks). We also saw that there is a strong, positive correlation between PRA and Stocks when Age was controlled, conveying that age doesn’t have much effect on either variable. One scatter plot showed the relationship between defensive rebounds and steals, conveying that offensive and defensive performances tend to move together. The scatter plot conveys a positive, somewhat strong correlation between the two variables. One potential next step when analyzing this data could be to look at the relationship between the 2-point and 3-point averages and possibly the field goal average. One limitation of my analysis is that the weak correlations that were observed could possibly be due to other factors that aren’t in the data.