# Read the historical NBA player metrics CSV straight from GitHub, treating
# empty strings, "NA" and "NULL" as missing values.
nba_historical <- read_csv(
  "https://raw.githubusercontent.com/fivethirtyeight/nba-player-advanced-metrics/refs/heads/master/nba-data-historical.csv",
  na = c("", "NA", "NULL")
)
## Rows: 28179 Columns: 42
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): player_id, name_common, type, team_id, pos, franch_id
## dbl (36): year_id, age, tmRtg, G, Min, MP%, MPG, P/36, TS%, A/36, R/36, SB/3...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
library(ggplot2)
# Plot age vs ORtg by position for 2016 rows with Min >= 96: jittered points
# plus a linear fit in each panel. ORtg itself is not filtered here, so rows
# with a missing ORtg are dropped by ggplot2 with a warning.
nba_historical %>%
  filter(year_id == 2016L, Min >= 96) %>%
  ggplot(aes(x = age, y = ORtg)) +
  geom_jitter(size = 0.5) +
  geom_smooth(method = lm) +
  facet_wrap(vars(pos)) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 124 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

library(dplyr)
library(ggplot2)
# Plot age vs ORtg by position for 2016 rows with Min >= 96, keeping only
# ORtg values within [50, 150]: jittered points plus a linear fit per panel.
nba_historical %>%
  filter(
    year_id == 2016L,
    ORtg >= 50,
    ORtg <= 150,
    Min >= 96
  ) %>%
  ggplot(aes(x = age, y = ORtg)) +
  geom_jitter(size = 0.5) +
  geom_smooth(method = lm) +
  facet_wrap(vars(pos)) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Correlation between age and ORtg within each position, for 2016 rows with
# ORtg in [50, 150] and Min >= 96. One row per position is returned and the
# result is left ungrouped.
correlations_by_position_ortg <- nba_historical %>%
  filter(
    year_id == 2016L,
    ORtg >= 50,
    ORtg <= 150,
    Min >= 96
  ) %>%
  group_by(pos) %>%
  summarise(
    correlation_age_ortg = cor(age, ORtg, use = "complete.obs"),
    .groups = "drop"
  )
print(correlations_by_position_ortg)
## # A tibble: 5 × 2
## pos correlation_age_ortg
## <chr> <dbl>
## 1 C 0.0450
## 2 PF 0.0244
## 3 PG 0.330
## 4 SF 0.116
## 5 SG 0.169
# Permutation test of the age-ORtg correlation among 2016 rows with
# pos == "PG", ORtg within [50, 150] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    ORtg >= 50,
    ORtg <= 150,
    Min >= 96,
    pos == "PG"
  ) %>%
  permTestCor(ORtg ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , ORtg : 0.3301
## Mean of permutation distribution: -0.00018
## Standard error of permutation distribution: 0.10152
## P-value: 0.001
##
## *-------------*

# Permutation test of the age-ORtg correlation among 2016 rows with
# pos == "SG", ORtg within [50, 150] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    ORtg >= 50,
    ORtg <= 150,
    Min >= 96,
    pos == "SG"
  ) %>%
  permTestCor(ORtg ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , ORtg : 0.1687
## Mean of permutation distribution: 0.00213
## Standard error of permutation distribution: 0.09976
## P-value: 0.089
##
## *-------------*

# Permutation test of the age-ORtg correlation among 2016 rows with
# pos == "SF", ORtg within [50, 150] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    ORtg >= 50,
    ORtg <= 150,
    Min >= 96,
    pos == "SF"
  ) %>%
  permTestCor(ORtg ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , ORtg : 0.1164
## Mean of permutation distribution: -0.00422
## Standard error of permutation distribution: 0.10663
## P-value: 0.279
##
## *-------------*

# Permutation test of the age-ORtg correlation among 2016 rows with
# pos == "PF", ORtg within [50, 150] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    ORtg >= 50,
    ORtg <= 150,
    Min >= 96,
    pos == "PF"
  ) %>%
  permTestCor(ORtg ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , ORtg : 0.0244
## Mean of permutation distribution: 0.00537
## Standard error of permutation distribution: 0.10322
## P-value: 0.824
##
## *-------------*

# Permutation test of the age-ORtg correlation among 2016 rows with
# pos == "C", ORtg within [50, 150] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    ORtg >= 50,
    ORtg <= 150,
    Min >= 96,
    pos == "C"
  ) %>%
  permTestCor(ORtg ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , ORtg : 0.045
## Mean of permutation distribution: 0.00124
## Standard error of permutation distribution: 0.10838
## P-value: 0.671
##
## *-------------*

library(dplyr)
library(ggplot2)
# Plot age vs `PIE%` by position for 2016 rows with Min >= 96: jittered points
# plus a linear fit in each panel. `PIE%` itself is not filtered here, so rows
# with a missing `PIE%` are dropped by ggplot2 with a warning.
nba_historical %>%
  filter(year_id == 2016L, Min >= 96) %>%
  ggplot(aes(x = age, y = `PIE%`)) +
  geom_jitter(size = 0.5) +
  geom_smooth(method = lm) +
  facet_wrap(vars(pos)) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 124 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

library(dplyr)
library(ggplot2)
# Plot age vs `PIE%` by position for 2016 rows with Min >= 96, keeping only
# `PIE%` values within [0, 25]: jittered points plus a linear fit per panel.
nba_historical %>%
  filter(
    year_id == 2016L,
    `PIE%` >= 0,
    `PIE%` <= 25,
    Min >= 96
  ) %>%
  ggplot(aes(x = age, y = `PIE%`)) +
  geom_jitter(size = 0.5) +
  geom_smooth(method = lm) +
  facet_wrap(vars(pos)) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Correlation between age and `PIE%` within each position, for 2016 rows with
# `PIE%` in [0, 25] and Min >= 96. One row per position is returned and the
# result is left ungrouped.
correlations_by_position_pie <- nba_historical %>%
  filter(
    year_id == 2016L,
    `PIE%` >= 0,
    `PIE%` <= 25,
    Min >= 96
  ) %>%
  group_by(pos) %>%
  summarise(
    correlation_age_pie = cor(age, `PIE%`, use = "complete.obs"),
    .groups = "drop"
  )
print(correlations_by_position_pie)
## # A tibble: 5 × 2
## pos correlation_age_pie
## <chr> <dbl>
## 1 C -0.160
## 2 PF 0.0468
## 3 PG 0.0120
## 4 SF -0.0103
## 5 SG 0.0774
# Permutation test of the age-`PIE%` correlation among 2016 rows with
# pos == "PG", `PIE%` within [0, 25] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    `PIE%` >= 0,
    `PIE%` <= 25,
    Min >= 96,
    pos == "PG"
  ) %>%
  permTestCor(`PIE%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , PIE% : 0.012
## Mean of permutation distribution: 0.00755
## Standard error of permutation distribution: 0.10035
## P-value: 0.911
##
## *-------------*

# Permutation test of the age-`PIE%` correlation among 2016 rows with
# pos == "SG", `PIE%` within [0, 25] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    `PIE%` >= 0,
    `PIE%` <= 25,
    Min >= 96,
    pos == "SG"
  ) %>%
  permTestCor(`PIE%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , PIE% : 0.0774
## Mean of permutation distribution: -0.00202
## Standard error of permutation distribution: 0.1117
## P-value: 0.49
##
## *-------------*

# Permutation test of the age-`PIE%` correlation among 2016 rows with
# pos == "SF", `PIE%` within [0, 25] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    `PIE%` >= 0,
    `PIE%` <= 25,
    Min >= 96,
    pos == "SF"
  ) %>%
  permTestCor(`PIE%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , PIE% : -0.0103
## Mean of permutation distribution: -0.00079
## Standard error of permutation distribution: 0.1048
## P-value: 0.915
##
## *-------------*

# Permutation test of the age-`PIE%` correlation among 2016 rows with
# pos == "PF", `PIE%` within [0, 25] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    `PIE%` >= 0,
    `PIE%` <= 25,
    Min >= 96,
    pos == "PF"
  ) %>%
  permTestCor(`PIE%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , PIE% : 0.0468
## Mean of permutation distribution: 0.00163
## Standard error of permutation distribution: 0.09993
## P-value: 0.652
##
## *-------------*

# Permutation test of the age-`PIE%` correlation among 2016 rows with
# pos == "C", `PIE%` within [0, 25] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    `PIE%` >= 0,
    `PIE%` <= 25,
    Min >= 96,
    pos == "C"
  ) %>%
  permTestCor(`PIE%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , PIE% : -0.1602
## Mean of permutation distribution: 0.00022
## Standard error of permutation distribution: 0.10234
## P-value: 0.124
##
## *-------------*

library(dplyr)
library(ggplot2)
# Plot age vs DRtg by position for 2016 rows with Min >= 96: jittered points
# plus a linear fit in each panel. No outlier filter is applied to DRtg, so
# rows with a missing DRtg are dropped by ggplot2 with a warning.
# DRtg is mapped directly rather than through as.numeric(): the column is
# already numeric, and the wrapper would only turn the default y-axis title
# into "as.numeric(DRtg)".
nba_historical %>%
  filter(year_id == 2016L, Min >= 96) %>%
  ggplot(aes(x = age, y = DRtg)) +
  geom_jitter(size = 0.5) +
  geom_smooth(method = lm) +
  facet_wrap(vars(pos)) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 124 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Correlation between age and DRtg within each position, for 2016 rows with
# Min >= 96. DRtg is not filtered, so missing values are handled by
# use = "complete.obs". The result is left ungrouped.
correlations_by_position_DRtg <- nba_historical %>%
  filter(year_id == 2016L, Min >= 96) %>%
  group_by(pos) %>%
  summarise(
    correlation_age_drtg = cor(age, DRtg, use = "complete.obs"),
    .groups = "drop"
  )
print(correlations_by_position_DRtg)
## # A tibble: 5 × 2
## pos correlation_age_drtg
## <chr> <dbl>
## 1 C -0.0450
## 2 PF -0.216
## 3 PG 0.00556
## 4 SF 0.0246
## 5 SG -0.107
# Permutation test of the age-DRtg correlation among 2016 rows with
# pos == "PG" and Min >= 96. DRtg is not pre-filtered, so rows with a missing
# DRtg reach permTestCor(), which reports how many observations it removed.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "PG"
  ) %>%
  permTestCor(DRtg ~ age, data = .)
##
## 25 observation(s) removed due to missing values.
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , DRtg : 0.0056
## Mean of permutation distribution: -0.00401
## Standard error of permutation distribution: 0.10062
## P-value: 0.959
##
## *-------------*

# Permutation test of the age-DRtg correlation among 2016 rows with
# pos == "SG" and Min >= 96. DRtg is not pre-filtered, so rows with a missing
# DRtg reach permTestCor(), which reports how many observations it removed.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "SG"
  ) %>%
  permTestCor(DRtg ~ age, data = .)
##
## 31 observation(s) removed due to missing values.
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , DRtg : -0.1066
## Mean of permutation distribution: -0.00101
## Standard error of permutation distribution: 0.10676
## P-value: 0.317
##
## *-------------*

# Permutation test of the age-DRtg correlation among 2016 rows with
# pos == "SF" and Min >= 96. DRtg is not pre-filtered, so rows with a missing
# DRtg reach permTestCor(), which reports how many observations it removed.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "SF"
  ) %>%
  permTestCor(DRtg ~ age, data = .)
##
## 24 observation(s) removed due to missing values.
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , DRtg : 0.0246
## Mean of permutation distribution: -0.00562
## Standard error of permutation distribution: 0.10691
## P-value: 0.816
##
## *-------------*

# Permutation test of the age-DRtg correlation among 2016 rows with
# pos == "PF" and Min >= 96. DRtg is not pre-filtered, so rows with a missing
# DRtg reach permTestCor(), which reports how many observations it removed.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "PF"
  ) %>%
  permTestCor(DRtg ~ age, data = .)
##
## 23 observation(s) removed due to missing values.
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , DRtg : -0.2164
## Mean of permutation distribution: 0.0027
## Standard error of permutation distribution: 0.10506
## P-value: 0.038
##
## *-------------*

# Permutation test of the age-DRtg correlation among 2016 rows with
# pos == "C" and Min >= 96. DRtg is not pre-filtered, so rows with a missing
# DRtg reach permTestCor(), which reports how many observations it removed.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "C"
  ) %>%
  permTestCor(DRtg ~ age, data = .)
##
## 21 observation(s) removed due to missing values.
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , DRtg : -0.045
## Mean of permutation distribution: -0.00097
## Standard error of permutation distribution: 0.10624
## P-value: 0.68
##
## *-------------*

library(dplyr)
library(ggplot2)
# Plot age vs `TS%` by position for 2016 rows with Min >= 96: jittered points
# plus a linear fit in each panel.
# `TS%` is mapped directly rather than through as.numeric(): the column is
# already numeric, and the wrapper would only turn the default y-axis title
# into "as.numeric(`TS%`)".
nba_historical %>%
  filter(year_id == 2016L, Min >= 96) %>%
  ggplot(aes(x = age, y = `TS%`)) +
  geom_jitter(size = 0.5) +
  geom_smooth(method = lm) +
  facet_wrap(vars(pos)) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Correlation between age and `TS%` within each position, for 2016 rows with
# Min >= 96. One row per position is returned and the result is left
# ungrouped.
correlations_by_position_TS <- nba_historical %>%
  filter(year_id == 2016L, Min >= 96) %>%
  group_by(pos) %>%
  summarise(
    correlation_age_ts = cor(age, `TS%`, use = "complete.obs"),
    .groups = "drop"
  )
print(correlations_by_position_TS)
## # A tibble: 5 × 2
## pos correlation_age_ts
## <chr> <dbl>
## 1 C -0.00529
## 2 PF -0.0647
## 3 PG 0.269
## 4 SF 0.0172
## 5 SG 0.0874
# Permutation test of the age-`TS%` correlation among 2016 rows with
# pos == "PG" and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "PG"
  ) %>%
  permTestCor(`TS%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , TS% : 0.2687
## Mean of permutation distribution: -0.00082
## Standard error of permutation distribution: 0.09316
## P-value: 0.004
##
## *-------------*

# Permutation test of the age-`TS%` correlation among 2016 rows with
# pos == "SG" and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "SG"
  ) %>%
  permTestCor(`TS%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , TS% : 0.0874
## Mean of permutation distribution: -0.00084
## Standard error of permutation distribution: 0.09206
## P-value: 0.343
##
## *-------------*

# Permutation test of the age-`TS%` correlation among 2016 rows with
# pos == "SF" and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "SF"
  ) %>%
  permTestCor(`TS%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , TS% : 0.0172
## Mean of permutation distribution: 0.00178
## Standard error of permutation distribution: 0.0915
## P-value: 0.863
##
## *-------------*

# Permutation test of the age-`TS%` correlation among 2016 rows with
# pos == "PF" and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "PF"
  ) %>%
  permTestCor(`TS%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , TS% : -0.0647
## Mean of permutation distribution: 0.00428
## Standard error of permutation distribution: 0.08876
## P-value: 0.474
##
## *-------------*

# Permutation test of the age-`TS%` correlation among 2016 rows with
# pos == "C" and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "C"
  ) %>%
  permTestCor(`TS%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , TS% : -0.0053
## Mean of permutation distribution: -0.00139
## Standard error of permutation distribution: 0.09873
## P-value: 0.964
##
## *-------------*

library(dplyr)
library(ggplot2)
# Plot age vs `2P%` by position for 2016 rows with Min >= 96: jittered points
# plus a linear fit in each panel. `2P%` is not filtered, so rows with a
# missing `2P%` are dropped by ggplot2 with a warning.
# `2P%` is mapped directly rather than through as.numeric(): the column is
# already numeric, and the wrapper would only turn the default y-axis title
# into "as.numeric(`2P%`)".
nba_historical %>%
  filter(year_id == 2016L, Min >= 96) %>%
  ggplot(aes(x = age, y = `2P%`)) +
  geom_jitter(size = 0.5) +
  geom_smooth(method = lm) +
  facet_wrap(vars(pos)) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 124 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Correlation between age and `2P%` within each position, for 2016 rows with
# Min >= 96. `2P%` is not filtered, so missing values are handled by
# use = "complete.obs". The result is left ungrouped.
correlations_by_position_2p <- nba_historical %>%
  filter(year_id == 2016L, Min >= 96) %>%
  group_by(pos) %>%
  summarise(
    correlation_age_2p = cor(age, `2P%`, use = "complete.obs"),
    .groups = "drop"
  )
print(correlations_by_position_2p)
## # A tibble: 5 × 2
## pos correlation_age_2p
## <chr> <dbl>
## 1 C -0.0268
## 2 PF 0.00364
## 3 PG 0.361
## 4 SF 0.00157
## 5 SG -0.0785
# Permutation test of the age-`2P%` correlation among 2016 rows with
# pos == "PG" and Min >= 96. `2P%` is not pre-filtered, so rows with a missing
# `2P%` reach permTestCor(), which reports how many observations it removed.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "PG"
  ) %>%
  permTestCor(`2P%` ~ age, data = .)
##
## 25 observation(s) removed due to missing values.
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , 2P% : 0.3613
## Mean of permutation distribution: 0.00114
## Standard error of permutation distribution: 0.10756
## P-value: 0.002
##
## *-------------*

# Permutation test of the age-`2P%` correlation among 2016 rows with
# pos == "SG" and Min >= 96. `2P%` is not pre-filtered, so rows with a missing
# `2P%` reach permTestCor(), which reports how many observations it removed.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "SG"
  ) %>%
  permTestCor(`2P%` ~ age, data = .)
##
## 31 observation(s) removed due to missing values.
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , 2P% : -0.0785
## Mean of permutation distribution: -0.00195
## Standard error of permutation distribution: 0.1078
## P-value: 0.476
##
## *-------------*

# Permutation test of the age-`2P%` correlation among 2016 rows with
# pos == "SF" and Min >= 96. `2P%` is not pre-filtered, so rows with a missing
# `2P%` reach permTestCor(), which reports how many observations it removed.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "SF"
  ) %>%
  permTestCor(`2P%` ~ age, data = .)
##
## 24 observation(s) removed due to missing values.
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , 2P% : 0.0016
## Mean of permutation distribution: -0.0047
## Standard error of permutation distribution: 0.10833
## P-value: 0.989
##
## *-------------*

# Permutation test of the age-`2P%` correlation among 2016 rows with
# pos == "PF" and Min >= 96. `2P%` is not pre-filtered, so rows with a missing
# `2P%` reach permTestCor(), which reports how many observations it removed.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "PF"
  ) %>%
  permTestCor(`2P%` ~ age, data = .)
##
## 23 observation(s) removed due to missing values.
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , 2P% : 0.0036
## Mean of permutation distribution: -0.00234
## Standard error of permutation distribution: 0.10531
## P-value: 0.978
##
## *-------------*

# Permutation test of the age-`2P%` correlation among 2016 rows with
# pos == "C" and Min >= 96. `2P%` is not pre-filtered, so rows with a missing
# `2P%` reach permTestCor(), which reports how many observations it removed.
nba_historical %>%
  filter(
    year_id == 2016L,
    Min >= 96,
    pos == "C"
  ) %>%
  permTestCor(`2P%` ~ age, data = .)
##
## 21 observation(s) removed due to missing values.
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , 2P% : -0.0268
## Mean of permutation distribution: -0.00349
## Standard error of permutation distribution: 0.10827
## P-value: 0.795
##
## *-------------*

library(dplyr)
library(ggplot2)
# Plot age vs `3P%` by position for 2016 rows with Min >= 96: jittered points
# plus a linear fit in each panel. `3P%` is not filtered here, so rows with a
# missing `3P%` are dropped by ggplot2 with a warning.
# `3P%` is mapped directly rather than through as.numeric(): the column is
# already numeric, and the wrapper would only turn the default y-axis title
# into "as.numeric(`3P%`)".
nba_historical %>%
  filter(year_id == 2016L, Min >= 96) %>%
  ggplot(aes(x = age, y = `3P%`)) +
  geom_jitter(size = 0.5) +
  geom_smooth(method = lm) +
  facet_wrap(vars(pos)) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 124 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

library(dplyr)
library(ggplot2)
# Plot age vs `3P%` by position for 2016 rows with Min >= 96, keeping only
# `3P%` values in (0, 45]: jittered points plus a linear fit per panel.
# `3P%` is mapped directly rather than through as.numeric(): the column is
# already numeric (it is compared numerically in the filter), and the wrapper
# would only turn the default y-axis title into "as.numeric(`3P%`)".
nba_historical %>%
  filter(
    year_id == 2016L,
    `3P%` > 0,
    `3P%` <= 45,
    Min >= 96
  ) %>%
  ggplot(aes(x = age, y = `3P%`)) +
  geom_jitter(size = 0.5) +
  geom_smooth(method = lm) +
  facet_wrap(vars(pos)) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Correlation between age and `3P%` within each position, for 2016 rows with
# `3P%` in (0, 45] and Min >= 96. One row per position is returned and the
# result is left ungrouped.
correlations_by_position_3p <- nba_historical %>%
  filter(
    year_id == 2016L,
    `3P%` > 0,
    `3P%` <= 45,
    Min >= 96
  ) %>%
  group_by(pos) %>%
  summarise(
    correlation_age_x3p = cor(age, `3P%`, use = "complete.obs"),
    .groups = "drop"
  )
print(correlations_by_position_3p)
## # A tibble: 5 × 2
## pos correlation_age_x3p
## <chr> <dbl>
## 1 C 0.160
## 2 PF 0.116
## 3 PG 0.124
## 4 SF 0.0390
## 5 SG 0.0443
# Permutation test of the age-`3P%` correlation among 2016 rows with
# pos == "PG", `3P%` in (0, 45] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    `3P%` > 0,
    `3P%` <= 45,
    Min >= 96,
    pos == "PG"
  ) %>%
  permTestCor(`3P%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , 3P% : 0.1239
## Mean of permutation distribution: -0.00051
## Standard error of permutation distribution: 0.10473
## P-value: 0.246
##
## *-------------*

# Permutation test of the age-`3P%` correlation among 2016 rows with
# pos == "SG", `3P%` in (0, 45] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    `3P%` > 0,
    `3P%` <= 45,
    Min >= 96,
    pos == "SG"
  ) %>%
  permTestCor(`3P%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , 3P% : 0.0443
## Mean of permutation distribution: -0.00173
## Standard error of permutation distribution: 0.10397
## P-value: 0.668
##
## *-------------*

# Permutation test of the age-`3P%` correlation among 2016 rows with
# pos == "SF", `3P%` in (0, 45] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    `3P%` > 0,
    `3P%` <= 45,
    Min >= 96,
    pos == "SF"
  ) %>%
  permTestCor(`3P%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , 3P% : 0.039
## Mean of permutation distribution: -0.00091
## Standard error of permutation distribution: 0.10504
## P-value: 0.711
##
## *-------------*

# Permutation test of the age-`3P%` correlation among 2016 rows with
# pos == "PF", `3P%` in (0, 45] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    `3P%` > 0,
    `3P%` <= 45,
    Min >= 96,
    pos == "PF"
  ) %>%
  permTestCor(`3P%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , 3P% : 0.1157
## Mean of permutation distribution: 0.00109
## Standard error of permutation distribution: 0.11505
## P-value: 0.324
##
## *-------------*

# Permutation test of the age-`3P%` correlation among 2016 rows with
# pos == "C", `3P%` in (0, 45] and Min >= 96.
nba_historical %>%
  filter(
    year_id == 2016L,
    `3P%` > 0,
    `3P%` <= 45,
    Min >= 96,
    pos == "C"
  ) %>%
  permTestCor(`3P%` ~ age, data = .)
##
## ** Permutation test **
##
## Permutation test with alternative: two.sided
## Observed correlation between age , 3P% : 0.1602
## Mean of permutation distribution: 0.00508
## Standard error of permutation distribution: 0.19157
## P-value: 0.429
##
## *-------------*
