# Load libraries for data manipulation (dplyr), CSV reading (readr), and plotting (ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(ggplot2)
# Read the NBA statistics CSV (one row per TeamName and Season).
# The path is relative, so the file must sit in the current working directory;
# otherwise replace it with a full path.
nba <- readr::read_csv(
  file = "DATA 607 Final Project Dataset NBA Stats.csv"
)
## Rows: 745 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): TeamName
## dbl (10): Season, GP, WINS, LOSSES, WinPCT, FG3A_per_game, FG3M_per_game, FG...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep an untouched copy of the raw import before derived columns are added.
nba1 <- nba

# 1. Derived per-team-season metrics
# NOTE(review): in the previewed data FG3A_total equals FG3A_per_game * GP
# (e.g. 814 * 82 = 66748), and 814 three-point attempts per game is not
# plausible -- the "*_per_game" columns look like season totals already, which
# would inflate every "*_total" column by a factor of GP. Confirm against the
# data source before interpreting FG3A_total, FG3M_total or FG3M_per_Win.
nba <- nba %>%
  mutate(
    # Win margin = total wins minus total losses
    WinMargin = WINS - LOSSES,
    # 3-point make rate = made 3s / attempted 3s; should be approximately
    # equal to the FG3_PCT column. NA (rather than NaN/Inf) when a row has no
    # attempts, mirroring the guard used for FG3M_per_Win below.
    FG3M_Rate = if_else(FG3A_total > 0, FG3M_total / FG3A_total, NA_real_),
    # 3-point makes per win: how reliant a team was on 3s for its victories.
    # NA for winless rows; if_else() + NA_real_ keeps the column strictly
    # double even if every row fails the condition.
    FG3M_per_Win = if_else(WINS > 0, FG3M_total / WINS, NA_real_)
  )
# 2. Season-relative metrics
# Express each team's value as its deviation from the mean of all rows in the
# same Season, so numbers are comparable across years.

# Distance of every element of `x` from the mean of `x` (missing values are
# ignored when computing the mean).
deviation_from_mean <- function(x) {
  x - mean(x, na.rm = TRUE)
}

nba <- nba %>%
  group_by(Season) %>%
  mutate(
    # Total 3PA versus that season's average
    FG3A_rel_to_avg = deviation_from_mean(FG3A_total),
    # 3P% versus that season's average
    FG3_PCT_rel_to_avg = deviation_from_mean(FG3_PCT),
    # Win % versus that season's average
    WinPCT_rel_to_avg = deviation_from_mean(WinPCT)
  ) %>%
  ungroup()
# 3. Year-over-Year Changes per Team
# This helps analyze how teams are evolving over time.
# A team's previous row is not necessarily the previous season: if a TeamName
# is missing one or more seasons, differencing adjacent rows would report a
# multi-year jump as a one-season change. Changes are therefore only kept when
# the prior row is exactly one season earlier; otherwise they are NA.

# Difference between `x` and its previous element, blanked out wherever the
# matching `season` values are not consecutive. The first element is always NA.
yoy_change <- function(x, season) {
  change <- x - dplyr::lag(x)
  season_gap <- season - dplyr::lag(season)
  # which() drops the NA gap of the first row, whose change is already NA
  change[which(season_gap != 1)] <- NA
  change
}

nba <- nba %>%
  arrange(TeamName, Season) %>% # lag() relies on seasons being in order
  group_by(TeamName) %>%
  mutate(
    # Change in 3PA total from last season
    FG3A_total_change = yoy_change(FG3A_total, Season),
    # Change in win % from last season
    WinPCT_change = yoy_change(WinPCT, Season)
  ) %>%
  ungroup()
# 4. Add Era Classification Buckets
# Manually classify NBA seasons into eras based on analytics usage.
# Every branch is an explicit condition on Season (no catch-all TRUE), so a
# missing Season yields a missing Era instead of being silently labelled
# "Revolution".
nba <- nba %>%
  mutate(
    Era = case_when(
      # Before modern 3-point explosion
      Season <= 2009 ~ "Pre-Analytics",
      # Growing adoption of analytics (2010-2014; earlier rows matched above)
      Season <= 2014 ~ "Adoption",
      # Full embrace of 3-point-focused strategy
      Season > 2014 ~ "Revolution"
    )
  )
# 5. Inspect the transformed data
# Prints one line per column: its name, type, and leading values.
nba %>% glimpse()
## Rows: 745
## Columns: 20
## $ TeamName <chr> "Atlanta Hawks", "Atlanta Hawks", "Atlanta Hawks", …
## $ Season <dbl> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 200…
## $ GP <dbl> 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 66,…
## $ WINS <dbl> 28, 25, 33, 35, 28, 13, 26, 30, 37, 47, 53, 44, 40,…
## $ LOSSES <dbl> 54, 57, 49, 47, 54, 69, 56, 52, 45, 35, 29, 38, 26,…
## $ WinPCT <dbl> 0.341, 0.305, 0.402, 0.427, 0.341, 0.159, 0.317, 0.…
## $ FG3A_per_game <dbl> 814, 933, 1194, 1141, 1249, 973, 1154, 1038, 1078, …
## $ FG3M_per_game <dbl> 258, 333, 423, 402, 419, 304, 424, 341, 384, 597, 5…
## $ FG3_PCT <dbl> 0.317, 0.357, 0.354, 0.352, 0.335, 0.312, 0.367, 0.…
## $ FG3A_total <dbl> 66748, 76506, 97908, 93562, 102418, 79786, 94628, 8…
## $ FG3M_total <dbl> 21156, 27306, 34686, 32964, 34358, 24928, 34768, 27…
## $ WinMargin <dbl> -26, -32, -16, -12, -26, -56, -30, -22, -8, 12, 24,…
## $ FG3M_Rate <dbl> 0.3169533, 0.3569132, 0.3542714, 0.3523225, 0.33546…
## $ FG3M_per_Win <dbl> 755.5714, 1092.2400, 1051.0909, 941.8286, 1227.0714…
## $ FG3A_rel_to_avg <dbl> -25470.897, -15664.828, -1266.759, -5154.690, 2061.…
## $ FG3_PCT_rel_to_avg <dbl> -0.0358965517, 0.0051724138, 0.0022413793, 0.005379…
## $ WinPCT_rel_to_avg <dbl> -1.590690e-01, -1.949655e-01, -9.800000e-02, -7.300…
## $ FG3A_total_change <dbl> NA, 9758, 21402, -4346, 8856, -22632, 14842, -9512,…
## $ WinPCT_change <dbl> NA, -0.036, 0.097, 0.025, -0.086, -0.182, 0.158, 0.…
## $ Era <chr> "Pre-Analytics", "Pre-Analytics", "Pre-Analytics", …
# List every column now present in the data
colnames(nba)
## [1] "TeamName" "Season" "GP"
## [4] "WINS" "LOSSES" "WinPCT"
## [7] "FG3A_per_game" "FG3M_per_game" "FG3_PCT"
## [10] "FG3A_total" "FG3M_total" "WinMargin"
## [13] "FG3M_Rate" "FG3M_per_Win" "FG3A_rel_to_avg"
## [16] "FG3_PCT_rel_to_avg" "WinPCT_rel_to_avg" "FG3A_total_change"
## [19] "WinPCT_change" "Era"
#View(nba)
# League-wide averages per Season of 3-point volume, 3-point accuracy and
# win percentage (one output row per Season).
# na.rm = TRUE matches how the per-season means behind the *_rel_to_avg
# columns are computed, so a single missing value cannot turn a whole season's
# average into NA.
nba_summary <- nba %>%
  group_by(Season) %>%
  summarise(
    avg_FG3A = mean(FG3A_total, na.rm = TRUE),
    avg_FG3PCT = mean(FG3_PCT, na.rm = TRUE),
    avg_WinPCT = mean(WinPCT, na.rm = TRUE)
  )
# Linear regression of win percentage on 3-point volume and 3-point accuracy,
# fitted on every row of `nba`.
model <- lm(
  formula = WinPCT ~ FG3A_total + FG3_PCT,
  data = nba
)
# Show the coefficient table, residual summary and fit statistics.
print(summary(model))
##
## Call:
## lm(formula = WinPCT ~ FG3A_total + FG3_PCT, data = nba)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37262 -0.09553 0.00669 0.09831 0.41890
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.827e-01 8.964e-02 -9.847 <2e-16 ***
## FG3A_total -4.858e-08 9.253e-08 -0.525 0.6
## FG3_PCT 3.906e+00 2.589e-01 15.085 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1299 on 742 degrees of freedom
## Multiple R-squared: 0.2447, Adjusted R-squared: 0.2427
## F-statistic: 120.2 on 2 and 742 DF, p-value: < 2.2e-16
# League-wide 3-point trends: average attempts (left axis) and average 3P%
# (right axis) per season.
# avg_FG3PCT is a proportion (~0.3-0.4) while avg_FG3A is in the tens of
# thousands, so a fixed x1000 multiplier leaves the percentage line flattened
# against the bottom of the panel. Rescale it so both series peak at the same
# height, and expose the true percentage values on a secondary axis.
pct_scale <- max(nba_summary$avg_FG3A, na.rm = TRUE) /
  max(nba_summary$avg_FG3PCT, na.rm = TRUE)

ggplot(nba_summary, aes(x = Season)) +
  # Mapping colour to a label (instead of fixing it) produces a legend
  geom_line(aes(y = avg_FG3A, color = "Avg 3PA")) +
  geom_line(aes(y = avg_FG3PCT * pct_scale, color = "Avg 3P%")) +
  scale_color_manual(
    name = NULL,
    values = c("Avg 3PA" = "blue", "Avg 3P%" = "red")
  ) +
  scale_y_continuous(
    name = "Average 3PA",
    # Inverse of the scaling applied to the 3P% line above
    sec.axis = sec_axis(~ . / pct_scale, name = "Average 3P%")
  ) +
  labs(title = "League-Wide 3-Point Trends", x = "Season")

# Scatter of 3-point attempt totals against win percentage, one
# semi-transparent point per row of `nba`, with a linear fit drawn in red.
ggplot(data = nba, mapping = aes(x = FG3A_total, y = WinPCT)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", colour = "red") +
  ggtitle("3-Point Attempts vs. Win Percentage") +
  xlab("Total 3PA") +
  ylab("Winning Percentage")
## `geom_smooth()` using formula = 'y ~ x'
