# Load libraries for data manipulation (dplyr), CSV import (readr), and plotting (ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(ggplot2)

# Import the team-by-season statistics from CSV.
# The path is relative, so the file must be in the current working directory
# (otherwise replace the string with a full path).
nba <- read_csv(file = "DATA 607 Final Project Dataset NBA Stats.csv")
## Rows: 745 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): TeamName
## dbl (10): Season, GP, WINS, LOSSES, WinPCT, FG3A_per_game, FG3M_per_game, FG...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep a copy of the data exactly as imported, before any columns are added
nba1 <- nba

# 1. Derived Team Metrics
# Row-wise metrics computed from each team-season's own columns.
# NOTE(review): in the glimpse() output further down, the first row has
# FG3A_per_game = 814, GP = 82 and FG3A_total = 66748 (= 814 * 82). The
# *_per_game columns may therefore hold season totals and the *_total columns
# may be inflated by a factor of GP -- confirm against the source CSV.
# FG3M_Rate is unaffected (the factor cancels); FG3M_per_Win would be inflated.
nba <- nba %>%
  mutate(
    # Win margin = total wins minus total losses
    WinMargin = WINS - LOSSES,

    # 3-point make rate = total made 3s divided by total attempted 3s.
    # Should be approximately equal to the FG3_PCT column.
    # NA (rather than NaN/Inf) when a team has no attempts recorded.
    FG3M_Rate = if_else(FG3A_total > 0, FG3M_total / FG3A_total, NA_real_),

    # 3-point makes per win: shows how reliant a team was on 3s for its
    # victories. NA for winless rows to avoid dividing by zero.
    FG3M_per_Win = if_else(WINS > 0, FG3M_total / WINS, NA_real_)
  )


# 2. Compute Relative-to-League-Average Metrics
# Each metric is the team's value minus the mean of that column across all
# rows sharing the same Season, so positive means "above league average".
nba <- local({
  # Deviation of every element from the mean of the vector it belongs to.
  # Inside a grouped mutate() the vector is one season's worth of values.
  minus_group_mean <- function(values) values - mean(values, na.rm = TRUE)

  nba %>%
    group_by(Season) %>%
    mutate(
      # Team 3PA total vs. that season's league average
      FG3A_rel_to_avg = minus_group_mean(FG3A_total),
      # Team 3P% vs. that season's league average
      FG3_PCT_rel_to_avg = minus_group_mean(FG3_PCT),
      # Team win % vs. that season's league average
      WinPCT_rel_to_avg = minus_group_mean(WinPCT)
    ) %>%
    ungroup()
})

# 3. Year-over-Year Changes per Team
# This helps analyze how teams are evolving over time.
# lag() returns the previous *row* within a team, not necessarily the previous
# *season*. If a TeamName is missing one or more seasons (for example after a
# rename or relocation -- verify against the data), a plain lag() difference
# would report a multi-year jump as a one-year change. The differences are
# therefore only computed when the previous row is exactly one season earlier;
# otherwise (including each team's first row) the change is NA.
nba <- nba %>%
  arrange(TeamName, Season) %>%  # Sort so lag() sees seasons in order
  group_by(TeamName) %>%
  mutate(
    # Change in 3PA total from the immediately preceding season
    FG3A_total_change = if_else(
      near(Season - lag(Season), 1),
      FG3A_total - lag(FG3A_total),
      NA_real_
    ),

    # Change in win % from the immediately preceding season
    WinPCT_change = if_else(
      near(Season - lag(Season), 1),
      WinPCT - lag(WinPCT),
      NA_real_
    )
  ) %>%
  ungroup()

# 4. Add Era Classification Buckets
# Manually classify NBA seasons into eras based on analytics usage.
# case_when() takes the first matching branch, so the order below matters.
nba <- nba %>%
  mutate(
    Era = case_when(
      # A missing Season must stay missing; without this branch it would fall
      # through to the catch-all and be labelled "Revolution".
      is.na(Season) ~ NA_character_,
      Season <= 2009 ~ "Pre-Analytics",   # Before modern 3-point explosion
      Season <= 2014 ~ "Adoption",        # Growing adoption of analytics
      TRUE ~ "Revolution"                 # Full embrace of 3-point-focused strategy
    )
  )

# 5. Preview the Transformed Data
# Prints one line per column: its name, its type, and the first few values
nba %>% glimpse()
## Rows: 745
## Columns: 20
## $ TeamName           <chr> "Atlanta Hawks", "Atlanta Hawks", "Atlanta Hawks", …
## $ Season             <dbl> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 200…
## $ GP                 <dbl> 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 66,…
## $ WINS               <dbl> 28, 25, 33, 35, 28, 13, 26, 30, 37, 47, 53, 44, 40,…
## $ LOSSES             <dbl> 54, 57, 49, 47, 54, 69, 56, 52, 45, 35, 29, 38, 26,…
## $ WinPCT             <dbl> 0.341, 0.305, 0.402, 0.427, 0.341, 0.159, 0.317, 0.…
## $ FG3A_per_game      <dbl> 814, 933, 1194, 1141, 1249, 973, 1154, 1038, 1078, …
## $ FG3M_per_game      <dbl> 258, 333, 423, 402, 419, 304, 424, 341, 384, 597, 5…
## $ FG3_PCT            <dbl> 0.317, 0.357, 0.354, 0.352, 0.335, 0.312, 0.367, 0.…
## $ FG3A_total         <dbl> 66748, 76506, 97908, 93562, 102418, 79786, 94628, 8…
## $ FG3M_total         <dbl> 21156, 27306, 34686, 32964, 34358, 24928, 34768, 27…
## $ WinMargin          <dbl> -26, -32, -16, -12, -26, -56, -30, -22, -8, 12, 24,…
## $ FG3M_Rate          <dbl> 0.3169533, 0.3569132, 0.3542714, 0.3523225, 0.33546…
## $ FG3M_per_Win       <dbl> 755.5714, 1092.2400, 1051.0909, 941.8286, 1227.0714…
## $ FG3A_rel_to_avg    <dbl> -25470.897, -15664.828, -1266.759, -5154.690, 2061.…
## $ FG3_PCT_rel_to_avg <dbl> -0.0358965517, 0.0051724138, 0.0022413793, 0.005379…
## $ WinPCT_rel_to_avg  <dbl> -1.590690e-01, -1.949655e-01, -9.800000e-02, -7.300…
## $ FG3A_total_change  <dbl> NA, 9758, 21402, -4346, 8856, -22632, 14842, -9512,…
## $ WinPCT_change      <dbl> NA, -0.036, 0.097, 0.025, -0.086, -0.182, 0.158, 0.…
## $ Era                <chr> "Pre-Analytics", "Pre-Analytics", "Pre-Analytics", …
# List every column name now present in the transformed data
colnames(nba)
##  [1] "TeamName"           "Season"             "GP"                
##  [4] "WINS"               "LOSSES"             "WinPCT"            
##  [7] "FG3A_per_game"      "FG3M_per_game"      "FG3_PCT"           
## [10] "FG3A_total"         "FG3M_total"         "WinMargin"         
## [13] "FG3M_Rate"          "FG3M_per_Win"       "FG3A_rel_to_avg"   
## [16] "FG3_PCT_rel_to_avg" "WinPCT_rel_to_avg"  "FG3A_total_change" 
## [19] "WinPCT_change"      "Era"
#View(nba)
# League-wide averages per season: one output row per Season holding the mean
# 3PA total, mean 3P% and mean win % across that season's teams.
# na.rm = TRUE matches the per-season means computed earlier in this script,
# so a single missing team value cannot turn a whole season's average into NA.
nba_summary <- nba %>%
  group_by(Season) %>%
  summarise(
    avg_FG3A = mean(FG3A_total, na.rm = TRUE),
    avg_FG3PCT = mean(FG3_PCT, na.rm = TRUE),
    avg_WinPCT = mean(WinPCT, na.rm = TRUE)
  )

# Linear regression of win percentage on 3-point volume (FG3A_total) and
# 3-point accuracy (FG3_PCT), fitted on every team-season row of `nba` at once.
# NOTE(review): all seasons are pooled and the raw FG3A_total is used rather
# than the season-relative FG3A_rel_to_avg computed earlier -- confirm that
# this is the intended specification.
model <- lm(
  formula = WinPCT ~ FG3A_total + FG3_PCT,
  data = nba
)
summary(model)
## 
## Call:
## lm(formula = WinPCT ~ FG3A_total + FG3_PCT, data = nba)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37262 -0.09553  0.00669  0.09831  0.41890 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.827e-01  8.964e-02  -9.847   <2e-16 ***
## FG3A_total  -4.858e-08  9.253e-08  -0.525      0.6    
## FG3_PCT      3.906e+00  2.589e-01  15.085   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1299 on 742 degrees of freedom
## Multiple R-squared:  0.2447, Adjusted R-squared:  0.2427 
## F-statistic: 120.2 on 2 and 742 DF,  p-value: < 2.2e-16
# League-wide trend lines: average 3PA (left axis) and average 3P% (right
# axis) by season.
# The two series live on very different scales (per the glimpse output,
# FG3A_total is in the tens of thousands while FG3_PCT is around 0.3), so a
# fixed "* 1000" multiplier leaves the 3P% line flattened near the bottom of
# the panel. The multiplier is instead derived from the data so both lines
# span the same vertical range, and the secondary axis divides by the same
# factor so the 3P% line can be read in its own units.
pct_scale <- max(nba_summary$avg_FG3A, na.rm = TRUE) /
  max(nba_summary$avg_FG3PCT, na.rm = TRUE)

ggplot(nba_summary, aes(x = Season)) +
  # Colour is mapped inside aes() so ggplot draws a legend naming each line
  geom_line(aes(y = avg_FG3A, color = "Average 3PA")) +
  geom_line(aes(y = avg_FG3PCT * pct_scale, color = "Average 3P%")) +
  scale_y_continuous(
    name = "Average 3PA",
    sec.axis = sec_axis(~ . / pct_scale, name = "Average 3P%")
  ) +
  scale_color_manual(
    name = NULL,
    values = c("Average 3PA" = "blue", "Average 3P%" = "red")
  ) +
  labs(title = "League-Wide 3-Point Trends", x = "Season")

# Scatter plot with one semi-transparent point per team-season: 3-point
# attempts on x, win percentage on y, plus a red straight-line ("lm") fit.
ggplot(data = nba, mapping = aes(x = FG3A_total, y = WinPCT)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", color = "red") +
  ggtitle("3-Point Attempts vs. Win Percentage") +
  xlab("Total 3PA") +
  ylab("Winning Percentage")
## `geom_smooth()` using formula = 'y ~ x'