# Keep only rows where career_PTS, draft_pick, and draft_round are all
# non-empty (rows where a comparison evaluates to NA are dropped as well),
# then derive each player's height in inches.
players <- players %>%
  filter(career_PTS != "", draft_pick != "", draft_round != "") %>%
  mutate(
    # The leading digits are read as feet and the digits right after the
    # hyphen as inches (assumes height looks like "6-10"; anything that does
    # not match yields NA).
    height_in_inches =
      12 * as.numeric(str_extract(height, "^\\d+")) +
      as.numeric(str_extract(height, "(?<=-)\\d+"))
  )


# First-round selections only, with draft_pick reduced to its integer value.
players_first_round <- players %>%
  filter(draft_round == "1st round", !is.na(draft_pick)) %>%
  mutate(
    # Strip every non-digit character, then convert what is left to integer.
    draft_pick = gsub("[^0-9]", "", draft_pick),
    draft_pick = as.integer(draft_pick)
  )
# Give the salaries table the same key name (X_id) that the join on the
# players data uses.
salaries <- rename(salaries, X_id = player_id)

# One row per X_id holding the sum of that player's salary entries.
# NAs are skipped, so a player whose entries are all NA ends up with 0.
total_salary <- summarise(
  group_by(salaries, X_id),
  total_salary = sum(salary, na.rm = TRUE)
)

# Attach each first-round player's total salary (NA when the player has no
# salary rows), then coerce position to a factor and career_eFG. to numeric.
first_round_merged <- players_first_round %>%
  left_join(total_salary, by = "X_id") %>%
  mutate(
    position = as.factor(position),
    `career_eFG.` = as.numeric(`career_eFG.`)
  )
# Average total salary of first-round picks for each draft year.
# Rows missing either the salary or the draft year are removed first, so the
# mean needs no extra NA handling.
avg_salary_by_year <- first_round_merged %>%
  filter(!is.na(total_salary), !is.na(draft_year)) %>%
  group_by(draft_year) %>%
  summarise(avg_salary = mean(total_salary)) %>%
  arrange(draft_year)

# Yearly averages as a line with highlighted points, overlaid with a
# second-degree polynomial (parabolic) least-squares trend.
# Line thickness is set through `linewidth`: since ggplot2 3.4.0 `size` is
# deprecated for line geoms and only applies to points and text.
ggplot(avg_salary_by_year, aes(x = draft_year, y = avg_salary)) +
  geom_line(color = "blue", linewidth = 1) +
  geom_point(color = "red", size = 2) +
  geom_smooth(
    method = "lm",
    formula = y ~ poly(x, 2),
    color = "black",
    linetype = "dashed",
    linewidth = 1
  ) +
  labs(
    title = "Average Salary by Draft Year with Parabolic Trend",
    x = "Draft Year",
    y = "Average Salary"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))  # Rotate x labels

# Per-pick averages over the first-round players: career points, effective
# FG%, total salary, and height. draft_pick is converted to double while
# grouping; missing values are ignored in every mean.
first_round_drafts <- first_round_merged %>%
  group_by(draft_pick = as.numeric(draft_pick)) %>%
  summarise(
    avg_pts = mean(career_PTS, na.rm = TRUE),
    avg_fg = mean(`career_eFG.`, na.rm = TRUE),  # backticks: name ends in "."
    avg_salary = mean(total_salary, na.rm = TRUE),
    avg_height = mean(height_in_inches, na.rm = TRUE)
  ) %>%
  arrange(draft_pick)

# Show the summary table as a sanity check
print(first_round_drafts)
## # A tibble: 30 × 5
##    draft_pick avg_pts avg_fg avg_salary avg_height
##         <dbl>   <dbl>  <dbl>      <dbl>      <dbl>
##  1          1   15.5    50.3  77977063.       80.5
##  2          2   13.5    48.7  59558384.       79.5
##  3          3   13.5    49.6  61690729.       79.4
##  4          4   11.6    48.2  55962460.       79.2
##  5          5   11.4    48.2  56619982.       79.0
##  6          6    9.67   48.3  27262295.       79.6
##  7          7   10.6    48.0  39473002.       78.7
##  8          8   10.2    47.6  33956162.       78.6
##  9          9   10.1    48.4  43432376.       79.6
## 10         10    9.48   48.1  44551145.       77.9
## # ℹ 20 more rows
# Reshape to long form: one row per (draft_pick, variable) pair so each
# metric can be drawn in its own facet.
# pivot_longer() replaces the superseded gather(); the resulting columns are
# the same (draft_pick, variable, value), only the row order differs, which
# the plot below does not depend on.
first_round_drafts_long <- first_round_drafts %>%
  pivot_longer(
    cols = c(avg_pts, avg_fg, avg_salary, avg_height),
    names_to = "variable",
    values_to = "value"
  )

# One panel per metric, each with its own y scale, showing the per-pick
# averages and a straight-line least-squares fit.
ggplot(
  first_round_drafts_long,
  aes(x = draft_pick, y = value, color = variable, group = variable)
) +
  geom_line() +
  # The grouping is inherited from the plot-level aes(); the explicit formula
  # is the default for method = "lm" and avoids the informational message.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  facet_wrap(~ variable, scales = "free_y") +
  labs(
    title = "Comparison of Draft Picks: Points, Effective FG %, Height, and Salaries",
    x = "Draft Pick Number",
    y = "Value",
    color = "Variable"
  ) +
  theme_minimal()

# Three nested linear models of the per-pick average salary, fitted on the
# per-pick summary table: points only; points + eFG% + height; and the same
# with the draft pick number added.
salary_model <- lm(
  formula = avg_salary ~ avg_pts,
  data = first_round_drafts
)
salary_model1 <- lm(
  formula = avg_salary ~ avg_pts + avg_fg + avg_height,
  data = first_round_drafts
)
salary_model2 <- lm(
  formula = avg_salary ~ avg_pts + draft_pick + avg_fg + avg_height,
  data = first_round_drafts
)

# Side-by-side text table of the three fits.
# NOTE(review): covariate.labels are positional, so they must stay in the
# same order as the coefficient rows stargazer prints.
stargazer(
  salary_model, salary_model1, salary_model2,
  type = "text",
  title = "Regression Results: Impact of Draft Pick, Points, Height and Field Goal Percentage on Salary",
  dep.var.labels = "Average Salary",
  covariate.labels = c(
    "Average Points",
    "Draft Pick",
    "Average Field Goal Percentage",
    "Avg Height"
  ),
  style = "qje"
)
## 
## Regression Results: Impact of Draft Pick, Points, Height and Field Goal Percentage on Salary
## =====================================================================================================
##                                                           Average Salary                             
##                                         (1)                     (2)                     (3)          
## -----------------------------------------------------------------------------------------------------
## Average Points                   6,110,563.000***        6,335,826.000***        9,113,209.000***    
##                                    (490,932.200)           (652,435.100)          (1,593,889.000)    
##                                                                                                      
## Draft Pick                                                                         761,472.400*      
##                                                                                    (402,316.100)     
##                                                                                                      
## Average Field Goal Percentage                             -1,285,566.000          -2,263,351.000     
##                                                           (1,525,948.000)         (1,544,350.000)    
##                                                                                                      
## Avg Height                                                 2,671,756.000           2,004,189.000     
##                                                           (2,611,607.000)         (2,515,684.000)    
##                                                                                                      
## Constant                        -22,175,968.000***       -174,005,467.000        -110,381,006.000    
##                                   (4,415,813.000)        (183,758,487.000)       (178,455,417.000)   
##                                                                                                      
## N                                       30                      30                      30           
## R2                                     0.847                   0.854                   0.872         
## Adjusted R2                            0.841                   0.837                   0.852         
## Residual Std. Error           6,737,344.000 (df = 28) 6,833,079.000 (df = 26) 6,517,092.000 (df = 25)
## F Statistic                   154.924*** (df = 1; 28) 50.611*** (df = 3; 26)  42.624*** (df = 4; 25) 
## =====================================================================================================
## Notes:                                                         ***Significant at the 1 percent level.
##                                                                 **Significant at the 5 percent level.
##                                                                 *Significant at the 10 percent level.
# Mascot names searched for inside draft_team. The order matters: the names
# are joined into one regex alternation, which is tried left to right.
team_mascots <- c(
  "Hawks", "Celtics", "Nets", "Hornets", "Bulls", "Cavaliers", "Mavericks",
  "Nuggets", "Pistons", "Warriors", "Rockets", "Pacers", "Clippers", "Lakers",
  "Grizzlies", "Heat", "Bucks", "Timberwolves", "Pelicans", "Knicks",
  "Thunder", "Magic", "76ers", "Suns", "Trail Blazers", "Kings", "Spurs",
  "Raptors", "Jazz", "Wizards", "SuperSonics", "Bobcats", "Bullets", "Royals",
  "Nationals", "Blackhawks", "Braves", "Stags", "Olympians", "Packers",
  "Capitols", "Zephyrs"
)

# Tag every player with the first mascot name found in draft_team; teams
# matching none of the names get NA.
first_round_merged <- first_round_merged %>%
  mutate(
    team_mascot = str_extract(draft_team, paste(team_mascots, collapse = "|"))
  )

# Average total salary per drafting team.
# Players without salary data, players whose team could not be matched to a
# mascot, and the listed mascots are excluded. The explicit NA check is
# needed because `NA %in% ...` is FALSE, so unmatched teams would otherwise
# pass the exclusion filter and form an "NA" group in the chart.
# NOTE(review): the rank table built later in this script excludes three
# more mascots (Packers, Capitols, Zephyrs) - confirm whether both summaries
# are meant to cover the same set of teams.
avg_salary_by_team <- first_round_merged %>%
  filter(
    !is.na(total_salary),
    !is.na(team_mascot),
    !team_mascot %in% c("Pelicans", "Blackhawks", "Braves", "Stags", "Olympians")
  ) %>%
  group_by(team_mascot) %>%
  summarise(avg_salary = mean(total_salary)) %>%
  arrange(desc(avg_salary))

# Horizontal bar chart of average salary, teams ordered by value.
salary_plot <- ggplot(
  avg_salary_by_team,
  aes(x = reorder(team_mascot, avg_salary), y = avg_salary, fill = avg_salary)
) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "blue", high = "red") +
  labs(
    title = "Average Salary",
    x = "Team Mascot",
    y = "Average Salary"
  ) +
  theme_minimal()
# Per-team averages of career points and total salary, ranked against each
# other. Rank 1 is the highest average in both rankings, so a negative
# rank_diff means the team ranks better on points than on salary.
team_ranks <- first_round_merged %>%
  # team_mascot is derived again here from draft_team
  mutate(
    team_mascot = str_extract(draft_team, paste(team_mascots, collapse = "|"))
  ) %>%
  filter(
    !is.na(career_PTS),
    !is.na(total_salary),
    !is.na(team_mascot),
    !team_mascot %in% c("Pelicans", "Blackhawks", "Braves", "Stags", "Olympians", "Packers", "Capitols", "Zephyrs")
  ) %>%
  group_by(team_mascot) %>%
  summarise(
    avg_pts = mean(career_PTS, na.rm = TRUE),
    avg_salary = mean(total_salary, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_pts)) %>%
  mutate(
    # Negating the values makes rank() count down from the largest average
    rank_pts = rank(-avg_pts),
    rank_salary = rank(-avg_salary),
    # Points rank minus salary rank
    rank_diff = rank_pts - rank_salary,
    # z-score of the rank difference across teams
    rank_diff_std =
      (rank_diff - mean(rank_diff, na.rm = TRUE)) / sd(rank_diff, na.rm = TRUE)
  )

# Show the ranking table, ordered by average points
team_ranks
## # A tibble: 32 × 7
##    team_mascot  avg_pts avg_salary rank_pts rank_salary rank_diff rank_diff_std
##    <chr>          <dbl>      <dbl>    <dbl>       <dbl>     <dbl>         <dbl>
##  1 76ers          10.7   34518881.        1          13       -12       -1.07  
##  2 Warriors       10.6   39020809.        2          10        -8       -0.715 
##  3 Cavaliers      10.5   39218357.        3           9        -6       -0.536 
##  4 Timberwolves   10.4   42509492.        4           4         0        0     
##  5 Bucks          10.3   27278885.        5          25       -20       -1.79  
##  6 SuperSonics    10.2   39721939.        6           7        -1       -0.0894
##  7 Bobcats        10.1   40360159.        7           5         2        0.179 
##  8 Pacers         10.0   28669553.        8          21       -13       -1.16  
##  9 Bullets         9.87  26765271.        9          26       -17       -1.52  
## 10 Hornets         9.86  49171838.       10           2         8        0.715 
## # ℹ 22 more rows
# Same table, most negative rank difference first
team_ranks %>%
  arrange(rank_diff)
## # A tibble: 32 × 7
##    team_mascot avg_pts avg_salary rank_pts rank_salary rank_diff rank_diff_std
##    <chr>         <dbl>      <dbl>    <dbl>       <dbl>     <dbl>         <dbl>
##  1 Bucks         10.3   27278885.        5          25       -20        -1.79 
##  2 Bullets        9.87  26765271.        9          26       -17        -1.52 
##  3 Mavericks      9.52  20754180.       15          32       -17        -1.52 
##  4 Nets           9.73  23017630.       13          29       -16        -1.43 
##  5 Pacers        10.0   28669553.        8          21       -13        -1.16 
##  6 Lakers         9.51  22853191.       17          30       -13        -1.16 
##  7 76ers         10.7   34518881.        1          13       -12        -1.07 
##  8 Kings          9.78  27845244.       11          23       -12        -1.07 
##  9 Warriors      10.6   39020809.        2          10        -8        -0.715
## 10 Cavaliers     10.5   39218357.        3           9        -6        -0.536
## # ℹ 22 more rows
# Horizontal bar chart of average career points per team, ordered by value
points_plot <- ggplot(
  team_ranks,
  aes(x = reorder(team_mascot, avg_pts), y = avg_pts, fill = avg_pts)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  scale_fill_gradient(low = "blue", high = "red") +
  ggtitle("Average PPG") +
  xlab("Team Mascot") +
  ylab("Average PPG") +
  theme_minimal()

# Salary chart on the left, points chart on the right
salary_plot | points_plot

# Bar graph of the raw rank difference per team.
# rank_diff is computed as rank_pts - rank_salary, so the title states the
# subtraction in that order (negative = ranks better on points than salary).
bar_rank_diff <- ggplot(
  team_ranks,
  aes(x = reorder(team_mascot, rank_diff), y = rank_diff, fill = rank_diff)
) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "blue", high = "red") +
  labs(
    title = "PTS Rank - Salary Rank",
    x = "Team Mascot",
    y = "Rank Difference"
  ) +
  theme_minimal()

# Bar graph of the standardized (z-score) rank difference per team
bar_rank_diff_std <- ggplot(
  team_ranks,
  aes(
    x = reorder(team_mascot, rank_diff_std),
    y = rank_diff_std,
    fill = rank_diff_std
  )
) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "blue", high = "red") +
  labs(
    title = "Standardized Rank Difference",
    x = "Team Mascot",
    y = "Standardized Rank Difference"
  ) +
  theme_minimal()

# Combine the two graphs side by side
bar_rank_diff | bar_rank_diff_std