# Keep only players whose career points, draft pick and draft round are
# non-empty (filter() also drops rows where a comparison is NA), then add
# each player's height in inches.
players <- players %>%
  filter(career_PTS != "", draft_pick != "", draft_round != "") %>%
  mutate(
    # `height` is read as "<feet>-<inches>": the leading digits are the feet
    # and the digits directly after the hyphen are the inches.
    height_in_inches =
      12 * as.numeric(str_extract(height, "^\\d+")) +
      as.numeric(str_extract(height, "(?<=-)\\d+"))
  )
# First-round selections only, with the pick number stored as an integer.
players_first_round <- players %>%
  drop_na(draft_pick) %>% # rows without a pick cannot be ranked
  filter(draft_round == "1st round") %>%
  mutate(
    # Strip every non-digit character before converting to integer.
    draft_pick = as.integer(gsub("[^0-9]", "", draft_pick))
  )
# The salaries table names the player key `player_id`; rename it to `X_id`,
# the key used when joining salary totals onto the player data.
salaries <- rename(salaries, X_id = player_id)
# One row per player (X_id): the sum of all of that player's salary
# records, with missing salaries ignored.
total_salary <- summarise(
  group_by(salaries, X_id),
  total_salary = sum(salary, na.rm = TRUE)
)
# Attach each first-round player's total salary. A left join keeps players
# that have no salary rows (their total_salary is NA). Position becomes a
# factor and effective FG% is coerced to numeric.
first_round_merged <- players_first_round %>%
  left_join(total_salary, by = "X_id") %>%
  mutate(
    position = as.factor(position),
    career_eFG. = as.numeric(career_eFG.)
  )
# Mean total salary per draft year, using only players for whom both the
# salary total and the draft year are known.
avg_salary_by_year <- first_round_merged %>%
  filter(!(is.na(total_salary) | is.na(draft_year))) %>%
  group_by(draft_year) %>%
  summarise(avg_salary = mean(total_salary, na.rm = TRUE)) %>%
  arrange(draft_year)
# Average salary per draft year: the observed series (blue line, red
# points) overlaid with a dashed second-degree polynomial least-squares fit.
ggplot(
  data = avg_salary_by_year,
  mapping = aes(x = draft_year, y = avg_salary)
) +
  geom_line(size = 1, color = "blue") +
  geom_point(size = 2, color = "red") +
  geom_smooth(
    formula = y ~ poly(x, 2),
    method = "lm",
    linetype = "dashed",
    color = "black",
    size = 1
  ) +
  labs(
    x = "Draft Year",
    y = "Average Salary",
    title = "Average Salary by Draft Year with Parabolic Trend"
  ) +
  theme_minimal() +
  # Rotate the x-axis labels to vertical.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

# Per-pick averages over the first-round players: career points,
# effective FG%, total salary and height, one row per draft pick number.
first_round_drafts <- first_round_merged %>%
  group_by(draft_pick = as.numeric(draft_pick)) %>%
  summarise(
    avg_pts = mean(career_PTS, na.rm = TRUE),
    avg_fg = mean(career_eFG., na.rm = TRUE),
    avg_salary = mean(total_salary, na.rm = TRUE),
    avg_height = mean(height_in_inches, na.rm = TRUE)
  ) %>%
  arrange(draft_pick)
# Print the summary table to confirm the aggregation worked.
print(first_round_drafts)
## # A tibble: 30 × 5
## draft_pick avg_pts avg_fg avg_salary avg_height
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 15.5 50.3 77977063. 80.5
## 2 2 13.5 48.7 59558384. 79.5
## 3 3 13.5 49.6 61690729. 79.4
## 4 4 11.6 48.2 55962460. 79.2
## 5 5 11.4 48.2 56619982. 79.0
## 6 6 9.67 48.3 27262295. 79.6
## 7 7 10.6 48.0 39473002. 78.7
## 8 8 10.2 47.6 33956162. 78.6
## 9 9 10.1 48.4 43432376. 79.6
## 10 10 9.48 48.1 44551145. 77.9
## # ℹ 20 more rows
# Reshape to long format for faceted plotting: one row per
# (draft_pick, metric) pair, with the metric name in `variable` and its
# number in `value`. Uses pivot_longer(), the current tidyr reshaping verb,
# instead of the superseded gather(). Rows come out grouped by draft pick;
# the plot built from this table does not depend on row order.
first_round_drafts_long <- first_round_drafts %>%
  pivot_longer(
    cols = c(avg_pts, avg_fg, avg_salary, avg_height),
    names_to = "variable",
    values_to = "value"
  )
# One panel per metric, each with its own y scale: the per-pick averages
# as a line plus a straight-line least-squares trend (no confidence band).
ggplot(
  first_round_drafts_long,
  aes(x = draft_pick, y = value, group = variable, color = variable)
) +
  geom_line() +
  geom_smooth(aes(group = variable), method = "lm", se = FALSE) +
  facet_wrap(~ variable, scales = "free_y") +
  labs(
    x = "Draft Pick Number",
    y = "Value",
    color = "Variable",
    title = "Comparison of Draft Picks: Points, Effective FG %, Height, and Salaries"
  ) +
  theme_minimal()

# Three nested linear models of average salary, fitted on the per-pick
# summary table (one observation per draft pick number):
#   salary_model  - points only
#   salary_model1 - points, effective FG% and height
#   salary_model2 - the above plus the draft pick number itself
salary_model <- lm(
  avg_salary ~ avg_pts,
  data = first_round_drafts
)
salary_model1 <- lm(
  avg_salary ~ avg_pts + avg_fg + avg_height,
  data = first_round_drafts
)
salary_model2 <- lm(
  avg_salary ~ avg_pts + draft_pick + avg_fg + avg_height,
  data = first_round_drafts
)
# Side-by-side text table of the three fits.
# NOTE(review): covariate.labels are positional, so they must stay in the
# same order in which stargazer lists the coefficients.
stargazer(
  salary_model, salary_model1, salary_model2,
  type = "text",
  style = "qje",
  title = "Regression Results: Impact of Draft Pick, Points, Height and Field Goal Percentage on Salary",
  dep.var.labels = "Average Salary",
  covariate.labels = c(
    "Average Points",
    "Draft Pick",
    "Average Field Goal Percentage",
    "Avg Height"
  )
)
##
## Regression Results: Impact of Draft Pick, Points, Height and Field Goal Percentage on Salary
## =====================================================================================================
## Average Salary
## (1) (2) (3)
## -----------------------------------------------------------------------------------------------------
## Average Points 6,110,563.000*** 6,335,826.000*** 9,113,209.000***
## (490,932.200) (652,435.100) (1,593,889.000)
##
## Draft Pick 761,472.400*
## (402,316.100)
##
## Average Field Goal Percentage -1,285,566.000 -2,263,351.000
## (1,525,948.000) (1,544,350.000)
##
## Avg Height 2,671,756.000 2,004,189.000
## (2,611,607.000) (2,515,684.000)
##
## Constant -22,175,968.000*** -174,005,467.000 -110,381,006.000
## (4,415,813.000) (183,758,487.000) (178,455,417.000)
##
## N 30 30 30
## R2 0.847 0.854 0.872
## Adjusted R2 0.841 0.837 0.852
## Residual Std. Error 6,737,344.000 (df = 28) 6,833,079.000 (df = 26) 6,517,092.000 (df = 25)
## F Statistic 154.924*** (df = 1; 28) 50.611*** (df = 3; 26) 42.624*** (df = 4; 25)
## =====================================================================================================
## Notes: ***Significant at the 1 percent level.
## **Significant at the 5 percent level.
## *Significant at the 10 percent level.
# Mascot names searched for inside `draft_team`.
team_mascots <- c(
  "Hawks", "Celtics", "Nets", "Hornets", "Bulls", "Cavaliers",
  "Mavericks", "Nuggets", "Pistons", "Warriors", "Rockets", "Pacers",
  "Clippers", "Lakers", "Grizzlies", "Heat", "Bucks", "Timberwolves",
  "Pelicans", "Knicks", "Thunder", "Magic", "76ers", "Suns",
  "Trail Blazers", "Kings", "Spurs", "Raptors", "Jazz", "Wizards",
  "SuperSonics", "Bobcats", "Bullets", "Royals", "Nationals",
  "Blackhawks", "Braves", "Stags", "Olympians", "Packers", "Capitols",
  "Zephyrs"
)
# Tag every player with the first mascot name found in `draft_team`; the
# names are joined with "|" into one alternation pattern. Teams matching
# none of the names get NA.
first_round_merged <- mutate(
  first_round_merged,
  team_mascot = str_extract(draft_team, paste(team_mascots, collapse = "|"))
)
# Average total salary per drafting team (keyed by mascot), highest first.
# The mascot is derived before filtering so the block does not rely on an
# earlier step. Rows with no recognised mascot are dropped, and the
# exclusion list is the same one `team_ranks` uses, so the salary chart and
# the points chart that are displayed side by side cover the same teams.
avg_salary_by_team <- first_round_merged %>%
  mutate(team_mascot = str_extract(draft_team, paste(team_mascots, collapse = "|"))) %>%
  filter(
    !is.na(total_salary),
    !is.na(team_mascot),
    !team_mascot %in% c(
      "Pelicans", "Blackhawks", "Braves", "Stags", "Olympians",
      "Packers", "Capitols", "Zephyrs"
    )
  ) %>%
  group_by(team_mascot) %>%
  summarise(avg_salary = mean(total_salary, na.rm = TRUE)) %>%
  arrange(desc(avg_salary))
# Horizontal bar chart of average salary per team, bars ordered by value
# and shaded from blue (low) to red (high).
salary_plot <- ggplot(
  avg_salary_by_team,
  aes(
    x = reorder(team_mascot, avg_salary),
    y = avg_salary,
    fill = avg_salary
  )
) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "blue", high = "red") +
  labs(
    x = "Team Mascot",
    y = "Average Salary",
    title = "Average Salary"
  ) +
  theme_minimal()
# Per-team comparison of scoring and pay. For every mascot (after dropping
# unmatched teams and the excluded mascots) compute the mean career points
# and mean total salary, rank teams on each (rank 1 = highest), and take
# the difference of the two ranks.
team_ranks <- first_round_merged %>%
  mutate(team_mascot = str_extract(draft_team, paste(team_mascots, collapse = "|"))) %>%
  filter(
    !is.na(team_mascot),
    !is.na(career_PTS),
    !is.na(total_salary),
    !team_mascot %in% c("Pelicans", "Blackhawks", "Braves", "Stags", "Olympians", "Packers", "Capitols", "Zephyrs")
  ) %>%
  group_by(team_mascot) %>%
  summarise(
    avg_pts = mean(career_PTS, na.rm = TRUE),
    avg_salary = mean(total_salary, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_pts)) %>%
  mutate(
    # Negating the value makes rank() assign 1 to the largest average.
    rank_pts = rank(-avg_pts),
    rank_salary = rank(-avg_salary),
    # Points rank minus salary rank: negative means the team places
    # higher on points than it does on salary.
    rank_diff = rank_pts - rank_salary,
    # z-score of the rank difference across teams.
    rank_diff_std = (rank_diff - mean(rank_diff, na.rm = TRUE)) / sd(rank_diff, na.rm = TRUE)
  )
# Display the table with the rank differences and their z-scores.
team_ranks
## # A tibble: 32 × 7
## team_mascot avg_pts avg_salary rank_pts rank_salary rank_diff rank_diff_std
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 76ers 10.7 34518881. 1 13 -12 -1.07
## 2 Warriors 10.6 39020809. 2 10 -8 -0.715
## 3 Cavaliers 10.5 39218357. 3 9 -6 -0.536
## 4 Timberwolves 10.4 42509492. 4 4 0 0
## 5 Bucks 10.3 27278885. 5 25 -20 -1.79
## 6 SuperSonics 10.2 39721939. 6 7 -1 -0.0894
## 7 Bobcats 10.1 40360159. 7 5 2 0.179
## 8 Pacers 10.0 28669553. 8 21 -13 -1.16
## 9 Bullets 9.87 26765271. 9 26 -17 -1.52
## 10 Hornets 9.86 49171838. 10 2 8 0.715
## # ℹ 22 more rows
# Show the same table ordered from the most negative rank difference up.
team_ranks %>% arrange(rank_diff)
## # A tibble: 32 × 7
## team_mascot avg_pts avg_salary rank_pts rank_salary rank_diff rank_diff_std
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Bucks 10.3 27278885. 5 25 -20 -1.79
## 2 Bullets 9.87 26765271. 9 26 -17 -1.52
## 3 Mavericks 9.52 20754180. 15 32 -17 -1.52
## 4 Nets 9.73 23017630. 13 29 -16 -1.43
## 5 Pacers 10.0 28669553. 8 21 -13 -1.16
## 6 Lakers 9.51 22853191. 17 30 -13 -1.16
## 7 76ers 10.7 34518881. 1 13 -12 -1.07
## 8 Kings 9.78 27845244. 11 23 -12 -1.07
## 9 Warriors 10.6 39020809. 2 10 -8 -0.715
## 10 Cavaliers 10.5 39218357. 3 9 -6 -0.536
## # ℹ 22 more rows
# Horizontal bar chart of average career points per team, bars ordered by
# value and shaded from blue (low) to red (high).
points_plot <- ggplot(
  team_ranks,
  aes(
    x = reorder(team_mascot, avg_pts),
    y = avg_pts,
    fill = avg_pts
  )
) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "blue", high = "red") +
  labs(
    x = "Team Mascot",
    y = "Average PPG",
    title = "Average PPG"
  ) +
  theme_minimal()
# Show the salary chart and the points chart next to each other.
salary_plot | points_plot

# Horizontal bar chart of each team's rank difference, bars ordered by
# value and shaded from blue (low) to red (high). `rank_diff` is computed
# as rank_pts - rank_salary, so the title states the subtraction in that
# order (the previous title had the operands reversed).
bar_rank_diff <- ggplot(
  team_ranks,
  aes(
    x = reorder(team_mascot, rank_diff),
    y = rank_diff,
    fill = rank_diff
  )
) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "blue", high = "red") +
  labs(
    title = "PTS Rank - Salary Rank",
    x = "Team Mascot",
    y = "Rank Difference"
  ) +
  theme_minimal()
# Horizontal bar chart of the standardized (z-score) rank difference per
# team, bars ordered by value and shaded from blue (low) to red (high).
bar_rank_diff_std <- ggplot(
  team_ranks,
  aes(
    x = reorder(team_mascot, rank_diff_std),
    y = rank_diff_std,
    fill = rank_diff_std
  )
) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "blue", high = "red") +
  labs(
    x = "Team Mascot",
    y = "Standardized Rank Difference",
    title = "Standardized Rank Difference"
  ) +
  theme_minimal()
# Place the raw and standardized rank-difference charts side by side.
bar_rank_diff | bar_rank_diff_std
