# Session setup ----
# NOTE(review): setwd() ties this script to one machine. It is kept because
# the relative CSV paths used below are resolved against this directory.
setwd("/Users/cyroasseodechoch/Documents")
getwd()

# Load required libraries
# MASS is attached after tidyverse; later code calls dplyr::select()
# explicitly rather than relying on which `select` is found first.
library(tidyverse)
library(broom)
library(car)
library(MASS)

# Read the ratings data as a base data.frame with strings kept as character,
# then print the structure of what was read.
data <- read.csv("2srating-data.csv", stringsAsFactors = FALSE)
str(data)
# Columns that are coerced to numeric and kept for the regression data set.
numeric_cols <- c(
  "minutes_played",
  "field_goal_percentage",
  "two_point_field_goal_percentage",
  "three_point_field_goal_percentage",
  "free_throw_percentage",
  "offensive_rebounds",
  "defensive_rebounds",
  "total_rebounds",
  "assists",
  "steals",
  "blocks",
  "turnovers",
  "fouls",
  "points",
  "player_efficiency_rating",
  "true_shooting_percentage",
  "usage_percentage"
)

# Coerce each listed column in place; values as.numeric() cannot convert
# become NA.
data[numeric_cols] <- lapply(data[numeric_cols], as.numeric)

# Keep only the listed columns and drop every row that has an NA in any of
# them.
data_clean <- na.omit(data[numeric_cols])
# Model 1: linear model of player efficiency rating on the predictors below.
model1 <- lm(
  player_efficiency_rating ~ minutes_played + field_goal_percentage +
    three_point_field_goal_percentage + free_throw_percentage +
    offensive_rebounds + defensive_rebounds + assists + steals + blocks +
    turnovers + fouls + points + true_shooting_percentage + usage_percentage,
  data = data_clean
)
summary(model1)

# Model 2: linear model of points on shooting percentages, assists and usage.
model2 <- lm(
  points ~ field_goal_percentage + three_point_field_goal_percentage +
    free_throw_percentage + assists + usage_percentage,
  data = data_clean
)
summary(model2)

# Stepwise selection by AIC in both directions, starting from model1.
step_model <- stepAIC(model1, direction = "both")
summary(step_model)

# Variance inflation factors for the model1 predictors.
vif(model1)

# Diagnostic plots for model1 on a 2 x 2 grid; the previous graphics
# settings are restored afterwards so later base plots are not affected.
old_par <- par(mfrow = c(2, 2))
plot(model1)
par(old_par)

# Correlation matrix of all modelling columns.
cor_matrix <- cor(data_clean, use = "complete.obs")
print(cor_matrix)
# NEXT ----
# Load necessary libraries
library(dplyr)
library(readr)

# Read the player data as a tibble; show_col_types = FALSE suppresses the
# column-specification message. (The file was previously read twice in a
# row; one read is sufficient.)
data <- read_csv("srating-data.csv", show_col_types = FALSE)
colnames(data)

# Force these two columns to numeric.
data$weight <- as.numeric(data$weight)
data$plus_minus <- as.numeric(data$plus_minus)
# Players whose `committed` value is missing or FALSE, restricted to rows
# with a known position; position labels are upper-cased to normalize them.
uncommitted_players <- data %>%
  filter(is.na(committed) | committed == FALSE) %>%
  filter(!is.na(position)) %>%
  mutate(position = toupper(position))

# Columns used for the per-position summary, plus identifying columns.
position_stats <- uncommitted_players %>%
  dplyr::select(
    position,
    assist_percentage,
    steal_percentage,
    block_percentage,
    field_goal_attempts,
    field_goal_percentage,
    two_point_field_goal_percentage,
    three_point_field_goal_percentage,
    offensive_rebounds_per_game,
    total_rebounds_per_game,
    offensive_rating,
    defensive_rating,
    fouls_per_game,
    turnovers_per_game,
    minutes_per_game,
    points_per_game,
    true_shooting_percentage,
    first_name,
    last_name,
    team_name,
    conference_id
  )

# Mean of each statistic per position (NAs ignored) and the number of
# players in each position, ordered by position.
position_summary <- position_stats %>%
  group_by(position) %>%
  summarise(
    AST_percent = mean(assist_percentage, na.rm = TRUE),
    STL_percent = mean(steal_percentage, na.rm = TRUE),
    BLK_percent = mean(block_percentage, na.rm = TRUE),
    FGA = mean(field_goal_attempts, na.rm = TRUE),
    FG_percent = mean(field_goal_percentage, na.rm = TRUE),
    TwoFG_percent = mean(two_point_field_goal_percentage, na.rm = TRUE),
    ThreeFG_percent = mean(three_point_field_goal_percentage, na.rm = TRUE),
    ORB_PG = mean(offensive_rebounds_per_game, na.rm = TRUE),
    TRB_PG = mean(total_rebounds_per_game, na.rm = TRUE),
    ORTG = mean(offensive_rating, na.rm = TRUE),
    DRTG = mean(defensive_rating, na.rm = TRUE),
    TOV_PG = mean(turnovers_per_game, na.rm = TRUE),
    Foul_PG = mean(fouls_per_game, na.rm = TRUE),
    MPG = mean(minutes_per_game, na.rm = TRUE),
    PPG = mean(points_per_game, na.rm = TRUE),
    TS_percent = mean(true_shooting_percentage, na.rm = TRUE),
    Player_Count = n()
  ) %>%
  arrange(position)

print(position_summary)
# First pass: identifying columns only, for players with both names present.
uncommitted_list <- uncommitted_players %>%
  dplyr::select(
    first_name, last_name, team_name, conference_id, position, committed
  ) %>%
  filter(!is.na(first_name) & !is.na(last_name))

print(uncommitted_list)
# View() opens a data viewer, so it is only called in interactive sessions.
if (interactive()) {
  View(uncommitted_list)
}

# Second pass: the same players with their statistics, renamed to short
# labels. This replaces the first-pass object.
uncommitted_list <- uncommitted_players %>%
  dplyr::select(
    first_name,
    last_name,
    team_name,
    conference_id,
    position,
    committed,
    assist_percentage,
    steal_percentage,
    block_percentage,
    field_goal_attempts,
    field_goal_percentage,
    two_point_field_goal_percentage,
    three_point_field_goal_percentage,
    offensive_rebounds_per_game,
    total_rebounds_per_game,
    offensive_rating,
    defensive_rating,
    turnovers_per_game,
    fouls_per_game,
    minutes_per_game,
    points_per_game,
    true_shooting_percentage
  ) %>%
  filter(!is.na(first_name) & !is.na(last_name)) %>%
  rename(
    AST_percent = assist_percentage,
    STL_percent = steal_percentage,
    BLK_percent = block_percentage,
    FGA = field_goal_attempts,
    FG_percent = field_goal_percentage,
    TwoFG_percent = two_point_field_goal_percentage,
    ThreeFG_percent = three_point_field_goal_percentage,
    ORB_PG = offensive_rebounds_per_game,
    TRB_PG = total_rebounds_per_game,
    ORTG = offensive_rating,
    DRTG = defensive_rating,
    TOV_PG = turnovers_per_game,
    Foul_PG = fouls_per_game,
    MPG = minutes_per_game,
    PPG = points_per_game,
    TS_percent = true_shooting_percentage
  )

if (interactive()) {
  View(uncommitted_list)
}

# Write the uncommitted_list dataframe to a CSV file
write.csv(uncommitted_list, "uncommitted_player_stats.csv", row.names = FALSE)
# Force these two columns to numeric.
data$weight <- as.numeric(data$weight)
data$plus_minus <- as.numeric(data$plus_minus)

# Players whose `committed` value is missing or FALSE and whose position is
# known; positions are upper-cased to normalize them.
uncommitted_players <- data %>%
  filter(is.na(committed) | committed == FALSE) %>%
  filter(!is.na(position)) %>%
  mutate(position = toupper(position))

# Named players with their statistics (including total minutes played),
# renamed to short labels.
uncommitted_list <- uncommitted_players %>%
  dplyr::select(
    first_name,
    last_name,
    team_name,
    conference_id,
    position,
    committed,
    assist_percentage,
    steal_percentage,
    block_percentage,
    field_goal_attempts,
    minutes_played,
    field_goal_percentage,
    two_point_field_goal_percentage,
    three_point_field_goal_percentage,
    offensive_rebounds_per_game,
    total_rebounds_per_game,
    offensive_rating,
    defensive_rating,
    turnovers_per_game,
    fouls_per_game,
    minutes_per_game,
    points_per_game,
    true_shooting_percentage
  ) %>%
  filter(!is.na(first_name) & !is.na(last_name)) %>%
  rename(
    AST_percent = assist_percentage,
    STL_percent = steal_percentage,
    BLK_percent = block_percentage,
    FGA = field_goal_attempts,
    MP = minutes_played,
    FG_percent = field_goal_percentage,
    TwoFG_percent = two_point_field_goal_percentage,
    ThreeFG_percent = three_point_field_goal_percentage,
    ORB_PG = offensive_rebounds_per_game,
    TRB_PG = total_rebounds_per_game,
    ORTG = offensive_rating,
    DRTG = defensive_rating,
    TOV_PG = turnovers_per_game,
    Foul_PG = fouls_per_game,
    MPG = minutes_per_game,
    PPG = points_per_game,
    TS_percent = true_shooting_percentage
  )

# Selection thresholds: 75th percentile for every statistic except DRTG,
# which uses the 25th percentile because lower is better.
quantiles <- uncommitted_list %>%
  summarise(
    ast_thresh = quantile(AST_percent, 0.75, na.rm = TRUE),
    stl_thresh = quantile(STL_percent, 0.75, na.rm = TRUE),
    blk_thresh = quantile(BLK_percent, 0.75, na.rm = TRUE),
    drtg_thresh = quantile(DRTG, 0.25, na.rm = TRUE),
    fga_thresh = quantile(FGA, 0.75, na.rm = TRUE),
    mp_thresh = quantile(MP, 0.75, na.rm = TRUE)
  )

# Keep players who clear at least one of the AST/STL/BLK/DRTG thresholds
# AND at least one of the FGA/MP thresholds.
top_performers <- uncommitted_list %>%
  filter(
    (AST_percent >= quantiles$ast_thresh |
      STL_percent >= quantiles$stl_thresh |
      BLK_percent >= quantiles$blk_thresh |
      DRTG <= quantiles$drtg_thresh) &
      (FGA >= quantiles$fga_thresh | MP >= quantiles$mp_thresh)
  )

print(top_performers)
write.csv(
  top_performers,
  "top_uncommitted_defenders_and_creators.csv",
  row.names = FALSE
)
library(ggplot2)
library(dplyr)
library(scales)

# Scatter plots of the selected players; each point is labelled with the
# player's full name and overlapping labels are dropped.

# Field goal attempts against defensive rating.
ggplot(
  top_performers,
  aes(x = FGA, y = DRTG, label = paste(first_name, last_name))
) +
  geom_point(color = "blue", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "FGA vs Defensive Rating (DRTG)",
    x = "Field Goal Attempts (FGA)",
    y = "Defensive Rating (↓ Better)"
  ) +
  theme_minimal()

# Points per game against defensive rating.
ggplot(
  top_performers,
  aes(x = PPG, y = DRTG, label = paste(first_name, last_name))
) +
  geom_point(color = "darkgreen", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "PPG vs Defensive Rating (DRTG)",
    x = "Points Per Game (PPG)",
    y = "Defensive Rating (↓ Better)"
  ) +
  theme_minimal()

# Total minutes played against defensive rating.
ggplot(
  top_performers,
  aes(x = MP, y = DRTG, label = paste(first_name, last_name))
) +
  geom_point(color = "purple", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "Minutes Played vs Defensive Rating (DRTG)",
    x = "Total Minutes Played",
    y = "Defensive Rating (↓ Better)"
  ) +
  theme_minimal()

# Defensive rating against offensive rating.
ggplot(
  top_performers,
  aes(x = DRTG, y = ORTG, label = paste(first_name, last_name))
) +
  geom_point(color = "red", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "Defensive vs Offensive Rating",
    x = "Defensive Rating (↓ Better)",
    y = "Offensive Rating (↑ Better)"
  ) +
  theme_minimal()

# Bubble chart: point size encodes true shooting %, colour encodes assist %.
ggplot(
  top_performers,
  aes(
    x = DRTG,
    y = ORTG,
    size = TS_percent,
    color = AST_percent,
    label = paste(first_name, last_name)
  )
) +
  geom_point(alpha = 0.8) +
  scale_color_gradient(low = "orange", high = "blue") +
  scale_size_continuous(range = c(3, 10)) +
  geom_text(size = 3, vjust = -1, check_overlap = TRUE) +
  labs(
    title = "Top Uncommitted Players: All-Around Performance",
    x = "Defensive Rating (↓ Better)",
    y = "Offensive Rating (↑ Better)",
    size = "True Shooting %",
    color = "Assist %"
  ) +
  theme_minimal()
library(tidyverse)
library(broom)
library(car)
library(MASS)
library(scales)
library(readr)
library(ggplot2)

# Regression analysis ----
# Read the ratings data as a base data.frame with strings kept as character.
data <- read.csv("srating-data.csv", stringsAsFactors = FALSE)

# Columns that are coerced to numeric and kept for the regression data set.
numeric_cols <- c(
  "minutes_played",
  "field_goal_percentage",
  "two_point_field_goal_percentage",
  "three_point_field_goal_percentage",
  "free_throw_percentage",
  "offensive_rebounds",
  "defensive_rebounds",
  "total_rebounds",
  "assists",
  "steals",
  "blocks",
  "turnovers",
  "fouls",
  "points",
  "player_efficiency_rating",
  "true_shooting_percentage",
  "usage_percentage"
)

# Coerce in place, then keep only those columns and drop rows with any NA.
data[numeric_cols] <- lapply(data[numeric_cols], as.numeric)
data_clean <- na.omit(data[numeric_cols])

# Model 1: linear model of player efficiency rating on the predictors below.
model1 <- lm(
  player_efficiency_rating ~ minutes_played + field_goal_percentage +
    three_point_field_goal_percentage + free_throw_percentage +
    offensive_rebounds + defensive_rebounds + assists + steals + blocks +
    turnovers + fouls + points + true_shooting_percentage + usage_percentage,
  data = data_clean
)
summary(model1)

# Model 2: linear model of points on shooting percentages, assists and usage.
model2 <- lm(
  points ~ field_goal_percentage + three_point_field_goal_percentage +
    free_throw_percentage + assists + usage_percentage,
  data = data_clean
)
summary(model2)

# Stepwise selection by AIC in both directions, starting from model1.
step_model <- stepAIC(model1, direction = "both")
summary(step_model)

# Variance inflation factors for the model1 predictors.
vif(model1)

# Diagnostic plots for model1 on a 2 x 2 grid; the previous graphics
# settings are restored afterwards so later base plots are not affected.
old_par <- par(mfrow = c(2, 2))
plot(model1)
par(old_par)

# Correlation matrix of all modelling columns.
cor(data_clean, use = "complete.obs")
# Uncommitted-player analysis ----
# Read the player data as a tibble without the column-specification
# message, then force two columns to numeric.
data <- read_csv("srating-data.csv", show_col_types = FALSE)
data$weight <- as.numeric(data$weight)
data$plus_minus <- as.numeric(data$plus_minus)

# Players whose `committed` value is missing or FALSE and whose position is
# known; positions are upper-cased.
uncommitted_players <- data %>%
  filter(is.na(committed) | committed == FALSE) %>%
  filter(!is.na(position)) %>%
  mutate(position = toupper(position))

# Columns used for the per-position summary, plus identifying columns.
position_stats <- uncommitted_players %>%
  dplyr::select(
    position,
    assist_percentage,
    steal_percentage,
    block_percentage,
    field_goal_attempts,
    field_goal_percentage,
    two_point_field_goal_percentage,
    three_point_field_goal_percentage,
    offensive_rebounds_per_game,
    total_rebounds_per_game,
    offensive_rating,
    defensive_rating,
    fouls_per_game,
    turnovers_per_game,
    minutes_per_game,
    points_per_game,
    true_shooting_percentage,
    first_name,
    last_name,
    team_name,
    conference_id
  )

# Mean of each statistic per position (NAs ignored) and the number of
# players in each position, ordered by position.
position_summary <- position_stats %>%
  group_by(position) %>%
  summarise(
    AST_percent = mean(assist_percentage, na.rm = TRUE),
    STL_percent = mean(steal_percentage, na.rm = TRUE),
    BLK_percent = mean(block_percentage, na.rm = TRUE),
    FGA = mean(field_goal_attempts, na.rm = TRUE),
    FG_percent = mean(field_goal_percentage, na.rm = TRUE),
    TwoFG_percent = mean(two_point_field_goal_percentage, na.rm = TRUE),
    ThreeFG_percent = mean(three_point_field_goal_percentage, na.rm = TRUE),
    ORB_PG = mean(offensive_rebounds_per_game, na.rm = TRUE),
    TRB_PG = mean(total_rebounds_per_game, na.rm = TRUE),
    ORTG = mean(offensive_rating, na.rm = TRUE),
    DRTG = mean(defensive_rating, na.rm = TRUE),
    TOV_PG = mean(turnovers_per_game, na.rm = TRUE),
    Foul_PG = mean(fouls_per_game, na.rm = TRUE),
    MPG = mean(minutes_per_game, na.rm = TRUE),
    PPG = mean(points_per_game, na.rm = TRUE),
    TS_percent = mean(true_shooting_percentage, na.rm = TRUE),
    Player_Count = n()
  ) %>%
  arrange(position)

position_summary

# Named players with their statistics (including total minutes played),
# renamed to short labels.
uncommitted_list <- uncommitted_players %>%
  dplyr::select(
    first_name,
    last_name,
    team_name,
    conference_id,
    position,
    committed,
    assist_percentage,
    steal_percentage,
    block_percentage,
    field_goal_attempts,
    minutes_played,
    field_goal_percentage,
    two_point_field_goal_percentage,
    three_point_field_goal_percentage,
    offensive_rebounds_per_game,
    total_rebounds_per_game,
    offensive_rating,
    defensive_rating,
    turnovers_per_game,
    fouls_per_game,
    minutes_per_game,
    points_per_game,
    true_shooting_percentage
  ) %>%
  filter(!is.na(first_name) & !is.na(last_name)) %>%
  rename(
    AST_percent = assist_percentage,
    STL_percent = steal_percentage,
    BLK_percent = block_percentage,
    FGA = field_goal_attempts,
    MP = minutes_played,
    FG_percent = field_goal_percentage,
    TwoFG_percent = two_point_field_goal_percentage,
    ThreeFG_percent = three_point_field_goal_percentage,
    ORB_PG = offensive_rebounds_per_game,
    TRB_PG = total_rebounds_per_game,
    ORTG = offensive_rating,
    DRTG = defensive_rating,
    TOV_PG = turnovers_per_game,
    Foul_PG = fouls_per_game,
    MPG = minutes_per_game,
    PPG = points_per_game,
    TS_percent = true_shooting_percentage
  )

# Selection thresholds: 75th percentile for every statistic except DRTG,
# which uses the 25th percentile (the filter below keeps DRTG values at or
# under it).
quantiles <- uncommitted_list %>%
  summarise(
    ast_thresh = quantile(AST_percent, 0.75, na.rm = TRUE),
    stl_thresh = quantile(STL_percent, 0.75, na.rm = TRUE),
    blk_thresh = quantile(BLK_percent, 0.75, na.rm = TRUE),
    drtg_thresh = quantile(DRTG, 0.25, na.rm = TRUE),
    fga_thresh = quantile(FGA, 0.75, na.rm = TRUE),
    mp_thresh = quantile(MP, 0.75, na.rm = TRUE)
  )

# Keep players who clear at least one of the AST/STL/BLK/DRTG thresholds
# AND at least one of the FGA/MP thresholds.
top_performers <- uncommitted_list %>%
  filter(
    (AST_percent >= quantiles$ast_thresh |
      STL_percent >= quantiles$stl_thresh |
      BLK_percent >= quantiles$blk_thresh |
      DRTG <= quantiles$drtg_thresh) &
      (FGA >= quantiles$fga_thresh | MP >= quantiles$mp_thresh)
  )

top_performers

# Export both tables.
write.csv(uncommitted_list, "uncommitted_player_stats.csv", row.names = FALSE)
write.csv(
  top_performers,
  "top_uncommitted_defenders_and_creators.csv",
  row.names = FALSE
)
# Scatter plots of the selected players; each point is labelled with the
# player's full name and overlapping labels are dropped.

# Field goal attempts against defensive rating.
ggplot(
  top_performers,
  aes(x = FGA, y = DRTG, label = paste(first_name, last_name))
) +
  geom_point(color = "blue", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "FGA vs Defensive Rating (DRTG)",
    x = "Field Goal Attempts (FGA)",
    y = "Defensive Rating (↓ Better)"
  ) +
  theme_minimal()

# Points per game against defensive rating.
ggplot(
  top_performers,
  aes(x = PPG, y = DRTG, label = paste(first_name, last_name))
) +
  geom_point(color = "darkgreen", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "PPG vs Defensive Rating (DRTG)",
    x = "Points Per Game (PPG)",
    y = "Defensive Rating (↓ Better)"
  ) +
  theme_minimal()

# Total minutes played against defensive rating.
ggplot(
  top_performers,
  aes(x = MP, y = DRTG, label = paste(first_name, last_name))
) +
  geom_point(color = "purple", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "Minutes Played vs Defensive Rating (DRTG)",
    x = "Total Minutes Played",
    y = "Defensive Rating (↓ Better)"
  ) +
  theme_minimal()

# Defensive rating against offensive rating.
ggplot(
  top_performers,
  aes(x = DRTG, y = ORTG, label = paste(first_name, last_name))
) +
  geom_point(color = "red", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "Defensive vs Offensive Rating",
    x = "Defensive Rating (↓ Better)",
    y = "Offensive Rating (↑ Better)"
  ) +
  theme_minimal()

# Bubble chart: point size encodes true shooting %, colour encodes assist %.
ggplot(
  top_performers,
  aes(
    x = DRTG,
    y = ORTG,
    size = TS_percent,
    color = AST_percent,
    label = paste(first_name, last_name)
  )
) +
  geom_point(alpha = 0.8) +
  scale_color_gradient(low = "orange", high = "blue") +
  scale_size_continuous(range = c(3, 10)) +
  geom_text(size = 3, vjust = -1, check_overlap = TRUE) +
  labs(
    title = "Top Uncommitted Players: All-Around Performance",
    x = "Defensive Rating (↓ Better)",
    y = "Offensive Rating (↑ Better)",
    size = "True Shooting %",
    color = "Assist %"
  ) +
  theme_minimal()
# Table exports ----
# A stray `Y` token that followed library(tinytex) was removed; evaluating
# it would fail with "object 'Y' not found".
library(tinytex)
library(gt)
library(dplyr)
library(webshot2)

# Each section below takes the first 20 rows of top_performers under one
# ordering, renders them as a gt table with a title, and saves it as a PNG.

# Highest field goal attempts.
top_fga <- top_performers %>%
  arrange(desc(FGA)) %>%
  slice_head(n = 20) %>%
  dplyr::select(first_name, last_name, team_name, FGA, DRTG)
gt_fga <- gt(top_fga) %>%
  tab_header(
    title = "Top 20 Uncommitted Players by Field Goal Attempts (FGA)"
  )
gtsave(gt_fga, "top_fga_table.png")

# Highest points per game.
top_ppg <- top_performers %>%
  arrange(desc(PPG)) %>%
  slice_head(n = 20) %>%
  dplyr::select(first_name, last_name, team_name, PPG, DRTG)
gt_ppg <- gt(top_ppg) %>%
  tab_header(title = "Top 20 Uncommitted Players by Points Per Game (PPG)")
gtsave(gt_ppg, "top_ppg_table.png")

# Highest total minutes played.
top_mp <- top_performers %>%
  arrange(desc(MP)) %>%
  slice_head(n = 20) %>%
  dplyr::select(first_name, last_name, team_name, MP, DRTG)
gt_mp <- gt(top_mp) %>%
  tab_header(title = "Top 20 Uncommitted Players by Minutes Played (MP)")
gtsave(gt_mp, "top_mp_table.png")

# Lowest defensive rating (sorted ascending).
top_drtg <- top_performers %>%
  arrange(DRTG) %>%
  slice_head(n = 20) %>%
  dplyr::select(first_name, last_name, team_name, DRTG, ORTG)
gt_drtg <- gt(top_drtg) %>%
  tab_header(
    title = "Top 20 Uncommitted Players by Defensive Rating (DRTG)"
  )
gtsave(gt_drtg, "top_drtg_table.png")

# Highest assist percentage, shown with the ratings and true shooting %.
top_bubble <- top_performers %>%
  arrange(desc(AST_percent)) %>%
  slice_head(n = 20) %>%
  dplyr::select(
    first_name, last_name, team_name, DRTG, ORTG, TS_percent, AST_percent
  )
gt_bubble <- gt(top_bubble) %>%
  tab_header(title = "Top 20 All-Around Uncommitted Players (AST%, TS%)")
gtsave(gt_bubble, "top_bubble_table.png")