# Set the working directory so the relative CSV paths below resolve, then
# print it for confirmation.
# NOTE(review): this is a machine-specific absolute path; consider running
# the script from the project directory instead.
setwd("/Users/cyroasseodechoch/Documents")
getwd()

# Load required libraries.
# MASS is attached after tidyverse, so MASS::select masks dplyr::select;
# the pipelines later in this file call dplyr::select() explicitly.
library(tidyverse)
library(broom)
library(car)
library(MASS)

# Read the CSV file.
# NOTE(review): later reads in this file use "srating-data.csv" -- confirm
# that the leading "2" in this file name is intended.
data <- read.csv("2srating-data.csv", stringsAsFactors = FALSE)

# View structure
str(data)

# Columns to convert to numeric (ID and name columns are skipped)
numeric_cols <- c(
  "minutes_played", "field_goal_percentage",
  "two_point_field_goal_percentage", "three_point_field_goal_percentage",
  "free_throw_percentage", "offensive_rebounds", "defensive_rebounds",
  "total_rebounds", "assists", "steals", "blocks", "turnovers", "fouls",
  "points", "player_efficiency_rating", "true_shooting_percentage",
  "usage_percentage"
)

# Ensure they are numeric (values that cannot be parsed become NA)
data[numeric_cols] <- lapply(data[numeric_cols], as.numeric)

# Keep only the numeric columns and drop rows with NA in any of them
data_clean <- na.omit(data[numeric_cols])

# 1. OLS Regression: What contributes to Player Efficiency Rating (PER)?
model1 <- lm(
  player_efficiency_rating ~ minutes_played + field_goal_percentage +
    three_point_field_goal_percentage + free_throw_percentage +
    offensive_rebounds + defensive_rebounds + assists + steals + blocks +
    turnovers + fouls + points + true_shooting_percentage +
    usage_percentage,
  data = data_clean
)
summary(model1)

# 2. OLS Regression: Predicting points
model2 <- lm(
  points ~ field_goal_percentage + three_point_field_goal_percentage +
    free_throw_percentage + assists + usage_percentage,
  data = data_clean
)
summary(model2)

# 3. Stepwise Regression for PER (based on AIC), starting from model1 and
#    allowing terms to be both dropped and re-added
step_model <- stepAIC(model1, direction = "both")
summary(step_model)

# 4. Multicollinearity Check (VIF)
vif(model1)

# 5. Plot diagnostics for model1 in a 2 x 2 grid; the previous graphics
#    layout is restored afterwards so later base plots are not affected
old_par <- par(mfrow = c(2, 2))
plot(model1)
par(old_par)

# 6. Correlation Matrix (optional)
cor_matrix <- cor(data_clean, use = "complete.obs")
print(cor_matrix)

# NEXT
# Load necessary libraries
library(dplyr)
library(readr)

# Load the dataset as a tibble. show_col_types = FALSE only suppresses
# readr's column-specification message; it does not change how columns are
# parsed. (The file was previously read twice in a row with identical
# results; a single read is sufficient.)
data <- read_csv("srating-data.csv", show_col_types = FALSE)

# Check column names (to fix any mismatches)
colnames(data)

# Fix columns mistakenly parsed as logical (e.g., weight, plus_minus).
# If needed, convert those to numeric or character manually.
data$weight <- as.numeric(data$weight)
data$plus_minus <- as.numeric(data$plus_minus)

# Filter only uncommitted players: `committed` is either missing or FALSE
uncommitted_players <- data %>%
  filter(is.na(committed) | committed == FALSE)

# Keep rows that have a position and normalize it to upper case
uncommitted_players <- uncommitted_players %>%
  filter(!is.na(position)) %>%
  mutate(position = toupper(position))

# Select relevant variables (match exact column names from dataset)
position_stats <- uncommitted_players %>%
  dplyr::select(
    position, assist_percentage, steal_percentage, block_percentage,
    field_goal_attempts, field_goal_percentage,
    two_point_field_goal_percentage, three_point_field_goal_percentage,
    offensive_rebounds_per_game, total_rebounds_per_game,
    offensive_rating, defensive_rating, fouls_per_game, turnovers_per_game,
    minutes_per_game, points_per_game, true_shooting_percentage,
    first_name, last_name, team_name, conference_id
  )

# Summarize by position: mean of each stat (ignoring NA) plus a row count
position_summary <- position_stats %>%
  group_by(position) %>%
  summarise(
    AST_percent = mean(assist_percentage, na.rm = TRUE),
    STL_percent = mean(steal_percentage, na.rm = TRUE),
    BLK_percent = mean(block_percentage, na.rm = TRUE),
    FGA = mean(field_goal_attempts, na.rm = TRUE),
    FG_percent = mean(field_goal_percentage, na.rm = TRUE),
    TwoFG_percent = mean(two_point_field_goal_percentage, na.rm = TRUE),
    ThreeFG_percent = mean(three_point_field_goal_percentage, na.rm = TRUE),
    ORB_PG = mean(offensive_rebounds_per_game, na.rm = TRUE),
    TRB_PG = mean(total_rebounds_per_game, na.rm = TRUE),
    ORTG = mean(offensive_rating, na.rm = TRUE),
    DRTG = mean(defensive_rating, na.rm = TRUE),
    TOV_PG = mean(turnovers_per_game, na.rm = TRUE),
    Foul_PG = mean(fouls_per_game, na.rm = TRUE),
    MPG = mean(minutes_per_game, na.rm = TRUE),
    PPG = mean(points_per_game, na.rm = TRUE),
    TS_percent = mean(true_shooting_percentage, na.rm = TRUE),
    Player_Count = n()
  ) %>%
  arrange(position)

# View summary table
print(position_summary)

# Optional: Show list of uncommitted players (with school/conference)
uncommitted_list <- uncommitted_players %>%
  dplyr::select(
    first_name, last_name, team_name, conference_id, position, committed
  ) %>%
  filter(!is.na(first_name) & !is.na(last_name))

print(uncommitted_list)

# View() opens a data viewer, so only call it in an interactive session
if (interactive()) {
  View(uncommitted_list)
}

# Rebuild the list with the key stat columns and short column names
uncommitted_list <- uncommitted_players %>%
  dplyr::select(
    first_name, last_name, team_name, conference_id, position, committed,
    assist_percentage, steal_percentage, block_percentage,
    field_goal_attempts, field_goal_percentage,
    two_point_field_goal_percentage, three_point_field_goal_percentage,
    offensive_rebounds_per_game, total_rebounds_per_game,
    offensive_rating, defensive_rating, turnovers_per_game, fouls_per_game,
    minutes_per_game, points_per_game, true_shooting_percentage
  ) %>%
  filter(!is.na(first_name) & !is.na(last_name)) %>%
  rename(
    AST_percent = assist_percentage,
    STL_percent = steal_percentage,
    BLK_percent = block_percentage,
    FGA = field_goal_attempts,
    FG_percent = field_goal_percentage,
    TwoFG_percent = two_point_field_goal_percentage,
    ThreeFG_percent = three_point_field_goal_percentage,
    ORB_PG = offensive_rebounds_per_game,
    TRB_PG = total_rebounds_per_game,
    ORTG = offensive_rating,
    DRTG = defensive_rating,
    TOV_PG = turnovers_per_game,
    Foul_PG = fouls_per_game,
    MPG = minutes_per_game,
    PPG = points_per_game,
    TS_percent = true_shooting_percentage
  )

if (interactive()) {
  View(uncommitted_list)
}

# Write the uncommitted_list dataframe to a CSV file
write.csv(uncommitted_list, "uncommitted_player_stats.csv", row.names = FALSE)

# Convert columns to correct types
data$weight <- as.numeric(data$weight)
data$plus_minus <- as.numeric(data$plus_minus)

# Filter uncommitted players (committed is NA or FALSE), require a
# position, and normalize positions to upper case
uncommitted_players <- data %>%
  filter(is.na(committed) | committed == FALSE) %>%
  filter(!is.na(position)) %>%
  mutate(position = toupper(position))

# Clean and rename key stat columns
uncommitted_list <- uncommitted_players %>%
  dplyr::select(
    first_name, last_name, team_name, conference_id, position, committed,
    assist_percentage, steal_percentage, block_percentage,
    field_goal_attempts, minutes_played, field_goal_percentage,
    two_point_field_goal_percentage, three_point_field_goal_percentage,
    offensive_rebounds_per_game, total_rebounds_per_game,
    offensive_rating, defensive_rating, turnovers_per_game, fouls_per_game,
    minutes_per_game, points_per_game, true_shooting_percentage
  ) %>%
  filter(!is.na(first_name) & !is.na(last_name)) %>%
  rename(
    AST_percent = assist_percentage,
    STL_percent = steal_percentage,
    BLK_percent = block_percentage,
    FGA = field_goal_attempts,
    MP = minutes_played,
    FG_percent = field_goal_percentage,
    TwoFG_percent = two_point_field_goal_percentage,
    ThreeFG_percent = three_point_field_goal_percentage,
    ORB_PG = offensive_rebounds_per_game,
    TRB_PG = total_rebounds_per_game,
    ORTG = offensive_rating,
    DRTG = defensive_rating,
    TOV_PG = turnovers_per_game,
    Foul_PG = fouls_per_game,
    MPG = minutes_per_game,
    PPG = points_per_game,
    TS_percent = true_shooting_percentage
  )

# Thresholds: 75th percentile for AST%, STL%, BLK%, FGA and MP, and the
# 25th percentile for DRTG (lower is better, i.e., better defenders)
quantiles <- uncommitted_list %>%
  summarise(
    ast_thresh = quantile(AST_percent, 0.75, na.rm = TRUE),
    stl_thresh = quantile(STL_percent, 0.75, na.rm = TRUE),
    blk_thresh = quantile(BLK_percent, 0.75, na.rm = TRUE),
    drtg_thresh = quantile(DRTG, 0.25, na.rm = TRUE), # lower is better
    fga_thresh = quantile(FGA, 0.75, na.rm = TRUE),
    mp_thresh = quantile(MP, 0.75, na.rm = TRUE)
  )

# Apply filtering: a player must clear at least one of the AST% / STL% /
# BLK% / DRTG thresholds AND at least one of the FGA / MP volume thresholds
top_performers <- uncommitted_list %>%
  filter(
    (AST_percent >= quantiles$ast_thresh |
      STL_percent >= quantiles$stl_thresh |
      BLK_percent >= quantiles$blk_thresh |
      DRTG <= quantiles$drtg_thresh) &
      (FGA >= quantiles$fga_thresh | MP >= quantiles$mp_thresh)
  )

# Preview top performers
print(top_performers)

write.csv(
  top_performers,
  "top_uncommitted_defenders_and_creators.csv",
  row.names = FALSE
)

# Load required libraries
library(ggplot2)
library(dplyr)
library(scales)

# Each plot labels points with "first_name last_name"; check_overlap = TRUE
# drops labels that would overlap an earlier label.

# 1. Scatter Plot: FGA vs DRTG
ggplot(
  top_performers,
  aes(x = FGA, y = DRTG, label = paste(first_name, last_name))
) +
  geom_point(color = "blue", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "FGA vs Defensive Rating (DRTG)",
    x = "Field Goal Attempts (FGA)",
    y = "Defensive Rating (↓ Better)"
  ) +
  theme_minimal()

# 2. Scatter Plot: PPG vs DRTG
ggplot(
  top_performers,
  aes(x = PPG, y = DRTG, label = paste(first_name, last_name))
) +
  geom_point(color = "darkgreen", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "PPG vs Defensive Rating (DRTG)",
    x = "Points Per Game (PPG)",
    y = "Defensive Rating (↓ Better)"
  ) +
  theme_minimal()

# 3. Scatter Plot: MP vs DRTG
ggplot(
  top_performers,
  aes(x = MP, y = DRTG, label = paste(first_name, last_name))
) +
  geom_point(color = "purple", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "Minutes Played vs Defensive Rating (DRTG)",
    x = "Total Minutes Played",
    y = "Defensive Rating (↓ Better)"
  ) +
  theme_minimal()

# 4. Scatter Plot: DRTG vs ORTG
ggplot(
  top_performers,
  aes(x = DRTG, y = ORTG, label = paste(first_name, last_name))
) +
  geom_point(color = "red", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "Defensive vs Offensive Rating",
    x = "Defensive Rating (↓ Better)",
    y = "Offensive Rating (↑ Better)"
  ) +
  theme_minimal()

# 5. Heatmap-style scatter (bubble chart): DRTG vs ORTG with
#    bubble size = TS%, color = AST%
ggplot(
  top_performers,
  aes(
    x = DRTG, y = ORTG, size = TS_percent, color = AST_percent,
    label = paste(first_name, last_name)
  )
) +
  geom_point(alpha = 0.8) +
  scale_color_gradient(low = "orange", high = "blue") +
  scale_size_continuous(range = c(3, 10)) +
  geom_text(size = 3, vjust = -1, check_overlap = TRUE) +
  labs(
    title = "Top Uncommitted Players: All-Around Performance",
    x = "Defensive Rating (↓ Better)",
    y = "Offensive Rating (↑ Better)",
    size = "True Shooting %",
    color = "Assist %"
  ) +
  theme_minimal()

# Consolidated run of the regression analysis.
# MASS is attached after tidyverse, so MASS::select masks dplyr::select.
library(tidyverse)
library(broom)
library(car)
library(MASS)
library(scales)
library(readr)
library(ggplot2)

data <- read.csv("srating-data.csv", stringsAsFactors = FALSE)

# Convert numeric columns (values that cannot be parsed become NA)
numeric_cols <- c(
  "minutes_played", "field_goal_percentage",
  "two_point_field_goal_percentage", "three_point_field_goal_percentage",
  "free_throw_percentage", "offensive_rebounds", "defensive_rebounds",
  "total_rebounds", "assists", "steals", "blocks", "turnovers", "fouls",
  "points", "player_efficiency_rating", "true_shooting_percentage",
  "usage_percentage"
)

data[numeric_cols] <- lapply(data[numeric_cols], as.numeric)

# Clean for regression: numeric columns only, rows with any NA dropped
data_clean <- na.omit(data[numeric_cols])

# OLS model for player efficiency rating
model1 <- lm(
  player_efficiency_rating ~ minutes_played + field_goal_percentage +
    three_point_field_goal_percentage + free_throw_percentage +
    offensive_rebounds + defensive_rebounds + assists + steals + blocks +
    turnovers + fouls + points + true_shooting_percentage +
    usage_percentage,
  data = data_clean
)
summary(model1)

# OLS model for points
model2 <- lm(
  points ~ field_goal_percentage + three_point_field_goal_percentage +
    free_throw_percentage + assists + usage_percentage,
  data = data_clean
)
summary(model2)

# AIC-based stepwise selection starting from model1
step_model <- stepAIC(model1, direction = "both")
summary(step_model)

# Variance inflation factors for model1
vif(model1)

# Diagnostic plots in a 2 x 2 grid; restore the previous layout afterwards
old_par <- par(mfrow = c(2, 2))
plot(model1)
par(old_par)

# Correlation matrix of the cleaned numeric columns
cor(data_clean, use = "complete.obs")

# Re-read the dataset with readr (column-spec message suppressed) and force
# weight and plus_minus to numeric
data <- read_csv("srating-data.csv", show_col_types = FALSE)
data$weight <- as.numeric(data$weight)
data$plus_minus <- as.numeric(data$plus_minus)

# Uncommitted players (committed is NA or FALSE) with a non-missing,
# upper-cased position
uncommitted_players <- data %>%
  filter(is.na(committed) | committed == FALSE) %>%
  filter(!is.na(position)) %>%
  mutate(position = toupper(position))

# Columns used for the per-position summary
position_stats <- uncommitted_players %>%
  dplyr::select(
    position, assist_percentage, steal_percentage, block_percentage,
    field_goal_attempts, field_goal_percentage,
    two_point_field_goal_percentage, three_point_field_goal_percentage,
    offensive_rebounds_per_game, total_rebounds_per_game,
    offensive_rating, defensive_rating, fouls_per_game, turnovers_per_game,
    minutes_per_game, points_per_game, true_shooting_percentage,
    first_name, last_name, team_name, conference_id
  )

# Mean of each stat (ignoring NA) and player count, per position
position_summary <- position_stats %>%
  group_by(position) %>%
  summarise(
    AST_percent = mean(assist_percentage, na.rm = TRUE),
    STL_percent = mean(steal_percentage, na.rm = TRUE),
    BLK_percent = mean(block_percentage, na.rm = TRUE),
    FGA = mean(field_goal_attempts, na.rm = TRUE),
    FG_percent = mean(field_goal_percentage, na.rm = TRUE),
    TwoFG_percent = mean(two_point_field_goal_percentage, na.rm = TRUE),
    ThreeFG_percent = mean(three_point_field_goal_percentage, na.rm = TRUE),
    ORB_PG = mean(offensive_rebounds_per_game, na.rm = TRUE),
    TRB_PG = mean(total_rebounds_per_game, na.rm = TRUE),
    ORTG = mean(offensive_rating, na.rm = TRUE),
    DRTG = mean(defensive_rating, na.rm = TRUE),
    TOV_PG = mean(turnovers_per_game, na.rm = TRUE),
    Foul_PG = mean(fouls_per_game, na.rm = TRUE),
    MPG = mean(minutes_per_game, na.rm = TRUE),
    PPG = mean(points_per_game, na.rm = TRUE),
    TS_percent = mean(true_shooting_percentage, na.rm = TRUE),
    Player_Count = n()
  ) %>%
  arrange(position)

position_summary

# Named uncommitted players with key stats under short column names
uncommitted_list <- uncommitted_players %>%
  dplyr::select(
    first_name, last_name, team_name, conference_id, position, committed,
    assist_percentage, steal_percentage, block_percentage,
    field_goal_attempts, minutes_played, field_goal_percentage,
    two_point_field_goal_percentage, three_point_field_goal_percentage,
    offensive_rebounds_per_game, total_rebounds_per_game,
    offensive_rating, defensive_rating, turnovers_per_game, fouls_per_game,
    minutes_per_game, points_per_game, true_shooting_percentage
  ) %>%
  filter(!is.na(first_name) & !is.na(last_name)) %>%
  rename(
    AST_percent = assist_percentage,
    STL_percent = steal_percentage,
    BLK_percent = block_percentage,
    FGA = field_goal_attempts,
    MP = minutes_played,
    FG_percent = field_goal_percentage,
    TwoFG_percent = two_point_field_goal_percentage,
    ThreeFG_percent = three_point_field_goal_percentage,
    ORB_PG = offensive_rebounds_per_game,
    TRB_PG = total_rebounds_per_game,
    ORTG = offensive_rating,
    DRTG = defensive_rating,
    TOV_PG = turnovers_per_game,
    Foul_PG = fouls_per_game,
    MPG = minutes_per_game,
    PPG = points_per_game,
    TS_percent = true_shooting_percentage
  )

# Thresholds: 75th percentile for AST%, STL%, BLK%, FGA and MP; 25th
# percentile for DRTG, which is compared with <= below
quantiles <- uncommitted_list %>%
  summarise(
    ast_thresh = quantile(AST_percent, 0.75, na.rm = TRUE),
    stl_thresh = quantile(STL_percent, 0.75, na.rm = TRUE),
    blk_thresh = quantile(BLK_percent, 0.75, na.rm = TRUE),
    drtg_thresh = quantile(DRTG, 0.25, na.rm = TRUE),
    fga_thresh = quantile(FGA, 0.75, na.rm = TRUE),
    mp_thresh = quantile(MP, 0.75, na.rm = TRUE)
  )

# Keep players who clear at least one of the AST% / STL% / BLK% / DRTG
# thresholds AND at least one of the FGA / MP thresholds
top_performers <- uncommitted_list %>%
  filter(
    (AST_percent >= quantiles$ast_thresh |
      STL_percent >= quantiles$stl_thresh |
      BLK_percent >= quantiles$blk_thresh |
      DRTG <= quantiles$drtg_thresh) &
      (FGA >= quantiles$fga_thresh | MP >= quantiles$mp_thresh)
  )

top_performers

# Export both tables as CSV without row names
write.csv(uncommitted_list, "uncommitted_player_stats.csv", row.names = FALSE)
write.csv(
  top_performers,
  "top_uncommitted_defenders_and_creators.csv",
  row.names = FALSE
)

# Scatter plots of top_performers. Points are labelled with
# "first_name last_name"; check_overlap = TRUE drops overlapping labels.

# FGA vs DRTG
ggplot(
  top_performers,
  aes(x = FGA, y = DRTG, label = paste(first_name, last_name))
) +
  geom_point(color = "blue", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "FGA vs Defensive Rating (DRTG)",
    x = "Field Goal Attempts (FGA)",
    y = "Defensive Rating (↓ Better)"
  ) +
  theme_minimal()

# PPG vs DRTG
ggplot(
  top_performers,
  aes(x = PPG, y = DRTG, label = paste(first_name, last_name))
) +
  geom_point(color = "darkgreen", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "PPG vs Defensive Rating (DRTG)",
    x = "Points Per Game (PPG)",
    y = "Defensive Rating (↓ Better)"
  ) +
  theme_minimal()

# MP vs DRTG
ggplot(
  top_performers,
  aes(x = MP, y = DRTG, label = paste(first_name, last_name))
) +
  geom_point(color = "purple", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "Minutes Played vs Defensive Rating (DRTG)",
    x = "Total Minutes Played",
    y = "Defensive Rating (↓ Better)"
  ) +
  theme_minimal()

# DRTG vs ORTG
ggplot(
  top_performers,
  aes(x = DRTG, y = ORTG, label = paste(first_name, last_name))
) +
  geom_point(color = "red", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(
    title = "Defensive vs Offensive Rating",
    x = "Defensive Rating (↓ Better)",
    y = "Offensive Rating (↑ Better)"
  ) +
  theme_minimal()

# Bubble chart: DRTG vs ORTG, bubble size = TS%, color = AST%
ggplot(
  top_performers,
  aes(
    x = DRTG, y = ORTG, size = TS_percent, color = AST_percent,
    label = paste(first_name, last_name)
  )
) +
  geom_point(alpha = 0.8) +
  scale_color_gradient(low = "orange", high = "blue") +
  scale_size_continuous(range = c(3, 10)) +
  geom_text(size = 3, vjust = -1, check_overlap = TRUE) +
  labs(
    title = "Top Uncommitted Players: All-Around Performance",
    x = "Defensive Rating (↓ Better)",
    y = "Offensive Rating (↑ Better)",
    size = "True Shooting %",
    color = "Assist %"
  ) +
  theme_minimal()

# Libraries for rendering and saving tables. A stray `Y` token that
# followed library(tinytex) was removed: evaluated as code it fails unless
# an object named Y happens to exist.
library(tinytex)
library(gt)
library(dplyr)
library(webshot2)

# Each section below takes the first 20 rows of top_performers after
# sorting by one stat, builds a gt table with a title, and saves it as PNG.

# Create and save: Top FGA (highest first)
top_fga <- top_performers %>%
  arrange(desc(FGA)) %>%
  slice_head(n = 20) %>%
  dplyr::select(first_name, last_name, team_name, FGA, DRTG)

gt_fga <- gt(top_fga) %>%
  tab_header(
    title = "Top 20 Uncommitted Players by Field Goal Attempts (FGA)"
  )

gtsave(gt_fga, "top_fga_table.png")

# Create and save: Top PPG (highest first)
top_ppg <- top_performers %>%
  arrange(desc(PPG)) %>%
  slice_head(n = 20) %>%
  dplyr::select(first_name, last_name, team_name, PPG, DRTG)

gt_ppg <- gt(top_ppg) %>%
  tab_header(
    title = "Top 20 Uncommitted Players by Points Per Game (PPG)"
  )

gtsave(gt_ppg, "top_ppg_table.png")

# Create and save: Top MP (highest first)
top_mp <- top_performers %>%
  arrange(desc(MP)) %>%
  slice_head(n = 20) %>%
  dplyr::select(first_name, last_name, team_name, MP, DRTG)

gt_mp <- gt(top_mp) %>%
  tab_header(
    title = "Top 20 Uncommitted Players by Minutes Played (MP)"
  )

gtsave(gt_mp, "top_mp_table.png")

# Create and save: Top DRTG (ascending sort, so the lowest DRTG comes first)
top_drtg <- top_performers %>%
  arrange(DRTG) %>%
  slice_head(n = 20) %>%
  dplyr::select(first_name, last_name, team_name, DRTG, ORTG)

gt_drtg <- gt(top_drtg) %>%
  tab_header(
    title = "Top 20 Uncommitted Players by Defensive Rating (DRTG)"
  )

gtsave(gt_drtg, "top_drtg_table.png")

# Create and save: Top Bubble (sorted by AST%, highest first)
top_bubble <- top_performers %>%
  arrange(desc(AST_percent)) %>%
  slice_head(n = 20) %>%
  dplyr::select(
    first_name, last_name, team_name, DRTG, ORTG, TS_percent, AST_percent
  )

gt_bubble <- gt(top_bubble) %>%
  tab_header(
    title = "Top 20 All-Around Uncommitted Players (AST%, TS%)"
  )

gtsave(gt_bubble, "top_bubble_table.png")