
#NEXT # Load necessary libraries
library(dplyr)
library(readr)
print(top_performers)
write.csv(top_performers, "top_2025_PIT_performers.csv", row.names = FALSE)
data$weight <- as.numeric(data$weight)
Warning: NAs introduced by coercion
# Filter uncommitted players and ensure position is valid
PITdata <- data %>%
filter(!is.na(position)) %>%
mutate(position = toupper(position)) # Normalize positions
names(PITdata)
[1] "player_name" "school" "height" "position"
[5] "weight" "rk" "class" "height_2"
[9] "team" "conf" "g" "min"
[13] "prpg" "bpm" "ortg" "drtg"
[17] "usg" "efg" "ts" "or"
[21] "dr" "ast" "to" "a_to"
[25] "blk" "stl" "ftr" "dunks_m"
[29] "dunks_a" "ft_m" "ft_a" "x2p_m"
[33] "x2p_a" "x3p_m" "x3p_a" "x3p_100"
[37] "ft" "x2p" "x3p" "ppg"
[41] "total_rebounds"
ggplot(PITdata, aes(x = drtg, y = ortg, size = ts, color = ast, label = paste(player_name))) +
geom_point(alpha = 0.8) +
scale_color_gradient(low = "orange", high = "blue") +
scale_size_continuous(range = c(3, 10)) +
geom_text(size = 3, vjust = -1, check_overlap = TRUE) +
labs(title = "Top PIT Players: All-Around Performance by Cyro Asseo",
x = "Defensive Rating (↓ Better)",
y = "Offensive Rating (↑ Better)",
size = "True Shooting %",
color = "Assists") +
theme_minimal()
Warning: Removed 971 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 971 rows containing missing values or values outside the scale range
(`geom_text()`).

ggplot(PITdata, aes(x = x3p, y = drtg, label = paste(player_name)) +
geom_point(color = "blue", size = 3) +
geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
labs(title = "3PM vs Defensive Rating (DRTG)",
x = "3-Pointers Made (3PM)",
y = "Defensive Rating (↓ Better)") +
theme_minimal()
Error: Incomplete expression: ggplot(PITdata, aes(x = x3p, y = drtg, label = paste(player_name)) +
geom_point(color = "blue", size = 3) +
geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
labs(title = "3PM vs Defensive Rating (DRTG)",
x = "3-Pointers Made (3PM)",
y = "Defensive Rating (↓ Better)") +
theme_minimal()
ggsave("3pm_vs_drtg.png", plot = p1, width = 8, height = 6, dpi = 300)
Warning: Removed 971 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 971 rows containing missing values or values outside the scale range
(`geom_text()`).
ggsave("ppg_vs_drtg.png", plot = p2, width = 8, height = 6, dpi = 300)
Warning: Removed 971 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 971 rows containing missing values or values outside the scale range
(`geom_text()`).
ggsave("min_vs_drtg.png", plot = p3, width = 8, height = 6, dpi = 300)
Warning: Removed 971 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 971 rows containing missing values or values outside the scale range
(`geom_text()`).
ggsave("drtg_vs_ortg.png", plot = p4, width = 8, height = 6, dpi = 300)
Warning: Removed 971 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 971 rows containing missing values or values outside the scale range
(`geom_text()`).
ggsave("all_around_performance.png", plot = p5, width = 8, height = 6, dpi = 300)
Warning: Removed 971 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 971 rows containing missing values or values outside the scale range
(`geom_text()`).
NEXT
top_fga <- PITdata %>%
arrange(desc(x2p + x3p)) %>%
slice_head(n = 20) %>%
dplyr::select(player_name, school, x2p, x3p, drtg)
gt_fga <- gt(top_fga) %>%
tab_header(title = "Top PIT Players by Field Goal Attempts (FGA) by Cyro Asseo")
gtsave(gt_fga, "top_PIT_fga_table.png")
file:////var/folders/n4/y37zz1ss7ql7x6z33jc88mmw0000gn/T//RtmpzEX3L9/file2e7a400f046d.html screenshot completed
gt_top_performers <- gt(top_performers) %>%
tab_header(
title = "Top PIT Defenders & All‑Around Impact by Cyro Asseo"
) %>%
cols_label(
player_name = "Player",
school = "School",
position = "Pos",
drtg = "DRTG",
ortg = "ORTG",
ast = "AST",
stl = "STL",
blk = "BLK",
efg = "eFG %",
ts = "TS %"
) %>%
fmt_number(
columns = c(drtg, ortg, ast, stl, blk, efg, ts),
decimals = 1)
gtsave(
gt_top_performers,
"top_PIT_performers_table.png",
vwidth = 2600, # viewport width in px
vheight = 1900, # viewport height in px
zoom = 2 # scale factor
)
file:////var/folders/n4/y37zz1ss7ql7x6z33jc88mmw0000gn/T//RtmpzEX3L9/file2e7a50ba102f.html screenshot completed
---
title: "R Notebook"
output: html_notebook
---


```{r}
# Set working directory
# NOTE(review): hard-coded, machine-specific path. A working directory changed
# inside a notebook chunk is usually reset when the chunk finishes, so later
# chunks may write elsewhere -- consider knitr's `root.dir` option; confirm.
setwd("/Users/cyroasseodechoch/Documents")
getwd()

# Load necessary libraries
# MASS is attached after the tidyverse, so a bare select() resolves to
# MASS::select(); call dplyr::select() explicitly in later code.
library(tidyverse)
library(broom)
library(car)
library(MASS)
library(janitor)
library(dplyr)

# Read the raw player data
data <- read.csv("PIT_with_Torvik_Stats4.csv", stringsAsFactors = FALSE)

colnames(data)
# Clean column names to a standard snake_case format
data <- clean_names(data)  # This turns "MIN%" into "min_percent", etc.

# View the cleaned names
names(data)

# Points per game:
# total points = 1 x FT + 2 x 2P + 3 x 3P (made), divided by games played
data <- data %>%
  # make sure the inputs are numeric before doing arithmetic on them
  mutate(across(c(ft_m, x2p_m, x3p_m, g), as.numeric)) %>%
  mutate(ppg = (ft_m + 2 * x2p_m + 3 * x3p_m) / g)

# Key performance indicators used as response / predictors below
numeric_cols <- c(
  "min", "efg", "ppg", "x2p", "x3p", "ft", "or", "dr",
  "ast", "stl", "blk", "to", "ts", "usg", "bpm"
)

# Coerce to numeric FIRST, so entries that cannot be parsed become NA and are
# then removed by na.omit() below
data[numeric_cols] <- lapply(data[numeric_cols], as.numeric)

# Create total rebounds column
data$total_rebounds <- data$or + data$dr

# Complete cases only: every model fitted below must use the same rows,
# otherwise stepwise selection stops when the number of rows in use changes
data_clean <- na.omit(data[numeric_cols])

# Make sure every column name is syntactically valid inside a formula
colnames(data_clean) <- make.names(colnames(data_clean))

# 1. OLS Regression: What contributes to Box Plus-Minus (BPM)?
model1 <- lm(
  bpm ~ min + efg + x2p + x3p + ft + or + dr +
    ast + stl + blk + to + ts + usg,
  data = data_clean
)
summary(model1)

# 2. Stepwise Regression for BPM (based on AIC)
step_model <- stepAIC(model1, direction = "both")
summary(step_model)

# 3. Multicollinearity Check (VIF)
vif_values <- vif(model1)
print(vif_values)

# 4. Diagnostic Plots (2 x 2 grid of the standard lm diagnostics)
par(mfrow = c(2, 2))
plot(model1)
```

# NEXT

Load necessary libraries:
```{r}
library(dplyr)
library(readr)
```



```{r}
names(data)

# Keep only players with a recorded position and normalise its case
data1 <- data %>%
  filter(!is.na(position)) %>%
  mutate(position = toupper(position))

# Select relevant variables (match exact column names from dataset).
# Built from the cleaned `data1`, so NA positions are excluded and the
# upper-cased position values are the ones being grouped on.
position_stats <- data1 %>%
  dplyr::select(
    position,
    ast, stl, blk,
    efg, ts,
    x2p, x3p,
    or, dr,
    ppg, min,
    ft, to,
    usg, drtg, ortg, player_name, school, height, weight
  )

# Summarize by position: mean of each stat, ignoring missing values.
# `player_name` in the result holds the number of rows (players) per position,
# not a name; the column name is kept for backward compatibility.
position_summary <- position_stats %>%
  group_by(position) %>%
  summarise(
    across(
      c(ast, stl, blk, efg, ts, x2p, x3p, or, dr, ppg, min, ft, to,
        usg, drtg, ortg),
      function(x) mean(x, na.rm = TRUE)
    ),
    player_name = n()
  ) %>%
  arrange(position)

# View summary table
print(position_summary)
```

```{r}
# Cutoffs: the 75th percentile (missing values ignored) of each metric,
# computed over every row of `data` and stored by metric name.
quantiles <- lapply(
  data[c("ast", "stl", "blk", "ppg", "ts", "efg", "min")],
  quantile,
  probs = 0.75,
  na.rm = TRUE
)

# A player is kept when BOTH hold:
#   1. at least one of AST, STL, BLK, PPG, TS reaches its cutoff, and
#   2. at least one of eFG, MIN reaches its cutoff.
# (filter() combines comma-separated conditions with a logical AND.)
top_performers <- filter(
  data,
  ast >= quantiles$ast |
    stl >= quantiles$stl |
    blk >= quantiles$blk |
    ppg >= quantiles$ppg |
    ts >= quantiles$ts,
  efg >= quantiles$efg |
    min >= quantiles$min
)

# Show the selection and export it
print(top_performers)
write.csv(top_performers, "top_2025_PIT_performers.csv", row.names = FALSE)

```

```{r}
# Coerce weight to numeric; entries that cannot be parsed become NA
data[["weight"]] <- as.numeric(data[["weight"]])
```

```{r}
# Plotting/table dataset: rows with a non-missing position, with the position
# codes upper-cased so that e.g. "pg" and "PG" are treated as the same value
PITdata <- mutate(
  filter(data, !is.na(position)),
  position = toupper(position)
)
```


```{r}
# List the columns available in PITdata
colnames(PITdata)
```



```{r}
# Load required libraries
library(ggplot2)
library(dplyr)
library(scales)


# Each plot below draws one point per row of PITdata and labels it with the
# player's name; overlapping labels are dropped (check_overlap = TRUE).

# 1. Scatter Plot: 3PM vs DRTG
ggplot(PITdata, aes(x = x3p, y = drtg, label = player_name)) +
  geom_point(color = "blue", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(title = "3PM vs Defensive Rating (DRTG) by Cyro Asseo",
       x = "3-Pointers Made (3PM)",
       y = "Defensive Rating (↓ Better)") +
  theme_minimal()

# 2. Scatter Plot: PPG vs DRTG
ggplot(PITdata, aes(x = ppg, y = drtg, label = player_name)) +
  geom_point(color = "darkgreen", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(title = "PPG vs Defensive Rating (DRTG) by Cyro Asseo",
       x = "Points Per Game (PPG)",
       y = "Defensive Rating (↓ Better)") +
  theme_minimal()

# 3. Scatter Plot: MP vs DRTG
ggplot(PITdata, aes(x = min, y = drtg, label = player_name)) +
  geom_point(color = "purple", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(title = "Minutes Played vs Defensive Rating (DRTG) by Cyro Asseo",
       x = "Total Minutes Played",
       y = "Defensive Rating (↓ Better)") +
  theme_minimal()

# 4. Scatter Plot: DRTG vs ORTG
ggplot(PITdata, aes(x = drtg, y = ortg, label = player_name)) +
  geom_point(color = "red", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(title = "Defensive vs Offensive Rating by Cyro Asseo",
       x = "Defensive Rating (↓ Better)",
       y = "Offensive Rating (↑ Better)") +
  theme_minimal()

# 5. Heatmap-style scatter (bubble chart): DRTG vs ORTG with
#    bubble size = TS%, color = AST%
ggplot(
  PITdata,
  aes(x = drtg, y = ortg, size = ts, color = ast, label = player_name)
) +
  geom_point(alpha = 0.8) +
  scale_color_gradient(low = "orange", high = "blue") +
  scale_size_continuous(range = c(3, 10)) +
  # size is fixed here so the labels do not scale with TS% like the bubbles do
  geom_text(size = 3, vjust = -1, check_overlap = TRUE) +
  labs(title = "Top PIT Players: All-Around Performance by Cyro Asseo",
       x = "Defensive Rating (↓ Better)",
       y = "Offensive Rating (↑ Better)",
       size = "True Shooting %",
       color = "Assists") +
  theme_minimal()
```

```{r}
# 3PM vs DRTG scatter: one blue point per player, labelled with the player's
# name just above the point; overlapping labels are dropped.
ggplot(PITdata, aes(x = x3p, y = drtg, label = player_name)) +
  geom_point(color = "blue", size = 3) +
  geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
  labs(title = "3PM vs Defensive Rating (DRTG) by Cyro Asseo",
       x = "3-Pointers Made (3PM)",
       y = "Defensive Rating (↓ Better)") +
  theme_minimal()
```





```{r}
# 1. Build the plots and keep them as p1..p5

# Shared recipe for the four single-colour scatter plots of PITdata:
# fixed-colour points, player-name labels nudged above each point (overlapping
# labels dropped), custom title and axis titles, minimal theme.
labelled_scatter <- function(mapping, point_colour,
                             plot_title, x_title, y_title) {
  ggplot(PITdata, mapping) +
    geom_point(color = point_colour, size = 3) +
    geom_text(size = 3, vjust = -0.7, check_overlap = TRUE) +
    labs(title = plot_title, x = x_title, y = y_title) +
    theme_minimal()
}

p1 <- labelled_scatter(
  aes(x = x3p, y = drtg, label = player_name),
  point_colour = "blue",
  plot_title   = "3PM vs Defensive Rating (DRTG) by Cyro Asseo",
  x_title      = "3‑Pointers Made (3PM)",
  y_title      = "Defensive Rating (↓ Better)"
)

p2 <- labelled_scatter(
  aes(x = ppg, y = drtg, label = player_name),
  point_colour = "darkgreen",
  plot_title   = "PPG vs Defensive Rating (DRTG) by Cyro Asseo",
  x_title      = "Points Per Game (PPG)",
  y_title      = "Defensive Rating (↓ Better)"
)

p3 <- labelled_scatter(
  aes(x = min, y = drtg, label = player_name),
  point_colour = "purple",
  plot_title   = "Minutes Played vs Defensive Rating (DRTG) by Cyro Asseo",
  x_title      = "Total Minutes Played",
  y_title      = "Defensive Rating (↓ Better)"
)

p4 <- labelled_scatter(
  aes(x = drtg, y = ortg, label = player_name),
  point_colour = "red",
  plot_title   = "Defensive vs Offensive Rating by Cyro Asseo",
  x_title      = "Defensive Rating (↓ Better)",
  y_title      = "Offensive Rating (↑ Better)"
)

# Bubble chart: point size follows `ts`, point colour follows `ast`
p5 <- ggplot(
  PITdata,
  aes(x = drtg, y = ortg, size = ts, color = ast, label = player_name)
) +
  geom_point(alpha = 0.8) +
  geom_text(size = 3, vjust = -1, check_overlap = TRUE) +
  scale_color_gradient(low = "orange", high = "blue") +
  scale_size_continuous(range = c(3, 10)) +
  labs(
    title = "Top PIT Players: All‑Around Performance by Cyro Asseo",
    x     = "Defensive Rating (↓ Better)",
    y     = "Offensive Rating (↑ Better)",
    size  = "True Shooting %",
    color = "Assists"
  ) +
  theme_minimal()

# 2. Write every plot to disk as an 8 x 6 inch PNG at 300 dpi
plot_files <- list(
  "3pm_vs_drtg.png"            = p1,
  "ppg_vs_drtg.png"            = p2,
  "min_vs_drtg.png"            = p3,
  "drtg_vs_ortg.png"           = p4,
  "all_around_performance.png" = p5
)
for (file_name in names(plot_files)) {
  ggsave(file_name, plot = plot_files[[file_name]],
         width = 8, height = 6, dpi = 300)
}
```

# NEXT

```{r}

library(gt)
library(dplyr)
library(webshot2)


# Every table below follows the same recipe: keep the display columns, sort,
# take the first 20 rows, add a title, and save the table as a PNG.

# Table: "Top FGA"
# NOTE(review): rows are ranked by x2p + x3p, while the dataset also has
# x2p_a / x3p_a columns -- confirm that x2p + x3p really is attempts.
top_fga <- PITdata %>%
  dplyr::select(player_name, school, x2p, x3p, drtg) %>%
  arrange(desc(x2p + x3p)) %>%
  slice_head(n = 20)

gt_fga <- tab_header(
  gt(top_fga),
  title = "Top PIT Players by Field Goal Attempts (FGA) by Cyro Asseo"
)

gtsave(data = gt_fga, filename = "top_PIT_fga_table.png")

# Table: highest points per game
top_ppg <- PITdata %>%
  dplyr::select(player_name, school, ppg, drtg) %>%
  arrange(desc(ppg)) %>%
  slice_head(n = 20)

gt_ppg <- tab_header(
  gt(top_ppg),
  title = "Top PIT Players by Points Per Game (PPG) by Cyro Asseo"
)

gtsave(data = gt_ppg, filename = "top_ppg_table.png")

# Table: largest `min` values
top_mp <- PITdata %>%
  dplyr::select(player_name, school, min, drtg) %>%
  arrange(desc(min)) %>%
  slice_head(n = 20)

gt_mp <- tab_header(
  gt(top_mp),
  title = "Top PIT Players by Minutes Played x DRTG by Cyro Asseo"
)

gtsave(data = gt_mp, filename = "top_mp_table.png")

# Table: lowest defensive rating (sorted ascending, lowest drtg first)
top_drtg <- PITdata %>%
  dplyr::select(player_name, school, drtg, ortg) %>%
  arrange(drtg) %>%
  slice_head(n = 20)

gt_drtg <- tab_header(
  gt(top_drtg),
  title = "Top PIT Players by Defensive Rating (DRTG) by Cyro Asseo"
)

gtsave(data = gt_drtg, filename = "top_drtg_table.png")

# Table: companion to the bubble chart, ranked by `ast`
top_bubble <- PITdata %>%
  dplyr::select(player_name, school, drtg, ortg, ts, ast) %>%
  arrange(desc(ast)) %>%
  slice_head(n = 20)

gt_bubble <- tab_header(
  gt(top_bubble),
  title = "Top All-Around PIT Players (AST, TS%) by Cyro Asseo"
)

gtsave(data = gt_bubble, filename = "top_bubble_table.png")
```
```{r}
# Load libraries
library(dplyr)
library(gt)

# 1. Compute cutoffs for "top" performers.
#    One-row data frame; every cutoff is computed on PITdata so that the
#    thresholds and the rows they are applied to come from the same dataset.
quantiles <- PITdata %>%
  summarise(
    ast_q  = quantile(ast, 0.75, na.rm = TRUE),
    stl_q  = quantile(stl, 0.75, na.rm = TRUE),
    blk_q  = quantile(blk, 0.75, na.rm = TRUE),
    drtg_q = quantile(drtg, 0.25, na.rm = TRUE),  # lower DRTG is better
    ppg_q  = quantile(ppg, 0.75, na.rm = TRUE)
  )

# 2. Keep players who reach at least one cutoff: each data column is compared
#    with its own `<stat>_q` threshold (<= for drtg, because lower is better).
top_performers <- PITdata %>%
  filter(
    ast >= quantiles$ast_q |
      stl >= quantiles$stl_q |
      blk >= quantiles$blk_q |
      drtg <= quantiles$drtg_q |
      ppg >= quantiles$ppg_q
  )

# 3. Build the display table: readable headers for the key columns and one
#    decimal place for the rate/percentage stats.
gt_top_performers <- gt(top_performers) %>%
  tab_header(
    title = "Top PIT Defenders & All‑Around Impact by Cyro Asseo"
  ) %>%
  cols_label(
    player_name = "Player",
    school      = "School",
    position    = "Pos",
    drtg        = "DRTG",
    ortg        = "ORTG",
    ast         = "AST",
    stl         = "STL",
    blk         = "BLK",
    efg         = "eFG %",
    ts          = "TS %"
  ) %>%
  fmt_number(
    columns = c(drtg, ortg, ast, stl, blk, efg, ts),
    decimals = 1
  )

# 4. Save the table as a PNG
gtsave(
  gt_top_performers,
  "top_PIT_performers_table.png",
  vwidth  = 2600,   # viewport width in px
  vheight = 1900,   # viewport height in px
  zoom    = 2       # scale factor
)
```

