library(rvest)
library(tidyverse)
library(rsample)
library(tidymodels)
library(nbastatR)
library(ranger)
library(caret)
library(parsnip)
library(glmnet)
library(reactable)
library(reactablefmtr)
library(plotly)
# Set the VROOM_CONNECTION_SIZE environment variable to 262144 (2 * 131072)
# for this R session.
# NOTE(review): presumably this enlarges vroom's connection buffer for a later
# download step — confirm which call actually needs it.
Sys.setenv(VROOM_CONNECTION_SIZE = 2 * 131072)
# High school rankings ----
# Scrape ESPN's recruiting player rankings, one page per class year (2024 down
# to 2007). The result, `tables`, is a list of data frames keyed by the class
# year as a string ("2024", ..., "2007"), in that order.
class_years <- 2024:2007
tables <- lapply(class_years, function(class_year) {
  # Build the rankings URL for this class year.
  rankings_url <- paste0(
    "https://www.espn.com/college-sports/basketball/recruiting/playerrankings/_/class/",
    class_year,
    "/order/true"
  )

  # Read the page, extract its <table> nodes and flatten them to a data frame.
  rankings <- read_html(rankings_url) %>%
    html_nodes("table") %>%
    html_table(fill = TRUE) %>%
    as.data.frame()

  # The first scraped row carries the column labels: promote it to the header
  # and drop it from the data.
  colnames(rankings) <- rankings[1, ]
  rankings <- rankings[-1, ]

  # Clean the player names. The first pattern is a regex alternation, so it
  # removes both "Video " and " Scouts Report"; the second call removes the
  # "|" that is then left at the end of the string.
  rankings$PLAYER <- gsub("Video | Scouts Report", "", rankings$PLAYER)
  rankings$PLAYER <- sub("\\|$", "", rankings$PLAYER)

  rankings
})
names(tables) <- as.character(class_years)
# Stack the per-year tables into a single data frame and keep only the columns
# used by the later cleaning step.
high_school <- select(
  as.data.frame(ungroup(do.call(rbind, tables))),
  RK, PLAYER, POS, HOMETOWN, HT, WT, GRADE, SCHOOL
)

# Insert a comma right after the first pair of consecutive capital letters in
# HOMETOWN (presumably the state abbreviation — verify against the scraped
# text) so the field can later be split on commas.
high_school[["HOMETOWN"]] <- gsub(
  "([A-Z]{2})(.*)",
  "\\1,\\2",
  high_school[["HOMETOWN"]]
)
# Build the cleaned rankings table: numeric rank/score/weight, height in
# inches, and HOMETOWN split into City / State / High School.
# NOTE(review): the `_00_24` suffix does not match the 2007-2024 class years
# scraped earlier in this file; the name is kept because other code may refer
# to it.
high_school_00_24 <- high_school %>%
  # Split HT on the apostrophe: first piece is feet, second is inches. Any
  # further pieces are discarded by separate() (with a warning).
  separate(HT, c("feet", "inches"), "'", convert = TRUE) %>%
  mutate(
    # Coerce every column used numerically in one pass. `inches` is included
    # so that a value convert = TRUE could not parse becomes NA instead of
    # breaking the height arithmetic below.
    across(c(feet, inches, RK, GRADE, WT), as.numeric),
    Height = (feet * 12) + inches
  ) %>%
  # HOMETOWN is split on bare commas, so any whitespace that followed a comma
  # in the source string stays attached to the next piece.
  # NOTE(review): confirm whether State needs trimws().
  separate(HOMETOWN, into = c("City", "State", "High School"), sep = ",") %>%
  # Rename and reorder in a single step; columns not listed here are dropped.
  select(
    Rank = RK,
    Player = PLAYER,
    Score = GRADE,
    Position = POS,
    Height,
    Weight = WT,
    City,
    State,
    `High School`
  )
# Render the cleaned rankings as a sortable, searchable, ESPN-themed table.
# Rows per page are set with `defaultPageSize`; `pageSizeOptions` only feeds
# the page-size menu, which is hidden unless `showPageSizeOptions = TRUE`, so
# on its own it would leave the table at the default page size.
high_school_00_24 %>%
  reactable(
    sortable = TRUE,
    searchable = TRUE,
    theme = espn(),
    defaultPageSize = 25
  ) %>%
  add_title("High School Rankings, '07 to '24")