This R Markdown document tracks the author’s progress in learning R.
It uses the players table from basketball-reference.com, which reports stats per 100 possessions for the NBA 2024-25 regular season.
# Download the basketball-reference.com page and parse the player stats table
# into a data frame called `mainTable`.
#install.packages("rvest")
library(rvest)
## Warning: package 'rvest' was built under R version 4.4.3
url <- "https://www.basketball-reference.com/leagues/NBA_2025_per_poss.html#per_poss::17"
document <- read_html(url)
# The URL fragment (`#per_poss::17`) suggests the stats table carries the id
# `per_poss` -- TODO confirm against the live page. Selecting by id avoids
# silently picking up an unrelated table if the page layout changes. When no
# element matches, html_element() returns a missing node, and we fall back to
# the first <table> on the page (the previous behaviour).
statsNode <- html_element(document, "table#per_poss")
if (inherits(statsNode, "xml_missing")) {
  statsNode <- html_element(document, "table")
}
mainTable <- html_table(statsNode)
head(mainTable)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Reduce the scraped table to one row per player:
#   1. drop the "League Average" summary row,
#   2. sort by player name, then team,
#   3. keep only the first row of each player name.
# NOTE(review): presumably a player appears on several rows when he played for
# more than one team; which row survives depends on the Team sort order --
# verify that it is the intended one.
mainTable <- mainTable %>%
  filter(Player != "League Average") %>%
  arrange(Player, Team) %>%
  distinct(Player, .keep_all = TRUE)
# "eFG%" is not a syntactic R name, so expose it as plain "eFG".
names(mainTable) <- replace(names(mainTable), names(mainTable) == "eFG%", "eFG")
#write.csv(mainTable, "mainTable2.csv")
# Eligibility thresholds and ranking depth. These are also read by the plot
# labels further down.
minG <- 65      # minimum games played; currently unused (filter disabled below)
minMP <- 1500   # minimum value of the MP column
minFGA <- 15    # minimum value of the FGA column
topN <- 5       # number of players kept at each end of the eFG ranking
library(dplyr)

# Keep only the columns needed for the ranking and the chart, and coerce every
# numeric column to double. FGA and eFG are the columns actually compared and
# ranked below, so they must be numeric too: if the scraped table ever comes
# back as text, `FGA >= minFGA` would otherwise be a string comparison.
# The games filter (`G >= minG`) is intentionally left disabled; re-enable it
# by adding `G >= minG` to the filter() call.
crop_df <- mainTable %>%
  select(Player, Team, Pos, G, MP, FGA, eFG) %>%
  mutate(across(c(G, MP, FGA, eFG), as.double)) %>%
  filter(MP >= minMP, FGA >= minFGA)

# Best and worst `topN` players by eFG within each position. Ties are broken
# by row order so every position contributes at most `topN` rows per side.
top_df <- slice_max(crop_df, by = Pos,
                    order_by = eFG, n = topN, with_ties = FALSE)
bottom_df <- slice_min(crop_df, by = Pos,
                       order_by = eFG, n = topN, with_ties = FALSE)

# Display order of the positions (also used for the plot facets).
positionsOrder <- c("PG", "SG", "SF", "PF", "C")

# union() drops rows that appear in both sets, which happens when a position
# has fewer than 2 * topN eligible players.
result_df <- union(top_df, bottom_df) %>%
  arrange(match(Pos, positionsOrder), desc(eFG))

# Unweighted mean eFG of the eligible players per position. The column is
# named "mean(eFG)" explicitly because the plot looks it up under that name;
# without the explicit name, adding `na.rm` would change the auto-generated
# column name.
mean_df <- crop_df %>%
  group_by(Pos) %>%
  summarise(`mean(eFG)` = mean(eFG, na.rm = TRUE))
# TODO: league average per position with filter - reference line
# TODO: include lookup col (TS%) from another df
head(result_df)
library(ggplot2)
# Lollipop-style chart of the selected players, one facet row per position.
#   * points: one per player, sized by FGA and coloured by MP
#   * segments: run from the fixed x value 0.550 to the player's eFG
#   * dashed vertical line: the per-position mean taken from mean_df
# Players are ordered on the y axis by FGA via reorder().
ggplot(
  data = result_df,
  mapping = aes(x = eFG, y = reorder(Player, FGA), colour = MP)
) +
  geom_point(mapping = aes(size = FGA)) +
  geom_segment(
    mapping = aes(x = 0.550, xend = eFG, y = Player, yend = Player)
  ) +
  geom_vline(
    data = mean_df,
    mapping = aes(xintercept = `mean(eFG)`),
    linetype = 5
  ) +
  # Three decimals on the x axis.
  scale_x_continuous(labels = scales::label_number(accuracy = 0.001)) +
  # Facet rows follow positionsOrder; each facet only lists its own players.
  facet_grid(factor(Pos, levels = positionsOrder) ~ ., scales = "free_y") +
  labs(
    x = "eFG% (Positional Average Dashed)",
    y = "Player (Descending FGA)",
    title = paste("Top and bottom",topN,"players in eFG% by position"),
    subtitle = paste("Min",minFGA, "FGA &",minMP, "minutes played in the NBA 2024-25 regular season"),
    caption = "Source: basketball-reference.com"
  )