library(rvest)league_per_game_url <-"https://www.basketball-reference.com/leagues/NBA_stats_per_game.html"league_per_game_url_page <-read_html(league_per_game_url)nba_season_history <-html_table(league_per_game_url_page)[[1]]# Create a vector of new column names that match the first row valuesnew_names <-as.character(nba_season_history[1,])# Use colnames() function to assign new column names to old column namescolnames(nba_season_history) <- new_names# Remove the first row using slice()nba_season_history <- nba_season_history |>slice(-1)nba_season_history <- nba_season_history |>filter(!is.na(as.numeric(Rk))) |>mutate(across(6:32, as.numeric))#mutate the HT column to inchesnba_season_history <- nba_season_history |>mutate(Ht =sapply(lapply(strsplit(Ht, "-"), as.numeric), function(x) x[1] *12+ x[2]))# Replace TOV values below 2% with NAnba_season_history <- nba_season_history |>mutate(TOV =if_else(TOV <2, NA_real_, TOV))# Filter 'nba_season_history' to seasons with `Pace` of NAnba_season_history_pre_1973 <- nba_season_history |>filter(is.na(Pace))nba_season_history_selected <- nba_season_history |>select(Season, PTS,"3PA","FGA","FTA", AST,"3P%","FT%","eFG%", Pace, ORtg, Wt, Ht, TRB, STL, BLK, TOV,) |>rename(Points=PTS,"3 Point Attempts"="3PA","Field Goal Attempts"=FGA,"Free Throw Attempts"=FTA,"Assists"=AST,"3 Point %"="3P%","Free Throw %"="FT%","Effective Field Goal %"="eFG%","Pace"=Pace,"Points Per 100 Poss."=ORtg,"Weight (lbs)"=Wt,"Height (Inches)"=Ht,"Rebounds"=TRB,"Steals"=STL,"Blocks"=BLK,"Turnovers"=TOV,) |>pivot_longer(cols=2:17,names_to ="Stat",values_to ="Value")#convert Season from factor to numericnba_season_history_selected$Season <-as.numeric(gsub("-.*", "", nba_season_history_selected$Season))#convert Season from numeric to datenba_season_history_selected$Season <-as.Date(paste0(nba_season_history_selected$Season,"-10-01"))#load lubridate packagelibrary(lubridate)#add one year to every row under seasonnba_season_history_selected$Season <- nba_season_history_selected$Season +years(1)#save as csv filewrite.csv(nba_season_history_selected, here("output_data", "nba_season_history_selected.csv"), row.names =FALSE)#time series line graph with facetsggplot(nba_season_history_selected,aes(x = Season,y = Value)) +geom_line() +facet_wrap(~Stat, scales="free_y") +geom_rect(aes(xmin =as.Date("1984-10-01"), xmax =as.Date("1993-07-01"), ymin =-Inf, ymax =Inf), fill ="red", alpha =0.00199) +geom_rect(aes(xmin =as.Date("1995-03-19"), xmax =as.Date("1998-07-01"), ymin =-Inf, ymax =Inf), fill ="red", alpha =0.00199) +geom_rect(aes(xmin =as.Date("2001-10-01"), xmax =as.Date("2003-07-01"), ymin =-Inf, ymax =Inf), fill ="red", alpha =0.00199) +geom_rect(aes(xmin =as.Date("2003-10-01"), xmax =as.Date("2024-07-01"), ymin =-Inf, ymax =Inf), fill ="purple", alpha =0.00199) +geom_rect(aes(xmin =as.Date("1969-10-01"), xmax =as.Date("1989-07-01"), ymin =-Inf, ymax =Inf), fill ="gold", alpha =0.00199) +geom_rect(aes(xmin =as.Date("1956-10-01"), xmax =as.Date("1969-07-01"), ymin =-Inf, ymax =Inf), fill ="green", alpha =0.00199) +labs(title ="NBA League Averages Per Game History, with GOAT Evolution",subtitle ="Green = Russell; Gold = Kareem; Red = MJ; Purple = LeBron",x ="Season",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)))+My_Theme_WithY()
Code
library(plotly)p <-ggplot(nba_season_history_selected,aes(x = Season,y = Value)) +geom_line() +facet_wrap(~Stat, scales="free_y") +labs(title ="NBA League Averages Per Game History",x ="Season",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)))# Use ggplotly() to make it interactivep <-ggplotly(p)
2 Playoff Clutch
Effective FG% in last 2 minutes of regulation, score within 5 points
Code
clutch_url <-"https://stathead.com/tiny/oQ2cW"clutch_url_page <-read_html(clutch_url)clutch_nba_history <-html_table(clutch_url_page)[[1]]clutch_nba_history <- clutch_nba_history |>filter(!is.na(as.numeric(Rk))) |>mutate(across(3:14, as.numeric)) |>mutate(across(3:14, ~replace_na(., 0)))# Rename the %Ast'd columnclutch_nba_history <- clutch_nba_history |>rename(Ast_d_perc =`%Ast'd`) |>rename(Ast_d_tot =`Ast'd`) |>rename(eFG =`eFG%`)weighted_average_eFG <- clutch_nba_history |>summarize(weighted_avg_eFG =sum(eFG * FGA) /sum(FGA)) |>pull(weighted_avg_eFG) |>as.numeric()#save as csv file write.csv(clutch_nba_history, here("output_data", "clutch_nba_history.csv"), row.names =FALSE)ggplot(clutch_nba_history,aes(x = FGA,y = eFG)) +geom_point(aes(size = Ast_d_tot, fill = Ast_d_perc), alpha =0.7, shape =21, stroke = .5) +geom_text_repel(aes(label = Player), size =5, color ="black", max.overlaps =5) +scale_fill_gradient2(low ="blue", mid ="white", high ="red",midpoint =median(clutch_nba_history$Ast_d_perc, na.rm =TRUE)) +geom_hline(yintercept = weighted_average_eFG, linetype ="dashed", color ="red", linewidth = .5) +labs(title ="Most Clutch NBA Players Ever in Playoffs (since 1996)",subtitle ="Effective FG% in last 2 minutes of regulation, score within 5 points.\nUniverse of players limited to top 100 players of FGA in same situation",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x ="FGA",y ="Effective FG%",size ="Number of FGA assisted",fill ="% of FGA assisted") +theme(legend.position =c(.9, .9)) +My_Theme_WithY()
2.1 Super Clutch Situations
Effective FG% in last 1 minutes of regulation, shot to tie or take lead.
Code
clutch2_url <-"https://stathead.com/tiny/79Hlz"clutch2_url_page <-read_html(clutch2_url)clutch2_nba_history <-html_table(clutch2_url_page)[[1]]clutch2_nba_history <- clutch2_nba_history |>filter(!is.na(as.numeric(Rk))) |>mutate(across(3:14, as.numeric)) |>mutate(across(3:14, ~replace_na(., 0)))# Rename the %Ast'd columnclutch2_nba_history <- clutch2_nba_history |>rename(Ast_d_perc =`%Ast'd`) |>rename(Ast_d_tot =`Ast'd`) |>rename(eFG =`eFG%`)weighted_average_eFG2 <- clutch2_nba_history |>summarize(weighted_avg_eFG =sum(eFG * FGA) /sum(FGA)) |>pull(weighted_avg_eFG) |>as.numeric()#save as csv file write.csv(clutch2_nba_history, here("output_data", "clutch2_nba_history.csv"), row.names =FALSE)ggplot(clutch2_nba_history,aes(x = FGA,y = eFG)) +geom_point(aes(size = Ast_d_tot, fill = Ast_d_perc), alpha =0.7, shape =21, stroke = .5) +geom_text_repel(aes(label = Player), size =5, color ="black", max.overlaps =10) +scale_fill_gradient2(low ="blue", mid ="white", high ="red",midpoint =median(clutch2_nba_history$Ast_d_perc, na.rm =TRUE)) +geom_hline(yintercept = weighted_average_eFG2, linetype ="dashed", color ="red", linewidth = .5) +theme_minimal() +labs(title ="Most clutch NBA Players Ever in Playoffs (since 1996)",subtitle ="Effective FG% in last 1 minutes of regulation, shot to tie or take lead.\nUniverse of players limited to top 100 players of FGA in same situation",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x ="FGA",y ="Effective FG%",size ="Number of FGA assisted",fill ="% of FGA assisted") +theme(legend.position =c(.9, .9)) +My_Theme_WithY()
2.2 Super Super Clutch Situations
Effective FG% in last 50 sec of regulation, shot to take lead.
Code
clutch3_url <-"https://stathead.com/tiny/A0FDq"clutch3_url_page <-read_html(clutch3_url)clutch3_nba_history <-html_table(clutch3_url_page)[[1]]clutch3_nba_history <- clutch3_nba_history |>filter(!is.na(as.numeric(Rk))) |>mutate(across(3:14, as.numeric)) |>mutate(across(3:14, ~replace_na(., 0)))# Rename the %Ast'd columnclutch3_nba_history <- clutch3_nba_history |>rename(Ast_d_perc =`%Ast'd`) |>rename(Ast_d_tot =`Ast'd`) |>rename(eFG =`eFG%`)weighted_average_eFG3 <- clutch3_nba_history |>summarize(weighted_avg_eFG =sum(eFG * FGA) /sum(FGA)) |>pull(weighted_avg_eFG) |>as.numeric()#save as csv file write.csv(clutch3_nba_history, here("output_data", "clutch3_nba_history.csv"), row.names =FALSE)ggplot(clutch3_nba_history,aes(x = FGA,y = eFG)) +geom_point(aes(size = Ast_d_tot, fill = Ast_d_perc), alpha =0.7, shape =21, stroke = .5) +geom_text_repel(aes(label = Player), size =5, color ="black", max.overlaps =10) +scale_fill_gradient2(low ="blue", mid ="white", high ="red",midpoint = .5) +geom_hline(yintercept = weighted_average_eFG3, linetype ="dashed", color ="red", linewidth = .5) +theme_minimal() +labs(title ="Most clutch NBA Players Ever in Playoffs (since 1996)",subtitle ="Effective FG% in last 50 sec of regulation, shot to take lead.\nUniverse of players limited to top 100 players of FGA in same situation",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x ="FGA",y ="Effective FG%",size ="Number of FGA assisted",fill ="% of FGA assisted") +theme(legend.position =c(.9, .9)) +My_Theme_WithY()
3 GOAT PG
Playoff VORP by PG position
Code
pg_playoff_vorp_url <-"https://stathead.com/tiny/FeISc"pg_playoff_vorp_page <-read_html(pg_playoff_vorp_url)pg_playoff_vorp <-html_table(pg_playoff_vorp_page)[[1]]pg_playoff_vorp <- pg_playoff_vorp |>select(-3) |>mutate(across(3:26, ~replace_na(., 0)))# Rename the %Ast'd column# pg_playoff_vorp <- pg_playoff_vorp |># rename(Ast_d_perc = `%Ast'd`) |># rename(Ast_d_tot = `Ast'd`) |># rename(eFG = `eFG%`)#save as csv filewrite.csv(pg_playoff_vorp, here("output_data", "pg_playoff_vorp.csv"), row.names =FALSE)ggplot(pg_playoff_vorp,aes(x = VORP,y = BPM)) +geom_point(aes(size = VORP, fill = BPM), alpha =0.7, shape =21, stroke = .5) +geom_text_repel(aes(label = Player), size =5, color ="black", max.overlaps =5) +scale_fill_gradient2(low ="blue", mid ="white", high ="red",midpoint =median(pg_playoff_vorp$BPM, na.rm =TRUE)) +theme_minimal() +labs(title ="LeBron could've been GOAT Point Guard, But Mostly Played Foward Positions",subtitle ="VORP and BPM in Playoffs While Playing PG Position.\nUniverse of players limited to top 100 players of Playoff VORP while playing PG",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x ="Playoff VORP",y ="Playoff BPM",size ="Playoff VORP",fill ="Playoff BPM") +theme(legend.position =c(.9, .9)) +My_Theme_WithY()
4 Best Defenders
Code
# Function to extract table from URLextract_table_from_url <-function(url) { page <-read_html(url) table <-html_table(page)[[1]]return(table)}# Extract tablesblockper1 <-extract_table_from_url("https://stathead.com/tiny/tdXLR")blockper2 <-extract_table_from_url("https://stathead.com/tiny/hBlZm")stealper1 <-extract_table_from_url("https://stathead.com/tiny/VWeQT")stealper2 <-extract_table_from_url("https://stathead.com/tiny/krA65")drebper1 <-extract_table_from_url("https://stathead.com/tiny/MxxJY")drebper2 <-extract_table_from_url("https://stathead.com/tiny/JhBJx")# Clean tables. Select `Player`, `BLK%`, `STL%`, `REB%`, MP, GS, DBPM, OBPM, BPM, VORP, Pos columns# Function to remove duplicate columns and select specific columnsclean_and_select <-function(df, columns_to_select) {# Remove duplicate columns df <- df[, !duplicated(names(df))]# Select specific columns df |>select(all_of(columns_to_select))}# Columns to selectselected_columns <-c("Player", "BLK%", "STL%", "DRB%", "MP", "GS", "DBPM", "OBPM", "BPM", "VORP", "Pos")# Apply the function to each dataframeblockper1_clean <-clean_and_select(blockper1, selected_columns)blockper2_clean <-clean_and_select(blockper2, selected_columns)stealper1_clean <-clean_and_select(stealper1, selected_columns)stealper2_clean <-clean_and_select(stealper2, selected_columns)drebper1_clean <-clean_and_select(drebper1, selected_columns)drebper2_clean <-clean_and_select(drebper2, selected_columns)# Combine tables. First combine blocks, steal, and dreb, then join by `Player`# Combine blocks, steals, and drebblockper <- blockper1_clean |>bind_rows(blockper2_clean)stealper <- stealper1_clean |>bind_rows(stealper2_clean)drebper <- drebper1_clean |>bind_rows(drebper2_clean)# bind rows for blocks steals and drebcombined_def <- blockper |>bind_rows(stealper, drebper) |># remove duplicate playersdistinct()# Join by `Player`, keep all observationscombined_def <- combined_def |>select("Player", "BLK%", "STL%", "DRB%", "MP", "GS", "DBPM", "OBPM", "BPM", "VORP", "Pos") |>mutate("Stocks%"=`BLK%`+`STL%`) |>mutate("StockDRB%"=`BLK%`+`STL%`+`DRB%`)# gets labels of Michael Jordan and LeBron James to add to graphmj_lbj <- combined_def |>filter(Player %in%c("Michael Jordan", "LeBron James"))combined_def |>ggplot(aes(x =`BLK%`, y =`STL%`, fill = DBPM, size =`DRB%`)) +geom_point(shape =21, alpha = .9) +scale_fill_gradient2(low ="blue", mid ="white", high ="red", midpoint =0) +scale_size_continuous(range =c(.1, 7)) +# add labels for Michael Jordan and LeBron Jamesgeom_text_repel(data = mj_lbj, aes(label =paste(Player)), size =5, max.overlaps =10) +My_Theme_WithY() + ggrepel::geom_text_repel(aes(label =paste(Player)),size =5, max.overlaps =10) +labs(title ="Best Defenders: Block and Steal %",subtitle ="X = BLK%, Y = STL%, Size = DRB%, Fill = DBPM.\nUniverse of players limited to combined top 400 players for Block %, Steal %, or DREB%",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x ="Block Percentage", y ="Steal Percentage",fill ="Defensive Box Plus-Minus")
Code
combined_def |>ggplot(aes(x =`Stocks%`, y =`DRB%`, fill = DBPM, size =`MP`)) +geom_point(shape =21, alpha = .9) +scale_fill_gradient2(low ="blue", mid ="white", high ="red", midpoint =0) +scale_size_continuous(range =c(.1, 7)) +geom_text_repel(data = mj_lbj, aes(label =paste(Player)), size =5, max.overlaps =10) +My_Theme_WithY() + ggrepel::geom_text_repel(aes(label =paste(Player)),size =5, max.overlaps =10) +labs(title ="Best Defenders: Stocks % and DREB %",subtitle ="X = BLK%, Y = STL%, Size = MP, Fill = DBPM.\nUniverse of players limited to combined top 400 players for Block %, Steal %, or DREB%",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x ="Stocks Percentage", y ="Defensive Rebound Percentage",fill ="Defensive Box Plus-Minus")
Wemby already unreal on defense. Unlike the two other outliers (Drummond and Whiteside), Wemby isn’t a stiff while defending on the perimeter, and seems much smarter overall on defense (i.e. doesn’t chase blocks and easily fall for pumpfakes). And obviously he’s a much better shooter, passer, and playmaker on offense (also indicating more skill and spatial awareness, which is good for team defense).
I think he’ll go down as GOAT defender. Something to track.
Code
# # Horizontal bar graph by DBPMcombined_def |>arrange(desc(DBPM)) |>head(50) |>ggplot(aes(x=reorder(Player, DBPM), y=DBPM)) +geom_segment(aes(x=reorder(Player, DBPM), xend=reorder(Player, DBPM), y=0, yend=DBPM), size=1, color="black") +geom_point(color="black", size=10) +geom_text(aes(label=sprintf("%.1f", DBPM)), color="white") +# Formatting label to 1 decimal placecoord_flip() +theme_minimal() +labs(title="Top 50 Career DBPM", subtitle="Ranking players by Career Regular Season.\nUniverse of players limited to combined top 400 players for Block %, Steal %, or DREB%",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x="Player",y="DBPM") +My_Theme_WithY()
5 Greatest Games Ever
Code
# Download data from BBall Ref# Function to process the dataprocess_data <-function(url, bpm =TRUE) { page <-read_html(url) data <-html_table(page)[[1]]if (bpm) { data <- data |>rename("BPM2"=3, "MP2"=12, Road =8) |>select(-c(1, 3, 12)) } else { data <- data |>rename("GS2"=3, Road =7) |>select(-c(1, 3)) }return(data)}# URLs for BPM and GS databpm_rs_urls <-c("https://stathead.com/tiny/x9frE","https://stathead.com/tiny/9bRhn","https://stathead.com/tiny/rWFnt","https://stathead.com/tiny/zSeTH","https://stathead.com/tiny/gYZEl")gs_rs_urls <-c("https://stathead.com/tiny/mqR2d","https://stathead.com/tiny/C3djd","https://stathead.com/tiny/KE9qJ","https://stathead.com/tiny/tghKu","https://stathead.com/tiny/gxAjF")# Processing the datagreatest_games_bpm_rs_list <-lapply(bpm_rs_urls, process_data, bpm =TRUE)greatest_games_gs_rs_list <-lapply(gs_rs_urls, process_data, bpm =FALSE)greatest_games_bpm_combined <-bind_rows(greatest_games_bpm_rs_list)greatest_games_gs_combined <-bind_rows(greatest_games_gs_rs_list)
Code
#Combine both datasets by rowsgreatest_games <- greatest_games_bpm_combined |>bind_rows(greatest_games_gs_combined)# Remove duplicate rows by Player and Dategreatest_games <- greatest_games |>distinct(Player, Date, .keep_all =TRUE)# Write CSVwrite_csv(greatest_games, "greatest_games.csv")# Create Colored Bubble Plot in GGPlot with GmSc on X, BPM/100*MP on Y, size of bubble is MP, colore is "+/-"greatest_games <- greatest_games |>filter(!is.na(`GmSc`)) |>filter(!is.na(`BPM`)) |>mutate(GameBPM = BPM/25*MP)# Filter all LeBron gameslebron_greatest_games <- greatest_games |>filter(Player =="LeBron James")greatest_games |>ggplot(aes(x = GmSc, y = BPM, fill = PTS)) +geom_point(shape =21, alpha = .9, size =3) +scale_fill_gradient2(low ="blue", mid ="white", high ="red", midpoint =50) +scale_x_continuous(limits =c(40, 70)) +theme(legend.position =c(.9, .9)) +My_Theme_WithY() +# add ggrepel labels of `Player` and `Date` to the plot ggrepel::geom_text_repel(aes(label =paste(Player, Date)), size =5, nudge_x =1, nudge_y =1, max.overlaps =5) +# add lebron james labels to the plotgeom_text_repel(data = lebron_greatest_games, aes(label =paste(Player, Date)), size =5, nudge_x =-1, nudge_y =1, max.overlaps =5) +labs(title ="Greatest Single-Game Individual Performances Since 1972",subtitle ="Single-Game BPM and Game Score.\nUniverse of players limited to top 1000 Game Score games.",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x ="Game Score",y ="Box Score Plus-Minus",fill ="Points Scored")
6 Greatest Team Offense Ever
Adjust for league offensive rating
Greatest Team Offenses Since 1972
Team
Season
ORtg+
ORtg
SRS
DAL
2003-04
1.089
112.1
4.86
PHO
2004-05
1.079
114.5
7.08
GSW
2015-16
1.076
114.5
10.38
DAL
2001-02
1.074
112.2
4.41
UTA
1997-98
1.073
112.7
5.73
PHO
2009-10
1.072
115.3
4.67
CHI
1996-97
1.072
114.4
10.70
SAC
2003-04
1.072
110.3
5.41
CHI
1995-96
1.071
115.2
11.80
BOS
1987-88
1.069
115.4
6.15
DEN
1981-82
1.069
114.3
0.13
PHO
2006-07
1.069
113.9
7.28
DAL
2002-03
1.069
110.7
7.90
LAL
1986-87
1.067
115.6
8.32
CHI
1991-92
1.067
115.5
10.07
LAL
1997-98
1.066
111.9
6.88
UTA
1996-97
1.065
113.6
7.97
LAC
2014-15
1.064
112.4
6.80
IND
1998-99
1.064
108.7
3.86
ORL
1994-95
1.063
115.1
6.44
7 NBA Goat
Code
# Function to extract table from URLextract_table_from_url <-function(url) { page <-read_html(url) table <-html_table(page)[[1]]return(table)}# Extract tablesr_ws_url <-c("https://stathead.com/tiny/QKiYH","https://stathead.com/tiny/RYDCj","https://stathead.com/tiny/0yUoP","https://stathead.com/tiny/NUtGM","https://stathead.com/tiny/A5XHn")p_ws_url <-c("https://stathead.com/tiny/ptbsk","https://stathead.com/tiny/EadMu","https://stathead.com/tiny/UJA6r","https://stathead.com/tiny/PWfx6","https://stathead.com/tiny/tiEyh")# Processing the datar_ws_list <-lapply(r_ws_url, extract_table_from_url)# combine them into single dataframesr_ws <-bind_rows(r_ws_list)# Processing the datap_ws_list <-lapply(p_ws_url, extract_table_from_url)# combine them into single dataframesp_ws <-bind_rows(p_ws_list)
# Bubble Graphggplot(goat_data,aes(x=Rate_Z,y=Total_Z))+geom_point(alpha=0.7, size=5, shape=21, aes(fill=GOAT_Score)) +scale_fill_gradient2(midpoint=0, low="blue", mid="white",high="red", space ="Lab", name="GOAT Score") +scale_size(range =c(2, 24)) +geom_text_repel(aes(label=Player), size=5, color="black", max.overlaps =15) +theme_minimal()+labs(title="GOAT NBA Players", subtitle="Ranking players by value per possession and total career value (playoffs and regular season). BPM and VORP used for post-1974 players, WS and WS/48 used for pre-1974 players.\nUniverse of players limited to unique 1200 players listed in top 1000 list for regular season or playoff WS",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x="Value Per Possession in Regular Season and Playoffs",y="Value (Total) in Regular Season and Playoffs",color="GOAT Scores (Mean of Rate and Total Z-Scores") +My_Theme_WithY()
Code
goat_data |>mutate(Rank_Player =paste(Rank, ". ", Player, sep="")) |>head(100) |>ggplot(aes(x=reorder(Rank_Player, GOAT_Score), y=GOAT_Score)) +geom_segment(aes(x=Rank_Player, xend=Rank_Player, y=0, yend=GOAT_Score), size=1, color="black") +geom_point(aes(color=if_else(To_YearEnd_r ==2024, "2024", "Other")), size=10) +scale_color_manual(values =c("2024"="red", "Other"="black")) +geom_text(aes(label=sprintf("%.1f", GOAT_Score)), color="white") +# Formatting label to 1 decimal placecoord_flip() +labs(title="Top 100 NBA Players", subtitle="Ranking players by value per possession and total career value (playoffs and regular season). BPM and VORP used for post-1974 \nplayers, WS and WS/48 used for pre-1974 players. Universe of players limited to unique 1200 players listed in top 1000 list \nfor regular season or playoff WS",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x="Player",y="GOAT Score (Mean of Rate and Total Z-Scores)") +My_Theme_WithY() +theme(legend.position ="none")
8 Wemby and Chet Tracker: AKA Greatest Rookies Ever
Code
# Function to extract table from URLextract_table_from_url <-function(url) { page <-read_html(url) table <-html_table(page)[[1]]return(table)}# Extract tablesrookies_1974_2024_tables <-c("https://stathead.com/tiny/skxFd","https://stathead.com/tiny/3BQme","https://stathead.com/tiny/CpSrM","https://stathead.com/tiny/NwDZB","https://stathead.com/tiny/YC5jw","https://stathead.com/tiny/UCpvt","https://stathead.com/tiny/xSwSt","https://stathead.com/tiny/f02hE","https://stathead.com/tiny/COE1s","https://stathead.com/tiny/XyVKE","https://stathead.com/tiny/kiFnl","https://stathead.com/tiny/UBgqZ","https://stathead.com/tiny/NpXJY","https://stathead.com/tiny/aVbhc") # min 100 min
Code
# Processing the datarookies_1974_2024 <-lapply(rookies_1974_2024_tables, extract_table_from_url)# combine them into single dataframesrookies_1974_2024 <-bind_rows(rookies_1974_2024)# Clean up the datarookies_1974_2024 <- rookies_1974_2024 |>select(c(-3)) |>rename(VORP ="VORP...20") |>separate(Season, into =c("start_year", "Season"), sep ="-") |>mutate(Season =paste0(substr(start_year, 1, 2), Season),Season =if_else(Season =="1900", "2000", Season),Season =year(ymd(paste0(Season, "-01-01"))))# Get average games playedavg_games_played_url <-read_html("https://www.basketball-reference.com/leagues/NBA_2024.html#per_game-team")avg_games_played <-html_table(avg_games_played_url)[[5]]# Filter only last row, select column 3avg_games_played <- avg_games_played |>filter(Team =="League Average") |>select(3) |># get data as numerical "Values"pull()# Prorate VORP for Season 2024 based on remaining games prorate <-82/avg_games_playedrookies_1974_2024 <- rookies_1974_2024 |>mutate(VORP_prorated =if_else(Season ==2024, VORP * prorate, VORP)) |># round to 1 decimalmutate(VORP_prorated =round(VORP_prorated, 1))
Code
# Bubble Graph# Highlight Player "Victor Wembenyama" on bubble graphs (geom_point is filled red)rookies_1974_2024 <- rookies_1974_2024 |>mutate(highlight =if_else(Player %in%c("Victor Wembanyama", "Chet Holmgren", "Scoot Henderson"), "yes", "no"))ggplot(rookies_1974_2024, aes(x=BPM, y=VORP_prorated)) +geom_point(alpha=0.7, shape=21, size=3, aes(color=highlight, fill=highlight)) +scale_color_manual(values =c("yes"="black", "no"="black")) +# Ensure borders are black for allscale_fill_manual(values =c("yes"="red", "no"="black")) +# Highlight fill color for Victor Wembanyamageom_text_repel(data =subset(rookies_1974_2024, highlight =="yes"), # Filter for highlighted playeraes(label=Player), size=5, color="black",nudge_x =1, nudge_y =1,box.padding =0.35, # Slight padding for better visibilitypoint.padding =0.2, # Adjusted for optimal positioningmax.overlaps =5) +geom_text_repel(aes(label=Player), size=5, color="black",nudge_x =0, nudge_y =0,box.padding =0,point.padding =0,max.overlaps =3) +labs(title="NBA Rookies by BPM and VORP (BPM by minutes played) (1974-2024)",subtitle="Ranking NBA rookies by value per 100 possessions (BPM) and total value (VORP), 2024 rookies VORP prorated to full season.\nUniverse of players limited to all rookies since 1974 with at least 100 minutes played",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x="Value per 100 possessions (BPM)",y="Total Value (VORP)") +guides(color=FALSE, fill=FALSE) +# Hide the legend for 'highlight'My_Theme_WithY()
Code
rookies_age_less_21 <- rookies_1974_2024 |>filter(Age<21)ggplot(rookies_age_less_21, aes(x=BPM, y=VORP_prorated)) +geom_point(alpha=0.7, shape=21, size=3, aes(color=highlight, fill=highlight)) +scale_color_manual(values =c("yes"="black", "no"="black")) +# Ensure borders are black for allscale_fill_manual(values =c("yes"="red", "no"="black")) +# Highlight fill color for Victor Wembanyamageom_text_repel(aes(label=Player), size=5, color="black",nudge_x =0, nudge_y =0,box.padding =0,point.padding =0,max.overlaps =10) +labs(title="NBA Rookies (20 and younger) by BPM and VORP (BPM by minutes played) (2024)",subtitle="Ranking NBA rookies by value per 100 possessions (BPM) and total value (VORP), 2024 rookies VORP prorated to full season.\nUniverse of players limited to all rookies since 1974 age 20 and younger with at least 100 minutes played",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x="Value per 100 possessions (BPM)",y="Total Value (VORP)") +guides(color=FALSE, fill=FALSE) +# Hide the legend for 'highlight'My_Theme_WithY()
Code
# Highlight Player "Victor Wembenyama" on bubble graphs (geom_point is filled red)rookies_2024 <- rookies_1974_2024 |>mutate(highlight =if_else(Player %in%c("Victor Wembanyama", "Chet Holmgren", "Scoot Henderson"), "yes", "no")) |>filter(Season ==2024)ggplot(rookies_2024, aes(x=BPM, y=VORP_prorated)) +geom_point(alpha=0.7, shape=21, size=3, aes(color=highlight, fill=highlight)) +scale_color_manual(values =c("yes"="black", "no"="black")) +# Ensure borders are black for allscale_fill_manual(values =c("yes"="red", "no"="black")) +# Highlight fill color for Victor Wembanyamageom_text_repel(aes(label=Player), size=5, color="black",nudge_x =0, nudge_y =0,box.padding =0,point.padding =0,max.overlaps =10) +labs(title="NBA Rookies by BPM, VORP, and Minutes Played (2024)",subtitle="Ranking 2024 NBA rookies by value per 100 possessions (BPM) and total value (VORP), 2024 rookies VORP prorated to full season.\nUniverse of players limited to all rookies since 1974 age 20 and younger with at least 100 minutes played",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x="Value per 100 possessions (BPM)",y="Total Value (VORP)") +guides(color=FALSE, fill=FALSE) +# Hide the legend for 'highlight'My_Theme_WithY()
Code
ggplot(rookies_1974_2024, aes(x=BPM, y=VORP_prorated)) +geom_point(alpha=0.7, shape=21, size=3, aes(color=highlight, fill=highlight)) +scale_color_manual(values =c("yes"="black", "no"="black")) +# Ensure borders are black for allscale_fill_manual(values =c("yes"="red", "no"="black")) +# Highlight fill color for Victor Wembanyamageom_text_repel(data =subset(rookies_1974_2024, highlight =="yes"), # Filter for highlighted playeraes(label=Player), size=5, color="black",nudge_x =1, nudge_y =1,box.padding =0.35, # Slight padding for better visibilitypoint.padding =0.2, # Adjusted for optimal positioningmax.overlaps =5) +geom_text_repel(aes(label=Player), size=5, color="black",nudge_x =0, nudge_y =0,box.padding =0,point.padding =0,max.overlaps =5) +labs(title="NBA Rookies by BPM, VORP, and Minutes Played (1974-2024), by Age",subtitle="Ranking NBA rookies by value per 100 possessions (BPM) and total value (VORP), 2024 rookies VORP prorated to full season.\nUniverse of players limited to all rookies since 1974 age 20 and younger with at least 100 minutes played",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x="Value per 100 possessions (BPM)",y="Total Value (VORP)") +guides(color=FALSE, fill=FALSE) +# Hide the legend for 'highlight'facet_wrap(~Age, ncol=3) +# Facet by 'Age', adjust 'ncol' as needed for layoutMy_Theme_WithY()
Code
# For rookies, calculate Z scores for age, BPM, and VORP_prorated. Age Z score should be reverse scale (lower age scores higher)rookies_1974_2024 <- rookies_1974_2024 |>mutate(Z_Age =scale(-Age), # Reverse scale for AgeZ_BPM =scale(BPM),Z_VORP_prorated =scale(VORP_prorated))# For rookies, create "GOAT Rookie Score", which is mean of all 3 Z scores.rookies_1974_2024 <- rookies_1974_2024 |>mutate(GOAT_Rookie_Score =rowMeans(select(rookies_1974_2024, starts_with("Z_"))))rookies_1974_2024 <- rookies_1974_2024 |>arrange(desc(GOAT_Rookie_Score)) |>mutate(Rank =rank(-GOAT_Rookie_Score, ties.method ="min"), # Calculate rankSeason_2024 =if_else(Season ==2024, TRUE, FALSE)) # Identify 2024 players
Code
rookies_1974_2024 |>mutate(Rank_Player =paste(Rank, ". ", Player, sep="")) |>head(50) |>ggplot(aes(x=reorder(Rank_Player, GOAT_Rookie_Score), y=GOAT_Rookie_Score)) +geom_segment(aes(x=Rank_Player, xend=Rank_Player, y=0, yend=GOAT_Rookie_Score), size=1, color="black") +geom_point(aes(color=if_else(Season ==2024, "2024", "Other")), size=10) +scale_color_manual(values =c("2024"="red", "Other"="black")) +geom_text(aes(label=sprintf("%.1f", GOAT_Rookie_Score)), color="white") +# Formatting label to 1 decimal placecoord_flip() +theme_minimal() +labs(title="Top 50 GOAT NBA Rookies (1974-2024) by `GOAT Rookie Score`", subtitle="GOAT Rookie Score calculated as mean of Z-Scores for Age, BPM, and VORP pro-rated to full season. 2024 rookies highlighted in red.",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x="GOAT Rookie Score (mean Z-Scores for Age, BPM, and VORP pro-rated)",y="") +# Removing y-axis label as the names will serve as labelsguides(color=FALSE) +# Remove color legendMy_Theme_WithY()
Code
ggplot(rookies_1974_2024, aes(x=Age, y=VORP_prorated, size=highlight)) +geom_point(alpha=0.4, shape=21, aes(color=highlight, fill=highlight)) +scale_color_manual(values =c("yes"="black", "no"="black")) +# Ensure borders are black for allscale_fill_manual(values =c("yes"="red", "no"="black")) +# Highlight fill color for Victor Wembanyamascale_size_manual(values =c("yes"=4, "no"=2)) +# Adjusted sizegeom_text_repel(aes(label=Player), size=5, color="black",nudge_x =0, nudge_y =0,box.padding =0,point.padding =0,max.overlaps =3) +geom_text_repel(data =subset(rookies_1974_2024, highlight =="yes"), # Filter for highlighted playeraes(label=Player), size=5, color="black",nudge_x =0, nudge_y =0,box.padding =0, # Slight padding for better visibilitypoint.padding =0, # Adjusted for optimal positioningmax.overlaps =5) +labs(title="NBA Rookies by Age and VORP",subtitle="Ranking NBA rookies by total value (VORP), 2024 rookies VORP prorated to full season.\nUniverse of players limited to all rookies since 1974 with at least 100 minutes played",caption=paste("Sources: Basketball-Reference.com.\nnickwarino.com, created", as.character(current_date)),x="Age",y="Total Value (VORP Prorated)") +guides(color=FALSE, fill=FALSE, size=FALSE) +# Hide the legend for 'highlight'My_Theme_WithY()