This code generates a visual representation of Kevin Durant’s career statistics, highlighting his performance across different seasons and teams. It combines data manipulation, visualization, and image handling to create informative and aesthetic charts that showcase his level of consistency. Inspired by data science consultant Bill Kapatsoulias.
# Load various R libraries required for data manipulation, visualization, and image handling.
library(plyr)
library(dplyr)
library(tidyverse)
library(rvest)
library(ggrepel)
library(readr)
library(RCurl)
library(prismatic)
library(extrafont)
library(jpeg)
library(png)
library(grid)
library(magrittr)
# Get player stats from source (basketball-reference)
# Add player name and player slug
player <- "Kevin Durant"
slug <- "duranke01"
# define player page URL and player image URL
url <- paste0("https://www.basketball-reference.com/players/d/duranke01", substr(slug, 1, 1), "/", slug, ".html")
image_url <- paste0("https://cdn.nba.com/headshots/nba/latest/1040x760/201142.png", slug, ".png")
# Clean and prepare the retrieved data for visualization, including calculating per-game averages.
# Define the URL
url <- "https://www.basketball-reference.com/players/d/duranke01.html"
# Read the HTML content
page <- read_html(url)
# Read total stats
total_stat <- url %>%
read_html %>%
html_node("#totals") %>%
html_table(fill = TRUE)
# Read advanced stats
adv_stat <- url %>%
read_html %>%
html_node("#advanced") %>%
html_table()
# Merge stats tables
total_stats <- merge(total_stat, adv_stat, by=c("Season","Age", "Tm", "Lg", "Pos", "G", "MP"))
View(total_stats)
# Get RAPTOR ratings (a robust algorithm for player tracking data while on the court. Included for visibility)
RAPTOR_hist <- read.csv("https://github.com/fivethirtyeight/data/raw/master/nba-raptor/historical_RAPTOR_by_player.csv?raw=true")
RAPTOR_mod <- read.csv("https://github.com/fivethirtyeight/data/raw/master/nba-raptor/modern_RAPTOR_by_player.csv")
RAPTOR <- rbind.fill(RAPTOR_hist, RAPTOR_mod)
RAPTOR <- unique(RAPTOR)
# Merge all stats data and remove duplicate rows with the "unique" function
total_stats$player_name <- player
total_stats$season <- paste0(substr(total_stats$Season,1,2),substr(total_stats$Season,6,7))
total_stats$season <- str_replace(total_stats$season, "1900", "2000")
all_stats <- merge(total_stats, RAPTOR, by=c("player_name", "season"))
all_stats <- unique(all_stats)
View(all_stats)
# Select stats
str(all_stats)
# use below section for basic stats when you want to use the average...the stats used can be any
main_stats <- c("PTS","TRB","AST","STL","BLK","GP"="G")
stats <- main_stats
df <- all_stats %>% select(Season, "Team"=Tm, unlist(stats))
df_avg <- df[,3:(ncol(df)-1)]/df$GP
df <- cbind(df[,1:2],df_avg)
# create the final data frame
df_stats <- df %>% gather(Stat_cat, Stat_val, 3:ncol(df))
# clean the data
df_stats <- dplyr::filter(df_stats, grepl('-', Season))
df_stats <- dplyr::filter(df_stats, !grepl('TOT', Team))
df_stats <- unique(df_stats)
df_stats <- merge(df_stats,data.frame("Season"=total_stats$Season,"GP"=total_stats$G), by="Season")
# For some reason, the 2022-23 season was omitted so added manually below, Durant's last season
# Create data frames for each row to be added
df_2022_23_1 <- data.frame(
Season = "2022-23",
Team = "BRK",
Stat_cat = c("PTS", "TRB", "AST", "STL", "BLK"),
Stat_val = c(26.2, 6.71, 5.31, 0.82, 1.46),
GP = 39
)
df_2022_23_2 <- data.frame(
Season = "2022-23",
Team = "PHO",
Stat_cat = c("PTS", "TRB", "AST", "STL", "BLK"),
Stat_val = c(26.0, 6.375, 3.5, 0.25, 1.25),
GP = 8
)
# Append the new rows to df_stats
df_stats <- rbind(df_stats, df_2022_23_1, df_2022_23_2)
# Creating the visual
library(dplyr)
library(ggrepel)
library(showtext)
## Loading required package: sysfonts
## Loading required package: showtextdb
##
## Attaching package: 'showtextdb'
## The following object is masked from 'package:extrafont':
##
## font_install
# Chart creation (Get columns and pivot wider)
p <- df_stats %>%
ggplot(aes(x = paste0(Season," ",Team,"\n ",GP, "GP"),
y = Stat_val,
label=Stat_cat)) +
# Add points & customize colors, apply the position_jitter function to avoid overlap with plot points
geom_point(position = position_jitter(width = 0.2),
aes(fill = Stat_cat),
size = 2,
color = 'black',
shape = 21) +
scale_fill_manual(values = c("PTS" = "#e56020", "TRB" = "purple3", "AST" = "#006bb6",
"STL" = "#fdb927", "BLK" = "#bec0c2"))
# Add lines
p <- p + geom_line(aes(group = Stat_cat), size=0.05)
# Add labels & bold them
p <- p + geom_text_repel(data = filter(df_stats, Season == last(Season)),
aes(label = Stat_cat),
size = 3,
box.padding = 0.5,
point.padding = 0.5,
force = 50,
segment.size = 0.2,
colour = "black",
segment.color = "grey50",
fontface = "bold",
hjust = 1, # Adjust this value to change the horizontal position of the labels
position = position_jitter(height = 0.2, width = 0.2)) + # Adjust height and width to change the position jitter+ # bold labels
# Bold labels
theme(text = element_text(face = "bold"))
# Edit axis
p <- p + scale_y_continuous(breaks = seq(0, ceiling(max(df_stats$Stat_val,na.rm = TRUE)), 5))
# Add title, subtitle and caption
p <- p + labs(title = paste0(player," Career Statistical Timeline"),
subtitle = paste0(min(df_stats$Season)," - ",max(df_stats$Season)),
x = "",
y = "",
caption = c(""))
# Add theme that removes the legend, modifies title, subtitle, captions, and x-axis
p <- p + theme(legend.position = 'none',
plot.title = element_text(size = 17, face="bold", hjust = .5),
plot.subtitle = element_text(face = 'italic', size = 13, hjust = .5),
axis.text.x=element_text(angle=60, hjust=1, size=10),
plot.caption = element_text(color = 'gray40'),
plot.margin = margin(10, 10, 15, 10))
## Add team changes lines
# Team line breaks
team_changes <- NULL
unique_combinations <- unique(paste0(df_stats$Season, df_stats$Team))
if (length(unique_combinations) > 0) {
i <- 1
while (i < length(unique_combinations)) {
team_change <- NULL
team_change$year <- i + 0.5
team_change$team1 <- substr(unique_combinations, 8, 11)[i]
team_change$team2 <- substr(unique_combinations, 8, 11)[i + 1]
team_change <- as.data.frame(team_change)
ifelse(team_change$team1 == team_change$team2, "", team_changes <- rbind(team_change, team_changes))
i <- i + 1
}
}
p <- p + geom_vline(xintercept=team_changes$year, colour="grey6")
# Create and add custom theme
theme_sweep <- function () {
theme(
# get rid of panel grids
panel.grid.major = element_line(size = (0.075), colour="grey"),
panel.grid.minor = element_blank(),
#panel.border = element_blank(),
# Change plot and panel background
plot.background = element_rect(fill = 'transparent', color = 'transparent'),
panel.background = element_rect(fill = "transparent", color = 'transparent'),
plot.title = element_text(size=18, hjust = 0.5),
plot.subtitle = element_text(size=13, hjust = 0.5),
plot.caption = element_text(size=8, hjust=c(1, 0))
)
}
# Add custom theme to the graph
p <- p + theme_sweep()
# Add theme that removes the legend, modifies title, subtitle, captions, and x-axis
p <- p + theme(legend.position = 'none',
plot.title = element_text(size = 17, face="bold", hjust = .5, family = "Bahnschrift"),
plot.subtitle = element_text(face = 'italic', size = 13, hjust = .5, family = "Bahnschrift"),
axis.text.x=element_text(angle=60, hjust=1, size=8, color = "black", face="bold"),
plot.caption = element_text(color = 'gray40'),
plot.margin = margin(10, 10, 10, 10))
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
# Now pulling data to inform on shooting numbers
str(all_stats)
# make a list of stats we care about
shooting_stats <- c("3P%", "FT%", "eFG%", "TS%", "USG%")
stats <- shooting_stats
# create the data frame for the chart, by using the Season, Team, and the vector of all stats
df <- total_stats %>% select(Season, "Team"=Tm, unlist(stats))
# multiply values by 100 to rescale
df$'3P%' <- df$'3P%'*100
df$'FT%' <- df$'FT%'*100
df$'eFG%' <- df$'eFG%'*100
df$'TS%' <- df$'TS%'*100
# create the final data frame
shooter_stats <- df %>% gather(Stat_cat, Stat_val, 3:ncol(df))
# clean the data
shooter_stats <- dplyr::filter(shooter_stats, grepl('-', Season))
shooter_stats <- dplyr::filter(shooter_stats, !grepl('TOT', Team))
shooter_stats <- unique(shooter_stats)
shooter_stats <- merge(shooter_stats,data.frame("Season"=total_stats$Season,"GP"=total_stats$G), by="Season")
# Remove duplicates and keep only the unique rows
shooter_stats <- shooter_stats %>%
distinct(Stat_val, .keep_all = TRUE)
# For rows with "PHO" in the "Team" column, set the "GP" to 8, since the data frame can get out of whack sometimes
shooter_stats <- shooter_stats %>%
mutate(GP = ifelse(Team == "PHO", 8, GP))
View(shooter_stats)
# Now publishing data related to the player's career shooting performance
library(dplyr)
library(ggrepel)
# Chart creation (Get columns and pivot wider)
p <- shooter_stats %>%
ggplot(aes(x = paste0(Season," ",Team,"\n ",GP, "GP"),
y = Stat_val,
label=Stat_cat)) +
# Add points & customize colors
geom_point(position = position_jitter(width = 0.1),
aes(fill = Stat_cat),
size = 2,
color = 'black',
shape = 21) +
scale_fill_manual(values = c("3P%" = "#E56020", "TS%" = "#1D1160", "eFG%" = "#63727a",
"FT%" = "#FFC72C", "USG%" = "#007AC1")) +
# Add lines
geom_line(aes(group = Stat_cat), size=0.05) +
# Add labels
geom_text_repel(data = filter(shooter_stats, Season == last(Season)),
aes(label = Stat_cat),
size = 3,
box.padding = 0.2,
point.padding = 0.2,
force = 50,
segment.size = 0.2,
colour = "black",
segment.color = "grey50",
fontface = "bold") # bold labels
## Add team changes lines
# Team line breaks
team_changes <- NULL
unique_combinations <- unique(paste0(shooter_stats$Season, shooter_stats$Team))
if (length(unique_combinations) > 0) {
i <- 1
while (i < length(unique_combinations)) {
team_change <- NULL
team_change$year <- i + 0.5
team_change$team1 <- substr(unique_combinations, 8, 11)[i]
team_change$team2 <- substr(unique_combinations, 8, 11)[i + 1]
team_change <- as.data.frame(team_change)
ifelse(team_change$team1 == team_change$team2, "", team_changes <- rbind(team_change, team_changes))
i <- i + 1
}
}
p <- p + geom_vline(xintercept=team_changes$year, colour="grey6") +
# Edit axis
scale_y_continuous(breaks = seq(0, ceiling(max(shooter_stats$Stat_val,na.rm = TRUE)), 5)) +
# Add title, subtitle, and caption
labs(title = paste0(player," Career Shooting Splits"),
subtitle = paste0(min(shooter_stats$Season)," - ",max(shooter_stats$Season)),
x = "",
y = "",
caption = c("")) +
# Remove legend and modify title, subtitle, captions, and x-axis
theme(
plot.background = element_rect(fill = 'transparent', color = 'transparent'),
panel.background = element_rect(fill = "transparent", color = 'transparent'),
legend.position = 'none',
plot.title = element_text(size = 17, face = "bold", hjust = .5, family = "Bahnschrift"),
plot.subtitle = element_text(size = 13, hjust = .5, family = "Bahnschrift", face = "italic", margin = margin(b = 10)), # Adjust the margin here),
axis.text.x = element_text(angle = 60, hjust = 1, size = 7, color = "black", face = "bold"),
plot.caption = element_text(color = 'gray40'),
plot.margin = margin(10, 10, 10, 10),
axis.text = element_text(family = "Helvetica Neue"),
# Modify grid lines
panel.grid.major = element_line(color = "lightgrey", size = 0.2),
panel.grid.minor = element_line(color = "lightgrey", size = 0.1)
)
print(p)
Plots Explained:
The vertical black lines denote Durant changing teams throughout his career.
The labels on the plot are duplicated because of Durant playing on multiple teams in 2022-23
Career Statistical Timeline:
“PTS”: Points per Game
“TRB”: Total Rebounds per Game
“AST”: Assists per Game
“STL”: Steals per Game
“BLK”: Blocks per Game
Career Shooting Split Chart:
This chart utilizes data from basketball-reference’s adjusted shooting data that houses individual advanced shot metrics
“TS% (True Shooting %) takes into account 2-point shots, 3-point shots, and free throws
“USG% (Usage %) is an estimate of the percentage of the team’s plays used by the player while they are on the court
Recap:
Kevin Durant is one of the most consistent offensive performers to ever play the game and these visuals aid in asserting that point. Since his rookie season, Durant has improved his effective field goal percentage and true shooting percentage by over 15%.
Durant’s usage rate while on the court during his career has never been under 27%. That means over a quarter of the offensive possessions his team has had throughout his career have more or less been ran through him…for reference, the average usage rate in the NBA is almost always 20% since there are five players on the floor who will inevitably touch the ball in a given possession.
Has never shot below 85% from the free throw line in his career with a minimum of 8 attempts per game in each season.