This code generates another player-specific data visual. This time for Boston Celtics star Jayson Tatum and his career performance to date.
# Load various R libraries required for data manipulation, visualization, and image handling.
library(plyr)
library(dplyr)
library(tidyverse)
library(rvest)
library(ggrepel)
library(readr)
library(RCurl)
library(cowplot)
library(prismatic)
library(extrafont)
library(jpeg)
library(png)
library(grid)
library(magrittr)
library(janitor)
# Get player stats from source (basketball-reference)
# Add player name and player slug
player <- "Jayson Tatum"
slug <- "tatumja01"
# define player page URL and player image URL
url <- paste0("https://www.basketball-reference.com/players/t/tatumja01", substr(slug, 1, 1), "/", slug, ".html")
image_url <- paste0("https://cdn.nba.com/headshots/nba/latest/1040x760/1628369.png", slug, ".png")
# Clean and prepare the retrieved data for visualization, including calculating per-game averages.
# Define the URL
url <- "https://www.basketball-reference.com/players/t/tatumja01.html"
# Read the HTML content
page <- read_html(url)
# Read total stats
total_stat <- url %>%
read_html() %>%
html_node("#totals") %>%
html_table(fill = TRUE)
# Read advanced stats
adv_stat <- url %>%
read_html() %>%
html_node("#advanced") %>%
html_table()
# Merge stats tables using explicit column names
total_stats <- merge(total_stat, adv_stat, by.x = c("Season", "Age", "Tm", "Lg", "Pos", "G", "MP"),
by.y = c("Season", "Age", "Tm", "Lg", "Pos", "G", "MP"))
# Get RAPTOR ratings
RAPTOR_hist <- read.csv("https://github.com/fivethirtyeight/data/raw/master/nba-raptor/historical_RAPTOR_by_player.csv?raw=true")
RAPTOR_mod <- read.csv("https://github.com/fivethirtyeight/data/raw/master/nba-raptor/modern_RAPTOR_by_player.csv")
RAPTOR <- rbind.fill(RAPTOR_hist, RAPTOR_mod)
RAPTOR <- unique(RAPTOR)
# Merge all stats data and remove duplicate rows
total_stats$player_name <- "Jayson Tatum"
total_stats$season <- paste0(substr(total_stats$Season, 1, 2), substr(total_stats$Season, 6, 7))
total_stats$season <- str_replace(total_stats$season, "1900", "2000")
all_stats <- merge(total_stats, RAPTOR, by = c("player_name", "season"))
# Clean the data by removing leading/trailing white spaces in character columns
all_stats[, sapply(all_stats, is.character)] <- lapply(all_stats[, sapply(all_stats, is.character)], trimws)
# View the resulting data frame
View(all_stats)
# Select stats
str(all_stats)
# use below section for basic stats when you want to use the average...the stats used can be any
main_stats <- c("PTS","TRB","AST","STL","BLK","GP"="G")
stats <- main_stats
df <- all_stats %>% select(Season, "Team"=Tm, unlist(stats))
df_avg <- df[,3:(ncol(df)-1)]/df$GP
df <- cbind(df[,1:2],df_avg)
# create the final data frame
df_stats <- df %>% gather(Stat_cat, Stat_val, 3:ncol(df))
# clean the data
df_stats <- dplyr::filter(df_stats, grepl('-', Season))
df_stats <- dplyr::filter(df_stats, !grepl('TOT', Team))
df_stats <- unique(df_stats)
df_stats <- merge(df_stats,data.frame("Season"=total_stats$Season,"GP"=total_stats$G), by="Season")
# For some reason, the 2022-23 season was omitted so added manually below, (player's name) last season
# Create data frames for each row to be added
df_2022_23_1 <- data.frame(
Season = "2022-23",
Team = "BOS",
Stat_cat = c("PTS", "TRB", "AST", "STL", "BLK"),
Stat_val = c(30.1, 8.8, 4.6, 1.1, 0.7),
GP = 74
)
# Add df_2022_23_1 to df_stats using rbind
df_stats <- rbind(df_stats, df_2022_23_1)
# View the resulting data frame
View(df_stats)
# Append the new rows to df_stats
#df_stats <- rbind(df_stats, df_2022_23_1, df_2022_23_2, etc)
# Creating the visual
library(dplyr)
library(ggrepel)
library(showtext)
## Loading required package: sysfonts
## Loading required package: showtextdb
##
## Attaching package: 'showtextdb'
## The following object is masked from 'package:extrafont':
##
## font_install
# Chart creation (Get columns and pivot wider)
p <- df_stats %>%
ggplot(aes(x = paste0(Season," ",Team,"\n ",GP, "GP"),
y = Stat_val,
label=Stat_cat)) +
# Add points & customize colors, apply the position_jitter function to avoid overlap with plot points
geom_point(aes(fill = Stat_cat),
size = 3.2,
color = 'black',
shape = 21) +
scale_fill_manual(values = c("PTS" = "#007A33", "TRB" = "#BA9653", "AST" = "#963821",
"STL" = "#fdb927", "BLK" = "#bec0c2"))
# Add lines
p <- p + geom_line(aes(group = Stat_cat), size=0.05)
# Add labels & bold them
p <- p + geom_text_repel(data = filter(df_stats, Season == last(Season)),
aes(label = Stat_cat),
size = 3.2,
box.padding = 0.5,
point.padding = 0.5,
force = 50,
segment.size = 0.2,
colour = "black",
segment.color = "grey50",
family = "Paramount Vista Sans Condensed",
fontface = "italic",
hjust = 1) + # Adjust this value to change the horizontal position of the labels
# Bold labels
theme(text = element_text(face = "bold"))
# Edit axis
p <- p + scale_y_continuous(breaks = seq(0, ceiling(max(df_stats$Stat_val,na.rm = TRUE)), 5))
# Add title, subtitle and caption
p <- p + labs(title = paste0(player," Career Statistical Timeline"),
subtitle = paste0(min(df_stats$Season)," - ",max(df_stats$Season)),
x = "",
y = "",
caption = c(""))
# Add theme that removes the legend, modifies title, subtitle, captions, and x-axis
p <- p + theme(legend.position = 'none',
plot.title = element_text(size = 17, face="bold", hjust = .5),
plot.subtitle = element_text(face = 'italic', size = 13, hjust = .5),
axis.text.x=element_text(angle=60, hjust=1, size=10),
plot.caption = element_text(color = 'gray40'),
plot.margin = margin(10, 10, 15, 10))
## Add team changes lines
# Team line breaks
team_changes <- NULL
unique_combinations <- unique(paste0(df_stats$Season, df_stats$Team))
if (length(unique_combinations) > 0) {
i <- 1
while (i < length(unique_combinations)) {
team_change <- NULL
team_change$year <- i + 0.5
team_change$team1 <- substr(unique_combinations, 8, 11)[i]
team_change$team2 <- substr(unique_combinations, 8, 11)[i + 1]
team_change <- as.data.frame(team_change)
ifelse(team_change$team1 == team_change$team2, "", team_changes <- rbind(team_change, team_changes))
i <- i + 1
}
}
p <- p + geom_vline(xintercept=team_changes$year, colour="grey6")
# Create and add custom theme
theme_sweep <- function () {
theme(
# get rid of panel grids
panel.grid.major = element_line(size = (0.075), colour="grey"),
panel.grid.minor = element_blank(),
#panel.border = element_blank(),
# Change plot and panel background
plot.background = element_rect(fill = 'transparent', color = 'transparent'),
panel.background = element_rect(fill = "transparent", color = 'transparent'),
plot.title = element_text(size=18, hjust = 0.5),
plot.subtitle = element_text(size=13, hjust = 0.5),
plot.caption = element_text(size=8, hjust=c(1, 0))
)
}
# Add custom theme to the graph
p <- p + theme_sweep()
# Add theme that removes the legend, modifies title, subtitle, captions, and x-axis
p <- p + theme(legend.position = 'none',
plot.title = element_text(size = 17, face="bold", hjust = .5, family = "Bahnschrift"),
plot.subtitle = element_text(face = 'italic', size = 13, hjust = .5, family = "Bahnschrift"),
axis.text.x=element_text(angle=60, hjust=1, size=8, color = "black", face="bold"),
plot.caption = element_text(color = 'gray40'),
plot.margin = margin(10, 10, 10, 10))
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
# Now pulling data to inform on shooting numbers
str(all_stats)
# make a list of stats we care about
shooting_stats <- c("3P%", "FT%", "eFG%", "TS%", "USG%")
stats <- shooting_stats
# create the data frame for the chart, by using the Season, Team, and the vector of all stats
df <- total_stats %>% select(Season, "Team"=Tm, unlist(stats))
# multiply values by 100 to rescale
df$'3P%' <- df$'3P%'*100
df$'FT%' <- df$'FT%'*100
df$'eFG%' <- df$'eFG%'*100
df$'TS%' <- df$'TS%'*100
# create the final data frame
shooter_stats <- df %>% gather(Stat_cat, Stat_val, 3:ncol(df))
# clean the data
shooter_stats <- dplyr::filter(shooter_stats, grepl('-', Season))
shooter_stats <- dplyr::filter(shooter_stats, !grepl('TOT', Team))
shooter_stats <- unique(shooter_stats)
shooter_stats <- merge(shooter_stats,data.frame("Season"=total_stats$Season,"GP"=total_stats$G), by="Season")
# Remove duplicates and keep only the unique rows
shooter_stats <- shooter_stats %>%
distinct(Stat_val, .keep_all = TRUE)
# For rows with "PHO" in the "Team" column, set the "GP" to 8, since the data frame can get out of whack sometimes
shooter_stats <- shooter_stats %>%
mutate(GP = ifelse(Team == "PHO", 8, GP))
View(shooter_stats)
# Now publishing data related to the player's career shooting performance
library(dplyr)
library(ggrepel)
# Chart creation (Get columns and pivot wider)
p <- shooter_stats %>%
ggplot(aes(x = paste0(Season," ",Team,"\n ",GP, "GP"),
y = Stat_val,
label=Stat_cat)) +
# Add points & customize colors
geom_point(aes(fill = Stat_cat),
size = 3,
color = 'black',
shape = 21) +
scale_fill_manual(values = c("3P%" = "#007A33", "TS%" = "#BA9653", "eFG%" = "#963821",
"FT%" = "green3", "USG%" = "black")) +
# Add lines
geom_line(aes(group = Stat_cat), size=0.05) +
# Add labels
geom_text_repel(data = filter(shooter_stats, Season == last(Season)),
aes(label = Stat_cat),
size = 3.3,
box.padding = 0.2,
point.padding = 0.2,
force = 50,
segment.size = 0.2,
colour = "black",
family = "Paramount Vista Sans Condensed",
segment.color = "grey50",
fontface = "italic") # bold or italicize labels
## Add team changes lines
# Team line breaks
team_changes <- NULL
unique_combinations <- unique(paste0(shooter_stats$Season, shooter_stats$Team))
if (length(unique_combinations) > 0) {
i <- 1
while (i < length(unique_combinations)) {
team_change <- NULL
team_change$year <- i + 0.5
team_change$team1 <- substr(unique_combinations, 8, 11)[i]
team_change$team2 <- substr(unique_combinations, 8, 11)[i + 1]
team_change <- as.data.frame(team_change)
ifelse(team_change$team1 == team_change$team2, "", team_changes <- rbind(team_change, team_changes))
i <- i + 1
}
}
p <- p + geom_vline(xintercept=team_changes$year, colour="grey6") +
# Edit axis
scale_y_continuous(breaks = seq(0, ceiling(max(shooter_stats$Stat_val,na.rm = TRUE)), 5)) +
# Add title, subtitle, and caption
labs(title = paste0(player," Career Shooting Splits"),
subtitle = paste0(min(shooter_stats$Season)," - ",max(shooter_stats$Season)),
x = "",
y = "",
caption = c("")) +
# Remove legend and modify title, subtitle, captions, and x-axis
theme(
plot.background = element_rect(fill = 'transparent', color = 'transparent'),
panel.background = element_rect(fill = "transparent", color = 'transparent'),
legend.position = 'none',
plot.title = element_text(size = 17, face = "bold", hjust = .5, family = "Bahnschrift"),
plot.subtitle = element_text(size = 13, hjust = .5, family = "Bahnschrift", face = "italic", margin = margin(b = 10)), # Adjust the margin here),
axis.text.x = element_text(angle = 60, hjust = 1, size = 7, color = "black", face = "bold"),
plot.caption = element_text(color = 'gray40'),
plot.margin = margin(10, 10, 10, 10),
axis.text = element_text(family = "Verdana"),
# Modify grid lines
panel.grid.major = element_line(color = "lightgrey", size = 0.2),
panel.grid.minor = element_line(color = "lightgrey", size = 0.1)
)
print(p)
Plots Explained:
Career Statistical Timeline (in per game averages):
“PTS”: Points per Game
“TRB”: Total Rebounds per Game
“AST”: Assists per Game
“STL”: Steals per Game
“BLK”: Blocks per Game
Career Shooting Split Chart:
This chart utilizes data from basketball-reference’s adjusted shooting data that houses individual advanced shot metrics
“TS%” (True Shooting %) takes into account 2-point shots, 3-point shots, and free throws
“USG%” (Usage %) is an estimate of the percentage of the team’s plays used by the player while they are on the court
“eFG” (Effective Field Goal %) adjusts for the fact that a three point shot is worth more than a 2 point shot
“3P%” (Three point %)
“FT%” (Free throw %)
Recap: