library(ggplot2)
library(tidyverse)
library(readxl)
library(showtext)
library(viridis)
library(ggExtra)
library(plotly)
library(openxlsx)
#Add a new font for the plots
font_add_google("Pridi")
#the main nascar data
nascar <- read.xlsx("nascar.xlsx", sheet = 1)
#All nascar data pre Elliott injury
pre_injury <- read.xlsx("nascar.xlsx", sheet = 6)
#all nascar data post Elliott injury
post_injury <- read.xlsx("nascar.xlsx", sheet = 7)
#Look at Hendrick Motorsports - pre and post chase injury
hendrick_preinjury <- pre_injury %>%
filter(driver_name == "Alex Bowman" | driver_name == "Chase Elliott" |
driver_name == "William Byron" | driver_name == "Kyle Larson") %>%
mutate(status = "Pre-Injury")
hendrick_postinjury <- post_injury %>%
filter(driver_name == "Alex Bowman" | driver_name == "Chase Elliott" |
driver_name == "William Byron" | driver_name == "Kyle Larson") %>%
mutate(status = "Post-Injury")
#Create two barcharts so we can tell a more complete story
Predata1 <- hendrick_preinjury %>%
select(driver_name, num_races, avg_fin, avg_q1rank, avg_q2rank, avg_q3rank, avg_q4rank, wavg_speed, ARP, wARP, status)
Predata2 <- hendrick_preinjury %>%
select(driver_name, num_races, wins, stage_wins, dnf, poles, top5, top10, status)
Postdata1 <- hendrick_postinjury %>%
select(driver_name, num_races, avg_fin, avg_q1rank, avg_q2rank, avg_q3rank, avg_q4rank, wavg_speed, ARP, wARP, status)
Postdata2 <- hendrick_postinjury %>%
select(driver_name, num_races, wins, stage_wins, dnf, poles, top5, top10, status)
#Combine the previous sets for the two unique charts
speedstats <- rbind(Predata1, Postdata1)
faststats <- rbind(Predata2, Postdata2)
#Elliott plot1
CE1 <- as.matrix(speedstats[2,3:9]); CE2 <- as.matrix(speedstats[6,3:9])
CE <- rbind(CE1, CE2); rownames(CE) <- c("Pre-Injury", "Post-Injury")
#Elliott plot2
CE3 <- as.matrix(faststats[2,3:7]); CE4 <- as.matrix(faststats[6,3:7])
ce <- rbind(CE3, CE4); rownames(ce) <- c("Pre-Injury", "Post-Injury")
#Remove excess clutter
rm(CE1,CE2,CE3,CE4)
#Create a side-by-side barplot comparing Chase Elliott Pre-Post injury
par(mar = c(5,4,5,4)+.01)
#Create the barplot
barplot(CE, col = c("navyblue", "gold"),
cex.names = 0.7, las = 1, ylab = NA,
names = c("Avg Fin", "Q1 Rank", "Q2 Rank", "Q3 Rank", "Q4 Rank", "wAvgSpeed", "ARP"),
beside = T)
#Set the y-axis
axis(2, at = seq(0, 16, by = 2), labels = F)
#Set the alt tick marks
axis(2, at = seq(1, 15, by = 2), labels = F, tck = -0.02)
#Main title text
mtext("Chase Elliott Next-Gen Comparison",
line = 3, side = 3, cex = 1.3, font = 2)
#Subtitle text
mtext("Pre-Injury (38 races) VS Post-Injury (73 races)",
line = 2, side = 3, cex = 0.9)
#Y-axis text
mtext("Position", side = 2, line = 3, cex = 1)
#x-axis text
mtext("Stat", side = 1, line = 3, cex = 1)
#legend set-up
legend("top", inset = c(-0.15, -0.15), legend=c("Pre-Injury", "Post-Injury"),
pch= 15, col=c("navyblue", "gold"), cex=0.8,
box.lty=0, bty = 'n', xpd = T, horiz = T)
#Plot 2 - Set the barplot
barplot(ce, col = c("navyblue", "gold"), beside = T, cex.names = 0.8,
names = c("Wins", "Stage Wins", "DNF's", "Poles", "Top 5's"))
#Set y-axis
axis(2, at = seq(0, 20, by = 5), labels = F)
#y-axis alt tick marks
axis(2, at = seq(0, 20, by = 1), labels = F, tck = -0.02)
#Main title text
mtext("Chase Elliott Next-Gen Comparison",
line = 3, side = 3, cex = 1.3, font = 2)
#Subtitle text
mtext("Pre-Injury (38 races) VS Post-Injury (73 races)",
line = 2, side = 3, cex = 0.9)
#Y-axis text
mtext("Count", side = 2, line = 3, cex = 1)
#x-axis text
mtext("Stat", side = 1, line = 3, cex = 1)
#Set up the legend
legend("top", inset = c(-0.15, -0.15), legend=c("Pre-Injury", "Post-Injury"),
pch= 15, col=c("navyblue", "gold"), cex=0.8,
box.lty=0, bty = 'n', xpd = T, horiz = T)
#Filter out the 4 hendrick drivers
hendrick <- nascar %>%
filter(driver_name == "Alex Bowman" | driver_name == "Chase Elliott" |
driver_name == "William Byron" | driver_name == "Kyle Larson")
#allows for the new font to be used
showtext_auto()
#GGplot code
ggplot(hendrick, aes(x=as.factor(driver_num), wARP,
fill = as.factor(driver_num)))+
geom_violin() + #violin object
scale_y_reverse() + #scale y-axis in reverse
geom_boxplot(width = 0.3, color = "black", alpha = 0.2) + #boxplots over the violin plots
labs(x = "Driver Number", y = "wARP") +
ggtitle("Weighted Average Running Position for Hendrick Motorsports") +
scale_fill_manual(values = c("5"="dodgerblue1","9"="royalblue4",
"24"="yellowgreen","48"="purple4")) +
theme(plot.title = element_text(family="Pridi", face = "bold", size = 40),
axis.title.x = element_text(family="Pridi", size = 25),
axis.title.y = element_text(family="Pridi", size = 25),
axis.text = element_text(size = 25),
strip.text = element_text(size = 25),
legend.position = "none") +
facet_wrap(~year) #Wrap it by year to show distributions for each year
#Read in the 2025 summary data
nas25 <- read.xlsx("nascar.xlsx", sheet = 5); nas25$year <- 2025
#Create a dummy variable for point size
nas25$hendrick <- NULL
nas25$hendrick <- ifelse((nas25$driver_name == "Alex Bowman" | nas25$driver_name == "Chase Elliott"|
nas25$driver_name == "Kyle Larson" | nas25$driver_name == "William Byron"), 9, 7)
#Create a variable for driver color for the points - Color for the hendrick points, grey for the rest
nas25$color <- NULL
nas25$color <- ifelse(nas25$driver_name == "Chase Elliott", "Chase Elliott",
ifelse(nas25$driver_name == "Alex Bowman", "Alex Bowman",
ifelse(nas25$driver_name == "William Byron", "William Byron",
ifelse(nas25$driver_name == "Kyle Larson", "Kyle Larson", "Everyone Else"))))
#Create the plotly object
nas25 %>%
group_by(driver_name) %>%
plot_ly(x = ~avg_st, y = ~avg_fin, color = ~color,
colors = c("purple", "navyblue", "black", "dodgerblue", "yellowgreen"),
hoverinfo = "text",
text = ~paste("Driver:", driver_name,
"<br>Start Position: ", round(avg_st,2),
"<br>Finish Position: ", round(avg_fin,2),
"<br>Driver Rating: ", round(avg_driverRTG,2))) %>%
add_markers(showlegend = T, size = ~hendrick) %>%
add_annotations(x = 12.5, y = 32.5, font = list(color = 'darkred',size = 14),
text = "Finished Below<br> Avg Start Pos",showarrow = F) %>%
add_annotations(x = 32.5, y = 12.5, font = list(color = 'darkgreen',size = 14),
text = "Finished Above<br> Avg Start Pos",showarrow = F) %>%
layout(yaxis = list(title = "Finish Position"),
xaxis = list(title = "Start Position",
range = list(0,40), dtick = 5, tick0 = 0, tickmode = "linear"),
title = "2025 Season Average Start Position VS Finish Position",
shapes = list(type = "line", x0 = 0, y0 = 0, x1 = 40, y1 = 40,
line = list(color = "black", width = 1)))
#This is supposed to help make the lines to work
#Code is from https://plotly.com/r/cumulative-animations/
accumulate_by <- function(dat, var) {
var <- lazyeval::f_eval(var, dat)
lvls <- plotly:::getLevels(var)
dats <- lapply(seq_along(lvls), function(x) {
cbind(dat[var %in% lvls[seq(1, x)], ], frame = lvls[[x]])
})
dplyr::bind_rows(dats)
}
#Create a cumulative t15 lap percentage variable in for the 2024 season
nas2024 <- nascar %>%
filter(year == 2024 &
(driver_name == "Alex Bowman" | driver_name == "Chase Elliott" |
driver_name == "William Byron" | driver_name == "Kyle Larson")) %>%
group_by(driver_name) %>%
mutate(cumulativet15 = cumsum(t15_laps) / cumsum(laps_ran)*100) %>%
accumulate_by(~race_num) #allows for the frames to be specified on race_num
#Used for the color object on the animated plotly graph
nas2024$split <- ifelse(nas2024$driver_name == "Chase Elliott", "Chase Elliott",
ifelse(nas2024$driver_name == "Alex Bowman", "Alex Bowman",
ifelse(nas2024$driver_name == "William Byron", "William Byron",
ifelse(nas2024$driver_name == "Kyle Larson", "Kyle Larson", "Everyone Else"))))
#T15% - Animated Line plot
nas2024 %>%
plot_ly(x = ~race_num, y = ~cumulativet15, frame = ~frame,
type = 'scatter', mode = 'lines+markers', color = ~split, showlegend = T,
colors = c("purple", "navyblue", "dodgerblue", "yellowgreen"),
hoverinfo = "text",
text = ~paste("<b>Driver:</b><i>", driver_name,
"</i><br><b>Current Race:</b><i>", race_num,
"</i><br><b>Current Race T15%:</b><i>", round(t15_lapsPCT,2),
"%</i><br><b>2024 T15%:</b><i>", round(cumulativet15,2), "%")) %>%
add_text(x = 20, y = 20, text = ~race_num, frame = ~race_num, showlegend = F,
textfont = list(size = 50, color = toRGB("grey75"))) %>%
layout(title = "<b>Cumulative Top 15 Laps Ran PCT<br>2024 Season</b>",
yaxis = list(title = "<b>Cumulative T15 Lap PCT</b>",
range = list(0,100), dtick = 10, tick0 = 0, tickmode = "linear"),
xaxis = list(title = "<b>Race Num</b>",
range = list(0,37), dtick = 4, tick0 = 0, tickmode = "linear")) %>%
animation_opts(frame = 200, transition = 10, redraw = F) %>%
animation_slider(hide = T) %>%
animation_button(x = 1, xanchor = "right", y = 0, yanchor = "bottom")