Load in necessary packages

library(ggplot2)
library(trelliscopejs)
library(tidyverse)
library(readxl)
library(showtext)
library(viridis)
library(ggExtra)

Problem 1

For a data set of your choosing, make a faceted plot using the trelliscopejs package. You may make any type of plot (scatter plot, histogram, etc.), but, as mentioned in the discussion below, you must explain why you chose this plot and what you are investigating about the variable you are graphing.

The trelliscope plot must include one cognostic measure of your own. Include a description of what it is and what information this measure gives.

~Load in the data set

# Read the first worksheet of the workbook, then prepare it for summarizing.
nascar <- read_xlsx("nascar2.xlsx", sheet = 1) %>%
  mutate(
    # Data cleaning to help summarize: collapse the track-type labels to
    # two-letter codes. Any other non-missing label falls through to "RC";
    # a missing label stays NA (no branch matches it).
    track_type = case_when(
      track_type == "Short Track" ~ "ST",
      track_type == "Superspeedway" ~ "SS",
      track_type == "Mile-and-a-Half" ~ "MH",
      !is.na(track_type) ~ "RC"
    ),
    # One 0/1 indicator column per track-type code (NA when the code is NA).
    # These are built from the recoded track_type defined just above.
    num_racesMH = as.numeric(track_type == "MH"),
    num_racesRC = as.numeric(track_type == "RC"),
    num_racesSS = as.numeric(track_type == "SS"),
    num_racesST = as.numeric(track_type == "ST")
  )

~Perform some Tidyverse to make a season summary dataframe

#Make a new season-based DF based on track type and driver
# Build a 2024 season summary with one row per driver / team / track type.
# Counting stats are totalled and rate/position/speed stats are averaged,
# always ignoring missing values. Only drivers whose race counts across all
# track types add up to at least 27 are kept.
szn2024 <- nascar %>%
  filter(year == 2024) %>%
  summarize(
    .by = c(driver, team, track_type),
    # Each row has exactly one of the four 0/1 indicators set, so their
    # combined total is the number of races in this group. A missing
    # track_type makes the indicators NA, so num_races is NA for that group.
    num_races = sum(num_racesSS, num_racesST, num_racesRC, num_racesMH),
    # Season totals.
    across(
      c(laps_ran, dnf, poles, wins, top5, top10, top15, sub30,
        laps_led, pts, ppts, stage_wins),
      function(x) sum(x, na.rm = TRUE)
    ),
    # Season averages.
    across(
      c(st_pos, fin_pos, avg_pos),
      function(x) mean(x, na.rm = TRUE)
    ),
    passdiff = sum(passdiff, na.rm = TRUE),
    `qualitypass%` = mean(`qualitypass%`, na.rm = TRUE),
    across(
      c(`flaps#`, `t15laps#`),
      function(x) sum(x, na.rm = TRUE)
    ),
    across(
      c(`t15laps%`, driver_rtg, p_time_z, q_time_z,
        q1_speed, q2_speed, q3_speed, q4_speed,
        avg_speed, wavg_speed, gfs_z, finVspeed, race_grade),
      function(x) mean(x, na.rm = TRUE)
    )
  ) %>%
  # Keep a driver only when the race counts over all of their rows reach 27.
  # An NA num_races makes the condition NA, which drops that driver.
  filter(sum(num_races) >= 27, .by = driver)

~Add a cognostic variable

# Cognostic: each driver's mean starting position, taken as the unweighted
# mean of their per-track-type averages and rounded to two decimals.
# `.by` groups for this step only, so szn2024 is not left grouped afterwards;
# na.rm skips a track type whose average start position is missing.
szn2024 <- szn2024 %>%
  mutate(
    mean_stpos = round(mean(st_pos, na.rm = TRUE), 2),
    .by = driver
  )

~Set up the faceted trelliscope

# Bar chart of `t15laps%` by track type, rendered as a trelliscope display
# with one panel per driver.
szn2024 %>%
  #filter(driver == "Kyle Larson") %>% # Test for the GGplot graph before Trelliscope
  ggplot(aes(x = as.factor(track_type), y = `t15laps%`, fill = track_type)) +
  # geom_col() draws bars at the given y values (geom_bar(stat = "identity")).
  geom_col(position = "dodge") +
  ylim(c(0, 100)) +
  labs(x = "Track Type", y = "Percentage of Laps in T15") +
  scale_fill_manual(
    values = c("MH" = "#f89540", "RC" = "#cc4778",
               "SS" = "#7e03a8", "ST" = "#0d0887"),
    name = "Track Type",
    # Labels are named so each code is matched to its own label rather than
    # by position, which stays correct if a code is absent from the data.
    labels = c("MH" = "Mile-and-a-Half", "RC" = "Road Course",
               "SS" = "Superspeedway", "ST" = "Short Track")
  ) +
  theme(legend.text = element_text(size = 8),
        legend.title = element_text(size = 10)) +
  # Facet on mean_stpos and driver together: mean_stpos stays the first
  # conditioning variable, while driver guarantees that two drivers with the
  # same rounded mean start position are not merged into a single panel.
  facet_trelliscope(~ mean_stpos + driver,
                    name = "Percentage of Top 15 Laps",
                    desc = "By Driver", nrow = 1, ncol = 2, path = ".",
                    self_contained = TRUE)

Description 2-3 paragraphs.

Describe the data set. Explain the variable you are graphing in your plots and the reason you are investigating with it. Discuss the reason/motivation you chose the variable to facet on, and what insight or trend you are attempting to investigate. Discuss any challenges you had in making the graphs and how you dealt with these challenges. Name at least one cognostic measure (this can include the cognostic you created or be different) the reader could investigate, and explain any insight they might gain from it.

After creating the trelliscope, I took an initial look through the 34 bar charts that were created. It did seem like there were certain drivers who were running well everywhere. The results could be binned, in a sense, into three groups: a cluster of drivers who ran above 50% of their laps in the top 15 at all track types, a middle group of drivers who ran between 25% and 75% of their laps in the top 15, and a group of drivers who ran 40% or lower. I wanted to look for balanced drivers, ones who ran rather evenly in their percentages among the track types. Chase Elliott and Chris Buescher were both pretty evenly spread across the 4 track types. There were also 3 drivers who excelled or underperformed at one track type relative to their other percentages. Denny Hamlin ran around 25% of his road course laps in the top 15, way under his other track types. Martin Truex Jr ran over 75% of his mile-and-a-half laps in the top 15, way over his other percentages. Similarly, Josh Berry ran over 50% of his short track laps in the top 15, despite the rest of his percentages being way lower.

URL to the Published RPubs page