library(dplyr)
library(ggplot2)
library(gt)
library(ggiraph)
library(tidyr)
library(tidyverse)
library(corrplot)
library(bestNormalize)
library(patchwork)
library(gridExtra)
library(zoo)

Predicting F1 Pit Stops

# Load the training and test sets. Base read.csv() is kept deliberately: with
# its default name checking the lap-time column arrives as `LapTime..s.`,
# which is the name the plots further down refer to.
train <- utils::read.csv(file = "train.csv")
test <- utils::read.csv(file = "test.csv")

Missing Values

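Neither dataset contains missing values. The NA shown for PitNextLap under Total_test is not a missing-value count: the test set has no PitNextLap column, so the left join finds no matching row for it.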

# Count missing values in every column of `data`.
#
# Empty strings in character columns are treated as missing. The result has
# one row per column: the column name in `Variables` and its count in a
# column named by `total_name`.
count_missing_by_column <- function(data, total_name) {
  blanks_as_na <- dplyr::mutate(
    data,
    dplyr::across(where(is.character), \(col) dplyr::na_if(col, ""))
  )
  na_totals <- dplyr::summarise(
    blanks_as_na,
    dplyr::across(everything(), \(col) sum(is.na(col)))
  )
  tidyr::pivot_longer(
    na_totals,
    cols = everything(),
    names_to = "Variables",
    values_to = total_name
  )
}

missing_train <- count_missing_by_column(train, "Total_train")
missing_test <- count_missing_by_column(test, "Total_test")

# Columns present only in train get NA in the test column after the join.
missing <- dplyr::left_join(missing_train, missing_test, by = "Variables")

gt(missing)
Variables Total_train Total_test
id 0 0
Driver 0 0
Compound 0 0
Race 0 0
Year 0 0
PitStop 0 0
LapNumber 0 0
Stint 0 0
TyreLife 0 0
Position 0 0
LapTime..s. 0 0
LapTime_Delta 0 0
Cumulative_Degradation 0 0
RaceProgress 0 0
Position_Change 0 0
PitNextLap 0 NA

Data Exploration

Class Imbalance Check

# Tabulate how many training rows fall into each PitNextLap class, to check
# how balanced the target is.
target_imbalance <- as.data.frame(table(train$PitNextLap))
colnames(target_imbalance) <- c("Target", "Frequency")
target_imbalance |>
  gt()
Target Frquency
0 351759
1 87381

Data Correlations

# Replace the three categorical columns with integer codes so that every
# column is numeric and can enter the correlation matrix.
# NOTE(review): the codes follow factor level order, so for nominal columns
# such as Driver and Race they are arbitrary; read those coefficients with
# caution.
train_numeric <- train |>
  mutate(
    Driver = as.numeric(as.factor(Driver)),
    Compound = as.numeric(as.factor(Compound)),
    Race = as.numeric(as.factor(Race))
  )

# Pairwise correlations over rows without any missing value.
pit_corr <- stats::cor(train_numeric, use = "complete.obs")

# Coloured correlation matrix with the coefficients printed in each cell and
# the variables in alphabetical order.
corrplot(
  pit_corr,
  method = "color",
  order = "alphabet",
  addCoef.col = "black",
  col = COL2("RdYlBu")
)

# Density of one numeric column, drawn separately for each PitNextLap class.
#
# data:   data frame containing `column` and `PitNextLap`.
# column: name of the column to plot on the x axis, as a string.
# label:  text used for both the plot title and the x-axis title.
#
# Returns a ggplot object. Only a fill scale is set: no colour aesthetic is
# mapped in these panels, so a manual colour scale would have no effect.
plot_density_by_pit <- function(data, column, label) {
  fill_values <- c(
    "#E69F00", "#56B4E9", "#009E73", "#F0E442",
    "#0072B2", "#D55E00", "#CC79A7"
  )

  ggplot(data, aes(x = .data[[column]], fill = factor(PitNextLap))) +
    geom_density(alpha = 0.5) +
    scale_fill_manual(values = fill_values) +
    labs(
      title = label,
      y = "Density",
      x = label
    ) +
    theme_minimal(base_size = 12, base_family = "sans") +
    theme(
      plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
      plot.subtitle = element_text(color = "grey40", size = 11, margin = margin(b = 12)),
      plot.caption = element_text(color = "grey60", size = 9, hjust = 0),
      plot.margin = margin(16, 16, 16, 16),
      panel.grid.major.x = element_blank(),
      panel.grid.minor = element_blank(),
      axis.title = element_text(color = "grey30", size = 10),
      axis.text = element_text(color = "grey30"),
      axis.ticks.x = element_line(color = "grey80"),
      legend.position = "bottom",
      legend.title = element_blank()
    )
}

# One panel per numeric predictor; the objects keep their original names.
p1 <- plot_density_by_pit(
  train, "Cumulative_Degradation", "Cumulative Degradation"
)
p2 <- plot_density_by_pit(train, "LapNumber", "Lap Number")
p3 <- plot_density_by_pit(train, "Stint", "Stint")
p4 <- plot_density_by_pit(train, "TyreLife", "Tyre Life")
p5 <- plot_density_by_pit(train, "LapTime..s.", "Lap Time")
p6 <- plot_density_by_pit(train, "LapTime_Delta", "Lap Time Delta")
p7 <- plot_density_by_pit(train, "Position", "Position")
p8 <- plot_density_by_pit(train, "Position_Change", "Position Change")
p9 <- plot_density_by_pit(train, "RaceProgress", "Race Progress")

# Lay the nine panels out on a 3 x 3 grid.
grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, ncol = 3, nrow = 3)

Tyre Life and Degradation

# Box plot of tyre life, one box per PitNextLap class.
train |>
  ggplot(aes(factor(PitNextLap), TyreLife, fill = factor(PitNextLap))) +
  geom_boxplot() +
  labs(
    x = "Pit Next Lap",
    y = "Tyre Life",
    title = "Tyre Life at Time of Pit Decision"
  ) +
  scale_color_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  scale_fill_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  theme_minimal(base_family = "sans", base_size = 12) +
  theme(
    # Legend sits under the plot, without a title.
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.title = element_text(colour = "grey30", size = 10),
    axis.text = element_text(colour = "grey30"),
    axis.ticks.x = element_line(colour = "grey80"),
    # Keep only the horizontal major grid lines.
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(16, 16, 16, 16),
    plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
    plot.subtitle = element_text(colour = "grey40", size = 11, margin = margin(b = 12)),
    plot.caption = element_text(colour = "grey60", size = 9, hjust = 0)
  )

# Smoothed cumulative degradation against tyre life, one curve per compound.
train |>
  ggplot(aes(TyreLife, Cumulative_Degradation, colour = Compound)) +
  geom_smooth() +
  labs(
    x = "Tyre Life",
    y = "Cumulative Degradation",
    title = "Degradation Curve by Compound"
  ) +
  scale_color_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  scale_fill_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  theme_minimal(base_family = "sans", base_size = 12) +
  theme(
    # Legend sits under the plot, without a title.
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.title = element_text(colour = "grey30", size = 10),
    axis.text = element_text(colour = "grey30"),
    axis.ticks.x = element_line(colour = "grey80"),
    # Keep only the horizontal major grid lines.
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(16, 16, 16, 16),
    plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
    plot.subtitle = element_text(colour = "grey40", size = 11, margin = margin(b = 12)),
    plot.caption = element_text(colour = "grey60", size = 9, hjust = 0)
  )

# Horizontal box plots of tyre life for each compound, with separate boxes
# (outlined in different colours) for each PitNextLap class.
# The title and y label now describe what is plotted; they previously repeated
# the labels of the degradation-curve chart.
ggplot(train, aes(x = TyreLife, y = Compound, color = factor(PitNextLap))) +
  geom_boxplot() +
  scale_fill_manual(values = c("#E69F00", "#56B4E9", "#009E73", "#F0E442", 
                                "#0072B2", "#D55E00", "#CC79A7")) +
  scale_color_manual(values = c("#E69F00", "#56B4E9", "#009E73", "#F0E442", 
                                "#0072B2", "#D55E00", "#CC79A7")) +
  labs(
    title = "Tyre Life by Compound and Pit Decision",
    y = "Compound",
    x = "Tyre Life"
  ) +
  theme_minimal(base_size = 12, base_family = "sans") +
  theme(
    plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
    plot.subtitle = element_text(color = "grey40", size = 11, margin = margin(b = 12)),
    plot.caption = element_text(color = "grey60", size = 9, hjust = 0),
    plot.margin = margin(16, 16, 16, 16),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title = element_text(color = "grey30", size = 10),
    axis.text = element_text(color = "grey30"),
    axis.ticks.x = element_line(color = "grey80"),
    legend.position = "bottom",
    legend.title = element_blank()
  )

Pit Rate by Compound

# Share of laps with PitNextLap set, per compound, drawn as bars ordered from
# the lowest to the highest rate.
train |>
  group_by(Compound) |>
  summarise(
    pit_rate = mean(PitNextLap),
    count = n()
  ) |>
  ggplot(aes(reorder(Compound, pit_rate), pit_rate, fill = Compound)) +
  geom_col() +
  labs(
    x = "Compound",
    y = "Pit Rate",
    title = "Pit Rate by Compound"
  ) +
  scale_color_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  scale_fill_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  theme_minimal(base_family = "sans", base_size = 12) +
  theme(
    # Legend sits under the plot, without a title.
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.title = element_text(colour = "grey30", size = 10),
    axis.text = element_text(colour = "grey30"),
    axis.ticks.x = element_line(colour = "grey80"),
    # Keep only the horizontal major grid lines.
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(16, 16, 16, 16),
    plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
    plot.subtitle = element_text(colour = "grey40", size = 11, margin = margin(b = 12)),
    plot.caption = element_text(colour = "grey60", size = 9, hjust = 0)
  )

Race Progress

# Density of race progress for each PitNextLap class, one facet per compound.
train |>
  ggplot(aes(RaceProgress, fill = factor(PitNextLap))) +
  geom_density(alpha = 0.5) +
  facet_wrap(vars(Compound)) +
  labs(
    x = "Race Progress",
    y = "Density",
    title = "Race Progress Distribution by Pit Decision"
  ) +
  scale_color_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  scale_fill_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  theme_minimal(base_family = "sans", base_size = 12) +
  theme(
    # Legend sits under the plot, without a title.
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.title = element_text(colour = "grey30", size = 10),
    axis.text = element_text(colour = "grey30"),
    axis.ticks.x = element_line(colour = "grey80"),
    # Keep only the horizontal major grid lines.
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(16, 16, 16, 16),
    plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
    plot.subtitle = element_text(colour = "grey40", size = 11, margin = margin(b = 12)),
    plot.caption = element_text(colour = "grey60", size = 9, hjust = 0)
  )

Driver Position

# Proportion of each PitNextLap class across track positions.
# position = "fill" rescales every bin to height 1, so the bars show shares
# rather than counts.
ggplot(train, aes(x = Position, fill = factor(PitNextLap), color = factor(PitNextLap))) +
  geom_histogram(position = "fill", bins = 20, alpha = 0.3) +
  scale_fill_manual(values = c("#E69F00", "#56B4E9", "#009E73", "#F0E442", 
                                "#0072B2", "#D55E00", "#CC79A7")) +
  scale_color_manual(values = c("#E69F00", "#56B4E9", "#009E73", "#F0E442", 
                                "#0072B2", "#D55E00", "#CC79A7")) +
  labs(
    title = "Pit Rate by Track Position",
    y = "Proportion",
    # Stated explicitly; this is the same text ggplot derived from the mapping.
    x = "Position"
  ) +
  theme_minimal(base_size = 12, base_family = "sans") +
  theme(
    plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
    plot.subtitle = element_text(color = "grey40", size = 11, margin = margin(b = 12)),
    plot.caption = element_text(color = "grey60", size = 9, hjust = 0),
    plot.margin = margin(16, 16, 16, 16),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title = element_text(color = "grey30", size = 10),
    axis.text = element_text(color = "grey30"),
    axis.ticks.x = element_line(color = "grey80"),
    legend.position = "bottom",
    legend.title = element_blank()
  )

# Share of laps with PitNextLap set for every stint / compound combination,
# drawn as one line (with points) per compound.
train |>
  group_by(Stint, Compound) |>
  # Drop the grouping explicitly: the plot does not need it, and this avoids
  # the "grouped output" message summarise() emits otherwise.
  summarise(pit_rate = mean(PitNextLap), .groups = "drop") |>
  ggplot(aes(x = Stint, y = pit_rate, color = Compound, fill = Compound)) +
  geom_line() +
  geom_point() +
  scale_fill_manual(values = c("#E69F00", "#56B4E9", "#009E73", "#F0E442", 
                                "#0072B2", "#D55E00", "#CC79A7")) +
  scale_color_manual(values = c("#E69F00", "#56B4E9", "#009E73", "#F0E442", 
                                "#0072B2", "#D55E00", "#CC79A7")) +
  labs(
    title = "Pit Rate by Stint Number",
    y = "Pit Rate",
    x = "Stint"
  ) +
  theme_minimal(base_size = 12, base_family = "sans") +
  theme(
    plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
    plot.subtitle = element_text(color = "grey40", size = 11, margin = margin(b = 12)),
    plot.caption = element_text(color = "grey60", size = 9, hjust = 0),
    plot.margin = margin(16, 16, 16, 16),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title = element_text(color = "grey30", size = 10),
    axis.text = element_text(color = "grey30"),
    axis.ticks.x = element_line(color = "grey80"),
    legend.position = "bottom",
    legend.title = element_blank()
  )

# Minimum, mean and maximum stint number for every race / compound pair.
# .groups = "drop" returns an ungrouped table directly and avoids the
# "grouped output" message summarise() emits otherwise.
race_stint <- train |>
  group_by(Race, Compound) |>
  summarise(
    min_val  = min(Stint, na.rm = TRUE),
    mean_val = mean(Stint, na.rm = TRUE),
    max_val  = max(Stint, na.rm = TRUE),
    .groups = "drop"
  )

# Render the summary as a table: one row group per race, one row per compound,
# every column 150 px wide.
race_stint |>
  gt(
    rowname_col = "Compound",
    groupname_col = "Race"
  ) |>
  tab_header("Race Stint Analysis") |>
  cols_width(
    everything() ~ px(150)
  )
Race Stint Analysis
min_val mean_val max_val
Abu Dhabi Grand Prix
HARD 1 2.303222 5
INTERMEDIATE 4 4.000000 4
MEDIUM 1 1.119048 5
SOFT 1 3.800000 5
Australian Grand Prix
HARD 1 2.434992 6
INTERMEDIATE 1 3.974941 7
MEDIUM 1 1.101116 6
SOFT 1 2.700000 6
Austrian Grand Prix
HARD 1 2.603384 7
MEDIUM 1 1.532366 6
SOFT 1 3.160714 7
Azerbaijan Grand Prix
HARD 1 1.896008 5
INTERMEDIATE 2 2.500000 3
MEDIUM 1 1.038069 4
SOFT 3 3.625000 4
Bahrain Grand Prix
HARD 1 2.590173 6
INTERMEDIATE 6 6.000000 6
MEDIUM 1 2.178304 5
SOFT 1 1.607715 6
Belgian Grand Prix
HARD 1 2.433742 4
INTERMEDIATE 1 1.016854 4
MEDIUM 1 1.596089 4
SOFT 1 2.511943 6
British Grand Prix
HARD 1 2.563769 5
INTERMEDIATE 1 1.833844 6
MEDIUM 1 1.287798 6
SOFT 1 3.029770 6
Canadian Grand Prix
HARD 1 2.043497 6
INTERMEDIATE 1 1.165700 6
MEDIUM 1 1.460700 6
SOFT 1 4.305556 6
Chinese Grand Prix
HARD 1 2.330100 5
MEDIUM 1 1.142687 5
SOFT 1 3.000000 5
Dutch Grand Prix
HARD 1 2.253925 7
INTERMEDIATE 1 4.672496 8
MEDIUM 1 1.213644 8
SOFT 1 2.801603 8
Emilia Romagna Grand Prix
HARD 1 2.208805 5
INTERMEDIATE 1 1.010131 4
MEDIUM 1 1.413910 5
SOFT 3 3.602564 5
French Grand Prix
HARD 1 2.169747 4
MEDIUM 1 1.026102 4
Hungarian Grand Prix
HARD 1 2.412507 5
MEDIUM 1 1.285714 6
SOFT 1 2.264993 5
Italian Grand Prix
HARD 1 2.055583 4
INTERMEDIATE 1 1.000000 1
MEDIUM 1 1.121868 4
SOFT 1 2.650000 4
Japanese Grand Prix
HARD 1 2.700276 6
INTERMEDIATE 1 2.854722 5
MEDIUM 1 1.195066 5
SOFT 1 2.867769 5
WET 1 2.037037 4
Las Vegas Grand Prix
HARD 1 2.338379 5
MEDIUM 1 1.045455 4
SOFT 1 3.736842 5
Mexico City Grand Prix
HARD 1 2.169914 6
MEDIUM 1 1.189163 5
SOFT 1 1.939289 5
Miami Grand Prix
HARD 1 1.729888 6
MEDIUM 1 1.244020 5
SOFT 1 3.012500 4
Monaco Grand Prix
HARD 1 1.894737 6
INTERMEDIATE 1 3.068104 8
MEDIUM 1 1.716032 5
SOFT 3 3.965000 6
WET 1 1.136398 6
Pre-Season Testing
HARD 1 2.248302 5
INTERMEDIATE 1 4.000000 6
MEDIUM 1 1.158844 5
SOFT 1 2.874941 8
Qatar Grand Prix
HARD 1 3.271513 6
INTERMEDIATE 4 5.000000 6
MEDIUM 1 1.411722 5
SOFT 1 4.154930 6
Saudi Arabian Grand Prix
HARD 1 2.012779 4
INTERMEDIATE 1 1.000000 1
MEDIUM 1 1.072724 4
SOFT 1 3.304762 4
Singapore Grand Prix
HARD 1 1.988117 5
INTERMEDIATE 1 1.013149 4
MEDIUM 1 1.161593 4
SOFT 1 2.662222 5
Spanish Grand Prix
HARD 1 2.753523 6
INTERMEDIATE 5 5.500000 6
MEDIUM 1 2.248474 5
SOFT 1 1.638353 6
São Paulo Grand Prix
HARD 1 1.367347 5
INTERMEDIATE 1 1.693778 6
MEDIUM 1 2.456855 6
SOFT 1 3.526909 7
WET 2 2.000000 2
United States Grand Prix
HARD 1 2.325069 5
MEDIUM 1 1.188922 4
SOFT 1 2.228729 4

Feature Engineering

# Add tyre-wear features to a lap-level data frame.
#
# df: data frame with Compound, TyreLife and Cumulative_Degradation columns.
#
# Returns `df` with three extra columns:
#   TyreLife_x_Degradation  product of tyre life and cumulative degradation
#   Degradation_per_Lap     cumulative degradation divided by (TyreLife + 1)
#   TyreAge_Category        "old" once TyreLife exceeds a per-compound limit,
#                           otherwise "fresh"
add_tyre_features <- function(df) {
  df |>
    mutate(
      TyreLife_x_Degradation = TyreLife * Cumulative_Degradation,

      # NOTE(review): the + 1 presumably guards against dividing by zero when
      # TyreLife is 0 -- confirm.
      Degradation_per_Lap = Cumulative_Degradation / (TyreLife + 1),

      # NOTE(review): compounds without a rule here (WET appears in the race
      # stint table) always fall through to "fresh" -- confirm that is
      # intended.
      TyreAge_Category = case_when(
        Compound == "SOFT"         & TyreLife > 15 ~ "old",
        Compound == "MEDIUM"       & TyreLife > 25 ~ "old",
        Compound == "HARD"         & TyreLife > 35 ~ "old",
        Compound == "INTERMEDIATE" & TyreLife > 20 ~ "old",
        TRUE ~ "fresh"
      )
    )
}

# Add position-based flags to a lap-level data frame.
#
# df:                    data frame with Position, Position_Change and
#                        Degradation_per_Lap columns.
# degradation_threshold: single number; rows whose Degradation_per_Lap is
#                        above it count as heavily degraded.
#
# Returns `df` with three extra logical columns:
#   Losing_Positions     Position_Change below -1
#   InPoints             Position of 10 or better
#   Struggling_OldTyres  Losing_Positions and Degradation_per_Lap above the
#                        threshold
add_position_features <- function(df, degradation_threshold) {
  df |>
    mutate(
      Losing_Positions = Position_Change < -1,

      InPoints = Position <= 10,

      Struggling_OldTyres = Losing_Positions &
        Degradation_per_Lap > degradation_threshold
    )
}

train <- add_tyre_features(train)
test <- add_tyre_features(test)

# The threshold is taken from the training data only and reused for the test
# data, so Struggling_OldTyres has the same definition in both sets. Using a
# separate median per set would shift the cut-off between them.
degradation_threshold <- median(train$Degradation_per_Lap, na.rm = TRUE)

train <- add_position_features(train, degradation_threshold)
test <- add_position_features(test, degradation_threshold)