con <- dbConnect(SQLite(), "f1_data.sqlite")
query1 <- "
SELECT ra.year, ra.round, d.driverRef, r.points
FROM results r
JOIN races ra ON r.raceId = ra.raceId
JOIN drivers d ON r.driverId = d.driverId
WHERE ra.year >= 2018;
"

fig_dat1 <- dbGetQuery(con, query1) %>% as_tibble()

seasons_to_plot <- c(2021, 2022)

driver_points <- fig_dat1 %>%
  filter(year %in% seasons_to_plot) %>%
  group_by(year, driverRef) %>%
  summarise(total_points = sum(points), .groups = "drop") %>%
  group_by(year) %>%
  slice_max(total_points, n = 5) %>%   
  inner_join(fig_dat1, by = c("year", "driverRef")) %>%
  group_by(year, driverRef) %>%
  arrange(round) %>%
  mutate(cum_points = cumsum(points))

ggplot(driver_points, aes(x = round, y = cum_points, color = driverRef, group = driverRef)) +
  geom_line(size = 1.1) +
  facet_wrap(~year, ncol = 2, scales = "free_x") +
  labs(title = "Top 5 Drivers – Two Seasons Championship Battle",
       x = "Race Round", y = "Cumulative Points",  color = "Driver") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "bottom")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

query2 <- "
SELECT c.name AS constructor, ra.year
FROM results r
JOIN races ra ON r.raceId = ra.raceId
JOIN constructors c ON r.constructorId = c.constructorId
WHERE r.position = 1;
"

fig_dat2 <- dbGetQuery(con, query2) %>%
  as_tibble() %>%
  mutate(decade = floor(year/10)*10) %>%
  group_by(constructor, decade) %>%
  summarise(total_wins = n(), .groups = 'drop') %>%
  group_by(constructor) %>%
  summarise(all_time_wins = sum(total_wins), .groups = "drop") %>%
  slice_max(all_time_wins, n = 5) %>%      # pick top 5 all-time constructors
  inner_join(
    dbGetQuery(con, query2) %>%
      as_tibble() %>%
      mutate(decade = floor(year/10)*10) %>%
      group_by(constructor, decade) %>%
      summarise(total_wins = n(), .groups = 'drop'),
    by = "constructor"
  )

ggplot(fig_dat2, aes(x = factor(decade), y = total_wins, fill = constructor)) +
  geom_col(position = "dodge") +
  labs(title = "Top 5 Constructors by Decade",
       x = "Decade", y = "Total Wins", fill = "Constructor") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "bottom")

query3 <- "
SELECT d.driverRef, lt.lap, lt.milliseconds/60000.0 AS lap_time_min
FROM lap_times lt
JOIN drivers d ON lt.driverId = d.driverId
JOIN races r ON lt.raceId = r.raceId
WHERE r.year = 2021 AND r.name = 'Italian Grand Prix';
"

fig_dat3 <- dbGetQuery(con, query3) %>%
  as_tibble()

# Get top 3 drivers by number of laps completed
top3_drivers <- fig_dat3 %>%
  count(driverRef, sort = TRUE) %>%
  slice_head(n = 3) %>%
  pull(driverRef)

fig_dat3_top3 <- fig_dat3 %>%
  filter(driverRef %in% top3_drivers)

# Violin + boxplot plot
ggplot(fig_dat3_top3, aes(x = driverRef, y = lap_time_min, fill = driverRef)) +
  geom_violin(alpha = 0.6, trim = FALSE) +
  geom_boxplot(width = 0.1, fill = "white", outlier.shape = NA) +
  labs(
    title = "Lap Time Distribution (minutes) – Top 3 Drivers, Italian GP 2021",
    x = "Driver",
    y = "Lap Time (minutes)",
    fill = "Driver"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none")

query4 <- "
SELECT d.driverRef, ps.lap, ps.milliseconds/1000.0 AS pit_sec
FROM pit_stops ps
JOIN drivers d ON ps.driverId = d.driverId
JOIN races r ON ps.raceId = r.raceId
WHERE r.year = 2021 AND r.name = 'Italian Grand Prix';
"

fig_dat4 <- dbGetQuery(con, query4) %>% as_tibble()

ggplot(fig_dat4, aes(x = lap, y = driverRef, fill = pit_sec)) +
  geom_tile(color = "white") +
  scale_fill_viridis_c(option = "C") +
  labs(title = "Pit Stop Times Heatmap – Italian GP 2021",
       x = "Lap", y = "Driver", fill = "Pit Stop (s)",
       caption = "Source: Ergast F1 Database") +
  theme_minimal(base_size = 14)

query5 <- "
SELECT r.position AS finish_pos, q.position AS quali_pos, d.surname, ra.year, ra.name AS race
FROM results r
JOIN qualifying q ON r.raceId = q.raceId AND r.driverId = q.driverId
JOIN drivers d ON r.driverId = d.driverId
JOIN races ra ON r.raceId = ra.raceId
WHERE ra.year = 2021;
"

fig_dat5 <- dbGetQuery(con, query5) %>% as_tibble()

fig_dat5 <- dbGetQuery(con, query5) %>% as_tibble()

plot_ly(
  data = fig_dat5,
  x = ~quali_pos,
  y = ~finish_pos,
  text = ~paste("Driver:", surname, "<br>Race:", race),
  color = ~surname,
  type = "scatter",
  mode = "markers"
) %>%
  plotly::layout(
    title = list(text = "Qualifying vs Finishing Position (2021)"),
    xaxis = list(title = "Qualifying Position"),
    yaxis = list(title = "Finishing Position", autorange = "reversed") # invert so P1 is top
  )
## Warning in RColorBrewer::brewer.pal(max(N, 3L), "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(max(N, 3L), "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
query6 <- "
SELECT d.driverRef, lt.milliseconds/1000.0 AS lap_time_sec, r.year
FROM lap_times lt
JOIN drivers d ON lt.driverId = d.driverId
JOIN races r ON lt.raceId = r.raceId
WHERE r.year = 2021;
"

fig_dat6 <- dbGetQuery(con, query6) %>% as_tibble()

ggplot(fig_dat6, aes(x = lap_time_sec, y = driverRef, fill = driverRef)) +
  geom_density_ridges(alpha = 0.6, scale = 1.5, rel_min_height = 0.01) +
  labs(title = "Lap Time Density – Drivers (2021 Season)",
       x = "Lap Time (seconds)", y = "Driver",
       caption = "Source: Ergast F1 Database") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none")
## Picking joint bandwidth of 3.19

champ_points <- fig_dat1 %>%
  filter(year == 2021) %>%
  group_by(driverRef) %>%
  arrange(round) %>%
  mutate(cum_points = cumsum(points))

ggplot(champ_points, aes(x = round, y = cum_points, color = driverRef, group = driverRef)) +
  geom_line(size = 1.1) +
  labs(title = "Championship Points Race – 2021",
       subtitle = "Round: {frame_along}",
       x = "Round", y = "Cumulative Points", color = "Driver",
       caption = "Source: Ergast F1 Database") +
  theme_minimal(base_size = 14) +
  transition_reveal(round)
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

query8 <- "
SELECT ra.name AS race, ra.year, SUM(r.time) AS race_duration
FROM results r
JOIN races ra ON r.raceId = ra.raceId
WHERE ra.year BETWEEN 2019 AND 2021
GROUP BY ra.raceId;
"

fig_dat8 <- dbGetQuery(con, query8) %>% as_tibble()

ggplot(fig_dat8, aes(x = reorder(race, race_duration), y = race_duration/60000)) +
  geom_boxplot(fill = "skyblue", alpha = 0.7) +
  coord_flip() +
  labs(title = "Race Duration Distribution by Circuit (2019–2021)",
       x = "Circuit", y = "Duration (minutes)",
       caption = "Source: Ergast F1 Database") +
  theme_minimal(base_size = 14)