Heatmaps, Treemaps, Streamgraphs, and Alluvials

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights13)

# Preview the first six rows of the flights data.
flights %>%
  head(n = 6L)
# A tibble: 6 × 19
   year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
  <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
1  2013     1     1      517            515         2      830            819
2  2013     1     1      533            529         4      850            830
3  2013     1     1      542            540         2      923            850
4  2013     1     1      544            545        -1     1004           1022
5  2013     1     1      554            600        -6      812            837
6  2013     1     1      554            558        -4      740            728
# ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
#   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
#   hour <dbl>, minute <dbl>, time_hour <dttm>
# Bucket every flight by distance, using the 0/20/40/60/80/100% quantiles of
# `distance` as break points. `labels = FALSE` stores the bucket as an integer
# code instead of an interval label.
flights <- flights %>%
  mutate(
    distance_cat = cut(
      distance,
      breaks = quantile(distance, probs = seq(0, 1, 0.2), na.rm = TRUE),
      labels = FALSE,
      include.lowest = TRUE
    )
  )

# Mean air_time for every (carrier, distance_cat) combination. Missing
# air_time values are ignored when averaging, and combinations whose mean is
# still missing are dropped from the result.
avg_air_time <- flights %>%
  group_by(carrier, distance_cat) %>%
  summarise(
    avg_air_time = mean(air_time, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(!is.na(avg_air_time))
# Heatmap: one tile per carrier / distance-category pair, coloured by the
# mean air time of that pair (blue = low, red = high).
avg_air_time %>%
  ggplot(
    mapping = aes(
      x = carrier,
      y = as.factor(distance_cat),
      fill = avg_air_time
    )
  ) +
  geom_tile() +
  scale_fill_gradient(
    low = "blue",
    high = "red",
    name = "Avg. Air Time (min)"
  ) +
  labs(
    title = "Average Air Time by Carrier and Distance Category",
    x = "Carrier",
    y = "Distance Category",
    caption = "Data source: nycflights13 package. Distance categorized into quintiles."
  ) +
  theme_minimal() +
  # Rotate the x-axis tick labels by 45 degrees.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Bin `distance` into bands of width 1000, labelled "0-1k", "1-2k", ...
#
# The number of bands is the maximum distance divided by 1000, rounded UP, so
# the top break is always >= the maximum. Building the breaks with
# `seq(0, max(distance), by = 1000)` instead stops at the last multiple of
# 1000 below the maximum, which silently turns the longest flights into NA.
# `max(1, ...)` guarantees at least one band (two break points) for `cut()`.
n_distance_bands <- max(1, ceiling(max(flights$distance, na.rm = TRUE) / 1000))

flights <- flights %>%
  mutate(
    distance_thousands = cut(
      distance,
      breaks = seq(0, n_distance_bands * 1000, by = 1000),
      # One label per band, derived from the same band count as the breaks so
      # the two can never get out of step.
      labels = paste0(
        seq_len(n_distance_bands) - 1, "-", seq_len(n_distance_bands), "k"
      ),
      # Keep a distance of exactly 0 inside the first band.
      include.lowest = TRUE
    )
  )