Adapted from a lab written by Prof Charlotte Wickham

The data

These data come from the hflights package, but some summarization is done for you.

# Attach the tidyverse packages
library(tidyverse)

# Read the pre-summarised IAH flight data from the course repository
iah <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/Teaching/main/DATA502/FA2023/R_Markdown/Week8/iah_flightSummary.csv"
)

# Show the structure (column names and types) of the loaded data frame
str(object = iah)
## 'data.frame':    156 obs. of  6 variables:
##  $ DayOfWeekNum     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ DepHour          : int  0 1 5 6 7 8 9 10 11 12 ...
##  $ prop_over_15     : num  1 1 0.00272 0.03348 0.03315 ...
##  $ avg_delay        : num  188.818 526.333 -3.147 1.206 0.226 ...
##  $ DayOfWeek        : chr  "Mon" "Mon" "Mon" "Mon" ...
##  $ avg_delay_delayed: num  188.82 526.33 6.25 10.76 7.15 ...

Start with this heatmap:

The heatmap represents the proportion of flights that have a departure delay of more than 15 minutes (prop_over_15) at the George Bush Intercontinental Airport (IAH) by day of the week (DayOfWeek) and departure hour (DepHour).

# Convert DayOfWeek to a factor whose levels run Monday through Sunday,
# so the days are displayed in calendar order
iah[["DayOfWeek"]] <- factor(
  x = iah[["DayOfWeek"]],
  levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
)

# Base heatmap: one tile per (departure hour, weekday) combination,
# with the tile fill mapped to prop_over_15
p <- ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15))

# Display the plot
p
## Warning: Removed 7 rows containing missing values (geom_tile).

Step 1: scale_x_continuous

Breaks

# Heatmap with grey tile borders; name the x axis and put a break
# every six hours
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15), color = "grey50") +
  scale_x_continuous(
    name = "Departure time",
    breaks = seq(from = 0, to = 24, by = 6)
  )
## Warning: Removed 7 rows containing missing values (geom_tile).

Labels

# As before, but replace the numeric hour breaks with readable
# time-of-day labels (one label per break, in the same order)
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15), color = "grey50") +
  scale_x_continuous(
    name = "Departure time",
    breaks = seq(from = 0, to = 24, by = 6),
    labels = c("midnight", "6am", "noon", "6pm", "midnight")
  )
## Warning: Removed 7 rows containing missing values (geom_tile).

Limits

Hour 0 and hour 24 are the same thing (midnight)!

# Restrict the x axis to hours 0 through 23
# NOTE(review): the break at 24 lies outside these limits, so its
# "midnight" label is presumably not drawn — confirm
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15), color = "grey50") +
  scale_x_continuous(
    name = "Departure time",
    breaks = seq(from = 0, to = 24, by = 6),
    labels = c("midnight", "6am", "noon", "6pm", "midnight"),
    limits = c(0, 23)
  )
## Warning: Removed 8 rows containing missing values (geom_tile).

Expansion (Padding Space)

# Set the x-axis expansion to zero so no padding is added
# beyond the axis limits
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15), color = "grey50") +
  scale_x_continuous(
    name = "Departure time",
    breaks = seq(from = 0, to = 24, by = 6),
    labels = c("midnight", "6am", "noon", "6pm", "midnight"),
    limits = c(0, 23),
    expand = c(0, 0)
  )
## Warning: Removed 8 rows containing missing values (geom_tile).

Step 2: scale_y_discrete

Label

# Keep the polished x axis and give the discrete y axis a readable name
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15), color = "grey50") +
  scale_x_continuous(
    name = "Departure time",
    breaks = seq(from = 0, to = 24, by = 6),
    labels = c("midnight", "6am", "noon", "6pm", "midnight"),
    limits = c(0, 23),
    expand = c(0, 0)
  ) +
  scale_y_discrete(name = "Departure day")
## Warning: Removed 8 rows containing missing values (geom_tile).

Step 3: scale_fill_continuous_sequential

We’re going to need the colorspace package for this.

Palette

# colorspace provides scale_fill_continuous_sequential()
library(colorspace)

# Colour the tiles with the sequential "YlOrRd" palette and give the
# legend a descriptive, two-line title
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15), color = "grey50") +
  scale_x_continuous(
    name = "Departure time",
    breaks = seq(from = 0, to = 24, by = 6),
    labels = c("midnight", "6am", "noon", "6pm", "midnight"),
    limits = c(0, 23),
    expand = c(0, 0)
  ) +
  scale_y_discrete(name = "Departure day") +
  scale_fill_continuous_sequential(
    palette = "YlOrRd",
    name = "Flights delayed \nmore than 15 mins"
  )
## Warning: Removed 8 rows containing missing values (geom_tile).

Breaks

# colorspace provides scale_fill_continuous_sequential()
library(colorspace)

# Same palette, with legend breaks at every quarter from 0 to 1
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15), color = "grey50") +
  scale_x_continuous(
    name = "Departure time",
    breaks = seq(from = 0, to = 24, by = 6),
    labels = c("midnight", "6am", "noon", "6pm", "midnight"),
    limits = c(0, 23),
    expand = c(0, 0)
  ) +
  scale_y_discrete(name = "Departure day") +
  scale_fill_continuous_sequential(
    palette = "YlOrRd",
    name = "Flights delayed \nmore than 15 mins",
    breaks = seq(from = 0, to = 1, by = 0.25)
  )
## Warning: Removed 8 rows containing missing values (geom_tile).

Labels

# colorspace provides scale_fill_continuous_sequential()
library(colorspace)

# Show the legend breaks as percentages instead of raw proportions
# (one label per break, in the same order)
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15), color = "grey50") +
  scale_x_continuous(
    name = "Departure time",
    breaks = seq(from = 0, to = 24, by = 6),
    labels = c("midnight", "6am", "noon", "6pm", "midnight"),
    limits = c(0, 23),
    expand = c(0, 0)
  ) +
  scale_y_discrete(name = "Departure day") +
  scale_fill_continuous_sequential(
    palette = "YlOrRd",
    name = "Flights delayed \nmore than 15 mins",
    breaks = seq(from = 0, to = 1, by = 0.25),
    labels = c("0%", "25%", "50%", "75%", "100%")
  )
## Warning: Removed 8 rows containing missing values (geom_tile).

Guide (Legend)

# colorspace provides scale_fill_continuous_sequential()
library(colorspace)

# Explicitly request a colour-bar legend for the fill scale
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15), color = "grey50") +
  scale_x_continuous(
    name = "Departure time",
    breaks = seq(from = 0, to = 24, by = 6),
    labels = c("midnight", "6am", "noon", "6pm", "midnight"),
    limits = c(0, 23),
    expand = c(0, 0)
  ) +
  scale_y_discrete(name = "Departure day") +
  scale_fill_continuous_sequential(
    palette = "YlOrRd",
    name = "Flights delayed \nmore than 15 mins",
    breaks = seq(from = 0, to = 1, by = 0.25),
    labels = c("0%", "25%", "50%", "75%", "100%"),
    # NOTE(review): expand is documented for position scales; it
    # probably has no effect on a fill scale — confirm
    expand = c(0, 0),
    guide = "colorbar"
  )
## Warning: Removed 8 rows containing missing values (geom_tile).

Step 4: Title and subtitle

# Add a headline title and a subtitle to the polished heatmap
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15), color = "grey50") +
  scale_x_continuous(
    name = "Departure time",
    breaks = seq(from = 0, to = 24, by = 6),
    labels = c("midnight", "6am", "noon", "6pm", "midnight"),
    limits = c(0, 23),
    expand = c(0, 0)
  ) +
  scale_y_discrete(name = "Departure day") +
  scale_fill_continuous_sequential(
    palette = "YlOrRd",
    name = "Flights delayed \nmore than 15 mins",
    breaks = seq(from = 0, to = 1, by = 0.25),
    labels = c("0%", "25%", "50%", "75%", "100%"),
    # NOTE(review): expand is documented for position scales; it
    # probably has no effect on a fill scale — confirm
    expand = c(0, 0),
    guide = "colorbar"
  ) +
  labs(
    title = "Take an early flight from IAH to avoid delays",
    subtitle = "Based on all departing flights from George Bush Intercontinental Airport (IAH) in 2011"
  )
## Warning: Removed 8 rows containing missing values (geom_tile).

Step 5: Theme

# Apply the classic theme, blank out the y-axis ticks and axis line,
# and use coord_equal()
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15), color = "grey50") +
  scale_x_continuous(
    name = "Departure time",
    breaks = seq(from = 0, to = 24, by = 6),
    labels = c("midnight", "6am", "noon", "6pm", "midnight"),
    limits = c(0, 23),
    expand = c(0, 0)
  ) +
  scale_y_discrete(name = "Departure day") +
  scale_fill_continuous_sequential(
    palette = "YlOrRd",
    name = "Flights delayed \nmore than 15 mins",
    breaks = seq(from = 0, to = 1, by = 0.25),
    labels = c("0%", "25%", "50%", "75%", "100%"),
    # NOTE(review): expand is documented for position scales; it
    # probably has no effect on a fill scale — confirm
    expand = c(0, 0),
    guide = "colorbar"
  ) +
  labs(
    title = "Take an early flight from IAH to avoid delays",
    subtitle = "Based on all departing flights from George Bush Intercontinental Airport (IAH) in 2011"
  ) +
  theme_classic() +
  theme(
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank()
  ) +
  coord_equal()
## Warning: Removed 8 rows containing missing values (geom_tile).

Step 6: Order Days (Relevel)

# Rebuild the DayOfWeek factor with the levels listed Sunday first
# and Monday last
iah[["DayOfWeek"]] <- factor(
  x = iah[["DayOfWeek"]],
  levels = c("Sun", "Sat", "Fri", "Thu", "Wed", "Tue", "Mon")
)

# Final polished heatmap, drawn with whatever DayOfWeek level order
# is current in iah
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15), color = "grey50") +
  scale_x_continuous(
    name = "Departure time",
    breaks = seq(from = 0, to = 24, by = 6),
    labels = c("midnight", "6am", "noon", "6pm", "midnight"),
    limits = c(0, 23),
    expand = c(0, 0)
  ) +
  scale_y_discrete(name = "Departure day") +
  scale_fill_continuous_sequential(
    palette = "YlOrRd",
    name = "Flights delayed \nmore than 15 mins",
    breaks = seq(from = 0, to = 1, by = 0.25),
    labels = c("0%", "25%", "50%", "75%", "100%"),
    # NOTE(review): expand is documented for position scales; it
    # probably has no effect on a fill scale — confirm
    expand = c(0, 0),
    guide = "colorbar"
  ) +
  labs(
    title = "Take an early flight from IAH to avoid delays",
    subtitle = "Based on all departing flights from George Bush Intercontinental Airport (IAH) in 2011"
  ) +
  theme_classic() +
  theme(
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank()
  ) +
  coord_equal()
## Warning: Removed 8 rows containing missing values (geom_tile).

Step 7: ggsave()

# Save a plot to a PNG file; no plot is passed, so ggsave() falls back
# to its default (the last plot displayed, per the ggplot2 docs)
ggsave(filename = "polishedDelays.png", width = 10, height = 3.5)
## Warning: Removed 8 rows containing missing values (geom_tile).