# Read the IAH delay summary CSV from its remote URL into `iah`.
iah <- read_csv("http://vis.cwick.co.nz/data/iah-summary.csv")
## Rows: 154 Columns: 7
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): DayOfWeek
## dbl (6): DepHour, avg_delay, avg_delay_delayed, prop_over_15, nflights, ndests
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the structure (column names, types, first values) of the import.
str(iah)
## spec_tbl_df [154 x 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ DepHour          : num [1:154] 0 0 0 0 0 0 0 1 1 1 ...
##  $ DayOfWeek        : chr [1:154] "Mon" "Tue" "Wed" "Thu" ...
##  $ avg_delay        : num [1:154] 187.6 174.5 173 196.2 31.1 ...
##  $ avg_delay_delayed: num [1:154] 187.6 174.5 173 196.2 31.1 ...
##  $ prop_over_15     : num [1:154] 1 1 1 1 0.31 ...
##  $ nflights         : num [1:154] 7 6 10 17 29 3 7 1 3 5 ...
##  $ ndests           : num [1:154] 5 6 9 14 5 1 7 1 3 5 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   DepHour = col_double(),
##   ..   DayOfWeek = col_character(),
##   ..   avg_delay = col_double(),
##   ..   avg_delay_delayed = col_double(),
##   ..   prop_over_15 = col_double(),
##   ..   nflights = col_double(),
##   ..   ndests = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

Step 2: Make a heatmap:

# Convert the weekday labels to a factor whose levels run Monday to Sunday,
# so plots list the days in calendar order instead of alphabetically.
iah[["DayOfWeek"]] <- factor(
  iah[["DayOfWeek"]],
  levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
)

# Base heatmap: one tile per departure hour and weekday, filled by
# prop_over_15. Stored in `p` so different fill scales can be added to it.
p <- ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15))
p
## Warning: Removed 7 rows containing missing values (geom_tile).

Step 3: Experiment with scale_fill_xxx

# Add a continuous gradient fill scale, using its default colours.
p + scale_fill_gradient()
## Warning: Removed 7 rows containing missing values (geom_tile).

Step 4: Find better color scales

# One-time setup if the colorspace package is not installed yet:
# install.packages("colorspace")
library(colorspace)
# Plot the available HCL palettes so one can be picked by name.
hcl_palettes(plot = TRUE)

# Sequential HCL palette from colorspace.
p + scale_fill_continuous_sequential(palette = "Mint")
## Warning: Removed 7 rows containing missing values (geom_tile).

# Diverging HCL palette from colorspace.
p + scale_fill_continuous_diverging(palette = "Blue-Red")
## Warning: Removed 7 rows containing missing values (geom_tile).

Experiment!

# Further palette trials on the same base heatmap.
p + scale_fill_continuous_diverging(palette = "Tropic")
## Warning: Removed 7 rows containing missing values (geom_tile).

p + scale_fill_continuous_sequential(palette = "Purples3")
## Warning: Removed 7 rows containing missing values (geom_tile).

p + scale_fill_continuous_sequential(palette = "Peach")
## Warning: Removed 7 rows containing missing values (geom_tile).

Step 5: Transform data and change limits

# Heatmap filled by avg_delay_delayed on its raw scale.
ggplot(
  data = iah,
  mapping = aes(x = DepHour, y = DayOfWeek, fill = avg_delay_delayed)
) +
  geom_tile() +
  scale_fill_gradient()
## Warning: Removed 7 rows containing missing values (geom_tile).

# Same heatmap with the fill log10-transformed, so a few very large values
# do not dominate the colour range.
ggplot(
  data = iah,
  mapping = aes(x = DepHour, y = DayOfWeek, fill = log10(avg_delay_delayed))
) +
  geom_tile() +
  scale_fill_gradient()
## Warning: Removed 7 rows containing missing values (geom_tile).

# An alternative approach to deal with a few very large numbers is to turn the continuous variable into a discrete one by binning it:

# Bin avg_delay into the intervals (-5,0], (0,15], (15,30], (30,60], (60,1000].
# Values at or below -5, or above 1000, fall outside every bin and become NA.
iah$avg_delay_cut <- cut(
  iah$avg_delay,
  breaks = c(-5, 0, 15, 30, 60, 1000)
)

# Then you’ll need to use the discrete form of the scale:

# Binned delay heatmap with a discrete sequential palette.
ggplot(
  data = iah,
  mapping = aes(x = DepHour, y = DayOfWeek, fill = avg_delay_cut)
) +
  geom_tile() +
  scale_fill_discrete_sequential(palette = "Mint")
## Warning: Removed 7 rows containing missing values (geom_tile).

# Binned delay heatmap with the qualitative "Cold" palette.
ggplot(
  data = iah,
  mapping = aes(x = DepHour, y = DayOfWeek, fill = avg_delay_cut)
) +
  geom_tile() +
  scale_fill_discrete_qualitative(palette = "Cold")
## Warning: Removed 7 rows containing missing values (geom_tile).

# Binned delay heatmap with the qualitative "Harmonic" palette.
ggplot(
  data = iah,
  mapping = aes(x = DepHour, y = DayOfWeek, fill = avg_delay_cut)
) +
  geom_tile() +
  scale_fill_discrete_qualitative(palette = "Harmonic")
## Warning: Removed 7 rows containing missing values (geom_tile).

Cleaning the Graph

# Polished heatmap of prop_over_15 by departure hour and weekday, with
# readable axis labels, a percent-labelled legend, and a title.
ggplot(iah, aes(DepHour, DayOfWeek)) +
  # A fixed grey outline separates neighbouring tiles.
  geom_tile(aes(fill = prop_over_15), colour = "grey50") +
  scale_x_continuous(
    "Departure Time",
    breaks = c(0, 6, 12, 18, 24),
    labels = c("midnight", "6am", "noon", "6pm", "midnight"),
    expand = c(0, 0),
    # Each tile is centred on its DepHour value, so the limits have to extend
    # half a tile beyond the first and last break. Limits tighter than the
    # tile extents censor data and drop the break at 24.
    limits = c(-0.5, 24.5)
  ) +
  scale_y_discrete("Departure Date") +
  scale_fill_continuous_sequential(
    name = "Flights delayed \nmore than \n15 mins",
    palette = "YlOrRd",
    # Label the proportion breaks as percentages.
    breaks = c(0, 0.25, 0.5, 0.75, 1),
    labels = c("0%", "25%", "50%", "75%", "100%"),
    guide = "colorbar"
  ) +
  labs(
    title = "Take an early flight from IAH to avoid delays",
    subtitle = "Based on all departing flights from George Bush Intercontinental Airport (IAH) in 2011"
  ) +
  theme_classic() +
  # The weekday axis is categorical, so its ticks and axis line are removed.
  theme(
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank(),
    legend.position = "bottom"
  ) +
  # Square tiles: one unit on x takes the same length as one unit on y.
  coord_equal()
## Warning: Removed 8 rows containing missing values (geom_tile).