# Read the IAH summary CSV over HTTP into a tibble named `iah`.
iah <- read_csv(file = "http://vis.cwick.co.nz/data/iah-summary.csv")
## Rows: 154 Columns: 7
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): DayOfWeek
## dbl (6): DepHour, avg_delay, avg_delay_delayed, prop_over_15, nflights, ndests
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show each column's type and its first few values.
str(object = iah)
## spec_tbl_df [154 x 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ DepHour : num [1:154] 0 0 0 0 0 0 0 1 1 1 ...
## $ DayOfWeek : chr [1:154] "Mon" "Tue" "Wed" "Thu" ...
## $ avg_delay : num [1:154] 187.6 174.5 173 196.2 31.1 ...
## $ avg_delay_delayed: num [1:154] 187.6 174.5 173 196.2 31.1 ...
## $ prop_over_15 : num [1:154] 1 1 1 1 0.31 ...
## $ nflights : num [1:154] 7 6 10 17 29 3 7 1 3 5 ...
## $ ndests : num [1:154] 5 6 9 14 5 1 7 1 3 5 ...
## - attr(*, "spec")=
## .. cols(
## .. DepHour = col_double(),
## .. DayOfWeek = col_character(),
## .. avg_delay = col_double(),
## .. avg_delay_delayed = col_double(),
## .. prop_over_15 = col_double(),
## .. nflights = col_double(),
## .. ndests = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
Step 2: Make a heatmap:
# Store weekdays as a factor with levels in calendar order (Mon..Sun) so the
# y axis is drawn in that order instead of alphabetically.
iah[["DayOfWeek"]] <- factor(
  iah[["DayOfWeek"]],
  levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
)

# Base heatmap: one tile per (departure hour, weekday), filled by the
# proportion of flights delayed more than 15 minutes. Kept in `p` so the
# later steps can swap fill scales onto it.
p <- ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = prop_over_15))
p
## Warning: Removed 7 rows containing missing values (geom_tile).

Step 3: Experiment with scale_fill_xxx
# Base heatmap with ggplot2's default continuous fill gradient made explicit.
p +
  scale_fill_gradient()
## Warning: Removed 7 rows containing missing values (geom_tile).

Step 4: Find better color scales
# One-time setup, if the package is not installed yet:
# install.packages("colorspace")
library("colorspace")

# Plot swatches of the available HCL palettes to choose from.
hcl_palettes(plot = TRUE)

# Sequential HCL palette applied to the base heatmap.
p +
  scale_fill_continuous_sequential(palette = "Mint")
## Warning: Removed 7 rows containing missing values (geom_tile).

# Diverging HCL palette applied to the same heatmap, for comparison.
p +
  scale_fill_continuous_diverging(palette = "Blue-Red")
## Warning: Removed 7 rows containing missing values (geom_tile).

Experiment!
# Further palette trials on the base heatmap: one diverging, two sequential.
p +
  scale_fill_continuous_diverging(palette = "Tropic")
## Warning: Removed 7 rows containing missing values (geom_tile).

p +
  scale_fill_continuous_sequential(palette = "Purples3")
## Warning: Removed 7 rows containing missing values (geom_tile).

p +
  scale_fill_continuous_sequential(palette = "Peach")
## Warning: Removed 7 rows containing missing values (geom_tile).

Step 5: Transform data and change limits
# Heatmap filled by avg_delay_delayed on its raw scale.
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = avg_delay_delayed)) +
  scale_fill_gradient()
## Warning: Removed 7 rows containing missing values (geom_tile).

# Same heatmap with the fill variable log10-transformed inside the mapping,
# which compresses the largest values.
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = log10(avg_delay_delayed))) +
  scale_fill_gradient()
## Warning: Removed 7 rows containing missing values (geom_tile).

# Another way to cope with a few very large values: bin the continuous
# variable into a discrete one. cut() builds right-closed intervals
# (-5,0], (0,15], (15,30], (30,60], (60,1000]; values outside them become NA.
iah <- mutate(
  iah,
  avg_delay_cut = cut(avg_delay, breaks = c(-5, 0, 15, 30, 60, 1000))
)
# A binned (factor) fill needs the discrete variant of the colour scale.
# Sequential palette:
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = avg_delay_cut)) +
  scale_fill_discrete_sequential(palette = "Mint")
## Warning: Removed 7 rows containing missing values (geom_tile).

# Qualitative palette "Cold":
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = avg_delay_cut)) +
  scale_fill_discrete_qualitative(palette = "Cold")
## Warning: Removed 7 rows containing missing values (geom_tile).

# Qualitative palette "Harmonic":
ggplot(data = iah, mapping = aes(x = DepHour, y = DayOfWeek)) +
  geom_tile(mapping = aes(fill = avg_delay_cut)) +
  scale_fill_discrete_qualitative(palette = "Harmonic")
## Warning: Removed 7 rows containing missing values (geom_tile).

Cleaning the Graph
# Polished heatmap: proportion of flights delayed more than 15 minutes, by
# departure hour (x) and day of week (y), with readable axis labels, a
# percent-labelled legend, and a title.
ggplot(iah, aes(DepHour, DayOfWeek)) +
  # The grey outline is a constant set outside aes(), so it produces no
  # colour legend and no guides(colour = "none") is needed.
  geom_tile(aes(fill = prop_over_15), colour = "grey50") +
  scale_x_continuous(
    "Departure Time",
    breaks = c(0, 6, 12, 18, 24),
    labels = c("midnight", "6am", "noon", "6pm", "midnight"),
    expand = c(0, 0),
    # NOTE(review): breaks extend to 24 but limits stop at 23, so the final
    # "midnight" break falls outside the scale -- confirm the intended range.
    limits = c(0, 23)
  ) +
  scale_y_discrete("Departure Date") +
  scale_fill_continuous_sequential(
    name = "Flights delayed \nmore than \n15 mins",
    palette = "YlOrRd",
    breaks = c(0, 0.25, 0.5, 0.75, 1),
    labels = c("0%", "25%", "50%", "75%", "100%"),
    guide = "colorbar"
  ) +
  labs(
    title = "Take an early flight from IAH to avoid delays",
    subtitle = "Based on all departing flights from George Bush Intercontinental Airport (IAH) in 2011"
  ) +
  theme_classic() +
  # Weekdays are categories, so the y axis line and ticks add nothing.
  theme(
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank(),
    legend.position = "bottom"
  ) +
  # Equal unit lengths on both axes give square tiles.
  coord_equal()
## Warning: Removed 8 rows containing missing values (geom_tile).
