An exploration of Tube temperature data from the London Datastore.

How hot is the Tube?

Average temperature of each Tube line, from the hottest to the coldest.

# Data prep
avg <- data17 %>% 
  group_by(Line) %>% 
  summarise(avg = mean(Temperature)) %>% 
  mutate(label = str_c(round(avg, 1), "°C"))
  
# Plot
ggplot(avg) +
  geom_segment(aes(x = reorder(Line, avg), xend = reorder(Line, avg), 
                   y = 18, yend = avg, colour = Line), 
               size = 2.5, lineend = "round") +
  geom_point(aes(x = Line, y = 18), shape = 21, fill = "white", colour = "black", size = 4, stroke = 1.5) +
  scale_colour_manual(values = line_colours) +
  coord_flip() +
  ylim(18, 28) +
  theme_minimal() +
  theme(text = element_text(family = "Source Sans Pro", size = 12),
        plot.caption = element_text(size = 8, vjust = -1, colour = "dark grey"),
        axis.text.y = element_text(face = "bold", colour = "black"),
        legend.position = "none",
        axis.title = element_blank(),
        panel.grid = element_blank(),
        axis.text.x = element_blank()) +
  geom_text(aes(x = Line, y = (avg + 0.5), label = label), family = "Source Sans Pro", size = 3) +
  labs(caption = "* Sub-surface lines refers to the Circle, District, Hammersmith & City and Metropolitan lines. 

       Source: TFL via London Datastore.")

The baking Bakerloo

Seasonal heights and depths of each Tube line, from the hottest to the coldest.

# Data prep
seasonal <- data17 %>% 
  group_by(season, Line) %>% 
  summarise(min = min(Temperature), max = max(Temperature)) %>%
  gather(extreme, value, 3:4) %>% 
  mutate(line_id = str_c(season, ": ", Line))

heights <- seasonal %>% filter(season == "Spring/Summer", extreme == "max")
depths <- seasonal %>% filter(season == "Autumn/Winter", extreme == "min")
seasonal <- rbind(heights, depths)

# Vectorise y axis temperature labels to include unit
temp_labels <- c("12°C", "16°C", "20°C", "24°C", "28°C", "32°C")  

# Plot
ggplot(seasonal) + 
  geom_segment(aes(x = reorder(line_id, -value), xend = reorder(line_id, -value),
                   y = 0, yend = value-24, colour = Line), 
               size = 2.5, lineend = "round") +
  geom_segment(aes(x = "Spring/Summer: Bakerloo", xend = "Autumn/Winter: Sub-surface lines *",
                   y = 0, yend = 0),
               colour = "light grey", alpha = 0.01, size = 1.5, lineend = "round") +
  geom_point(aes(x = line_id, y = 0), shape = 21, fill = "white", colour = "black", size = 4, stroke = 1.5) +
  scale_y_continuous(limits = c(-13, 9),
                     breaks = c(-12, -8, -4, 0, 4, 8),
                     labels = temp_labels,
                     position = "right") +
  scale_colour_manual(values = line_colours) +
  annotate("text", x = 4, y = -3, label = "Temperatures during the height of summer", 
           family = "Source Sans Pro", size = 4) +
  annotate("text", x = 12.5, y = 3, label = "Temperatures during the depths of winter", 
           family = "Source Sans Pro", size = 4) +
  geom_curve(aes(x = 1.5, y = -3, xend = 1, yend = -2),
              colour = "light grey", size = 0.5, curvature = -0.8,
             arrow = arrow(angle = 40, length = unit(2, "mm"))) +
  geom_curve(aes(x = 15, y = 3, xend = 15.5, yend = 2),
              colour = "light grey", size = 0.5, curvature = -0.8,
             arrow = arrow(angle = 40, length = unit(2, "mm"))) +
  theme_minimal() +
  theme(text = element_text(family = "Source Sans Pro"),
                plot.caption = element_text(size = 8, vjust = -1, colour = "dark grey"),
        axis.ticks.y = element_line(size = 1.5, colour = "light grey", lineend = "round"),
        legend.position = "none",
        panel.grid = element_blank(),
        axis.title = element_blank(),
        axis.text.x = element_blank()) +
  labs(caption = "* Sub-surface lines refers to the Circle, District, Hammersmith & City and Metropolitan lines. 

       Source: TFL via London Datastore.")

Seasonal fluctuations

Gap between the hottest and coldest months on each Tube line, from the biggest gap to the smallest.

# Data prep
fluctuations <- data17 %>% 
  group_by(Line) %>% 
  summarise(min = min(Temperature),
            max = max(Temperature)) %>% 
  gather(extreme, value, 2:3) 

fluctuations_spread <- fluctuations %>% 
  spread(extreme, value) %>% 
  mutate(diff = max - min) %>% 
  mutate(max_label = str_c(round(max, 1), "°C"),
         min_label = str_c(round(min, 1), "°C"))

# Plot
ggplot(fluctuations_spread) +
  geom_segment(aes(x = reorder(Line, diff), xend = reorder(Line, diff), y = min, yend = max, colour = Line), 
                 size = 2.5, lineend = "round") +
  geom_point(data = fluctuations, aes(x = Line, y = value), 
                 shape = 21, fill = "white", colour = "black", size = 4, stroke = 1.5) +
  geom_text(aes(x = Line, y = (max + 1), label = max_label), family = "Source Sans Pro", size = 3.25) +
  geom_text(aes(x = Line, y = (min - 1), label = min_label), family = "Source Sans Pro", size = 3.25) +
  scale_colour_manual(values = line_colours) +
  coord_flip() +
  theme_minimal() +
  theme(text = element_text(family = "Source Sans Pro", size = 12),
        plot.caption = element_text(size = 8, vjust = -1, colour = "dark grey"),
        axis.text.y = element_text(face = "bold", colour = "black"),
        legend.position = "none",
        axis.title = element_blank(),
        panel.grid = element_blank(),
        axis.text.x = element_blank()) +
  labs(caption = "* Sub-surface lines refers to the Circle, District, Hammersmith & City and Metropolitan lines. 

       Source: TFL via London Datastore.")

Country comparisons

A year spent on each Tube line is like a year spent …where?

# import data
tube_temp <- data17
country_temp <- read_csv("country_temps.csv")

# tidy data
tube_temp <- tube_temp %>% 
  select(Month_short, Line, Temperature) %>% 
  rename(Month = Month_short)

country_temp <- country_temp %>% 
  select(1:14) %>% 
  rename(Jul = July)

# create function
tube_country <- function(line_in, n_country){
  
  # calculate sum of absolute monthly differences between this tube line and each country
  tube_line <- tube_temp %>% filter(Line == line_in)
  
  t_Jan <- tube_line$Temperature[tube_line$Month == "Jan"]
  t_Feb <- tube_line$Temperature[tube_line$Month == "Feb"]
  t_Mar <- tube_line$Temperature[tube_line$Month == "Mar"]
  t_Apr <- tube_line$Temperature[tube_line$Month == "Apr"]
  t_May <- tube_line$Temperature[tube_line$Month == "May"]
  t_Jun <- tube_line$Temperature[tube_line$Month == "Jun"]
  t_Jul <- tube_line$Temperature[tube_line$Month == "Jul"]
  t_Aug <- tube_line$Temperature[tube_line$Month == "Aug"]
  t_Sep <- tube_line$Temperature[tube_line$Month == "Sep"]
  t_Oct <- tube_line$Temperature[tube_line$Month == "Oct"]
  t_Nov <- tube_line$Temperature[tube_line$Month == "Nov"]
  t_Dec <- tube_line$Temperature[tube_line$Month == "Dec"]
  
  country_temp <- country_temp %>%
    mutate(tube_diff_temp = abs(Jan - t_Jan) + abs(Feb - t_Feb) + abs(Mar - t_Mar) + abs(Apr - t_Apr) +
                             abs(May - t_May) + abs(Jun - t_Jun) + abs(Jul - t_Jul) + abs(Aug - t_Aug) +
                             abs(Sep - t_Sep) + abs(Oct - t_Oct) + abs(Nov - t_Nov) + abs(Dec - t_Dec)) %>% 
    top_n(n_country, desc(tube_diff_temp))
  
  # summarise results of calculations in table to be used in charts
  chart_data <- country_temp %>% 
    gather(Month, Temperature, Jan:Dec) %>%
    select(-Annual, -tube_diff_temp) %>% 
    rename(Line = "ISO") %>% 
    select(Month, Line, Temperature) %>% 
    bind_rows(tube_line) %>%
    rename(Place = Line)
  
  return(chart_data)
} 

# create tibble of Tube lines and run funcrion for each Tube line
country_comparison <- tube_temp %>% 
  select(Line) %>% 
  distinct() %>% 
  mutate(data = purrr::map(Line, tube_country, n_country = 1)) %>%
  unnest() %>% 
  mutate(colour_category = ifelse(Place == Line, Place, "Country"),
         line_category = ifelse(Place == Line, "Tube", "Country"))

# get country names for country codes and add column for facet title
codes <- codelist %>% 
            select(iso.name.en, iso3c) %>% 
            na.omit() %>% 
            rename(Place = iso3c, 
                   country_name = iso.name.en)

# separate datasets to plot countries and tube lines separately
country_data <- country_comparison %>% filter(line_category == "Country")
  
country_data <- country_data %>% 
  left_join(codes, by = "Place") %>% 
  mutate(country_name = fct_recode(country_name, "the Bahamas" = "Bahamas (the)", "Vietnam" = "Viet Nam"))

tube_data <- country_comparison %>% 
  filter(line_category == "Tube")

# vectorise colours (taken from http://oobrien.com/2012/01/tube-colours/)
line_colours <- c("Bakerloo" = "#996633", 
                    "Central" = "#CC3333",
                    "Jubilee" = "#868F98",
                    "Northern" = "#000000",
                    "Piccadilly" = "#000099", 
                    "Sub-surface lines *" = "#CC9999",
                    "Victoria" = "#0099CC",
                    "Waterloo and City" = "#66CCCC")
  
# factorise months to override alphabetical ordering
month_levels <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
country_data$Month <- factor(country_data$Month, levels = month_levels)
tube_data$Month <- factor(tube_data$Month, levels = month_levels)

# create data frame of country labels
country_labels <- country_data %>% 
  select(Line, country_name) %>% 
  distinct(Line, country_name) %>% 
  mutate(country_label = str_c("Most like ", country_name))

# create data frame of start and end points
point_start <- tube_data %>% filter(Month == "Jan") %>% select(Line, Month, Temperature)
point_end <- tube_data %>% filter(Month == "Dec") %>% select(Line, Month, Temperature)

# plot chart
ggplot() +
  geom_line(data = country_data, 
            aes(x = Month, y = Temperature, group = Line, colour = Line), 
            alpha = 0.5, size = 1, lineend = "round") +
  geom_line(data = tube_data, 
            aes(x = Month, y = Temperature, group = Line, colour = Line), 
            size = 2.5, lineend = "round") +
  geom_text(data = country_labels, 
            aes(x = 6.5, y = 34, label = country_label, colour = Line), 
            family = "Source Sans Pro", fontface = "bold", size = 3.25, face = "bold") +
  geom_point(data = point_start,
             aes(x = Month, y = Temperature), 
             shape = 21, fill = "white", colour = "black", size = 4, stroke = 1.5) +
  geom_point(data = point_end,
             aes(x = Month, y = Temperature), 
             shape = 21, fill = "white", colour = "black", size = 4, stroke = 1.5) +
  scale_colour_manual(values = line_colours) +
  scale_y_continuous(limits = c(10, 35),
                     breaks = c(10, 20, 30),
                     labels = c("10°C", "20°C", "30°C")) +
  scale_x_discrete(breaks = c("Jan", "Apr", "Jul", "Oct")) +
  facet_wrap(~ Line, ncol = 4) +
  theme_minimal() +
  theme(text = element_text(family = "Source Sans Pro", size = 12),
        plot.caption = element_text(size = 8, vjust = -1, colour = "dark grey"),
        axis.ticks.y = element_line(size = 1.5, colour = "light grey", lineend = "round"),
        axis.text.x = element_text(),
        strip.text = element_text(size = 10, face = "bold"),
        legend.position = "none",
        axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.switch.pad.wrap = unit(100, "cm")) +
  labs(caption = "* Sub-surface lines refers to the Circle, District, Hammersmith & City and Metropolitan lines. 

       Source: TFL via London Datastore and World Bank Climate Change Data Portal.")

It’s getting hot(ter) in here.

The Tube is warming, but some lines are hotting up faster than others.

# Data
seasonal_data <- data %>% 
  group_by(Year, Line) %>% 
  summarise(temp = mean(Temperature)) %>% 
  ungroup() %>% 
  filter(Year != "2018")

first_year <- seasonal_data %>% 
  filter(Year == "2013") %>% 
  select(-Year) %>% 
  rename(first_temp = "temp")

seasonal_data <- seasonal_data %>% 
  left_join(first_year) %>% 
  mutate(change_temp = temp - first_temp)

# Vectorise y axis temperature labels to include unit
temp_labels <- c("0°C", "+1°C", "+2°C", "+3°C", "+4°C")

# Plot
ggplot(seasonal_data, aes(x = Year, y = change_temp, colour = Line)) +
  geom_line(size = 2.5, lineend = "round") +
  geom_point(aes(x = Year, y = change_temp), shape = 21, fill = "white", colour = "black", size = 4, stroke = 1.5) +
  facet_wrap(~ Line, ncol = 4) +
  theme() +
  scale_y_continuous(limits = c(-0.5, 4),
                     breaks = c(0, 1, 2, 3, 4),
                     labels = temp_labels) +
  scale_x_continuous(limits = c(2012, 2018),
                     breaks = c(2012, 2013, 2017, 2018),
                     labels = c(" ", "2013", "2017", " ")) +
  theme(strip.text = element_text(size = 10, face = "bold"),
        text = element_text(family = "Source Sans Pro", size = 12),
        plot.caption = element_text(size = 8, vjust = -1, colour = "dark grey"),
        legend.position = "none",
        axis.title = element_blank(),
        axis.ticks.x = element_blank(),
        axis.ticks.y = element_line(size = 1.5, colour = "light grey", lineend = "round"),
        panel.background = element_blank(),
        strip.background = element_blank(),
        strip.switch.pad.wrap = unit(100, "cm")) +
  scale_colour_manual(values = line_colours, name = "Key") +
  # scale_y_continuous(breaks = c(0, 5, 10, 15, 20, 25, 30), labels = temp_labels) +
  labs(caption = "* Sub-surface lines refers to the Circle, District, Hammersmith & City and Metropolitan lines. 

       Source: TFL via London Datastore.")