An exploration of Tube temperature data from the London Datastore.
Average temperature of each Tube line, from the hottest to the coldest.
# Data prep
avg <- data17 %>%
group_by(Line) %>%
summarise(avg = mean(Temperature)) %>%
mutate(label = str_c(round(avg, 1), "°C"))
# Plot
ggplot(avg) +
geom_segment(aes(x = reorder(Line, avg), xend = reorder(Line, avg),
y = 18, yend = avg, colour = Line),
size = 2.5, lineend = "round") +
geom_point(aes(x = Line, y = 18), shape = 21, fill = "white", colour = "black", size = 4, stroke = 1.5) +
scale_colour_manual(values = line_colours) +
coord_flip() +
ylim(18, 28) +
theme_minimal() +
theme(text = element_text(family = "Source Sans Pro", size = 12),
plot.caption = element_text(size = 8, vjust = -1, colour = "dark grey"),
axis.text.y = element_text(face = "bold", colour = "black"),
legend.position = "none",
axis.title = element_blank(),
panel.grid = element_blank(),
axis.text.x = element_blank()) +
geom_text(aes(x = Line, y = (avg + 0.5), label = label), family = "Source Sans Pro", size = 3) +
labs(caption = "* Sub-surface lines refers to the Circle, District, Hammersmith & City and Metropolitan lines.
Source: TFL via London Datastore.")
Seasonal heights and depths of each Tube line, from the hottest to the coldest.
# Data prep
seasonal <- data17 %>%
group_by(season, Line) %>%
summarise(min = min(Temperature), max = max(Temperature)) %>%
gather(extreme, value, 3:4) %>%
mutate(line_id = str_c(season, ": ", Line))
heights <- seasonal %>% filter(season == "Spring/Summer", extreme == "max")
depths <- seasonal %>% filter(season == "Autumn/Winter", extreme == "min")
seasonal <- rbind(heights, depths)
# Vectorise y axis temperature labels to include unit
temp_labels <- c("12°C", "16°C", "20°C", "24°C", "28°C", "32°C")
# Plot
ggplot(seasonal) +
geom_segment(aes(x = reorder(line_id, -value), xend = reorder(line_id, -value),
y = 0, yend = value-24, colour = Line),
size = 2.5, lineend = "round") +
geom_segment(aes(x = "Spring/Summer: Bakerloo", xend = "Autumn/Winter: Sub-surface lines *",
y = 0, yend = 0),
colour = "light grey", alpha = 0.01, size = 1.5, lineend = "round") +
geom_point(aes(x = line_id, y = 0), shape = 21, fill = "white", colour = "black", size = 4, stroke = 1.5) +
scale_y_continuous(limits = c(-13, 9),
breaks = c(-12, -8, -4, 0, 4, 8),
labels = temp_labels,
position = "right") +
scale_colour_manual(values = line_colours) +
annotate("text", x = 4, y = -3, label = "Temperatures during the height of summer",
family = "Source Sans Pro", size = 4) +
annotate("text", x = 12.5, y = 3, label = "Temperatures during the depths of winter",
family = "Source Sans Pro", size = 4) +
geom_curve(aes(x = 1.5, y = -3, xend = 1, yend = -2),
colour = "light grey", size = 0.5, curvature = -0.8,
arrow = arrow(angle = 40, length = unit(2, "mm"))) +
geom_curve(aes(x = 15, y = 3, xend = 15.5, yend = 2),
colour = "light grey", size = 0.5, curvature = -0.8,
arrow = arrow(angle = 40, length = unit(2, "mm"))) +
theme_minimal() +
theme(text = element_text(family = "Source Sans Pro"),
plot.caption = element_text(size = 8, vjust = -1, colour = "dark grey"),
axis.ticks.y = element_line(size = 1.5, colour = "light grey", lineend = "round"),
legend.position = "none",
panel.grid = element_blank(),
axis.title = element_blank(),
axis.text.x = element_blank()) +
labs(caption = "* Sub-surface lines refers to the Circle, District, Hammersmith & City and Metropolitan lines.
Source: TFL via London Datastore.")
Gap between the hottest and coldest months on each Tube line, from the biggest gap to the smallest.
# Data prep
fluctuations <- data17 %>%
group_by(Line) %>%
summarise(min = min(Temperature),
max = max(Temperature)) %>%
gather(extreme, value, 2:3)
fluctuations_spread <- fluctuations %>%
spread(extreme, value) %>%
mutate(diff = max - min) %>%
mutate(max_label = str_c(round(max, 1), "°C"),
min_label = str_c(round(min, 1), "°C"))
# Plot
ggplot(fluctuations_spread) +
geom_segment(aes(x = reorder(Line, diff), xend = reorder(Line, diff), y = min, yend = max, colour = Line),
size = 2.5, lineend = "round") +
geom_point(data = fluctuations, aes(x = Line, y = value),
shape = 21, fill = "white", colour = "black", size = 4, stroke = 1.5) +
geom_text(aes(x = Line, y = (max + 1), label = max_label), family = "Source Sans Pro", size = 3.25) +
geom_text(aes(x = Line, y = (min - 1), label = min_label), family = "Source Sans Pro", size = 3.25) +
scale_colour_manual(values = line_colours) +
coord_flip() +
theme_minimal() +
theme(text = element_text(family = "Source Sans Pro", size = 12),
plot.caption = element_text(size = 8, vjust = -1, colour = "dark grey"),
axis.text.y = element_text(face = "bold", colour = "black"),
legend.position = "none",
axis.title = element_blank(),
panel.grid = element_blank(),
axis.text.x = element_blank()) +
labs(caption = "* Sub-surface lines refers to the Circle, District, Hammersmith & City and Metropolitan lines.
Source: TFL via London Datastore.")
A year spent on each Tube line is like a year spent …where?
# import data
tube_temp <- data17
country_temp <- read_csv("country_temps.csv")
# tidy data
tube_temp <- tube_temp %>%
select(Month_short, Line, Temperature) %>%
rename(Month = Month_short)
country_temp <- country_temp %>%
select(1:14) %>%
rename(Jul = July)
# create function
tube_country <- function(line_in, n_country){
# calculate sum of absolute monthly differences between this tube line and each country
tube_line <- tube_temp %>% filter(Line == line_in)
t_Jan <- tube_line$Temperature[tube_line$Month == "Jan"]
t_Feb <- tube_line$Temperature[tube_line$Month == "Feb"]
t_Mar <- tube_line$Temperature[tube_line$Month == "Mar"]
t_Apr <- tube_line$Temperature[tube_line$Month == "Apr"]
t_May <- tube_line$Temperature[tube_line$Month == "May"]
t_Jun <- tube_line$Temperature[tube_line$Month == "Jun"]
t_Jul <- tube_line$Temperature[tube_line$Month == "Jul"]
t_Aug <- tube_line$Temperature[tube_line$Month == "Aug"]
t_Sep <- tube_line$Temperature[tube_line$Month == "Sep"]
t_Oct <- tube_line$Temperature[tube_line$Month == "Oct"]
t_Nov <- tube_line$Temperature[tube_line$Month == "Nov"]
t_Dec <- tube_line$Temperature[tube_line$Month == "Dec"]
country_temp <- country_temp %>%
mutate(tube_diff_temp = abs(Jan - t_Jan) + abs(Feb - t_Feb) + abs(Mar - t_Mar) + abs(Apr - t_Apr) +
abs(May - t_May) + abs(Jun - t_Jun) + abs(Jul - t_Jul) + abs(Aug - t_Aug) +
abs(Sep - t_Sep) + abs(Oct - t_Oct) + abs(Nov - t_Nov) + abs(Dec - t_Dec)) %>%
top_n(n_country, desc(tube_diff_temp))
# summarise results of calculations in table to be used in charts
chart_data <- country_temp %>%
gather(Month, Temperature, Jan:Dec) %>%
select(-Annual, -tube_diff_temp) %>%
rename(Line = "ISO") %>%
select(Month, Line, Temperature) %>%
bind_rows(tube_line) %>%
rename(Place = Line)
return(chart_data)
}
# create tibble of Tube lines and run funcrion for each Tube line
country_comparison <- tube_temp %>%
select(Line) %>%
distinct() %>%
mutate(data = purrr::map(Line, tube_country, n_country = 1)) %>%
unnest() %>%
mutate(colour_category = ifelse(Place == Line, Place, "Country"),
line_category = ifelse(Place == Line, "Tube", "Country"))
# get country names for country codes and add column for facet title
codes <- codelist %>%
select(iso.name.en, iso3c) %>%
na.omit() %>%
rename(Place = iso3c,
country_name = iso.name.en)
# separate datasets to plot countries and tube lines separately
country_data <- country_comparison %>% filter(line_category == "Country")
country_data <- country_data %>%
left_join(codes, by = "Place") %>%
mutate(country_name = fct_recode(country_name, "the Bahamas" = "Bahamas (the)", "Vietnam" = "Viet Nam"))
tube_data <- country_comparison %>%
filter(line_category == "Tube")
# vectorise colours (taken from http://oobrien.com/2012/01/tube-colours/)
line_colours <- c("Bakerloo" = "#996633",
"Central" = "#CC3333",
"Jubilee" = "#868F98",
"Northern" = "#000000",
"Piccadilly" = "#000099",
"Sub-surface lines *" = "#CC9999",
"Victoria" = "#0099CC",
"Waterloo and City" = "#66CCCC")
# factorise months to override alphabetical ordering
month_levels <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
country_data$Month <- factor(country_data$Month, levels = month_levels)
tube_data$Month <- factor(tube_data$Month, levels = month_levels)
# create data frame of country labels
country_labels <- country_data %>%
select(Line, country_name) %>%
distinct(Line, country_name) %>%
mutate(country_label = str_c("Most like ", country_name))
# create data frame of start and end points
point_start <- tube_data %>% filter(Month == "Jan") %>% select(Line, Month, Temperature)
point_end <- tube_data %>% filter(Month == "Dec") %>% select(Line, Month, Temperature)
# plot chart
ggplot() +
geom_line(data = country_data,
aes(x = Month, y = Temperature, group = Line, colour = Line),
alpha = 0.5, size = 1, lineend = "round") +
geom_line(data = tube_data,
aes(x = Month, y = Temperature, group = Line, colour = Line),
size = 2.5, lineend = "round") +
geom_text(data = country_labels,
aes(x = 6.5, y = 34, label = country_label, colour = Line),
family = "Source Sans Pro", fontface = "bold", size = 3.25, face = "bold") +
geom_point(data = point_start,
aes(x = Month, y = Temperature),
shape = 21, fill = "white", colour = "black", size = 4, stroke = 1.5) +
geom_point(data = point_end,
aes(x = Month, y = Temperature),
shape = 21, fill = "white", colour = "black", size = 4, stroke = 1.5) +
scale_colour_manual(values = line_colours) +
scale_y_continuous(limits = c(10, 35),
breaks = c(10, 20, 30),
labels = c("10°C", "20°C", "30°C")) +
scale_x_discrete(breaks = c("Jan", "Apr", "Jul", "Oct")) +
facet_wrap(~ Line, ncol = 4) +
theme_minimal() +
theme(text = element_text(family = "Source Sans Pro", size = 12),
plot.caption = element_text(size = 8, vjust = -1, colour = "dark grey"),
axis.ticks.y = element_line(size = 1.5, colour = "light grey", lineend = "round"),
axis.text.x = element_text(),
strip.text = element_text(size = 10, face = "bold"),
legend.position = "none",
axis.title = element_blank(),
panel.grid = element_blank(),
strip.switch.pad.wrap = unit(100, "cm")) +
labs(caption = "* Sub-surface lines refers to the Circle, District, Hammersmith & City and Metropolitan lines.
Source: TFL via London Datastore and World Bank Climate Change Data Portal.")
The Tube is warming, but some lines are hotting up faster than others.
# Data
seasonal_data <- data %>%
group_by(Year, Line) %>%
summarise(temp = mean(Temperature)) %>%
ungroup() %>%
filter(Year != "2018")
first_year <- seasonal_data %>%
filter(Year == "2013") %>%
select(-Year) %>%
rename(first_temp = "temp")
seasonal_data <- seasonal_data %>%
left_join(first_year) %>%
mutate(change_temp = temp - first_temp)
# Vectorise y axis temperature labels to include unit
temp_labels <- c("0°C", "+1°C", "+2°C", "+3°C", "+4°C")
# Plot
ggplot(seasonal_data, aes(x = Year, y = change_temp, colour = Line)) +
geom_line(size = 2.5, lineend = "round") +
geom_point(aes(x = Year, y = change_temp), shape = 21, fill = "white", colour = "black", size = 4, stroke = 1.5) +
facet_wrap(~ Line, ncol = 4) +
theme() +
scale_y_continuous(limits = c(-0.5, 4),
breaks = c(0, 1, 2, 3, 4),
labels = temp_labels) +
scale_x_continuous(limits = c(2012, 2018),
breaks = c(2012, 2013, 2017, 2018),
labels = c(" ", "2013", "2017", " ")) +
theme(strip.text = element_text(size = 10, face = "bold"),
text = element_text(family = "Source Sans Pro", size = 12),
plot.caption = element_text(size = 8, vjust = -1, colour = "dark grey"),
legend.position = "none",
axis.title = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_line(size = 1.5, colour = "light grey", lineend = "round"),
panel.background = element_blank(),
strip.background = element_blank(),
strip.switch.pad.wrap = unit(100, "cm")) +
scale_colour_manual(values = line_colours, name = "Key") +
# scale_y_continuous(breaks = c(0, 5, 10, 15, 20, 25, 30), labels = temp_labels) +
labs(caption = "* Sub-surface lines refers to the Circle, District, Hammersmith & City and Metropolitan lines.
Source: TFL via London Datastore.")