# Plot

# 2021-01-08

### SOURCE
### https://github.com/MaiaPelletier/tidytuesday/tree/master/R


# (2021 - week 2 - global transit costs) ----------------------------------

#### set up ####

# Load libraries
library(tidyverse)
library(here)
library(ggforce)
library(cowplot)


# Create the directory that will hold this week's progress plots.
# The path is built with here(), whose project root is fixed when the package
# is attached above, so no setwd() call is needed. The previous hard-coded,
# user-specific setwd() stopped the script on every other machine.
# recursive = TRUE also creates the missing "images/progress" parents and
# showWarnings = FALSE keeps re-runs quiet when the directory already exists.
dir.create(
  here("images", "progress", "imgs_2021_week2"),
  recursive = TRUE,
  showWarnings = FALSE
)

# load fonts
# device = "win" registers the fonts with the Windows graphics device, which
# only exists on Windows; skip the call elsewhere instead of failing there.
if (.Platform$OS.type == "windows") {
  extrafont::loadfonts(device = "win")
}

#### data cleaning + manipulation ####

# Download the raw TidyTuesday transit cost table (2021-01-05 release).
transit_cost <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-05/transit_cost.csv"
)

# Tidy the raw table and derive the columns the plot needs:
#   * rows 538-544 are dropped by position (the original note describes them
#     as NA filler -- NOTE(review): positions are hard-coded, confirm they
#     still match the upstream file)
#   * real_cost is coerced to numeric
#   * label_cost_km_millions is the rounded cost per km formatted as "$<n>M"
#   * line_city is a "city<newline>line" factor ordered by descending
#     cost per km, used later as the facet variable
# Only lines in the US, Canada and Mexico are kept.
transit_cost_tidy <- transit_cost %>%
  slice(-(538:544)) %>%
  mutate(
    real_cost = as.numeric(real_cost),
    label_cost_km_millions = paste0("$", round(cost_km_millions), "M"),
    line_city = fct_reorder(
      factor(paste0(city, "\n", line)),
      -cost_km_millions
    )
  ) %>%
  filter(country %in% c("US", "CA", "MX"))

# Geometry of the disc the dots are scattered in:
# R is its radius, (x0, y0) is its centre.
R <- 1
x0 <- 0
y0 <- 0

# Draw `n` random radial distances for points inside a disc of size `radius`.
# The square root of a uniform draw is used so that the points are spread
# evenly over the disc's area rather than bunching up at the centre.
gen_r <- function(n, radius = R) {
  unit_draws <- runif(n)
  radius * sqrt(unit_draws)
}
# Draw `n` random angles (in radians) uniformly from [0, 2 * pi).
gen_theta <- function(n) {
  unit_draws <- runif(n)
  unit_draws * 2 * pi
}

# Build one row per dot: for every transit line, generate random polar
# coordinates inside the circle and convert them to cartesian x/y.
# The number of dots requested per line is cost_km_millions / 10; the value
# is handed to runif() as-is via gen_r()/gen_theta(), so both list-columns
# hold the same number of draws for a given line.
transit_cost_math <-
  transit_cost_tidy %>% 
  select(country, city, line, line_city, length, cost_km_millions, label_cost_km_millions) %>% 
  mutate(
    # Point generation (list-columns, one numeric vector per transit line)
    r = map(cost_km_millions / 10, gen_r),
    theta = map(cost_km_millions / 10, gen_theta)
  ) %>% 
  # Name the list-columns explicitly: calling unnest() without `cols` is
  # deprecated since tidyr 1.0 and emits a warning.
  unnest(cols = c(r, theta)) %>% 
  mutate(
    # Convert to cartesian, offset by the circle centre
    x = x0 + r * cos(theta),
    y = y0 + r * sin(theta)
  )

# Single-row data frame fed to ggforce::geom_circle(): one white backdrop
# circle centred on the origin, slightly larger (r = 1.1) than the unit disc
# the dots live in. Passing it as separate layer data avoids drawing one
# circle per row of the main plot data.
circle <- data.frame(
  x0 = 0,
  y0 = 0,
  r = 1.1
)

# Label data for geom_text(): one row per transit line (instead of one per
# dot), carrying the formatted cost label and a fixed position just below
# the circle.
cost_labels <-
  transit_cost_math %>%
  distinct(line_city, label_cost_km_millions, city, line) %>%
  add_column(x = 0, y = -1.3)

#### plot creation ####
# Small multiples, one panel per city/line (5 panels per row). Layers are
# drawn bottom to top:
#   1. a red disc whose size encodes the line length
#   2. a translucent white circle as backdrop for the dots
#   3. the randomly placed dots
#   4. the cost-per-km label underneath the circle
transit_plot <-
  ggplot(transit_cost_math) +
  geom_point(
    mapping = aes(x = 0.8, y = -0.85, size = length),
    color = "red4"
  ) +
  geom_circle(
    mapping = aes(x0 = x0, y0 = y0, r = r),
    data = circle,
    fill = "white",
    color = "white",
    alpha = 0.8
  ) +
  geom_point(
    mapping = aes(x = x, y = y),
    color = "grey25",
    alpha = 0.25,
    size = 1.5
  ) +
  geom_text(
    mapping = aes(x = x, y = y, label = label_cost_km_millions),
    data = cost_labels,
    size = 2,
    family = "Lato"
  ) +
  facet_wrap(vars(line_city), ncol = 5) +
  coord_fixed() +
  # size legend is suppressed here; a custom one is composed separately
  scale_size(range = c(1, 15), guide = guide_none()) +
  xlim(-1.5, 1.5) +
  ylim(-1.55, 1.15) +
  labs(caption = "@MaiaPelletier | Source: Transit Costs Project") +
  theme_void(base_family = "Lato") +
  theme(
    plot.background = element_rect(fill = "#efefef", color = NA),
    # large top margin leaves room for the title and legend overlay
    plot.margin = margin(t = 125, r = 25, b = 10, l = 25),
    strip.text = element_text(size = 6),
    plot.caption = element_text(size = 4)
  )

#### create legend ####

# Label row for the example panel in the legend: take the Montreal line whose
# name contains "Blue" and swap its facet title for a generic description.
cost_labels_legend <-
  cost_labels %>%
  filter(city == "Montreal" & str_detect(line, "Blue")) %>%
  mutate(line_city = "City\nTransit Line")

# Example panel for the legend: the same layer stack as the main plot, but
# restricted to the Montreal "Blue" line and titled with a generic
# "City / Transit Line" strip.
legend1 <-
  transit_cost_math %>%
  filter(city == "Montreal" & str_detect(line, "Blue")) %>%
  mutate(line_city = "City\nTransit Line") %>%
  ggplot() +
  geom_point(
    mapping = aes(x = 0.8, y = -0.85, size = length),
    color = "red4"
  ) +
  geom_circle(
    mapping = aes(x0 = x0, y0 = y0, r = r),
    data = circle,
    fill = "white",
    color = "white",
    alpha = 0.8
  ) +
  geom_point(
    mapping = aes(x = x, y = y),
    color = "grey25",
    alpha = 0.25,
    size = 1.5
  ) +
  geom_text(
    mapping = aes(x = x, y = y, label = label_cost_km_millions),
    data = cost_labels_legend,
    size = 2,
    family = "Lato"
  ) +
  facet_wrap(vars(line_city), ncol = 5) +
  coord_fixed() +
  scale_size(range = c(2, 14), guide = guide_none()) +
  xlim(-1.5, 1.5) +
  ylim(-1.5, 1.15) +
  theme_void(base_family = "Lato") +
  theme(
    strip.text = element_text(size = 6),
    plot.title = element_text(family = "Libre Caslon Display", face = "bold")
  )

# Stand-alone size legend. The plot itself is only a carrier for the size
# scale: red points are drawn at the origin and then covered by a large point
# in the background colour, so that only the centred legend keys
# (2 km / 5 km / 15 km) remain visible.
legend2 <-
  ggplot(transit_cost_math) +
  geom_point(
    mapping = aes(x = 0, y = 0, size = length),
    color = "red4"
  ) +
  geom_point(
    mapping = aes(x = 0, y = 0),
    size = 30,
    color = "#efefef"
  ) +
  scale_size(
    name = NULL,
    range = c(1, 15),
    breaks = c(2, 5, 15),
    labels = c("2 km", "5 km", "15 km"),
    guide = guide_legend(ncol = 1, label.position = "right")
  ) +
  theme_void(base_family = "Lato") +
  theme(
    legend.position = c(0.5, 0.5),
    legend.text = element_text(size = 6)
  )



#### layers ####
# Compose the final figure with cowplot: the faceted plot is the canvas, and
# the title, separator lines, legend annotations and the two legend plots are
# drawn on top using relative (0-1) canvas coordinates.
ggdraw(transit_plot) +
  draw_label("North American\ntransit costs", 
             x = 0.22, y = 0.9, fontfamily = "Libre Caslon Display", size = 18, hjust = 0.5) +
  # short leader lines from the example panel to the two annotations
  draw_line(x = c(0.55, 0.61), y = c(0.905, 0.905), color = "grey25", size = 0.25) +
  draw_line(x = c(0.55, 0.61), y = c(0.86, 0.86), color = "grey25", size = 0.25) +
  # dotted rule between the header area and the panels
  draw_line(x = c(0.1, 0.9), y = c(0.805, 0.805), color = "grey35", size = 0.5, lty = 3) +
  # vertical rule between the title and the legend
  draw_line(x = c(0.4, 0.4), y = c(0.85, 0.95), color = "grey35", size = 0.5) +
  # Each line gets cost_km_millions / 10 dots, i.e. one dot stands for
  # 10 million USD per km (the earlier "100k" wording did not match the data).
  draw_label("number of dots: cost per\nkm of the transit line\n1 dot = 10M USD/km",
             x = 0.665, y = 0.91, fontfamily = "Lato", size = 5) +
  draw_label("area of circle: length of\ntransit line in km",
             x = 0.665, y = 0.8625, fontfamily = "Lato", size = 5) +
  draw_plot(legend1, height = 0.15, width = 0.15, x = 0.45, y = 0.825) +
  draw_plot(legend2, height = 0.15, width = 0.15, x = 0.75, y = 0.825)