Cartography Exploration

Jason Ola

2023-01-24

Import packages

library(tidyverse)
library(sf)
library(leaflet)
library(patchwork)
library(osmdata)

Load the data

Let’s get the csv data, you can find the IHME data here, rest is on my github

ihme <- read_csv("data/IHME_DAH_DATABASE_1990_2019_Y2020M04D23.CSV", 
                 na = c('NA', '-', ''))

Let’s work with HIV, malaria and tuberculosis

diseases <- c("hiv","mal","tb")

Compute sum of spendings by country and change the names to match the shapes data

dollar_spent_2019 <- ihme %>% 
  filter(year == 2019) %>% 
  select(source, contains(diseases)) %>% 
  pivot_longer(cols = contains(diseases)) %>% 
  group_by(source) %>% 
  summarise(dollar_spent = sum(value, na.rm = T)) %>% 
  mutate(source = str_replace_all(source, 
                              c("New_Zealand" = "New Zealand",
                                "United_Kingdom" = "United Kingdom",
                                "United_States" = "United States of America")))

Create the intervals with labels for legend

dollars <- dollar_spent_2019 %>% 
  pull(dollar_spent)

cuts <- dollars %>% 
  classInt::classIntervals(n = 5) %>% 
  pluck("brks")

labels <- tibble(cuts1 = cuts, cuts2 = lead(cuts)) %>% 
  head(-1) %>% 
  mutate(cuts1 = scales::dollar_format()(cuts1),
         cuts2 = scales::dollar_format()(cuts2),
         label = paste0(cuts1, " - ", cuts2) %>% fct_inorder())

Apply the cuts

dollar_spent_2019 <- dollar_spent_2019 %>% 
  mutate(cut = cut(dollar_spent,
                   breaks = cuts,
                   labels = labels$label))

Read the shape file

world <- st_read("data/110m_cultural/ne_110m_admin_0_countries.shp")

## Reading layer `ne_110m_admin_0_countries' from data source 
##   `/Users/jasonola/Desktop/datascience/R_advanced_data_visualisation/Report_3/data/110m_cultural/ne_110m_admin_0_countries.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 177 features and 94 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -90 xmax: 180 ymax: 83.64513
## Geodetic CRS:  WGS 84

Join the shape and the csv

world <- world %>% 
  left_join(dollar_spent_2019, by = c("SOVEREIGNT" = "source"))

Plot the data

world_dollar_spent <- ggplot()+
  geom_sf(data = world %>% filter(SOVEREIGNT != "Antarctica"),
          mapping = aes(fill = cut),
          color = "darkgray",
          size = 0.2)+
  scale_fill_brewer(na.value = "transparent")+
  labs(title = "Variation of dollars spent by country in 2019",
       subtitle = "For Tuberculosis, Malaria and HIV",
       fill = "",
       caption = "Source : www.healthdata.org")+
  theme_void()+
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.1),
        plot.subtitle = element_text(hjust = 0.075),
        plot.caption = element_text(face = "italic",
                                    hjust = 0.9,
                                    size = 7))
world_dollar_spent

Let’s do the same map but using countries that recieved money

dollar_received_2002 <- ihme %>% 
  filter(year == 2002) %>% 
  select(recipient_country, contains(diseases)) %>% 
  pivot_longer(cols = contains(diseases)) %>% 
  group_by(recipient_country) %>% 
  summarise(dollar_received = sum(value, na.rm = T)) %>% 
  mutate(dollar_received = na_if(dollar_received,0))

cuts_recieved <- dollar_received_2002 %>% 
  pull(dollar_received) %>% 
  classInt::classIntervals(n = 5) %>% 
  pluck("brks")

labels_recieved <- tibble(cut1 = round(cuts), cut2 = round(lead(cuts))) %>% 
  head(-1) %>% 
  mutate(cut1 = scales::dollar_format()(cut1),
         cut2 = scales::dollar_format()(cut2),
         label = paste0(cut1, " - ", cut2) %>% fct_inorder())

Replace some names of countries

dollar_received_2002 <- dollar_received_2002 %>% 
  mutate(labels_recieved = cut(dollar_received,
                               breaks = cuts_recieved,
                               label = labels_recieved$label),
         recipient_country = str_replace_all(recipient_country, 
                                 c("Cote d'Ivoire" = "Ivory Coast",
                                    "Serbia" = "Republic of Serbia",
                                    "Tanzania" = "United Republic of Tanzania",
                                    "Swaziland" = "eSwatini")))

world <- world %>% 
  left_join(dollar_received_2002, by = c("SOVEREIGNT" = "recipient_country"))

Plot the map

world_dollar_recieved <- ggplot()+
  geom_sf(data = world %>% filter(SOVEREIGNT != "Antarctica"),
          mapping = aes(fill = labels_recieved),
          color = "darkgray",
          size = 0.2)+
  scale_fill_brewer(na.value = "transparent",
                    palette = "Greens")+
  labs(title = "Variation of dollars recieved by country in 2002",
       subtitle = "For Tuberculosis, Malaria and HIV",
       fill = "",
       caption = "Source : www.healthdata.org")+
  theme_void()+
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.1),
        plot.subtitle = element_text(hjust = 0.07),
        plot.caption = element_text(face = "italic",
                                    hjust = 0.9,
                                    size = 7))
world_dollar_recieved

Let’s create the regions shapes

sf_use_s2(FALSE)
regions <- world %>% 
  group_by(REGION_WB) %>% 
  summarise()

regions %>% pull(REGION_WB) %>% 
  sort

## [1] "Antarctica"                 "East Asia & Pacific"       
## [3] "Europe & Central Asia"      "Latin America & Caribbean" 
## [5] "Middle East & North Africa" "North America"             
## [7] "South Asia"                 "Sub-Saharan Africa"

ihme %>% 
  pull(gbd_region) %>% 
  unique

##  [1] "North Africa/Middle East"     "Sub-Saharan Africa, Central" 
##  [3] "Europe, Central"              "Caribbean"                   
##  [5] "Latin America, Southern"      "Asia, Central"               
##  [7] "Sub-Saharan Africa, Eastern"  "Sub-Saharan Africa, Western" 
##  [9] "Asia, South"                  "Europe, Eastern"             
## [11] "Latin America, Andean"        "Latin America, Tropical"     
## [13] "Sub-Saharan Africa, Southern" "Asia, East"                  
## [15] "Latin America, Central"       "Oceania"                     
## [17] "Asia, Southeast"              "Asia Pacific, high-income"   
## [19] "Unallocated/Unspecified"      "Global"                      
## [21] "Europe, Western"

We see we have different names, we have to rename the gdb regions before joining

ihme_gbd_reg <- ihme %>% 
  mutate(gbd_region = str_replace_all(
    gbd_region, c("North Africa/Middle East" = "Middle East & North Africa",
                  "Caribbean" = "Latin America & Caribbean",
                  "Sub-Saharan Africa, Eastern" = "Sub-Saharan Africa",
                  "Europe, Eastern" = "Europe & Central Asia",
                  "Sub-Saharan Africa, Southern" = "Sub-Saharan Africa",
                  "Oceania" = "East Asia & Pacific",
                  "Sub-Saharan Africa, Central" = "Sub-Saharan Africa",
                  "Latin America, Southern" = "Latin America & Caribbean",
                  "Sub-Saharan Africa, Western" = "Sub-Saharan Africa",
                  "Latin America, Andean" = "Latin America & Caribbean",
                  "Asia, East" = "East Asia & Pacific",
                  "Asia, Southeast" = "East Asia & Pacific",
                  "Europe, Central" = "Europe & Central Asia",
                  "Asia, Central" = "South Asia",
                  "Asia, South" = "South Asia",
                  "Latin America, Tropical" = "Latin America & Caribbean",
                  "Latin America, Central" = "Latin America & Caribbean",
                  "Asia Pacific, high-income" = "East Asia & Pacific",
                  "Europe, Western" = "Europe & Central Asia"
                  )))

ihme_gbd_reg_2002 <- ihme_gbd_reg %>% 
  filter(year == 2002) %>% 
  select(gbd_region, contains(diseases)) %>% 
  pivot_longer(cols = contains(diseases)) %>% 
  group_by(gbd_region) %>% 
  summarise(dollar_received = sum(value, na.rm = T))

dollar_recieved_region <- ihme_gbd_reg_2002 %>% 
  pull(dollar_received) %>% 
  classInt::classIntervals(n = 3) %>% 
  pluck("brks")

lab_2002 <- dollar_recieved_region %>%  
  as_tibble() %>% 
  mutate(value2 = lead(value)) %>% 
  head(-1) %>% 
  mutate(label = paste0(round(value), " - ", round(value2), " $"))

ihme_gbd_reg_2002 <- ihme_gbd_reg_2002 %>% 
  mutate(cut = cut(dollar_received,
                   breaks = dollar_recieved_region,
                   labels = lab_2002$label,
                   include.lowest = T))

regions <- regions %>% 
  left_join(ihme_gbd_reg_2002, by = c("REGION_WB" = "gbd_region"))

regions %>%
  filter(REGION_WB != "Antarctica") %>% 
  ggplot()+
  geom_sf(aes(fill = cut),
          size = 0.1)+
  labs(title = "Variation of dollars recieved by region in 2002",
       subtitle = "For Tuberculosis, Malaria and HIV",
       fill = "",
       caption = "Source : www.healthdata.org")+
  scale_fill_brewer(palette = "Greens")+
  theme_void()+
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.1),
        plot.subtitle = element_text(hjust = 0.07),
        plot.caption = element_text(face = "italic",
                                    hjust = 0.9,
                                    size = 8))

Interactive with leaflet

Let’s add interactions using leaflet

First create the labels we will use, we want to show country and how many dollars it received

leaflet_labs <- paste(
  "Country: ", world$SOVEREIGNT, "<br/>",
  "Dollars recieved: ", world$dollar_received," $", "<br/>"
) %>%
  lapply(htmltools::HTML)

Then we plot our shape in leaflet with the addPolygons and link our data with fillColor and the text we prepared with label.

factpal <- colorFactor(palette = "Greens", world$labels_recieved)

leaflet(world)%>%
  addPolygons(
    stroke = TRUE,
    color = 'gray',
    weight = 1.5,
    fillColor = factpal(world$labels_recieved),
    label = leaflet_labs,
    highlightOptions = highlightOptions(color = "white", weight = 3,
      bringToFront = TRUE)) %>% 
    setView(lng = 0, lat = 0, zoom = 1) %>% 
  addLegend("bottomright",pal = factpal, 
            values = world$labels_recieved,
            title = "Variation of dollars recieved by country in 2002")

Combining plots

First we get our bbox

africa_bbox <- world %>% 
  filter(CONTINENT == "Africa") %>% 
  st_bbox() %>% 
  st_as_sfc() %>% 
  st_buffer(2)

Then the shape of the continent we want to highlight, here Africa

africa <- world %>% 
  filter(CONTINENT == "Africa")

Put the bbox around Africa in the world map

inset <- world %>% filter(SOVEREIGNT != "Antarctica") %>% 
  ggplot()+
  geom_sf(size = 0.1)+
  geom_sf(data = africa_bbox,
          fill = NA,
          color = "brown")+
  theme_void()
inset

Plot the data on the Africa map

africa_map <- africa %>% 
  ggplot()+
  geom_sf(data = world %>% filter(CONTINENT != "Africa"),
          fill = NA,
          color = "gray",
          size = 0.3)+
  geom_sf(mapping = aes(fill = labels_recieved),
          color = "white",
          size = 0.1)+
  scale_fill_brewer(palette = "Greens",
                    na.value = "gray")+
  labs(fill = "")+
  coord_sf(xlim = st_bbox(africa_bbox)[c(1,3)],
           ylim = st_bbox(africa_bbox)[c(2,4)])+
  theme_void()+
  theme(legend.position = "bottom")
africa_map

Put it all together with patchwork

layout <- c(
  area(t = 1, l = 0, b = 7, r = 3),
  area(t = 1, l = 1, b = 6, r = 10)
)

inset + africa_map +
  plot_layout(design = layout) +
  plot_annotation(title = "Dollars recieved in African countries (2002)",
                  subtitle = "For Tuberculosis, Malaria and HIV\n",
                  caption = "Source : www.healthdata.org",
                  theme = theme(plot.title = element_text(hjust = 0.5),
                                plot.subtitle = element_text(hjust = 0.5),
                                plot.caption = element_text(face = "italic")
                                ))

Open street map

Data found on https://osdatahub.os.uk/downloads/open?_ga=2.3518960.1516077193.1626005802-1560027532.1626005802

london <- st_read("data/bdline_essh_gb/Data/greater_london_const_region.shp")

## Reading layer `greater_london_const_region' from data source 
##   `/Users/jasonola/Desktop/datascience/R_advanced_data_visualisation/Report_3/data/bdline_essh_gb/Data/greater_london_const_region.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 14 features and 15 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## Projected CRS: OSGB36 / British National Grid

ext_london <- st_read("data/bdline_essh_gb/Data/county_region.shp")

## Reading layer `county_region' from data source 
##   `/Users/jasonola/Desktop/datascience/R_advanced_data_visualisation/Report_3/data/bdline_essh_gb/Data/county_region.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 25 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 212680.1 ymin: 34912.7 xmax: 655989 ymax: 588517.4
## Projected CRS: OSGB36 / British National Grid

london_bbox <- london %>% 
  st_bbox() %>% 
  st_as_sfc() %>% 
  st_buffer(2)

urban_reg <- st_read("data/strtgi_essh_gb/data/urban_region.shp") %>% 
  rmapshaper::ms_simplify()

## Reading layer `urban_region' from data source 
##   `/Users/jasonola/Desktop/datascience/R_advanced_data_visualisation/Report_3/data/strtgi_essh_gb/data/urban_region.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 24764 features and 3 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 62627 ymin: 7858 xmax: 655503 ymax: 1214258
## Projected CRS: OSGB36 / British National Grid

green_space <- st_read("data/OS Open Greenspace (ESRI Shape File) GB/data/GB_GreenspaceSite.shp") %>% 
  rmapshaper::ms_simplify()

## Reading layer `GB_GreenspaceSite' from data source 
##   `/Users/jasonola/Desktop/datascience/R_advanced_data_visualisation/Report_3/data/OS Open Greenspace (ESRI Shape File) GB/data/GB_GreenspaceSite.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 148263 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XYZ
## Bounding box:  xmin: 9819.84 ymin: 8274.57 xmax: 655229.5 ymax: 1214133
## z_range:       zmin: 0 zmax: 0
## Projected CRS: OSGB36 / British National Grid

rivers <- st_read("data/oprvrs_essh_gb/data/WatercourseLink.shp")

## Reading layer `WatercourseLink' from data source 
##   `/Users/jasonola/Desktop/datascience/R_advanced_data_visualisation/Report_3/data/oprvrs_essh_gb/data/WatercourseLink.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 186272 features and 9 fields
## Geometry type: LINESTRING
## Dimension:     XYZ
## Bounding box:  xmin: 64429.59 ymin: 13274.99 xmax: 655208.5 ymax: 1216428
## z_range:       zmin: 0 zmax: 0
## Projected CRS: OSGB36 / British National Grid

green_space_ld <- london %>% 
  st_intersection(green_space)

rivers_ld <- london %>% 
  st_intersection(rivers)

urban_reg_ld <- london %>% 
  st_intersection(urban_reg)

london %>% 
  ggplot()+
  geom_sf(data = ext_london,
          fill = NA)+
  geom_sf(fill = "#F0F0F0")+
  geom_sf(data = urban_reg_ld,
          fill = "#EBE172",
          alpha = 0.5)+
  geom_sf(data = green_space_ld,
          fill = "#93EB5D",
          color = NA,
          alpha = 0.9)+
  geom_sf(data = rivers_ld,
          color = "#5D9FEB")+
  coord_sf(xlim = st_bbox(london_bbox)[c(1,3)],
           ylim = st_bbox(london_bbox)[c(2,4)])+
  labs(title = "London <span style = 'color:#EBE172'>urban area</span>",
       subtitle = "And its <span style = 'color:#5D9FEB'>rivers</span> and
       <span style = 'color:#93EB5D'>green areas</span>",
       caption = "Source : osdatahub.os.uk")+
  theme_void()+
  theme(plot.title = ggtext::element_markdown(hjust = 0.5),
        plot.subtitle = ggtext::element_markdown(hjust = 0.5),
        plot.caption = element_text(hjust = 0.5))

Let’s now try to map Paris with OSM

streets <- getbb("paris") %>% 
  opq() %>% 
  add_osm_feature(key = "highway",
                  value = c("primary","secondary","tertiary","motorway")) %>% 
  osmdata_sf() %>% 
  pluck("osm_lines")

small_streets <- getbb("paris") %>% 
  opq() %>%
  add_osm_feature(
    key = "highway",
    value = c(
      "residential", "living_street",
      "unclassified",
      "service", "footway"
    )
  ) %>%
  osmdata_sf() %>%
  pluck("osm_lines")

river <- getbb("paris") %>% 
  opq() %>% 
  add_osm_feature(
    key = "waterway",
    value = "river"
  ) %>% 
  osmdata_sf() %>% 
  pluck("osm_lines")

hotels <- getbb("paris") %>% 
  opq() %>% 
  add_osm_feature(
    key = "tourism",
    value = "hotel"
  ) %>% 
  osmdata_sf() %>% 
  pluck("osm_points")

taxi <- getbb("paris") %>% 
  opq() %>% 
  add_osm_feature(
    key = "amenity",
    value = "taxi"
  ) %>% 
  osmdata_sf() %>% 
  pluck("osm_points")

Put it all together

ggplot()+
  geom_sf(data = river, 
          color = "#3182bd",
          size = 2)+
  geom_sf(data = small_streets)+
  geom_sf(data = streets)+
  geom_sf(data = hotels, 
          color = "#c24c15",
          size = 0.5)+
  geom_sf(data = taxi,
          color = "gold",
          size = 0.5)+
  coord_sf(xlim = c(2.238762,2.447758),
           ylim = c(48.818118,48.899639))+
  theme_void()+
  labs(title = "Paris...",
       subtitle = "And it's <span style = 'color:#c24c15'>hotels</span> and 
       <span style = 'color:gold'>taxi</span> pickup places \n")+
  theme(plot.subtitle = ggtext::element_markdown())