Import packages
library(tidyverse)
library(sf)
library(leaflet)
library(patchwork)
library(osmdata)
Load the data
Let’s get the csv data, you can find the IHME data here, rest is on my github
ihme <- read_csv("data/IHME_DAH_DATABASE_1990_2019_Y2020M04D23.CSV",
na = c('NA', '-', ''))
Let’s work with HIV, malaria and tuberculosis
diseases <- c("hiv","mal","tb")
Compute sum of spendings by country and change the names to match the shapes data
dollar_spent_2019 <- ihme %>%
filter(year == 2019) %>%
select(source, contains(diseases)) %>%
pivot_longer(cols = contains(diseases)) %>%
group_by(source) %>%
summarise(dollar_spent = sum(value, na.rm = T)) %>%
mutate(source = str_replace_all(source,
c("New_Zealand" = "New Zealand",
"United_Kingdom" = "United Kingdom",
"United_States" = "United States of America")))
Create the intervals with labels for legend
dollars <- dollar_spent_2019 %>%
pull(dollar_spent)
cuts <- dollars %>%
classInt::classIntervals(n = 5) %>%
pluck("brks")
labels <- tibble(cuts1 = cuts, cuts2 = lead(cuts)) %>%
head(-1) %>%
mutate(cuts1 = scales::dollar_format()(cuts1),
cuts2 = scales::dollar_format()(cuts2),
label = paste0(cuts1, " - ", cuts2) %>% fct_inorder())
Apply the cuts
dollar_spent_2019 <- dollar_spent_2019 %>%
mutate(cut = cut(dollar_spent,
breaks = cuts,
labels = labels$label))
Read the shape file
world <- st_read("data/110m_cultural/ne_110m_admin_0_countries.shp")
## Reading layer `ne_110m_admin_0_countries' from data source
## `/Users/jasonola/Desktop/datascience/R_advanced_data_visualisation/Report_3/data/110m_cultural/ne_110m_admin_0_countries.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 177 features and 94 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -90 xmax: 180 ymax: 83.64513
## Geodetic CRS: WGS 84
Join the shape and the csv
world <- world %>%
left_join(dollar_spent_2019, by = c("SOVEREIGNT" = "source"))
Plot the data
world_dollar_spent <- ggplot()+
geom_sf(data = world %>% filter(SOVEREIGNT != "Antarctica"),
mapping = aes(fill = cut),
color = "darkgray",
size = 0.2)+
scale_fill_brewer(na.value = "transparent")+
labs(title = "Variation of dollars spent by country in 2019",
subtitle = "For Tuberculosis, Malaria and HIV",
fill = "",
caption = "Source : www.healthdata.org")+
theme_void()+
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.1),
plot.subtitle = element_text(hjust = 0.075),
plot.caption = element_text(face = "italic",
hjust = 0.9,
size = 7))
world_dollar_spent
Let’s do the same map but using countries that recieved money
dollar_received_2002 <- ihme %>%
filter(year == 2002) %>%
select(recipient_country, contains(diseases)) %>%
pivot_longer(cols = contains(diseases)) %>%
group_by(recipient_country) %>%
summarise(dollar_received = sum(value, na.rm = T)) %>%
mutate(dollar_received = na_if(dollar_received,0))
cuts_recieved <- dollar_received_2002 %>%
pull(dollar_received) %>%
classInt::classIntervals(n = 5) %>%
pluck("brks")
labels_recieved <- tibble(cut1 = round(cuts), cut2 = round(lead(cuts))) %>%
head(-1) %>%
mutate(cut1 = scales::dollar_format()(cut1),
cut2 = scales::dollar_format()(cut2),
label = paste0(cut1, " - ", cut2) %>% fct_inorder())
Replace some names of countries
dollar_received_2002 <- dollar_received_2002 %>%
mutate(labels_recieved = cut(dollar_received,
breaks = cuts_recieved,
label = labels_recieved$label),
recipient_country = str_replace_all(recipient_country,
c("Cote d'Ivoire" = "Ivory Coast",
"Serbia" = "Republic of Serbia",
"Tanzania" = "United Republic of Tanzania",
"Swaziland" = "eSwatini")))
world <- world %>%
left_join(dollar_received_2002, by = c("SOVEREIGNT" = "recipient_country"))
Plot the map
world_dollar_recieved <- ggplot()+
geom_sf(data = world %>% filter(SOVEREIGNT != "Antarctica"),
mapping = aes(fill = labels_recieved),
color = "darkgray",
size = 0.2)+
scale_fill_brewer(na.value = "transparent",
palette = "Greens")+
labs(title = "Variation of dollars recieved by country in 2002",
subtitle = "For Tuberculosis, Malaria and HIV",
fill = "",
caption = "Source : www.healthdata.org")+
theme_void()+
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.1),
plot.subtitle = element_text(hjust = 0.07),
plot.caption = element_text(face = "italic",
hjust = 0.9,
size = 7))
world_dollar_recieved
Let’s create the regions shapes
sf_use_s2(FALSE)
regions <- world %>%
group_by(REGION_WB) %>%
summarise()
regions %>% pull(REGION_WB) %>%
sort
## [1] "Antarctica" "East Asia & Pacific"
## [3] "Europe & Central Asia" "Latin America & Caribbean"
## [5] "Middle East & North Africa" "North America"
## [7] "South Asia" "Sub-Saharan Africa"
ihme %>%
pull(gbd_region) %>%
unique
## [1] "North Africa/Middle East" "Sub-Saharan Africa, Central"
## [3] "Europe, Central" "Caribbean"
## [5] "Latin America, Southern" "Asia, Central"
## [7] "Sub-Saharan Africa, Eastern" "Sub-Saharan Africa, Western"
## [9] "Asia, South" "Europe, Eastern"
## [11] "Latin America, Andean" "Latin America, Tropical"
## [13] "Sub-Saharan Africa, Southern" "Asia, East"
## [15] "Latin America, Central" "Oceania"
## [17] "Asia, Southeast" "Asia Pacific, high-income"
## [19] "Unallocated/Unspecified" "Global"
## [21] "Europe, Western"
We see we have different names, we have to rename the gdb regions before joining
ihme_gbd_reg <- ihme %>%
mutate(gbd_region = str_replace_all(
gbd_region, c("North Africa/Middle East" = "Middle East & North Africa",
"Caribbean" = "Latin America & Caribbean",
"Sub-Saharan Africa, Eastern" = "Sub-Saharan Africa",
"Europe, Eastern" = "Europe & Central Asia",
"Sub-Saharan Africa, Southern" = "Sub-Saharan Africa",
"Oceania" = "East Asia & Pacific",
"Sub-Saharan Africa, Central" = "Sub-Saharan Africa",
"Latin America, Southern" = "Latin America & Caribbean",
"Sub-Saharan Africa, Western" = "Sub-Saharan Africa",
"Latin America, Andean" = "Latin America & Caribbean",
"Asia, East" = "East Asia & Pacific",
"Asia, Southeast" = "East Asia & Pacific",
"Europe, Central" = "Europe & Central Asia",
"Asia, Central" = "South Asia",
"Asia, South" = "South Asia",
"Latin America, Tropical" = "Latin America & Caribbean",
"Latin America, Central" = "Latin America & Caribbean",
"Asia Pacific, high-income" = "East Asia & Pacific",
"Europe, Western" = "Europe & Central Asia"
)))
ihme_gbd_reg_2002 <- ihme_gbd_reg %>%
filter(year == 2002) %>%
select(gbd_region, contains(diseases)) %>%
pivot_longer(cols = contains(diseases)) %>%
group_by(gbd_region) %>%
summarise(dollar_received = sum(value, na.rm = T))
dollar_recieved_region <- ihme_gbd_reg_2002 %>%
pull(dollar_received) %>%
classInt::classIntervals(n = 3) %>%
pluck("brks")
lab_2002 <- dollar_recieved_region %>%
as_tibble() %>%
mutate(value2 = lead(value)) %>%
head(-1) %>%
mutate(label = paste0(round(value), " - ", round(value2), " $"))
ihme_gbd_reg_2002 <- ihme_gbd_reg_2002 %>%
mutate(cut = cut(dollar_received,
breaks = dollar_recieved_region,
labels = lab_2002$label,
include.lowest = T))
regions <- regions %>%
left_join(ihme_gbd_reg_2002, by = c("REGION_WB" = "gbd_region"))
regions %>%
filter(REGION_WB != "Antarctica") %>%
ggplot()+
geom_sf(aes(fill = cut),
size = 0.1)+
labs(title = "Variation of dollars recieved by region in 2002",
subtitle = "For Tuberculosis, Malaria and HIV",
fill = "",
caption = "Source : www.healthdata.org")+
scale_fill_brewer(palette = "Greens")+
theme_void()+
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.1),
plot.subtitle = element_text(hjust = 0.07),
plot.caption = element_text(face = "italic",
hjust = 0.9,
size = 8))
Interactive with leaflet
Let’s add interactions using leaflet
First create the labels we will use, we want to show country and how many dollars it received
leaflet_labs <- paste(
"Country: ", world$SOVEREIGNT, "<br/>",
"Dollars recieved: ", world$dollar_received," $", "<br/>"
) %>%
lapply(htmltools::HTML)
Then we plot our shape in leaflet with the addPolygons and link our data with fillColor and the text we prepared with label.
factpal <- colorFactor(palette = "Greens", world$labels_recieved)
leaflet(world)%>%
addPolygons(
stroke = TRUE,
color = 'gray',
weight = 1.5,
fillColor = factpal(world$labels_recieved),
label = leaflet_labs,
highlightOptions = highlightOptions(color = "white", weight = 3,
bringToFront = TRUE)) %>%
setView(lng = 0, lat = 0, zoom = 1) %>%
addLegend("bottomright",pal = factpal,
values = world$labels_recieved,
title = "Variation of dollars recieved by country in 2002")
Combining plots
First we get our bbox
africa_bbox <- world %>%
filter(CONTINENT == "Africa") %>%
st_bbox() %>%
st_as_sfc() %>%
st_buffer(2)
Then the shape of the continent we want to highlight, here Africa
africa <- world %>%
filter(CONTINENT == "Africa")
Put the bbox around Africa in the world map
inset <- world %>% filter(SOVEREIGNT != "Antarctica") %>%
ggplot()+
geom_sf(size = 0.1)+
geom_sf(data = africa_bbox,
fill = NA,
color = "brown")+
theme_void()
inset
Plot the data on the Africa map
africa_map <- africa %>%
ggplot()+
geom_sf(data = world %>% filter(CONTINENT != "Africa"),
fill = NA,
color = "gray",
size = 0.3)+
geom_sf(mapping = aes(fill = labels_recieved),
color = "white",
size = 0.1)+
scale_fill_brewer(palette = "Greens",
na.value = "gray")+
labs(fill = "")+
coord_sf(xlim = st_bbox(africa_bbox)[c(1,3)],
ylim = st_bbox(africa_bbox)[c(2,4)])+
theme_void()+
theme(legend.position = "bottom")
africa_map
Put it all together with patchwork
layout <- c(
area(t = 1, l = 0, b = 7, r = 3),
area(t = 1, l = 1, b = 6, r = 10)
)
inset + africa_map +
plot_layout(design = layout) +
plot_annotation(title = "Dollars recieved in African countries (2002)",
subtitle = "For Tuberculosis, Malaria and HIV\n",
caption = "Source : www.healthdata.org",
theme = theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
plot.caption = element_text(face = "italic")
))
Open street map
Data found on https://osdatahub.os.uk/downloads/open?_ga=2.3518960.1516077193.1626005802-1560027532.1626005802
london <- st_read("data/bdline_essh_gb/Data/greater_london_const_region.shp")
## Reading layer `greater_london_const_region' from data source
## `/Users/jasonola/Desktop/datascience/R_advanced_data_visualisation/Report_3/data/bdline_essh_gb/Data/greater_london_const_region.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 14 features and 15 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## Projected CRS: OSGB36 / British National Grid
ext_london <- st_read("data/bdline_essh_gb/Data/county_region.shp")
## Reading layer `county_region' from data source
## `/Users/jasonola/Desktop/datascience/R_advanced_data_visualisation/Report_3/data/bdline_essh_gb/Data/county_region.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 25 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 212680.1 ymin: 34912.7 xmax: 655989 ymax: 588517.4
## Projected CRS: OSGB36 / British National Grid
london_bbox <- london %>%
st_bbox() %>%
st_as_sfc() %>%
st_buffer(2)
urban_reg <- st_read("data/strtgi_essh_gb/data/urban_region.shp") %>%
rmapshaper::ms_simplify()
## Reading layer `urban_region' from data source
## `/Users/jasonola/Desktop/datascience/R_advanced_data_visualisation/Report_3/data/strtgi_essh_gb/data/urban_region.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 24764 features and 3 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 62627 ymin: 7858 xmax: 655503 ymax: 1214258
## Projected CRS: OSGB36 / British National Grid
green_space <- st_read("data/OS Open Greenspace (ESRI Shape File) GB/data/GB_GreenspaceSite.shp") %>%
rmapshaper::ms_simplify()
## Reading layer `GB_GreenspaceSite' from data source
## `/Users/jasonola/Desktop/datascience/R_advanced_data_visualisation/Report_3/data/OS Open Greenspace (ESRI Shape File) GB/data/GB_GreenspaceSite.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 148263 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XYZ
## Bounding box: xmin: 9819.84 ymin: 8274.57 xmax: 655229.5 ymax: 1214133
## z_range: zmin: 0 zmax: 0
## Projected CRS: OSGB36 / British National Grid
rivers <- st_read("data/oprvrs_essh_gb/data/WatercourseLink.shp")
## Reading layer `WatercourseLink' from data source
## `/Users/jasonola/Desktop/datascience/R_advanced_data_visualisation/Report_3/data/oprvrs_essh_gb/data/WatercourseLink.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 186272 features and 9 fields
## Geometry type: LINESTRING
## Dimension: XYZ
## Bounding box: xmin: 64429.59 ymin: 13274.99 xmax: 655208.5 ymax: 1216428
## z_range: zmin: 0 zmax: 0
## Projected CRS: OSGB36 / British National Grid
green_space_ld <- london %>%
st_intersection(green_space)
rivers_ld <- london %>%
st_intersection(rivers)
urban_reg_ld <- london %>%
st_intersection(urban_reg)
london %>%
ggplot()+
geom_sf(data = ext_london,
fill = NA)+
geom_sf(fill = "#F0F0F0")+
geom_sf(data = urban_reg_ld,
fill = "#EBE172",
alpha = 0.5)+
geom_sf(data = green_space_ld,
fill = "#93EB5D",
color = NA,
alpha = 0.9)+
geom_sf(data = rivers_ld,
color = "#5D9FEB")+
coord_sf(xlim = st_bbox(london_bbox)[c(1,3)],
ylim = st_bbox(london_bbox)[c(2,4)])+
labs(title = "London <span style = 'color:#EBE172'>urban area</span>",
subtitle = "And its <span style = 'color:#5D9FEB'>rivers</span> and
<span style = 'color:#93EB5D'>green areas</span>",
caption = "Source : osdatahub.os.uk")+
theme_void()+
theme(plot.title = ggtext::element_markdown(hjust = 0.5),
plot.subtitle = ggtext::element_markdown(hjust = 0.5),
plot.caption = element_text(hjust = 0.5))
Let’s now try to map Paris with OSM
streets <- getbb("paris") %>%
opq() %>%
add_osm_feature(key = "highway",
value = c("primary","secondary","tertiary","motorway")) %>%
osmdata_sf() %>%
pluck("osm_lines")
small_streets <- getbb("paris") %>%
opq() %>%
add_osm_feature(
key = "highway",
value = c(
"residential", "living_street",
"unclassified",
"service", "footway"
)
) %>%
osmdata_sf() %>%
pluck("osm_lines")
river <- getbb("paris") %>%
opq() %>%
add_osm_feature(
key = "waterway",
value = "river"
) %>%
osmdata_sf() %>%
pluck("osm_lines")
hotels <- getbb("paris") %>%
opq() %>%
add_osm_feature(
key = "tourism",
value = "hotel"
) %>%
osmdata_sf() %>%
pluck("osm_points")
taxi <- getbb("paris") %>%
opq() %>%
add_osm_feature(
key = "amenity",
value = "taxi"
) %>%
osmdata_sf() %>%
pluck("osm_points")
Put it all together
ggplot()+
geom_sf(data = river,
color = "#3182bd",
size = 2)+
geom_sf(data = small_streets)+
geom_sf(data = streets)+
geom_sf(data = hotels,
color = "#c24c15",
size = 0.5)+
geom_sf(data = taxi,
color = "gold",
size = 0.5)+
coord_sf(xlim = c(2.238762,2.447758),
ylim = c(48.818118,48.899639))+
theme_void()+
labs(title = "Paris...",
subtitle = "And it's <span style = 'color:#c24c15'>hotels</span> and
<span style = 'color:gold'>taxi</span> pickup places \n")+
theme(plot.subtitle = ggtext::element_markdown())