This short document outlines 2 ways of creating choropleth maps in R using the thematic map package tmap and ggmap which is an extension of the widely used ggplot2 plotting framework. tmap is simpler; ggmap is more flexible.
In either case we need:
We’ll use ward level data from Fingertips, and shape files from the ONS Open Geography portal - although you could use the PHE’s ESRI portal or other sources.
There are a few steps:
In R we’ll need a few libraries to help us create the maps - we’ll load them first
library(readr) ## for reading in data
library(dplyr) ## for data manipulation
library(rgdal) ## for reading in shape files
library(tmap) ## for plotting maps
library(ggplot2) ## for charting
library(ggmap) ## for plotting maps
library(viridis)## colour palettes
library(geojsonio)
I downloaded the data from Fingertips and converted to from Excel to csv format. To illustrate we’ll use the self herm data.
data <- read.csv("~/Downloads/testward.csv") %>%
janitor::clean_names() ## this tidies up variable names and converts them to lower case
map <- data %>%
mutate(indicator = as.character(indicator)) %>%
filter(indicator == "Hospital admissions for self-harm: standardised emergency admission ratio (all ages)")
The ONS site has a number of shape files for ward geographies. In the interest of speed we’ll use super generalised boundaries.
## First we need to download and unzip the file
shp <- tempfile()
download.file('http://geoportal.statistics.gov.uk/datasets/5fb8813978cc4e4892da4b57bcf4491f_3.zip', shp)
unzip(shp, exdir = getwd())
## and then read the file into R as a Spatial Polygon Data Frame
s <- readOGR(dsn = "Wards_December_2015_Super_Generalised_Clipped_Boundaries_in_Great_Britain.shp",
layer = "Wards_December_2015_Super_Generalised_Clipped_Boundaries_in_Great_Britain", verbose = FALSE)
We can look at the structure of this file which shows it contains 8734 ward boundaries, ward codes and names, and the lat/longs for each area. We can limit the data to English wards only.
class(s)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
str(s, max.level = 3, list.len = 3)
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 8734 obs. of 8 variables:
## .. ..$ wd15cd : Factor w/ 8734 levels "E05000026","E05000027",..: 1 2 3 4 5 6 7 8 9 10 ...
## .. ..$ wd15nm : Factor w/ 8013 levels "Abbey","Abbey Hill",..: 1 115 515 1470 2426 2427 2890 2986 3399 4447 ...
## .. ..$ wd15nmw : Factor w/ 831 levels "Aber-craf","Aber-soch",..: NA NA NA NA NA NA NA NA NA NA ...
## .. .. [list output truncated]
## ..@ polygons :List of 8734
## .. ..$ :Formal class 'Polygons' [package "sp"] with 5 slots
## .. .. .. .. [list output truncated]
## .. ..$ :Formal class 'Polygons' [package "sp"] with 5 slots
## .. .. .. .. [list output truncated]
## .. ..$ :Formal class 'Polygons' [package "sp"] with 5 slots
## .. .. .. .. [list output truncated]
## .. .. [list output truncated]
## ..@ plotOrder : int [1:8734] 7712 7717 7733 7724 7732 7790 7723 7715 7722 7557 ...
## .. [list output truncated]
seng <- subset(s, substr(wd15cd, 1, 1) == "E")
ggmapTo use the shape file with ggmap we need to ‘fortify’ the data to turn it into an ordinary data frame. Then we can link the data to be mapped.
## Fortify the file and specify which areas are to be plotted
s1 <-fortify(seng, region = "wd15cd")
## Link data
s2 <- s1 %>%
left_join(map, by = c("id" = "area_code"))
## Create the map
g <- ggplot() +
geom_polygon(data = s2,
aes(long, lat,
group = group,
fill = value)) +
coord_map()
## Enhance - 'theme_nothing' removes the background, viridis is a heatmap colour palette
g + scale_fill_viridis(direction = -1,
option = "plasma",
name = "Self harm rate") +
theme_nothing(legend = TRUE ) +
labs(title = "Admission ratios for self harm")
library(stringr)
## Load data
newdata <- read_csv("~/Downloads/phof_new.csv", col_types = c("ccccccdddddccc")) %>%
janitor::clean_names()
newdata <- filter(newdata, x_indicator == "0.1ii - Life expectancy at birth" & sex == "Male")
shp <- tempfile()
download.file('http://geoportal.statistics.gov.uk/datasets/686603e943f948acaa13fb5d2b0f1275_3.zip', shp)
unzip(shp, exdir = getwd())
## and then read the file into R as a Spatial Polygon Data Frame
s <- readOGR(dsn = "Local_Authority_Districts_December_2016_Super_Generalised_Clipped_Boundaries_in_Great_Britain.shp",
layer = "Local_Authority_Districts_December_2016_Super_Generalised_Clipped_Boundaries_in_Great_Britain", verbose = FALSE)
seng <- subset(s, substr(lad16cd, 1, 1) == "E")
seng1 <- fortify(seng, region = "lad16cd")
u <- unique(factor(seng1$id))
v <- unique(factor(newdata$area_code))
identical(u,v)
## [1] TRUE
s2 <- seng1 %>%
left_join(newdata, by = c("id" = "area_code"))
g <- ggplot() +
geom_polygon(data = s2,
aes(long, lat,
group = group,
fill = value)) +
coord_map() +
facet_wrap(~time_period, nrow = 4)
## Enhance - 'theme_nothing' removes the background, viridis is a heatmap colour palette
g + scale_fill_viridis(direction = -1,
option = "viridis",
name = "LIfe expectancy") +
theme_nothing(legend = TRUE )
tmapWe need to link the data to be plotted to the unfortified shape file and then its easy to use the tm_shape function to create the map.
seng@data <- left_join(seng@data, map, by=c("wd15cd" = "area_code"))
palette <- rev(plasma(15))
credits <- "Contains ordnance survey data (c) \nCrown copyright and database right 2016"
t <- tm_shape(seng) +
tm_fill("value", style = "kmeans" , n = 10,
palette = palette, title = "Self harm rate") +
tm_credits( credits, size = 0.5, align = "right") +
tm_layout(
legend.outside = TRUE,
frame = FALSE)
t
We can simplify even more by reading the data in GeoJSON format.
palette <- rev(plasma(15))
credits <- "Contains ordnance survey data (c) \nCrown copyright and database right 2016"
## Download the data and convert to spatial polygon data frame
geodata <- geojson_read("http://geoportal.statistics.gov.uk/datasets/5fb8813978cc4e4892da4b57bcf4491f_3.geojson", what = "sp")
## Select England wards
seng <- subset(geodata, substr(lad15cd, 1, 1) == "E")
## Join data to plot
seng@data <- left_join(seng@data, map, by=c("wd15cd" = "area_code"))
## Map
tm_shape(seng) +
tm_fill("value", style = "kmeans" , n = 10,
palette = palette, title = "Self harm rate") +
tm_credits( credits, size = 0.5, align = "right") +
tm_layout(legend.outside = TRUE,
frame = FALSE) +
tm_compass(position = c("left", "center")) +
tm_scale_bar(position = c("left", "center"))
Once nice feature of tmap is the range of options to partition the data. These include:
The figure below shows the effect of changing the stlye in tmaps.
tmap_mode("plot")
## tmap mode set to plotting
t1 <- tm_shape(seng)
kmeans <- t1 + tm_fill("value", style = "kmeans" , n = 5,
palette = palette, title = "kmeans")
jenks <- t1 + tm_fill("value", style = "jenks" , n = 5,
palette = palette, title = "jenks")
quantile <- t1 + tm_fill("value", style = "quantile" , n = 5,
palette = palette, title = "quantile")
pretty <- t1 + tm_fill("value", style = "pretty" , n = 5,
palette = palette, title = "Pretty")
sd <- t1 + tm_fill("value", style = "sd" , n = 5,
palette = palette, title = "sd")
tmap_arrange(kmeans, jenks, quantile, pretty, sd, nrow = 1)
tmap has one more trick up its sleeve…setting tmap_mode to view creates an interactive map via leaflet.
tmap_mode("view")
## tmap mode set to interactive viewing
t
```