This short document outlines 2 ways of creating choropleth maps in R using the thematic map package tmap
and ggmap
which is an extension of the widely used ggplot2
plotting framework. tmap
is simpler; ggmap
is more flexible.
In either case we need:
We’ll use ward level data from Fingertips, and shape files from the ONS Open Geography portal - although you could use the PHE’s ESRI portal or other sources.
There are a few steps:
In R we’ll need a few libraries to help us create the maps - we’ll load them first
library(readr) ## for reading in data
library(dplyr) ## for data manipulation
library(rgdal) ## for reading in shape files
library(tmap) ## for plotting maps
library(ggplot2) ## for charting
library(ggmap) ## for plotting maps
library(viridis)## colour palettes
library(geojsonio)
I downloaded the data from Fingertips and converted to from Excel to csv format. To illustrate we’ll use the self herm data.
data <- read.csv("~/Downloads/testward.csv") %>%
janitor::clean_names() ## this tidies up variable names and converts them to lower case
map <- data %>%
mutate(indicator = as.character(indicator)) %>%
filter(indicator == "Hospital admissions for self-harm: standardised emergency admission ratio (all ages)")
The ONS site has a number of shape files for ward geographies. In the interest of speed we’ll use super generalised boundaries.
## First we need to download and unzip the file
shp <- tempfile()
download.file('http://geoportal.statistics.gov.uk/datasets/5fb8813978cc4e4892da4b57bcf4491f_3.zip', shp)
unzip(shp, exdir = getwd())
## and then read the file into R as a Spatial Polygon Data Frame
s <- readOGR(dsn = "Wards_December_2015_Super_Generalised_Clipped_Boundaries_in_Great_Britain.shp",
layer = "Wards_December_2015_Super_Generalised_Clipped_Boundaries_in_Great_Britain", verbose = FALSE)
We can look at the structure of this file which shows it contains 8734 ward boundaries, ward codes and names, and the lat/longs for each area. We can limit the data to English wards only.
class(s)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
str(s, max.level = 3, list.len = 3)
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 8734 obs. of 8 variables:
## .. ..$ wd15cd : Factor w/ 8734 levels "E05000026","E05000027",..: 1 2 3 4 5 6 7 8 9 10 ...
## .. ..$ wd15nm : Factor w/ 8013 levels "Abbey","Abbey Hill",..: 1 115 515 1470 2426 2427 2890 2986 3399 4447 ...
## .. ..$ wd15nmw : Factor w/ 831 levels "Aber-craf","Aber-soch",..: NA NA NA NA NA NA NA NA NA NA ...
## .. .. [list output truncated]
## ..@ polygons :List of 8734
## .. ..$ :Formal class 'Polygons' [package "sp"] with 5 slots
## .. .. .. .. [list output truncated]
## .. ..$ :Formal class 'Polygons' [package "sp"] with 5 slots
## .. .. .. .. [list output truncated]
## .. ..$ :Formal class 'Polygons' [package "sp"] with 5 slots
## .. .. .. .. [list output truncated]
## .. .. [list output truncated]
## ..@ plotOrder : int [1:8734] 7712 7717 7733 7724 7732 7790 7723 7715 7722 7557 ...
## .. [list output truncated]
seng <- subset(s, substr(wd15cd, 1, 1) == "E")
ggmap
To use the shape file with ggmap
we need to ‘fortify’ the data to turn it into an ordinary data frame. Then we can link the data to be mapped.
## Fortify the file and specify which areas are to be plotted
s1 <-fortify(seng, region = "wd15cd")
## Link data
s2 <- s1 %>%
left_join(map, by = c("id" = "area_code"))
## Create the map
g <- ggplot() +
geom_polygon(data = s2,
aes(long, lat,
group = group,
fill = value)) +
coord_map()
## Enhance - 'theme_nothing' removes the background, viridis is a heatmap colour palette
g + scale_fill_viridis(direction = -1,
option = "plasma",
name = "Self harm rate") +
theme_nothing(legend = TRUE ) +
labs(title = "Admission ratios for self harm")
library(stringr)
## Load data
newdata <- read_csv("~/Downloads/phof_new.csv", col_types = c("ccccccdddddccc")) %>%
janitor::clean_names()
newdata <- filter(newdata, x_indicator == "0.1ii - Life expectancy at birth" & sex == "Male")
shp <- tempfile()
download.file('http://geoportal.statistics.gov.uk/datasets/686603e943f948acaa13fb5d2b0f1275_3.zip', shp)
unzip(shp, exdir = getwd())
## and then read the file into R as a Spatial Polygon Data Frame
s <- readOGR(dsn = "Local_Authority_Districts_December_2016_Super_Generalised_Clipped_Boundaries_in_Great_Britain.shp",
layer = "Local_Authority_Districts_December_2016_Super_Generalised_Clipped_Boundaries_in_Great_Britain", verbose = FALSE)
seng <- subset(s, substr(lad16cd, 1, 1) == "E")
seng1 <- fortify(seng, region = "lad16cd")
u <- unique(factor(seng1$id))
v <- unique(factor(newdata$area_code))
identical(u,v)
## [1] TRUE
s2 <- seng1 %>%
left_join(newdata, by = c("id" = "area_code"))
g <- ggplot() +
geom_polygon(data = s2,
aes(long, lat,
group = group,
fill = value)) +
coord_map() +
facet_wrap(~time_period, nrow = 4)
## Enhance - 'theme_nothing' removes the background, viridis is a heatmap colour palette
g + scale_fill_viridis(direction = -1,
option = "viridis",
name = "LIfe expectancy") +
theme_nothing(legend = TRUE )
tmap
We need to link the data to be plotted to the unfortified shape file and then its easy to use the tm_shape
function to create the map.
seng@data <- left_join(seng@data, map, by=c("wd15cd" = "area_code"))
palette <- rev(plasma(15))
credits <- "Contains ordnance survey data (c) \nCrown copyright and database right 2016"
t <- tm_shape(seng) +
tm_fill("value", style = "kmeans" , n = 10,
palette = palette, title = "Self harm rate") +
tm_credits( credits, size = 0.5, align = "right") +
tm_layout(
legend.outside = TRUE,
frame = FALSE)
t
We can simplify even more by reading the data in GeoJSON format.
palette <- rev(plasma(15))
credits <- "Contains ordnance survey data (c) \nCrown copyright and database right 2016"
## Download the data and convert to spatial polygon data frame
geodata <- geojson_read("http://geoportal.statistics.gov.uk/datasets/5fb8813978cc4e4892da4b57bcf4491f_3.geojson", what = "sp")
## Select England wards
seng <- subset(geodata, substr(lad15cd, 1, 1) == "E")
## Join data to plot
seng@data <- left_join(seng@data, map, by=c("wd15cd" = "area_code"))
## Map
tm_shape(seng) +
tm_fill("value", style = "kmeans" , n = 10,
palette = palette, title = "Self harm rate") +
tm_credits( credits, size = 0.5, align = "right") +
tm_layout(legend.outside = TRUE,
frame = FALSE) +
tm_compass(position = c("left", "center")) +
tm_scale_bar(position = c("left", "center"))
Once nice feature of tmap
is the range of options to partition the data. These include:
The figure below shows the effect of changing the stlye in tmaps
.
tmap_mode("plot")
## tmap mode set to plotting
t1 <- tm_shape(seng)
kmeans <- t1 + tm_fill("value", style = "kmeans" , n = 5,
palette = palette, title = "kmeans")
jenks <- t1 + tm_fill("value", style = "jenks" , n = 5,
palette = palette, title = "jenks")
quantile <- t1 + tm_fill("value", style = "quantile" , n = 5,
palette = palette, title = "quantile")
pretty <- t1 + tm_fill("value", style = "pretty" , n = 5,
palette = palette, title = "Pretty")
sd <- t1 + tm_fill("value", style = "sd" , n = 5,
palette = palette, title = "sd")
tmap_arrange(kmeans, jenks, quantile, pretty, sd, nrow = 1)
tmap
has one more trick up its sleeve…setting tmap_mode
to view creates an interactive map via leaflet
.
tmap_mode("view")
## tmap mode set to interactive viewing
t
```