Over the last few days, I’ve been working on a hexagram map for UK parliamentary election results. Aside from purely aesthetic/academic reasons for doing so, it’s also good to remember why such cartograms are important.
A good recent example is the French Presidential election, wherein Emmanuel Macron was elected. In the first round, Macron won fairly comfortably (24% vs. two candidates on around 21%, but you wouldn’t know that from the results.
This is because increasingly, the distribution of people is becoming ever more unequal, with fewer much larger cities containing sizeable proportions of the total population. The hexagrams correct this by giving an equal area to a (roughly) equal population seat. However, I hadn’t seen just how unequal the distribution of population across Europe is.
To map this, I’m using data from Eurostat. Each step is explained below and should be fully reproducible.
First, we want to get the most detailed shapefiles on offer and open them in R:
library(RCurl)
#get shapefiles (16Mb file)
shapes_func <- function(){
download.file(paste0("http://ec.europa.eu/eurostat/cache/",
"GISCO/geodatafiles/NUTS_2013_01M_SH.zip"),
g <- tempfile())
unzip(g, exdir=tempdir())
rm(g)
directory <- paste0(gsub("\\\\", "/", tempdir()),
"/NUTS_2013_01M_SH/data")
shape <- rgdal::readOGR(dsn=directory, layer="NUTS_RG_01M_2013",
verbose = FALSE)
return(shape)
}
#download and open
NUTS <- shapes_func()
#sort out level 3 data (smallest)
level3 <- NUTS[which(NUTS$STAT_LEVL_ == "3"),]
head(level3@data)
## NUTS_ID STAT_LEVL_ SHAPE_AREA SHAPE_LEN
## 3 AT111 3 0.08381656 1.474930
## 4 AT112 3 0.21511603 3.394260
## 6 AT211 3 0.23837154 3.519518
## 7 AT212 3 0.48747204 5.836238
## 8 AT213 3 0.39799135 4.264666
## 10 AT221 3 0.14611800 2.481093
We want to project these onto an imaginary earth (I’m using the Lmabert azimuthal projection):
library(sp)
#project the map on an (Lambert azimuthal) equal area projection
level3 <- spTransform(level3, CRS(paste0("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 ",
"+y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")))
Then it’s time to download the data of the population and population density for each of these areas. A bit of fiddling is necessary to get them open in a way we can work with:
library(dplyr)
library(magrittr)
library(tidyr)
library(stringi)
library(stringr)
#get population data from github
EUdata <- read.table(text = getURL(paste0("https://raw.githubusercontent.com/RobWHickman/",
"Population-Distributions/master/demo_r_pjangrp3.tsv")),
sep="\t", header=TRUE)
#format data
EUdata <- EUdata %>% separate(sex.unit.age.geo.time,
unlist(str_split(names(EUdata)[1], "\\.")),
sep = ",")
#get for total ages and total sexes
EUdata <- EUdata[which(EUdata$age == "TOTAL" & EUdata$sex == "T"),]
#sort out and name
popDF <- EUdata[c(4,6)]
names(popDF) <- c("NUTS_ID", "pop2014")
popDF$pop2014 <- as.numeric(as.character(gsub(" .*", "", popDF$pop2014)))
#get population density data
#basically the same as above
EUdataDens <- read.table(text = getURL(paste0("https://raw.githubusercontent.com/RobWHickman",
"/Population-Distributions/master/demo_r_d3dens.tsv")),
sep="\t", header=TRUE)
#format data
EUdataDens <- EUdataDens %>% separate(unit.geo.time,
names(EUdata)[1:2],
sep = ",")
#sort out and name
EUdataDens$X2014 <- as.numeric(as.character(EUdataDens$X2014))
densDF <- EUdataDens[2:3]
names(densDF) <- c("NUTS_ID", "Density")
We can glimpse the firstlines of both of these using head():
head(popDF)
## NUTS_ID pop2014
## 88175 AL 2893005
## 88176 AL0 2893005
## 88177 AL01 NA
## 88178 AL011 NA
## 88179 AL012 NA
## 88180 AL013 NA
head(densDF)
## NUTS_ID Density
## 1 AL 105.6
## 2 AL0 105.6
## 3 AT 103.6
## 4 AT1 160.9
## 5 AT11 78.4
## 6 AT111 54.1
This data is then joined to the DataFrame part of the SpatialPolygonsDataFrame using left_join(). I also averaged out some missing German areas and removed the French South American/Pacific islands.
#populate up SpatialPolygonsDataFrame
#bind to the shapefiles
level3@data <- left_join(level3@data, popDF, by = "NUTS_ID")
level3@data <- left_join(level3@data, densDF, by = "NUTS_ID")
#add in missing German densities using their average
level3$Density[which(is.na(level3$Density))] <- EUdataDens$X2014[which(EUdataDens$unit %in% "DE8")]
#get rid of small islands
remove <- c("FRA10", "FRA20", "FRA30", "FRA40", "FRA50", "PT200", "PT300",
"ES703", "ES704", "ES705", "ES706", "ES707", "ES708", "ES709")
level3 <- level3[-c(which(level3$NUTS_ID %in% remove)),]
To plot a colour gradient of total population for each shape, I used ggplot as it’s nice to work with and gives fairly aesthetic results. There are perhaps neater/ more itneractive ways to do this, but it wasn’t the key figure I wanted to produce in any case.
The map data is fortified then passed into geom_map():
library(ggplot2)
library(ggthemes)
library(maptools)
## Checking rgeos availability: TRUE
#fortify for plotting
EUmap <- fortify(level3, region="NUTS_ID")
#make plot
ggplot() +
#map based off of fortified data frame
geom_map(data = EUmap, map = EUmap,
aes(x = long, y = lat, map_id = id),
color = "black", size = 1, fill = NA) +
geom_map(data = level3@data, map = EUmap,
aes(fill = pop2014, map_id = NUTS_ID),
color = NA) +
theme_map() +
labs(title="Population in Level 3 EU Regions") +
#fill by population of polygon
scale_fill_gradient(name = "Population",
low = "darkblue", high = "yellow") +
theme(legend.position = "right")
## Warning: Ignoring unknown aesthetics: x, y
To plot a specific proportion of the population living in region x/y, I first summed the population of every shape then summed from the most dense until I reach z% of this. The dplyr package makes it nice and easy to pass columns back into the data as either the “high density” shapes containing z% or the “low density” shapes containnig the rest:
#make sorting table
sortingDF <- level3@data %>% select(NUTS_ID, pop2014, Density)
totalpop <- sum(sortingDF$pop2014, na.rm = TRUE)
#sort by density (i.e. find smallest area for max pop)
sortingDF <- sortingDF %>% arrange(-Density) %>%
mutate(cumsum = cumsum(pop2014))
percentage <- 30
sortingDF <- sortingDF %>%
mutate(lowhigh = ifelse(cumsum > totalpop*(percentage /100),
"low density", "high density")) %>%
select(NUTS_ID, lowhigh)
#bind high/low value to SPDF
level3@data <- left_join(level3@data, sortingDF, by = "NUTS_ID")
head(level3@data)
## NUTS_ID STAT_LEVL_ SHAPE_AREA SHAPE_LEN pop2014 Density lowhigh
## 1 AT111 3 0.08381656 1.474930 37578 54.1 low density
## 2 AT112 3 0.21511603 3.394260 153542 100.3 low density
## 3 AT211 3 0.23837154 3.519518 281065 143.1 low density
## 4 AT212 3 0.48747204 5.836238 125272 30.9 low density
## 5 AT213 3 0.39799135 4.264666 150710 45.2 low density
## 6 AT221 3 0.14611800 2.481093 420885 343.2 low density
This is then plotted as before:
#make plot
ggplot() +
geom_map(data = EUmap, map = EUmap,
aes(x = long, y = lat, map_id = id),
color = "black", size = 1, fill = NA) +
geom_map(data = level3@data, map = EUmap,
aes(fill = lowhigh, map_id = NUTS_ID),
color = NA) +
theme_map() +
labs(title= paste0("Map Showing ", percentage,
"% of the European Population in Yellow")) +
scale_fill_manual(values = c("yellow", "darkblue"),
name="Population Density") +
theme(legend.position = "right")
## Warning: Ignoring unknown aesthetics: x, y
Finally, I felt like making a grid at different percentages. Blogdown wasn’t enjoying my attempts to turn this into a function so it’s shown below as a for-loop that creates a different plot for each decile between 10% and 90% and then plots them in a 3x3 matrix:
library(gridExtra)
library(grid)
plot_grid <- function(){
for (percent in seq(10,90,10)){
percentage <- percent
sortingDF <- level3@data %>% select(NUTS_ID, pop2014, Density)
totalpop <- sum(sortingDF$pop2014, na.rm = TRUE)
#sort by density (i.e. find smallest area for max pop)
sortingDF <- sortingDF %>% arrange(-Density) %>%
mutate(cumsum = cumsum(pop2014))
sortingDF <- sortingDF %>%
mutate(lowhigh = ifelse(cumsum > totalpop*(percentage /100),
"low density", "high density")) %>%
select(NUTS_ID, lowhigh)
level3@data <- left_join(level3@data[1:6], sortingDF, by = "NUTS_ID")
p <- ggplot() +
geom_map(data = EUmap, map = EUmap,
aes(x = long, y = lat, map_id = id),
color = "black", size = 1, fill = NA) +
geom_map(data = level3@data, map = EUmap,
aes(fill = lowhigh, map_id = NUTS_ID),
color = NA) +
theme_map() +
labs(title= paste0(percentage,
"% of Population")) +
scale_fill_manual(values = c("yellow", "darkblue"),
name="Population Density") +
theme(legend.position = c(.65, .4))
assign(paste0("plot", as.character(percentage)), p)
}
grid <- grid.arrange(plot10, plot20, plot30, plot40, plot50, plot60, plot70, plot80, plot90,
top = textGrob("Percentages of the European Population in Yellow",
gp=gpar(fontsize=20,font=1)))
return(grid)
}
plot <- plot_grid()