Europe - The Final Countdown

Over the last few days, I’ve been working on a hexagram map for UK parliamentary election results. Aside from purely aesthetic/academic reasons for doing so, it’s also good to remember why such cartograms are important.

A good recent example is the French Presidential election, wherein Emmanuel Macron was elected. In the first round, Macron won fairly comfortably (24% vs. two candidates on around 21%, but you wouldn’t know that from the results.

This is because increasingly, the distribution of people is becoming ever more unequal, with fewer much larger cities containing sizeable proportions of the total population. The hexagrams correct this by giving an equal area to a (roughly) equal population seat. However, I hadn’t seen just how unequal the distribution of population across Europe is.

To map this, I’m using data from Eurostat. Each step is explained below and should be fully reproducible.

First, we want to get the most detailed shapefiles on offer and open them in R:

library(RCurl)
#get shapefiles (16Mb file)
shapes_func <- function(){
  download.file(paste0("http://ec.europa.eu/eurostat/cache/",
                    "GISCO/geodatafiles/NUTS_2013_01M_SH.zip"),
              g <- tempfile())
  unzip(g, exdir=tempdir())
  rm(g)
  directory <- paste0(gsub("\\\\", "/", tempdir()), 
                        "/NUTS_2013_01M_SH/data")

  shape <- rgdal::readOGR(dsn=directory, layer="NUTS_RG_01M_2013",
                           verbose = FALSE)
  return(shape)
}

#download and open
NUTS <- shapes_func()

#sort out level 3 data (smallest)
level3 <- NUTS[which(NUTS$STAT_LEVL_ == "3"),]
head(level3@data)
##    NUTS_ID STAT_LEVL_ SHAPE_AREA SHAPE_LEN
## 3    AT111          3 0.08381656  1.474930
## 4    AT112          3 0.21511603  3.394260
## 6    AT211          3 0.23837154  3.519518
## 7    AT212          3 0.48747204  5.836238
## 8    AT213          3 0.39799135  4.264666
## 10   AT221          3 0.14611800  2.481093

We want to project these onto an imaginary earth (I’m using the Lmabert azimuthal projection):

library(sp)
#project the map on an (Lambert azimuthal) equal area projection
level3 <- spTransform(level3, CRS(paste0("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 ",
                                         "+y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")))

Then it’s time to download the data of the population and population density for each of these areas. A bit of fiddling is necessary to get them open in a way we can work with:

library(dplyr)
library(magrittr)
library(tidyr)
library(stringi)
library(stringr)
#get population data from github
EUdata  <- read.table(text = getURL(paste0("https://raw.githubusercontent.com/RobWHickman/",
                        "Population-Distributions/master/demo_r_pjangrp3.tsv")),
                      sep="\t", header=TRUE)

#format data
EUdata <- EUdata %>% separate(sex.unit.age.geo.time,
                              unlist(str_split(names(EUdata)[1], "\\.")),
                              sep = ",")

#get for total ages and total sexes
EUdata <- EUdata[which(EUdata$age == "TOTAL" & EUdata$sex == "T"),]

#sort out and name
popDF <- EUdata[c(4,6)]
names(popDF) <- c("NUTS_ID", "pop2014")
popDF$pop2014 <- as.numeric(as.character(gsub(" .*", "", popDF$pop2014)))

#get population density data
#basically the same as above
EUdataDens <- read.table(text = getURL(paste0("https://raw.githubusercontent.com/RobWHickman",
                            "/Population-Distributions/master/demo_r_d3dens.tsv")),
                          sep="\t", header=TRUE)

#format data
EUdataDens <- EUdataDens %>% separate(unit.geo.time,
                                      names(EUdata)[1:2],
                                      sep = ",")

#sort out and name
EUdataDens$X2014 <- as.numeric(as.character(EUdataDens$X2014))
densDF <- EUdataDens[2:3]
names(densDF) <- c("NUTS_ID", "Density")

We can glimpse the firstlines of both of these using head():

head(popDF)
##       NUTS_ID pop2014
## 88175      AL 2893005
## 88176     AL0 2893005
## 88177    AL01      NA
## 88178   AL011      NA
## 88179   AL012      NA
## 88180   AL013      NA
head(densDF)
##   NUTS_ID Density
## 1      AL   105.6
## 2     AL0   105.6
## 3      AT   103.6
## 4     AT1   160.9
## 5    AT11    78.4
## 6   AT111    54.1

This data is then joined to the DataFrame part of the SpatialPolygonsDataFrame using left_join(). I also averaged out some missing German areas and removed the French South American/Pacific islands.

#populate up SpatialPolygonsDataFrame
#bind to the shapefiles
level3@data <- left_join(level3@data, popDF, by = "NUTS_ID")
level3@data <- left_join(level3@data, densDF, by = "NUTS_ID")

#add in missing German densities using their average
level3$Density[which(is.na(level3$Density))] <- EUdataDens$X2014[which(EUdataDens$unit %in% "DE8")]

#get rid of small islands
remove <- c("FRA10", "FRA20", "FRA30", "FRA40", "FRA50", "PT200", "PT300",
            "ES703", "ES704", "ES705", "ES706", "ES707", "ES708", "ES709")
level3 <- level3[-c(which(level3$NUTS_ID %in% remove)),]

To plot a colour gradient of total population for each shape, I used ggplot as it’s nice to work with and gives fairly aesthetic results. There are perhaps neater/ more itneractive ways to do this, but it wasn’t the key figure I wanted to produce in any case.

The map data is fortified then passed into geom_map():

library(ggplot2)
library(ggthemes)
library(maptools)
## Checking rgeos availability: TRUE
#fortify for plotting
EUmap <- fortify(level3, region="NUTS_ID")

#make plot
ggplot() +
    #map based off of fortified data frame
    geom_map(data = EUmap, map = EUmap,
             aes(x = long, y = lat, map_id = id),
             color = "black", size = 1, fill = NA) +
    geom_map(data = level3@data, map = EUmap,
             aes(fill = pop2014, map_id = NUTS_ID),
             color = NA) +
    theme_map() +
    labs(title="Population in Level 3 EU Regions") +
    #fill by population of polygon
    scale_fill_gradient(name = "Population",
                        low = "darkblue", high = "yellow") +
    theme(legend.position = "right")
## Warning: Ignoring unknown aesthetics: x, y

To plot a specific proportion of the population living in region x/y, I first summed the population of every shape then summed from the most dense until I reach z% of this. The dplyr package makes it nice and easy to pass columns back into the data as either the “high density” shapes containing z% or the “low density” shapes containnig the rest:

#make sorting table
sortingDF <- level3@data %>% select(NUTS_ID, pop2014, Density)
    totalpop <- sum(sortingDF$pop2014, na.rm = TRUE)
    #sort by density (i.e. find smallest area for max pop)
    sortingDF <- sortingDF %>% arrange(-Density) %>% 
            mutate(cumsum = cumsum(pop2014))

percentage <- 30

sortingDF <- sortingDF %>% 
              mutate(lowhigh = ifelse(cumsum > totalpop*(percentage /100),
                                "low density", "high density")) %>%
              select(NUTS_ID, lowhigh)

#bind high/low value to SPDF
level3@data <- left_join(level3@data, sortingDF, by = "NUTS_ID")

head(level3@data)
##   NUTS_ID STAT_LEVL_ SHAPE_AREA SHAPE_LEN pop2014 Density     lowhigh
## 1   AT111          3 0.08381656  1.474930   37578    54.1 low density
## 2   AT112          3 0.21511603  3.394260  153542   100.3 low density
## 3   AT211          3 0.23837154  3.519518  281065   143.1 low density
## 4   AT212          3 0.48747204  5.836238  125272    30.9 low density
## 5   AT213          3 0.39799135  4.264666  150710    45.2 low density
## 6   AT221          3 0.14611800  2.481093  420885   343.2 low density

This is then plotted as before:

#make plot
ggplot() +
    geom_map(data = EUmap, map = EUmap,
              aes(x = long, y = lat, map_id = id),
              color = "black", size = 1, fill = NA) +
    geom_map(data = level3@data, map = EUmap,
             aes(fill = lowhigh, map_id = NUTS_ID),
             color = NA) +
    theme_map() +
    labs(title= paste0("Map Showing ", percentage,
                       "% of the European Population in Yellow")) + 
    scale_fill_manual(values = c("yellow", "darkblue"), 
                      name="Population Density") +
    theme(legend.position = "right")
## Warning: Ignoring unknown aesthetics: x, y

Finally, I felt like making a grid at different percentages. Blogdown wasn’t enjoying my attempts to turn this into a function so it’s shown below as a for-loop that creates a different plot for each decile between 10% and 90% and then plots them in a 3x3 matrix:

library(gridExtra)
library(grid)

plot_grid <- function(){
for (percent in seq(10,90,10)){
  percentage <- percent
  
  sortingDF <- level3@data %>% select(NUTS_ID, pop2014, Density)
    totalpop <- sum(sortingDF$pop2014, na.rm = TRUE)
    #sort by density (i.e. find smallest area for max pop)
    sortingDF <- sortingDF %>% arrange(-Density) %>% 
                  mutate(cumsum = cumsum(pop2014))

  sortingDF <- sortingDF %>% 
                mutate(lowhigh = ifelse(cumsum > totalpop*(percentage /100),
                                "low density", "high density")) %>%
                select(NUTS_ID, lowhigh)
  
  level3@data <- left_join(level3@data[1:6], sortingDF, by = "NUTS_ID")

  p <- ggplot() +
    geom_map(data = EUmap, map = EUmap,
              aes(x = long, y = lat, map_id = id),
              color = "black", size = 1, fill = NA) +
    geom_map(data = level3@data, map = EUmap,
             aes(fill = lowhigh, map_id = NUTS_ID),
             color = NA) +
    theme_map() +
    labs(title= paste0(percentage,
                       "% of Population")) + 
    scale_fill_manual(values = c("yellow", "darkblue"), 
                      name="Population Density") +
    theme(legend.position = c(.65, .4))
  
  assign(paste0("plot", as.character(percentage)), p)
  }

  grid <- grid.arrange(plot10, plot20, plot30, plot40, plot50, plot60, plot70, plot80, plot90,
                        top  = textGrob("Percentages of the European Population in Yellow",
                                gp=gpar(fontsize=20,font=1)))
  return(grid)
}

plot <- plot_grid()