London Migration

Results

Internal Migration

Flow of internal migration in and out of London, 2019 to 2024 combined
In-flow, out-flow and net internal migration flow, London, 2019 to 2024 combined
International Migration - UK

Immigration, Emigration and Net Migration, 2014 to 2024
International Migration - London foreign births

Cumulative arriving population of foreign born resident living in London in 2021, arriving before 1951 to 2021
Country, subregion and continent of origin of foreign born London resident, 2021
R Code

# Summary -----------------------------------------------------------------

# To build a visual overview of migration for London. 
# To produce 5 key visuals from ONS internal migration movement and census 2021 foreign born usual residents. 

# (1) Census 2021 - Foreign born place of birth (sankey: region -> country -> England -> London
# (2) Census 2021 - Foreign born by year of arrival (stacked area)
# (3) International migration - Net international migration over time (divergent bar + net overlay)
# (4) Internal migration - Movement in/out London by external local authority & region (Chord chart)
# (5) Internal migration - Net internal migration over time (divergent bar + net overlay)

# Setup -------------------------------------------------------------------

library(openxlsx)

library(formattable)
library(dplyr)
library(reshape2)
library(tidyr)
library(stringr)

library(tinytex)
library(ggtext)
library(ggplot2)
library(ggsankey)
library(circlize)

# Files -------------------------------------------------------------------

# Data 

# data.internal.migration.2024 <- read.xlsx("detailedestimates2024on2023las.xlsx", sheet = "IM2024 on 2023 LAs")
# 
# data.internal.migration.2023 <- read.xlsx("detailedestimates2023on2023las.xlsx", sheet = "IM2023 on 2023 LAs")
# 
# data.internal.migration.2022 <- read.xlsx("detailedestimates2022on2023las.xlsx", sheet = "IM2022 on 2023 LAs")
# 
# data.internal.migration.2021 <- read.xlsx("detailedestimates2021on2021and2023las.xlsx", sheet = "2021 on 2023 LAs")
# 
# data.internal.migration.2020 <- read.xlsx("detailedestimates2020on2021and2023las.xlsx", sheet = "2020 on 2023 LAs")
# 
# data.internal.migration.2019 <- read.xlsx("detailedestimates2019on2021and2023las.xlsx", sheet = "2019 on 2023 LAs")

data.international.migration.2024 <- read.xlsx("ltimnov25.xlsx", 
                        sheet = "1",
                        startRow = 6)


data.census <- read.csv("census_2021_london_birthplace_arrivalyear.csv")


table.internal.migration <-  read.csv("table.internal.migration.csv")

# Lookups

lookup.la2023toregion <- read.xlsx("lasregionew2023lookup.xlsx", startRow = 4)


# Clean -------------------------------------------------------------------

colnames(data.international.migration.2024) <- c("flow", "year", "all.nationalities","british","eu", "non.eu")

colnames(data.census) <- c("region.code","region","birth.code","birth","arrival.year.code","arrival.year","population")

# data.internal.migration.2021 <- 
#   data.internal.migration.2021 %>%
#   rename(year = Year)
# 
# data.internal.migration.2020 <- 
#   data.internal.migration.2020 %>%
#   rename(year = Year)
# 
# data.internal.migration.2019 <- 
#   data.internal.migration.2019 %>%
#   rename(year = Year)


# tests --------------------------------------------------------------------




# Internal Migration (Chord Chart) ------------------------------------------------------

# Group by sex, calculate totals across all ages, join regions

# table.internal.migration <-
#   bind_rows( # combine internal migration files
#     data.internal.migration.2019,
#     data.internal.migration.2020,
#     data.internal.migration.2021,
#     data.internal.migration.2022,
#     data.internal.migration.2023,
#     data.internal.migration.2024
#   ) %>%
#   mutate( # add total column based on all age rows
#     age.total = rowSums(pick(contains("age")), na.rm = TRUE)
#   ) %>%
#   select(
#     outla,
#     inla,
#     sex,
#     year,
#     age.total
#   ) %>%
#   group_by(outla, inla, year) %>% # group columns to aggregate sex
#   summarise(age.total = sum(age.total), .groups = "drop") %>%
#   left_join( # join region lookup to inward LA and to outward LA
#     lookup.la2023toregion,
#     by = c("outla" = "LA.code")
#   ) %>%
#   left_join(
#     lookup.la2023toregion,
#     by = c("inla" = "LA.code")
#   ) %>%
#   rename(
#     outla.name = LA.name.x,
#     outregion.name = Region.name.x,
#     outregion.code = Region.code.x,
#     inla.name = LA.name.y,
#     inregion.name = Region.name.y,
#     inregion.code = Region.code.y
#   )

# Export for reduced file size on import

# write.csv(table.internal.migration, "table.internal.migration.csv")


# Chord Diagram

table.internal.migration2 <- 
  table.internal.migration %>%
  select(inregion.name, outregion.name, age.total)%>%
  group_by(inregion.name, outregion.name)%>%
  summarise(age.total = sum(age.total), .groups = "drop")%>%
  filter(
    !is.na(outregion.name),
    !is.na(inregion.name),
    outregion.name != inregion.name
  )%>%
  rename(
    from = outregion.name,
    to = inregion.name,
    value = age.total
  ) %>%
  mutate(
    value = round(value/1000,0)
  )



chordDiagramFromDataFrame(table.internal.migration2,
                          direction.type = "diffHeight"
                          )



# Identify which links involve London
is_london <- table.internal.migration2$from == "London" |
  table.internal.migration2$to   == "London"

# Set link colors: grey for normal, red for London links
link_colors <- ifelse(is_london, "sienna2", "grey80")

# Set link widths: thicker for London
link_widths <- ifelse(is_london, 2, 0.5)

# Draw chord diagram with highlighted links
  chordDiagramFromDataFrame(
  table.internal.migration2,
  directional = 1,
  direction.type = "diffHeight",
  col = link_colors,
  link.lwd = link_widths
)
 


# Internal Migration (Divergent Bar) --------------------------------------

# remove rows where migration is moving within London
table.internal.migration.divergentbar <- 
  table.internal.migration %>%
  filter( 
    outregion.name != inregion.name
  )

# Create two separate dataframes, filtered to London for inregion and outregion, respectively. Join dataframes by year and calculate 3rd net migration column.    
table.internal.migration.divergentbar.in <-
  table.internal.migration.divergentbar%>% 
  filter(
    inregion.name == "London" #arrivals into London only
  )%>%
  select(
    year,
    age.total
  )%>%
  group_by(year)%>%
  summarise(age.total = sum(age.total))

table.internal.migration.divergentbar.out <-
  table.internal.migration.divergentbar%>% 
  filter(
    outregion.name == "London" #departures from London only
  )%>%
  select(
    year,
    age.total
  )%>%
  group_by(year)%>%
  summarise(age.total = sum(age.total))

table.internal.migration.divergentbar.net <-
  left_join(
    table.internal.migration.divergentbar.in,
    table.internal.migration.divergentbar.out,
    by = "year"
  ) %>%
  mutate(
    net = age.total.x - age.total.y,
    age.total.y = age.total.y * -1
  )%>%
  rename(
   "Arriving" = "age.total.x",
   "Leaving" = "age.total.y",
   "Net migration" = "net",
   "Year" = "year"
  ) %>%
  melt(
    value.name = "Value",
    id.vars = "Year",
    variable.name = "Migration"
  ) %>%
  mutate(
    Value = round(Value / 1000,0)
  )

# Chart
  
chart.internal.migration.bar <- 
  ggplot(
  table.internal.migration.divergentbar.net,
  aes(x = Year, y = Value)
) +
  geom_bar(
    data = dplyr::filter(
      table.internal.migration.divergentbar.net,
      Migration %in% c("Arriving", "Leaving")
    ),
    alpha = 0.6,
    aes(fill = Migration),
    stat = "identity",
    position = "stack"
  ) +
  geom_line(
    data = dplyr::filter(
      table.internal.migration.divergentbar.net,
      Migration == "Net migration"
    ),
    aes(group = 1),
    colour = "grey40",
    linewidth = 1.2
  ) +
  geom_point(
    data = dplyr::filter(
      table.internal.migration.divergentbar.net,
      Migration == "Net migration"
      ),
    colour = "grey40",
    size = 4
  )  +
  # Labels on points
  geom_text(
    data = dplyr::filter(
      table.internal.migration.divergentbar.net,
      Migration == "Net migration"
      ),
    aes(label = Value),
    vjust = -2,
    colour = "black",
    size = 3
  )+ 
  scale_fill_manual(
    values = c(
      "Arriving"  = "steelblue",
      "Leaving" = "sienna2"
      )
  ) +
  theme_minimal() +
  labs(
    y = "Migrating population (thousands)"
  )



# International Migrants (Sankey) ----------------------------------------

# flow to show: country -> sub region -> region -> London
# group all arrivals from year of arrival
# separate out regions/sub region/country based on ":"
# calculate top countries within each region to aggregate those outside of top X by conditional column based on % of total. e.g IF country contrinutes < 2% of total migrants = Other

test <- 
  data.census %>%
  group_by(region, birth) %>%
  summarise(population = sum(population), .groups = "drop") %>%
  mutate(
    country = sub(".*: ", "", birth) # country or most granular place of birth available
  ) %>%
  separate(
    birth,
    into = c("continent", "subregion", "subsubregion"),
    sep = ": "
  ) %>%
  filter(
    subregion != "United Kingdom" #remove births from UK
  ) %>%
  mutate(
    total.population = sum(population),
    percent =  ((population/total.population) * 100) # to find contribution of country to foreign births
  ) %>%
  arrange(desc(population)) %>%
  mutate(cumulative.percent = cumsum(percent))


table.foreign.births <- 
  data.census %>%
  group_by(region, birth) %>%
  summarise(population = sum(population), .groups = "drop") %>%
  mutate(
    country = sub(".*: ", "", birth) # country or most granular place of birth available
    ) %>%
  separate(
    birth,
    into = c("continent", "subregion", "subsubregion"),
    sep = ": "
  ) %>%
  filter(
    subregion != "United Kingdom" #remove births from UK
  ) %>%
  mutate(
    total.population = sum(population),
    percent =  ((population/total.population) * 100) # to find contribution of country to foreign births
  ) %>%
  filter(
    percent >= 2.8
  ) %>%
  mutate(
    country = # if already summarised as an "other" country standardise all "other"
      if_else(
        str_detect(country,
                   regex("other", ignore_case = TRUE)),
        paste0("Other - ", subregion),
        country
      ),
    country.2 = if_else(percent < 1, "Other", country),
    subregion = if_else(subregion == "Other Europe", subsubregion, subregion) #fixes europe subregion to eu/restofeurope
    ) %>%
  group_by( # group to collapse all "other" countries within respective continent and subregion
    region,
    continent,
    subregion,
    country
  ) %>%
  summarise(population = sum(population), .groups = "drop")


sankey_data <- 
  table.foreign.births %>%
  make_long(
    country,
    subregion,
    continent,
    region,
    value = population
  ) %>%
  arrange(desc(value))

chart.international.migration.sankey <- 
  ggplot(sankey_data,
       aes(x = x,
           node = node,
           next_x = next_x,
           next_node = next_node,
           fill = factor(node),
           label = node,
           value = value
       )
) + 
  theme_minimal(
) + 
  geom_sankey(
    flow.alpha = 0.6,
    show.legend = F
) + 
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.x = element_blank(),
    legend.position = "none"
    ) + 
  scale_x_discrete(
        labels = c(
          "country" = "Country",
          "subregion" = "Subregion",
          "continent" = "Continent",
          "region" = "London"
        )
      )+ 
  geom_sankey_label(
)
  


 # International Migrants (Stacked Area) ----------------------------------------

table.foreign.births.trend <-
  data.census %>%
  filter(
    arrival.year != "Born in the UK",
    arrival.year != "Does not apply"
  ) %>%
  group_by(arrival.year, arrival.year.code) %>%
  summarise(population =  sum(population), .groups = "drop") %>%
  mutate(
    year = sub(".*Arrived ", "", arrival.year) # country or most granular place of birth available
    ) %>%
  arrange(arrival.year.code) %>%
  mutate(
    arrival.year = factor(
      arrival.year,
      levels = arrival.year),
    cum.population = cumsum(population)
    )

chart.international.migration.stackedarea <- 
  ggplot(
  data = table.foreign.births.trend,
  aes(
    x = arrival.year.code,
    y = cum.population,
    group = 1
  )
) +
  geom_col(
    aes(
      fill = arrival.year
    )
  ) +
  theme_minimal() +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.x  = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank()
    )+
  scale_y_continuous(labels = scales::label_comma())+
  labs(
    y = "Cumulative Population",
    fill = "Arrival Year"
  ) +
  scale_fill_viridis_d()

summary(table.foreign.births.trend)

# International Migrants (Divergent Bar) ----------------------------------------

#filter to YE June, unpivot flow 
table.international.migration <- 
  data.international.migration.2024 %>%
  filter(
    str_detect(year, "Jun")
  )%>%
  mutate(
    year = case_when(
      str_detect(year, "24") ~2024,
      str_detect(year, "23") ~2023,
      str_detect(year, "22") ~2022,
      str_detect(year, "21") ~2021,
      str_detect(year, "20") ~2020,
      str_detect(year, "19") ~2019,
      str_detect(year, "18") ~2018,
      str_detect(year, "17") ~2017,
      str_detect(year, "16") ~2016,
      str_detect(year, "15") ~2015,
      str_detect(year, "14") ~2014,
      TRUE ~ 0
    )
  )%>%
  filter(
    year != 0
  )%>%
  select(
    year,
    flow,
    all.nationalities
  )%>%
  mutate(
    all.nationalities = round(all.nationalities / 1000),
    all.nationalities = 
      case_when(
        str_detect(flow, "Emigration") ~ all.nationalities * -1, # where migration type is emigration, make negative, otherwise keep the same value
        TRUE ~ all.nationalities
      )
    )

# chart


chart.international.migration.bar <- 
  ggplot(
    table.international.migration,
    aes(x = year, y = all.nationalities)
  ) +
  geom_bar(
    data = dplyr::filter(
      table.international.migration,
      flow %in% c("Immigration", "Emigration")
    ),
    alpha = 0.6,
    aes(fill = flow),
    stat = "identity",
    position = "stack"
  ) +
  geom_line(
    data = dplyr::filter(
      table.international.migration,
      flow == "Net migration"
    ),
    aes(group = 1),
    colour = "grey40",
    linewidth = 1.2
  ) +
  geom_point(
    data = dplyr::filter(
      table.international.migration,
      flow == "Net migration"
    ),
    colour = "grey40",
    size = 4
  )  +
  # Labels on points
  geom_text(
    data = dplyr::filter(
      table.international.migration,
      flow == "Net migration"
    ),
    aes(label = all.nationalities),
    vjust = -2,
    colour = "black",
    size = 3
  )+ 
  scale_fill_manual(
    values = c(
      "Immigration"  = "steelblue",
      "Emigration" = "sienna2"
    )
  ) +
  theme_minimal() +
  labs(
    y = "Migrating population (thousands)",
    x = "Year (ending June)",
    fill = "Migration"
  )