Revealing mismatched spatial aggregation of physical and social vitality in London

load packages

library(here)
library(tidyverse)
library(fs)
library(plyr)
library(sf)
library(tmap)
library(tmaptools)
library(janitor) 
library(spatstat) 
library(spdep)
library(ggplot2)
library(rgdal)
library(stringr)
library(patchwork)
here::here()
## [1] "E:/200_CASA/05_GIS_repo/London_vitality"

Data Prepare

  • Download the LSOA boundary
#download.file("https://data.london.gov.uk/download/statistical-gis-boundary-files-london/9ba8c833-6370-4b11-abdc-314aa020d5e0/statistical-gis-boundaries-london.zip",destfile="data/statistical-gis-boundaries-london.zip")
#listfiles<- list.files(path = here::here("data"), pattern = "*.zip",
#    recursive = TRUE, full.names = TRUE) %>% 
#    lapply(., unzip, exdir = here::here("data")) 
  • read the data and check
    • the building height data was divided into residence and none residence, we use rbind to merge the data into all building heights, the volume column is the building area for each building
## building Height 
p_buildH_Res <- here::here("data","London_Res.shp")
sf_buildH_Res <- st_read(p_buildH_Res) %>% 
    clean_names() %>% 
    st_transform(., 27700) 
## Reading layer `London_Res' from data source `E:\200_CASA\05_GIS_repo\London_vitality\data\London_Res.shp' using driver `ESRI Shapefile'
## Simple feature collection with 571640 features and 5 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 503841.6 ymin: 156708.3 xmax: 561154.1 ymax: 200865.3
## projected CRS:  OSGB 1936 / British National Grid
p_buildH_unRes <-  here::here("data","London_NonRes.shp")
sf_buildH_unRes <- st_read(p_buildH_unRes) %>% 
    clean_names() %>% 
    st_transform(., 27700) ## reprojection 
## Reading layer `London_NonRes' from data source `E:\200_CASA\05_GIS_repo\London_vitality\data\London_NonRes.shp' using driver `ESRI Shapefile'
## Simple feature collection with 74029 features and 5 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 504027.9 ymin: 157529 xmax: 557655.9 ymax: 200024.7
## projected CRS:  OSGB 1936 / British National Grid
sf_buildH <- rbind(sf_buildH_Res, sf_buildH_unRes)

sf_buildH
## Simple feature collection with 645669 features and 5 fields
## geometry type:  GEOMETRY
## dimension:      XY
## bbox:           xmin: 503841.6 ymin: 156708.3 xmax: 561154.1 ymax: 200865.3
## projected CRS:  OSGB 1936 / British National Grid
## First 10 features:
##                                  id      mean       max      area     volume
## 1  0D4F7073C15027B3E050A00A568A259B  6.490812  7.670000  379.9188  2465.9813
## 2  0D4F7073C15127B3E050A00A568A259B  8.276826 12.010000  482.1397  3990.5865
## 3  0D4F7073C15327B3E050A00A568A259B  6.022876 10.100000  910.7202  5485.1546
## 4  0D4F7073C15627B3E050A00A568A259B  4.694023  5.900000  772.7072  3627.1055
## 5  0D4F7073C15A27B3E050A00A568A259B  4.037549  6.379999  617.6506  2493.7943
## 6  0D4F7073C15C27B3E050A00A568A259B  7.919131 11.320000  206.3168  1633.8499
## 7  0D4F7073C15D27B3E050A00A568A259B  5.306131  6.230000 3154.0480 16735.7908
## 8  0D4F7073C15E27B3E050A00A568A259B  4.668476  5.620001  269.9656  1260.3277
## 9  0D4F7073C15F27B3E050A00A568A259B  3.509804  4.520000  103.0385   361.6449
## 10 0D4F7073C16027B3E050A00A568A259B 10.240180 17.730001 1946.9758 19937.3825
##                          geometry
## 1  POLYGON ((527004.2 168860.9...
## 2  POLYGON ((527077.6 168883, ...
## 3  POLYGON ((527211.5 168601.7...
## 4  POLYGON ((527501.4 168377.4...
## 5  POLYGON ((527413.1 168823, ...
## 6  POLYGON ((526852.3 169020.4...
## 7  POLYGON ((526890.7 168966.3...
## 8  POLYGON ((527000.3 169026.6...
## 9  POLYGON ((527109.1 169211.1...
## 10 POLYGON ((527576.1 169204.5...
## lsoa
p_bodry <- here::here("data","LSOA_2011_London_gen_MHW.shp")

sf_LSOA <- st_read(p_bodry) %>% 
    clean_names() %>% 
    st_transform(., 27700) 
## Reading layer `LSOA_2011_London_gen_MHW' from data source `E:\200_CASA\05_GIS_repo\London_vitality\data\LSOA_2011_London_gen_MHW.shp' using driver `ESRI Shapefile'
## Simple feature collection with 4835 features and 14 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 503574.2 ymin: 155850.8 xmax: 561956.7 ymax: 200933.6
## projected CRS:  OSGB 1936 / British National Grid
  • filter poi category of small catering business in all pois
## poi
p_poi <- here::here("data","poi2020_09.csv")

l_small_catering<-c(1020013,#Cafes, snack bars and tea rooms
                    1020018,#Fast food and takeaway outlets
                    1020019,#Fast food delivery services
                    1020020,#Fish and chip shops
                    1020034,#Pubs, bars and inns
                    1020043 #Restaurants
                    )

df_poi <- read_csv(p_poi) %>%
    clean_names() %>% 
    filter(pointx_class %in% l_small_catering)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_character(),
##   ref_no = col_double(),
##   pointx_class = col_double(),
##   feature_easting = col_double(),
##   feature_northing = col_double(),
##   pos_accuracy = col_double(),
##   uprn = col_double(),
##   topo_toid_version = col_double(),
##   usrn = col_double(),
##   usrn_mi = col_double(),
##   distance = col_double(),
##   telephone_number = col_logical(),
##   brand = col_logical()
## )
## i Use `spec()` for the full column specifications.
## Warning: 352048 parsing failures.
##  row              col           expected                    actual                                                          file
## 2117 telephone_number 1/0/T/F/TRUE/FALSE 02077191676               'E:/200_CASA/05_GIS_repo/London_vitality/data/poi2020_09.csv'
## 2117 brand            1/0/T/F/TRUE/FALSE Pret A Manger             'E:/200_CASA/05_GIS_repo/London_vitality/data/poi2020_09.csv'
## 2122 telephone_number 1/0/T/F/TRUE/FALSE 08456781810               'E:/200_CASA/05_GIS_repo/London_vitality/data/poi2020_09.csv'
## 2122 brand            1/0/T/F/TRUE/FALSE United Colors of Benetton 'E:/200_CASA/05_GIS_repo/London_vitality/data/poi2020_09.csv'
## 2123 telephone_number 1/0/T/F/TRUE/FALSE 01689831551               'E:/200_CASA/05_GIS_repo/London_vitality/data/poi2020_09.csv'
## .... ................ .................. ......................... .............................................................
## See problems(...) for more details.
sf_poi <- st_as_sf(df_poi,coords = c("feature_easting", "feature_northing"), 
                   crs = 27700)

tm_shape(sf_poi) + 
    tm_dots()

Calculate metrics

Built-environment intensity

to calculate the building area in each lsoa, I use st_join to join the two table and group by lsoa to sum the volume.

join <- st_join(sf_LSOA, sf_buildH, join = st_intersects)

vol <-  aggregate(list(ResVol = join$volume), 
                  by=list(lsoa11cd =join$lsoa11cd ), 
                  FUN=sum)
sf_res <- left_join(sf_LSOA,vol,by=c("lsoa11cd" = "lsoa11cd")) %>% 
    mutate(area=st_area(.) %>% as.vector()) %>% 
    mutate(BV = ResVol/area)

tm_shape(sf_res) + 
  tm_fill("BV", style = "fisher", palette = "Reds") +
  tm_borders(alpha = 0.1) +
  tm_layout(main.title = "Built-environment vitality", main.title.size = 0.7 ,
            legend.position = c("right", "bottom"), legend.title.size = 0.8)+
    tm_scale_bar(position = c("left", "bottom")) + 
    tm_compass(type = "4star", size = 2, position = c("right", "top"))

Social vitality

join_poi <- st_join(sf_LSOA,sf_poi,join = st_intersects)

poi_count <- count(join_poi$lsoa11cd)

sf_result <- left_join(sf_res,poi_count,by=c("lsoa11cd" = "x")) %>% 
    mutate(SV = freq/area)

tm_shape(sf_result) + 
  tm_fill("SV", style = "fisher", palette = "PuBu") +
  tm_borders(alpha = 0.1) +
  tm_layout(main.title = "Social vitality", main.title.size = 0.7 ,
            legend.position = c("right", "bottom"), legend.title.size = 0.8)+
    tm_scale_bar(position = c("left", "bottom")) + 
    tm_compass(type = "4star", size = 2, position = c("right", "top"))

Distribution and correlation

sf_result
## Simple feature collection with 4835 features and 19 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 503574.2 ymin: 155850.8 xmax: 561956.7 ymax: 200933.6
## projected CRS:  OSGB 1936 / British National Grid
## First 10 features:
##     lsoa11cd                  lsoa11nm  msoa11cd                 msoa11nm
## 1  E01000001       City of London 001A E02000001       City of London 001
## 2  E01000002       City of London 001B E02000001       City of London 001
## 3  E01000003       City of London 001C E02000001       City of London 001
## 4  E01000005       City of London 001E E02000001       City of London 001
## 5  E01000006 Barking and Dagenham 016A E02000017 Barking and Dagenham 016
## 6  E01000007 Barking and Dagenham 015A E02000016 Barking and Dagenham 015
## 7  E01000008 Barking and Dagenham 015B E02000016 Barking and Dagenham 015
## 8  E01000009 Barking and Dagenham 016B E02000017 Barking and Dagenham 016
## 9  E01000010 Barking and Dagenham 015C E02000016 Barking and Dagenham 015
## 10 E01000011 Barking and Dagenham 016C E02000017 Barking and Dagenham 016
##      lad11cd              lad11nm   rgn11cd rgn11nm usualres hholdres comestres
## 1  E09000001       City of London E12000007  London     1465     1465         0
## 2  E09000001       City of London E12000007  London     1436     1436         0
## 3  E09000001       City of London E12000007  London     1346     1250        96
## 4  E09000001       City of London E12000007  London      985      985         0
## 5  E09000002 Barking and Dagenham E12000007  London     1703     1699         4
## 6  E09000002 Barking and Dagenham E12000007  London     1391     1391         0
## 7  E09000002 Barking and Dagenham E12000007  London     1544     1544         0
## 8  E09000002 Barking and Dagenham E12000007  London     1773     1773         0
## 9  E09000002 Barking and Dagenham E12000007  London     2840     2840         0
## 10 E09000002 Barking and Dagenham E12000007  London     1634     1628         6
##    popden hholds avhholdsz    ResVol      area        BV freq
## 1   112.9    876       1.7 3385112.9 133320.77 25.390739   25
## 2    62.9    830       1.7 3926553.3 226191.27 17.359438   40
## 3   227.7    817       1.5 2240161.2  57302.97 39.093285    4
## 4    52.0    467       2.1 3264903.1 190738.76 17.117145   90
## 5   116.2    543       3.1  171882.4 144195.85  1.192006    1
## 6    69.6    612       2.3  406584.6 198134.81  2.052060   11
## 7    79.1    521       3.0  346620.8 193425.10  1.792016    1
## 8   138.6    638       2.8  250804.0 128591.53  1.950393   21
## 9    80.7   1103       2.6  918986.9 348848.34  2.634345   34
## 10  178.2    457       3.6  152837.7  90297.68  1.692598    3
##                          geometry           SV
## 1  MULTIPOLYGON (((532105.1 18... 1.875177e-04
## 2  MULTIPOLYGON (((532746.8 18... 1.768415e-04
## 3  MULTIPOLYGON (((532135.1 18... 6.980441e-05
## 4  MULTIPOLYGON (((533807.9 18... 4.718496e-04
## 5  MULTIPOLYGON (((545122 1843... 6.935013e-06
## 6  MULTIPOLYGON (((544180.3 18... 5.551776e-05
## 7  MULTIPOLYGON (((543741 1845... 5.169960e-06
## 8  MULTIPOLYGON (((544499.8 18... 1.633078e-04
## 9  MULTIPOLYGON (((544174 1843... 9.746356e-05
## 10 MULTIPOLYGON (((544523.4 18... 3.322345e-05
p1 <- ggplot(sf_result, aes(x=BV, fill=sex)) +
  geom_histogram(fill="white", color="black",bins = 40)+
  geom_vline(aes(xintercept=mean(BV)), color="blue",linetype="dashed")+
  xlim(c(0, 15))+
  labs(x="Physical built-environment vitality", y = "Count")+
  theme(text=element_text(family="serif"))

p2 <- ggplot(sf_result, aes(x=SV, fill=sex)) +
  geom_histogram(fill="white", color="black",bins = 40)+
  geom_vline(aes(xintercept=mean(SV)), color="blue",linetype="dashed")+
  xlim(c(0, 0.0003))+
  labs(x="Social vitality", y = "Count")+
  theme(text=element_text(family="serif"))

p1+p2

png(
    filename = "Hist.png",
    type = "cairo",
    res = 300,
    width = 2500, height = 1200,
    bg = "transparent"
)
p1+p2
## Warning: Removed 20 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).
## Warning: Removed 56 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).
dev.off()
## png 
##   2

Spatial Autocorrelation

# bivariate LISA
## reference: 
## https://stackoverflow.com/questions/45177590/map-of-bivariate-spatial-correlation-in-r-bivariate-lisa
## https://gist.github.com/rafapereirabr/5348193abf779625f5e8c5090776a228

#======================================================
# Programming some functions

# Bivariate Moran's I
moran_I <- function(x, y = NULL, W){
        if(is.null(y)) y = x

        xp <- (x - mean(x, na.rm=T))/sd(x, na.rm=T)
        yp <- (y - mean(y, na.rm=T))/sd(y, na.rm=T)
        W[which(is.na(W))] <- 0
        n <- nrow(W)

        global <- (xp%*%W%*%yp)/(n - 1)
        local  <- (xp*W%*%yp)

        list(global = global, local  = as.numeric(local))
}


# Permutations for the Bivariate Moran's I
simula_moran <- function(x, y = NULL, W, nsims = 1000){

        if(is.null(y)) y = x

        n   = nrow(W)
        IDs = 1:n

        xp <- (x - mean(x, na.rm=T))/sd(x, na.rm=T)
        W[which(is.na(W))] <- 0

        global_sims = NULL
        local_sims  = matrix(NA, nrow = n, ncol=nsims)

        ID_sample = sample(IDs, size = n*nsims, replace = T)

        y_s = y[ID_sample]
        y_s = matrix(y_s, nrow = n, ncol = nsims)
        y_s <- (y_s - apply(y_s, 1, mean))/apply(y_s, 1, sd)

        global_sims  <- as.numeric( (xp%*%W%*%y_s)/(n - 1) )
        local_sims  <- (xp*W%*%y_s)

        list(global_sims = global_sims,
             local_sims  = local_sims)
}
#======================================================
# Variables to use in the correlation: white and black population in each census track
x <- sf_result$BV
y <- sf_result$SV

#======================================================
# Adjacency Matrix (Queen)
## reference : https://gis.stackexchange.com/questions/355300/poly2nb-spdep-not-identifying-neighbours-correctly
overlapmat <- st_overlaps(sf_result,sparse=FALSE)
lw <- mat2listw(overlapmat)
W  <- as(lw, "symmetricMatrix")
W  <- as.matrix(W) #[1:nrow(W),1:ncol(W),drop=FALSE]
W  <- W/rowSums(W) ## rowSum auto compile 
W[which(is.na(W))] <- 0

plot(sf_result$geometry)
plot(lw, st_coordinates(st_centroid(sf_result)),col="red",add=TRUE)

#======================================================
# Calculating the index and its simulated distribution for global and local values

m  <- moran_I(x, y, W)
m[[1]] # global value
##           [,1]
## [1,] 0.4723904
m_i <- m[[2]]  # local values

local_sims <- simula_moran(x, y, W)$local_sims

# Identifying the significant values 
alpha <- .05  # for a 95% confidence interval
probs <- c(alpha/2, 1-alpha/2)
intervals <- t( apply(local_sims, 1, function(x) quantile(x, probs=probs)))
sig        <- ( m_i < intervals[,1] )  | ( m_i > intervals[,2] )

#======================================================
# Preparing for plotting
sf_result     <- st_as_sf(sf_result)
sf_result$sig <- sig

# Identifying the LISA patterns
xp <- (x-mean(x))/sd(x)
yp <- (y-mean(y))/sd(y)

patterns <- as.character( interaction(xp > 0, W%*%yp > 0) ) 
patterns <- patterns %>% 
        str_replace_all("TRUE","High") %>% 
        str_replace_all("FALSE","Low")
patterns[sf_result$sig==0] <- "Not significant"
sf_result$patterns <- patterns

# Rename LISA clusters
sf_result$patterns2 <- factor(sf_result$patterns, levels=c("High.High", "High.Low", "Low.High", "Low.Low", "Not significant"), labels=c("High BV - High SV", "High BV - Low SV", "Low BV - High SV","Low BV - Low SV", "Not significant"))

# Plotting
ggplot() + 
    geom_sf(data=sf_result, aes(fill=patterns2), color="NA") +
    scale_fill_manual(values = c("orange", "red", "green", "light blue", "grey95")) + 
    guides(fill = guide_legend(title="LISA clusters")) +
    theme_minimal()

## st_write(sf_result, here::here("plots","res.shp"))