1 Set up

Install necessary packages

install.packages("sf", dependencies = TRUE)
install.packages("terra", dependencies = TRUE)
install.packages("tmap")
install.packages("tidyverse", dependencies = TRUE)

2 Dasymetric mapping

2.1 Population

Import polygons of census districts and population tabulation in 2020

library(sf)
library(tidyverse)

sgshp <- st_read("../data/master-plan-2019-subzone-boundary-singapore.geojson", quiet = TRUE)
sgpop <- read_csv("../data/census-data-singapore-2020.csv")

Amend character strings of SUBZONE_N in sgpop to match values in sgshp

sgpop$SUBZONE_N <- toupper(sgpop$SUBZONE_N) # change to all caps

Join sgpop to the sgshp object

sgshp_pop <- left_join(sgshp, sgpop, by = c("SUBZONE_N"))
head(sgshp_pop)
## Simple feature collection with 6 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 103.6537 ymin: 1.216215 xmax: 103.8988 ymax: 1.297462
## Geodetic CRS:  WGS 84
##                 SUBZONE_N pop_count                       geometry
## 1             MARINA EAST         0 MULTIPOLYGON (((103.8803 1....
## 2        INSTITUTION HILL      3140 MULTIPOLYGON (((103.8375 1....
## 3          ROBERTSON QUAY      2990 MULTIPOLYGON (((103.8341 1....
## 4 JURONG ISLAND AND BUKOM         0 MULTIPOLYGON (((103.7124 1....
## 5            FORT CANNING       180 MULTIPOLYGON (((103.8471 1....
## 6        MARINA EAST (MP)         0 MULTIPOLYGON (((103.8987 1....

Plot the column ‘pop_count’ in sgshp

library(tmap)
tmap_mode("plot")

sgshp_pop %>% 
  mutate(pop_count = ifelse(pop_count == 0, NA, pop_count)) %>% 
  tm_shape() +
  tm_polygons("pop_count", palette = "Reds", style = "jenks",
              colorNA = "transparent", showNA = FALSE) +
  tm_layout(frame = FALSE)
Population count per Subzone in Singapore in 2020

Population count per Subzone in Singapore in 2020


2.2 Land use

Load Singapore’s land use from the 2019 Master Plan

sg_landuse <- read_sf("../data/master-plan-2019-landuse-singapore.geojson")
head(sg_landuse)
## Simple feature collection with 6 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 103.7339 ymin: 1.338392 xmax: 103.869 ymax: 1.410862
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 2
##   LU_DESC                                                               geometry
##   <chr>                                                       <MULTIPOLYGON [°]>
## 1 OPEN SPACE       (((103.8166 1.338891, 103.8166 1.33894, 103.8166 1.338979, 1…
## 2 ROAD             (((103.8353 1.341708, 103.8352 1.341597, 103.8349 1.341675, …
## 3 PLACE OF WORSHIP (((103.735 1.376136, 103.7345 1.375653, 103.7343 1.37584, 10…
## 4 ROAD             (((103.8667 1.405202, 103.8669 1.404992, 103.8669 1.404936, …
## 5 COMMERCIAL       (((103.8685 1.409949, 103.8686 1.409948, 103.8686 1.409955, …
## 6 BUSINESS 2       (((103.8653 1.406314, 103.8653 1.40634, 103.8652 1.406366, 1…

Tally the number of rows for each type of land use

landuse_tally <- sg_landuse %>% 
  st_set_geometry(NULL) %>% # remove geometry column for faster processing
  group_by(LU_DESC) %>% # column name for land use
  summarize(n = n()) %>% # count no. of rows
  arrange(-n) # descending order
landuse_tally
## # A tibble: 33 × 2
##    LU_DESC                                       n
##    <chr>                                     <int>
##  1 RESIDENTIAL                               79121
##  2 ROAD                                       6623
##  3 COMMERCIAL                                 5983
##  4 BUSINESS 2                                 5976
##  5 RESIDENTIAL WITH COMMERCIAL AT 1ST STOREY  2821
##  6 BUSINESS 1                                 1505
##  7 PARK                                       1425
##  8 UTILITY                                    1067
##  9 WATERBODY                                  1015
## 10 RESIDENTIAL / INSTITUTION                   960
## # ℹ 23 more rows

Subset sg_landuse to only include residential land use zones

sg_landuse <- sg_landuse %>% 
  filter(str_detect(LU_DESC, "RESIDENTIAL"))

Visualise the types of residential land use

tmap_mode("plot")
tm_shape(sgshp) + tm_borders(col = "grey90") +
  tm_shape(sg_landuse) + tm_polygons("LU_DESC", border.col = "transparent") +
  tm_layout(frame = FALSE, legend.width = 10)
Residential land use zones in Singapore from the 2019 Master Plan

Residential land use zones in Singapore from the 2019 Master Plan


2.3 Rasterize

Set the pixel resolution (in meters)

pixel_res <- 10

Transform all layers to a projected CRS

sgshp_pop <- sgshp_pop %>% sf::st_transform(sf::st_crs(32648))
sg_landuse <- sg_landuse %>% sf::st_transform(sf::st_crs(32648))

Create a raster template at the specified resolution and CRS

library(terra)
rast_template <- rast(ext(sgshp_pop), 
                      resolution = pixel_res,
                      crs = st_crs(sgshp_pop)$wkt) # input format is WKT

For each census district, calculate the population per pixel (population density)

sgshp_pop <- sgshp_pop %>%
  mutate(area = st_area(sgshp_pop)) %>% # first create a new column 'area'
  mutate(pop_perpixel = pop_count/area * pixel_res^2) # per 10 x 10 pixel

Create new rasters of ‘population count’ and ‘population density’

rast_popcount <- rasterize(vect(sgshp_pop), rast_template,
                           field = "pop_count")
rast_popcount[rast_popcount == 0] <- NA  # convert 0s to NAs

rast_popdens <- rasterize(vect(sgshp_pop), rast_template,
                          field = "pop_perpixel")
rast_popdens[rast_popdens == 0] <- NA

For each land use polygon, assign a value representing (relative) ‘population intensity’

sg_landuse <- sg_landuse %>%
  mutate(intensity = case_when( # create new column 'intensity'
    LU_DESC == "RESIDENTIAL" ~ 1,
    LU_DESC == "RESIDENTIAL WITH COMMERCIAL AT 1ST STOREY" ~ 1,
    LU_DESC == "RESIDENTIAL / INSTITUTION" ~ 0.5,
    LU_DESC == "COMMERCIAL & RESIDENTIAL" ~ 0.8))

Convert to raster representing ‘population intensity’

rast_landuse <- rasterize(vect(sg_landuse), rast_template,
                          field = "intensity")
rast_landuse[rast_landuse == 0] <- NA  

Mask away areas within rast_landuse in census districts with no population

rast_landuse <- mask(rast_landuse, rast_popcount)

2.4 Map

Multiply the ‘population density’ with ‘population intensity’

rast_popdens_intensity <- rast_popdens * rast_landuse
names(rast_popdens_intensity) <- "popintensity_sum" # rename raster - used as colname later

Calculate the zonal sum for this raster (sum per census district)

sgshp_pop <- 
  terra::extract(rast_popdens_intensity, vect(sgshp_pop),
                 fun = "sum", na.rm = TRUE, 
                 bind = TRUE) %>%  # bind results directly to sgshp_pop
  st_as_sf() # convert back to sf

Rasterize the zonal sums

rast_popdens_intensity_sum <- rasterize(vect(sgshp_pop), rast_template,
                                       field = "popintensity_sum")

Calculate the dasymetric map of population density

rast_popdasymap <- rast_popdens_intensity * rast_popcount / rast_popdens_intensity_sum
names(rast_popdasymap) <- "pop_perpixel_dasy" # rename raster

Check if the sum of raster values within the dasymetric map is close to the total population count in the original dataset

test <- 
  terra::extract(rast_popdasymap, vect(sgshp_pop),
                 fun = "sum", na.rm = TRUE, 
                 bind = TRUE) %>% 
  st_as_sf() 

sum(test$pop_perpixel_dasy, na.rm = TRUE)
## [1] 4048520
sum(test$pop_count, na.rm = TRUE)
## [1] 4051500

2.5 Polygonize

Combine raster cells with similar values into single polygons

sg_landuse_pop <- 
  as.polygons(rast_popdasymap, 
              round = FALSE, # don't round off values before aggregation
              aggregate = TRUE) %>%  # combine cells with same values 
  st_as_sf() %>% 
  st_make_valid() %>% 
  st_cast("MULTIPOLYGON") %>% # Geometry type was previously 'GEOMETRY'
  st_cast("POLYGON") # split apart the multipolygons

For each polygon, calculate the population count based on its ‘population per pixel’ value (by accounting for polygon area and pixel resolution)

sg_landuse_pop <- sg_landuse_pop %>% 
  mutate(area = st_area(sg_landuse_pop)) %>% 
  mutate(pop_count = pop_perpixel_dasy * area / pixel_res^2)

# remove 'units' attribute from the 'pop_count' column (not in m2)
sg_landuse_pop$pop_count <- units::drop_units(sg_landuse_pop$pop_count)

2.6 Visualize

Subset each dataset to a selected region, in order to reduce load on computing resources

sgshp_subset <- sgshp_pop %>% 
  filter(SUBZONE_N %in% c("TAMPINES NORTH", "TAMPINES EAST", "TAMPINES WEST",
                          "PASIR RIS WEST", "PASIR RIS CENTRAL", "PASIR RIS DRIVE", "FLORA DRIVE", "CHANGI WEST",
                          "BEDOK RESERVOIR", "SIMEI", "XILIN"))
sg_landuse_pop_subset <- st_filter(sg_landuse_pop, sgshp_subset) 
rast_popdasymap_subset <- crop(rast_popdasymap, sgshp_subset)
rast_popdasymap_subset <- mask(rast_popdasymap_subset, sgshp_subset)

Visualize the subset region

tmap_mode("view")

tm <- tm_basemap("OpenStreetMap") +
  
  tm_shape(sgshp_subset %>% 
             mutate(pop_count = ifelse(pop_count == 0, NA, pop_count))) + # make transparent later
  tm_polygons("pop_count", 
              title = "Population count<br>(per census district)",
              group = "Census districts", 
              palette = "Reds", 
              alpha = 0.5,
              colorNA = "transparent", showNA = FALSE) +
  
  tm_shape(rast_popdasymap_subset) +
  tm_raster(title = "Population density<br>(per 10m pixel)",
            group = "Dasymetric map",
            palette = "viridis", alpha = 0.7,
            colorNA = "transparent", textNA = "None") +
  
  tm_shape(sg_landuse_pop_subset) +
  tm_polygons("pop_count", 
              title = "Population count<br>(per land use type)",
              group = "Dasymetric map (polygonized)", 
              n = 6,
              alpha = 0.7, border.col = "grey", border.alpha = 0.3,
              palette = "viridis", colorNA = "white", textNA = "None") +
  tm_view(set.view = 14) # set zoom level

# hide certain layers by default
tm %>% tmap_leaflet() %>%
  leaflet::hideGroup("Dasymetric map") 

3 Credits

Data sources used:


© XP Song
Disclaimer: These notes exclusively reserved for use with the Geospatial Data Science with R online course. Please do not distribute or reproduce these materials, they are intended solely for personal use.