Disclaimer: The contents of this document come from 13. Geomarketing of Geocomputation with R (Lovelace, Nowosad, & Muenchow, 2019). This document is prepared for CP6521 Advanced GIS, a graduate-level city planning elective course at Georgia Tech in Spring 2019. For any question, contact the instructor, Yongsung Lee, Ph.D. via yongsung.lee(at)gatech.edu.
This document is also published on RPubs.
library(tidyverse)
library(sf)
library(raster)
library(osmdata) # help download detailed geographic data from OpenStreetMap
library(spDataLarge)
library(tmap)
library(revgeo) # for reverse geocoding 
library(osmdata)
library(classInt)

13.1 Introduction

13.2 Case study: bike shops in Germany

Our scenario

Suggested Workflow

13.3 Tidy the input data

First, download, unzip, and read in a German Census csv file.

download.file("https://tinyurl.com/ybtpkwxz", 
              destfile = "census.zip", mode = "wb")
unzip("census.zip") # unzip the files
census_de = readr::read_csv2(list.files(pattern = "Gitter.csv"))
# https://stackoverflow.com/questions/22970091/difference-between-read-csv-and-read-csv2-in-r

An alternative is to use spDataLarge.

data("census_de", package = "spDataLarge")

census_de: a data frame containing 13 variables for more than 300,000 grid cells across Germany.

Data manipulation tasks

# pop = population, hh_size = household size
input = dplyr::select(census_de, x = x_mp_1km, y = y_mp_1km, pop = Einwohner,
                      women = Frauen_A, mean_age = Alter_D,
                      hh_size = HHGroesse_D)
# set -1 and -9 to NA
# https://dplyr.tidyverse.org/reference/mutate_all.html
input_tidy = mutate_all(input, list(~ifelse(. %in% c(-1, -9), NA, .)))
# input_tidy = mutate_all(input, ~ifelse(. %in% c(-1, -9), NA, .)) # this line does the same task 

Interestingly, input_tidy provides class codes, instead of actual counts or %, for each variable like the following. Now, we will recode them in the next steps.

13.4 Create census rasters

Step1. rasterFromXYZ()

input_ras = rasterFromXYZ(input_tidy, crs = st_crs(3035)$proj4string)
input_ras # check the min/max values for each layer 
## class       : RasterBrick 
## dimensions  : 868, 642, 557256, 4  (nrow, ncol, ncell, nlayers)
## resolution  : 1000, 1000  (x, y)
## extent      : 4031000, 4673000, 2684000, 3552000  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs 
## data source : in memory
## names       : pop, women, mean_age, hh_size 
## min values  :   1,     1,        1,       1 
## max values  :   6,     5,        5,       5

epsg:3035

Step2. reclassify()

rcl_pop = matrix(c(1, 1, 127, 2, 2, 375, 3, 3, 1250, 
                   4, 4, 3000, 5, 5, 6000, 6, 6, 8000), 
                 ncol = 3, byrow = TRUE)
rcl_women = matrix(c(1, 1, 3, 2, 2, 2, 3, 3, 1, 4, 5, 0), 
                   ncol = 3, byrow = TRUE)
rcl_age = matrix(c(1, 1, 3, 2, 2, 0, 3, 5, 0),
                 ncol = 3, byrow = TRUE)
rcl_hh = rcl_women
rcl = list(rcl_pop, rcl_women, rcl_age, rcl_hh)
reclass = input_ras
for (i in seq_len(nlayers(reclass))) {
  reclass[[i]] = reclassify(x = reclass[[i]], rcl = rcl[[i]], right = NA)
}
names(reclass) = names(input_ras)

Check what it means by right = NA above: RDocumentation reclassify.

reclass # again, check the min/max values for each layer 
## class       : RasterBrick 
## dimensions  : 868, 642, 557256, 4  (nrow, ncol, ncell, nlayers)
## resolution  : 1000, 1000  (x, y)
## extent      : 4031000, 4673000, 2684000, 3552000  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs 
## data source : in memory
## names       :  pop, women, mean_age, hh_size 
## min values  :  127,     0,        0,       0 
## max values  : 8000,     3,        3,       3

13.5 Define metropolitan areas

Here, we identify metropolitan areas. Note that the input raster layers have the cell size of 1 km by 1 km, which is too small/detailed to delineate metropolitan areas. Thus, we first aggregate the input rater layers by the factor of 20.

pop_agg = aggregate(reclass$pop, fact = 20, fun = sum)

Next, keep only those cells with more than half a million people (now, the cell size is 20 km by 20 km).

pop_agg = pop_agg[pop_agg > 500000, drop = FALSE] # try what happens with drop = TRUE
polys = pop_agg %>% 
  clump() %>%
  rasterToPolygons() %>%
  st_as_sf()

Note that polys is an sf object (polygon) with 21 features. For now, neighboring polygons are separate entities. We will spatially dissolve them by clumps (i.e., a temporarily created categorical variable to denote individual metropolitan areas) via tidyverse functions: group_by() and summarize(). When no variable name is specified in summarize(), it geometrically dissolves an input data.

metros = polys %>%
  group_by(clumps) %>%
  summarize()

Still, the metroplitan names are missing in metros. To add the names, we reverse-geocode, which is a task similar to geocoding by in the opposite direction (input: coordinates -> output: addresses).

metros_wgs = st_transform(metros, 4326)
coords = st_centroid(metros_wgs) %>%
  st_coordinates() %>%
  round(4)

Now, let’s reverse geocode with given coordinates.

metro_names = revgeo(longitude = coords[, 1], latitude = coords[, 2], 
                     output = "frame") # this returns a data frame

Alternative way to extract the metro names are:

# attach metro_names from spDataLarge
data("metro_names", package = "spDataLarge")

One minor manual edit: Wülfrath to Duesseldorf (no Umlaut).

metro_names = dplyr::pull(metro_names, city) %>% 
  as.character() %>% 
  ifelse(. == "Wülfrath", "Duesseldorf", .)

13.6 Points of interest

shops = map(
  metro_names, 
  function(x) {
    message("Downloading shops of: ", x, "\n")
    
    # give the server a bit time
    Sys.sleep(sample(seq(5, 10, 0.1), 1))
    
    # specify a query with a bounding box and a key   
    query = opq(x) %>%
      add_osm_feature(key = "shop")
    
    # Return an OSM query in sf format 
    points = osmdata_sf(query)
    
    # request the same data again if nothing has been downloaded 
    iter = 2
    while (nrow(points$osm_points) == 0 & iter > 0) {
      points = osmdata_sf(query)
      iter = iter - 1
    }
    # 
    points = st_set_crs(points$osm_points, 4326)
  }
)
# checking if we have downloaded shops for each metropolitan area
ind = map(shops, nrow) == 0
if (any(ind)) {
  message("There are/is still (a) metropolitan area/s without any features:\n",
          paste(metro_names[ind], collapse = ", "), "\nPlease fix it!")
}
# select only specific columns
shops = map(shops, dplyr::select, osm_id, shop)
# putting all list elements into a single data frame
shops = do.call(rbind, shops)
data("shops", package = "spDataLarge")
# to match crs of shops and reclass
shops = st_transform(shops, proj4string(reclass)) 
# create poi raster
poi = rasterize(x = shops, y = reclass, field = "osm_id", fun = "count")
# construct reclassification matrix
int = classInt::classIntervals(values(poi), n = 4, style = "fisher")
int = round(int$brks)
rcl_poi = matrix(c(int[1], rep(int[-c(1, length(int))], each = 2), 
                   int[length(int)] + 1), ncol = 2, byrow = TRUE)
rcl_poi = cbind(rcl_poi, 0:3)  
# reclassify
poi = reclassify(poi, rcl = rcl_poi, right = NA) 
names(poi) = "poi"

13.7 Identifying suitable locations

Now, before identifying the best locations for bike shops, add the poi raster layer to the reclass raster stack object, and remove the pop layer from it.

# add poi raster
reclass = addLayer(reclass, poi)
# delete population raster
reclass = dropLayer(reclass, "pop")
# calculate the total score
result = sum(reclass)
# create a boolena raster, cropped by the Berlin metropolitan area 
good_for_bikeshops <- crop(result>9, metros[2, ]) 
tmap_mode("view")
tm_basemap("OpenStreetMap.Mapnik") +
  tm_shape(metros[2, ]) + # Berlin
  tm_borders(lwd = 0) +   # zero line width for setting up the mapping area 
  tm_shape(good_for_bikeshops) +
  tm_raster(alpha = 0.5)