Plotting urban clusters as Openstreetmap data

Many analyses of urban structure and movement aim to discern clusters. This code enables entire urban regions to be plotted from Openstreetmap data, as in my “Plotting Openstreetmap polygons and lines,” but here with clusters distinctly shaded and outlined, as illustrated in the example at the bottom.

Clusters are generated here directly from OSM data, and are obviously illustrative only. A selection of coordinates are randomly extracted from the OSM data, and the associated distance matrix submitted to a clustering algorithm. Actual implementations of this technique need then only replace the lines used to generate cluster memberships (membs).

The colour scheme is defined by filling a two-dimension grid with colours shaded from each of the four corners according to the following colourmat function. The resultant colourmat is then projected onto the space of the OSM data, and clusters are coloured according to the positions of their mid-points within the colourmat.

colour.mat

colour.mat <- function(n = c(20, 10), plot = FALSE) {
    # Top row is blue -> green
    tl <- c(0, 0, 255)  # top left RGB colour
    tr <- c(0, 255, 0)  # top right
    # the corner colours are then (linearly) graduated across the top row
    indx.tr <- seq(tl[1], tr[1], length.out = n[1])  # top red shade
    indx.tg <- seq(tl[2], tr[2], length.out = n[1])  # top green shade
    indx.tb <- seq(tl[3], tr[3], length.out = n[1])  # top blue shade
    # Bottom row is red -> yellow
    bl <- c(255, 0, 0)
    br <- c(255, 255, 0)
    indx.br <- seq(bl[1], br[1], length.out = n[1])
    indx.bg <- seq(bl[2], br[2], length.out = n[1])
    indx.bb <- seq(bl[3], br[3], length.out = n[1])

    # Then fill arrays with the vector indxs of RGB colours:
    indx.r <- indx.g <- indx.b <- array(NA, dim = n)
    tr.arr <- array(indx.tr, dim = n)
    br.arr <- array(indx.br, dim = n)
    tg.arr <- array(indx.tg, dim = n)
    bg.arr <- array(indx.bg, dim = n)
    tb.arr <- array(indx.tb, dim = n)
    bb.arr <- array(indx.bb, dim = n)
    p <- t(array((1:n[2])/n[2], dim = rev(n)))  # proportion of Top vs. Bottom colours

    indx.r <- p * tr.arr + (1 - p) * br.arr
    indx.g <- p * tg.arr + (1 - p) * bg.arr
    indx.b <- p * tb.arr + (1 - p) * bb.arr

    # Then fill the actual colourmat with RGB colours composed of the 3 indices:
    carr <- array(rgb(indx.r, indx.g, indx.b, maxColorValue = 255), dim = n)

    if (plot) {
        x11()
        par(mar = rep(0, 4))
        plot(NULL, NULL, xlim = c(0, n[1]), ylim = c(0, n[2]))
        for (i in 1:n[1]) {
            for (j in 1:n[2]) {
                rect(i - 1, j - 1, i, j, col = carr[i, j])
            }  # end for j
        }  # end for i
    }
    return(carr)
}  # end function colour.mat

add.polys

Same as in my “Plotting Openstreetmap polygons and lines.” Takes a SpatialPolygonsDataFrame (SPDF) object, and fills all polygons with the specified colour.

add.polys <- function(poly = poly, col = "gray40") {
    plotfun <- function(i) {
        xy <- slot(slot(i, "Polygons")[[1]], "coords")
        polypath(xy, border = NA, col = col)
    }
    junk <- lapply(slot(poly, "polygons"), plotfun)
}

poly.in.hull

Identifies those polygons within the SPDF poly that lie within the convex hull of the cluster given as clust.hull. Hulls are the boundaries of objects returned from the spatstat version of convexhull, constructed as chb <- cbind (ch$bdry [[1]]$x, ch$bdry [[1]]$y), where ch is the output of convexhull.

This is the most computationally intensive part of the whole routine. To increase speed, the method implemented here to access the central coordinates of each polygon makes for somewhat less neat code, but is computationally more efficient that the “direct” way via the following two lines:

xy <- coordinates(poly)
ids <- apply(xy, 1, function(z) pinpoly(clust.hull, z))

The efficiency gain makes this routine run about 25% faster, which makes a bit of difference given that this routine also represents around 65% of the total computational time. pinpoly requires spatialkernel, which is loaded in the main plotmap routine.

poly.in.hull <- function(poly = poly, clust.hull = clust.hull) {
    getids <- function(i) {
        xy <- slot(slot(i, "Polygons")[[1]], "labpt")
        pinpoly(clust.hull, xy)
    }
    lapply(slot(poly, "polygons"), getids)
}

plotmap

The main plotmap routine is largely taken from my “Plotting Openstreetmap polygons and lines,” and uses the OSM data as downloaded and processed in that routine. The plots are based on the previously stored SPDFs.

plotmap <- function(nc = 20, filedump = TRUE) {
    require(spatstat)  # for convex hulls
    require(sp)
    require(spatialkernel)  # for pinpoly, which checks if a point is in a polygon

    load("polyBU")  # buildings
    load("polyW")  # water
    load("polyN")  # natural features
    load("polyG")  # grass surfaces
    load("polyP")  # parks
    # Select some points from polyBU to form random clusters:
    npts <- nc * 50
    xy <- coordinates(polyBU)
    xlims <- range(xy[, 1])
    ylims <- range(xy[, 2])
    indx <- floor(runif(npts) * dim(xy)[1])
    xy <- unique(xy[indx, ])
    # xy are then the points used to generate clusters, with `unique` removing
    # any repeats
    dmat <- dist(xy)
    hc <- hclust(dmat, method = "complete")
    membs <- cutree(hc, nc)

    polys <- list()
    polys[[1]] <- polyBU
    polys[[2]] <- polyG
    polys[[3]] <- polyP
    rm(polyBU, polyG, polyP)  # w and n are plotted separately, and in different colours

    # Then get colours by mapping the xy positions onto a colourmat grid
    col.indx.x <- col.indx.y <- rep(NA, nc)
    cdims <- c(50, 20)
    cmat <- colour.mat(n = cdims)
    for (i in 1:nc) {
        indx <- which(membs == i)
        xi <- cdims[1] * (mean(xy[indx, 1]) - xlims[1])/diff(xlims)
        yi <- cdims[2] * (mean(xy[indx, 2]) - ylims[1])/diff(ylims)
        col.indx.x[i] <- floor(xi)
        col.indx.y[i] <- floor(yi)
    }  # end for i

    if (filedump) {
        fname <- "London_clusters.png"
        png.width <- 842  # 842 is A4 paper.
        xy.ratio <- diff(range(xy[, 1]))/diff(range(xy[, 2]))
        png(filename = fname, width = png.width, height = png.width/xy.ratio, 
            type = "cairo-png", bg = "white")
    } else {
        x11()
    }
    par(mar = rep(0, 4), mgp = rep(0, 3), ps = 14, font = 1)

    plot(NULL, NULL, xlim = xlims, ylim = ylims, bty = "l", xaxt = "n", yaxt = "n", 
        xlab = "", ylab = "", frame = FALSE)

    # First plot everything in neutral background colour prior to overplotting
    # in selected colours for each cluster.
    for (i in 1:3) {
        add.polys(polys[[i]], col = "gray80")
    }
    add.polys(polyW, col = "lightsteelblue3")
    add.polys(polyN, col = "lightsteelblue3")

    for (i in 1:nc) {
        indx <- which(membs == i)
        x <- xy[indx, 1]
        y <- xy[indx, 2]
        # Input to convexhull has to be a point pattern (ppp):
        xyi <- ppp(x, y, xrange = range(x), yrange = range(y))
        ch <- convexhull(xyi)

        coli <- cmat[col.indx.x[i], col.indx.y[i]]
        chb <- cbind(ch$bdry[[1]]$x, ch$bdry[[1]]$y)
        chb <- rbind(chb, chb[1, ])

        # Then the actual plotting of osm polygons backwards so grass & parks are
        # first
        for (j in length(polys):1) {
            junk <- poly.in.hull(polys[[j]], chb)
            indx <- which(junk == 2)  # ID for inside points from pinpoly
            if (length(indx) > 0) {
                add.polys(polys[[j]][indx, ], col = coli)
            }
        }
        # Plot actual convexhull as thick line:
        plot(ch, col = NA, border = coli, add = TRUE, lwd = 2)
    }
    if (filedump) 
        junk <- dev.off(which = dev.cur())
}
plotmap(nc = 20, filedump = FALSE)

plot of chunk theplot