Lecture on sf package

library(sf)
library(tidyverse)
library(tmap)
setwd("C:/Users/Bonwoo Koo/Dropbox/School/CP6025/Labs/Lab14")

1. Reading

Nothing new here..

# Reading sf data
fulton<- st_read("fulton.shp")

## Reading layer `fulton' from data source `C:\Users\Bonwoo Koo\Dropbox\School\CP6025\Labs\Lab14\fulton.shp' using driver `ESRI Shapefile'
## Simple feature collection with 195 features and 16 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 636450.8 ymin: 388416.6 xmax: 706367.7 ymax: 464175.8
## epsg (SRID):    NA
## proj4string:    +proj=tmerc +lat_0=30 +lon_0=-84.16666666666667 +k=0.9999 +x_0=700000 +y_0=0 +ellps=GRS80 +units=m +no_defs

dekalb <- st_read("dekalb.shp")

## Reading layer `dekalb' from data source `C:\Users\Bonwoo Koo\Dropbox\School\CP6025\Labs\Lab14\dekalb.shp' using driver `ESRI Shapefile'
## Simple feature collection with 141 features and 16 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 682974.1 ymin: 400768.3 xmax: 713243.6 ymax: 440285.4
## epsg (SRID):    NA
## proj4string:    +proj=tmerc +lat_0=30 +lon_0=-84.16666666666667 +k=0.9999 +x_0=700000 +y_0=0 +ellps=GRS80 +units=m +no_defs

cobb <- st_read("cobb.shp")

## Reading layer `cobb' from data source `C:\Users\Bonwoo Koo\Dropbox\School\CP6025\Labs\Lab14\cobb.shp' using driver `ESRI Shapefile'
## Simple feature collection with 119 features and 16 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 647038.8 ymin: 415132.4 xmax: 680757.4 ymax: 452668.4
## epsg (SRID):    NA
## proj4string:    +proj=tmerc +lat_0=30 +lon_0=-84.16666666666667 +k=0.9999 +x_0=700000 +y_0=0 +ellps=GRS80 +units=m +no_defs

clayton <- st_read("clayton.shp")

## Reading layer `clayton' from data source `C:\Users\Bonwoo Koo\Dropbox\School\CP6025\Labs\Lab14\clayton.shp' using driver `ESRI Shapefile'
## Simple feature collection with 46 features and 16 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 672864 ymin: 371709.7 xmax: 692836 ymax: 404520.8
## epsg (SRID):    NA
## proj4string:    +proj=tmerc +lat_0=30 +lon_0=-84.16666666666667 +k=0.9999 +x_0=700000 +y_0=0 +ellps=GRS80 +units=m +no_defs

cbd.buffer <- st_read("city_hall_5mile.shp")

## Reading layer `city_hall_5mile' from data source `C:\Users\Bonwoo Koo\Dropbox\School\CP6025\Labs\Lab14\city_hall_5mile.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 2 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -84.47435 ymin: 33.67687 xmax: -84.30165 ymax: 33.82113
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs

marta <- st_read("MARTA_Routes.shp")

## Reading layer `MARTA_Routes' from data source `C:\Users\Bonwoo Koo\Dropbox\School\CP6025\Labs\Lab14\MARTA_Routes.shp' using driver `ESRI Shapefile'
## Simple feature collection with 3 features and 2 fields
## geometry type:  MULTILINESTRING
## dimension:      XY
## bbox:           xmin: 671895.2 ymin: 403525.2 xmax: 694364.9 ymax: 437538.4
## epsg (SRID):    NA
## proj4string:    +proj=tmerc +lat_0=30 +lon_0=-84.16666666666667 +k=0.9999 +x_0=700000 +y_0=0 +ellps=GRS80 +units=m +no_defs

2. Changing coordinate reference system (CRS)

It is important to unify the CRS of the shapefiles you want to work with. The EPSG (European Petroleum Survey Group - but I may be wrong) has a list of the standardized set of coordinate systems, datum transformations, projections, units, spheroids and all other related information and has them numbered. There a few CRS numbers that I have memorized, including (1) 4326 - WGS84, (2) 26967 - NAD83 / Georgia West, and (3) 7131 - NAD83(2011) / San Francisco CS13. These numbers really makes it simple to transform & unify the CRS of different shapefiles.

# Transforming the CRS so that they all have the same CRS with meter-unit.
cbd.buffer <- st_transform(cbd.buffer, crs = 26967)
marta <- st_transform(marta, crs = 26967)
fulton <- st_transform(fulton, crs = 26967)
cobb <- st_transform(cobb, crs = 26967)
dekalb <- st_transform(dekalb, crs = 26967)
clayton <- st_transform(clayton, crs = 26967)

3. Centroid & Coordinates

Converting polygons to centroids are very common operation in geospatial data wrangling. In R, this conversion is as simple as just typing st_centroid(). Sometimes you want to get the coordinates of your points, lines, or polygons. You can use st_coordinates() for that purpose.

If st_coordinates() is applied to points, the output is intuitive. If st_coordinates() is applied to polygon, however, it will give you the coordinates of ALL THE POINTS that together make the polygon.

# Converting the centroid of the cbd.buffer (which is a polygon)
cbd.centroid <- st_centroid(cbd.buffer)

## Warning in st_centroid.sf(cbd.buffer): st_centroid assumes attributes are
## constant over geometries of x

# Mapping the centroid + the buffer
tmap_mode('view')

## tmap mode set to interactive viewing

tm_shape(cbd.centroid) + tm_dots(col = "red") + # This is the CENTROID of the buffer polygon
  tm_shape(cbd.buffer) + tm_polygons(alpha = 0.2, col = "orange") # This is the buffer polygon

# To find out the coordinate of the centroid..
cbd.coord <- st_coordinates(cbd.centroid)
cbd.coord

##          X        Y
## 1 679494.3 415687.5

# The coordinate of polygon shapefile ????
polygon.coord <- st_coordinates(fulton) 
head(polygon.coord) # This is the coordinate of every single point that defines the polygon

##             X        Y L1 L2
## [1,] 681417.9 421943.5  1  1
## [2,] 681536.1 422178.8  1  1
## [3,] 681585.2 422208.9  1  1
## [4,] 682343.5 422569.7  1  1
## [5,] 683026.0 422428.1  1  1
## [6,] 683191.0 422335.2  1  1

# To see this .. (you don't need to understand the st_multipoint, etc.)
tm_shape(fulton) + tm_polygons() + 
  tm_shape(st_multipoint(x = polygon.coord[,1:2], dim="XY") %>% st_sfc(crs = 26967)) + tm_dots()

4. Geoprocessing

In ArcMap, there is a dropdown menu that contains a list of commonly used geoprocessing tools such as intersection, union, etc. The code below shows some of those functions in sf package. There are many other functions in sf package - since this class is almost over, I will leave those to you.

BUFFER:

Making a buffer is very simple; st_buffer() with two arguments - an sf object as the first argument and the distance of the bufffer as the second argument. Note that the distance here uses the unit of the CRS (which in this example is meter).

# Buffer
marta.buffer <- st_buffer(marta, dist = 400)
tmap_mode('view')

## tmap mode set to interactive viewing

tm_shape(marta.buffer) + tm_polygons(alpha = 0.1, col = "route_long") + 
        tm_shape(marta) + tm_lines()

UNION & MERGE:

Although the sf package has some dedicated functions that are equivalent to ‘merge’ and ‘union’ in ArcMap, respectively (st_union() or st_combine()), I personally do not prefer using those dedicated functions. Because when you union or merge polygons, you MUST also do the same operation to the attributes (i.e., the data.frame part of an sf object) because you cannot, for example, have one polygon with multiple rows of attributes or vice versa. Because of this, st_union() and st_combine() throws away attributes and only retain the GEOMETRY.

A better way to do this is to use rbind(), group_by() and summarise() function. Let’s merge fulton, dekalb, cobb, and clayton shapefiles into one large shapefile called ‘census’. The rbind() stands for row-bind and does exactly what the name says; it stacks multiple files by adding rows.

I personally use st_union() or st_combine() only when I know for sure that I don’t need attributes of a shapefile.

# Combine using rbind() 
census <- rbind(fulton, dekalb, cobb, clayton)
tm_shape(census) + tm_polygons(col = "county")

# dissolve by group_by and summarise
census.group <- group_by(census, county) # Mark 'census' that I'd like to group it by county
census.dissolve <- summarise(census.group, 
                             ave.nroom = mean(nroom), 
                             max.nroom = max(nroom),
                             med.mval = median(mval),
                             sd.hinc = sd(hinc)) 

census.dissolve # values are summarised for each county

## Simple feature collection with 4 features and 5 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 636450.8 ymin: 371709.7 xmax: 713243.6 ymax: 464175.8
## epsg (SRID):    NA
## proj4string:    +proj=tmerc +lat_0=30 +lon_0=-84.16666666666667 +k=0.9999 +x_0=700000 +y_0=0 +ellps=GRS80 +units=m +no_defs
## # A tibble: 4 x 6
##   county  ave.nroom max.nroom med.mval sd.hinc                     geometry
## * <fct>       <dbl>     <dbl>    <dbl>   <dbl>                <POLYGON [m]>
## 1 Fulton~      5.64       9     219000  42615. ((672864 394716, 672895.1 3~
## 2 DeKalb~      5.92       9     161300  31237. ((708560 409446.8, 708552.9~
## 3 Cobb C~      6.58       9     202600  31166. ((670162.4 422790.3, 669590~
## 4 Clayto~      5.81       7.6    86400  11368. ((678102.8 379788.3, 678100~

tm_shape(census.dissolve) + tm_polygons(col = "med.mval") # poligons are grouped too, just like dissolve in ArcGIS.

CLIP & INTERSECTION:

If I am not wrong, clip and intersection in ArcMap is essentially the same thing. The difference is that clip will only retain the attributes of the one that is clipped while intersection will retain the attributes from both shapefiles. Let’s mimic clip and intersection from ArcMap in sf packageb using st_intersection().

Note that st_intersection() can be sensitive to the order of two shapefiles you supply as arguments if one of the shapefiles has no attribute but just geometry. If you supply, for example, an sf object that contains attributes as the first argument and supply an sf object that only contains geomtry as the second argument, the output will have all the attributes from the first object. If you switch the order, however, the output will have the same geometry but will not have any attributes.

Note that if both sf objects have attributes, the output from st_intersection() will have attributes from BOTH objects.

# Clip & Intersection
clipped_1 <- st_intersection(census, 
                           st_geometry(cbd.buffer)) # This is equivelant to clip

## Warning: attribute variables are assumed to be spatially constant
## throughout all geometries

clipped_2 <- st_intersection(st_geometry(cbd.buffer),
                             census) # The output of this code only contains geometry.

clipped_3 <- st_intersection(cbd.buffer,
                             census) # This is equivelant to intersection

## Warning: attribute variables are assumed to be spatially constant
## throughout all geometries

clipped_4 <- st_intersection(census,
                             cbd.buffer) # This is equivelant to intersection

## Warning: attribute variables are assumed to be spatially constant
## throughout all geometries

# To see that clipped_2 does not have any attributes.. (and compare it to clipped_1)
clip1 <- tm_shape(clipped_1) + tm_polygons()
clip2 <- tm_shape(clipped_2) + tm_polygons()
tmap_arrange(clip1, clip2, sync = T) # Click polygons in each map to see the differences

5. Spatial Join

Spatial join is another operation in which the order of the argument matters. The sf object supplied as the second argument is JOINED TO the first argument. For example, we have the sf object “census” which is a polygon file and “marta” which is a line file. If you supply “census” as the first argument and “marta” as the second argument, the attributes of “marta” (i.e., second argument) are joined to “census” (i.e., first argument), and the ways in which it joins is based on the topological relationship such as st_intersects, st_disjoints, st_is_within_distance, etc.

Note that in R, st_join does one-to-many join AND left join by default. This means is two things:

You will get whatever is supplied as the first argument preserved in the output, and rows that do not have something to join will get NAs (we’ve seen this behavior when we learned left_join, right_join, etc.)
If there are multiple features in the second argument that needs to be joined to one feature in the first argument, R will duplicate that one feature so that they can be joined with multiple other features.

# Spatial Join 1
census.marta <- st_join(census, marta, join = st_intersects)

# Sptial Join 2
marta.census <- st_join(marta, census, join = st_intersects) 

# One-to-many join can be seen in marta.census
marta # marta only has three rows, BLUE, RED, and GOLD

## Simple feature collection with 3 features and 2 fields
## geometry type:  MULTILINESTRING
## dimension:      XY
## bbox:           xmin: 671895.2 ymin: 403525.2 xmax: 694364.9 ymax: 437538.4
## epsg (SRID):    NA
## proj4string:    +proj=tmerc +lat_0=30 +lon_0=-84.16666666666667 +k=0.9999 +x_0=700000 +y_0=0 +ellps=GRS80 +units=m +no_defs
##                           route_long OBJECTID
## 1                BLUE-East/West Line   1568.0
## 2      GOLD-Northeast Doraville Line   1552.5
## 3 RED-North South North Springs Line   1555.5
##                         geometry
## 1 MULTILINESTRING ((678342.5 ...
## 2 MULTILINESTRING ((674061.6 ...
## 3 MULTILINESTRING ((674075.2 ...

marta.census # There are multiple BLUE lines (and Multiple RED, GOLD too)

## Simple feature collection with 83 features and 18 fields
## geometry type:  MULTILINESTRING
## dimension:      XY
## bbox:           xmin: 671895.2 ymin: 403525.2 xmax: 694364.9 ymax: 437538.4
## epsg (SRID):    NA
## proj4string:    +proj=tmerc +lat_0=30 +lon_0=-84.16666666666667 +k=0.9999 +x_0=700000 +y_0=0 +ellps=GRS80 +units=m +no_defs
## First 10 features:
##              route_long OBJECTID       GEOID  n  walkscr
## 1   BLUE-East/West Line     1568 13121002400  9 47.66667
## 1.1 BLUE-East/West Line     1568 13121002500  6 72.83333
## 1.2 BLUE-East/West Line     1568 13121002600  6 59.50000
## 1.3 BLUE-East/West Line     1568 13121003000  7 83.42857
## 1.4 BLUE-East/West Line     1568 13121003100  8 72.87500
## 1.5 BLUE-East/West Line     1568 13121003200  9 72.77778
## 1.6 BLUE-East/West Line     1568 13121003500 11 83.63636
## 1.7 BLUE-East/West Line     1568 13121004000 12 40.66667
## 1.8 BLUE-East/West Line     1568 13121008102 55 25.12727
## 1.9 BLUE-East/West Line     1568 13121008302 13 36.38462
##                  tract        county   state   mval  hinc nroom unt_sng
## 1      Census Tract 24 Fulton County Georgia  92700 30972   5.7    1058
## 1.1    Census Tract 25 Fulton County Georgia 165500 26066   4.4     427
## 1.2    Census Tract 26 Fulton County Georgia 114300 26350   4.6     128
## 1.3    Census Tract 30 Fulton County Georgia 446500 94079   4.6     831
## 1.4    Census Tract 31 Fulton County Georgia 244400 46467   4.8     583
## 1.5    Census Tract 32 Fulton County Georgia 228700 84350   3.8     728
## 1.6    Census Tract 35 Fulton County Georgia 175300 55585   3.0      60
## 1.7    Census Tract 40 Fulton County Georgia  58200 28191   6.1    1077
## 1.8 Census Tract 81.02 Fulton County Georgia  90300 24940   5.1    1081
## 1.9 Census Tract 83.02 Fulton County Georgia  72200 28949   5.4     812
##     unt_mlt untstr_    p_singl   p_multi  cbd_dst in_atl
## 1       420    1478 0.71583221 0.2841678 2810.529      1
## 1.1     869    1311 0.32570557 0.6628528 1945.904      1
## 1.2     532     660 0.19393939 0.8060606 1011.383      1
## 1.3    1029    1884 0.44108280 0.5461783 1988.552      1
## 1.4     368     951 0.61303891 0.3869611 2775.643      1
## 1.5     744    1472 0.49456522 0.5054348 1054.076      1
## 1.6     634     713 0.08415147 0.8892006    0.000      1
## 1.7     231    1319 0.81652767 0.1751327 3548.812      1
## 1.8    2152    3282 0.32937233 0.6556977 4861.674      1
## 1.9     347    1177 0.68988955 0.2948173 5142.972      1
##                           geometry
## 1   MULTILINESTRING ((678342.5 ...
## 1.1 MULTILINESTRING ((678342.5 ...
## 1.2 MULTILINESTRING ((678342.5 ...
## 1.3 MULTILINESTRING ((678342.5 ...
## 1.4 MULTILINESTRING ((678342.5 ...
## 1.5 MULTILINESTRING ((678342.5 ...
## 1.6 MULTILINESTRING ((678342.5 ...
## 1.7 MULTILINESTRING ((678342.5 ...
## 1.8 MULTILINESTRING ((678342.5 ...
## 1.9 MULTILINESTRING ((678342.5 ...

# Visualization of the output
join1 <- tm_shape(census.marta) + tm_polygons(col = "route_long")
join2 <- tm_shape(marta.census) + tm_lines(col = "route_long")
tmap_arrange(join1, join2, sync = T)

6. Area/length/distance (Optional material because this one can be a bit tricky)

To measure area, length, and distance, you can use st_area(), st_length(), st_distance() functions. Although measuring area, length, and distance is easy in terms of coding, there are some things called ‘class’ and ‘attributes’ in R that complicate things a bit. To make things simple, I will ignore all that and simply show you what needs to be done to get the job done.

In fact, all you need to do is not all that different; simply wrap st_area() with unclass() function (same applies to st_length and st_distance). Note that the measured area/length/distance will be in whatever unit defined by CRS.

fulton2 <- fulton # I am copying fulton to demonstrate st_area, etc. because I need to keep fulton unmodified for later
marta2 <- marta # I am copying marta to demonstrate st_length, etc. because I need to keep marta unmodified for later

# Measuring area
area <- st_area(fulton2) # This is a class called "unit", which is not what we are familiar with. (print out area1 to see what is inside)

area.ucls <- unclass(st_area(fulton2)) # Wrapping it with unclass() converts it to a vector of areas of each polygon. Ignore the attr stuff at the bottom.
fulton2$area <- area.ucls
head(fulton2$area)

## [1] 3349033 2883085 1595262 3330016 1451682 1745294

# Visualization
tm_shape(fulton2) + tm_polygons(col = "area", style = "quantile")

For length, it is identical.

# Measuring length
marta2$length <- unclass(st_length(marta2)) # I simplified the code a bit, but the gist is the same.

# Visualization
tm_shape(marta2) + tm_lines(col = "length")

For distance, there is one difference. Unlike the area and length, which are of something, the distance is between some things (plural). This means that st_distance() gives you a matrix of all possible combination of the first argument and the second argument.

In the example below, the first argument is ‘fulton’ (195 rows), and the second argument is ‘marta’ (3 rows). The output is 195 rows x 3 columns matrix. The first of this matrix is the distance from the first row of ‘fulton’ to BLUE, RED, and GOLD MARTA lines.

If you are interested in the distance from each census tract in fulton to, for example, the first line of marta, use the first column of the matrix as your distance measure.

# st_distance
distt <- unclass(st_distance(fulton2, marta)) # this outputs a matrix of all combination of x and y
distt[1:10,] # See first 10 rows, all columns

##           [,1]       [,2]       [,3]
##  [1,] 2099.404 1039.72355 1032.47606
##  [2,] 2498.792  906.26487  887.40529
##  [3,] 3072.936    0.00000    0.00000
##  [4,] 3277.945    0.00000    0.00000
##  [5,] 2553.724 1012.70354 1084.61774
##  [6,] 1639.942 2449.21699 2475.52177
##  [7,] 1783.981   56.10156   76.66546
##  [8,] 2992.161    0.00000    0.00000
##  [9,] 2019.365  293.05799  290.64635
## [10,] 1860.586    0.00000    0.00000

# Measuring the distance from each census tract in fulton to the first line of marta and attach it to fulton
fulton2$dist.to.blue<- distt[,1]

# Visualization
tm_shape(fulton2) + tm_polygons(col = "dist.to.blue") +
        tm_shape(marta) + tm_lines(col = "route_long", lwd = 3)

Lecture on sf package - 2