Introduction

In the Assignment 6, we created choropleth map showing the state-level distributions of Arby’s and Hardee’s. We were given A dataset comprising the USA state shape file and a geodatabase containing point locations of both restaurants. We used sf, dplyr, and RColorBrewer packages to complete the assignment.

Cleaning the Global environment and the console

rm(list = ls())      # Clears the global environment for a fresh start
cat('\f')            # Cleans the console

Loading libraries

This code allows to load multiple library at once.

my_libtrary <- c("sf", "dplyr", "RColorBrewer", "classInt", "ggplot2", "gridExtra")
lapply(my_libtrary, library, character.only = TRUE)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine

Loading the data

dsn1 <- "Data/seminarClass/usa.shp"
dsn2 <- "Data/seminarClass/restaurants.gdb"
US_States <- st_read(dsn1)
## Reading layer `usa' from data source `C:\Rizwan\Education\UofM PhD Work\Seminar in Earth Sciences\R-Spatial\Data\seminarClass\usa.shp' using driver `ESRI Shapefile'
## Simple feature collection with 51 features and 54 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -6293474 ymin: -1294964 xmax: 2256319 ymax: 4592025
## projected CRS:  USA_Contiguous_Albers_Equal_Area_Conic
restaurants <- st_read(dsn2)
## Multiple layers are present in data source C:\Rizwan\Education\UofM PhD Work\Seminar in Earth Sciences\R-Spatial\Data\seminarClass\restaurants.gdb, reading layer `arbys'.
## Use `st_layers' to list all layer names and their type in a data source.
## Set the `layer' argument in `st_read' to read a particular layer.
## Warning in evalq((function (..., call. = TRUE, immediate. = FALSE, noBreaks. =
## FALSE, : automatically selected the first layer in a data source containing more
## than one.
## Reading layer `arbys' from data source `C:\Rizwan\Education\UofM PhD Work\Seminar in Earth Sciences\R-Spatial\Data\seminarClass\restaurants.gdb' using driver `OpenFileGDB'
## Simple feature collection with 2590 features and 2 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: -158.0057 ymin: 19.69826 xmax: -68.01207 ymax: 61.21599
## geographic CRS: NAD83
par(mfrow=c(2,1))
plot(st_geometry(US_States), axes = TRUE, main = "USA States", cex.main = 1.5)
plot(st_geometry(restaurants), axes = TRUE, main = "Restaurants in USA", cex.main = 1.5)

Read all layers from the geodatabase

LayerList <- st_layers(dsn2)
LayerList
## Driver: OpenFileGDB 
## Available layers:
##   layer_name geometry_type features fields
## 1      arbys         Point     2590      2
## 2    hardees         Point     3262      2
class(LayerList)
## [1] "sf_layers"
LayerList[1]
## $name
## [1] "arbys"   "hardees"

Assign variables to read all layers in the restaurant.gdb

Name = LayerList$name
Layers <- lapply(c(1:length(Name)), function(i) { 
  st_read(dsn2, layer = Name[i])
})
## Reading layer `arbys' from data source `C:\Rizwan\Education\UofM PhD Work\Seminar in Earth Sciences\R-Spatial\Data\seminarClass\restaurants.gdb' using driver `OpenFileGDB'
## Simple feature collection with 2590 features and 2 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: -158.0057 ymin: 19.69826 xmax: -68.01207 ymax: 61.21599
## geographic CRS: NAD83
## Reading layer `hardees' from data source `C:\Rizwan\Education\UofM PhD Work\Seminar in Earth Sciences\R-Spatial\Data\seminarClass\restaurants.gdb' using driver `OpenFileGDB'
## Simple feature collection with 3262 features and 2 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: -117.0076 ymin: 25.78868 xmax: -73.34611 ymax: 48.80142
## geographic CRS: NAD83
names(Layers) <- Name
Arbys <- Layers$arbys
Hardees <- Layers$hardees

Checking the projection systems

Before joining to layers, it is necessary to check their projection systems. We used st_crs to check the projection systems.

st_crs(US_States)
## Coordinate Reference System:
##   User input: USA_Contiguous_Albers_Equal_Area_Conic 
##   wkt:
## PROJCRS["USA_Contiguous_Albers_Equal_Area_Conic",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["Degree",0.0174532925199433]]],
##     CONVERSION["USA_Contiguous_Albers_Equal_Area_Conic",
##         METHOD["Albers Equal Area",
##             ID["EPSG",9822]],
##         PARAMETER["Latitude of false origin",37.5,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-96,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",29.5,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",45.5,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["(E)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["(N)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     USAGE[
##         SCOPE["unknown"],
##         AREA["USA - CONUS - onshore"],
##         BBOX[24.41,-124.79,49.38,-66.91]],
##     ID["ESRI",102003]]
st_crs(Arbys)
## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["unknown"],
##         AREA["North America - NAD83"],
##         BBOX[14.92,167.65,86.46,-47.74]],
##     ID["EPSG",4269]]
st_crs(Hardees)
## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["unknown"],
##         AREA["North America - NAD83"],
##         BBOX[14.92,167.65,86.46,-47.74]],
##     ID["EPSG",4269]]

Reprojecting the shape file

The projection system for the US states shape file is different than the restaurants. It is necessary to reproject the shapefile to ensure complete convergence of the data. We used st_transform to reproject the US States shape file.

US_States_nad83 <- st_transform(US_States, st_crs(Arbys))
st_crs(US_States_nad83)
## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["unknown"],
##         AREA["North America - NAD83"],
##         BBOX[14.92,167.65,86.46,-47.74]],
##     ID["EPSG",4269]]

Checking the differences in the transformation

par(mfrow=c(2,1))
plot(st_geometry(US_States), axes = TRUE, main = " US States before re-projection", cex.main = 1.5)
plot(st_geometry(US_States_nad83), axes = TRUE, main = " US States after re-projection", cex.main = 1.5)

Checking for NA values

cat("Number of NA value in the data table is: ", sum(is.na(Arbys)), sep = '')
## Number of NA value in the data table is: 0
cat("Number of NA value in the data table is: ", sum(is.na(Hardees)), sep = '')
## Number of NA value in the data table is: 0

Spatial aggregation: Points in polygon

Defining Color pallet

Pal <- brewer.pal(7, "YlOrRd")
class(Pal)
## [1] "character"

Arby’s distribution

In this part we visualized the Arby’s distribution density in the USA using both ggplot and plot function for comparison.

Arbys_den <- US_States_nad83 %>% mutate(state_area = st_area(geometry)) %>% 
  st_join(Arbys) %>% group_by(STATE_FIPS) %>% 
  summarise(n_Arbys = n(), 
            state_area = unique(state_area), 
            arbys_den = n_Arbys/state_area * 1e6)
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## `summarise()` ungrouping output (override with `.groups` argument)
hist(Arbys_den$arbys_den, main = "Arby's distribution histogram", xlab = "Density (per sq. km.)", col = 'blue')

breaks_qt_arbys <- classIntervals(c(min(Arbys_den$arbys_den)-0.00001, Arbys_den$arbys_den), n=7, style = "quantile")
Arbys_den <- mutate(Arbys_den, arbys_den_cat=cut(arbys_den, breaks_qt_arbys$brks))
ggplot(Arbys_den)+
  geom_sf(aes(fill=arbys_den_cat))+
  scale_fill_brewer(palette = "YlOrRd")

plot(Arbys_den["arbys_den"], axes = TRUE, main = "Distribution of Arby's in the USA", 
    cex.main = 1.5, nbreaks = 7, breaks = "quantile", pal = Pal)

Hardee’s distribution

We also visualized the Hardee’s distribution density in the USA using both ggplot and plot function for comparison.

Hardees_den <- US_States_nad83 %>% mutate(state_area = st_area(geometry)) %>% 
  st_join(Hardees) %>% group_by(STATE_FIPS) %>% 
  summarise(n_Hardees = n(), 
            state_area = unique(state_area), 
            hardees_den = n_Hardees/state_area * 1e6)
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## `summarise()` ungrouping output (override with `.groups` argument)
hist(Hardees_den$hardees_den, main = "Hardee's distribution histogram", xlab = "Density (per sq. km.)", col = 'red')

breaks_qt_hardees <- classIntervals(c(min(Hardees_den$hardees_den)-0.00001, Hardees_den$hardees_den), n=7, style = "quantile")
Hardees_den <- mutate(Hardees_den, hardees_den_cat=cut(hardees_den, breaks_qt_hardees$brks))
ggplot(Hardees_den)+
  geom_sf(aes(fill=hardees_den_cat))+
  scale_fill_brewer(palette = "YlOrRd")

plot(Hardees_den["hardees_den"], axes = TRUE, main = "Distribution of Hardee's in the USA", 
     cex.main = 1.5, nbreaks = 7, breaks = "quantile", pal = Pal)

Mapping with different class interval style

Arby’s distribution

plot(Arbys_den["arbys_den"], axes = TRUE, main = "Class Interval Style: Quantile", cex.main = 1.5, 
     nbreaks = 7, breaks = "quantile", pal = Pal)

plot(Arbys_den["arbys_den"], axes = TRUE, main = "Class Interval Style: Equal interval", cex.main = 1.5, 
     nbreaks = 7, breaks = "equal", pal = Pal)

plot(Arbys_den["arbys_den"], axes = TRUE, main = "Class Interval Style: Natural breaks", cex.main = 1.5, 
     nbreaks = 7, breaks = "jenks", pal = Pal)

plot(Arbys_den["arbys_den"], axes = TRUE, main = "Class Interval Style: Kmeans", cex.main = 1.5, 
     nbreaks = 7, breaks = "kmeans", pal = Pal)

Hardee’s distribution

par(mfrow=c(2,2))
plot(Hardees_den["hardees_den"], axes = TRUE, main = "Class Interval Style: Quantile", cex.main = 1.5, 
     nbreaks = 7, breaks = "quantile", pal = Pal)

plot(Hardees_den["hardees_den"], axes = TRUE, main = "Class Interval Style: Equal interval", cex.main = 1.5, 
     nbreaks = 7, breaks = "equal", pal = Pal)

plot(Hardees_den["hardees_den"], axes = TRUE, main = "Class Interval Style: Natural breaks", cex.main = 1.5, 
     nbreaks = 7, breaks = "jenks", pal = Pal)

plot(Hardees_den["hardees_den"], axes = TRUE, main = "Class Interval Style: Kmeans", cex.main = 1.5, 
     nbreaks = 7, breaks = "kmeans", pal = Pal)