Hand in: zip file with .Rmd and .html file, with answers to the exercises of the chapter: https://keen-swartz-3146c4.netlify.com/featureattributes.html as well as those in the “further exercises” html for today.
Add a variable to the nc dataset by nc$State = "North Carolina". Which value should you attach to this variable for the attribute-geometry relationship (agr)?
library(sf)## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
library(tidyverse)## -- Attaching packages ----------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.3.0
## v tibble 2.0.1 v dplyr 0.8.0.1
## v tidyr 0.8.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts -------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
nc <- system.file("gpkg/nc.gpkg", package="sf") %>%
read_sf() %>%
st_transform(32119)
nc2 <- nc %>% select(BIR74, SID74, NAME) %>%
st_set_agr(c(BIR74 = "aggregate", SID74 = "aggregate", NAME = "identity"))
nc2%>%st_agr()## BIR74 SID74 NAME
## aggregate aggregate identity
## Levels: constant aggregate identity
nc## Simple feature collection with 100 features and 14 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
## epsg (SRID): 32119
## proj4string: +proj=lcc +lat_1=36.16666666666666 +lat_2=34.33333333333334 +lat_0=33.75 +lon_0=-79 +x_0=609601.22 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
## # A tibble: 100 x 15
## AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <dbl>
## 1 0.114 1.44 1825 1825 Ashe 37009 37009 5 1091 1
## 2 0.061 1.23 1827 1827 Alle~ 37005 37005 3 487 0
## 3 0.143 1.63 1828 1828 Surry 37171 37171 86 3188 5
## 4 0.07 2.97 1831 1831 Curr~ 37053 37053 27 508 1
## 5 0.153 2.21 1832 1832 Nort~ 37131 37131 66 1421 9
## 6 0.097 1.67 1833 1833 Hert~ 37091 37091 46 1452 7
## 7 0.062 1.55 1834 1834 Camd~ 37029 37029 15 286 0
## 8 0.091 1.28 1835 1835 Gates 37073 37073 37 420 0
## 9 0.118 1.42 1836 1836 Warr~ 37185 37185 93 968 4
## 10 0.124 1.43 1837 1837 Stok~ 37169 37169 85 1612 1
## # ... with 90 more rows, and 5 more variables: NWBIR74 <dbl>, BIR79 <dbl>,
## # SID79 <dbl>, NWBIR79 <dbl>, geom <MULTIPOLYGON [m]>
nc$State = "North Carolina"
nc## Simple feature collection with 100 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
## epsg (SRID): 32119
## proj4string: +proj=lcc +lat_1=36.16666666666666 +lat_2=34.33333333333334 +lat_0=33.75 +lon_0=-79 +x_0=609601.22 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
## # A tibble: 100 x 16
## AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <dbl>
## 1 0.114 1.44 1825 1825 Ashe 37009 37009 5 1091 1
## 2 0.061 1.23 1827 1827 Alle~ 37005 37005 3 487 0
## 3 0.143 1.63 1828 1828 Surry 37171 37171 86 3188 5
## 4 0.07 2.97 1831 1831 Curr~ 37053 37053 27 508 1
## 5 0.153 2.21 1832 1832 Nort~ 37131 37131 66 1421 9
## 6 0.097 1.67 1833 1833 Hert~ 37091 37091 46 1452 7
## 7 0.062 1.55 1834 1834 Camd~ 37029 37029 15 286 0
## 8 0.091 1.28 1835 1835 Gates 37073 37073 37 420 0
## 9 0.118 1.42 1836 1836 Warr~ 37185 37185 93 968 4
## 10 0.124 1.43 1837 1837 Stok~ 37169 37169 85 1612 1
## # ... with 90 more rows, and 6 more variables: NWBIR74 <dbl>, BIR79 <dbl>,
## # SID79 <dbl>, NWBIR79 <dbl>, geom <MULTIPOLYGON [m]>, State <chr>
nc1 <- nc %>% select(NAME,State) %>%
st_set_agr(c(State= "constant"))
nc1 %>%
st_agr()## NAME State
## <NA> constant
## Levels: constant aggregate identity
We should attach a constant agr because this value is valid for all the geometries
Create a new sf object from the geometry obtained by st_union(nc), and assign "North Carolina" to the variable State. Which agr can you now assign to this attribute variable?
(state_geom=st_sf(st_union(nc)))## Simple feature collection with 1 feature and 0 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
## epsg (SRID): 32119
## proj4string: +proj=lcc +lat_1=36.16666666666666 +lat_2=34.33333333333334 +lat_0=33.75 +lon_0=-79 +x_0=609601.22 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
## st_union.nc.
## 1 MULTIPOLYGON (((834893.7 95...
plot(state_geom)state_geom$State="North Carolina"
st2<-state_geom%>%select(State)%>%
st_set_agr(c(State="identity"))
st2%>%
st_agr()## State
## identity
## Levels: constant aggregate identity
st2## Simple feature collection with 1 feature and 1 field
## Attribute-geometry relationship: 0 constant, 0 aggregate, 1 identity
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
## epsg (SRID): 32119
## proj4string: +proj=lcc +lat_1=36.16666666666666 +lat_2=34.33333333333334 +lat_0=33.75 +lon_0=-79 +x_0=609601.22 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
## State st_union.nc.
## 1 North Carolina MULTIPOLYGON (((834893.7 95...
This variable State is now a identity variable because it represents the whole geometry
Use st_area to add a variable with name area to nc. Compare the area and AREA variables in the nc dataset. What are the units of AREA? Are the two linearly related? If there are discrepancies, what could be the cause?
nc$area=st_area(nc)
nc%>%select(NAME, AREA,area)## Simple feature collection with 100 features and 3 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
## epsg (SRID): 32119
## proj4string: +proj=lcc +lat_1=36.16666666666666 +lat_2=34.33333333333334 +lat_0=33.75 +lon_0=-79 +x_0=609601.22 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
## # A tibble: 100 x 4
## NAME AREA area geom
## <chr> <dbl> <S3: unit> <MULTIPOLYGON [m]>
## 1 Ashe 0.114 113759816~ (((387344.8 278387.2, 381334.2 282773.7, 3794~
## 2 Allegha~ 0.061 61120784~ (((408601.8 292424.5, 408565.1 293985.3, 4066~
## 3 Surry 0.143 142374317~ (((478717.1 277490.3, 476935.7 278867.1, 4715~
## 4 Curritu~ 0.07 69467210~ (((878193.9 289128.3, 877381.5 291117.1, 8759~
## 5 Northam~ 0.153 152099621~ (((769835.3 277796.1, 768364.4 274841.8, 7626~
## 6 Hertford 0.097 96785742~ (((812328 277876.4, 791158.2 277011.9, 789882~
## 7 Camden 0.062 61605187~ (((878193.9 289128.3, 883270.9 275314.1, 8811~
## 8 Gates 0.091 90381791~ (((828444.6 290095.5, 824767.5 287166, 820818~
## 9 Warren 0.118 117952748~ (((671747.1 278687.7, 674043.1 282237.4, 6704~
## 10 Stokes 0.124 123299616~ (((517436.7 277857.7, 479040.3 279098.1, 4811~
## # ... with 90 more rows
AREA units are in arc degrees corresponding to 100 000 meters in the metric system. They are related by AREA=area m^2 * 100000 m^2. Being more strict if L is an arc degree and r is the Earth radius in meters we have
\[L=\frac{2πr}{360}\]
Calculating L and by replace r
\[L=\frac{2π*6378000}{360}\]
we have that an arc degree is 113317.099 meters. The discrepancies are because of the projections system. AS the Earth shape behaves as a geoide, there are differences in the areas projected per CRS.
Is the area variable intensive or extensive? Is its agr equal to constant, identity or aggregate?
nc%>%select(NAME, AREA,area)%>%
st_agr()## NAME AREA area
## <NA> <NA> <NA>
## Levels: constant aggregate identity
The area variable is extensive because of its property of change if it is splitted for example. It is mutable. It is agr is equal to aggregate according to its nature of being a “summary value over the geometry” [1]
Find the name of the county that contains POINT(-78.34046 35.017)
#Create the point
look<-st_sfc(st_point(c(-78.34046,35.017)),crs=4326)
plot(st_geometry(st_transform(nc,4326)), col = terrain.colors(50,alpha=0.5),main="Counties and Point")
plot(look,col="red",pch = 19, add=TRUE)#Query the intersection
container=(st_intersects(look,st_transform(nc,4326)))## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
states<-nc[unlist(container),]
states$NAME## [1] "Sampson"
#Ploting the result
plot(st_geometry(states),col="orange",main="States that intersects POINT(-78.34046 35.017)")Find the names of all counties with boundaries that touch county Sampson.
sampson= nc[nc$NAME == "Sampson", ]
sampson_to=nc[unlist(st_touches(sampson,nc)),]
#List of names
sampson_to$NAME## [1] "Johnston" "Wayne" "Harnett" "Cumberland" "Duplin"
## [6] "Bladen" "Pender"
plot(st_geometry(sampson_to),col="orange",main="Counties with boundaries that touch county `Sampson`.")
plot(st_geometry(sampson), col="red",add=TRUE)List the names of all counties that are less than 50 km away from county Sampson.
sampson_close=nc[unlist(st_is_within_distance(sampson,nc,dist=50000)),]
sampson_close$NAME## [1] "Wake" "Chatham" "Wilson" "Johnston" "Greene"
## [6] "Lee" "Wayne" "Harnett" "Moore" "Lenoir"
## [11] "Sampson" "Cumberland" "Jones" "Hoke" "Duplin"
## [16] "Onslow" "Robeson" "Bladen" "Pender" "Columbus"
## [21] "New Hanover" "Brunswick"
For the following exercises, we will work with naturalearth:
library(rnaturalearth)
e = ne_countries(returnclass = "sf")["pop_est"]Plot the variable pop_est. Which projection is used?
e## Simple feature collection with 177 features and 1 field
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -180 ymin: -90 xmax: 180 ymax: 83.64513
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
## First 10 features:
## pop_est geometry
## 0 28400000 MULTIPOLYGON (((61.21082 35...
## 1 12799293 MULTIPOLYGON (((16.32653 -5...
## 2 3639453 MULTIPOLYGON (((20.59025 41...
## 3 4798491 MULTIPOLYGON (((51.57952 24...
## 4 40913584 MULTIPOLYGON (((-65.5 -55.2...
## 5 2967004 MULTIPOLYGON (((43.58275 41...
## 6 3802 MULTIPOLYGON (((-59.57209 -...
## 7 140 MULTIPOLYGON (((68.935 -48....
## 8 21262641 MULTIPOLYGON (((145.398 -40...
## 9 8210281 MULTIPOLYGON (((16.97967 48...
plot(e) The projection used is WGS84
Plot the variable converted to +proj=ortho, see here. What happened to Antarctica?
e2<-st_transform(e,st_crs("+proj=ortho"))
plot(e2)Antarctica is no longer visible in the plot
Plot the same data with the ortho projection, adding lat_0=-90, lat_0=90, lon_0=90, lon_0=-90 for four different views. Try to explain what happens.
plot(st_transform(e,st_crs("+proj=ortho +lat_0=-90")),col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - South Pole view")plot(st_transform(e,st_crs("+proj=ortho +lat_0=90")),col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - North Pole view")plot(st_transform(e,st_crs("+proj=ortho +long_0=90")),col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - Side Pole view")plot(st_transform(e,st_crs("+proj=ortho +long_0=-90")),col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - Side Pole view")Create a polygon of the northern hemisphere. Use that to get the intersection with e, and plot that with lat_0=90.
#Get North Hemisphere
hnorth<-st_transform(e,st_crs("+proj=ortho +lat_0=90"))
plot(hnorth,col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - North Pole view")#Create a unique geometry
hnorthclean<-st_union(st_geometry(e[unlist((!is.na(st_dimension(hnorth)))),]))
plot(hnorthclean)#Plot the new geometry from pole north view
hnorthcleanview<-st_transform(hnorthclean,st_crs("+proj=ortho +lat_0=90"))
plot(hnorthcleanview,col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - North Pole view")#Intersection with e
(result<-st_intersects(hnorthclean,e,sparse=FALSE))## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE
## [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [,24] [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34]
## [1,] TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
## [,35] [,36] [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45]
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [,46] [,47] [,48] [,49] [,50] [,51] [,52] [,53] [,54] [,55] [,56]
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE
## [,57] [,58] [,59] [,60] [,61] [,62] [,63] [,64] [,65] [,66] [,67]
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [,68] [,69] [,70] [,71] [,72] [,73] [,74] [,75] [,76] [,77] [,78]
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86] [,87] [,88] [,89]
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98] [,99] [,100]
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
## [,101] [,102] [,103] [,104] [,105] [,106] [,107] [,108] [,109] [,110]
## [1,] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
## [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118] [,119] [,120]
## [1,] FALSE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
## [,121] [,122] [,123] [,124] [,125] [,126] [,127] [,128] [,129] [,130]
## [1,] FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
## [,131] [,132] [,133] [,134] [,135] [,136] [,137] [,138] [,139] [,140]
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [,141] [,142] [,143] [,144] [,145] [,146] [,147] [,148] [,149] [,150]
## [1,] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [,151] [,152] [,153] [,154] [,155] [,156] [,157] [,158] [,159] [,160]
## [1,] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [,161] [,162] [,163] [,164] [,165] [,166] [,167] [,168] [,169] [,170]
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [,171] [,172] [,173] [,174] [,175] [,176] [,177]
## [1,] TRUE TRUE FALSE TRUE FALSE TRUE FALSE
geom_in<-e[unlist(result),]
result2<-st_transform(geom_in,st_crs("+proj=ortho +lat_0=90"))
#Intersection plotting
plot(result2,col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - North Pole view result")For the land mass between latitudes 45 and 55 and longitudes 0 and 10, estimate the total population as well as the average population.
landmass<-st_crop(e, c(xmin=0, xmax=10, ymin=45, ymax=55))## although coordinates are longitude/latitude, st_intersection assumes that they are planar
plot(landmass,col = terrain.colors(10,alpha=0.5),main="Land Mass crop for Natural Earth Dataset")(total_pop=sum(landmass$pop_est))## [1] 315713130
(avg_pop=mean(landmass$pop_est))## [1] 31571313
[1] Spatial Data Science https://keen-swartz-3146c4.netlify.com/
[2] R Documentation https://www.rdocumentation.org
[3] sf Github https://r-spatial.github.io/sf/articles/sf1.html