Exercises for the meeting of May 17, 2019

Hand in: zip file with .Rmd and .html file, with answers to the exercises of the chapter: https://keen-swartz-3146c4.netlify.com/featureattributes.html as well as those in the “further exercises” html for today.

Exercise 1

Add a variable to the nc dataset by nc$State = "North Carolina". Which value should you attach to this variable for the attribute-geometry relationship (agr)?

library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0       v purrr   0.3.0  
## v tibble  2.0.1       v dplyr   0.8.0.1
## v tidyr   0.8.2       v stringr 1.4.0  
## v readr   1.3.1       v forcats 0.4.0
## -- Conflicts -------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
nc <- system.file("gpkg/nc.gpkg", package="sf") %>%
  read_sf() %>%
  st_transform(32119)
nc2 <- nc %>% select(BIR74, SID74, NAME) %>%
  st_set_agr(c(BIR74 = "aggregate", SID74 = "aggregate", NAME = "identity"))

nc2%>%st_agr()
##     BIR74     SID74      NAME 
## aggregate aggregate  identity 
## Levels: constant aggregate identity
nc
## Simple feature collection with 100 features and 14 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
## epsg (SRID):    32119
## proj4string:    +proj=lcc +lat_1=36.16666666666666 +lat_2=34.33333333333334 +lat_0=33.75 +lon_0=-79 +x_0=609601.22 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
## # A tibble: 100 x 15
##     AREA PERIMETER CNTY_ CNTY_ID NAME  FIPS  FIPSNO CRESS_ID BIR74 SID74
##    <dbl>     <dbl> <dbl>   <dbl> <chr> <chr>  <dbl>    <int> <dbl> <dbl>
##  1 0.114      1.44  1825    1825 Ashe  37009  37009        5  1091     1
##  2 0.061      1.23  1827    1827 Alle~ 37005  37005        3   487     0
##  3 0.143      1.63  1828    1828 Surry 37171  37171       86  3188     5
##  4 0.07       2.97  1831    1831 Curr~ 37053  37053       27   508     1
##  5 0.153      2.21  1832    1832 Nort~ 37131  37131       66  1421     9
##  6 0.097      1.67  1833    1833 Hert~ 37091  37091       46  1452     7
##  7 0.062      1.55  1834    1834 Camd~ 37029  37029       15   286     0
##  8 0.091      1.28  1835    1835 Gates 37073  37073       37   420     0
##  9 0.118      1.42  1836    1836 Warr~ 37185  37185       93   968     4
## 10 0.124      1.43  1837    1837 Stok~ 37169  37169       85  1612     1
## # ... with 90 more rows, and 5 more variables: NWBIR74 <dbl>, BIR79 <dbl>,
## #   SID79 <dbl>, NWBIR79 <dbl>, geom <MULTIPOLYGON [m]>
nc$State = "North Carolina"
nc
## Simple feature collection with 100 features and 15 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
## epsg (SRID):    32119
## proj4string:    +proj=lcc +lat_1=36.16666666666666 +lat_2=34.33333333333334 +lat_0=33.75 +lon_0=-79 +x_0=609601.22 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
## # A tibble: 100 x 16
##     AREA PERIMETER CNTY_ CNTY_ID NAME  FIPS  FIPSNO CRESS_ID BIR74 SID74
##    <dbl>     <dbl> <dbl>   <dbl> <chr> <chr>  <dbl>    <int> <dbl> <dbl>
##  1 0.114      1.44  1825    1825 Ashe  37009  37009        5  1091     1
##  2 0.061      1.23  1827    1827 Alle~ 37005  37005        3   487     0
##  3 0.143      1.63  1828    1828 Surry 37171  37171       86  3188     5
##  4 0.07       2.97  1831    1831 Curr~ 37053  37053       27   508     1
##  5 0.153      2.21  1832    1832 Nort~ 37131  37131       66  1421     9
##  6 0.097      1.67  1833    1833 Hert~ 37091  37091       46  1452     7
##  7 0.062      1.55  1834    1834 Camd~ 37029  37029       15   286     0
##  8 0.091      1.28  1835    1835 Gates 37073  37073       37   420     0
##  9 0.118      1.42  1836    1836 Warr~ 37185  37185       93   968     4
## 10 0.124      1.43  1837    1837 Stok~ 37169  37169       85  1612     1
## # ... with 90 more rows, and 6 more variables: NWBIR74 <dbl>, BIR79 <dbl>,
## #   SID79 <dbl>, NWBIR79 <dbl>, geom <MULTIPOLYGON [m]>, State <chr>
nc1 <- nc %>% select(NAME,State) %>%
  st_set_agr(c(State= "constant"))

nc1 %>%
  st_agr()
##     NAME    State 
##     <NA> constant 
## Levels: constant aggregate identity

We should attach a constant agr because this value is valid for all the geometries

Exercise 2

Create a new sf object from the geometry obtained by st_union(nc), and assign "North Carolina" to the variable State. Which agr can you now assign to this attribute variable?

(state_geom=st_sf(st_union(nc)))
## Simple feature collection with 1 feature and 0 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
## epsg (SRID):    32119
## proj4string:    +proj=lcc +lat_1=36.16666666666666 +lat_2=34.33333333333334 +lat_0=33.75 +lon_0=-79 +x_0=609601.22 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
##                     st_union.nc.
## 1 MULTIPOLYGON (((834893.7 95...
plot(state_geom)

state_geom$State="North Carolina"

st2<-state_geom%>%select(State)%>%
  st_set_agr(c(State="identity"))

st2%>%
  st_agr()
##    State 
## identity 
## Levels: constant aggregate identity
st2
## Simple feature collection with 1 feature and 1 field
## Attribute-geometry relationship: 0 constant, 0 aggregate, 1 identity
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
## epsg (SRID):    32119
## proj4string:    +proj=lcc +lat_1=36.16666666666666 +lat_2=34.33333333333334 +lat_0=33.75 +lon_0=-79 +x_0=609601.22 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
##            State                   st_union.nc.
## 1 North Carolina MULTIPOLYGON (((834893.7 95...

This variable State is now a identity variable because it represents the whole geometry

Exercise 3

Use st_area to add a variable with name area to nc. Compare the area and AREA variables in the nc dataset. What are the units of AREA? Are the two linearly related? If there are discrepancies, what could be the cause?

nc$area=st_area(nc)
nc%>%select(NAME, AREA,area)
## Simple feature collection with 100 features and 3 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
## epsg (SRID):    32119
## proj4string:    +proj=lcc +lat_1=36.16666666666666 +lat_2=34.33333333333334 +lat_0=33.75 +lon_0=-79 +x_0=609601.22 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
## # A tibble: 100 x 4
##    NAME      AREA area                                                 geom
##    <chr>    <dbl> <S3: unit>                             <MULTIPOLYGON [m]>
##  1 Ashe     0.114 113759816~ (((387344.8 278387.2, 381334.2 282773.7, 3794~
##  2 Allegha~ 0.061  61120784~ (((408601.8 292424.5, 408565.1 293985.3, 4066~
##  3 Surry    0.143 142374317~ (((478717.1 277490.3, 476935.7 278867.1, 4715~
##  4 Curritu~ 0.07   69467210~ (((878193.9 289128.3, 877381.5 291117.1, 8759~
##  5 Northam~ 0.153 152099621~ (((769835.3 277796.1, 768364.4 274841.8, 7626~
##  6 Hertford 0.097  96785742~ (((812328 277876.4, 791158.2 277011.9, 789882~
##  7 Camden   0.062  61605187~ (((878193.9 289128.3, 883270.9 275314.1, 8811~
##  8 Gates    0.091  90381791~ (((828444.6 290095.5, 824767.5 287166, 820818~
##  9 Warren   0.118 117952748~ (((671747.1 278687.7, 674043.1 282237.4, 6704~
## 10 Stokes   0.124 123299616~ (((517436.7 277857.7, 479040.3 279098.1, 4811~
## # ... with 90 more rows

AREA units are in arc degrees corresponding to 100 000 meters in the metric system. They are related by AREA=area m^2 * 100000 m^2. Being more strict if L is an arc degree and r is the Earth radius in meters we have

\[L=\frac{2πr}{360}\]

Calculating L and by replace r

\[L=\frac{2π*6378000}{360}\]

we have that an arc degree is 113317.099 meters. The discrepancies are because of the projections system. AS the Earth shape behaves as a geoide, there are differences in the areas projected per CRS.

Exercise 4

Is the area variable intensive or extensive? Is its agr equal to constant, identity or aggregate?

nc%>%select(NAME, AREA,area)%>%
  st_agr()
## NAME AREA area 
## <NA> <NA> <NA> 
## Levels: constant aggregate identity

The area variable is extensive because of its property of change if it is splitted for example. It is mutable. It is agr is equal to aggregate according to its nature of being a “summary value over the geometry” [1]

Exercise 5

Find the name of the county that contains POINT(-78.34046 35.017)

#Create the point
look<-st_sfc(st_point(c(-78.34046,35.017)),crs=4326)
plot(st_geometry(st_transform(nc,4326)), col = terrain.colors(50,alpha=0.5),main="Counties and Point")
plot(look,col="red",pch = 19, add=TRUE)

#Query the intersection
container=(st_intersects(look,st_transform(nc,4326)))
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
states<-nc[unlist(container),]
states$NAME
## [1] "Sampson"
#Ploting the result
plot(st_geometry(states),col="orange",main="States that intersects POINT(-78.34046 35.017)")

Exercise 6

Find the names of all counties with boundaries that touch county Sampson.

sampson= nc[nc$NAME == "Sampson", ]
sampson_to=nc[unlist(st_touches(sampson,nc)),]
#List of names
sampson_to$NAME
## [1] "Johnston"   "Wayne"      "Harnett"    "Cumberland" "Duplin"    
## [6] "Bladen"     "Pender"
plot(st_geometry(sampson_to),col="orange",main="Counties with boundaries that touch county `Sampson`.")
plot(st_geometry(sampson), col="red",add=TRUE)

Exercise 7

List the names of all counties that are less than 50 km away from county Sampson.

sampson_close=nc[unlist(st_is_within_distance(sampson,nc,dist=50000)),]
sampson_close$NAME
##  [1] "Wake"        "Chatham"     "Wilson"      "Johnston"    "Greene"     
##  [6] "Lee"         "Wayne"       "Harnett"     "Moore"       "Lenoir"     
## [11] "Sampson"     "Cumberland"  "Jones"       "Hoke"        "Duplin"     
## [16] "Onslow"      "Robeson"     "Bladen"      "Pender"      "Columbus"   
## [21] "New Hanover" "Brunswick"

For the following exercises, we will work with naturalearth:

library(rnaturalearth)
e = ne_countries(returnclass = "sf")["pop_est"]

Exercise 8

Plot the variable pop_est. Which projection is used?

e
## Simple feature collection with 177 features and 1 field
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -180 ymin: -90 xmax: 180 ymax: 83.64513
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## First 10 features:
##    pop_est                       geometry
## 0 28400000 MULTIPOLYGON (((61.21082 35...
## 1 12799293 MULTIPOLYGON (((16.32653 -5...
## 2  3639453 MULTIPOLYGON (((20.59025 41...
## 3  4798491 MULTIPOLYGON (((51.57952 24...
## 4 40913584 MULTIPOLYGON (((-65.5 -55.2...
## 5  2967004 MULTIPOLYGON (((43.58275 41...
## 6     3802 MULTIPOLYGON (((-59.57209 -...
## 7      140 MULTIPOLYGON (((68.935 -48....
## 8 21262641 MULTIPOLYGON (((145.398 -40...
## 9  8210281 MULTIPOLYGON (((16.97967 48...
plot(e)  

The projection used is WGS84

Exercise 9

Plot the variable converted to +proj=ortho, see here. What happened to Antarctica?

e2<-st_transform(e,st_crs("+proj=ortho"))
plot(e2)

Antarctica is no longer visible in the plot

Exercise 10

Plot the same data with the ortho projection, adding lat_0=-90, lat_0=90, lon_0=90, lon_0=-90 for four different views. Try to explain what happens.

plot(st_transform(e,st_crs("+proj=ortho +lat_0=-90")),col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - South Pole view")

plot(st_transform(e,st_crs("+proj=ortho +lat_0=90")),col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - North Pole view")

plot(st_transform(e,st_crs("+proj=ortho +long_0=90")),col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - Side Pole view")

plot(st_transform(e,st_crs("+proj=ortho +long_0=-90")),col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - Side Pole view")

Exercise 11

Create a polygon of the northern hemisphere. Use that to get the intersection with e, and plot that with lat_0=90.

#Get North Hemisphere
hnorth<-st_transform(e,st_crs("+proj=ortho +lat_0=90"))
plot(hnorth,col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - North Pole view")

#Create a unique geometry
hnorthclean<-st_union(st_geometry(e[unlist((!is.na(st_dimension(hnorth)))),]))
plot(hnorthclean)

#Plot the new geometry from pole north view
hnorthcleanview<-st_transform(hnorthclean,st_crs("+proj=ortho +lat_0=90"))
plot(hnorthcleanview,col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - North Pole view")

#Intersection with e
(result<-st_intersects(hnorthclean,e,sparse=FALSE))
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
##      [,1] [,2] [,3] [,4] [,5] [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12]
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE
##      [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
## [1,]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##      [,24] [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34]
## [1,]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##      [,35] [,36] [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45]
## [1,]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##      [,46] [,47] [,48] [,49] [,50] [,51] [,52] [,53] [,54] [,55] [,56]
## [1,]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
##      [,57] [,58] [,59] [,60] [,61] [,62] [,63] [,64] [,65] [,66] [,67]
## [1,]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##      [,68] [,69] [,70] [,71] [,72] [,73] [,74] [,75] [,76] [,77] [,78]
## [1,]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##      [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86] [,87] [,88] [,89]
## [1,]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##      [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98] [,99] [,100]
## [1,]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE   TRUE
##      [,101] [,102] [,103] [,104] [,105] [,106] [,107] [,108] [,109] [,110]
## [1,]   TRUE  FALSE   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE  FALSE   TRUE
##      [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118] [,119] [,120]
## [1,]  FALSE   TRUE  FALSE  FALSE   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE
##      [,121] [,122] [,123] [,124] [,125] [,126] [,127] [,128] [,129] [,130]
## [1,]  FALSE   TRUE   TRUE   TRUE   TRUE   TRUE  FALSE   TRUE   TRUE   TRUE
##      [,131] [,132] [,133] [,134] [,135] [,136] [,137] [,138] [,139] [,140]
## [1,]   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE
##      [,141] [,142] [,143] [,144] [,145] [,146] [,147] [,148] [,149] [,150]
## [1,]   TRUE   TRUE  FALSE   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE
##      [,151] [,152] [,153] [,154] [,155] [,156] [,157] [,158] [,159] [,160]
## [1,]   TRUE   TRUE  FALSE   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE  FALSE
##      [,161] [,162] [,163] [,164] [,165] [,166] [,167] [,168] [,169] [,170]
## [1,]   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE   TRUE
##      [,171] [,172] [,173] [,174] [,175] [,176] [,177]
## [1,]   TRUE   TRUE  FALSE   TRUE  FALSE   TRUE  FALSE
geom_in<-e[unlist(result),]

result2<-st_transform(geom_in,st_crs("+proj=ortho +lat_0=90"))
#Intersection plotting
plot(result2,col = terrain.colors(50,alpha=0.5),main="Natural Earth Dataset - North Pole view result")

Exercise 12

For the land mass between latitudes 45 and 55 and longitudes 0 and 10, estimate the total population as well as the average population.

landmass<-st_crop(e, c(xmin=0, xmax=10, ymin=45, ymax=55))
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
plot(landmass,col = terrain.colors(10,alpha=0.5),main="Land Mass crop for Natural Earth Dataset")

(total_pop=sum(landmass$pop_est))
## [1] 315713130
(avg_pop=mean(landmass$pop_est))
## [1] 31571313