Introduction to Simple Features (sf)

library(ggplot2)
library(plotly)
library(raster)
library(RColorBrewer)
library(viridis)
library(sf)
library(kableExtra)
library(maps)
library(rgdal)

setwd("~/Desktop/University of Utah PhD /Course Work/Fall 2022 Semester/GEOG6000_Data Analysis/lab08")
path_to_data = system.file("shape/nc.shp", package = "sf")

north_carolina = st_read(path_to_data, quiet = TRUE)

#Subsetting North Carolina to include only the CNTY ID, NAME, AREA, and PERIMETER

north_carolina = north_carolina [ , c("CNTY_ID", "NAME", "AREA", "PERIMETER")]

north_carolina
## Simple feature collection with 100 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
## Geodetic CRS:  NAD27
## First 10 features:
##    CNTY_ID        NAME  AREA PERIMETER                       geometry
## 1     1825        Ashe 0.114     1.442 MULTIPOLYGON (((-81.47276 3...
## 2     1827   Alleghany 0.061     1.231 MULTIPOLYGON (((-81.23989 3...
## 3     1828       Surry 0.143     1.630 MULTIPOLYGON (((-80.45634 3...
## 4     1831   Currituck 0.070     2.968 MULTIPOLYGON (((-76.00897 3...
## 5     1832 Northampton 0.153     2.206 MULTIPOLYGON (((-77.21767 3...
## 6     1833    Hertford 0.097     1.670 MULTIPOLYGON (((-76.74506 3...
## 7     1834      Camden 0.062     1.547 MULTIPOLYGON (((-76.00897 3...
## 8     1835       Gates 0.091     1.284 MULTIPOLYGON (((-76.56251 3...
## 9     1836      Warren 0.118     1.421 MULTIPOLYGON (((-78.30876 3...
## 10    1837      Stokes 0.124     1.428 MULTIPOLYGON (((-80.02567 3...
##Creating two points in space using x,y coordinates:
point_one = st_point(c(0,3))

point_two = st_point(c(5,7))

##Creating a line between two points:

a_line = st_linestring(c(point_one, point_two))

plot(a_line)

print(point_one)
## POINT (0 3)
a_line
## LINESTRING (0 3, 5 7)
##To determine the geometry of your simple feature
st_geometry_type(a_line)
## [1] LINESTRING
## 18 Levels: GEOMETRY POINT LINESTRING POLYGON MULTIPOINT ... TRIANGLE
##The CRS is the most important element of a simple feature - it gives us our spatial coordinates!

##Visit spatialreference.org to determine the EPSG code for the datum you'd like to work with. 

##4326 is the code for WGS84

st_crs(4326)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     ENSEMBLE["World Geodetic System 1984 ensemble",
##         MEMBER["World Geodetic System 1984 (Transit)"],
##         MEMBER["World Geodetic System 1984 (G730)"],
##         MEMBER["World Geodetic System 1984 (G873)"],
##         MEMBER["World Geodetic System 1984 (G1150)"],
##         MEMBER["World Geodetic System 1984 (G1674)"],
##         MEMBER["World Geodetic System 1984 (G1762)"],
##         MEMBER["World Geodetic System 1984 (G2139)"],
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]],
##         ENSEMBLEACCURACY[2.0]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
##Defines the spatial extent of the data - min/max of x and y

st_bbox(north_carolina)
##      xmin      ymin      xmax      ymax 
## -84.32385  33.88199 -75.45698  36.58965

Read and Write

##Reading in data

NY8 = st_read(dsn = "../datafiles/NY_data/NY8_utm18.shp",
              layer = "NY8_utm18",
              drivers = "ESRI Shapefile")
## options:        ESRI Shapefile 
## Reading layer `NY8_utm18' from data source 
##   `/Users/davidleydet/Desktop/University of Utah PhD /Course Work/Fall 2022 Semester/GEOG6000_Data Analysis/datafiles/NY_data/NY8_utm18.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 281 features and 17 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 358241.9 ymin: 4649755 xmax: 480393.1 ymax: 4808545
## Projected CRS: WGS 84 / UTM zone 18N
##Because it is a simple shapefile with the same data source and layer we can use the following syntax
NY8 = st_read("../datafiles/NY_data/NY8_utm18.shp")
## Reading layer `NY8_utm18' from data source 
##   `/Users/davidleydet/Desktop/University of Utah PhD /Course Work/Fall 2022 Semester/GEOG6000_Data Analysis/datafiles/NY_data/NY8_utm18.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 281 features and 17 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 358241.9 ymin: 4649755 xmax: 480393.1 ymax: 4808545
## Projected CRS: WGS 84 / UTM zone 18N

Converting non-spatial data to simple features

wna_climate = read.csv("../datafiles/WNAclimate.csv")

head(wna_climate)
##   WNASEQ     LONDD   LATDD ELEV totsnopt annp Jan_Tmp Jul_Tmp
## 1      1 -107.9333 50.8736  686  78.7869  326   -13.9    18.8
## 2      2 -105.2670 55.2670  369 145.3526  499   -21.3    16.8
## 3      3 -102.5086 41.7214 1163  42.6544  450    -4.2    23.3
## 4      4 -110.2606 44.2986 2362 255.1009  489   -10.9    14.1
## 5      5 -114.1500 59.2500  880 164.8924  412   -23.9    14.4
## 6      6 -120.6667 57.4500  900 141.9260  451   -17.5    13.8
##Set as simple feature, using coordinates from the columns LONDD and LATDD, using coordinate system WGS84 (code - 4326)
wna_climate = st_as_sf(wna_climate,
                       coords = c("LONDD", "LATDD"),
                       crs = 4326)

wna_climate
## Simple feature collection with 2012 features and 6 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -138 ymin: 29.8333 xmax: -100 ymax: 60
## Geodetic CRS:  WGS 84
## First 10 features:
##    WNASEQ ELEV totsnopt annp Jan_Tmp Jul_Tmp                  geometry
## 1       1  686  78.7869  326   -13.9    18.8 POINT (-107.9333 50.8736)
## 2       2  369 145.3526  499   -21.3    16.8   POINT (-105.267 55.267)
## 3       3 1163  42.6544  450    -4.2    23.3 POINT (-102.5086 41.7214)
## 4       4 2362 255.1009  489   -10.9    14.1 POINT (-110.2606 44.2986)
## 5       5  880 164.8924  412   -23.9    14.4     POINT (-114.15 59.25)
## 6       6  900 141.9260  451   -17.5    13.8   POINT (-120.6667 57.45)
## 7       7 1100 183.4187  492   -18.8    13.2 POINT (-119.7167 56.7167)
## 8       8 1480 191.5657  606   -11.0    12.8    POINT (-114.6 50.7667)
## 9       9  651 144.9015  443   -22.4    15.3 POINT (-122.1667 59.5167)
## 10     10  725 151.0778  455   -23.5    15.4    POINT (-112.1 57.7667)
myplot = ggplot(wna_climate)

myplot +
  geom_sf(color = "mediumseagreen", alpha = 0.4)

##Save this to disk as a shape file
st_write(wna_climate, dsn = "../datafiles/wnaclim.shp")

Coordinate Reference System (CRS) Operations

Ensure all of your data is in the same reference system!!

st_crs(NY8)
## Coordinate Reference System:
##   User input: WGS 84 / UTM zone 18N 
##   wkt:
## PROJCRS["WGS 84 / UTM zone 18N",
##     BASEGEOGCRS["WGS 84",
##         DATUM["D_unknown",
##             ELLIPSOID["WGS84",6378137,298.257223563,
##                 LENGTHUNIT["metre",1,
##                     ID["EPSG",9001]]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["Degree",0.0174532925199433]]],
##     CONVERSION["UTM zone 18N",
##         METHOD["Transverse Mercator",
##             ID["EPSG",9807]],
##         PARAMETER["Latitude of natural origin",0,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",-75,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["Scale factor at natural origin",0.9996,
##             SCALEUNIT["unity",1],
##             ID["EPSG",8805]],
##         PARAMETER["False easting",500000,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]],
##         ID["EPSG",16018]],
##     CS[Cartesian,2],
##         AXIS["(E)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]],
##         AXIS["(N)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]]]
##Check the EPSG code for your data using $epsg syntax
st_crs(NY8)$epsg
## [1] NA
st_crs(wna_climate)$epsg
## [1] 4326
##Another way to get the CRS

format(st_crs(NY8))
## [1] "WGS 84 / UTM zone 18N"

Setting the CRS

##Note: this should only be used when the simple feature is missing a CRS and you know what it is. It is NOT for re-projecting the sf object to a new coordinate system.

st_crs(wna_climate) = 4326

Reprojecting CRS

##The st_transform() function allows you to project your sf object to a new CRS. This is particularly useful if you have multiple data sources with different original coordinate systems.

##As a reminder: when you read in spatial data, the first thing you should use is st_crs to check the CRS and st_transform to re-project if necessary.

NY8 = st_transform(NY8, crs = 4326)

##Check to see if it worked - the initial check before the transformation was "NA"
st_crs(NY8)$epsg
## [1] 4326

Attribute Operations

oregon_tann = read_sf("../datafiles/oregon/oregontann.shp")

class(oregon_tann)
## [1] "sf"         "tbl_df"     "tbl"        "data.frame"
oregon_tann %>%
  kbl() %>%
  kable_classic_2() %>%
  scroll_box(width = "500px", height = "200px")
elevation tann coords_x1 coords_x2 geometry
846 9.6 -120.717 44.917 POINT (-120.717 44.917)
96 12.5 -120.200 45.717 POINT (-120.2 45.717)
543 11.1 -122.717 42.217 POINT (-122.717 42.217)
2 10.3 -123.883 46.150 POINT (-123.883 46.15)
1027 7.6 -117.817 44.833 POINT (-117.817 44.833)
1050 8.4 -117.833 44.783 POINT (-117.833 44.783)
24 10.8 -124.383 43.117 POINT (-124.383 43.117)
1097 7.7 -121.317 44.067 POINT (-121.317 44.067)
999 9.5 -118.167 43.917 POINT (-118.167 43.917)
18 11.2 -121.950 45.633 POINT (-121.95 45.633)
24 11.8 -124.283 42.050 POINT (-124.283 42.05)
1262 8.1 -119.050 43.583 POINT (-119.05 43.583)
64 10.2 -124.567 42.833 POINT (-124.567 42.833)
1451 5.6 -121.783 43.233 POINT (-121.783 43.233)
238 10.3 -123.250 45.417 POINT (-123.25 45.417)
28 10.5 -123.283 46.100 POINT (-123.283 46.1)
6 10.9 -123.900 45.217 POINT (-123.9 45.217)
863 8.7 -120.183 45.233 POINT (-120.183 45.233)
69 11.0 -123.200 44.633 POINT (-123.2 44.633)
198 11.1 -123.067 43.783 POINT (-123.067 43.783)
253 10.9 -123.050 43.717 POINT (-123.05 43.717)
1974 3.2 -122.133 42.900 POINT (-122.133 42.9)
99 11.1 -123.317 44.933 POINT (-123.317 44.933)
250 10.7 -122.967 43.783 POINT (-122.967 43.783)
89 11.7 -123.317 43.667 POINT (-123.317 43.667)
405 9.6 -121.133 45.450 POINT (-121.133 45.45)
809 8.7 -117.917 45.567 POINT (-117.917 45.567)
35 12.4 -123.583 43.600 POINT (-123.583 43.6)
1155 6.3 -117.267 45.433 POINT (-117.267 45.433)
125 10.9 -122.317 45.267 POINT (-122.317 45.267)
109 11.4 -123.217 44.117 POINT (-123.217 44.117)
118 11.2 -123.300 44.117 POINT (-123.3 44.117)
55 11.4 -123.100 45.533 POINT (-123.1 45.533)
808 8.7 -120.217 45.000 POINT (-120.217 45)
15 11.6 -124.417 42.400 POINT (-124.417 42.4)
282 12.6 -123.317 42.433 POINT (-123.317 42.433)
1109 7.5 -120.933 44.517 POINT (-120.933 44.517)
814 7.9 -117.117 44.883 POINT (-117.117 44.883)
1712 6.5 -119.650 42.550 POINT (-119.65 42.55)
228 10.7 -122.150 45.450 POINT (-122.15 45.45)
594 10.3 -119.550 45.350 POINT (-119.55 45.35)
190 11.4 -119.283 45.817 POINT (-119.283 45.817)
152 10.2 -121.517 45.683 POINT (-121.517 45.683)
646 11.7 -117.267 44.350 POINT (-117.267 44.35)
825 8.9 -120.700 45.200 POINT (-120.7 45.2)
1249 8.8 -121.783 42.200 POINT (-121.783 42.2)
840 9.8 -118.083 45.317 POINT (-118.083 45.317)
1455 8.0 -120.350 42.183 POINT (-120.35 42.183)
206 11.4 -122.683 44.100 POINT (-122.683 44.1)
680 9.2 -121.133 44.633 POINT (-121.133 44.633)
683 10.5 -117.017 43.983 POINT (-117.017 43.983)
1252 8.1 -118.833 43.283 POINT (-118.833 43.283)
45 11.1 -123.183 45.233 POINT (-123.183 45.233)
400 12.0 -122.867 42.367 POINT (-122.867 42.367)
762 8.5 -121.183 44.583 POINT (-121.183 44.583)
472 10.9 -120.350 45.467 POINT (-120.35 45.467)
256 12.1 -118.433 45.967 POINT (-118.433 45.967)
569 9.3 -120.717 45.483 POINT (-120.717 45.483)
47 10.2 -124.050 44.633 POINT (-124.05 44.633)
2 11.2 -124.250 43.417 POINT (-124.25 43.417)
666 10.6 -117.000 43.867 POINT (-117 43.867)
1212 6.0 -120.433 44.400 POINT (-120.433 44.4)
654 11.0 -116.967 44.050 POINT (-116.967 44.05)
51 12.2 -122.600 45.350 POINT (-122.6 45.35)
46 10.3 -123.933 45.033 POINT (-123.933 45.033)
732 11.1 -117.250 43.650 POINT (-117.25 43.65)
1329 9.0 -120.533 42.700 POINT (-120.533 42.7)
452 11.4 -118.850 45.683 POINT (-118.85 45.683)
517 10.5 -118.817 45.483 POINT (-118.817 45.483)
6 11.7 -122.600 45.600 POINT (-122.6 45.6)
70 11.8 -124.067 42.883 POINT (-124.067 42.883)
866 8.4 -120.900 44.350 POINT (-120.9 44.35)
757 10.3 -122.517 42.733 POINT (-122.517 42.733)
917 8.7 -121.217 44.267 POINT (-121.217 44.267)
937 8.4 -121.150 44.267 POINT (-121.15 44.267)
18 11.2 -124.117 43.700 POINT (-124.117 43.7)
675 10.8 -117.167 44.767 POINT (-117.167 44.767)
202 12.2 -123.350 42.967 POINT (-123.35 42.967)
142 12.1 -123.350 43.200 POINT (-123.35 43.2)
1490 6.7 -120.883 42.333 POINT (-120.883 42.333)
60 11.1 -123.017 44.917 POINT (-123.017 44.917)
3 10.9 -123.917 45.983 POINT (-123.917 45.983)
1422 4.4 -118.967 44.150 POINT (-118.967 44.15)
1169 8.8 -123.367 42.617 POINT (-123.367 42.617)
411 9.5 -122.650 44.867 POINT (-122.65 44.867)
1425 7.6 -119.683 43.483 POINT (-119.683 43.483)
142 11.3 -122.767 44.800 POINT (-122.767 44.8)
341 9.8 -122.067 45.117 POINT (-122.067 45.117)
3 10.2 -123.867 45.450 POINT (-123.867 45.45)
843 8.8 -117.883 45.217 POINT (-117.883 45.217)
683 10.7 -117.250 43.983 POINT (-117.25 43.983)
1328 6.4 -121.683 43.683 POINT (-121.683 43.683)
##Relook to add boundaries in ggplot

ggplot(oregon_tann) +
  geom_sf(color = "red", alpha = 0.5) +
  xlab("Longitude") +
  ylab("Latitude")

Select Columns

##There are a couple of ways to do this. One way:

oregon_tann2 = subset(oregon_tann, select = c(elevation, tann))

##The geometry column is sticky and stays attached even when you subset
names(oregon_tann2)
## [1] "elevation" "tann"      "geometry"

Filter Rows

##Subsetting for rows/observation with an elevation over 1000 meters

oregon_tann3 = subset(oregon_tann, subset = elevation > 1000)

##state_bound = map('state', fill = TRUE, plot = FALSE) %>% st_as_sf()

ggplot() +
  geom_sf(data = oregon_tann, aes(color = "red", alpha = 0.9)) +
  geom_sf(data = oregon_tann3, color = "red") +
  #geom_sf(data = state_bound) +
  #coord_sf(xlim = c(-125,-116), ylim = c(42,47)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme(legend.position = "none")

##Adding a column
oregon_tann$rando = runif(n = nrow(oregon_tann))

names(oregon_tann)
## [1] "elevation" "tann"      "coords_x1" "coords_x2" "geometry"  "rando"
##Extracting a column

elevation = oregon_tann$elevation

elevation[1:10]
##  [1]  846   96  543    2 1027 1050   24 1097  999   18

Geometry

##Get the geometry
geometry = oregon_tann$geometry

geometry
## Geometry set for 92 features 
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -124.567 ymin: 42.05 xmax: -116.967 ymax: 46.15
## CRS:           NA
## First 5 geometries:
## POINT (-120.717 44.917)
## POINT (-120.2 45.717)
## POINT (-122.717 42.217)
## POINT (-123.883 46.15)
## POINT (-117.817 44.833)
##Drop Geometry

attributes = st_drop_geometry(oregon_tann)

head(attributes)
## # A tibble: 6 × 5
##   elevation  tann coords_x1 coords_x2   rando
##       <int> <dbl>     <dbl>     <dbl>   <dbl>
## 1       846   9.6     -121.      44.9 0.355  
## 2        96  12.5     -120.      45.7 0.231  
## 3       543  11.1     -123.      42.2 0.00502
## 4         2  10.3     -124.      46.2 0.891  
## 5      1027   7.6     -118.      44.8 0.579  
## 6      1050   8.4     -118.      44.8 0.182

Spatial Operations

Spatial Filter

##Spatial Filter

#Transform the coordinate system to UTM Zone 18N NAD83
north_carolina = st_transform(north_carolina, crs = 26918)

plot(st_geometry(north_carolina))

##Use st_sample to generate random points

##Set seed is used to ensure the random generation is consistent for each iteration
set.seed(1234)

random_pnts = st_sample(north_carolina, size = 500)

##Set as a simple feature (sf)
random_pnts = st_as_sf(random_pnts)

ggplot() +
  geom_sf(data = north_carolina) +
  geom_sf(data = random_pnts, color = "mediumorchid4", alpha = 0.5) +
  theme_light() +
  labs(title = "Random Sample - North Carolina",
       subtitle = "By David, Abby, and Ellie Leydet")

##Subsetting for the County Polygon of Pasquotank
pasq = subset(north_carolina, NAME == "Pasquotank")


##Run the st_filter function to filter the random points by pasq (the district)
filtered_pnts = st_filter(random_pnts, pasq)

myncplot = ggplot() +
  geom_sf(data = north_carolina) +
  theme_light() +
  labs(title = "Random Sample - North Carolina",
       subtitle = "By David, Abby, and Ellie Leydet")

myncplot + geom_sf(data = filtered_pnts, color = "red", alpha = 0.5) +
  labs(caption = "Pasquotank County")

Topological Relations

##Points outside Pasquotank

out.pasq = st_filter(random_pnts, pasq, .predicate = st_disjoint)

##Editing the plot using the pasq subset to be filled a different color --> see geom_sf(data = pasq, fill = "grey19") line

myncplot + geom_sf(data = out.pasq, color = "mediumorchid4", alpha = 0.5) +
  geom_sf(data = pasq, fill = "grey19") +
  labs(caption = "All points outside of Pasquotank County")

##Another useful predicate is st_is_within_distance, which requires that you pass an additional distance (dist) argument to the filter. The dist argument is in units specified by the CRS, in this case meters.

fiftykm.filter = st_filter(random_pnts, pasq,
                           .predicate = st_is_within_distance,
                           dist = 50000)

##Reversed the drawing order so the points appear over the different shaded county

myncplot + geom_sf(data = pasq, fill = "snow1") + 
  geom_sf(data = fiftykm.filter, color = "mediumorchid4", alpha = 0.5) +
  labs(caption = "All points within 50 kilometers of Pasquotank County")

Geometric Operations

##Centroid

center.of.pasq = st_centroid(pasq)

ggplot() +
  geom_sf(data = pasq) +
  geom_sf(data = center.of.pasq, color = "red", pch = 2) +
  theme_light()

##Buffer

pasq.buffer = st_buffer(pasq, dist = 5000)

ggplot() +
  geom_sf(data = pasq.buffer, alpha = 0.8) +
  geom_sf(data = pasq, color = "grey2", fill = "snow1") +
  labs(title = "Pasquotank 5 kilometer Buffer") +
  theme_light()

##Union - merge polygons together into one large feature

nc.boundary = st_union(north_carolina)

ggplot(data = nc.boundary) +geom_sf()

##To cast a geometry is to change it from one geometry type to another. For example, to convert the boundary of North Carolina to points (the vertices of the polygon)

nc.points = st_cast(nc.boundary, "POINT")

ggplot() + geom_sf(data = nc.points, color = "mediumseagreen", alpha = 0.5 ) + theme_light()

nc.lines = st_cast(nc.boundary, "MULTILINESTRING")

ggplot() + geom_sf(data = nc.lines)

nc.lines2 = st_cast(nc.lines, "LINESTRING")

ggplot() + geom_sf(data = nc.lines2)

##If you can’t tell, it was broken into six lines: one for the mainland, and the other five for the ecological (and cultural) disaster known as the Outer Banks. How do we color by polygon?

Plotting

##ggplot

NY8v2 <- st_read("../datafiles/NY_data/NY8_utm18.shp")
## Reading layer `NY8_utm18' from data source 
##   `/Users/davidleydet/Desktop/University of Utah PhD /Course Work/Fall 2022 Semester/GEOG6000_Data Analysis/datafiles/NY_data/NY8_utm18.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 281 features and 17 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 358241.9 ymin: 4649755 xmax: 480393.1 ymax: 4808545
## Projected CRS: WGS 84 / UTM zone 18N
binghamton = subset(NY8v2, AREANAME == "Binghamton city")

bing.plot = ggplot() +
  theme_bw()

bing.plot + geom_sf(data = binghamton)

Multiple Geometries

## Create a new sf object that has Binghampton and its neighboring polygons
bingies_neighbors = st_filter(NY8v2, binghamton)

## Create a random sample
bing.ran.points = st_sample(bingies_neighbors, size = 25)

## Set the sample as a simple feature (sf)
bing.ran.points = st_as_sf(bing.ran.points)

## REMINDER - THE ORDER THAT THE GEOM_SF ARE WRITTEN IS THE ORDER THEY ARE DRAWN**

bing.plot + geom_sf(data = bingies_neighbors) +
  geom_sf(data = binghamton, fill = "deepskyblue") +
  geom_sf(data = bing.ran.points, color = "brown1", alpha = 0.5, size = 2) 

Plotting Attributes

##Attributes names

names(binghamton)
##  [1] "AREANAME"   "AREAKEY"    "X"          "Y"          "POP8"      
##  [6] "TRACTCAS"   "PROPCAS"    "PCTOWNHOME" "PCTAGE65P"  "Z"         
## [11] "AVGIDIST"   "PEXPOSURE"  "Cases"      "Xm"         "Ym"        
## [16] "Xshift"     "Yshift"     "geometry"
## Plotting a thematic map by population
bing.plot + geom_sf(data = binghamton, aes(fill = POP8))

## Plotting a thematic map by exposure
bing.plot + geom_sf(data = binghamton, aes(fill = PEXPOSURE)) +
  scale_fill_viridis(option = "cividis")

Coordinates

## 
bing.plot + geom_sf(data = binghamton, aes(fill = PEXPOSURE)) +
  scale_fill_viridis(option = "cividis") +
  coord_sf(datum = 26918)

## Make sure the CRS is set appropriately!!!!!
## st_is_valid() check to see if the sf is valid
## st_make_valid() may fix the sf if there are issues

ggplot() + 
  geom_sf(data = binghamton, aes(fill = PEXPOSURE)) +
  scale_fill_viridis(option = "cividis") +
   coord_sf(crs = 4326, xlim = c(-75.91, -75.88), ylim = c(42.10, 42.13)) +
   theme_bw() +
   theme(axis.text.x = element_text(angle = 90))

Rasters

##Create an empty raster frame
r = raster(nrow = 10, ncol = 10)

## Fill it with random values from 0 to 1

r[] = runif(n=100, min = 0, max = 1)

r
## class      : RasterLayer 
## dimensions : 10, 10, 100  (nrow, ncol, ncell)
## resolution : 36, 18  (x, y)
## extent     : -180, 180, -90, 90  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : memory
## names      : layer 
## values     : 0.01260905, 0.9844737  (min, max)
plot(r)

Read and Write Rasters

air_temp = raster("../datafiles/air.mon.ltm.nc")
## Loading required namespace: ncdf4
## Warning in .varName(nc, varname, warn = warn): varname used is: air
## If that is not correct, you can set it to one of: air, valid_yr_count
## Note that we have only **read the first layer (January)**. R will tell you that it loaded the variable called air. To avoid this message you can specify this directly, which is important for files containing multiple variables:##
air_temp = raster("../datafiles/air.mon.ltm.nc", varname = "air")

air_temp
## class      : RasterLayer 
## band       : 1  (of  12  bands)
## dimensions : 73, 144, 10512  (nrow, ncol, ncell)
## resolution : 2.5, 2.5  (x, y)
## extent     : -1.25, 358.75, -91.25, 91.25  (xmin, xmax, ymin, ymax)
## crs        : NA 
## source     : air.mon.ltm.nc 
## names      : Monthly.Long.Term.Mean.Air.Temperature.at.sigma.level.0.995 
## z-value    : 0000-12-30 
## zvar       : air
writeRaster(air_temp, filename = "../datafiles/air_temp.tif")

Raster CRS

## Set CRS
## Check out the PROJ4 for syntax. Go to the website!!
## "+init=epsg:4326" can be used for rasters so you don't have to write out the full PROJ4 syntax!

crs(air_temp) = "+proj=longlat +ellps=WGS84 +towgs84=0,0,0 +no_defs "

## Check CRS
crs(air_temp)
## Coordinate Reference System:
## Deprecated Proj.4 representation: +proj=longlat +ellps=WGS84 +no_defs 
## WKT2 2019 representation:
## GEOGCRS["unknown",
##     DATUM["Unknown based on WGS84 ellipsoid",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",7030]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433],
##         ID["EPSG",8901]],
##     CS[ellipsoidal,2],
##         AXIS["longitude",east,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433,
##                 ID["EPSG",9122]]],
##         AXIS["latitude",north,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433,
##                 ID["EPSG",9122]]]]
weird_crs = crs("+proj=tmerc +lat_0=0 +lon_0=15 +k=0.999923 +x_0=5500000 +y_0=0 +ellps=GRS80 +units=m +no_defs")

air_temp_weird_crs = projectRaster(air_temp, crs = weird_crs)
## Warning in rgdal::rawTransform(projfrom, projto, nrow(xy), xy[, 1], xy[, : 100
## projected point(s) not finite
crs(air_temp_weird_crs)
## Coordinate Reference System:
## Deprecated Proj.4 representation:
##  +proj=tmerc +lat_0=0 +lon_0=15 +k=0.999923 +x_0=5500000 +y_0=0
## +ellps=GRS80 +units=m +no_defs 
## WKT2 2019 representation:
## PROJCRS["unknown",
##     BASEGEOGCRS["unknown",
##         DATUM["Unknown based on GRS80 ellipsoid",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1],
##                 ID["EPSG",7019]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8901]]],
##     CONVERSION["unknown",
##         METHOD["Transverse Mercator",
##             ID["EPSG",9807]],
##         PARAMETER["Latitude of natural origin",0,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",15,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["Scale factor at natural origin",0.999923,
##             SCALEUNIT["unity",1],
##             ID["EPSG",8805]],
##         PARAMETER["False easting",5500000,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["(E)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]],
##         AXIS["(N)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]]]
## Plotting

plot(air_temp, main = "NCEP NCAR January LTM Tair")

## Notice how the the the x axis starts at 0 and ends at 360
air_temp = rotate(air_temp)

##Create a color palette with nine colors using color brewer

my.pal = brewer.pal(n = 9, name = "OrRd")

##Add country geometries

countries = st_read("../datafiles/ne_50m_admin_0_countries/ne_50m_admin_0_countries.shp", 
                    quiet = TRUE)

## Run these lines together

plot(air_temp,
     main = "NCEP NCAR January LTM Tair",
     col = my.pal)

plot(st_geometry(countries), add = TRUE)

Summary Statistics

## The function cellStats() can be used to calculate most summary statistics for a raster layer. So to get the mean global temperature (and standard deviation):

cellStats(air_temp, mean)
## [1] 3.546352
cellStats(air_temp, sd)
## [1] 19.63948

Subset Rasters

##If we want to use only a subset of the original raster layer, the function crop() will extract only the cells in a given region. This can be defined using another raster object or Spatial* object, or by defining an extent object:


# Extent Method

canada.ext = extent(c(xmin = -143,
                      xmax = -50,
                      ymin = 41,
                      ymax = 84))


canada_air_temp1 = crop(air_temp, canada.ext)


# Spatial Method
canada = subset(countries, NAME == "Canada")

canada_air_temp2 = crop(air_temp, canada)

##Plot both for comparison

par(mfrow = c(1, 2))

plot(canada_air_temp1, 
     main = "NCEP NCAR January LTM Tair",
     sub = "Extent Method",
     col = my.pal)
plot(st_geometry(canada), add = TRUE)


plot(canada_air_temp2, 
     main = "NCEP NCAR January LTM Tair",
     sub = "Spatial Method",
     col = my.pal)
plot(st_geometry(canada), add = TRUE)

## Note that crop subsets the original raster to the extent of Canada’s borders, rather than to the borders themselves. This is because rasters are always rectangular. You can ‘hide’ the values of raster cells outside of a polygon by using the mask function. The raster has to be rectangular, so this does not remove the cells outside the polygon. Rather, it sets their value to NA.

canada_air_temp3 = mask(canada_air_temp2, mask = canada)

plot(canada_air_temp3, 
     main = "NCEP NCAR January LTM Tair", 
     sub = "Masked Version",
     col = my.pal)
plot(st_geometry(canada), add = TRUE)

Extract Data

## Values can be extracted from individual locations (or sets of locations) using extract(). This can take a set of coordinates in matrix form, or use a Spatial* object. To get the January temperature of Salt Lake City:

##Use long/lat for SLC. Gives value of the cell

extract(air_temp, cbind(-111.9, 40.76))
## [1] -2.919719
##By default this gives you the value of the cell in which the point falls. The value can equally be estimated by bilinear interpolation from the four closest cells with method='bilinear':

extract(air_temp, cbind(-111.9, 40.76), method = "bilinear")
## [1] -4.324555
##Using the wna climate samples locations to subset our data

##Note: character. 'simple' or 'bilinear'. If 'simple' values for the cell a point falls in are returned. If 'bilinear' the returned values are interpolated from the values of the four nearest raster cells.

##  logical. If df=TRUE, results will be returned as a data.frame. The first column is a sequential ID, the other column(s) are the extracted values

wna.air.temp.df = extract(air_temp,
                          wna_climate,
                          method = "bilinear",
                          df = TRUE)

head(wna.air.temp.df)
##   ID Monthly.Long.Term.Mean.Air.Temperature.at.sigma.level.0.995
## 1  1                                                   -9.782378
## 2  2                                                  -18.523962
## 3  3                                                   -2.664617
## 4  4                                                   -7.552574
## 5  5                                                  -18.926520
## 6  6                                                  -12.687242
## Extracting pixels by polygons

##Create the China Polygon
china = subset(countries, NAME == "China")

china.air.temp.df = extract(air_temp,
                            china,
                            df = TRUE)

head(china.air.temp.df)
##   ID Monthly.Long.Term.Mean.Air.Temperature.at.sigma.level.0.995
## 1  1                                                   -23.51869
## 2  1                                                   -22.07151
## 3  1                                                   -23.53544
## 4  1                                                   -24.09268
## 5  1                                                   -21.95177
## 6  1                                                   -15.34994
##Extracts the temperature data for China
##When this function is used with a set of polygons, the output is in a list, but we can retrieve whatever we want from that list.

##Help on this?

two.countries = rbind(china, canada)

china.tjan = extract(air_temp, two.countries)[[1]]

hist(china.tjan)

##The extract() function also takes an argument fun. This allows you to calculate a summary statistic for each set of pixels that is extracted (i.e. one per polygon). Here, we’ll use this with countries to get an average value of January temperature. We add this back as a new column in the countries object, and then plot it:

countries$Jan_Tmp = extract(air_temp, countries, fun = mean)[,1]

country.temp = ggplot(countries) +
  geom_sf(aes(fill = Jan_Tmp)) +
  labs(fill = "Temperature",
       title = "Mean January Temperature")

ggplotly(country.temp)

Raster Stacks

## A useful extension to the basic raster functions is the use of stacks. These are a stack of raster layers which represent different variables, but have the same spatial extent and resolution. 

## Read in the stack
air.temp.stk = stack("../datafiles/air.mon.ltm.nc", varname = "air")

## Rotate the long to -18- to 180
air.temp.stk = rotate(air.temp.stk)

## Change my extent to
myext = extent(c(-130, -60, 25, 50))

## Crop the stack to the extent

air.temp.stk = crop(air.temp.stk, myext)
## Subset the first three stacks. Double check what [[]] means. I assume it is the first three stacks as opposed to picking the data out in a matrix

air.temp.substk = air.temp.stk[[1:3]]

air.temp.substk
## class      : RasterBrick 
## dimensions : 10, 28, 280, 3  (nrow, ncol, ncell, nlayers)
## resolution : 2.5, 2.5  (x, y)
## extent     : -128.75, -58.75, 26.25, 51.25  (xmin, xmax, ymin, ymax)
## crs        : NA 
## source     : memory
## names      : X0000.12.30, X0001.01.30, X0001.02.27 
## min values :   -18.18637,   -15.90084,   -10.40882 
## max values :    21.45181,    21.01648,    21.04738 
## time       : 0000-12-30, 0001-01-30, 0001-02-27
##Paste names to each stack. month.abb is built in to R
##Potential mismatch between the initial names and the paste? For example X0000.12.30 is December, but now reads as Jan?

names(air.temp.stk) = paste("TAS", month.abb)

names(air.temp.stk)
##  [1] "TAS.Jan" "TAS.Feb" "TAS.Mar" "TAS.Apr" "TAS.May" "TAS.Jun" "TAS.Jul"
##  [8] "TAS.Aug" "TAS.Sep" "TAS.Oct" "TAS.Nov" "TAS.Dec"
#Raster stack pull by name

## Method 1

air.temp.jan = air.temp.stk$TAS.Jan

## Method 2

air.temp.jan2 = air.temp.stk[["TAS.Jan"]]

air.temp.jan
## class      : RasterLayer 
## dimensions : 10, 28, 280  (nrow, ncol, ncell)
## resolution : 2.5, 2.5  (x, y)
## extent     : -128.75, -58.75, 26.25, 51.25  (xmin, xmax, ymin, ymax)
## crs        : NA 
## source     : memory
## names      : TAS.Jan 
## values     : -18.18637, 21.45181  (min, max)
## time       : 0000-12-30

Plotting Stacks

## Plotting stacks. zlim syntax sets the same scale for each stack

plot(air.temp.stk,
     col = my.pal,
     zlim = c(-35, 35))

## Turn countries into spatial data to add to plot
## addfun Function to add additional items such as points or polygons to the plot (map). Typically containing statements like "points(xy); plot(polygons, add=TRUE)". This is particularly useful to add something to each map when plotting a multi-layer Raster* object.

addBorder = function() {plot(as_Spatial(countries), add = TRUE)}

plot(air.temp.stk,
     col = my.pal,
     zlim = c(-35, 35),
     addfun = addBorder)

cellStats Function

#The cellStats() function now returns the mean (or other statistic) for all layers, allowing a quick look at the seasonal cycle of average air temperature.

tavg = cellStats(air.temp.stk, mean)

plot(1:12, tavg,
     type = "l",
     col = "red",
     xlab = "Month",
     ylab = "Average Temperature (Celsius)")

## Extract a single location
## Colorado Springs 38.8339° N, 104.8214° W

cos.tavg = extract(air.temp.stk, cbind(-104.82, 38.83), method = "bilinear")

plot(1:12, cos.tavg,
     type = "l",
     col = "red",
     xlab = "Month",
     ylab = "Average Temperature (Celsius)",
     main = "Average Colorado Springs Temperature")