Load in data

# Let's load in from file the USGS watershed boundaries at Hydrologic Unit Code (HUC) level 4
# Remember drop the provided data in you working directory and/or change the file path to find where the data lives
wb4 <- st_read("C:/Users/qq893/Desktop/data/WBDHU4.shp")
## Reading layer `WBDHU4' from data source `C:\Users\qq893\Desktop\data\WBDHU4.shp' using driver `ESRI Shapefile'
## Simple feature collection with 7 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -80.54099 ymin: 36.66942 xmax: -71.78971 ymax: 44.15305
## Geodetic CRS:  NAD83
# load HUC8
wb8 <- st_read("C:/Users/qq893/Desktop/data/WBDHU8.shp")
## Reading layer `WBDHU8' from data source `C:\Users\qq893\Desktop\data\WBDHU8.shp' using driver `ESRI Shapefile'
## Simple feature collection with 88 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -80.54099 ymin: 36.66942 xmax: -71.78971 ymax: 44.15305
## Geodetic CRS:  NAD83
# load National Hydrography Dataset river flowlines (not I already subsetted this to just the Susquehanna Basin). We have to drop some data from this file to clean it up a bit using st_zm()
nhd <- st_read("C:/Users/qq893/Desktop/data/NHD_Flowline_Susq.shp") %>%
  st_zm()
## Reading layer `NHD_Flowline_Susq' from data source 
##   `C:\Users\qq893\Desktop\data\NHD_Flowline_Susq.shp' using driver `ESRI Shapefile'
## Simple feature collection with 30671 features and 131 fields
## Geometry type: LINESTRING
## Dimension:     XYZM
## Bounding box:  xmin: -78.90935 ymin: 39.54049 xmax: -74.5981 ymax: 42.97751
## z_range:       zmin: 0 zmax: 0
## m_range:       mmin: 0 mmax: 100
## Geodetic CRS:  NAD83
# load in the locations of USGS riverflow gages
gages <- st_read("C:/Users/qq893/Desktop/data/gages02.shp")
## Reading layer `gages02' from data source `C:\Users\qq893\Desktop\data\gages02.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1896 features and 3 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 1384783 ymin: 1720951 xmax: 1928242 ymax: 2675893
## Projected CRS: NAD_1983_Albers
### Let's explore what a shapefile, or sf object in R, looks like.
### Run each line below then inspect the output below this R chunk (or can copy and paste each line into console to print the output in the console)

# Notice the data class is both a dataframe and an sf object. There is a geometry column which holds the coordinates, projection etc, and its a multipolygon
str(wb4)  
## Classes 'sf' and 'data.frame':   7 obs. of  16 variables:
##  $ OBJECTID  : num  1 2 3 4 5 6 7
##  $ TNMID     : chr  "{49F84434-1864-4AE7-9E75-61DEEA2CD019}" "{7ED73B78-B141-47CA-93EE-18EF6D95C252}" "{F646200B-E311-4C3B-B730-7A7A09624D88}" "{6F1A4B05-D4F4-4CCC-B2F6-3145111E2FA8}" ...
##  $ MetaSource: chr  NA NA NA NA ...
##  $ SourceData: chr  NA NA NA NA ...
##  $ SourceOrig: chr  NA NA NA NA ...
##  $ SourceFeat: chr  NA NA NA NA ...
##  $ LoadDate  : Date, format: "2012-06-11" "2016-07-29" ...
##  $ AreaSqKm  : num  15207 38018 32802 18979 71224 ...
##  $ AreaAcres : num  3757807 9394497 8105582 4689834 17599782 ...
##  $ GNIS_ID   : num  0 0 0 0 0 0 0
##  $ Name      : chr  "Upper Chesapeake" "Potomac" "Upper Hudson" "Lower Hudson-Long Island" ...
##  $ States    : chr  "DE,MD,PA" "DC,MD,PA,VA,WV" "MA,NJ,NY,VT" "CT,NJ,NY,RI" ...
##  $ HUC4      : chr  "0206" "0207" "0202" "0203" ...
##  $ Shape_Leng: num  8.37 17.81 18.85 15.69 27.5 ...
##  $ Shape_Area: num  1.58 3.96 3.61 2.03 7.65 ...
##  $ geometry  :sfc_MULTIPOLYGON of length 7; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:13714, 1:2] -75.9 -75.9 -75.9 -75.9 -75.9 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:15] "OBJECTID" "TNMID" "MetaSource" "SourceData" ...
# notice you can index the geometry column and make a plot using base R or ggplot2
plot(wb4$geometry)

# notice if you do not specify the geometry, R will make plots for each attribute
plot(wb4)
## Warning: plotting the first 9 out of 15 attributes; use max.plot = 15 to plot
## all

# We can also use ggplot with the geom_sf()
ggplot() +
  geom_sf(data= wb4) +
  theme_bw()

# if we want to color the polygons or whatever spatial object type, by some attribute we can use the same ggplot styles
ggplot() +
  geom_sf(data= wb4, aes(fill=AreaSqKm)) +
  theme_bw()

# we can filter and manipulate sf objects in R just like any other dataframe. Each row typical represents one geometry (1 point, 1 line with 2 ends, 1 polygon) in a multipoint/line/polygon data type
wb4 %>%
  filter(AreaSqKm > 60000)
## Simple feature collection with 1 feature and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -78.91438 ymin: 39.53421 xmax: -74.58096 ymax: 42.98733
## Geodetic CRS:  NAD83
##   OBJECTID                                  TNMID MetaSource SourceData
## 1        5 {0067D865-23F8-458D-A526-A8691E422229}       <NA>       <NA>
##   SourceOrig SourceFeat   LoadDate AreaSqKm AreaAcres GNIS_ID        Name
## 1       <NA>       <NA> 2018-02-01 71223.85  17599782       0 Susquehanna
##     States HUC4 Shape_Leng Shape_Area                       geometry
## 1 MD,NY,PA 0205   27.50398   7.653421 MULTIPOLYGON (((-75.19653 4...
# Check the CRS projection of each file and make sure they are all the same for future analysis
st_crs(wb4)
## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]
st_crs(wb8)
## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]
st_crs(nhd)
## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]
st_crs(gages) 
## Coordinate Reference System:
##   User input: NAD_1983_Albers 
##   wkt:
## PROJCRS["NAD_1983_Albers",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]],
##             ID["EPSG",6269]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["Degree",0.0174532925199433]]],
##     CONVERSION["unnamed",
##         METHOD["Albers Equal Area",
##             ID["EPSG",9822]],
##         PARAMETER["Latitude of false origin",23,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-96,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",29.5,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",45.5,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["(E)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]],
##         AXIS["(N)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]]]
# One of these does not have the same projection as all the other ones. Which file is it? 
# FINISH the code below to reproject the file using the same CRS as the others with the st_transform() function.

gages <- gages %>%
  st_transform(4269)
  
st_crs(gages) 
## Coordinate Reference System:
##   User input: EPSG:4269 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Geodesy."],
##         AREA["North America - onshore and offshore: Canada - Alberta; British Columbia; Manitoba; New Brunswick; Newfoundland and Labrador; Northwest Territories; Nova Scotia; Nunavut; Ontario; Prince Edward Island; Quebec; Saskatchewan; Yukon. Puerto Rico. United States (USA) - Alabama; Alaska; Arizona; Arkansas; California; Colorado; Connecticut; Delaware; Florida; Georgia; Hawaii; Idaho; Illinois; Indiana; Iowa; Kansas; Kentucky; Louisiana; Maine; Maryland; Massachusetts; Michigan; Minnesota; Mississippi; Missouri; Montana; Nebraska; Nevada; New Hampshire; New Jersey; New Mexico; New York; North Carolina; North Dakota; Ohio; Oklahoma; Oregon; Pennsylvania; Rhode Island; South Carolina; South Dakota; Tennessee; Texas; Utah; Vermont; Virginia; Washington; West Virginia; Wisconsin; Wyoming. US Virgin Islands.  British Virgin Islands."],
##         BBOX[14.92,167.65,86.46,-47.74]],
##     ID["EPSG",4269]]

Spatial intersect

### Extract Susquehanna Basin using filter() and make a new R object of Susquehanna subbasins. The HUC4 is "0205". FINISH this code
sb4 <- wb4 %>%
  filter(HUC4=="0205")

# Run this line of code and look at the interactive map that pops up below, play around with changing the background map and zooming in and out. We will come back to these maps
mapview(sb4)
# If two sf objects have the projection, your can subset an object based on the geospatial boundaries of the other object simply using the logic of dataframe indexing (e.g. newdata <- points[boundaries, ] . Remember the logic here is df[rows, columns]. So sf[1:5, ] means filter to rows 1:5 but keep all columns. Similarly, with a spatial object intersect... points[boundaries, ]... we are saying filter points or rows, to those within the boundaries and keep all rows)

# FINISH this code to select the HUC8 watershed boundaries that are within the Susquehanna Basin (just using the Susquehanna HUC4 basins)
sb8 <-   wb8[sb4,]

# USE mapview to plot the Susquehanna HUC8 basins AND all huc4 basins. Mapview works in layers so whatever object you map first it will be plotted on top of the following layers. 
# Change the color of one of the layers so you can see them both (HINT: using col.regions = "some color". You can directly type common colors, red, green, blue, yellow, grey, etc.). FINSIH this code
  mapview(wb4,col.regions="red") +
    mapview(sb8) 

USGS gages

Question: How many gages are in the Susquehanna Basin?

ANSWER: 335 gages

NHD streams

# First lets plot the river flowlines to see what they look like. 
ggplot() +
  geom_sf(data= nhd)

# Now make the same plot, but we color code the lines bases on the column "QE_MA" which is the mean annual discharge. Hard to see though because so many small streams! Pick a color scale you like using some type of scale_color_() function and use theme_bw(). Links for color palettes in R: https://ggplot2-book.org/scale-colour.html, https://ggplot2.tidyverse.org/reference/scale_gradient.html
# FINISH the code

ggplot() +
geom_sf(data=nhd,aes(color=log10(QE_MA))) +
scale_colour_viridis() +
  theme_bw()

# Now you make the same plot but color code the rivers by stream order ("StreamOrde"). You will first have to convert to StreamOrde to a factor using as.factor() to plot stream order as a categorical rather than continuous variable.
# FINISH this code.

ggplot() +
geom_sf(data=nhd,aes(color=as.factor(StreamOrde))) +
scale_colour_viridis(discrete = T, direction= -1) +
  theme_bw()

Drainage density

QUESTION: Which HUC8 has the lowest and highest drainage density, And what are the drainage densities (units are km/km2)?

ANSWER: The HUC8”02050102” has the highest drainage density and its density is 85217.3984km/km2. The HUC8”02050104”has the lowest drainage density and its density is 183.4993km/km2.

QUESTION: Inspect the maps of drainage densities, the stream network shapes, and underlying topography (perhaps just looking at different layers in mapview), make some obervations or hypotheses about why some watersheds have higher or lower drainage densities.

ANSWER: 1. The surface nature can influence the drainage density, such as the high permeable surface can result in lower densities. 2. The frequency of the rain can result in a higher density.

# Somtimes we want to join attributes from one shapefile to another based on spatial location. This is a spatial join. There are many types, See the ?st_join function. https://r-spatial.github.io/sf/reference/st_join.html
# The "x" object is the object we are joining to, so that is the geometry of any new object that will be created but with added attributes or columns from the "y" object. When using pipes, the first object used is assumed to be "x"

# FINISH this code. Use st_join (with all default arguments, but chose a "join =" argument that makes sense to you for figuring out the total length of streams within each subbasin) to join attributes from Susq. River basin HUC8 subbasins to nhd. That way, every nhd river reach will be labeled with its subbasin so we can easily use group_by to calculate metrics over subbasins
nhd_join <- nhd %>%
  st_join(sb8,join=st_within)

# FINISH this code. Make a plot of nhd rivers color coded by HUC8. this plot will take a while to render

ggplot(nhd_join) +
  geom_sf(aes(color=HUC8))

# Now we are going to calculate drainage density, which is the total length of streams per unit area, typically over some watershed boundary. Below use group_by and summarise to calculate the SUM of river lengths (e.g. using "LENGTHKM" attribute from nhd_join) and carry over the Area of the HUC8 basin that you previously joined. Next, use mutate() to calculate the drainage density from length/area. Lastly, this is important for our next analysis, remove any rows (i.e. subbasins) that have less than 1000 km of total stream length using filter(). These are subbasins that cover coastal waters etc, and we cannot analyze drainage network metrics in areas that cover too much water.
# FINISH THIS CODE

dd <- nhd_join %>%
  st_set_geometry(NULL) %>%  #lets remove the spatial geometry since we do not need this anymore
  arrange(HUC8) %>%
  group_by(HUC8) %>%
  summarise(length =sum(LENGTHKM,na.rm=T),
              area =AreaSqKM[1]) %>%
  mutate(density =length/area ) %>%
  filter(length>1000)

# Now lets join that drainage density (dd) back to the HUC8 polygons (sb8) so we can map/visualize drainage density. Resources for joins: https://dplyr.tidyverse.org/reference/mutate-joins.html, https://statisticsglobe.com/r-dplyr-join-inner-left-right-full-semi-anti, https://tavareshugo.github.io/r-intro-tidyverse-gapminder/08-joins/index.html
# FINISH this code. Decide whether to use inner_join() or left_join(). (Hint: by = "HUC8")
sb8_join <-sb8 %>%
  inner_join(dd,by="HUC8")

# Inspect the output using interactive mapview. using "density" as the variable to map in the "zcol =" argument. FINISH THIS CODE
mapview(sb8_join,zcol="density")  
# Make some static or interactive plots with both the HUC8 basins (same map as above) on top of the stream network to visualize both in order to help you answer the question.