# Provides an efficient, fast, and well-documented workflow for general data modeling, wrangling, and visualization tasks.
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

# Designed to bring spatial analysis in R in line with these other systems.
library(sf)

## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1

# Implements basic and high-level functions for raster data and for vector data operations such as intersections.
library(raster)

## Loading required package: sp

## 
## Attaching package: 'raster'

## The following object is masked from 'package:dplyr':
## 
##     select

## The following object is masked from 'package:tidyr':
## 
##     extract

# Allows special formatting of column names such as superscripts.
library(prism)

## Warning: package 'prism' was built under R version 4.1.2

## Be sure to set the download folder using `prism_set_dl_dir()`.

# Be sure to set the download folder using prism_set_dl_dir()!!!!
prism_set_dl_dir("~/prismtmp")

9.1 ZONAL STATISTICS

# The STATEFP and GEOID fields are initially read in as factors.
county <- st_read("cb_2018_us_county_20m.shp")

## Reading layer `cb_2018_us_county_20m' from data source 
##   `C:\Users\Da Dell\Documents\My SugarSync\School\GIS\Final Project\gdswr_chapter9_data\cb_2018_us_county_20m.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3220 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.1743 ymin: 17.91377 xmax: 179.7739 ymax: 71.35256
## Geodetic CRS:  NAD83

# The STATEFP and GEOID fields are converted to numbers.
county <- county %>%
  mutate(state = as.numeric(as.character(STATEFP)), 
         fips = as.numeric(as.character(GEOID))) %>%
# The dataset is filtered down to the 48 conterminous United States.
  filter(state != 2, state != 15, state < 60) 
# Checks the coordinate reference system and note that the data are geographic (longitude and latitude) with a NAD83 datum.
st_crs(county)

## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]

# Plots out the data creating a basic & unfilled map of the US counties
ggplot(data = county) +
  geom_sf(fill = NA)

The precipitation data to summarize comes from the PRISM dataset (http://www.prism.oregonstate.edu).
Is a widely used interpolated climate dataset for the United States that combines data from meteorological stations with other ancillary datasets such as elevation.

# Is a prism package that automates data downloading and importing as a raster object.
options(prism.path = ".") 
# Downloads the 30-year climatology of annual precipitation for the conterminous US.
get_prism_normals(type = 'ppt', resolution = '4km', annual = T, keepZip = TRUE)

## 
  |                                                                            
  |                                                                      |   0%

## 
##  already exists. Skipping downloading.

## 
  |                                                                            
  |======================================================================| 100%

# Allows the user to view the list of downloaded packages.
prism_archive_ls()

##  [1] "PRISM_ppt_30yr_normal_4kmM2_annual_bil"
##  [2] "PRISM_ppt_stable_4kmM3_201801_bil"     
##  [3] "PRISM_ppt_stable_4kmM3_201802_bil"     
##  [4] "PRISM_ppt_stable_4kmM3_201803_bil"     
##  [5] "PRISM_ppt_stable_4kmM3_201804_bil"     
##  [6] "PRISM_ppt_stable_4kmM3_201805_bil"     
##  [7] "PRISM_ppt_stable_4kmM3_201806_bil"     
##  [8] "PRISM_ppt_stable_4kmM3_201807_bil"     
##  [9] "PRISM_ppt_stable_4kmM3_201808_bil"     
## [10] "PRISM_ppt_stable_4kmM3_201809_bil"     
## [11] "PRISM_ppt_stable_4kmM3_201810_bil"     
## [12] "PRISM_ppt_stable_4kmM3_201811_bil"     
## [13] "PRISM_ppt_stable_4kmM3_201812_bil"

# Used to incorporate one or more of these dataset into a raster object. 
prism_p30 <- pd_stack(prism_archive_ls()[1])

# Converts the vector polygon dataset of counties to a raster dataset, in which each raster cell is coded with the 5-digit FIPS code of the corresponding county so that we can calculate zonal statistics.
cnty_ras <- rasterize(county, prism_p30, field = "fips") 
# Runs zonal statistics by specifying:
# (1) the raster layer to summarize (prism_p30)
# (2) the raster layer with the zones to use for summarizing (cnty_ras)
# (3) the function to use for summarizing ("mean").
cnty_p30 <- zonal(prism_p30, cnty_ras, fun = "mean")

# The output matrix is converted into a data frame.
cnty_p30 <- data.frame(cnty_p30) 
# Returns the data. However, there is a problem with the output. There are fewer counties in the summary data table than there are counties in the polygon file.
summary(cnty_p30)

##       zone           value        
##  Min.   : 1001   Min.   :  83.68  
##  1st Qu.:19041   1st Qu.: 751.83  
##  Median :29205   Median :1054.33  
##  Mean   :30625   Mean   : 991.52  
##  3rd Qu.:45087   3rd Qu.:1221.14  
##  Max.   :56045   Max.   :2960.78

# Returns the dimension of the data frame.
dim(cnty_p30)

## [1] 3101    2

# Returns the dimension of the data frame.
dim(county)

## [1] 3108   12

# Indicates which elements of data frame county$fip are not existent in data frame cnty_p30$zone
setdiff(county$fips, cnty_p30$zone)

## [1] 51580 51685 51610 51570 51683 51600 51678

9.2 ZONE SIZE AND RASTER SPATIAL RESOLUTION

# Joins the zonal summaries to the county polygons.
cnty_join1 <- left_join(county, cnty_p30, by = c("fips" = "zone")) 

# Generates a national map, which there are no obvious counties with missing data with a legend for the precipitation.
ggplot(data = cnty_join1) +
  geom_sf(aes(fill = value)) +
  scale_fill_continuous(name = "Precip (mm)")

Note that all the missing counties have a state FIPS code of 51, which is Virginia

# Zooms to Virginia by selecting the counties in the state so we can query a non-spatial data frame.
va_join1 <- filter(cnty_join1, STATEFP == "51") 

# Generates a map of Virginia with a legend for the precipitation.
ggplot(data = va_join1) +
  geom_sf(aes(fill = value)) +
  scale_fill_continuous(name = "Precip (mm)")

Virginia is unique among states in that it has a number of small, independent cities that are governed separately from the surrounding county (akin to Washington D.C.) and are therefore assigned their own county FIPS codes.
We will create another map to find where missing data is located.

# Generates a map of a portion of Northern  Virginia with a legend for the precipitation.
ggplot(data = va_join1) +
  geom_sf(aes(fill = value)) +
  coord_sf(xlim = c(-77.6, -77.0), ylim = c(38.6, 39.1)) +
  scale_fill_continuous(name = "Precip (mm)")

Now we can see several small cities with missing precipitation data.
When we rasterized the counties to a 4 km grid to match the precipitation data, we sampled the center point of each 4 km grid cell and assigned it the FIPS code of the county that it fell within.
However, if a county is very small or narrow, it is possible that no grid center point will fall within its boundaries.
In this case, the county ends up excluded from the zonal summary.
The most straightforward way to deal with this is to use a finer sampling grid to generate the zonal statistics.

# disaggregate() reduces the cell size of the PRISM data by a factor of 4, changing the cell size to approximately 1 km.
# Bilinear resampling is used to conduct linear interpolation between the center points of the 4 km grid cells to estimate values at the 1 km resolution.
prism_p30_1km <- disaggregate(prism_p30, fact = 4, method = "bilinear")  
# Uses the new 1 km grid to rasterize the county dataset.
cnty_ras_1km <- rasterize(county, prism_p30_1km, field = "fips") 

# Runs zonal statistics by specifying: 
# (1) Raster layer to summarize (prism_p30_1km)
# (2) Raster layer with the zones to use for summarizing (cnty_ras_1km)
# (3) Function to use for summarizing ("mean")
cnty_p30_1km <- zonal(prism_p30_1km, cnty_ras_1km, fun = "mean")  

# cnty_p30_1km is converted into a data frame.
cnty_p30_1km <- data.frame(cnty_p30_1km) 
# Returns the data.
summary(cnty_p30_1km)

##       zone            mean       
##  Min.   : 1001   Min.   :  83.2  
##  1st Qu.:19045   1st Qu.: 751.2  
##  Median :29212   Median :1054.8  
##  Mean   :30672   Mean   : 991.5  
##  3rd Qu.:46008   3rd Qu.:1221.4  
##  Max.   :56045   Max.   :2943.2

# Returns the dimension of the data frame.
dim(cnty_p30_1km)

## [1] 3108    2

# Returns the dimension of the data frame.
dim(county)

## [1] 3108   12

# Indicates which elements of data frame county$fip are not existent in data frame cnty_p30_1km$zone.
setdiff(county$fips, cnty_p30_1km$zone)

## numeric(0)

Now the zonal summary table contains the same number of records as the county file.
By checking the zoomed-in map of Virginia, we can see that all of the counties, including the tiny independent cities, now have precipitation values.

# Joins the zonal summaries to the county polygons.
cnty_join2 <- left_join(county, cnty_p30_1km, 
                        by = c("fips" = "zone")) 

# Zooms to Virginia by selecting the counties in the state so we can query a non-spatial data frame.
va_join2 <- filter(cnty_join2, STATEFP == "51") 

# Generates a map of a portion of Northern  Virginia with a legend for the precipitation.
ggplot(data = va_join2) +
  geom_sf(aes(fill = mean)) +
  scale_fill_continuous(name = "Precip (mm)") +
  coord_sf(xlim = c(-77.6, -77.0), ylim = c(38.6, 39.1))

9.3 EXTRACTING VALUES WITH POINT DATA

Let’s say that we would like to check the accuracy of the PRISM climate grids by comparing them with weather station data.
In this example, we will look at monthly values for 2018.

# Downloads the monthly climatology of 2018 precipitation for the conterminous US.
get_prism_monthlys(type = 'ppt', years = 2018,  mon=1:12, keepZip = TRUE)

## 
  |                                                                            
  |                                                                      |   0%

## 
## PRISM_ppt_stable_4kmM3_201801_bil.zip already exists. Skipping downloading.

## 
  |                                                                            
  |======                                                                |   8%

## 
## PRISM_ppt_stable_4kmM3_201802_bil.zip already exists. Skipping downloading.

## 
  |                                                                            
  |============                                                          |  17%

## 
## PRISM_ppt_stable_4kmM3_201803_bil.zip already exists. Skipping downloading.

## 
  |                                                                            
  |==================                                                    |  25%

## 
## PRISM_ppt_stable_4kmM3_201804_bil.zip already exists. Skipping downloading.

## 
  |                                                                            
  |=======================                                               |  33%

## 
## PRISM_ppt_stable_4kmM3_201805_bil.zip already exists. Skipping downloading.

## 
  |                                                                            
  |=============================                                         |  42%

## 
## PRISM_ppt_stable_4kmM3_201806_bil.zip already exists. Skipping downloading.

## 
  |                                                                            
  |===================================                                   |  50%

## 
## PRISM_ppt_stable_4kmM3_201807_bil.zip already exists. Skipping downloading.

## 
  |                                                                            
  |=========================================                             |  58%

## 
## PRISM_ppt_stable_4kmM3_201808_bil.zip already exists. Skipping downloading.

## 
  |                                                                            
  |===============================================                       |  67%

## 
## PRISM_ppt_stable_4kmM3_201809_bil.zip already exists. Skipping downloading.

## 
  |                                                                            
  |====================================================                  |  75%

## 
## PRISM_ppt_stable_4kmM3_201810_bil.zip already exists. Skipping downloading.

## 
  |                                                                            
  |==========================================================            |  83%

## 
## PRISM_ppt_stable_4kmM3_201811_bil.zip already exists. Skipping downloading.

## 
  |                                                                            
  |================================================================      |  92%

## 
## PRISM_ppt_stable_4kmM3_201812_bil.zip already exists. Skipping downloading.

## 
  |                                                                            
  |======================================================================| 100%

# Allows the user to view the list of downloaded packages.
datalist <- prism_archive_ls() 
# Used to incorporate one or more of these dataset into a raster object.
prism_prec_2018 <- pd_stack(datalist[2:13])

# Reads .csv file.
mesosm <- read_csv("mesodata_small.csv")

## Rows: 240 Columns: 9

## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (1): STID
## dbl  (7): MONTH, YEAR, TMAX, TMIN, HMAX, HMIN, RAIN
## date (1): DATE

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Reads .csv file.
geo_coords <- read_csv("geoinfo.csv")

## Rows: 143 Columns: 12

## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (7): stid, name, city, cdir, cnty, cdiv, clas
## dbl (5): stnm, rang, lat, lon, elev

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Converts to an sf 
geo_coords <- st_as_sf(geo_coords, coords = c("lon", "lat")) 
# Returns a vector, data frame or array but with duplicate elements/rows removed.
mystations <- unique(mesosm$STID) 
# Filters data to only contains the locations of the four weather stations included in the mesosm dataset.
station_pts <- geo_coords %>%
  filter(stid %in% mystations)

# Obtains the raster data associated with each of the four points.
prism_samp <- extract(prism_prec_2018, station_pts, factors = T, df = T)

# Gets and sets the names of prism_samp.
names(prism_samp) <- c("ID", month.abb) 
# Combines the columns of prism_samp and station_pts into the prism_samp data frame.
prism_samp <- bind_cols(prism_samp, station_pts) 

# The data is pivoted into a long format.
prism_long <- prism_samp %>%
  pivot_longer(Jan:Dec, names_to = "mnth_name", values_to = "PPrec") %>%
  mutate(month = match(mnth_name, month.abb), PPrec_in = PPrec * 0.0393701) 

# The data is joined to the mesosm data frame (which contains the station measurements) by month and station ID. 
# The critical variables are the monthly rainfall measured at the mesonet stations (RAIN), and the monthly rainfall estimates from the PRISM dataset converted to inches to match the Mesonet data (“Pprec_in”)
compare_dat <- mesosm %>%
  filter(YEAR == 2018) %>%
  inner_join(prism_long, by = c("STID" = "stid", "MONTH" = "month")) %>%
  dplyr::select(STID, MONTH, RAIN, PPrec_in)

# Generates a plot w/ X-axis = Mesonet rainfall (in), Y-axis = PRISM rainfall (in), and a a legend for the Station IDs. 
ggplot(data = compare_dat) +
  geom_point(aes(x = RAIN, y = PPrec_in, color = STID)) +
  scale_color_discrete(name = "Station ID") +
  xlab("Mesonet rainfall (in)") +
  ylab("PRISM rainfall (in)")

The tight relationship looks pretty impressive.
However, keep in mind that the Oklahoma mesonet data is one of many sources of station data that are used to developed the interpolated PRISM dataset.
So the two sources of data are not entirely independent.
The following code generates some more summaries of the relationships between these precipitation estimates.

# Carries out a linear regression. The linear regression has a high R-squared and a slope that is slightly less than one.
rain_lm <- lm(PPrec_in ~ RAIN, data = compare_dat) 
# Returns the data.
summary(rain_lm)

## 
## Call:
## lm(formula = PPrec_in ~ RAIN, data = compare_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.58056 -0.20460 -0.06542  0.20472  1.40651 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.25288    0.10994    2.30    0.026 *  
## RAIN         0.92353    0.02497   36.99   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4625 on 46 degrees of freedom
## Multiple R-squared:  0.9675, Adjusted R-squared:  0.9668 
## F-statistic:  1368 on 1 and 46 DF,  p-value: < 2.2e-16

# Summarizes the statistics of the root mean squared error (RMSE), mean absolute error (MAE), and the mean error (ME).
rain_sum <- compare_dat %>%
  summarize(RMSE = sqrt(mean((PPrec_in - RAIN)^2)),
            MAE = mean(abs(PPrec_in - RAIN)),
            ME = mean(PPrec_in - RAIN)) 
# Calls tibble rain_sum
rain_sum

## # A tibble: 1 x 3
##    RMSE   MAE      ME
##   <dbl> <dbl>   <dbl>
## 1 0.497 0.270 -0.0147

Chapter 9 Combining Raster and Vector Data

Chapter 9 Combining Raster and Vector Data 1 - Gridded Meteorological Data

First, Some Definitions

The Libraries Used

9.1 ZONAL STATISTICS

9.2 ZONE SIZE AND RASTER SPATIAL RESOLUTION

9.3 EXTRACTING VALUES WITH POINT DATA