Chapter 9 Notes

Harold Nelson

2023-03-28

Setup

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.1     ✔ purrr   0.3.4
## ✔ tibble  3.2.1     ✔ dplyr   1.1.1
## ✔ tidyr   1.2.1     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(terra)
## terra 1.7.3
## 
## Attaching package: 'terra'
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
## 
## Attaching package: 'tigris'
## 
## The following object is masked from 'package:terra':
## 
##     blocks
library(prism)
## Be sure to set the download folder using `prism_set_dl_dir()`.
library(tmap)

Get County Boundaries

county <- counties(cb = TRUE, 
                   resolution = '20m',
                   progress_bar = FALSE)
## Retrieving data for the year 2020
class(county)
## [1] "sf"         "data.frame"
glimpse(county)
## Rows: 3,221
## Columns: 13
## $ STATEFP    <chr> "05", "08", "26", "28", "38", "39", "21", "02", "48", "40",…
## $ COUNTYFP   <chr> "121", "069", "105", "153", "075", "125", "095", "230", "06…
## $ COUNTYNS   <chr> "00069178", "00198150", "01622995", "00695797", "01034229",…
## $ AFFGEOID   <chr> "0500000US05121", "0500000US08069", "0500000US26105", "0500…
## $ GEOID      <chr> "05121", "08069", "26105", "28153", "38075", "39125", "2109…
## $ NAME       <chr> "Randolph", "Larimer", "Mason", "Wayne", "Renville", "Pauld…
## $ NAMELSAD   <chr> "Randolph County", "Larimer County", "Mason County", "Wayne…
## $ STUSPS     <chr> "AR", "CO", "MI", "MS", "ND", "OH", "KY", "AK", "TX", "OK",…
## $ STATE_NAME <chr> "Arkansas", "Colorado", "Michigan", "Mississippi", "North D…
## $ LSAD       <chr> "06", "06", "06", "06", "06", "06", "06", "12", "06", "06",…
## $ ALAND      <dbl> 1688445990, 6723014102, 1281963206, 2099745602, 2272050275,…
## $ AWATER     <dbl> 10370823, 98984559, 1935616622, 7255476, 40658499, 6245396,…
## $ geometry   <MULTIPOLYGON [°]> MULTIPOLYGON (((-91.40492 3..., MULTIPOLYGON (…

Modify

Make numeric versions of STATEFP and GEOID. Eliminate Alaska, Hawaii and other islands.

county <- county %>%
  mutate(state = as.numeric(STATEFP),
         fips = as.numeric(GEOID)) %>%
  filter(state != 2, state != 15, state < 60) 

Examine CRS

writeLines(st_crs(county)$WktPretty)
## GEOGCS["NAD83",
##     DATUM["North_American_Datum_1983",
##         SPHEROID["GRS 1980",6378137,298.257222101]],
##     PRIMEM["Greenwich",0],
##     UNIT["degree",0.0174532925199433,
##         AUTHORITY["EPSG","9122"]],
##     AXIS["Latitude",NORTH],
##     AXIS["Longitude",EAST],
##     AUTHORITY["EPSG","4269"]]

The same information could be obtained by a simple call to st_crs(). The output is a little harder to read.

st_crs(county)
## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]

The most important thing is the EPSG code.

PRISM

Give prism access to the current working directory.

options(prism.path = ".")

Download Rainfall

First, download the 30-year normals for annual precipitation from prism.

get_prism_normals(type = 'ppt', 
                  resolution = '4km', 
                  annual = T,
                  keepZip = TRUE)
## 
  |                                                                            
  |                                                                      |   0%
## 
## PRISM_ppt_30yr_normal_4kmM4_annual_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |======================================================================| 100%

Next, download the precipitation data for all months of 2018.

get_prism_monthlys(type = 'ppt', 
                   years = 2018,  
                   mon=1:12, 
                   keepZip = TRUE)
## 
  |                                                                            
  |                                                                      |   0%
## 
## PRISM_ppt_stable_4kmM3_201801_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |======                                                                |   8%
## 
## PRISM_ppt_stable_4kmM3_201802_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |============                                                          |  17%
## 
## PRISM_ppt_stable_4kmM3_201803_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |==================                                                    |  25%
## 
## PRISM_ppt_stable_4kmM3_201804_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |=======================                                               |  33%
## 
## PRISM_ppt_stable_4kmM3_201805_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |=============================                                         |  42%
## 
## PRISM_ppt_stable_4kmM3_201806_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |===================================                                   |  50%
## 
## PRISM_ppt_stable_4kmM3_201807_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |=========================================                             |  58%
## 
## PRISM_ppt_stable_4kmM3_201808_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |===============================================                       |  67%
## 
## PRISM_ppt_stable_4kmM3_201809_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |====================================================                  |  75%
## 
## PRISM_ppt_stable_4kmM3_201810_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |==========================================================            |  83%
## 
## PRISM_ppt_stable_4kmM3_201811_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |================================================================      |  92%
## 
## PRISM_ppt_stable_4kmM3_201812_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |======================================================================| 100%

Note that resolution for the monthly data is not specified. Only 4km is available.

Organize File Access

Get a list of all the downloaded files and store it in prism_files.

prism_files <- prism_archive_ls()
prism_files
##  [1] "PRISM_ppt_30yr_normal_4kmM4_annual_bil"
##  [2] "PRISM_ppt_stable_4kmM3_201801_bil"     
##  [3] "PRISM_ppt_stable_4kmM3_201802_bil"     
##  [4] "PRISM_ppt_stable_4kmM3_201803_bil"     
##  [5] "PRISM_ppt_stable_4kmM3_201804_bil"     
##  [6] "PRISM_ppt_stable_4kmM3_201805_bil"     
##  [7] "PRISM_ppt_stable_4kmM3_201806_bil"     
##  [8] "PRISM_ppt_stable_4kmM3_201807_bil"     
##  [9] "PRISM_ppt_stable_4kmM3_201808_bil"     
## [10] "PRISM_ppt_stable_4kmM3_201809_bil"     
## [11] "PRISM_ppt_stable_4kmM3_201810_bil"     
## [12] "PRISM_ppt_stable_4kmM3_201811_bil"     
## [13] "PRISM_ppt_stable_4kmM3_201812_bil"
str(prism_files)
##  chr [1:13] "PRISM_ppt_30yr_normal_4kmM4_annual_bil" ...

Define the paths to the prism data.

prism_paths <- file.path(".", 
                         prism_files, 
                         paste0(prism_files, ".bil"))

What is prism_paths?

str(prism_paths)
##  chr [1:13] "./PRISM_ppt_30yr_normal_4kmM4_annual_bil/PRISM_ppt_30yr_normal_4kmM4_annual_bil.bil" ...

It’s just a vector of character strings. Look at the first item.

prism_paths[1]
## [1] "./PRISM_ppt_30yr_normal_4kmM4_annual_bil/PRISM_ppt_30yr_normal_4kmM4_annual_bil.bil"

Convert to Raster

Convert the prism data to rasters using the rast() function from terrs.

prism_p30 <- rast(prism_paths[1])
prism_prec_2018 <- rast(prism_paths[2:13])

What are these things?

prism_p30
## class       : SpatRaster 
## dimensions  : 621, 1405, 1  (nrow, ncol, nlyr)
## resolution  : 0.04166667, 0.04166667  (x, y)
## extent      : -125.0208, -66.47917, 24.0625, 49.9375  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat NAD83 
## source      : PRISM_ppt_30yr_normal_4kmM4_annual_bil.bil 
## name        : PRISM_ppt_30yr_normal_4kmM4_annual_bil 
## min value   :                                46.2574 
## max value   :                              5611.1611
str(prism_p30)
## S4 class 'SpatRaster' [package "terra"]
crs(prism_p30)
## [1] "GEOGCRS[\"NAD83\",\n    DATUM[\"North American Datum 1983\",\n        ELLIPSOID[\"GRS 1980\",6378137,298.257222101,\n            LENGTHUNIT[\"metre\",1]],\n        ID[\"EPSG\",6269]],\n    PRIMEM[\"Greenwich\",0,\n        ANGLEUNIT[\"Degree\",0.0174532925199433]],\n    CS[ellipsoidal,2],\n        AXIS[\"longitude\",east,\n            ORDER[1],\n            ANGLEUNIT[\"Degree\",0.0174532925199433]],\n        AXIS[\"latitude\",north,\n            ORDER[2],\n            ANGLEUNIT[\"Degree\",0.0174532925199433]]]"
prism_prec_2018
## class       : SpatRaster 
## dimensions  : 621, 1405, 12  (nrow, ncol, nlyr)
## resolution  : 0.04166667, 0.04166667  (x, y)
## extent      : -125.0208, -66.47917, 24.0625, 49.9375  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat NAD83 
## sources     : PRISM_ppt_stable_4kmM3_201801_bil.bil  
##               PRISM_ppt_stable_4kmM3_201802_bil.bil  
##               PRISM_ppt_stable_4kmM3_201803_bil.bil  
##               ... and 9 more source(s)
## names       : PRISM~1_bil, PRISM~2_bil, PRISM~3_bil, PRISM~4_bil, PRISM~5_bil, PRISM~6_bil, ... 
## min values  :       0.000,       0.000,       0.000,       0.000,       0.000,       0.000, ... 
## max values  :    1696.978,     799.842,    1058.936,     941.047,     694.387,     535.787, ...
str(prism_prec_2018)
## S4 class 'SpatRaster' [package "terra"]

Section 9.2: Zonal Statistics

Rasterize the Vector Data

cnty_ras <- rasterize(vect(county), 
                      prism_p30, 
                      field = "fips")

Create the Zonal Mean

Also examine the results.

cnty_p30 <- zonal(prism_p30, 
                  cnty_ras, 
                  fun = "mean", 
                  na.rm = TRUE)
summary(cnty_p30)
##       fips       PRISM_ppt_30yr_normal_4kmM4_annual_bil
##  Min.   : 1001   Min.   :  79.69                       
##  1st Qu.:19041   1st Qu.: 761.91                       
##  Median :29205   Median :1091.00                       
##  Mean   :30625   Mean   :1022.57                       
##  3rd Qu.:45087   3rd Qu.:1274.46                       
##  Max.   :56045   Max.   :3044.25
cnty_p30 <- rename(cnty_p30, 
                   precip = 2)

summary(cnty_p30)
##       fips           precip       
##  Min.   : 1001   Min.   :  79.69  
##  1st Qu.:19041   1st Qu.: 761.91  
##  Median :29205   Median :1091.00  
##  Mean   :30625   Mean   :1022.57  
##  3rd Qu.:45087   3rd Qu.:1274.46  
##  Max.   :56045   Max.   :3044.25
str(cnty_p30)
## 'data.frame':    3101 obs. of  2 variables:
##  $ fips  : num  1001 1003 1005 1007 1009 ...
##  $ precip: num  1415 1696 1375 1431 1481 ...

Section 9.3

We will look closely at the visualization, but not concern ourselves with the oddities of counties in Virgina.

The first graphs visualize US counties from the perspective of precipitation. Let’s repeat that. I’ll only do the second graph with the preferred palette.

cnty_join1 <- left_join(county, 
                        cnty_p30, 
                        by = "fips")

ggplot(data = cnty_join1) +
  geom_sf(aes(fill = precip), size = 0.1) +
  scale_fill_distiller(name = "Precip (mm)", 
                       palette = "YlGnBu", 
                       direction = 1,
                       trans = "log", 
                       breaks = c(100, 300, 1000, 3000)) +
  theme_bw() +
  theme(legend.position = "bottom")

Compare with a Raster

Looking at this, I have to ask if a simple overlay of a raster and the county borders map might work just as well.

Let’s try that.

tm_shape(prism_p30) +
  tm_raster(style = "cont", palette = "YlGnBu") +
tm_shape(county) +
  tm_borders() +
tm_layout(legend.show = F)

This is not a good visualization. The problem is that plotting individual pixels leads to extreme values. Accommodating the extreme values leaves only a small range of colors to handle most of the variation. We can ameliorate the situation by plotting logs instead of raw values.

prism_p30_log = log10(prism_p30)
tm_shape(prism_p30_log) +
  tm_raster(style = "cont", palette = "YlGnBu") +
tm_shape(county) +
  tm_borders() +
tm_layout(legend.show = F)

This is very comparable to what we got with the zonal values.

Why Zonal Statistics?

Possibly visualization is not the greatest strenght of zonal statistics. Focus on the fact that we have a precipitation value associated with every county in the US.

We could easily list the wettest and driest counties.

Driest first.

driest = cnty_join1 %>% 
  arrange(precip) %>% 
  select(NAME, STATE_NAME,precip) %>% 
  st_drop_geometry() 

head(driest,10)
##              NAME STATE_NAME    precip
## 1        Imperial California  79.69049
## 2            Yuma    Arizona 114.80269
## 3          La Paz    Arizona 150.13433
## 4       Esmeralda     Nevada 164.12375
## 5  San Bernardino California 169.27685
## 6            Inyo California 172.46073
## 7       Churchill     Nevada 174.94690
## 8           Clark     Nevada 183.11072
## 9         Mineral     Nevada 188.37015
## 10      Riverside California 197.61710

Now the wettest

wettest = cnty_join1 %>% 
  arrange(-precip) %>% 
  select(NAME, STATE_NAME,precip) %>% 
  st_drop_geometry() 

  
head(wettest,10)
##            NAME STATE_NAME   precip
## 1     Tillamook     Oregon 3044.255
## 2         Curry     Oregon 2813.129
## 3     Jefferson Washington 2761.666
## 4       Clatsop     Oregon 2719.755
## 5  Grays Harbor Washington 2665.338
## 6     Del Norte California 2559.314
## 7         Mason Washington 2471.506
## 8      Skamania Washington 2456.403
## 9       Pacific Washington 2386.468
## 10    Wahkiakum Washington 2384.799

Note that six of the top 10 wettest counties are in western Washingon. All are on the west coast.

Washington State

I want to look at the counties of Washington State.

wettest$rank = 1:nrow(wettest)

wa = wettest %>% 
  filter(STATE_NAME == "Washington") %>% 
  select(-STATE_NAME)

wa
##            NAME    precip rank
## 1     Jefferson 2761.6665    3
## 2  Grays Harbor 2665.3378    5
## 3         Mason 2471.5061    7
## 4      Skamania 2456.4032    8
## 5       Pacific 2386.4677    9
## 6     Wahkiakum 2384.7989   10
## 7       Clallam 2227.7087   12
## 8     Snohomish 2207.5937   13
## 9        Skagit 2171.2780   14
## 10      Whatcom 2082.9538   15
## 11      Cowlitz 2059.3605   16
## 12         King 2006.3862   17
## 13        Clark 1830.7389   22
## 14        Lewis 1760.7343   28
## 15       Pierce 1599.1150   89
## 16     Thurston 1366.1374  459
## 17       Kitsap 1175.0280 1282
## 18       Chelan 1113.2213 1491
## 19     Kittitas  914.4667 2012
## 20 Pend Oreille  912.6468 2017
## 21     Columbia  730.3880 2366
## 22     San Juan  709.7141 2395
## 23       Island  689.6902 2431
## 24       Yakima  641.8042 2513
## 25      Stevens  610.2847 2560
## 26     Okanogan  598.6109 2579
## 27    Klickitat  563.5194 2640
## 28     Garfield  550.3559 2662
## 29      Spokane  539.3699 2681
## 30        Ferry  524.0472 2710
## 31      Whitman  467.5955 2813
## 32       Asotin  448.3594 2852
## 33  Walla Walla  408.4534 2916
## 34      Lincoln  334.8210 3016
## 35      Douglas  293.0298 3054
## 36        Adams  277.5832 3060
## 37     Franklin  239.1973 3082
## 38       Benton  220.1659 3087
## 39        Grant  216.7848 3088

A Cleveland Style Plot

wa %>% 
  ggplot(aes(x = precip,y = reorder(NAME,precip),fill = precip)) +
  geom_col() +
  ggtitle("Precipitation in Washington Counties")

A Map

A Map is interesting. I’ll just use the 4k raster and tmap. Note that I’m doing a log transformation.

wa_sf = county %>% 
  filter(state == 53) %>% 
  st_as_sf()

wa_sf = st_transform(wa_sf, crs(prism_p30))
wa_rast = crop(prism_p30, wa_sf)
wa_rast = mask(wa_rast, wa_sf)


tm_shape(log(wa_rast)) +
  tm_raster(style = "cont",palette = "YlGn") +
tm_shape(wa_sf) +
  tm_borders(lwd = 2) +
tm_layout(legend.show = F)

With this palette, darker is wetter.

Section 9.4

The purpose of this section is to acquaint you with getting values from individual points in a raster.

I want to use a single location, the Olympia Airport, for which I have weather data.

First, get the location of the point in a sf object/dataframe.

lat = 46.9753197
lon = -122.8993301
name = "OlyAir"
oly_air = data.frame(name,lon,lat)
oly_air = st_as_sf(oly_air,
                   coords = c("lon", "lat"))
str(oly_air)
## Classes 'sf' and 'data.frame':   1 obs. of  2 variables:
##  $ name    : chr "OlyAir"
##  $ geometry:sfc_POINT of length 1; first list element:  'XY' num  -123 47
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA
##   ..- attr(*, "names")= chr "name"

Actual Precipitation Values

Load the file OAW23 and get the monthly precipitation values for 2018.

load("OAW23.Rdata")

monthlies = OAW23 %>% 
  filter(yr == 2018) %>% 
  group_by(mo) %>% 
  summarize(mo_precip = sum(PRCP))

monthlies
## # A tibble: 12 × 2
##    mo    mo_precip
##    <fct>     <dbl>
##  1 1          9.87
##  2 2          3.63
##  3 3          2.59
##  4 4          7.24
##  5 5          0.24
##  6 6          0.88
##  7 7          0.07
##  8 8          0.17
##  9 9          2.06
## 10 10         4.34
## 11 11         5.89
## 12 12         8.34

Values from Prism

Now get the prism estimate for our point.

prism_samp <- extract(prism_prec_2018, 
                      vect(oly_air), 
                      factors = T, 
                      df = T)

prism_samp
##   ID PRISM_ppt_stable_4kmM3_201801_bil PRISM_ppt_stable_4kmM3_201802_bil
## 1  1                           269.074                           110.794
##   PRISM_ppt_stable_4kmM3_201803_bil PRISM_ppt_stable_4kmM3_201804_bil
## 1                            79.484                           183.061
##   PRISM_ppt_stable_4kmM3_201805_bil PRISM_ppt_stable_4kmM3_201806_bil
## 1                             3.373                            28.147
##   PRISM_ppt_stable_4kmM3_201807_bil PRISM_ppt_stable_4kmM3_201808_bil
## 1                             1.852                             6.243
##   PRISM_ppt_stable_4kmM3_201809_bil PRISM_ppt_stable_4kmM3_201810_bil
## 1                            49.675                            121.44
##   PRISM_ppt_stable_4kmM3_201811_bil PRISM_ppt_stable_4kmM3_201812_bil
## 1                            165.85                            265.77

We need to pivot to long form and convert the data from metric to inches.

prism_long = pivot_longer(prism_samp,
                          cols = 2:13,
                          names_to = "Month",
                          values_to = "prism_precip")


prism_long = prism_long %>% 
  select(prism_precip) %>% 
  mutate(prism_precip = 0.0393701 * prism_precip) 

prism_long
## # A tibble: 12 × 1
##    prism_precip
##           <dbl>
##  1      10.6   
##  2       4.36  
##  3       3.13  
##  4       7.21  
##  5       0.133 
##  6       1.11  
##  7       0.0729
##  8       0.246 
##  9       1.96  
## 10       4.78  
## 11       6.53  
## 12      10.5

Combine and Compare

We can use cbind() to put these two monthly series into a dataframe for comparison.

comparison = cbind(monthlies,prism_long) %>% 
  mutate(diff = mo_precip - prism_precip,
         ratio = mo_precip/prism_precip)

comparison %>% 
  ggplot(aes(x = mo_precip, y = prism_precip)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Scatterplot of Prism agains Actual")
## `geom_smooth()` using formula = 'y ~ x'

Time Series Plots

comparison %>% 
  ggplot(aes(x = mo)) +
  geom_point(aes(y = mo_precip),color = "black",size = 3) +
  geom_point(aes(y =prism_precip)   ,color = "red", size = 3 ) +
  ggtitle("Time Series Plots of Prism and Actual Values")

Time Series Plot of Ratio

comparison %>% 
  ggplot(aes(x = mo)) +
  geom_point(aes(y = ratio),color = "black",size = 3) +
  ggtitle("Time Series Plot of Ratio")

Time Series Plot of Difference

comparison %>% 
  ggplot(aes(x = mo)) +
  geom_point(aes(y = diff),color = "black",size = 3) +
  ggtitle("Time Series Plot of Difference")