chap9_slidy

Rabin Gora

12/10/2021

Combining Raster and Vector Data.

Brief Introduction of terms used in this chapter:

[Note; There is a prism package that automates data downloading and importing as a raster object. Here will will download the 30-year climatology of annual precipitation for the conterminous US.]

-Mesodata: Data that is gathered from a network of collectively owned and operated automated weather stations that are installed close enough to each other—and report data frequently enough—to observe, measure, and track mesoscale meteorological phenomena.

#Libraries used in this chapter
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1
library(raster)
## Loading required package: sp
## 
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
## 
##     select
library(prism)
## Be sure to set the download folder using `prism_set_dl_dir()`.
prism_set_dl_dir("~/prismtmp")

Zonal Statics

county <- st_read("gdswr_chapter9_data/cb_2018_us_county_20m.shp")
## Reading layer `cb_2018_us_county_20m' from data source 
##   `/Users/rabingora001/Desktop/St_Martin/CSC545-GIS_and_spatial_analysis/FinalPresentaion/chap9_wimberly/gdswr_chapter9_data/cb_2018_us_county_20m.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3220 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.1743 ymin: 17.91377 xmax: 179.7739 ymax: 71.35256
## Geodetic CRS:  NAD83
county <- county %>%
  mutate(state = as.numeric(as.character(STATEFP)),
         fips = as.numeric(as.character(GEOID))) %>%
  #exclude Alaska(FIPS=2), Hawaii(FIPS=15), and other non-mainland US isalnds.
  filter(state != 2, state != 15, state < 60)  

#check the crs of the shapefile.
st_crs(county)
## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]
#plot the county data
ggplot(data = county) +
  geom_sf(fill = NA)

#define the path for the prism data. Here it is in the same project folder. 
options(prism.path = ".")

# Here will will download the 30-year climatology of annual precipitation for the conterminous US
get_prism_normals(type = 'ppt', resolution = '4km', annual = T, keepZip = TRUE)
## 
  |                                                                            
  |                                                                      |   0%
## 
##  already exists. Skipping downloading.
## 
  |                                                                            
  |======================================================================| 100%
# The prism_archive_ls() function allows the user to view the list of downloaded packages
prism_archive_ls()
##  [1] "PRISM_ppt_30yr_normal_4kmM3_annual_bil"
##  [2] "PRISM_ppt_stable_4kmM3_201801_bil"     
##  [3] "PRISM_ppt_stable_4kmM3_201802_bil"     
##  [4] "PRISM_ppt_stable_4kmM3_201803_bil"     
##  [5] "PRISM_ppt_stable_4kmM3_201804_bil"     
##  [6] "PRISM_ppt_stable_4kmM3_201805_bil"     
##  [7] "PRISM_ppt_stable_4kmM3_201806_bil"     
##  [8] "PRISM_ppt_stable_4kmM3_201807_bil"     
##  [9] "PRISM_ppt_stable_4kmM3_201808_bil"     
## [10] "PRISM_ppt_stable_4kmM3_201809_bil"     
## [11] "PRISM_ppt_stable_4kmM3_201810_bil"     
## [12] "PRISM_ppt_stable_4kmM3_201811_bil"     
## [13] "PRISM_ppt_stable_4kmM3_201812_bil"
#The pd_stack() function can then be used to incorporate one or more of these dataset into a raster object.
#points to the downloaded 30-year climatology of annual precipitation for the conterminous US
prism_p30 <- pd_stack(prism_archive_ls()[1]) 

Rasterizing the vector data of counties and use zonal Statistics.

#convert vector polygon to raster dataset.
cnty_ras <- rasterize(county, prism_p30, field = "fips")

#use zonal statistics to connect the two raster data by mean.
cnty_p30 <- zonal(prism_p30, cnty_ras, fun = "mean")

#convert the raster data into a dataframe
cnty_p30 <- data.frame(cnty_p30)
summary(cnty_p30)
##       zone           value        
##  Min.   : 1001   Min.   :  78.88  
##  1st Qu.:19041   1st Qu.: 763.73  
##  Median :29205   Median :1095.83  
##  Mean   :30625   Mean   :1026.01  
##  3rd Qu.:45087   3rd Qu.:1277.06  
##  Max.   :56045   Max.   :3068.14
#dimension of county polygon
dim(county)
## [1] 3108   12
#dinemsiono f 
dim(cnty_p30)
## [1] 3101    2
#check the difference in dimentions for vector data and rasterized data.
setdiff(county$fips, cnty_p30$zone)
## [1] 51580 51685 51610 51570 51683 51600 51678

Zone size and raster spatial resolution

#join the county vector data and cnty_p30 raster data.
cnty_join1 <- left_join(county, cnty_p30, 
                        by = c("fips" = "zone"))

#plot the cnty_join1 map with fill=value.
ggplot(data = cnty_join1) +
  geom_sf(aes(fill = value)) +
  scale_fill_continuous(name = "Precip (mm)")

- when we look at the ‘cnty_join1’ dataset(which can be checked from global environment), all the missing counties have a state FIPS code of 51, which is Virginia.

#filter virginia from cnty_join1.
va_join1 <- filter(cnty_join1, STATEFP == "51")

#plot the virginia map with fill=value.
ggplot(data = va_join1) +
  geom_sf(aes(fill = value)) +
  scale_fill_continuous(name = "Precip (mm)")

- Virginia is unique among states in that it has a number of small, independent cities that are governed separately from the surrounding county and are therefore assigned their own county FIPS codes.

#plot norther virginia by limiting x and y coordinates as shown below.
ggplot(data = va_join1) +
  geom_sf(aes(fill = value)) +
  #put xlim and ylim to zoon further into northern Virginia.
  coord_sf(xlim = c(-77.6, -77.0), ylim = c(38.6, 39.1)) +
  scale_fill_continuous(name = "Precip (mm)")

- Now we can see several small cities with missing precipitation data.

-The solution to deal with this is to use a finer sampling grid to generate the zonal statistics.

#Use the disaggregate() function to reduce the cell size of the PRISM data by a factor of four - changing the cell size to approximately 1 km.
#Bilinear resampling is used to conduct linear interpolation between the center points of the 4 km grid cells to estimate values at the 1 km resolution.
prism_p30_1km <- disaggregate(prism_p30, fact = 4, method = "bilinear")
#Use the new 1 km grid to rasterize the county dataset.
cnty_ras_1km <- rasterize(county, prism_p30_1km, field = "fips")

#Run the zonal() function on the 1 km county and precipitation rasters joing them with their means.
cnty_p30_1km <- zonal(prism_p30_1km, cnty_ras_1km, fun = "mean")

#change the zonal data into a data frame.
cnty_p30_1km <- data.frame(cnty_p30_1km)

#check the summary of this data
summary(cnty_p30_1km)
##       zone            mean        
##  Min.   : 1001   Min.   :  78.39  
##  1st Qu.:19044   1st Qu.: 765.71  
##  Median :29212   Median :1096.10  
##  Mean   :30672   Mean   :1026.01  
##  3rd Qu.:46008   3rd Qu.:1276.26  
##  Max.   :56045   Max.   :3041.39
#dimension of original county vector data
dim(county)
## [1] 3108   12
#dimension of zonal 1 km grid data.
dim(cnty_p30_1km)
## [1] 3108    2
#check the difference in the dimension. the result is 0 difference in dimension as shown below.
setdiff(county$fips, cnty_p30_1km$zone)
## numeric(0)
#left join the county and cnty_p30_1km data by fips and zone.
cnty_join2 <- left_join(county, cnty_p30_1km, 
                        by = c("fips" = "zone"))

# filter virginia in cnty_join2(statefp=51)
va_join2 <- filter(cnty_join2, STATEFP == "51")

#plot VA map with the same coordinates of northern virginia.
ggplot(data = va_join2) +
  geom_sf(aes(fill = mean)) +
  scale_fill_continuous(name = "Precip (mm)") +
  #northern virginia coordinates.
  coord_sf(xlim = c(-77.6, -77.0), ylim = c(38.6, 39.1))

## Extracting values with point data

#downloading the monthly precipitaiton values for 2018 for the conterminous US.
get_prism_monthlys(type = 'ppt', years = 2018,  mon=1:12, keepZip = TRUE)
## 
  |                                                                            
  |                                                                      |   0%
## 
## PRISM_ppt_stable_4kmM3_201801_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |======                                                                |   8%
## 
## PRISM_ppt_stable_4kmM3_201802_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |============                                                          |  17%
## 
## PRISM_ppt_stable_4kmM3_201803_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |==================                                                    |  25%
## 
## PRISM_ppt_stable_4kmM3_201804_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |=======================                                               |  33%
## 
## PRISM_ppt_stable_4kmM3_201805_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |=============================                                         |  42%
## 
## PRISM_ppt_stable_4kmM3_201806_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |===================================                                   |  50%
## 
## PRISM_ppt_stable_4kmM3_201807_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |=========================================                             |  58%
## 
## PRISM_ppt_stable_4kmM3_201808_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |===============================================                       |  67%
## 
## PRISM_ppt_stable_4kmM3_201809_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |====================================================                  |  75%
## 
## PRISM_ppt_stable_4kmM3_201810_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |==========================================================            |  83%
## 
## PRISM_ppt_stable_4kmM3_201811_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |================================================================      |  92%
## 
## PRISM_ppt_stable_4kmM3_201812_bil.zip already exists. Skipping downloading.
## 
  |                                                                            
  |======================================================================| 100%
#check the list of prism packages available.
datalist <- prism_archive_ls()
# stack the choosen lists into a raster object.
prism_prec_2018 <- pd_stack(datalist[2:13])
#read csv of mesodata
mesosm <- read_csv("gdswr_chapter3_data/mesodata_small.csv")
## Rows: 240 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): STID
## dbl  (7): MONTH, YEAR, TMAX, TMIN, HMAX, HMIN, RAIN
## date (1): DATE
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#read geoinfo csv
geo_coords <- read_csv("gdswr_chapter3_data/geoinfo.csv")
## Rows: 143 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): stid, name, city, cdir, cnty, cdiv, clas
## dbl (5): stnm, rang, lat, lon, elev
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#transfer it ro simple feature file accoring to longitude and latitude.
geo_coords <- st_as_sf(geo_coords, coords = c("lon", "lat"))
#choose mystataion based on the STID of msesosm data.
mystations <- unique(mesosm$STID)
#filter the stations for four weather stations.
station_pts <- geo_coords %>%
  filter(stid %in% mystations)
#extract() function is used to obtain the raster data associated with each of the four points
prism_samp <- extract(prism_prec_2018, station_pts, factors = T, df = T)

names(prism_samp) <- c("ID", month.abb)
#There are twelve layers in the prism_prec_2018 raster stack, one for each month. Therefore, the resulting table, prism_samp, has 48 values (4 stations x 12 months).
prism_samp <- bind_cols(prism_samp, station_pts)

#These data are generated in a wide format, with one columns for each month. after bindings these data to station_pts (which contains the geographic coordinates and the station IDs), the data are pivoted into a long format and joined to the mesosm data frame (which contains the station measurements) by month and station ID. 
#The critical variables are the monthly rainfall measured at the mesonet stations (RAIN), and the monthly rainfall estimates from the PRISM dataset converted to inches to match the Mesonet data (“Pprec_in”).
prism_long <- prism_samp %>%
  pivot_longer(Jan:Dec, names_to = "mnth_name", values_to = "PPrec") %>%
  mutate(month = match(mnth_name, month.abb),
         PPrec_in = PPrec * 0.0393701)

compare_dat <- mesosm %>%
  filter(YEAR == 2018) %>%
  inner_join(prism_long, by = c("STID" = "stid", "MONTH" = "month")) %>%
  dplyr::select(STID, MONTH, RAIN, PPrec_in)

#plot the chart with points.
ggplot(data = compare_dat) +
  geom_point(aes(x = RAIN, y = PPrec_in, color = STID)) +
  scale_color_discrete(name = "Station ID") +
  xlab("Mesonet rainfall (in)") +
  ylab("PRISM rainfall (in)")

- The tight relationship looks pretty impressive. However, keep in mind that the Oklahoma mesonet data is one or many sources of station data that are used to developed the interpolated PRISM dataset. So the two sources of data are no entirely independent.

#linear models function. The linear regression has a high R-squared and a slope that is slightly less than one
rain_lm <- lm(PPrec_in ~ RAIN, data = compare_dat)
#summery
summary(rain_lm)
## 
## Call:
## lm(formula = PPrec_in ~ RAIN, data = compare_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.58056 -0.20460 -0.06542  0.20472  1.40651 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.25288    0.10994    2.30    0.026 *  
## RAIN         0.92353    0.02497   36.99   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4625 on 46 degrees of freedom
## Multiple R-squared:  0.9675, Adjusted R-squared:  0.9668 
## F-statistic:  1368 on 1 and 46 DF,  p-value: < 2.2e-16
rain_sum <- compare_dat %>%
  summarize(RMSE = sqrt(mean((PPrec_in - RAIN)^2)),
            MAE = mean(abs(PPrec_in - RAIN)),
            ME = mean(PPrec_in - RAIN))
rain_sum
## # A tibble: 1 × 3
##    RMSE   MAE      ME
##   <dbl> <dbl>   <dbl>
## 1 0.497 0.270 -0.0147