Week 3

Harold Nelson

3/26/2021

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1
library(raster)
## Loading required package: sp
## 
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:tidyr':
## 
##     extract
library(fs)
library(rmapshaper)
## Registered S3 method overwritten by 'geojsonlint':
##   method         from 
##   print.location dplyr

3.5.1 - Get the Vector Data

Download the geopackage version of Australia and put it in your project folder.

Now look at the layers included.

Answer

st_layers("gadm36_AUS.gpkg")
## Driver: GPKG 
## Available layers:
##     layer_name geometry_type features fields
## 1 gadm36_AUS_0 Multi Polygon        1      2
## 2 gadm36_AUS_1 Multi Polygon       11     10
## 3 gadm36_AUS_2 Multi Polygon      569     13

Now read the outline of Australia.

Answer

Ausoutline <- st_read("gadm36_AUS.gpkg",layer='gadm36_AUS_0')
## Reading layer `gadm36_AUS_0' from data source `/Users/haroldnelson/Dropbox/UCL/WK3/gadm36_AUS.gpkg' using driver `GPKG'
## Simple feature collection with 1 feature and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 112.9211 ymin: -55.11694 xmax: 159.1092 ymax: -9.142176
## geographic CRS: WGS 84

Look at the CRS using st_crs().

Answer

st_crs(Ausoutline)
## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["unknown"],
##         AREA["World"],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
st_crs(Ausoutline)$proj4string
## [1] "+proj=longlat +datum=WGS84 +no_defs"

The next thing we want to do is give our outline a good projection for Australia, EPSG 3112. Call the projected version AusoutlinePROJECTED.

Answer

AusoutlinePROJECTED <- Ausoutline %>%
  st_transform(3112)

print(AusoutlinePROJECTED)
## Simple feature collection with 1 feature and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -2083066 ymin: -6460625 xmax: 2346599 ymax: -1115948
## projected CRS:  GDA94 / Geoscience Australia Lambert
##   GID_0    NAME_0                           geom
## 1   AUS Australia MULTIPOLYGON (((1775780 -64...

3.5.2 - Get the Raster Data

Get the maximum temperature data.

Let’s look at the January data. Note how you have to change the name of the file. There has been an upgrade from 2.0 to 2.1. I selected tmax instead of tavg.

Answer

jan<-raster("wc2.1_5m_tmax_01.tif")
jan
## class      : RasterLayer 
## dimensions : 2160, 4320, 9331200  (nrow, ncol, ncell)
## resolution : 0.08333333, 0.08333333  (x, y)
## extent     : -180, 180, -90, 90  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : /Users/haroldnelson/Dropbox/UCL/WK3/wc2.1_5m_tmax_01.tif 
## names      : wc2.1_5m_tmax_01 
## values     : -42.624, 42.27  (min, max)

Get the list of files as an fs object.

Answer

listfiles<-dir_info() %>%
  filter(str_detect(path, ".tif")) %>%
  dplyr::select(path)%>%
  pull()

str(listfiles)
##  'fs_path' chr [1:12] "wc2.1_5m_tmax_01.tif" "wc2.1_5m_tmax_02.tif" ...
listfiles
## wc2.1_5m_tmax_01.tif wc2.1_5m_tmax_02.tif wc2.1_5m_tmax_03.tif 
## wc2.1_5m_tmax_04.tif wc2.1_5m_tmax_05.tif wc2.1_5m_tmax_06.tif 
## wc2.1_5m_tmax_07.tif wc2.1_5m_tmax_08.tif wc2.1_5m_tmax_09.tif 
## wc2.1_5m_tmax_10.tif wc2.1_5m_tmax_11.tif wc2.1_5m_tmax_12.tif

Use the stack function to convert the list of files into a raster stack called worldclimtemp.

Answer

worldclimtemp <- listfiles %>%
  stack()

worldclimtemp
## class      : RasterStack 
## dimensions : 2160, 4320, 9331200, 12  (nrow, ncol, ncell, nlayers)
## resolution : 0.08333333, 0.08333333  (x, y)
## extent     : -180, 180, -90, 90  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## names      : wc2.1_5m_tmax_01, wc2.1_5m_tmax_02, wc2.1_5m_tmax_03, wc2.1_5m_tmax_04, wc2.1_5m_tmax_05, wc2.1_5m_tmax_06, wc2.1_5m_tmax_07, wc2.1_5m_tmax_08, wc2.1_5m_tmax_09, wc2.1_5m_tmax_10, wc2.1_5m_tmax_11, wc2.1_5m_tmax_12 
## min values :          -42.624,          -40.004,          -53.400,          -59.599,          -59.883,          -60.370,          -64.500,          -62.100,          -59.771,          -50.800,          -38.200,          -42.013 
## max values :           42.270,           40.440,           41.620,           43.332,           44.851,           46.813,           48.265,           46.542,           44.184,           41.320,           41.248,           41.523

Months

Create a vector month containing the standard abbreviated names of the months. Make this vector the names of the raster stack.

Answer

month <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", 
           "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

names(worldclimtemp) <- month

Look at worldclimtemp$Jan

Answer

worldclimtemp$Jan
## class      : RasterLayer 
## dimensions : 2160, 4320, 9331200  (nrow, ncol, ncell)
## resolution : 0.08333333, 0.08333333  (x, y)
## extent     : -180, 180, -90, 90  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : /Users/haroldnelson/Dropbox/UCL/WK3/wc2.1_5m_tmax_01.tif 
## names      : Jan 
## values     : -42.624, 42.27  (min, max)

Demo Vectors

Create vectors site, lon and lat with the names and locations of some cities in Australie.

Answer

site <- c("Brisbane", "Melbourne", "Perth", "Sydney", "Broome", "Darwin", "Orange", 
          "Bunbury", "Cairns", "Adelaide", "Gold Coast", "Canberra", "Newcastle", 
          "Wollongong", "Logan City" )
lon <- c(153.03, 144.96, 115.86, 151.21, 122.23, 130.84, 149.10, 115.64, 145.77, 
         138.6, 153.43, 149.13, 151.78, 150.89, 153.12)
lat <- c(-27.47, -37.91, -31.95, -33.87, 17.96, -12.46, -33.28, -33.33, -16.92, 
         -34.93, -28, -35.28, -32.93, -34.42, -27.64)

Dataframe

Create a dataframe samples containig the three vectors.

Answer

samples <- data.frame(site, lon, lat, row.names="site")

Extract the data from the rasterstack for all points in samples.

Answer

AUcitytemp <- raster::extract(worldclimtemp, samples)

Look at AUcitytemp

Answer

AUcitytemp
##            Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
##  [1,] 30.20957 29.80106 28.99255 26.83191 23.88723 21.20426 20.48191 21.89468
##  [2,] 25.05128 25.85897 23.67949 20.17692 16.97179 13.97436 13.30513 14.55897
##  [3,] 30.84100 31.18800 29.03900 25.32700 21.67300 18.73800 17.80200 18.30000
##  [4,] 26.26493 26.55065 25.32987 23.07013 20.25974 17.51818 16.86753 18.12987
##  [5,] 25.61579 26.96842 29.35789 30.93158 31.98421 32.37895 31.91579 31.64737
##  [6,] 31.59841 31.37460 31.76825 32.70794 31.89048 30.38254 30.21905 31.02381
##  [7,] 26.59300 26.17800 23.51000 18.92500 14.35600 10.80700  9.60700 10.96100
##  [8,] 29.26667 29.47778 27.52222 24.02222 20.65556 18.16667 17.12222 17.62222
##  [9,] 31.14348 30.58261 29.87826 28.52717 26.91087 25.21304 24.80217 25.92391
## [10,] 26.88300 27.63000 24.79200 21.29400 17.87000 14.96700 14.24700 15.29100
## [11,] 29.83077 29.45769 28.55385 26.80769 24.26923 21.83846 21.04615 22.18846
## [12,] 27.11900 27.02400 24.32800 20.04200 15.94700 12.35600 11.35600 13.03100
## [13,] 26.64167 26.72500 25.61667 23.55833 20.74167 18.03333 17.45833 18.73333
## [14,] 25.81892 26.00135 24.84865 22.71622 19.97973 17.34459 16.82432 18.01351
## [15,] 29.68600 29.11200 28.31800 26.44800 23.71800 21.15100 20.28600 21.81400
##            Sep      Oct      Nov      Dec
##  [1,] 24.62447 26.73404 28.33404 29.86596
##  [2,] 16.45385 18.96923 21.10000 23.48205
##  [3,] 20.11400 22.20600 25.28600 28.50400
##  [4,] 20.40520 22.48961 23.55584 25.83247
##  [5,] 30.92632 29.26842 27.50526 25.87895
##  [6,] 32.52857 33.22063 33.29683 32.27301
##  [7,] 14.17300 17.84300 21.11300 25.03300
##  [8,] 19.25556 21.16667 23.86667 26.96667
##  [9,] 27.75326 29.32500 30.49130 31.00217
## [10,] 17.43100 20.25500 22.95100 25.31400
## [11,] 24.34615 26.09615 27.81923 29.14615
## [12,] 15.96600 19.24100 22.16900 25.69800
## [13,] 20.89167 22.87500 24.10833 26.33333
## [14,] 20.20676 22.19730 23.11757 25.40270
## [15,] 24.34200 26.14700 27.76000 29.21900

Convert to Dataframe

Create AUcitytemp2 as a tibble with a column named Site in front of the column named January.

Answer

Aucitytemp2 <- AUcitytemp %>% 
  as_tibble()%>% 
  add_column(Site = site, .before = "Jan")

head(Aucitytemp2)
## # A tibble: 6 x 13
##   Site     Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov   Dec
##   <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Brisb…  30.2  29.8  29.0  26.8  23.9  21.2  20.5  21.9  24.6  26.7  28.3  29.9
## 2 Melbo…  25.1  25.9  23.7  20.2  17.0  14.0  13.3  14.6  16.5  19.0  21.1  23.5
## 3 Perth   30.8  31.2  29.0  25.3  21.7  18.7  17.8  18.3  20.1  22.2  25.3  28.5
## 4 Sydney  26.3  26.6  25.3  23.1  20.3  17.5  16.9  18.1  20.4  22.5  23.6  25.8
## 5 Broome  25.6  27.0  29.4  30.9  32.0  32.4  31.9  31.6  30.9  29.3  27.5  25.9
## 6 Darwin  31.6  31.4  31.8  32.7  31.9  30.4  30.2  31.0  32.5  33.2  33.3  32.3

Statistics

Create Perthtemp as the vector of temperatures from the third row of the tibble. Then create a histogram of the values in the vector.

Answer

Perthtemp <- Aucitytemp2[3,] 

hist(as.numeric(Perthtemp))
## Warning in hist(as.numeric(Perthtemp)): NAs introduced by coercion

Simplify

Create a simplified version of Ausoutline as AusoutSIMPLE. This makes it possible to display the outline of Australia in a reasonable time.

We might use st_simplify from the sf package, but that requires a projected input. It is also not as accurate as ms_simplify() from the rmapshaper package,

Answer

AusoutSIMPLE<-Ausoutline %>%
  ms_simplify(keep=0.05)

plot(AusoutSIMPLE$geom)

Cropping

We only want to work with the relevant part of worldclimtemp. Use Ausoutline to crop worldclimtemp.

Answer

Austemp <- Ausoutline %>%
  crop(worldclimtemp,.)

Note that the second argument is “.” The dot is necessary when the piped argument is not the first argument.

Plot the output

Use plot() to see the result.

Answer

plot(Austemp)

exactAus

Use the mask function to crop Austemp so that the resulting raster has only the body of Australia and not the surrounding water.

Answer

# exactAus <- Austemp %>%
#  mask(.,Ausoutline, na.rm = TRUE)

exactAus = mask(Austemp,Ausoutline)
head(exactAus)
##       Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
##  [1,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
##  [2,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
##  [3,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
##  [4,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
##  [5,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
##  [6,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
##  [7,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
##  [8,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
##  [9,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## [10,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## [11,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## [12,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## [13,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## [14,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## [15,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## [16,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## [17,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## [18,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## [19,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## [20,]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA

exactdf

Convert exactAus to a dataframe and look at its head.

Answer

exactdf = as.data.frame(exactAus)
head(exactdf)
##   Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 2  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 3  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 4  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 5  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 6  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA

exactdf_long

Convert the dataframe to a longer one in which the variable names in the current dataframe become the values of a variable named Month and the values in the current dataframe become the values of a variable named Temp.

exactdf_long = exactdf %>% 
  pivot_longer(cols = 1:12, names_to = "Month", values_to = "Temp")

head(exactdf_long)
## # A tibble: 6 x 2
##   Month  Temp
##   <chr> <dbl>
## 1 Jan      NA
## 2 Feb      NA
## 3 Mar      NA
## 4 Apr      NA
## 5 May      NA
## 6 Jun      NA
tail(exactdf_long)
## # A tibble: 6 x 2
##   Month  Temp
##   <chr> <dbl>
## 1 Jul      NA
## 2 Aug      NA
## 3 Sep      NA
## 4 Oct      NA
## 5 Nov      NA
## 6 Dec      NA

How much NA?

Answer

mean(is.na(exactdf_long$Temp))
## [1] 0.6719093

Any Good Data

Answer

sum(!is.na(exactdf_long$Temp))
## [1] 1201812

Valid by Month

Use group_by and summarize to get a count of good values by month.

Answer

exactdf_long %>% 
   group_by(Month) %>% 
   summarize(GoodCount = sum(!is.na(Temp)))
## # A tibble: 12 x 2
##    Month GoodCount
##  * <chr>     <int>
##  1 Apr      100151
##  2 Aug      100151
##  3 Dec      100151
##  4 Feb      100151
##  5 Jan      100151
##  6 Jul      100151
##  7 Jun      100151
##  8 Mar      100151
##  9 May      100151
## 10 Nov      100151
## 11 Oct      100151
## 12 Sep      100151

exactdf_trim

Create exactdf_trim by eliminating the rows with NA values of temp. Also make month a factor with its levels in natural order, starting with “Jan”.

Answer

exactdf_trim = exactdf_long %>% 
  filter(!is.na(Temp)) %>% 
  mutate(Month = factor(Month,levels = c("Jan","Feb","Mar",
                                          "Apr","May","Jun",
                                          "Jul","Aug","Sep",
                                          "Oct","Nov","Dec") ))

Distribution of Temp by Month

Use a histogram and facet by month.

Answer

exactdf_trim %>% 
  ggplot(aes(x = Temp)) +
  geom_histogram() +
  facet_wrap(~Month,nrow = 4)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Density

Use geom_density() instead of geom_histogram()

Answer

exactdf_trim %>% 
  ggplot(aes(x = Temp)) +
  geom_density() +
  facet_wrap(~Month,nrow = 4)