Département de géographie and Centre d’études nordiques, Université de Montréal, Montréal, QC, Canada. christoforos.pappas@umontreal.ca
Department of Water Resources, Czech University of Life Sciences Prague, Prague, Czech Republic. markonis@fzp.czu.cz
This two-day workshop focuses on the integration of stochastic and deterministic methods for understanding and modelling the Earth system. The workshop provides hands-on training in the R programming language on cross-scale analyses of Earth system data, including, but not limited to, hydrometeorological processes (e.g., temperature and precipitation) and ecosystem processes (e.g., tree growth and terrestrial carbon dynamics). By analyzing how the variability of Earth system observations changes across scales, the underlying processes can be revealed, shedding light on dominant mechanisms and critical spatiotemporal scales.
The first day of the workshop starts with the theoretical foundations of cross-scale data analysis, including selected examples from the published literature. Traditional techniques for the exploration of multi-scale fluctuations (e.g., wavelet analysis) as well as new model-data integration approaches (the scalegram) will be presented, confronting observed patterns with model simulation results across a continuum of spatiotemporal scales. Hands-on training with publicly available Earth system observations will be provided. The second day is optional and is designed to guide participants in applying the methods presented during the previous day to their own data sets, addressing science questions relevant to their research. Here, we aim for an open-ended and conversational session, tailored to the specific needs of each researcher.
This two-day workshop is presented through the Canada Research Chair in Atmospheric Biogeosciences in High Latitudes based in the Département de géographie at the Université de Montréal.
# Install (if needed) and load the required packages
list_of_packages <- c("data.table", "reshape2", "RColorBrewer", "zoo", "ggplot2",
                      "devtools", "latticeExtra", "scales", "RNCEP", "gimms",
                      "ncdf4", "parallel", "raster", "longmemo", "HKprocess",
                      "shiny", "FGN", "moments", "Lmoments")
new_packages <- list_of_packages[!(list_of_packages %in% installed.packages()[, "Package"])]
if (length(new_packages)) install.packages(new_packages)
lapply(list_of_packages, require, character.only = T)
devtools::install_github("imarkonis/scalegram") # install the scalegram package from GitHub
require("scalegram")
set.seed(1) # fix the random number generator for reproducibility of results
ts_length <- 10000
repl_no <- 5
cols <- brewer.pal(9, "Set1")[c(1:3)]
# linear/deterministic trends
# Equation: y = a * x
lm_df <- data.frame(lm1 = -10 * (1:ts_length),
lm2 = 10 * (1:ts_length))
# harmonic fluctuation
#Equation: y = a * sin(b * t)
t <- 1:ts_length
a <- 3
period <- 20 # periodicity
hf_df <- data.frame(y1 = a * sin((2 * pi / period) * t))
# random walk
rw_df <- data.frame(rw1 = cumsum(rnorm(n = ts_length)),
rw2 = cumsum(rnorm(n = ts_length)))
#plots
layout(matrix(c(1, 1, 2,
3, 3, 4,
5, 5, 6),
nrow = 3,
byrow = T))
par(oma = c(0, 0, 2, 0))
plot.ts(lm_df[ ,1], ylab = "Linear trend", col = cols[3])
acf(lm_df[ ,1], main = NA, col = cols[3]) # autocorrelation function
plot.ts(hf_df[1:1000, 1], ylab = "Harmonic fluctuation", col = cols[1])
acf(hf_df[ ,1], main = NA, col = cols[1])
plot.ts(rw_df[ ,1], ylab = "Random walk", col = cols[2])
acf(rw_df[ ,1], main = NA, col = cols[2])
title("Time series and ACFs", outer = T)
A random walk is a stochastic process that describes a path consisting of a succession of random steps (x_t = x_{t-1} + e_t, with e_t a random increment) in some mathematical space.
See the scalegram package documentation (?scalegram).
scale_lm <- scalegram(as.matrix(lm_df), stat = "sd", threshold = 100, plot = F)
scale_lm$Model <- "Linear trend"
scale_rw <- scalegram(as.matrix(rw_df), stat = "sd", threshold = 100, plot = F)
scale_rw$Model <- "Random walk"
scale_hf <- scalegram(as.matrix(hf_df), stat = "sd", threshold = 100, plot = F)
scale_hf$Model <- "Harmonic fluctuation"
# compare different models
scale_a_df <- data.frame(rbind(scale_lm, scale_rw, scale_hf))
scale_a_comparison <- ggplot(data = scale_a_df,
aes(x = scale, y = value, group = interaction(Model, variable))) +
geom_line(aes(colour = Model), alpha = 0.2, show.legend = F) +
geom_point(aes(colour = Model), alpha = 0.2, show.legend = F) +
geom_tile(aes(fill = Model)) +
scale_y_log10("Standard deviation [-]",
labels = trans_format("log10", math_format(10 ^ .x)),
breaks = c(min(scale_a_df$value, na.rm = T), 1)) +
scale_x_log10("Aggregation scale [-]",
labels = trans_format("log10", math_format(10 ^ .x))) +
scale_fill_manual("", values = cols) +
scale_colour_manual("", values = cols) +
annotation_logticks(sides = "b") +
theme_bw() +
theme(panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
aspect.ratio = 1)
print(scale_a_comparison)
White noise corresponds to a purely random process, with no correlation between successive values.
An autoregressive process is a Markovian process characterized by short-term persistence; the AR(1) recursion is x_t = rho * x_{t-1} + e_t.
The fractional Gaussian noise (fGn) model is the increment process of fractional Brownian motion, a generalization of the Wiener process, and is characterized by long-term persistence.
# white noise: repl_no independent realizations of length ts_length
wn_df <- matrix(rnorm(n = ts_length * repl_no), ncol = repl_no)
# autoregressive model: repl_no independent AR(1) realizations
ar1_df <- replicate(repl_no, arima.sim(model = list(ar = 0.8), n = ts_length))
# fGn model: repl_no independent fGn realizations
fgn_df <- replicate(repl_no, simFGN0(n = ts_length, H = 0.9))
# plots
layout(matrix(c(1, 1, 2,
3, 3, 4,
5, 5, 6),
nrow = 3,
byrow = T))
par(oma = c(0, 0, 2, 0))
plot.ts(wn_df[ ,1], ylab = "White noise", col = cols[3])
acf(wn_df[ ,1], main = NA, col = cols[3])
plot.ts(ar1_df[ ,1], ylab = "AR(1)", col = cols[2])
acf(ar1_df[,1], main = NA, col = cols[2])
plot.ts(fgn_df[ ,1], ylab = "fGn", col = cols[1])
acf(fgn_df[ ,1], main = NA, col = cols[1])
title("Time series and ACFs", outer = T)
scale_wn <- scalegram(wn_df, stat = "sd", threshold = 100, plot = F)
scale_wn$Model <- "White noise"
scale_ar1 <- scalegram(ar1_df, stat = "sd", threshold = 100, plot = F)
scale_ar1$Model <- "AR(1)"
scale_fgn <- scalegram(fgn_df, stat = "sd", threshold = 100, plot = F)
scale_fgn$Model <- "fGn"
scale_b_df <- data.frame(rbind(scale_wn, scale_ar1, scale_fgn))
scale_b_comparison <- ggplot(data = scale_b_df,
aes(x = scale, y = value, group=interaction(Model, variable))) +
geom_line(aes(colour = Model), alpha = 0.2, show.legend = F) +
geom_point(aes(colour = Model), alpha = 0.2, show.legend = F) +
geom_smooth(aes(group = Model), se = F, method = "loess", colour= "black", size=1.5) +
geom_smooth(aes(group = Model), se = F, method = "loess", colour= "white") +
geom_tile(aes(fill = Model)) +
scale_y_log10("Standard deviation [-]",
labels = trans_format("log10", math_format(10 ^ .x)),
breaks = c(min(scale_b_df$value, na.rm = T), 1)) +
scale_x_log10("Aggregation scale [-]",
labels = trans_format("log10", math_format(10 ^ .x))) +
scale_fill_manual("", values = cols) +
scale_colour_manual("", values = cols) +
annotation_logticks(sides = "b") +
theme_bw() +
theme(panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
aspect.ratio = 1)
print(scale_b_comparison)
Exercise 1: Modify the parameters of the white noise process (e.g., uniform/normal distribution), the autoregressive process (lag-1 autocorrelation coefficient in (-1, 1)), and the long-term persistence process (Hurst coefficient in (0, 1)).
Exercise 2: Create a periodic process with two harmonics, T_1 = 24 h (diurnal cycle) and T_2 = 1 year (annual cycle); a starter sketch follows the exercise list.
Exercise 3: Plot the scalegrams of time series that combine (a) a linear trend and a white noise/autoregressive process (rho = 0.8), (b) the harmonic fluctuation from Exercise 2 and an fGn process (H = 0.85), and (c) a step function with mean in (-2, 2).
Exercise 4: [optional] Explore other statistical properties, e.g., skewness & L-moments.
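A possible starting point for Exercise 2 (a minimal sketch; the hourly time step, three-year length, and unit amplitudes are assumptions):
# Exercise 2 starter: periodic process with diurnal and annual harmonics
t_hourly <- 1:(24 * 365 * 3) # three years of hourly time steps
T1 <- 24 # diurnal cycle [h]
T2 <- 24 * 365 # annual cycle [h]
x_periodic <- sin(2 * pi * t_hourly / T1) + sin(2 * pi * t_hourly / T2)
plot.ts(x_periodic[1:(24 * 14)]) # inspect the first two weeks
scale_per <- scalegram(x_periodic, stat = "sd", threshold = 100, plot = F)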
An overview of state-of-the-art precipitation datasets can be found here.
Here we use the NCEP/DOE Reanalysis 2 dataset, as it is directly accessible from R (package RNCEP).
NCEP/DOE Reanalysis 2 Data
dir.create("./workshop2018")
setwd("./workshop2018") #Set working directory
dir.create("./data")
dir.create("./source")
dir.create("./bin")
dir.create("./results")
More information about the data: https://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis2.html
Variables of interest:
air.2m: air temperature at 2 m above ground [deg K]
prate.sfc: precipitation rate at the surface [kg/m^2/s]
This information should go into a read.me file.
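For example (a minimal sketch; the file content below is a placeholder):
# write a short read.me describing the raw data (placeholder wording)
writeLines(c("NCEP/DOE Reanalysis 2, Montreal grid cells, 1979-2016, 6-h step",
             "air.2m: air temperature at 2 m above ground [K]",
             "prate.sfc: precipitation rate at the surface [kg/m^2/s]"),
           "./data/read.me")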
The NCEP.gather function (its output is stored below in ncep_dload) returns a three-dimensional array of weather data. The three dimensions are latitude, longitude, and datetime, reflected in the dimnames of the array. Datetimes are always expressed in UTC with the format “%Y_%m_%d_%H”. Optionally, the units of the queried variable are printed upon completion.
ncep_raw <- list() # use a list in case time series of different lengths are combined
site_coords <- data.frame(lat = 45.5, lon = 286.5) # Montréal
coordinates(site_coords) <- c("lon", "lat")
start_day <- 1
end_day <- 31
start_month <- 1
end_month <- 12
start_year <- 1979
end_year <- 2016
time_as_date <- as.POSIXct(seq(ISOdate(start_year, start_month, start_day, 00, 00, tz = "UTC"), # from
                               ISOdate(end_year, end_month, end_day, 18, 00, tz = "UTC"),       # to
                               "6 h"), tz = "UTC")                                              # by
start_date <- as.Date(head(time_as_date, 1))
end_date <- as.Date(tail(time_as_date, 1))
We download the data from the public repository. This should be a single script, e.g., “download_ncep.R”.
variables <- c("air.2m", "prate.sfc")
n_variables <- length(variables)
for (j in 1:n_variables){# loop through variables
ncep_dload <- NCEP.gather(
variable = variables[j],
level = 'gaussian',
months.minmax = c(start_month, end_month), # overall length
years.minmax = c(start_year, end_year),    # of the dataset
lat.southnorth = c(site_coords@coords[2], site_coords@coords[2]),
lon.westeast = c(site_coords@coords[1], site_coords@coords[1]),
reanalysis2 = T,
return.units = T,
status.bar = T
)
# area mean: average the 4 surrounding grid cells at each time step
ncep_raw[[j]] <- apply(ncep_dload, 3, mean, na.rm=T)
} # end loop through variables
names(ncep_raw) <- c("temperature", "precipitation")
saveRDS(ncep_raw, "./data/montreal_ncep_raw.rds")
rm(ncep_raw); gc() #clear memory [always try to keep memory usage as low as possible]
Raw data are saved and will be treated as read-only. Any further manipulation should be saved separately.
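To enforce this on disk, one option is to remove write permissions with base R (a minimal sketch; the mode string assumes a Unix-like system):
Sys.chmod("./data/montreal_ncep_raw.rds", mode = "0444") # owner/group/other: read-only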
ncep_raw <- readRDS("./data/montreal_ncep_raw.rds") #created by download_ncep.R
getwd()
montreal_ncep <- data.table(melt(ncep_raw))
colnames(montreal_ncep)[2] <- "variable"
montreal_ncep[variable == "temperature", value := value - 273] #Change from K to C
montreal_ncep[variable == "precipitation", value := value * 3600 * 6] #Change from Kg/m^2/s to mm/6h
montreal_ncep[, time := rep(time_as_date, n_variables)]
setcolorder(montreal_ncep, c("variable", "time", "value"))
saveRDS(montreal_ncep, "./data/montreal_ncep.rds")
rm(montreal_ncep); gc()
Again, this should be a separate script, “data_manipulation.R”.
Aggregate the 6-h time series to a monthly time step and estimate the scalegrams
montreal_ncep <- readRDS("./data/montreal_ncep.rds") #created by data_manipulation.R
scale_montreal_ncep_6h <- data.table(montreal_ncep[, scalegram(value, plot = F), variable])[,1:3]
plot_scalegram(scale_montreal_ncep_6h)
#Transform to matrix for parallel computing
#montreal_ncep_mat <- dcast(data = montreal_ncep, time~variable, value.var = "value")
#system.time(scale_montreal_ncep_6h <- scalegram(as.matrix(montreal_ncep_mat[,-1]), plot=F))
montreal_ncep[, month := month(time)]
montreal_ncep[, year := year(time)]
#Total (sum) monthly precipitation & Mean monthly temperature
montreal_ncep_month <-
rbind(montreal_ncep[variable == "temperature", mean(value), list(month, year, variable)],
montreal_ncep[variable == "precipitation", sum(value), list(month, year,variable)])
colnames(montreal_ncep_month)[4] = "value"
scale_montreal_ncep_month <- data.table(montreal_ncep_month[, scalegram(value, plot = F), variable])[,1:3]
plot_scalegram(scale_montreal_ncep_month)
Discussion: Is the above script in line with the workflow principles of our project?
Rescale scalegrams
scale_P_month_6h <- scale_montreal_ncep_month[variable == "precipitation"]
scale_P_month_6h$scale <- scale_P_month_6h$scale * 120 # 4 six-hour steps per day * 30 days per month = 120
scale_P_month_6h$variable <- "Monthly precipitation"
dummy <- scale_montreal_ncep_6h[variable == "precipitation"]
dummy$variable <- "6-h precipitation"
comb_plot <- rbind(dummy, scale_P_month_6h)
plot_scalegram(comb_plot)
rescale_P <- rescale_scalegram( # rescale the monthly scalegram to match the 6-h scalegram at the monthly scale
scalegram_coarse = scale_montreal_ncep_month[variable == "precipitation"],
scalegram_fine = scale_montreal_ncep_6h[variable == "precipitation"],
scale_ratio = 4 * 30)
rescale_P$variable <- "Monthly precipitation"
dummy <- scale_montreal_ncep_6h[variable == "precipitation"]
dummy$variable <- "6-h precipitation"
comb_p_plot <- rbind(dummy, rescale_P)
plot_scalegram(comb_p_plot)
Exercise 5: Plot in one figure the 6-h, daily, monthly, and annual scalegrams; a starter sketch follows Exercise 6.
Exercise 6: Compare the scalegrams of T and P between Montreal and another location.
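A possible starting point for Exercise 5 (a minimal sketch reusing the objects above; the scale ratios assume 4 six-hour steps per day and 365 days per year):
# aggregate 6-h precipitation to daily and annual sums
montreal_P <- montreal_ncep[variable == "precipitation"]
montreal_P_day <- montreal_P[, sum(value), by = .(date = as.Date(time))]
montreal_P_year <- montreal_P[, sum(value), by = year]
# scalegrams, rescaled to the 6-h reference scale
scale_P_day <- scalegram(montreal_P_day$V1, plot = F)
scale_P_day$scale <- scale_P_day$scale * 4 # 4 six-hour steps per day
scale_P_year <- scalegram(montreal_P_year$V1, plot = F)
scale_P_year$scale <- scale_P_year$scale * 4 * 365 # approximate 6-h steps per year
scale_P_6h <- scale_montreal_ncep_6h[variable == "precipitation"]
scale_P_6h$variable <- "6-h precipitation"
scale_P_day$variable <- "Daily precipitation"
scale_P_year$variable <- "Annual precipitation"
# scale_P_month_6h (monthly, already rescaled above) completes the picture
plot_scalegram(rbind(scale_P_6h, scale_P_day, scale_P_year, scale_P_month_6h))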
Comparison between NCEP and station data
Station data correspond to daily meteorological observations for MONTREAL/PIERRE_ELLIOTT_TRUDEAU, downloaded from: http://climexp.knmi.nl/start.cgi?id=someone@somewhere
download.file("http://climexp.knmi.nl/data/pgdcnCA007025250.dat", "./data/montreal_P_ghcn.dat")
montreal_station_P <- data.table(read.csv("./data/montreal_P_ghcn.dat", sep = "", skip = 21, header = F))
colnames(montreal_station_P) <- c("year", "month", "day", "value")
montreal_station_P$variable <- "Daily precipitation"
montreal_station_P[, time := as.Date(paste0(year, "/", month, "/", day))]
scale_montreal_station_P <- scalegram(montreal_station_P$value, plot = F)
rescale_montreal_station_P <- rescale_scalegram(
scalegram_coarse = scale_montreal_station_P,
scalegram_fine = scale_montreal_ncep_6h[variable == "precipitation"],
scale_ratio = 4)
dummy_1 <- scale_montreal_ncep_6h[variable == "precipitation"]
dummy_1$variable <- "NCEP"
dummy_2 <- rescale_montreal_station_P
dummy_2$variable <- "Station"
compare_NCEP_station <- rbind(dummy_1, dummy_2)
plot_scalegram(compare_NCEP_station)
Exercise 7: Download other Earth system variables (see below) and perform cross-scale and cross-variable analyses with scalegram, e.g.:
cross-scale variability of a given variable derived from different datasets.
cross-scale hydroclimatic variability compared to vegetation response.
cross-scale temperature variability compared to tree-ring width chronologies.
The FedData package provides access to the following (a minimal download sketch follows this list):
The National Elevation Dataset (NED) digital elevation models (USGS)
The National Hydrography Dataset (NHD) (USGS)
The Soil Survey Geographic (SSURGO)
The Global Historical Climatology Network (GHCN)
The Daymet gridded estimates of daily weather parameters for North America
The International Tree Ring Data Bank (ITRDB)
The National Land Cover Database (NLCD) from 2011, 2006, and 2001
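For instance, daily GHCN precipitation around the site could be retrieved as follows (an untested sketch; polygon_from_extent and get_ghcn_daily follow the FedData documentation as we recall it, so check ?get_ghcn_daily before use):
require("FedData")
# bounding box around Montréal (WGS84; the extent is chosen for illustration)
site_area <- polygon_from_extent(raster::extent(-73.7, -73.3, 45.3, 45.7),
                                 proj4string = "+proj=longlat +datum=WGS84")
# daily GHCN precipitation for stations within the box
ghcn_P <- get_ghcn_daily(template = site_area, label = "montreal", elements = "prcp")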
Remote sensing vegetation indices: GIMMS NDVI3g
require("gimms")
gimms_files <- downloadGimms(x = as.Date("2000-01-01"), y = as.Date("2001-12-31"))
site_coords <- data.frame(lat = 45.5, lon = 286.5) # Montréal
coordinates(site_coords) <- c("lon", "lat")
gimms_raster <- rasterizeGimms(gimms_files)
plot(gimms_raster[[1]])
points(site_coords)
ndvi3g <- t(extract(gimms_raster, site_coords))
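The extracted series can then enter the same cross-scale analysis as above (a minimal sketch; note that two years of biweekly NDVI3g yield a short series, so a longer download period may be needed for a meaningful scalegram):
scale_ndvi <- scalegram(as.numeric(ndvi3g), plot = F) # scalegram of the site NDVI3g series
scale_ndvi$variable <- "NDVI3g"
plot_scalegram(scale_ndvi)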
Eddy covariance data of CO2, water, and energy exchange between the land surface and the atmosphere: