source("remove_outliers.R")
source("gather_data.R")
source("align_events.R")
source("plot_rsvi.R")
source("plot_compare.R")
source("rsvi_sites.R")
source("add_scaled_rsvi.R")
source("add_normalised_rsvi.R")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(readr)
library(ggplot2)
Load all RSVI data from .Rdata file. Preprocessed by Paula.
## change path to file by hand here
## TODO: gather_data() should ideally take the paths to site-specific CSV files that were downloaded by Adria.
ddf <- gather_data("/alphadata01/bstocker/data/modis_MOD09GA1km_fluxnet_cutouts/MOD09GA_MODOCGA_filter_indices.RData", add_scaled = FALSE) %>%
ungroup()
## [1] "Number of rows in df: 355260"
## [1] "Number of rows in ddf: 355260"
Subset homogenous sites. Selection of sites is based on whether sites could be allocated to clusters in Stocker et al. (2018) and based on the selection by Manuela Balzarolo (see site_selection_rsvi.Rmd
).
df_homo <- read_csv("./data/sites.csv")
## Parsed with column specification:
## cols(
## sitename = col_character(),
## cluster = col_character(),
## lon = col_double(),
## lat = col_double(),
## elv = col_double(),
## year_start = col_double(),
## year_end = col_double(),
## years_data = col_double(),
## classid = col_character(),
## whc = col_double(),
## c4 = col_logical(),
## koeppen_code = col_character(),
## kgnumber = col_double(),
## koeppen_code_extr = col_character()
## )
ddf <- ddf %>% filter( site %in% df_homo$sitename )
Scaling is done with respect to the full time series of the RSVIs, not just the subset of data points just before and during drought events. Two methods are implemented:
"range"
: scales by range to within 0 and 1, where the range is from all values from all sites and dates."range_bysite"
: scales by range to within 0 and 1 by site, where the range is from values for each site separately.dovars <- c("cci", "evi", "ndvi", "NIRv", "pri")
ddf <- ddf %>%
dplyr::select(site, date, one_of(dovars)) %>%
add_scaled_rsvi(dovars, method="range_bysite")
Add fLUE data.
ddf <- read_csv("/alphadata01/bstocker/data/flue/flue_stocker18nphyt.csv") %>%
select(site, date, flue, is_flue_drought) %>%
right_join(ddf, by=c("site", "date"))
## Parsed with column specification:
## cols(
## site = col_character(),
## date = col_date(format = ""),
## year = col_double(),
## doy = col_double(),
## flue = col_double(),
## is_flue_drought = col_logical(),
## cluster = col_character()
## )
Interesting: There is quite a trend in these indices, here for the site FR-Pue.
ddf %>%
tidyr::gather(rsvi, value, c(flue, ndvi, evi, cci, pri, NIRv)) %>%
filter(site=="FR-Pue") %>%
ggplot(aes(x=date, y=value)) +
geom_line() +
facet_grid(rows=vars(rsvi), scales = "free_y")
## Warning: Removed 1 rows containing missing values (geom_path).
## Get fLUE Stocker et al., 2018 publicly available data here: https://zenodo.org/record/1158524#.W_bNMZNKjOQ
df_flue <- readr::read_csv( "/alphadata01/bstocker/data/flue/flue_stocker18nphyt.csv" ) %>%
dplyr::select(-year, -doy, -cluster) %>%
dplyr::rename( isevent = is_flue_drought )
## Parsed with column specification:
## cols(
## site = col_character(),
## date = col_date(format = ""),
## year = col_double(),
## doy = col_double(),
## flue = col_double(),
## is_flue_drought = col_logical(),
## cluster = col_character()
## )
## Rearrange data
out_align <- align_events(
ddf,
select(df_flue, site, date, isevent),
dovars,
leng_threshold=10,
before=20, after=80, nbins=10
)
## Loading required package: tidyr
## Warning: Factor `inbin` contains implicit NA, consider using
## `forcats::fct_explicit_na`
Not clear to me why the curve is not centered around 1 for dday between -10 and 0 (zero-bin). Division by median of value in zero-bin is done in align_events()
.
Data is aggregated across multiple drought events (column inst
).
median <- out_align$df_dday_agg_inst %>%
select(site, dday, ends_with("median")) %>%
tidyr::gather(rsvi, median, ends_with("median")) %>%
mutate(rsvi=stringr::str_replace(rsvi, "_median", "") )
q33 <- out_align$df_dday_agg_inst %>%
select(site, dday, ends_with("q33")) %>%
tidyr::gather(rsvi, q33, ends_with("q33")) %>%
mutate(rsvi=stringr::str_replace(rsvi, "_q33", "") )
q66 <- out_align$df_dday_agg_inst %>%
select(site, dday, ends_with("q66")) %>%
tidyr::gather(rsvi, q66, ends_with("q66")) %>%
mutate(rsvi=stringr::str_replace(rsvi, "_q66", "") )
df_dday_agg_inst <- median %>%
left_join(q33, by=c("site","dday", "rsvi")) %>%
left_join(q66, by=c("site","dday", "rsvi"))
## Example for one site
df_dday_agg_inst %>%
filter(site=="US-SRM") %>%
filter(rsvi %in% c("flue","dsndvi", "dsevi", "dscci", "dspri", "dsNIRv")) %>%
ggplot(aes(x=dday, y=median)) +
geom_line() +
geom_ribbon(aes(ymin=q33, ymax=q66), alpha=0.3) +
facet_wrap( ~ rsvi)
Absolute values, aggregated across sites and events
median <- out_align$df_dday_agg_inst_site %>%
select(dday, ends_with("median")) %>%
tidyr::gather(rsvi, median, ends_with("median")) %>%
mutate(rsvi=stringr::str_replace(rsvi, "_median", "") )
q33 <- out_align$df_dday_agg_inst_site %>%
select(dday, ends_with("q33")) %>%
tidyr::gather(rsvi, q33, ends_with("q33")) %>%
mutate(rsvi=stringr::str_replace(rsvi, "_q33", "") )
q66 <- out_align$df_dday_agg_inst_site %>%
select(dday, ends_with("q66")) %>%
tidyr::gather(rsvi, q66, ends_with("q66")) %>%
mutate(rsvi=stringr::str_replace(rsvi, "_q66", "") )
df_dday_agg_inst_site <- median %>%
left_join(q33, by=c("dday", "rsvi")) %>%
left_join(q66, by=c("dday", "rsvi"))
df_dday_agg_inst_site %>%
filter(rsvi %in% c("flue","ndvi", "evi", "cci", "pri", "NIRv")) %>%
ggplot(aes(x=dday, y=median)) +
geom_line() +
geom_ribbon(aes(ymin=q33, ymax=q66), alpha=0.3) +
facet_wrap(~rsvi, scales = "free_y")
df_dday_agg_inst_site %>%
filter(rsvi %in% c("flue","sndvi", "sevi", "scci", "spri", "sNIRv")) %>%
ggplot(aes(x=dday, y=median)) +
geom_line() +
geom_ribbon(aes(ymin=q33, ymax=q66), alpha=0.3) +
facet_wrap(~rsvi, scales = "free_y")
df_dday_agg_inst_site %>%
filter(rsvi %in% c("flue","dsndvi", "dsevi", "dscci", "dspri", "dsNIRv")) %>%
ggplot(aes(x=dday, y=median)) +
geom_line() +
geom_ribbon(aes(ymin=q33, ymax=q66), alpha=0.3) +
facet_wrap(~rsvi)