For a drought index we are using the recent ERA5–Drought based on the 5th generation ECMWF reanalysis system (ERA5). Its Standardized Precipitation Evapotranspiration Index (SPEI) is calculated over accumulation periods ranging from 1 month to 4 years; we use the 12 and 24 month products here. See Keune et al. (2025).
Code
# Note that the SPEI cache is different from the others, because extracting# the data isn't that expensive. So instead of a point-by-point cache# query, we just use it entire if available, otherwise reprocess everythingSPEI_CACHE <-"caches/SPEI_data_cache.parquet"if(file.exists(SPEI_CACHE)) {message("Reading data from SPEI cache...") spei <- arrow::read_parquet(SPEI_CACHE) } else { process_spei <-function(spei_window, coords) { files <-list.files(path ="data-spei/", pattern =paste0("^SPEI", spei_window, "_.*nc$"), full.names =TRUE)message("Processing ", length(files), " SPEI", spei_window, " files...") spei_list <-list()for(f in files) {#message(f) x <-suppressWarnings(terra::rast(f)) dat <- terra::extract(x, coords)colnames(dat)[2] <-"value"# this is SPEI12 or SPEI24 dat$time <- terra::time(x) dat$SPEI_window <- spei_window spei_list[[f]] <- dat }bind_rows(spei_list) %>%as_tibble() %>%return() }# The SPEI index comes in different 'accumulation periods' of drought;# we're interested in 12 and 24 months spei12 <-process_spei(12, coords) spei24 <-process_spei(24, coords)bind_rows(spei12, spei24) %>%filter(value !=-9999) %>%pivot_wider(values_from = value, names_from = SPEI_window, names_prefix ="SPEI") -> speimessage("Writing new cache ", SPEI_CACHE) arrow::write_parquet(spei, SPEI_CACHE)}
Reading data from SPEI cache...
Code
spei$Biome <- srdb_sample$Biome[spei$ID]# Distribution plotspei %>%pivot_longer(cols =c(SPEI12, SPEI24)) %>%ggplot(aes(x = value, color = name)) +geom_density(na.rm =TRUE) +facet_wrap(~Biome) +ggtitle("Distribution of monthly SPEI data")
Code
# Summarise to annual, January-March, and July-Septemberspei %>%mutate(Year =year(time), Month =month(time)) %>%arrange(time) %>%group_by(Biome, ID, Year) %>%summarise(SPEI12jja =round(mean(SPEI12[7:9]), 3),SPEI12jfm =round(mean(SPEI12[1:3]), 3),SPEI12 =round(mean(SPEI12), 3),SPEI24jja =round(mean(SPEI24[7:9]), 3),SPEI24jfm =round(mean(SPEI24[1:3]), 3),SPEI24 =round(mean(SPEI24), 3),.groups ="drop") -> spei_annual# Summary plotsggplot(spei_annual, aes(Year, SPEI12, color = Biome, group = ID)) +geom_line(na.rm =TRUE) +facet_grid(Biome~.) +theme(legend.position ="none") +ggtitle("SPEI12 - annual mean of SRDB sites")
Code
ggplot(spei_annual, aes(Year, SPEI24, color = Biome, group = ID)) +geom_line(na.rm =TRUE) +facet_grid(Biome~.) +theme(legend.position ="none") +ggtitle("SPEI24 - annual mean of SRDB sites")
Code
message("Merging into SRDB data...")
Merging into SRDB data...
Code
srdb_sample$ID <-seq_len(nrow(srdb_sample))srdb_sample$Year <-floor(srdb_sample$Study_midyear)srdb_sample$Year1 <- srdb_sample$Year -1spei_annual %>%select(ID, Year, SPEI12, SPEI24) -> spei_annual_joinspei_annual_join %>%rename(SPEI12_y1 = SPEI12, SPEI24_y1 = SPEI24) -> spei_annual_join_y1srdb_sample %>%left_join(spei_annual_join, by =c("ID"="ID", "Year"="Year")) %>%left_join(spei_annual_join_y1, by =c("ID"="ID", "Year1"="Year")) -> srdb_sampleggplot(srdb_sample, aes(x = SPEI12_y1)) +geom_histogram(bins =50, na.rm =TRUE) +geom_vline(xintercept =-0.84, linetype =2, color ="red") +geom_vline(xintercept =-1.28, linetype =2, color ="red") +geom_vline(xintercept =-1.65, linetype =2, color ="red") +facet_wrap(~Biome) +ggtitle("SPEI12 previous year", subtitle ="Dashed lines show SPEI categories for mild, moderate, and severe dryness")
Code
ggplot(srdb_sample, aes(x = SPEI24_y1)) +geom_histogram(bins =50, na.rm =TRUE) +geom_vline(xintercept =-0.84, linetype =2, color ="red") +geom_vline(xintercept =-1.28, linetype =2, color ="red") +geom_vline(xintercept =-1.65, linetype =2, color ="red") +facet_wrap(~Biome) +ggtitle("SPEI24 previous year", subtitle ="Dashed lines show SPEI categories for mild, moderate, and severe dryness")
Code
ggplot(srdb_sample, aes(Year, SPEI12_y1, color = Biome)) +geom_hline(yintercept =-0.84, linetype =2, color ="red") +geom_hline(yintercept =-1.28, linetype =2, color ="red") +geom_hline(yintercept =-1.65, linetype =2, color ="red") +geom_point(na.rm =TRUE) +facet_grid(Biome~.) +theme(legend.position ="none") +ggtitle("SPEI12 previous year of SRDB sites",subtitle ="Dashed lines show SPEI categories for mild, moderate, and severe dryness")
Code
# Are we identifying SPEI jumps in SRDB correctly?srdb_sample %>%filter(SPEI12 >0& SPEI12_y1 <=-1) %>%slice_sample(n =10) -> srdb_pbe # "potential birch effect"spei_annual %>%filter(ID %in% srdb_pbe$ID) %>%left_join(select(srdb_pbe, ID, SRDB_Year = Year), by ="ID") %>%ggplot(aes(Year, SPEI12, color = SRDB_Year == Year)) +geom_point(na.rm =TRUE) +geom_hline(yintercept =c(0, -1), linetype =2, color ="black") +scale_color_manual(values =c("black", "red")) +facet_grid(ID ~ .) +ggtitle("Are we identifying SPEI12 jumps (<=-1 to >0) in SRDB correctly? (random 10)")
Climate data
For each SRDB point we extract 2m air temperature, soil temperature and water content (levels 1 and 2), precipitation, and high-vegetation LAI from the ERA5-Land monthly averaged data (available from 1950 to present; I downloaded 1985-2024).
Ugh, the ERA5 data are big and operations are slow. I’m not a GIS expert, so spent a while playing around with terra::crop(), raster::aggregate(), etc., with little success.
We use two strategies to deal with this:
Download individual-year grib files from the EMCWF Climate Data Store (a pain to download, but then extracting from them is relatively fast); and
Use a cache so we only process things once, hopefully
Code
# Load cached data if availableERA5_SHORT_NAME <-read_csv("data-ERA5/ERA5_short_names.csv", col_types ="cc")ERA5_CACHE <-"caches/ERA5_data_cache.parquet"if(file.exists(ERA5_CACHE)) {message("Reading cache file ", ERA5_CACHE) cache <- arrow::read_parquet(ERA5_CACHE) } else {message("No ERA5 data cache found") cache <-tibble(Year =numeric(0), Longitude =numeric(0), Latitude =numeric(0))}
Reading cache file caches/ERA5_data_cache.parquet
Code
# Get distinct year/location combinations -- that's we want to pull out of ERA5 filessrdb_sample %>%distinct(Year, Longitude, Latitude) -> srdb_yrlonlatcache_hits <-0for(i inseq_len(nrow(srdb_yrlonlat))) { yr <- srdb_yrlonlat$Year[i] lon <- srdb_yrlonlat$Longitude[i] lat <- srdb_yrlonlat$Latitude[i]# Are data already in cache? cache %>%filter(Year == yr, Longitude == lon, Latitude == lat) -> datif(nrow(dat) >0) { cache_hits <- cache_hits +1#message("\t", nrow(dat), " data rows are in cache") } else {if(cache_hits >0) { message("(", cache_hits, " cache hits)") cache_hits <-0 }message(i, "/", nrow(srdb_yrlonlat), ": ", yr, " ", lon, " ", lat, appendLF =FALSE)# Construct year-specific file name; these were downloaded from ECMWF# and contain 2m temperature, soil temperature levels 1 and 2, # volumetric soil water content levels 1 and 2, total precipitation,# and LAI of high and low vegetation f <-file.path("data-ERA5/", paste0("ERA5_", yr, ".grib"))if(!file.exists(f)) {message("\t", f, " does not exist; skipping")next }message("\tNo data in cache; extracting from ", f, "...") x <- terra::rast(f)# Make sure the time period is what we're expectingif(!all(year(time(x)) == yr)) {stop("Year is supposed to be ", yr, " but file data dates are different!") }# Extract, reshape, clean up y <- terra::extract(x, srdb_yrlonlat[i,][-1]) # this is the expensive step y %>%pivot_longer(-ID) %>%mutate(time =time(x), Year = yr, Longitude = lon, Latitude = lat,short_name =rep(ERA5_SHORT_NAME$short_name, length.out =n())) %>%select(Year, time, Longitude, Latitude, short_name, value) -> dat cache <-bind_rows(cache, dat)message("\tAdding ", nrow(dat), " data rows to cache ", ERA5_CACHE, "...") arrow::write_parquet(cache, ERA5_CACHE) }}
1/2614: 1984 -82.11 29.95
data-ERA5//ERA5_1984.grib does not exist; skipping
2/2614: 1972 -84.29 35.99 data-ERA5//ERA5_1972.grib does not exist; skipping
3/2614: 1971 -84.29 35.99 data-ERA5//ERA5_1971.grib does not exist; skipping
(27 cache hits)
31/2614: 1975 -92.33 38.9 data-ERA5//ERA5_1975.grib does not exist; skipping
32/2614: 1971 -156.67 71.3 data-ERA5//ERA5_1971.grib does not exist; skipping
(11 cache hits)
44/2614: 1973 -1.37 51.7 data-ERA5//ERA5_1973.grib does not exist; skipping
45/2614: 1979 -104.39 32.75 data-ERA5//ERA5_1979.grib does not exist; skipping
46/2614: 1980 132.45 34.38 data-ERA5//ERA5_1980.grib does not exist; skipping
(15 cache hits)
62/2614: 1980 -147.72 64.84 data-ERA5//ERA5_1980.grib does not exist; skipping
63/2614: 1981 -147.72 64.84 data-ERA5//ERA5_1981.grib does not exist; skipping
(17 cache hits)
81/2614: 1984 -77.42 46 data-ERA5//ERA5_1984.grib does not exist; skipping
(106 cache hits)
188/2614: 1971 -92.2 38.8 data-ERA5//ERA5_1971.grib does not exist; skipping
189/2614: 1961 -84.29 35.99 data-ERA5//ERA5_1961.grib does not exist; skipping
190/2614: 1962 -84.29 35.99 data-ERA5//ERA5_1962.grib does not exist; skipping
191/2614: 1966 9.92 56.25 data-ERA5//ERA5_1966.grib does not exist; skipping
192/2614: 1970 135.25 34.75 data-ERA5//ERA5_1970.grib does not exist; skipping
193/2614: 1964 135.83 34.69 data-ERA5//ERA5_1964.grib does not exist; skipping
194/2614: 1966 135.83 34.69 data-ERA5//ERA5_1966.grib does not exist; skipping
195/2614: 1969 -81.67 33.5 data-ERA5//ERA5_1969.grib does not exist; skipping
196/2614: 1970 -81.67 33.5 data-ERA5//ERA5_1970.grib does not exist; skipping
197/2614: 1974 135.25 34.75 data-ERA5//ERA5_1974.grib does not exist; skipping
198/2614: 1976 135.42 32 data-ERA5//ERA5_1976.grib does not exist; skipping
199/2614: 1971 102.31 2.98 data-ERA5//ERA5_1971.grib does not exist; skipping
200/2614: 1979 -121.95 47.38 data-ERA5//ERA5_1979.grib does not exist; skipping
201/2614: 1979 -121.58 47.32 data-ERA5//ERA5_1979.grib does not exist; skipping
202/2614: 1980 -81.67 33.5 data-ERA5//ERA5_1980.grib does not exist; skipping
203/2614: 1975 135.83 35.08 data-ERA5//ERA5_1975.grib does not exist; skipping
(10 cache hits)
214/2614: 1980 -145.5 65.43 data-ERA5//ERA5_1980.grib does not exist; skipping
215/2614: 1980 -150.33 67.08 data-ERA5//ERA5_1980.grib does not exist; skipping
216/2614: 1980 -149.67 68.03 data-ERA5//ERA5_1980.grib does not exist; skipping
217/2614: 1980 -149.33 68.62 data-ERA5//ERA5_1980.grib does not exist; skipping
218/2614: 1980 -148.67 69.33 data-ERA5//ERA5_1980.grib does not exist; skipping
(109 cache hits)
328/2614: 1965 -93.17 45.5 data-ERA5//ERA5_1965.grib does not exist; skipping
329/2614: 1969 -92.2 38.75 data-ERA5//ERA5_1969.grib does not exist; skipping
(22 cache hits)
352/2614: 1984 -65.75 18.32 data-ERA5//ERA5_1984.grib does not exist; skipping
353/2614: 1984 -77.08 -1 data-ERA5//ERA5_1984.grib does not exist; skipping
354/2614: 1984 -112 40.75 data-ERA5//ERA5_1984.grib does not exist; skipping
(48 cache hits)
403/2614: 1983 -77.42 46 data-ERA5//ERA5_1983.grib does not exist; skipping
404/2614: 1975 76.18 29.97 data-ERA5//ERA5_1975.grib does not exist; skipping
405/2614: 1984 -77.38 45.97 data-ERA5//ERA5_1984.grib does not exist; skipping
(4 cache hits)
410/2614: 1984 -79.85 9.15 data-ERA5//ERA5_1984.grib does not exist; skipping
(1 cache hits)
412/2614: 1971 -107.75 50.6 data-ERA5//ERA5_1971.grib does not exist; skipping
(138 cache hits)
551/2614: 1965 33 67.5 data-ERA5//ERA5_1965.grib does not exist; skipping
552/2614: 1966 33 67.5 data-ERA5//ERA5_1966.grib does not exist; skipping
553/2614: 1976 75.72 23.18 data-ERA5//ERA5_1976.grib does not exist; skipping
554/2614: 1969 1.65 48.36 data-ERA5//ERA5_1969.grib does not exist; skipping
555/2614: 1970 1.65 48.36 data-ERA5//ERA5_1970.grib does not exist; skipping
556/2614: 1971 1.65 48.36 data-ERA5//ERA5_1971.grib does not exist; skipping
557/2614: 1969 1.61 48.56 data-ERA5//ERA5_1969.grib does not exist; skipping
558/2614: 1970 1.61 48.56 data-ERA5//ERA5_1970.grib does not exist; skipping
559/2614: 1971 1.61 48.56 data-ERA5//ERA5_1971.grib does not exist; skipping
560/2614: 1983 -79.18 35.33 data-ERA5//ERA5_1983.grib does not exist; skipping
561/2614: 1971 16.25 48.67 data-ERA5//ERA5_1971.grib does not exist; skipping
(1 cache hits)
563/2614: 1977 115.5 -33.83 data-ERA5//ERA5_1977.grib does not exist; skipping
564/2614: 1976 -1.92 50.67 data-ERA5//ERA5_1976.grib does not exist; skipping
(1 cache hits)
566/2614: 1983 -148 64.87 data-ERA5//ERA5_1983.grib does not exist; skipping
567/2614: 1982 84 20.5 data-ERA5//ERA5_1982.grib does not exist; skipping
(3 cache hits)
571/2614: 1977 75.72 23.18 data-ERA5//ERA5_1977.grib does not exist; skipping
572/2614: 1981 132.52 34.4 data-ERA5//ERA5_1981.grib does not exist; skipping
573/2614: 1980 132.52 34.4 data-ERA5//ERA5_1980.grib does not exist; skipping
(7 cache hits)
581/2614: 1967 -84.1 36 data-ERA5//ERA5_1967.grib does not exist; skipping
582/2614: 1983 79.42 29.25 data-ERA5//ERA5_1983.grib does not exist; skipping
(21 cache hits)
604/2614: 1979 -83.83 10.43 data-ERA5//ERA5_1979.grib does not exist; skipping
(28 cache hits)
633/2614: 1970 130.58 32.17 data-ERA5//ERA5_1970.grib does not exist; skipping
634/2614: 1971 130.58 32.17 data-ERA5//ERA5_1971.grib does not exist; skipping
(2 cache hits)
637/2614: 1961 23.73 0.8 data-ERA5//ERA5_1961.grib does not exist; skipping
(21 cache hits)
659/2614: 1970 -107.72 50.7 data-ERA5//ERA5_1970.grib does not exist; skipping
(5 cache hits)
665/2614: 1980 -90.58 29.75 data-ERA5//ERA5_1980.grib does not exist; skipping
666/2614: 1980 -90.33 29.58 data-ERA5//ERA5_1980.grib does not exist; skipping
667/2614: 1980 -90.08 29.25 data-ERA5//ERA5_1980.grib does not exist; skipping
(35 cache hits)
703/2614: 1984 73 33 data-ERA5//ERA5_1984.grib does not exist; skipping
(8 cache hits)
712/2614: 1967 -61.78 -1.4 data-ERA5//ERA5_1967.grib does not exist; skipping
713/2614: 1980 6.42 50.9 data-ERA5//ERA5_1980.grib does not exist; skipping
714/2614: 1981 6.42 50.9 data-ERA5//ERA5_1981.grib does not exist; skipping
715/2614: 1982 6.42 50.9 data-ERA5//ERA5_1982.grib does not exist; skipping
716/2614: 1983 6.42 50.9 data-ERA5//ERA5_1983.grib does not exist; skipping
(7 cache hits)
724/2614: 1977 16.7 49.34 data-ERA5//ERA5_1977.grib does not exist; skipping
725/2614: 1978 16.7 49.34 data-ERA5//ERA5_1978.grib does not exist; skipping
726/2614: 1979 16.7 49.34 data-ERA5//ERA5_1979.grib does not exist; skipping
727/2614: 1980 16.7 49.34 data-ERA5//ERA5_1980.grib does not exist; skipping
728/2614: 1981 16.7 49.34 data-ERA5//ERA5_1981.grib does not exist; skipping
729/2614: 1982 16.7 49.34 data-ERA5//ERA5_1982.grib does not exist; skipping
(3 cache hits)
733/2614: 1979 39.53 -69.02 data-ERA5//ERA5_1979.grib does not exist; skipping
(6 cache hits)
740/2614: 1972 14.65 41.13 data-ERA5//ERA5_1972.grib does not exist; skipping
741/2614: 1975 -39.25 -16.07 data-ERA5//ERA5_1975.grib does not exist; skipping
742/2614: 1974 19.05 68.37 data-ERA5//ERA5_1974.grib does not exist; skipping
743/2614: 1979 152.4 -30.5 data-ERA5//ERA5_1979.grib does not exist; skipping
(4 cache hits)
748/2614: 1970 4.83 43.9 data-ERA5//ERA5_1970.grib does not exist; skipping
749/2614: 1971 4.83 43.9 data-ERA5//ERA5_1971.grib does not exist; skipping
750/2614: 1978 -149.05 65.07 data-ERA5//ERA5_1978.grib does not exist; skipping
(3 cache hits)
754/2614: 1975 -60.02 -2.63 data-ERA5//ERA5_1975.grib does not exist; skipping
(295 cache hits)
1050/2614: 1981 -84.7 45.55 data-ERA5//ERA5_1981.grib does not exist; skipping
1051/2614: 1982 -84.7 45.55 data-ERA5//ERA5_1982.grib does not exist; skipping
1052/2614: 1983 -84.7 45.55 data-ERA5//ERA5_1983.grib does not exist; skipping
Code
# At this point the cache data frame has all the needed data# Reshape, summarise to annual (although this may change in# the future), and join with SRDB datamessage("Summarizing and joining with SRDB data...")
Summarizing and joining with SRDB data...
Code
cache %>%mutate(time =as.Date(time)) %>%pivot_wider(names_from ="short_name") %>%group_by(Year, Longitude, Latitude) %>%summarise(Tair =round(mean(Tair) -273.1, 2),Tsoil_lev1 =round(mean(Tsoil_lev1) -273.1, 2),Tsoil_lev2 =round(mean(Tsoil_lev2) -273.1, 2),VWC_lev1 =round(mean(VWC_lev1), 3),VWC_lev2 =round(mean(VWC_lev2), 3),Precip =round(sum(Precip) *1000, 2),LAI_high =round(max(LAI_high), 2),LAI_low =round(max(LAI_low), 2),.groups ="drop") -> cache_summarysrdb_sample %>%left_join(cache_summary, by =c("Year", "Longitude", "Latitude")) -> srdb_sample# A few sanity check plotsggplot(world) +geom_sf() +geom_point(data = srdb_sample, aes(x = Longitude, y = Latitude, color = Tair),na.rm =TRUE) +ggtitle("SRDB with ERA5 2m air temperature")
Code
ggplot(world) +geom_sf() +geom_point(data = srdb_sample, aes(x = Longitude, y = Latitude, color = Precip),na.rm =TRUE) +ggtitle("SRDB with ERA5 total precipitation")
Code
ggplot(world) +geom_sf() +geom_point(data = srdb_sample, aes(x = Longitude, y = Latitude, color = LAI_high),na.rm =TRUE) +ggtitle("SRDB with ERA5 max high LAI")
Vegetation and soil type data
The vegetation and soil data are also from ERA5 but thankfully they’re not time-varying, so fast and easy to extract.
Code
# Extract soil data for SRDB pointssoil <-rast("data-ERA5/ERA5_soil_type.grib")plot(soil, main ="ERA5 soil type")
Code
soil_dat <- terra::extract(soil, coords)names(soil_dat) <-c("ID", "Soil_type_number")# Attach names to numeric codessoil_dat$Soil_type_number <-as.character(soil_dat$Soil_type_number) # for join belowsoil_names <-tibble(strings =strsplit(units(soil), "; ")[[1]]) %>%separate(strings, into =c("Soil_type_number", "Soil_type_name"), convert =TRUE, sep ="=")soil_dat <- soil_dat %>%left_join(soil_names, by ="Soil_type_number")# Extract vegetation datatvh <-rast("data-ERA5/ERA5_tvh.grib")plot(tvh, main ="ERA5 high vegetation type")
The MODIS Version 6.1 annual NPP product is at 500 meter (m) pixel resolution.
Running, S. and Zhao, M.: MODIS/Terra net primary production gap-filled yearly L4 global 500m SIN grid V061, https://doi.org/10.5067/MODIS/MOD17A3HGF.061, 2021.
Code
library(MODISTools)# Load cached data if availableMODIS_NPP_CACHE <-"caches/MODIS_NPP_cache.parquet"if(file.exists(MODIS_NPP_CACHE)) {message("Reading cache file ", MODIS_NPP_CACHE) cache <- arrow::read_parquet(MODIS_NPP_CACHE) } else {message("No MODIS NPP data cache found") cache <-tibble(Year =numeric(0), Longitude =numeric(0), Latitude =numeric(0),MODIS_NPP =numeric(0))}
Reading cache file caches/MODIS_NPP_cache.parquet
Code
# srdb_yrlonlat (the unique combinations of location and time)# was already calculated abovecache_hits <-0for(i inseq_len(nrow(srdb_yrlonlat))) { yr <- srdb_yrlonlat$Year[i] lon <- srdb_yrlonlat$Longitude[i] lat <- srdb_yrlonlat$Latitude[i]# Are data already in cache? cache %>%filter(Year == yr, Longitude == lon, Latitude == lat) -> datif(nrow(dat) >0) {#message("\t", nrow(dat), " data rows are in cache") cache_hits <- cache_hits +1 } else {# Use MODISTools to download the needed dataif(cache_hits >0) { message("(", cache_hits, " cache hits)") cache_hits <-0 }message(i, "/", nrow(srdb_yrlonlat), ": ", yr, " ", lon, " ", lat, appendLF =FALSE)message("\tNo data in cache; querying MODIS NPP products...") npp <-NULLtry({ npp <-mt_subset("MOD17A3HGF", band ="Npp_500m", lat = lat, lon = lon, start =paste0(yr, "-01-01"), end =paste0(yr, "-12-31")) })if(is.null(npp)) { npp_dat <-tibble(Year = yr, Longitude = lon, Latitude = lat, MODIS_NPP =NA_real_) } else { npp_dat <-tibble(Year = yr, Longitude = lon, Latitude = lat, MODIS_NPP = npp$value) } cache <-bind_rows(cache, npp_dat) arrow::write_parquet(cache, MODIS_NPP_CACHE) }}cache %>%# 32767 is the 'fill value' for this data product -- no measurement# There are also some other values in the 32760s used for non-vegetated surfaces# see https://lpdaac.usgs.gov/documents/972/MOD17_User_Guide_V61.pdfmutate(MODIS_NPP =if_else(MODIS_NPP >=32760, NA_real_, MODIS_NPP /10.0)) -> cache_summary# Join and make a sanity-check plotsrdb_sample %>%left_join(cache_summary, by =c("Year", "Longitude", "Latitude")) -> srdb_sampleggplot(world) +geom_sf() +geom_point(data = srdb_sample, aes(x = Longitude, y = Latitude, color = MODIS_NPP),na.rm =TRUE) +ggtitle("SRDB with MOD17A3HGF NPP")
MODIS 8-day GPP
The GPP product is every eight days, 500 meter (m) resolution, and we sum it for an annual value.
This process is also slow.
Code
# Load cached data if availableMODIS_GPP_CACHE <-"caches/MODIS_GPP_cache.parquet"if(file.exists(MODIS_GPP_CACHE)) {message("Reading cache file ", MODIS_GPP_CACHE) cache <- arrow::read_parquet(MODIS_GPP_CACHE) } else {message("No MODIS GPP data cache found") cache <-tibble(Year =numeric(0), calendar_date =character(0),Longitude =numeric(0), Latitude =numeric(0),MODIS_GPP =numeric(0))}
Reading cache file caches/MODIS_GPP_cache.parquet
Code
# srdb_yrlonlat (the unique combinations of location and time)# was already calculated abovecache_hits <-0for(i inseq_len(nrow(srdb_yrlonlat))) { yr <- srdb_yrlonlat$Year[i] lon <- srdb_yrlonlat$Longitude[i] lat <- srdb_yrlonlat$Latitude[i]# Are data already in cache? cache %>%filter(Year == yr, Longitude == lon, Latitude == lat) -> datif(nrow(dat) >0) {#message("\t", nrow(dat), " data rows are in cache") cache_hits <- cache_hits +1 } else {# Use MODISTools to download the needed dataif(cache_hits >0) { message("(", cache_hits, " cache hits)") cache_hits <-0 }message(i, "/", nrow(srdb_yrlonlat), ": ", yr, " ", lon, " ", lat, appendLF =FALSE)message("\tNo data in cache; querying MODIS GPP products...") gpp <-NULLtry({ gpp <-mt_subset("MOD17A2HGF", band ="Gpp_500m", lat = lat, lon = lon, start =paste0(yr, "-01-01"), end =paste0(yr, "-12-31")) })if(is.null(gpp)) { gpp_dat <-tibble(Year = yr, calendar_date =NA_character_,Longitude = lon, Latitude = lat, MODIS_GPP =NA_real_) } else { gpp_dat <-tibble(Year = yr, calendar_date = gpp$calendar_date,Longitude = lon, Latitude = lat,MODIS_GPP = gpp$value) } cache <-bind_rows(cache, gpp_dat) arrow::write_parquet(cache, MODIS_GPP_CACHE) }}# Summarise cache, join, sanity check plotcache %>%# 32767 is the 'fill value' for this data product -- no measurement# There are also some other values in the 32760s used for non-vegetated surfaces# see https://lpdaac.usgs.gov/documents/972/MOD17_User_Guide_V61.pdfmutate(MODIS_GPP =if_else(MODIS_GPP >=32760, NA_real_, MODIS_GPP)) %>%group_by(Year, Longitude, Latitude) %>%summarise(MODIS_GPP =sum(MODIS_GPP) /10.0, .groups ="drop") -> cache_summarysrdb_sample %>%left_join(cache_summary, by =c("Year", "Longitude", "Latitude")) -> srdb_sampleggplot(world) +geom_sf() +geom_point(data = srdb_sample, aes(x = Longitude, y = Latitude, color = MODIS_GPP),na.rm =TRUE) +ggtitle("SRDB with MOD17A2HGF GPP (annual)")