For a drought index we are using the recent ERA5–Drought based on the 5th generation ECMWF reanalysis system (ERA5). Its Standardized Precipitation Evapotranspiration Index (SPEI) is calculated over accumulation periods ranging from 1 month to 4 years; we use the 12 and 24 month products here. See Keune et al. (2025).
Code
# Note that the SPEI cache is different from the others, because extracting# the data isn't that expensive. So instead of a point-by-point cache# query, we just use it entire if available, otherwise reprocess everythingSPEI_CACHE <-"caches/SPEI_data_cache.parquet"if(file.exists(SPEI_CACHE)) {message("Reading data from SPEI cache...") spei <- arrow::read_parquet(SPEI_CACHE) } else { process_spei <-function(spei_window, coords) { files <-list.files(path ="data-spei/", pattern =paste0("^SPEI", spei_window, "_.*nc$"), full.names =TRUE)message("Processing ", length(files), " SPEI", spei_window, " files...") spei_list <-list()for(f in files) {#message(f) x <-suppressWarnings(terra::rast(f)) dat <- terra::extract(x, coords)colnames(dat)[2] <-"value"# this is SPEI12 or SPEI24 dat$time <- terra::time(x) dat$SPEI_window <- spei_window spei_list[[f]] <- dat }bind_rows(spei_list) %>%as_tibble() %>%return() }# The SPEI index comes in different 'accumulation periods' of drought;# we're interested in 12 and 24 months spei12 <-process_spei(12, coords) spei24 <-process_spei(24, coords)bind_rows(spei12, spei24) %>%filter(value !=-9999) %>%pivot_wider(values_from = value, names_from = SPEI_window, names_prefix ="SPEI") -> speimessage("Writing new cache ", SPEI_CACHE) arrow::write_parquet(spei, SPEI_CACHE)}
Reading data from SPEI cache...
Code
spei$Biome <- srdb_sample$Biome[spei$ID]# Distribution plotspei %>%pivot_longer(cols =c(SPEI12, SPEI24)) %>%ggplot(aes(x = value, color = name)) +geom_density(na.rm =TRUE) +facet_wrap(~Biome) +ggtitle("Distribution of monthly SPEI data")
ggplot(spei_annual, aes(Year, SPEI24, color = Biome, group = ID)) +geom_line(na.rm =TRUE) +facet_grid(Biome~.) +theme(legend.position ="none") +ggtitle("SPEI24 - annual mean")
Code
message("Merging into SRDB data...")
Merging into SRDB data...
Code
srdb_sample$ID <-seq_len(nrow(srdb_sample))srdb_sample$Year <-floor(srdb_sample$Study_midyear)srdb_sample$Year1 <- srdb_sample$Year -1spei_annual %>%select(ID, Year, SPEI12, SPEI24) -> spei_annual_joinspei_annual_join %>%rename(SPEI12_y1 = SPEI12, SPEI24_y1 = SPEI24) -> spei_annual_join_y1srdb_sample %>%left_join(spei_annual_join, by =c("ID"="ID", "Year"="Year")) %>%left_join(spei_annual_join_y1, by =c("ID"="ID", "Year1"="Year")) -> srdb_sampleggplot(srdb_sample, aes(x = SPEI12_y1)) +geom_histogram(bins =50, na.rm =TRUE) +geom_vline(xintercept =-0.84, linetype =2, color ="red") +geom_vline(xintercept =-1.28, linetype =2, color ="red") +geom_vline(xintercept =-1.65, linetype =2, color ="red") +facet_wrap(~Biome) +ggtitle("SPEI12 previous year", subtitle ="Dashed lines show SPEI categories for mild, moderate, and severe dryness")
Code
ggplot(srdb_sample, aes(x = SPEI24_y1)) +geom_histogram(bins =50, na.rm =TRUE) +geom_vline(xintercept =-0.84, linetype =2, color ="red") +geom_vline(xintercept =-1.28, linetype =2, color ="red") +geom_vline(xintercept =-1.65, linetype =2, color ="red") +facet_wrap(~Biome) +ggtitle("SPEI24 previous year", subtitle ="Dashed lines show SPEI categories for mild, moderate, and severe dryness")
Code
ggplot(srdb_sample, aes(Year, SPEI12_y1, color = Biome)) +geom_hline(yintercept =-0.84, linetype =2, color ="red") +geom_hline(yintercept =-1.28, linetype =2, color ="red") +geom_hline(yintercept =-1.65, linetype =2, color ="red") +geom_point(na.rm =TRUE) +facet_grid(Biome~.) +theme(legend.position ="none") +ggtitle("SPEI12 previous year",subtitle ="Dashed lines show SPEI categories for mild, moderate, and severe dryness")
Code
# Are we identifying SPEI jumps in SRDB correctly?srdb_sample %>%filter(SPEI12 >0& SPEI12_y1 <=-1) %>%slice_sample(n =10) -> srdb_pbe # "potential birch effect"spei_annual %>%filter(ID %in% srdb_pbe$ID) %>%left_join(select(srdb_pbe, ID, SRDB_Year = Year), by ="ID") %>%ggplot(aes(Year, SPEI12, color = SRDB_Year == Year)) +geom_point(na.rm =TRUE) +facet_grid(ID ~ .) +ggtitle("Are we identifying SPEI12 jumps (from <=-1 to >0) in SRDB correctly? (random 10)")
Climate data
For each SRDB point we extract 2m air temperature, soil temperature and water content (levels 1 and 2), precipitation, and high-vegetation LAI from the ERA5-Land monthly averaged data (available from 1950 to present; I downloaded 1985-2024).
Ugh, the ERA5 data are big and operations are slow. I’m not a GIS expert, so spent a while playing around with terra::crop(), raster::aggregate(), etc., with little success.
We use two strategies to deal with this:
Download individual-year grib files from the EMCWF Climate Data Store (a pain to download, but then extracting from them is relatively fast); and
Use a cache so we only process things once, hopefully
Code
# Load cached data if availableERA5_SHORT_NAME <-read_csv("ERA5_short_names.csv", col_types ="cc")ERA5_CACHE <-"caches/ERA5_data_cache.parquet"if(file.exists(ERA5_CACHE)) {message("Reading cache file ", ERA5_CACHE) cache <- arrow::read_parquet(ERA5_CACHE) } else {message("No ERA5 data cache found") cache <-tibble(Year =numeric(0), Longitude =numeric(0), Latitude =numeric(0))}
Reading cache file caches/ERA5_data_cache.parquet
Code
# Get distinct year/location combinations -- that's we want to pull out of ERA5 filessrdb_sample %>%distinct(Year, Longitude, Latitude) -> srdb_yrlonlatcache_hits <-0for(i inseq_len(nrow(srdb_yrlonlat))) { yr <- srdb_yrlonlat$Year[i] lon <- srdb_yrlonlat$Longitude[i] lat <- srdb_yrlonlat$Latitude[i]# Are data already in cache? cache %>%filter(Year == yr, Longitude == lon, Latitude == lat) -> datif(nrow(dat) >0) { cache_hits <- cache_hits +1#message("\t", nrow(dat), " data rows are in cache") } else {if(cache_hits >0) { message("(", cache_hits, " cache hits)") cache_hits <-0 }message(i, "/", nrow(srdb_yrlonlat), ": ", yr, " ", lon, " ", lat, appendLF =FALSE)# Construct year-specific file name; these were downloaded from ECMWF# and contain 2m temperature, soil temperature levels 1 and 2, # volumetric soil water content levels 1 and 2, total precipitation,# and LAI of high vegetation f <-file.path("data-ERA5/", paste0("ERA5_", yr, ".grib"))if(!file.exists(f)) {message("\t", f, " does not exist; skipping")next }message("\tNo data in cache; extracting from ", f, "...") x <- terra::rast(f)# Make sure the time period is what we're expectingif(!all(year(time(x)) == yr)) {stop("Year is supposed to be ", yr, " but file data dates are different!") }# Extract, reshape, clean up y <- terra::extract(x, srdb_yrlonlat[i,][-1]) # this is the expensive step y %>%pivot_longer(-ID) %>%mutate(time =time(x), Year = yr, Longitude = lon, Latitude = lat) %>%left_join(ERA5_SHORT_NAME, by ="name") %>%select(Year, time, Longitude, Latitude, short_name, value) -> dat cache <-bind_rows(cache, dat)message("\tAdding ", nrow(dat), " data rows to cache ", ERA5_CACHE, "...") arrow::write_parquet(cache, ERA5_CACHE) }}
1/2815: 1984 -82.11 29.95
data-ERA5//ERA5_1984.grib does not exist; skipping
2/2815: 1972 -84.29 35.99 data-ERA5//ERA5_1972.grib does not exist; skipping
3/2815: 1971 -84.29 35.99 data-ERA5//ERA5_1971.grib does not exist; skipping
(27 cache hits)
31/2815: 1975 -92.33 38.9 data-ERA5//ERA5_1975.grib does not exist; skipping
32/2815: 1971 -156.67 71.3 data-ERA5//ERA5_1971.grib does not exist; skipping
(11 cache hits)
44/2815: 1973 -1.37 51.7 data-ERA5//ERA5_1973.grib does not exist; skipping
45/2815: 1979 -104.39 32.75 data-ERA5//ERA5_1979.grib does not exist; skipping
46/2815: 1980 132.45 34.38 data-ERA5//ERA5_1980.grib does not exist; skipping
47/2815: 1980 80.02 25.3 data-ERA5//ERA5_1980.grib does not exist; skipping
(15 cache hits)
63/2815: 1980 -147.72 64.84 data-ERA5//ERA5_1980.grib does not exist; skipping
64/2815: 1981 -147.72 64.84 data-ERA5//ERA5_1981.grib does not exist; skipping
(17 cache hits)
82/2815: 1984 -77.42 46 data-ERA5//ERA5_1984.grib does not exist; skipping
(107 cache hits)
190/2815: 1971 -92.2 38.8 data-ERA5//ERA5_1971.grib does not exist; skipping
191/2815: 1961 -84.29 35.99 data-ERA5//ERA5_1961.grib does not exist; skipping
192/2815: 1962 -84.29 35.99 data-ERA5//ERA5_1962.grib does not exist; skipping
193/2815: 1966 9.92 56.25 data-ERA5//ERA5_1966.grib does not exist; skipping
194/2815: 1970 135.25 34.75 data-ERA5//ERA5_1970.grib does not exist; skipping
195/2815: 1964 135.83 34.69 data-ERA5//ERA5_1964.grib does not exist; skipping
196/2815: 1966 135.83 34.69 data-ERA5//ERA5_1966.grib does not exist; skipping
197/2815: 1969 -81.67 33.5 data-ERA5//ERA5_1969.grib does not exist; skipping
198/2815: 1970 -81.67 33.5 data-ERA5//ERA5_1970.grib does not exist; skipping
199/2815: 1974 135.25 34.75 data-ERA5//ERA5_1974.grib does not exist; skipping
200/2815: 1976 135.42 32 data-ERA5//ERA5_1976.grib does not exist; skipping
201/2815: 1971 102.31 2.98 data-ERA5//ERA5_1971.grib does not exist; skipping
202/2815: 1979 -121.95 47.38 data-ERA5//ERA5_1979.grib does not exist; skipping
203/2815: 1979 -121.58 47.32 data-ERA5//ERA5_1979.grib does not exist; skipping
204/2815: 1980 -81.67 33.5 data-ERA5//ERA5_1980.grib does not exist; skipping
205/2815: 1975 135.83 35.08 data-ERA5//ERA5_1975.grib does not exist; skipping
(10 cache hits)
216/2815: 1980 -145.5 65.43 data-ERA5//ERA5_1980.grib does not exist; skipping
217/2815: 1980 -150.33 67.08 data-ERA5//ERA5_1980.grib does not exist; skipping
218/2815: 1980 -149.67 68.03 data-ERA5//ERA5_1980.grib does not exist; skipping
219/2815: 1980 -149.33 68.62 data-ERA5//ERA5_1980.grib does not exist; skipping
220/2815: 1980 -148.67 69.33 data-ERA5//ERA5_1980.grib does not exist; skipping
(116 cache hits)
337/2815: 1965 -93.17 45.5 data-ERA5//ERA5_1965.grib does not exist; skipping
338/2815: 1969 -92.2 38.75 data-ERA5//ERA5_1969.grib does not exist; skipping
(24 cache hits)
363/2815: 1984 -65.75 18.32 data-ERA5//ERA5_1984.grib does not exist; skipping
364/2815: 1984 -77.08 -1 data-ERA5//ERA5_1984.grib does not exist; skipping
365/2815: 1984 -112 40.75 data-ERA5//ERA5_1984.grib does not exist; skipping
(51 cache hits)
417/2815: 1983 -77.42 46 data-ERA5//ERA5_1983.grib does not exist; skipping
418/2815: 1975 76.18 29.97 data-ERA5//ERA5_1975.grib does not exist; skipping
419/2815: 1984 -77.38 45.97 data-ERA5//ERA5_1984.grib does not exist; skipping
(4 cache hits)
424/2815: 1984 -79.85 9.15 data-ERA5//ERA5_1984.grib does not exist; skipping
(1 cache hits)
426/2815: 1971 -107.75 50.6 data-ERA5//ERA5_1971.grib does not exist; skipping
(2 cache hits)
429/2815: 1972 -1 53 data-ERA5//ERA5_1972.grib does not exist; skipping
(138 cache hits)
568/2815: 1965 33 67.5 data-ERA5//ERA5_1965.grib does not exist; skipping
569/2815: 1966 33 67.5 data-ERA5//ERA5_1966.grib does not exist; skipping
570/2815: 1976 75.72 23.18 data-ERA5//ERA5_1976.grib does not exist; skipping
571/2815: 1969 1.65 48.36 data-ERA5//ERA5_1969.grib does not exist; skipping
572/2815: 1970 1.65 48.36 data-ERA5//ERA5_1970.grib does not exist; skipping
573/2815: 1971 1.65 48.36 data-ERA5//ERA5_1971.grib does not exist; skipping
574/2815: 1969 1.61 48.56 data-ERA5//ERA5_1969.grib does not exist; skipping
575/2815: 1970 1.61 48.56 data-ERA5//ERA5_1970.grib does not exist; skipping
576/2815: 1971 1.61 48.56 data-ERA5//ERA5_1971.grib does not exist; skipping
577/2815: 1983 -79.18 35.33 data-ERA5//ERA5_1983.grib does not exist; skipping
578/2815: 1975 24.67 52.67 data-ERA5//ERA5_1975.grib does not exist; skipping
579/2815: 1976 24.67 52.67 data-ERA5//ERA5_1976.grib does not exist; skipping
580/2815: 1977 24.67 52.67 data-ERA5//ERA5_1977.grib does not exist; skipping
581/2815: 1971 16.25 48.67 data-ERA5//ERA5_1971.grib does not exist; skipping
(1 cache hits)
583/2815: 1977 115.5 -33.83 data-ERA5//ERA5_1977.grib does not exist; skipping
584/2815: 1976 -1.92 50.67 data-ERA5//ERA5_1976.grib does not exist; skipping
(1 cache hits)
586/2815: 1983 -148 64.87 data-ERA5//ERA5_1983.grib does not exist; skipping
587/2815: 1982 84 20.5 data-ERA5//ERA5_1982.grib does not exist; skipping
(3 cache hits)
591/2815: 1977 75.72 23.18 data-ERA5//ERA5_1977.grib does not exist; skipping
592/2815: 1981 132.52 34.4 data-ERA5//ERA5_1981.grib does not exist; skipping
593/2815: 1980 132.52 34.4 data-ERA5//ERA5_1980.grib does not exist; skipping
(7 cache hits)
601/2815: 1967 -84.1 36 data-ERA5//ERA5_1967.grib does not exist; skipping
(1 cache hits)
603/2815: 1983 79.42 29.25 data-ERA5//ERA5_1983.grib does not exist; skipping
(24 cache hits)
628/2815: 1979 -83.83 10.43 data-ERA5//ERA5_1979.grib does not exist; skipping
(30 cache hits)
659/2815: 1970 130.58 32.17 data-ERA5//ERA5_1970.grib does not exist; skipping
660/2815: 1971 130.58 32.17 data-ERA5//ERA5_1971.grib does not exist; skipping
(2 cache hits)
663/2815: 1961 23.73 0.8 data-ERA5//ERA5_1961.grib does not exist; skipping
(22 cache hits)
686/2815: 1970 -107.72 50.7 data-ERA5//ERA5_1970.grib does not exist; skipping
(5 cache hits)
692/2815: 1980 -90.58 29.75 data-ERA5//ERA5_1980.grib does not exist; skipping
693/2815: 1980 -90.33 29.58 data-ERA5//ERA5_1980.grib does not exist; skipping
694/2815: 1980 -90.08 29.25 data-ERA5//ERA5_1980.grib does not exist; skipping
(10 cache hits)
705/2815: 1978 -96.5 41.15 data-ERA5//ERA5_1978.grib does not exist; skipping
706/2815: 1979 -96.5 41.15 data-ERA5//ERA5_1979.grib does not exist; skipping
707/2815: 1980 -96.5 41.15 data-ERA5//ERA5_1980.grib does not exist; skipping
(32 cache hits)
740/2815: 1984 73 33 data-ERA5//ERA5_1984.grib does not exist; skipping
(7 cache hits)
748/2815: 1980 14.13 52.5 data-ERA5//ERA5_1980.grib does not exist; skipping
(2 cache hits)
751/2815: 1967 -61.78 -1.4 data-ERA5//ERA5_1967.grib does not exist; skipping
752/2815: 1980 6.42 50.9 data-ERA5//ERA5_1980.grib does not exist; skipping
753/2815: 1981 6.42 50.9 data-ERA5//ERA5_1981.grib does not exist; skipping
754/2815: 1982 6.42 50.9 data-ERA5//ERA5_1982.grib does not exist; skipping
755/2815: 1983 6.42 50.9 data-ERA5//ERA5_1983.grib does not exist; skipping
(6 cache hits)
762/2815: 1982 -92.33 38.9 data-ERA5//ERA5_1982.grib does not exist; skipping
763/2815: 1983 -92.33 38.9 data-ERA5//ERA5_1983.grib does not exist; skipping
(1 cache hits)
765/2815: 1977 16.7 49.34 data-ERA5//ERA5_1977.grib does not exist; skipping
766/2815: 1978 16.7 49.34 data-ERA5//ERA5_1978.grib does not exist; skipping
767/2815: 1979 16.7 49.34 data-ERA5//ERA5_1979.grib does not exist; skipping
768/2815: 1980 16.7 49.34 data-ERA5//ERA5_1980.grib does not exist; skipping
769/2815: 1981 16.7 49.34 data-ERA5//ERA5_1981.grib does not exist; skipping
770/2815: 1982 16.7 49.34 data-ERA5//ERA5_1982.grib does not exist; skipping
(3 cache hits)
774/2815: 1979 39.53 -69.02 data-ERA5//ERA5_1979.grib does not exist; skipping
(6 cache hits)
781/2815: 1972 14.65 41.13 data-ERA5//ERA5_1972.grib does not exist; skipping
782/2815: 1975 -39.25 -16.07 data-ERA5//ERA5_1975.grib does not exist; skipping
783/2815: 1974 19.05 68.37 data-ERA5//ERA5_1974.grib does not exist; skipping
784/2815: 1979 152.4 -30.5 data-ERA5//ERA5_1979.grib does not exist; skipping
(5 cache hits)
790/2815: 1970 4.83 43.9 data-ERA5//ERA5_1970.grib does not exist; skipping
791/2815: 1971 4.83 43.9 data-ERA5//ERA5_1971.grib does not exist; skipping
792/2815: 1978 -149.05 65.07 data-ERA5//ERA5_1978.grib does not exist; skipping
(3 cache hits)
796/2815: 1975 -60.02 -2.63 data-ERA5//ERA5_1975.grib does not exist; skipping
(315 cache hits)
1112/2815: 1981 -84.7 45.55 data-ERA5//ERA5_1981.grib does not exist; skipping
1113/2815: 1982 -84.7 45.55 data-ERA5//ERA5_1982.grib does not exist; skipping
1114/2815: 1983 -84.7 45.55 data-ERA5//ERA5_1983.grib does not exist; skipping
Code
# At this point the cache data frame has all the needed data# Reshape, summarise to annual (NB this may change in the future),# and join with SRDB datamessage("Summarizing and joining with SRDB data...")
Summarizing and joining with SRDB data...
Code
cache %>%mutate(time =as.Date(time)) %>%pivot_wider(names_from ="short_name") %>%group_by(Year, Longitude, Latitude) %>%summarise(Tair =round(mean(Tair) -273.1, 2),Tsoil_lev1 =round(mean(Tsoil_lev1) -273.1, 2),Tsoil_lev2 =round(mean(Tsoil_lev2) -273.1, 2),VWC_lev1 =round(mean(VWC_lev1), 3),VWC_lev2 =round(mean(VWC_lev2), 3),Precip =round(sum(Precip) *1000, 2),LAI_high =round(max(LAI_high), 2),.groups ="drop") -> cache_summarysrdb_sample %>%left_join(cache_summary, by =c("Year", "Longitude", "Latitude")) -> srdb_sample# A few sanity check plotsggplot(world) +geom_sf() +geom_point(data = srdb_sample, aes(x = Longitude, y = Latitude, color = Tair),na.rm =TRUE) +ggtitle("SRDB with ERA5 2m air temperature")
Code
ggplot(world) +geom_sf() +geom_point(data = srdb_sample, aes(x = Longitude, y = Latitude, color = Precip),na.rm =TRUE) +ggtitle("SRDB with ERA5 total precipitation")
Code
ggplot(world) +geom_sf() +geom_point(data = srdb_sample, aes(x = Longitude, y = Latitude, color = LAI_high),na.rm =TRUE) +ggtitle("SRDB with ERA5 max high LAI")
Soil type data
The soil type data are also from ERA5 but thankfully they’re not time-varying, so fast and easy to extract.
Code
# Extract for SRDB pointsx <-rast("data-ERA5/ERA5_soil_type.grib")plot(x)
Code
y <- terra::extract(x, coords) # not time-varying, so fastnames(y) <-c("ID", "Soil_type_number")# Attach names to numeric codesy$Soil_type_number <-as.character(y$Soil_type_number) # for join belowsoil_names <-tibble(strings =strsplit(units(x), "; ")[[1]]) %>%separate(strings, into =c("Soil_type_number", "Soil_type_name"), convert =TRUE, sep ="=")y <- y %>%left_join(soil_names, by ="Soil_type_number")srdb_sample <-bind_cols(srdb_sample, y[-1])
MODIS annual NPP
The MODIS Version 6.1 annual NPP product is at 500 meter (m) pixel resolution.
Running, S. and Zhao, M.: MODIS/Terra net primary production gap-filled yearly L4 global 500m SIN grid V061, https://doi.org/10.5067/MODIS/MOD17A3HGF.061, 2021.
Code
library(MODISTools)# Load cached data if availableMODIS_NPP_CACHE <-"caches/MODIS_NPP_cache.parquet"if(file.exists(MODIS_NPP_CACHE)) {message("Reading cache file ", MODIS_NPP_CACHE) cache <- arrow::read_parquet(MODIS_NPP_CACHE) } else {message("No MODIS NPP data cache found") cache <-tibble(Year =numeric(0), Longitude =numeric(0), Latitude =numeric(0),MODIS_NPP =numeric(0))}
Reading cache file caches/MODIS_NPP_cache.parquet
Code
# srdb_yrlonlat (the unique combinations of location and time)# was already calculated abovecache_hits <-0for(i inseq_len(nrow(srdb_yrlonlat))) { yr <- srdb_yrlonlat$Year[i] lon <- srdb_yrlonlat$Longitude[i] lat <- srdb_yrlonlat$Latitude[i]# Are data already in cache? cache %>%filter(Year == yr, Longitude == lon, Latitude == lat) -> datif(nrow(dat) >0) {#message("\t", nrow(dat), " data rows are in cache") cache_hits <- cache_hits +1 } else {# Use MODISTools to download the needed dataif(cache_hits >0) { message("(", last_cache_hits, " cache hits)") cache_hits <-0 }message(i, "/", nrow(srdb_yrlonlat), ": ", yr, " ", lon, " ", lat, appendLF =FALSE)message("\tNo data in cache; querying MODIS NPP products...") npp <-NULLtry({ npp <-mt_subset("MOD17A3HGF", band ="Npp_500m", lat = lat, lon = lon, start =paste0(yr, "-01-01"), end =paste0(yr, "-12-31")) })if(is.null(npp)) { npp_dat <-tibble(Year = yr, Longitude = lon, Latitude = lat, MODIS_NPP =NA_real_) } else { npp_dat <-tibble(Year = yr, Longitude = lon, Latitude = lat, MODIS_NPP = npp$value) } cache <-bind_rows(cache, npp_dat) arrow::write_parquet(cache, MODIS_NPP_CACHE) }}cache %>%# 32767 is the 'fill value' for this data product -- no measurement# There are also some other values in the 32760s used for non-vegetated surfaces# see https://lpdaac.usgs.gov/documents/972/MOD17_User_Guide_V61.pdfmutate(MODIS_NPP =if_else(MODIS_NPP >=32760, NA_real_, MODIS_NPP /10.0)) -> cache_summary# Join and make a sanity-check plotsrdb_sample %>%left_join(cache_summary, by =c("Year", "Longitude", "Latitude")) -> srdb_sampleggplot(world) +geom_sf() +geom_point(data = srdb_sample, aes(x = Longitude, y = Latitude, color = MODIS_NPP),na.rm =TRUE) +ggtitle("SRDB with MOD17A3HGF NPP")
MODIS 8-day GPP
The GPP product is every eight days, 500 meter (m) resolution, and we sum it for an annual value.
This process is also slow.
Code
# Load cached data if availableMODIS_GPP_CACHE <-"caches/MODIS_GPP_cache.parquet"if(file.exists(MODIS_GPP_CACHE)) {message("Reading cache file ", MODIS_GPP_CACHE) cache <- arrow::read_parquet(MODIS_GPP_CACHE) } else {message("No MODIS GPP data cache found") cache <-tibble(Year =numeric(0), calendar_date =character(0),Longitude =numeric(0), Latitude =numeric(0),MODIS_GPP =numeric(0))}
Reading cache file caches/MODIS_GPP_cache.parquet
Code
# srdb_yrlonlat (the unique combinations of location and time)# was already calculated abovecache_hits <-0for(i inseq_len(nrow(srdb_yrlonlat))) { yr <- srdb_yrlonlat$Year[i] lon <- srdb_yrlonlat$Longitude[i] lat <- srdb_yrlonlat$Latitude[i]# Are data already in cache? cache %>%filter(Year == yr, Longitude == lon, Latitude == lat) -> datif(nrow(dat) >0) {#message("\t", nrow(dat), " data rows are in cache") cache_hits <- cache_hits +1 } else {# Use MODISTools to download the needed dataif(cache_hits >0) { message("(", cache_hits, " cache hits)") cache_hits <-0 }message(i, "/", nrow(srdb_yrlonlat), ": ", yr, " ", lon, " ", lat, appendLF =FALSE)message("\tNo data in cache; querying MODIS GPP products...") gpp <-NULLtry({ gpp <-mt_subset("MOD17A2HGF", band ="Gpp_500m", lat = lat, lon = lon, start =paste0(yr, "-01-01"), end =paste0(yr, "-12-31")) })if(is.null(gpp)) { gpp_dat <-tibble(Year = yr, calendar_date =NA_character_,Longitude = lon, Latitude = lat, MODIS_GPP =NA_real_) } else { gpp_dat <-tibble(Year = yr, calendar_date = gpp$calendar_date,Longitude = lon, Latitude = lat,MODIS_GPP = gpp$value) } cache <-bind_rows(cache, gpp_dat) arrow::write_parquet(cache, MODIS_GPP_CACHE) }}# Summarise cache, join, sanity check plotcache %>%# 32767 is the 'fill value' for this data product -- no measurement# There are also some other values in the 32760s used for non-vegetated surfaces# see https://lpdaac.usgs.gov/documents/972/MOD17_User_Guide_V61.pdfmutate(MODIS_GPP =if_else(MODIS_GPP >=32760, NA_real_, MODIS_GPP)) %>%group_by(Year, Longitude, Latitude) %>%summarise(MODIS_GPP =sum(MODIS_GPP) /10.0, .groups ="drop") -> cache_summarysrdb_sample %>%left_join(cache_summary, by =c("Year", "Longitude", "Latitude")) -> srdb_sampleggplot(world) +geom_sf() +geom_point(data = srdb_sample, aes(x = Longitude, y = Latitude, color = MODIS_GPP),na.rm =TRUE) +ggtitle("SRDB with MOD17A2HGF GPP (annual)")