Construct demo data frames

Synthetically, using ChatGPT with the following prompt:

Construct two data frames in R (tidyverse) with the following structure for the first data frame:

For the second data frame:

In this demo, a “pixel” is at the high resolution (0.05 degrees in our case), and a “gridcell” is at the coarse resolution (0.25 degrees in our case). It assumes (to be confirmed) that the landcover data is categorical.

# Parameters
set.seed(123)
n_gridcells <- 4
n_pixels_per_grid <- 25
years_landcover <- 1992:2020
years_lai <- 1982:2020
months <- 1:12

# Generate gridcell IDs
gridcells <- tibble(
  grid_id = 1:n_gridcells
)

# Expand to pixels within each gridcell
pixels <- gridcells %>%
  group_by(grid_id) %>%
  reframe(
    pixel_id = paste0(grid_id, "_", 1:n_pixels_per_grid),
    .groups = "drop"
  )

LAI data frame

lai_df <- expand_grid(
  year = years_lai,
  month = months,
  pixel_id = pixels$pixel_id
) %>%
  left_join(pixels, by = "pixel_id") %>%
  mutate(
    date = ymd(paste(year, month, 15, sep = "-")),
    LAI = runif(n(), 0, 6) # simulated LAI values
  ) %>%
  select(grid_id, pixel_id, date, year, month, LAI)

Land cover data frame

landcover_classes <- c("Forest", "Grassland", "Cropland", "Urban")

landcover_df <- expand_grid(
  year = years_landcover,
  pixel_id = pixels$pixel_id
) %>%
  left_join(pixels, by = "pixel_id") %>%
  mutate(
    landcover = sample(landcover_classes, n(), replace = TRUE)
  ) %>%
  select(grid_id, pixel_id, year, landcover)

Example outputs:

lai_df %>% slice(1:10)
## # A tibble: 10 × 6
##    grid_id pixel_id date        year month   LAI
##      <int> <chr>    <date>     <int> <int> <dbl>
##  1       1 1_1      1982-01-15  1982     1 1.73 
##  2       1 1_2      1982-01-15  1982     1 4.73 
##  3       1 1_3      1982-01-15  1982     1 2.45 
##  4       1 1_4      1982-01-15  1982     1 5.30 
##  5       1 1_5      1982-01-15  1982     1 5.64 
##  6       1 1_6      1982-01-15  1982     1 0.273
##  7       1 1_7      1982-01-15  1982     1 3.17 
##  8       1 1_8      1982-01-15  1982     1 5.35 
##  9       1 1_9      1982-01-15  1982     1 3.31 
## 10       1 1_10     1982-01-15  1982     1 2.74
landcover_df %>% slice(1:10)
## # A tibble: 10 × 4
##    grid_id pixel_id  year landcover
##      <int> <chr>    <int> <chr>    
##  1       1 1_1       1992 Forest   
##  2       1 1_2       1992 Grassland
##  3       1 1_3       1992 Urban    
##  4       1 1_4       1992 Grassland
##  5       1 1_5       1992 Urban    
##  6       1 1_6       1992 Urban    
##  7       1 1_7       1992 Forest   
##  8       1 1_8       1992 Cropland 
##  9       1 1_9       1992 Forest   
## 10       1 1_10      1992 Urban

Create mask

Consider a pixel “used” if it is classified as cropland or urban in 15 or more years. Used 15 here for demo but should be three in application. This yields a static mask that can be used to remove pixels that are affected by management at any point during the available period.

df_mask <- landcover_df |> 
  mutate(used = ifelse(landcover %in% c("Cropland", "Urban"), TRUE, FALSE)) |> 
  ungroup() |> 
  group_by(pixel_id) |> 
  summarise(sum_used = sum(used)) |> 
  mutate(
    used = ifelse(sum_used >= 15, TRUE, FALSE)
  )

df_mask
## # A tibble: 100 × 3
##    pixel_id sum_used used 
##    <chr>       <int> <lgl>
##  1 1_1            17 TRUE 
##  2 1_10           11 FALSE
##  3 1_11           19 TRUE 
##  4 1_12           15 TRUE 
##  5 1_13           17 TRUE 
##  6 1_14           14 FALSE
##  7 1_15           10 FALSE
##  8 1_16           17 TRUE 
##  9 1_17           11 FALSE
## 10 1_18           17 TRUE 
## # ℹ 90 more rows

Apply mask

Apply the static mask on all dates to retain only pixels that are not masked and aggregate (mean) remaining to gridcells.

lai_df |> 
  left_join(
    df_mask,
    by = join_by(pixel_id)
  ) |> 
  mutate(lai_masked = ifelse(used, NA, LAI)) |> 
  group_by(grid_id, date) |> 
  summarise(
    lai_unused = mean(lai_masked, na.rm = TRUE)  # important here to put na.rm = TRUE to at least retain information of non-used pixels for average at larger scale
  )
## `summarise()` has grouped output by 'grid_id'. You can override using the
## `.groups` argument.
## # A tibble: 1,872 × 3
## # Groups:   grid_id [4]
##    grid_id date       lai_unused
##      <int> <date>          <dbl>
##  1       1 1982-01-15       3.77
##  2       1 1982-02-15       3.53
##  3       1 1982-03-15       2.66
##  4       1 1982-04-15       2.50
##  5       1 1982-05-15       2.55
##  6       1 1982-06-15       3.02
##  7       1 1982-07-15       2.93
##  8       1 1982-08-15       2.36
##  9       1 1982-09-15       3.04
## 10       1 1982-10-15       3.36
## # ℹ 1,862 more rows