Synthetically, using ChatGPT with the following prompt:
Construct two data frames in R (tidyverse) with the following structure for the first data frame:
For the second data frame:
In this demo, a “pixel” is at the high resolution (0.05 degrees in our case), and a “gridcell” is at the coarse resolution (0.25 degrees in our case). It assumes (to be confirmed) that the landcover data is categorical.
# Parameters
set.seed(123)
n_gridcells <- 4
n_pixels_per_grid <- 25
years_landcover <- 1992:2020
years_lai <- 1982:2020
months <- 1:12
# Generate gridcell IDs
gridcells <- tibble(
grid_id = 1:n_gridcells
)
# Expand to pixels within each gridcell
pixels <- gridcells %>%
group_by(grid_id) %>%
reframe(
pixel_id = paste0(grid_id, "_", 1:n_pixels_per_grid),
.groups = "drop"
)
lai_df <- expand_grid(
year = years_lai,
month = months,
pixel_id = pixels$pixel_id
) %>%
left_join(pixels, by = "pixel_id") %>%
mutate(
date = ymd(paste(year, month, 15, sep = "-")),
LAI = runif(n(), 0, 6) # simulated LAI values
) %>%
select(grid_id, pixel_id, date, year, month, LAI)
landcover_classes <- c("Forest", "Grassland", "Cropland", "Urban")
landcover_df <- expand_grid(
year = years_landcover,
pixel_id = pixels$pixel_id
) %>%
left_join(pixels, by = "pixel_id") %>%
mutate(
landcover = sample(landcover_classes, n(), replace = TRUE)
) %>%
select(grid_id, pixel_id, year, landcover)
Example outputs:
lai_df %>% slice(1:10)
## # A tibble: 10 × 6
## grid_id pixel_id date year month LAI
## <int> <chr> <date> <int> <int> <dbl>
## 1 1 1_1 1982-01-15 1982 1 1.73
## 2 1 1_2 1982-01-15 1982 1 4.73
## 3 1 1_3 1982-01-15 1982 1 2.45
## 4 1 1_4 1982-01-15 1982 1 5.30
## 5 1 1_5 1982-01-15 1982 1 5.64
## 6 1 1_6 1982-01-15 1982 1 0.273
## 7 1 1_7 1982-01-15 1982 1 3.17
## 8 1 1_8 1982-01-15 1982 1 5.35
## 9 1 1_9 1982-01-15 1982 1 3.31
## 10 1 1_10 1982-01-15 1982 1 2.74
landcover_df %>% slice(1:10)
## # A tibble: 10 × 4
## grid_id pixel_id year landcover
## <int> <chr> <int> <chr>
## 1 1 1_1 1992 Forest
## 2 1 1_2 1992 Grassland
## 3 1 1_3 1992 Urban
## 4 1 1_4 1992 Grassland
## 5 1 1_5 1992 Urban
## 6 1 1_6 1992 Urban
## 7 1 1_7 1992 Forest
## 8 1 1_8 1992 Cropland
## 9 1 1_9 1992 Forest
## 10 1 1_10 1992 Urban
Consider a pixel “used” if it is classified as cropland or urban in 15 or more years. Used 15 here for demo but should be three in application. This yields a static mask that can be used to remove pixels that are affected by management at any point during the available period.
df_mask <- landcover_df |>
mutate(used = ifelse(landcover %in% c("Cropland", "Urban"), TRUE, FALSE)) |>
ungroup() |>
group_by(pixel_id) |>
summarise(sum_used = sum(used)) |>
mutate(
used = ifelse(sum_used >= 15, TRUE, FALSE)
)
df_mask
## # A tibble: 100 × 3
## pixel_id sum_used used
## <chr> <int> <lgl>
## 1 1_1 17 TRUE
## 2 1_10 11 FALSE
## 3 1_11 19 TRUE
## 4 1_12 15 TRUE
## 5 1_13 17 TRUE
## 6 1_14 14 FALSE
## 7 1_15 10 FALSE
## 8 1_16 17 TRUE
## 9 1_17 11 FALSE
## 10 1_18 17 TRUE
## # ℹ 90 more rows
Apply the static mask on all dates to retain only pixels that are not masked and aggregate (mean) remaining to gridcells.
lai_df |>
left_join(
df_mask,
by = join_by(pixel_id)
) |>
mutate(lai_masked = ifelse(used, NA, LAI)) |>
group_by(grid_id, date) |>
summarise(
lai_unused = mean(lai_masked, na.rm = TRUE) # important here to put na.rm = TRUE to at least retain information of non-used pixels for average at larger scale
)
## `summarise()` has grouped output by 'grid_id'. You can override using the
## `.groups` argument.
## # A tibble: 1,872 × 3
## # Groups: grid_id [4]
## grid_id date lai_unused
## <int> <date> <dbl>
## 1 1 1982-01-15 3.77
## 2 1 1982-02-15 3.53
## 3 1 1982-03-15 2.66
## 4 1 1982-04-15 2.50
## 5 1 1982-05-15 2.55
## 6 1 1982-06-15 3.02
## 7 1 1982-07-15 2.93
## 8 1 1982-08-15 2.36
## 9 1 1982-09-15 3.04
## 10 1 1982-10-15 3.36
## # ℹ 1,862 more rows