Herder MDP Action based PES

Overview of what the following analysis does:

In each time step:

  Weather is drawn (drought/normal/rainy).

  Each agent chooses an action (stocking rate) based on:
  + landscape composition (how much intact/good/poor/degraded),
  + weather,
  + blended transition (base transitions + ecosystem memory),
  + agent prior bias and temperature.

  Each agent chooses a cell (x,y) based on:
  + spatial memory of rewards for that cell+action,
  + expected survival and “improvement potential” under the kernel+weather.

  The chosen cell’s rangeland state is updated by sampling next state from the blended transition.

  Reward is computed and the agent updates:
  + EMA of action reward,
  + decayed state histogram,
  + spatial memory (sum/count by x,y,action).

Then simulation runs repeat over many steps, and repeat over many “chunks/runs”.

Key components and functions of the model

## here() starts at C:/LocalData/batkhor/OneDrive - University of Helsinki/Papers/Paper 2/mdp_1st_round

Stage	Variable	Function	Description
Key components and associated functions
Initialization	create_agent()	sets agent$bias and agent$eta_action	defines who the agent is (behavioral style)
Decision-making	select_action()	adds bias term to utility	bias influences which actions are more probable
Environment response	compute_reward(), transition_rangeland()	gives reward/penalty based on environment	bias indirectly affects these outcomes
Learning	update_agent()	adjusts EMA reward memory	experience modulates future influence of bias
Source: `mdp_2agents_as_with_priors_files/overview_table_for_MDP_two_agents_with_bias.csv`

Problem formulation

Here we define the rangeland states, actions and climatic conditions. The four rangeland states are intact, good, poor and degraded. The actions are expressed through herd sizes, <200, 200-400, 400-600 and 600 and above. And the climatic conditions are drought (below average annual precipitation), normal (average annual precipitation) and rainy (more than average precipitation). We then go on to define how the rangeland states transition under different climatic and stocking density combinations.

states   <- c("intact","good","poor","degraded")
actions  <- c("<200","200-400","400-600",">600")
weathers <- c("drought","normal","rainy")

pv <- function(intact, good, poor, degraded) {
  p <- c(intact=intact, good=good, poor=poor, degraded=degraded)
  stopifnot(abs(sum(p) - 1) < 1e-8)
  p
}

#SxS matrix with rows named in 'states'
mk_mat <- function(rows_named_vec_list) {
  M <- do.call(rbind, rows_named_vec_list)
  colnames(M) <- states
  stopifnot(all(rownames(M) == states))
  stopifnot(all(colnames(M) == states))
  M
}

#P(s' | s, a, weather)
base_transition_full <- list(
  "<200" = list(
    drought = mk_mat(list(
      intact   = pv(0.70,0.25,0.05,0.00),
      good     = pv(0.10,0.80,0.10,0.00),
      poor     = pv(0.00,0.40,0.55,0.05),
      degraded = pv(0.00,0.10,0.30,0.60)
    )),
    normal = mk_mat(list(
      intact   = pv(0.80,0.18,0.02,0.00),
      good     = pv(0.15,0.75,0.10,0.00),
      poor     = pv(0.05,0.50,0.40,0.05),
      degraded = pv(0.00,0.20,0.40,0.40)
    )),
    rainy = mk_mat(list(
      intact   = pv(0.85,0.14,0.01,0.00),
      good     = pv(0.20,0.75,0.05,0.00),
      poor     = pv(0.10,0.60,0.25,0.05),
      degraded = pv(0.05,0.35,0.40,0.20)
    ))
  ),

  "200-400" = list(
    drought = mk_mat(list(
      intact   = pv(0.40,0.45,0.12,0.03),
      good     = pv(0.05,0.70,0.20,0.05),
      poor     = pv(0.00,0.35,0.55,0.10),
      degraded = pv(0.00,0.10,0.35,0.55)
    )),
    normal = mk_mat(list(
      intact   = pv(0.55,0.40,0.05,0.00),
      good     = pv(0.10,0.75,0.13,0.02),
      poor     = pv(0.02,0.45,0.48,0.05),
      degraded = pv(0.00,0.20,0.45,0.35)
    )),
    rainy = mk_mat(list(
      intact   = pv(0.65,0.33,0.02,0.00),
      good     = pv(0.20,0.70,0.09,0.01),
      poor     = pv(0.05,0.55,0.35,0.05),
      degraded = pv(0.05,0.30,0.45,0.20)
    ))
  ),

  "400-600" = list(
    drought = mk_mat(list(
      intact   = pv(0.10,0.35,0.40,0.15),
      good     = pv(0.05,0.50,0.35,0.10),
      poor     = pv(0.00,0.25,0.55,0.20),
      degraded = pv(0.00,0.05,0.30,0.65)
    )),
    normal = mk_mat(list(
      intact   = pv(0.15,0.55,0.25,0.05),
      good     = pv(0.10,0.60,0.25,0.05),
      poor     = pv(0.02,0.30,0.55,0.13),
      degraded = pv(0.00,0.10,0.35,0.55)
    )),
    rainy = mk_mat(list(
      intact   = pv(0.20,0.60,0.15,0.05),
      good     = pv(0.15,0.65,0.15,0.05),
      poor     = pv(0.05,0.40,0.45,0.10),
      degraded = pv(0.02,0.20,0.45,0.33)
    ))
  ),

  ">600" = list(
    drought = mk_mat(list(
      intact   = pv(0.00,0.05,0.25,0.70),
      good     = pv(0.00,0.10,0.30,0.60),
      poor     = pv(0.00,0.10,0.35,0.55),
      degraded = pv(0.00,0.02,0.18,0.80)
    )),
    normal = mk_mat(list(
      intact   = pv(0.00,0.15,0.45,0.40),
      good     = pv(0.00,0.20,0.45,0.35),
      poor     = pv(0.00,0.20,0.45,0.35),
      degraded = pv(0.00,0.05,0.25,0.70)
    )),
    rainy = mk_mat(list(
      intact   = pv(0.00,0.35,0.55,0.10),
      good     = pv(0.00,0.40,0.50,0.10),
      poor     = pv(0.00,0.35,0.55,0.10),
      degraded = pv(0.05,0.25,0.45,0.25)
    ))
  )
)

check_base_target_full <- function(bt, states, actions, weathers, tol = 1e-8) {
  for (a in actions) {
    if (is.null(bt[[a]])) stop("Missing action in base_target_full: ", a)
    for (w in weathers) {
      M <- bt[[a]][[w]]
      if (is.null(M)) stop("Missing matrix for action=", a, " weather=", w)
      if (!is.matrix(M) || !all(dim(M) == c(length(states), length(states))))
        stop("Matrix for action=", a, " weather=", w, " must be ", length(states), "x", length(states))
      if (!setequal(rownames(M), states) || !setequal(colnames(M), states))
        stop("Row/col names for action=", a, " weather=", w, " must match states")
      M <- M[states, states, drop = FALSE]
      if (any(abs(rowSums(M) - 1) > tol))
        stop("Row sums must equal 1 for action=", a, " weather=", w)
      bt[[a]][[w]] <- M
    }
  }
  bt
}

build_base_target_full <- function(defs, states, actions, weathers, tol = 1e-8) {
  bt <- setNames(vector("list", length(actions)), actions)
  for (a in actions) {
    bt[[a]] <- setNames(vector("list", length(weathers)), weathers)
    for (w in weathers) bt[[a]][[w]] <- defs[[a]][[w]]
  }
  check_base_target_full(bt, states, actions, weathers, tol)
}

base_target_full <- build_base_target_full(
  defs = base_transition_full,
  states = states, actions = actions, weathers = weathers
)

draw_weather <- function(p = c(drought=0.3, normal=0.5, rainy=0.2)) {
  sample(names(p), size = 1, prob = p)
}

Ecosystem memory

This part encapsulates the idea that ecosystems have memory or certain “stickiness” under different climatic conditions. For example, once ecosystem state becomes degraded and it is in drought condition, the likelihood of recovery to improved states are very low. This eventually gets fed into the ecosystem transitions, which is based on a combination of functions - a base transition probability and the memory. Base prior determines how much influence the ecosystem memory should affect the transition.

#Kw(s'| s) weather conditioned, action-independent kernels
make_K <- function(rows) {
  M <- do.call(rbind, rows)
  rownames(M) <- states; colnames(M) <- states
  M
}

K_drought <- make_K(list(
  intact   = pv(0.65, 0.25, 0.08, 0.02),
  good     = pv(0.05, 0.70, 0.20, 0.05),
  poor     = pv(0.00, 0.20, 0.65, 0.15),
  degraded = pv(0.00, 0.05, 0.35, 0.60)
))

K_normal <- make_K(list(
  intact   = pv(0.75, 0.22, 0.03, 0.00),
  good     = pv(0.10, 0.75, 0.13, 0.02),
  poor     = pv(0.02, 0.28, 0.60, 0.10),
  degraded = pv(0.00, 0.15, 0.40, 0.45)
))

K_rainy <- make_K(list(
  intact   = pv(0.85, 0.14, 0.01, 0.00),
  good     = pv(0.20, 0.70, 0.09, 0.01),
  poor     = pv(0.05, 0.45, 0.45, 0.05),
  degraded = pv(0.05, 0.35, 0.40, 0.20)
))

K_weather <- list(drought = K_drought, normal = K_normal, rainy = K_rainy)

Tuning guidance: k_base (high) -> the prior has a strong influence over the transition, and the memory smaller. Larger k_base means “trust calibrated transitions more”. k_eco (high) -> the last end state and the current weather exerts a strong influence over the transition. Larger k_eco means “resilience kernel dominates”.

sample_next_state_dynamic <- function(current_state, action, weather,
                                      base_target_full, K_weather,
                                      k_base = 50, k_eco = 8) {

  p_base <- base_target_full[[action]][[weather]][current_state, , drop = TRUE]
  k_row  <- K_weather[[weather]][current_state, , drop = TRUE]

  alpha_total <- k_base * p_base + k_eco * k_row
  probs <- alpha_total / sum(alpha_total)

  sample(states, size = 1, prob = probs)
}

stock_survival <- matrix(
  c(
    # <200  200-400  400-600  >600
      1.2,  1.2,   1.2,    1.2,   # intact - need to think whether columns should add to 1
      1.1,  1.1,   1.1,    1.1,   # good
      0,  0,   0,    0,   # poor
      0,  0,   0,    0    # degraded
  ),
  nrow = 4, byrow = TRUE,
  dimnames = list(states, actions)
)

`%||%` <- function(a, b) if (!is.null(a)) a else b

#converts utilities into choice probabilities.
.softmax <- function(u, tau = 1) {
  if (!is.finite(tau) || tau <= 0) stop("tau must be finite and > 0")
  u2 <- u
  u2[!is.finite(u2)] <- -Inf
  m <- max(u2)
  if (!is.finite(m)) return(rep(1/length(u), length(u)))
  z <- (u2 - m) / tau
  p <- exp(z)
  s <- sum(p)
  if (!is.finite(s) || s <= 0) return(rep(1/length(u), length(u)))
  p / s
}

normalize <- function(x) { s <- sum(x); if (s == 0) x else x / s }

# spatial memory utility (avg past reward per x,y,action)
base_utility_from_spatial_memory <- function(agent, x, y, action) {
  key <- paste(x, y, action, sep = "_")
  if (!is.null(agent$reward_memory[[key]]) &&
      !is.null(agent$memory_count[[key]]) &&
      agent$memory_count[[key]] > 0) {
    agent$reward_memory[[key]] / agent$memory_count[[key]]
  } else 0
}

Bias

We describe bias through social variables that describes their current socio-economic situation (e.g. combintion of current income and workforce availability) that influence what actions they take next. Together with other parameters, “agent bias” influences action selection later one.

We introduce two sources of biases to the way that agents choose actions:

a memory based bias that comes from past action/reward combination This is based on an expected moving average of the past actions, and their rewards.
a specific action oriented bias which functions as a second lever that helps “anchor” action choice through herders’ original herd size. For example if herders have less than <200 goats to start with, they are unlikely to jump to >600 herd size in the next iteration of model run.

Determining how bias affect action selection: We use eta_action tuning parameter to determine how agent behavior type filters through action selection process later on. 0.0–0.2 == “Adaptive learner”|Bias nearly ignored; behavior mostly reactive to experience
0.5–1.0 == “Moderate consistency”|Balanced between bias and experience
1.0–2.0 == “Stubborn/Tradition bound”|Bias dominates; acts on prior preferences even if experience contradicts.

Other bias related parameter tuning: If we want bias to be stronger than base utility (softmax function),we increase w_bias (or eta_action on the agent). If we want recent rewards to dominate action selection, we increase w_ema.

All these sources of biases are then combined through a total bias utility function as follows:

U(a)=Ubase(a)+wema⋅EMA(a)+ηaction⋅wbias⋅bias(a)

create_agent <- function(id,
                         tau = 1,
                         beta_action = 0.8,
                         beta_state  = 0.9,
                         eta_bias = 0.6,
                         income = 1.0,
                         workforce = 1.0,
                         bias_vec_named = NULL) {

  if (is.null(bias_vec_named)) bias_vec_named <- setNames(rep(1, length(actions)), actions)
  bias_vec_named <- bias_vec_named[actions]
  bias_vec_named <- normalize(bias_vec_named)
  bias_log <- log(bias_vec_named + 1e-8)

  list(
    id = id,
    tau = tau,
    eta_bias = eta_bias,
    beta_action = beta_action,
    beta_state  = beta_state,
    income = income,
    workforce = workforce,
    bias = bias_log,
    ema_action_reward = setNames(rep(0, length(actions)), actions),
    ema_state_hist    = setNames(rep(1e-6, length(states)), states),
    reward_memory     = list(),
    memory_count      = list(),
    last_cell         = c(x = NA_integer_, y = NA_integer_)
  )
}

update_agent_after_step <- function(agent, chosen_action, reward, end_state, cell_xy = NULL) {
  if (!chosen_action %in% names(agent$ema_action_reward))
    stop("Unknown chosen_action: ", chosen_action)
  if (!end_state %in% names(agent$ema_state_hist))
    stop("Unknown end_state: ", end_state)
  if (!is.finite(reward)) reward <- 0

  old <- agent$ema_action_reward[[chosen_action]]
  agent$ema_action_reward[[chosen_action]] <-
    agent$beta_action * old + (1 - agent$beta_action) * reward

  agent$ema_state_hist <- agent$beta_state * agent$ema_state_hist
  agent$ema_state_hist[[end_state]] <- agent$ema_state_hist[[end_state]] + (1 - agent$beta_state)

  if (!is.null(cell_xy)) {
    key <- paste(cell_xy[["x"]], cell_xy[["y"]], chosen_action, sep = "_")
    agent$reward_memory[[key]] <- (agent$reward_memory[[key]] %||% 0) + reward
    agent$memory_count[[key]]  <- (agent$memory_count[[key]]  %||% 0) + 1
    agent$last_cell <- cell_xy[c("x","y")]
  }
  agent
}

build_agents_from_specs <- function(meta_df, bias_df, actions, default_bias = 1) {
  stopifnot("agent_id" %in% names(meta_df))
  stopifnot(all(c("agent_id","action","bias_weight") %in% names(bias_df)))

  all_pairs <- merge(data.frame(agent_id = unique(meta_df$agent_id)),
                     data.frame(action = actions),
                     by = NULL)

  bias_complete <- merge(all_pairs, bias_df, by=c("agent_id","action"), all.x=TRUE)
  bias_complete$bias_weight[is.na(bias_complete$bias_weight)] <- default_bias

  bias_list <- split(bias_complete, bias_complete$agent_id)
  bias_named <- lapply(bias_list, function(df) {
    df <- df[match(actions, df$action), ]
    v <- as.numeric(df$bias_weight); names(v) <- df$action
    v
  })

  agents <- lapply(seq_len(nrow(meta_df)), function(i) {
    row <- meta_df[i, ]
    id  <- as.character(row$agent_id)
    create_agent(
      id = id,
      tau = as.numeric(row$tau),
      beta_action = as.numeric(row$beta_action),
      beta_state  = as.numeric(row$beta_state),
      eta_bias = as.numeric(row$eta_bias),
      income = as.numeric(row$income),
      workforce = as.numeric(row$workforce),
      bias_vec_named = bias_named[[id]]
    )
  })
  names(agents) <- as.character(meta_df$agent_id)
  agents
}

Action selection in space

Agent selects actions depending on where they are in the landscape in relation to other agents - this is to emulate the preference for non-overlapping grazing practices employed by herders, which only exists as a silent/social agreement and are often breached by outsiders and desperate herders. Agents also have to consider the last time they were in a cell, because they are penalised if they return to the same cell immediately - this is an attempt to emulate ecosystem recovery from land use. If an agent hasn’t been to a cell before and never tried an action before then they assume reward to be an average of 2.This is clearly a naive and overly optimistic approach. Future simulation should focus on testing the sensitivity of this parameter. Alternative to using a constant of 2 as an average reward there is the option of using Bayesian smoothing based on prior knowledge.

state_rank <- c(intact=4, good=3, poor=2, degraded=1)

landscape_proportions <- function(land_grid, states) {
  tab <- table(factor(land_grid$rangeland, levels = states))
  tot<-sum(tab)
  props <- if (tot == 0) rep(1/length(states), length(states)) else as.numeric(tab) / tot
  setNames(props, states)
}

blend_row <- function(current_state, action, weather,
                      base_target_full, K_weather,
                      k_base = 50, k_eco = 8) {
  p_base <- base_target_full[[action]][[weather]][current_state, , drop = TRUE]
  k_row  <- K_weather[[weather]][current_state, , drop = TRUE]
  alpha  <- k_base * p_base + k_eco * k_row
  alpha / sum(alpha)
}

expected_survival_from_row <- function(p_next, action, stock_survival) {
  sum(p_next * stock_survival[names(p_next), action])
}

expected_improvement <- function(current_state, p_next) {
  cur <- state_rank[[current_state]]
  nxt <- state_rank[names(p_next)]
  sum(p_next * (nxt - cur))
}

#computes for each action a blended transition with stock survival and landscape composition
select_action_weather_landscape <- function(agent,
                                            land_grid,
                                            weather,
                                            states, actions,
                                            base_target_full, K_weather,
                                            stock_survival,
                                            k_base = 50, k_eco = 8,
                                            w_surv = 1.0,
                                            w_bias = 0.2) {

  props <- landscape_proportions(land_grid, states)

  U_surv <- setNames(numeric(length(actions)), actions)
  for (a in actions) {
    u <- 0
    for (s in states) {
      p_next <- blend_row(s, a, weather, base_target_full, K_weather, k_base, k_eco)
      u <- u + props[[s]] * expected_survival_from_row(p_next, a, stock_survival)
    }
    U_surv[[a]] <- u
  }

  bias_term <- agent$eta_bias * agent$bias[actions]
  bias_term[is.na(bias_term)] <- 0

  U <- w_surv * U_surv + w_bias * bias_term
  p <- .softmax(U, tau = agent$tau)

  list(action = sample(actions, 1, prob = p),
       U = U, U_surv = U_surv, p = p, props = props)
}

#agent decides which cell to go depending on past memory and expected survival
select_cell_weather_memory_kernel <- function(agent,
                                             land_grid,
                                             chosen_action,
                                             weather,
                                             states,
                                             base_target_full, K_weather,
                                             stock_survival,
                                             k_base = 50, k_eco = 8,
                                             base_utility_fun = base_utility_from_spatial_memory,
                                             w_mem = 1.0,
                                             w_surv_cell = 1.0,
                                             w_improve = 0.5) {

  cand <- land_grid[, c("x","y","rangeland")]
  names(cand)[3] <- "state"

  cand$U_mem <- mapply(
    function(xx, yy) base_utility_fun(agent, xx, yy, chosen_action),
    cand$x, cand$y
  )

  pnext_list <- lapply(cand$state, function(s) {
    blend_row(s, chosen_action, weather, base_target_full, K_weather, k_base, k_eco)
  })

  cand$U_surv <- vapply(pnext_list, function(pn)
    expected_survival_from_row(pn, chosen_action, stock_survival),
    numeric(1)
  )

  cand$U_improve <- mapply(function(s, pn) expected_improvement(s, pn),
                           cand$state, pnext_list)

  cand$U_total <- w_mem * cand$U_mem + w_surv_cell * cand$U_surv + w_improve * cand$U_improve
  probs <- .softmax(cand$U_total, tau = agent$tau)

  idx <- sample.int(nrow(cand), 1, prob = probs)
  list(x = cand$x[idx], y = cand$y[idx])
}

select_xy_action_weather <- function(agent,
                                     land_grid,
                                     weather,
                                     states, actions,
                                     base_target_full, K_weather,
                                     stock_survival,
                                     k_base = 50, k_eco = 8) {

  ares <- select_action_weather_landscape(
    agent, land_grid, weather,
    states, actions, base_target_full, K_weather, stock_survival,
    k_base = k_base, k_eco = k_eco
  )

  cres <- select_cell_weather_memory_kernel(
    agent, land_grid,
    chosen_action = ares$action,
    weather = weather,
    states = states,
    base_target_full = base_target_full,
    K_weather = K_weather,
    stock_survival = stock_survival,
    k_base = k_base, k_eco = k_eco
  )

  list(x = cres$x, y = cres$y, action = ares$action)
}

Simulation

Simulation creates an plain of cells that all start from intact condition. Then agents start to operate in them, iteratively choosing actions and optimising rewards as described above. Here we simulate 100 steps per agent x 200 independent runs.

simulate_adaptive_multi <- function(agents,
                                    steps,
                                    land_grid_x,
                                    land_grid_y,
                                    base_target_full,
                                    K_weather,
                                    stock_survival,
                                    initial_land_grid = NULL,
                                    weather_p = c(drought=0.3, normal=0.5, rainy=0.2),
                                    states  = c("intact","good","poor","degraded"),
                                    actions = c("<200","200-400","400-600",">600"),
                                    weathers = c("drought","normal","rainy"),
                                    k_base = 50,
                                    k_eco  = 8) {
  # reward multiplier by action (cash combined with PES benefits)
  action_income <- c("<200"=0.8, "200-400"=2.0, "400-600"=2.5, ">600"=3.5)

  compute_reward <- function(action, same_cell, revisit, cell_condition) {
    r <- action_income[action]
    if (isTRUE(same_cell)) r <- r * 0.7
    if (isTRUE(revisit))   r <- r * 0.7
    r
  }#Condition multiplier ~~~~ removed because it's not doing much. Also, not relevant for action based design. 
    #cond_mult <- dplyr::case_when(
      #cell_condition == "intact"   ~ 1.0,
      #cell_condition == "good"     ~ 0.6,
      #cell_condition == "poor"     ~ 0.1,
      #cell_condition == "degraded" ~ 0,
      #TRUE ~ 1.0
    #)
    #r * cond_mult


  # init land grid (supports carry-over)
  if (is.null(initial_land_grid)) {
    land_grid <- expand.grid(x = 0:(land_grid_x - 1), y = 0:(land_grid_y - 1))
    land_grid$rangeland <- "intact"
  } else {
    land_grid <- initial_land_grid
  }

  logs <- vector("list", steps)
  weather_history <- character(steps)
  agent_ids <- names(agents)

  for (t in 1:steps) {

    weather <- draw_weather(weather_p)
    weather_history[t] <- weather

    # selection: (x,y,action) using new policy
    selections <- lapply(agents, function(ag) {
      select_xy_action_weather(
        agent = ag,
        land_grid = land_grid,
        weather = weather,
        states = states, actions = actions,
        base_target_full = base_target_full,
        K_weather = K_weather,
        stock_survival = stock_survival,
        k_base = k_base, k_eco = k_eco
      )
    })

    coords <- do.call(rbind, lapply(selections, function(s) c(x=s$x, y=s$y)))
    rownames(coords) <- agent_ids

    # co-location
    same_cell_vec <- setNames(rep(FALSE, length(agent_ids)), agent_ids)
    if (nrow(coords) > 1) {
      for (i in seq_along(agent_ids)) {
        others <- coords[-i, , drop = FALSE]
        same_cell_vec[i] <- any(
          others[, "x"] == coords[i, "x"] &
          others[, "y"] == coords[i, "y"]
        )
      }
    }

    end_states <- setNames(character(length(agent_ids)), agent_ids)

    for (id in agent_ids) {
      sel <- selections[[id]]

      idx_cell <- which(land_grid$x == sel$x & land_grid$y == sel$y)
      s <- land_grid$rangeland[idx_cell]

      ag <- agents[[id]]
      revisit <- isTRUE(ag$last_cell["x"] == sel$x &&
                        ag$last_cell["y"] == sel$y)

      r <- compute_reward(
        action = sel$action,
        same_cell = same_cell_vec[id],
        revisit = revisit,
        cell_condition = s
      )

      s_next <- sample_next_state_dynamic(
        current_state    = s,
        action           = sel$action,
        weather          = weather,
        base_target_full = base_target_full,
        K_weather        = K_weather,
        k_base           = k_base,
        k_eco            = k_eco
      )

      land_grid$rangeland[idx_cell] <- s_next

      agents[[id]] <- update_agent_after_step(
        agent         = ag,
        chosen_action = sel$action,
        reward        = r,
        end_state     = s_next,
        cell_xy       = c(x = sel$x, y = sel$y)
      )

      end_states[id] <- s_next
    }
land_counts <- table(factor(land_grid$rangeland, levels = states))
  logs[[t]] <- list(
  step        = t,
  weather     = weather,
  selections  = selections,
  same_cell   = same_cell_vec,
  end_states  = end_states,
  land_counts = land_counts,
  land_grid   = land_grid   # <-- this is slowing things down quite a bit because its saving the step by step result, but REQUIRED for tile background for visualisation steps further down
)
  }

  list(
    logs            = logs,
    weather_history = weather_history,
    agents          = agents,
    land_grid_final = land_grid
  )
}

run_chunked_simulation_multi <- function(
  initial_agents,
  land_grid_x,
  land_grid_y,
  base_target_full,
  K_weather,
  stock_survival,
  n_runs,
  steps_per_run,
  states   = c("intact","good","poor","degraded"),
  actions  = c("<200","200-400","400-600",">600"),
  weathers = c("drought","normal","rainy"),
  k_base   = 50,
  k_eco    = 8,
  carry_land = TRUE,
  verbose = FALSE
) {
  all_logs <- vector("list", n_runs)
  all_weather <- vector("list", n_runs)

  agents <- initial_agents

  # initialize land grid - starts from intact state
  land_grid <- expand.grid(
    x = 0:(land_grid_x - 1),
    y = 0:(land_grid_y - 1)
  )
  land_grid$rangeland <- "intact"

  for (run_id in seq_len(n_runs)) {

    if (verbose) {
      start_counts <- table(factor(land_grid$rangeland, levels = states))
      message(
        sprintf("Chunk %d/%d start: %s",
                run_id, n_runs,
                paste(names(start_counts), as.integer(start_counts), sep = "=", collapse = ", "))
      )
    } else {
      message(sprintf("Running chunk %d / %d", run_id, n_runs))
    }

    res <- simulate_adaptive_multi(
      agents            = agents,
      steps             = steps_per_run,
      land_grid_x        = land_grid_x,
      land_grid_y        = land_grid_y,
      base_target_full   = base_target_full,
      K_weather          = K_weather,
      stock_survival     = stock_survival,
      initial_land_grid  = land_grid,
      states             = states,
      actions            = actions,
      weathers           = weathers,
      k_base             = k_base,
      k_eco              = k_eco
    )

    # run_id to each step log
    run_logs <- lapply(res$logs, function(L) {
      L$run <- run_id
      L
    })

    all_logs[[run_id]] <- run_logs
    all_weather[[run_id]] <- res$weather_history

    # carry forward agent learning
    agents <- res$agents

    # carry forward land grid OR reset it
    if (carry_land) {
      land_grid <- res$land_grid_final
    } else {
      land_grid$rangeland <- "intact" #reset to intact - is this reasonable?
    }

    if (verbose) {
      end_counts <- table(factor(land_grid$rangeland, levels = states))
      message(
        sprintf("Chunk %d/%d end:   %s",
                run_id, n_runs,
                paste(names(end_counts), as.integer(end_counts), sep = "=", collapse = ", "))
      )
    }
  }

  # flatten list-of-runs -> list-of-step-logs
  flat_logs <- unlist(all_logs, recursive = FALSE)

  list(
    logs            = flat_logs,
    weather_history = unlist(all_weather),
    agents_final    = agents,
    land_grid_final = land_grid
  )
}

Multi-agent wrapper

In create_agent function we define who an agent is and what their “bias” are, and the internal architecture of agents. In the step below we create a function that uploads multiple agents from an external .csv (real people profile) based on the general create_agent() architecture.

agents_meta <- read_csv("agents_meta.csv", show_col_types = FALSE)
agent_action_bias <-read_csv("agent_action_bias_all_agents_above_600.csv", show_col_types = FALSE)

agents <- build_agents_from_specs(agents_meta, agent_action_bias, actions)

results_multi <- run_chunked_simulation_multi(
  initial_agents   = agents,
  land_grid_x      = 5,
  land_grid_y      = 5,
  base_target_full = base_target_full,
  K_weather        = K_weather,
  stock_survival   = stock_survival,
  n_runs           = 50,
  steps_per_run    = 100,
  k_base           = 50,
  k_eco            = 8,
  carry_land       = TRUE
)

## Running chunk 1 / 50

## Running chunk 2 / 50

## Running chunk 3 / 50

## Running chunk 4 / 50

## Running chunk 5 / 50

## Running chunk 6 / 50

## Running chunk 7 / 50

## Running chunk 8 / 50

## Running chunk 9 / 50

## Running chunk 10 / 50

## Running chunk 11 / 50

## Running chunk 12 / 50

## Running chunk 13 / 50

## Running chunk 14 / 50

## Running chunk 15 / 50

## Running chunk 16 / 50

## Running chunk 17 / 50

## Running chunk 18 / 50

## Running chunk 19 / 50

## Running chunk 20 / 50

## Running chunk 21 / 50

## Running chunk 22 / 50

## Running chunk 23 / 50

## Running chunk 24 / 50

## Running chunk 25 / 50

## Running chunk 26 / 50

## Running chunk 27 / 50

## Running chunk 28 / 50

## Running chunk 29 / 50

## Running chunk 30 / 50

## Running chunk 31 / 50

## Running chunk 32 / 50

## Running chunk 33 / 50

## Running chunk 34 / 50

## Running chunk 35 / 50

## Running chunk 36 / 50

## Running chunk 37 / 50

## Running chunk 38 / 50

## Running chunk 39 / 50

## Running chunk 40 / 50

## Running chunk 41 / 50

## Running chunk 42 / 50

## Running chunk 43 / 50

## Running chunk 44 / 50

## Running chunk 45 / 50

## Running chunk 46 / 50

## Running chunk 47 / 50

## Running chunk 48 / 50

## Running chunk 49 / 50

## Running chunk 50 / 50

print(table(results_multi$land_grid_final$rangeland))

## 
## degraded     good     poor 
##        7        8       10

plot_degradation_overlaid <- function(results_multi, states = c("intact","good","poor","degraded")) {
  library(dplyr)
  library(purrr)
  library(tibble)
  library(ggplot2)

  `%||%` <- function(a, b) if (!is.null(a)) a else b

  deg_df <- purrr::imap_dfr(results_multi$logs, function(L, i) {

    # pull run + step with fallbacks
    run_id <- L$run %||% 1L
    step_i <- L$step %||% i

    # compute pct degraded from land_counts (preferred) or land_grid (fallback)
    if (!is.null(L$land_counts)) {
      lc <- L$land_counts
      degraded_n <- as.numeric(lc["degraded"] %||% 0)
      total <- sum(as.numeric(lc))
      pct_deg <- if (total == 0) NA_real_ else (degraded_n / total) * 100

    } else if (!is.null(L$land_grid)) {
      pct_deg <- mean(L$land_grid$rangeland == "degraded") * 100

    } else {
      stop("No land_counts or land_grid found in results_multi$logs. Add land_counts to logs.")
    }

    tibble(
      run          = as.integer(run_id),
      step         = as.integer(step_i),
      pct_degraded = pct_deg
    )
  })

  deg_df <- deg_df %>%
    group_by(run) %>%
    arrange(step, .by_group = TRUE) %>%
    mutate(global_step = row_number()) %>%
    ungroup()

  mean_curve <- deg_df %>%
    group_by(global_step) %>%
    summarise(mean_deg = mean(pct_degraded, na.rm = TRUE), .groups = "drop")

  ggplot() +
    geom_line(
      data = deg_df,
      aes(x = global_step, y = pct_degraded, group = run),
      color = "grey70", alpha = 0.35, linewidth = 0.5
    ) +
    geom_line(
      data = mean_curve,
      aes(x = global_step, y = mean_deg),
      color = "darkred", linewidth = 1.2
    ) +
    labs(
      x = "Global simulation step",
      y = "% degraded cells",
      title = "Degradation dynamics across runs",
      subtitle = "Thin grey = runs (if available), Red = mean"
    ) +
    theme_minimal() +
    theme(
      legend.position = "none",
      plot.title = element_text(face = "bold")
    )
}
plot_degradation_overlaid(results_multi)

## 
## Attaching package: 'purrr'

## The following object is masked _by_ '.GlobalEnv':
## 
##     %||%

## Warning: package 'tibble' was built under R version 4.3.2

library(dplyr)
library(purrr)
library(tidyr)

## Warning: package 'tidyr' was built under R version 4.3.3

library(tibble)
library(ggplot2)

`%||%` <- function(a, b) if (!is.null(a)) a else b

# extract a long action table from results_multi. One row = (run, step, weather, agent, x, y, action)
action_long_all <- purrr::map_dfr(results_multi$logs, function(L) {

  run_id  <- L$run %||% 1L
  step_id <- L$step
  weather <- L$weather

  # selections is a named list: names are agent ids
  sels <- L$selections
  agent_ids <- names(sels)
  if (is.null(agent_ids)) agent_ids <- paste0("A", seq_along(sels))

  purrr::imap_dfr(sels, function(sel, agent_name) {
    tibble(
      run    = as.integer(run_id),
      step   = as.integer(step_id),
      weather = as.character(weather),
      agent  = as.character(agent_name),
      x      = as.integer(sel$x),
      y      = as.integer(sel$y),
      action = as.character(sel$action)
    )
  })
})

# dominant action per cell per weather per agent
policy_map_weather <- action_long_all %>%
  group_by(agent, x, y, weather, action) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  slice_max(n, n = 1, with_ties = FALSE) %>%
  ungroup()

# summary labels: % high vs low actions per panel (agent × weather)
high_actions <- c("400-600", ">600")
low_actions  <- c("<200", "200-400") 

summary_labels <- policy_map_weather %>%
  group_by(agent, weather) %>%
  summarise(
    n_cells = n_distinct(x, y),
    high = sum(action %in% high_actions),
    low  = sum(action %in% low_actions),
    high_pct = 100 * high / n_cells,
    low_pct  = 100 * low  / n_cells,
    .groups = "drop"
  ) %>%
  mutate(
    label = paste0(
      "High: ", sprintf("%.1f", high_pct), "%\n",
      "Low:  ", sprintf("%.1f", low_pct),  "%"
    )
  )

# panel bounds for label placement
x_min <- min(policy_map_weather$x, na.rm = TRUE)
y_max <- max(policy_map_weather$y, na.rm = TRUE)
 
long_title<-c("Dominant Stocking Rate by Agent \ under different climate conditions")


ggplot(policy_map_weather, aes(x = x, y = y, fill = action)) +
  geom_tile(color = "white") +
  facet_grid(agent ~ weather) +
  scale_fill_brewer(palette = "Set2") +
  coord_fixed(clip = "off") +
  expand_limits(y = y_max + 0.8) +
  labs(
    title =paste(strwrap(long_title, width = 40), collapse = "\n"),
    x = "Grid X", y = "Grid Y"
  ) +
  theme_minimal(base_size = 8) +
  theme(
    plot.margin = margin(10, 20, 10, 10),
    strip.background = element_rect(fill = "grey90", color = NA)
  )