mdp_3agents_new_action

Overview of what the following analysis does:

In each time step: 1. Weather is drawn (drought/normal/rainy). 2. Each agent chooses an action (stocking rate) based on: ~landscape composition (how much intact/good/poor/degraded), ~weather, ~blended transition (base transitions + ecosystem memory), ~agent prior bias and temperature.

Each agent chooses a cell (x,y) based on: ~spatial memory of rewards for that cell+action, ~expected survival and “improvement potential” under the kernel+weather.
The chosen cell’s rangeland state is updated by sampling next state from the blended transition.
Reward is computed and the agent updates: ~EMA of action reward, ~decayed state histogram, ~spatial memory (sum/count by x,y,action).

Then simulation runs repeat over many steps, and repeat over many “chunks/runs”.

states   <- c("intact","good","poor","degraded")
actions  <- c("<200","200-400","400-600",">600")
weathers <- c("drought","normal","rainy")

pv <- function(intact, good, poor, degraded) {
  p <- c(intact=intact, good=good, poor=poor, degraded=degraded)
  stopifnot(abs(sum(p) - 1) < 1e-8)
  p
}

#SxS matrix with rows named in 'states'
mk_mat <- function(rows_named_vec_list) {
  M <- do.call(rbind, rows_named_vec_list)
  colnames(M) <- states
  stopifnot(all(rownames(M) == states))
  stopifnot(all(colnames(M) == states))
  M
}

#P(s' | s, a, weather)
base_transition_full <- list(
  "<200" = list(
    drought = mk_mat(list(
      intact   = pv(0.70,0.25,0.05,0.00),
      good     = pv(0.10,0.80,0.10,0.00),
      poor     = pv(0.00,0.40,0.55,0.05),
      degraded = pv(0.00,0.10,0.30,0.60)
    )),
    normal = mk_mat(list(
      intact   = pv(0.80,0.18,0.02,0.00),
      good     = pv(0.15,0.75,0.10,0.00),
      poor     = pv(0.05,0.50,0.40,0.05),
      degraded = pv(0.00,0.20,0.40,0.40)
    )),
    rainy = mk_mat(list(
      intact   = pv(0.85,0.14,0.01,0.00),
      good     = pv(0.20,0.75,0.05,0.00),
      poor     = pv(0.10,0.60,0.25,0.05),
      degraded = pv(0.05,0.35,0.40,0.20)
    ))
  ),

  "200-400" = list(
    drought = mk_mat(list(
      intact   = pv(0.40,0.45,0.12,0.03),
      good     = pv(0.05,0.70,0.20,0.05),
      poor     = pv(0.00,0.35,0.55,0.10),
      degraded = pv(0.00,0.10,0.35,0.55)
    )),
    normal = mk_mat(list(
      intact   = pv(0.55,0.40,0.05,0.00),
      good     = pv(0.10,0.75,0.13,0.02),
      poor     = pv(0.02,0.45,0.48,0.05),
      degraded = pv(0.00,0.20,0.45,0.35)
    )),
    rainy = mk_mat(list(
      intact   = pv(0.65,0.33,0.02,0.00),
      good     = pv(0.20,0.70,0.09,0.01),
      poor     = pv(0.05,0.55,0.35,0.05),
      degraded = pv(0.05,0.30,0.45,0.20)
    ))
  ),

  "400-600" = list(
    drought = mk_mat(list(
      intact   = pv(0.10,0.35,0.40,0.15),
      good     = pv(0.05,0.50,0.35,0.10),
      poor     = pv(0.00,0.25,0.55,0.20),
      degraded = pv(0.00,0.05,0.30,0.65)
    )),
    normal = mk_mat(list(
      intact   = pv(0.15,0.55,0.25,0.05),
      good     = pv(0.10,0.60,0.25,0.05),
      poor     = pv(0.02,0.30,0.55,0.13),
      degraded = pv(0.00,0.10,0.35,0.55)
    )),
    rainy = mk_mat(list(
      intact   = pv(0.20,0.60,0.15,0.05),
      good     = pv(0.15,0.65,0.15,0.05),
      poor     = pv(0.05,0.40,0.45,0.10),
      degraded = pv(0.02,0.20,0.45,0.33)
    ))
  ),

  ">600" = list(
    drought = mk_mat(list(
      intact   = pv(0.00,0.05,0.25,0.70),
      good     = pv(0.00,0.10,0.30,0.60),
      poor     = pv(0.00,0.10,0.35,0.55),
      degraded = pv(0.00,0.02,0.18,0.80)
    )),
    normal = mk_mat(list(
      intact   = pv(0.00,0.15,0.45,0.40),
      good     = pv(0.00,0.20,0.45,0.35),
      poor     = pv(0.00,0.20,0.45,0.35),
      degraded = pv(0.00,0.05,0.25,0.70)
    )),
    rainy = mk_mat(list(
      intact   = pv(0.00,0.35,0.55,0.10),
      good     = pv(0.00,0.40,0.50,0.10),
      poor     = pv(0.00,0.35,0.55,0.10),
      degraded = pv(0.05,0.25,0.45,0.25)
    ))
  )
)

check_base_target_full <- function(bt, states, actions, weathers, tol = 1e-8) {
  for (a in actions) {
    if (is.null(bt[[a]])) stop("Missing action in base_target_full: ", a)
    for (w in weathers) {
      M <- bt[[a]][[w]]
      if (is.null(M)) stop("Missing matrix for action=", a, " weather=", w)
      if (!is.matrix(M) || !all(dim(M) == c(length(states), length(states))))
        stop("Matrix for action=", a, " weather=", w, " must be ", length(states), "x", length(states))
      if (!setequal(rownames(M), states) || !setequal(colnames(M), states))
        stop("Row/col names for action=", a, " weather=", w, " must match states")
      M <- M[states, states, drop = FALSE]
      if (any(abs(rowSums(M) - 1) > tol))
        stop("Row sums must equal 1 for action=", a, " weather=", w)
      bt[[a]][[w]] <- M
    }
  }
  bt
}

build_base_target_full <- function(defs, states, actions, weathers, tol = 1e-8) {
  bt <- setNames(vector("list", length(actions)), actions)
  for (a in actions) {
    bt[[a]] <- setNames(vector("list", length(weathers)), weathers)
    for (w in weathers) bt[[a]][[w]] <- defs[[a]][[w]]
  }
  check_base_target_full(bt, states, actions, weathers, tol)
}

base_target_full <- build_base_target_full(
  defs = base_transition_full,
  states = states, actions = actions, weathers = weathers
)

draw_weather <- function(p = c(drought=0.3, normal=0.5, rainy=0.2)) {
  sample(names(p), size = 1, prob = p)
}

Ecosystem memory This part encapsulates the idea that ecosystems have memory or certain “stickiness” under different climatic conditions. For example, once ecosystem state becomes degraded and it is in drought condition, the likelihood of recovery to improved states are very low.

#Kw(s'| s) weather conditioned, action-independent kernels
make_K <- function(rows) {
  M <- do.call(rbind, rows)
  rownames(M) <- states; colnames(M) <- states
  M
}

K_drought <- make_K(list(
  intact   = pv(0.65, 0.25, 0.08, 0.02),
  good     = pv(0.05, 0.70, 0.20, 0.05),
  poor     = pv(0.00, 0.20, 0.65, 0.15),
  degraded = pv(0.00, 0.05, 0.35, 0.60)
))

K_normal <- make_K(list(
  intact   = pv(0.75, 0.22, 0.03, 0.00),
  good     = pv(0.10, 0.75, 0.13, 0.02),
  poor     = pv(0.02, 0.28, 0.60, 0.10),
  degraded = pv(0.00, 0.15, 0.40, 0.45)
))

K_rainy <- make_K(list(
  intact   = pv(0.85, 0.14, 0.01, 0.00),
  good     = pv(0.20, 0.70, 0.09, 0.01),
  poor     = pv(0.05, 0.45, 0.45, 0.05),
  degraded = pv(0.05, 0.35, 0.40, 0.20)
))

K_weather <- list(drought = K_drought, normal = K_normal, rainy = K_rainy)

Blended transition combines the base transition rules with ecosystem memory k_base and k_eco are pseudo-counts. Larger k_base means “trust calibrated transitions more”. Larger k_eco means “resilience kernel dominates”.

sample_next_state_dynamic <- function(current_state, action, weather,
                                      base_target_full, K_weather,
                                      k_base = 50, k_eco = 8) {

  p_base <- base_target_full[[action]][[weather]][current_state, , drop = TRUE]
  k_row  <- K_weather[[weather]][current_state, , drop = TRUE]

  alpha_total <- k_base * p_base + k_eco * k_row
  probs <- alpha_total / sum(alpha_total)

  sample(states, size = 1, prob = probs)
}

stock_survival <- matrix(
  c(
    # <200  200-400  400-600  >600
      1.2,  1.2,   1.2,    1.2,   # intact - need to think whether columns should add to 1
      1.1,  1.1,   1.1,    1.1,   # good
      0,  0,   0,    0,   # poor
      0,  0,   0,    0    # degraded
  ),
  nrow = 4, byrow = TRUE,
  dimnames = list(states, actions)
)

`%||%` <- function(a, b) if (!is.null(a)) a else b

#converts utilities into choice probabilities.
.softmax <- function(u, tau = 1) {
  if (!is.finite(tau) || tau <= 0) stop("tau must be finite and > 0")
  u2 <- u
  u2[!is.finite(u2)] <- -Inf
  m <- max(u2)
  if (!is.finite(m)) return(rep(1/length(u), length(u)))
  z <- (u2 - m) / tau
  p <- exp(z)
  s <- sum(p)
  if (!is.finite(s) || s <= 0) return(rep(1/length(u), length(u)))
  p / s
}

normalize <- function(x) { s <- sum(x); if (s == 0) x else x / s }

# spatial memory utility (avg past reward per x,y,action)
base_utility_from_spatial_memory <- function(agent, x, y, action) {
  key <- paste(x, y, action, sep = "_")
  if (!is.null(agent$reward_memory[[key]]) &&
      !is.null(agent$memory_count[[key]]) &&
      agent$memory_count[[key]] > 0) {
    agent$reward_memory[[key]] / agent$memory_count[[key]]
  } else 0
}

create_agent <- function(id,
                         tau = 1,
                         beta_action = 0.8,
                         beta_state  = 0.9,
                         eta_bias = 0.6,
                         income = 1.0,
                         workforce = 1.0,
                         bias_vec_named = NULL) {

  if (is.null(bias_vec_named)) bias_vec_named <- setNames(rep(1, length(actions)), actions)
  bias_vec_named <- bias_vec_named[actions]
  bias_vec_named <- normalize(bias_vec_named)
  bias_log <- log(bias_vec_named + 1e-8)

  list(
    id = id,
    tau = tau,
    eta_bias = eta_bias,
    beta_action = beta_action,
    beta_state  = beta_state,
    income = income,
    workforce = workforce,
    bias = bias_log,
    ema_action_reward = setNames(rep(0, length(actions)), actions),
    ema_state_hist    = setNames(rep(1e-6, length(states)), states),
    reward_memory     = list(),
    memory_count      = list(),
    last_cell         = c(x = NA_integer_, y = NA_integer_)
  )
}

update_agent_after_step <- function(agent, chosen_action, reward, end_state, cell_xy = NULL) {
  if (!chosen_action %in% names(agent$ema_action_reward))
    stop("Unknown chosen_action: ", chosen_action)
  if (!end_state %in% names(agent$ema_state_hist))
    stop("Unknown end_state: ", end_state)
  if (!is.finite(reward)) reward <- 0

  old <- agent$ema_action_reward[[chosen_action]]
  agent$ema_action_reward[[chosen_action]] <-
    agent$beta_action * old + (1 - agent$beta_action) * reward

  agent$ema_state_hist <- agent$beta_state * agent$ema_state_hist
  agent$ema_state_hist[[end_state]] <- agent$ema_state_hist[[end_state]] + (1 - agent$beta_state)

  if (!is.null(cell_xy)) {
    key <- paste(cell_xy[["x"]], cell_xy[["y"]], chosen_action, sep = "_")
    agent$reward_memory[[key]] <- (agent$reward_memory[[key]] %||% 0) + reward
    agent$memory_count[[key]]  <- (agent$memory_count[[key]]  %||% 0) + 1
    agent$last_cell <- cell_xy[c("x","y")]
  }
  agent
}

build_agents_from_specs <- function(meta_df, bias_df, actions, default_bias = 1) {
  stopifnot("agent_id" %in% names(meta_df))
  stopifnot(all(c("agent_id","action","bias_weight") %in% names(bias_df)))

  all_pairs <- merge(data.frame(agent_id = unique(meta_df$agent_id)),
                     data.frame(action = actions),
                     by = NULL)

  bias_complete <- merge(all_pairs, bias_df, by=c("agent_id","action"), all.x=TRUE)
  bias_complete$bias_weight[is.na(bias_complete$bias_weight)] <- default_bias

  bias_list <- split(bias_complete, bias_complete$agent_id)
  bias_named <- lapply(bias_list, function(df) {
    df <- df[match(actions, df$action), ]
    v <- as.numeric(df$bias_weight); names(v) <- df$action
    v
  })

  agents <- lapply(seq_len(nrow(meta_df)), function(i) {
    row <- meta_df[i, ]
    id  <- as.character(row$agent_id)
    create_agent(
      id = id,
      tau = as.numeric(row$tau),
      beta_action = as.numeric(row$beta_action),
      beta_state  = as.numeric(row$beta_state),
      eta_bias = as.numeric(row$eta_bias),
      income = as.numeric(row$income),
      workforce = as.numeric(row$workforce),
      bias_vec_named = bias_named[[id]]
    )
  })
  names(agents) <- as.character(meta_df$agent_id)
  agents
}

state_rank <- c(intact=4, good=3, poor=2, degraded=1)

landscape_proportions <- function(land_grid, states) {
  tab <- table(factor(land_grid$rangeland, levels = states))
  tot<-sum(tab)
  props <- if (tot == 0) rep(1/length(states), length(states)) else as.numeric(tab) / tot
  setNames(props, states)
}

blend_row <- function(current_state, action, weather,
                      base_target_full, K_weather,
                      k_base = 50, k_eco = 8) {
  p_base <- base_target_full[[action]][[weather]][current_state, , drop = TRUE]
  k_row  <- K_weather[[weather]][current_state, , drop = TRUE]
  alpha  <- k_base * p_base + k_eco * k_row
  alpha / sum(alpha)
}

expected_survival_from_row <- function(p_next, action, stock_survival) {
  sum(p_next * stock_survival[names(p_next), action])
}

expected_improvement <- function(current_state, p_next) {
  cur <- state_rank[[current_state]]
  nxt <- state_rank[names(p_next)]
  sum(p_next * (nxt - cur))
}

#computes for each action a blended transition with stock survival and landscape composition
select_action_weather_landscape <- function(agent,
                                            land_grid,
                                            weather,
                                            states, actions,
                                            base_target_full, K_weather,
                                            stock_survival,
                                            k_base = 50, k_eco = 8,
                                            w_surv = 1.0,
                                            w_bias = 0.2) {

  props <- landscape_proportions(land_grid, states)

  U_surv <- setNames(numeric(length(actions)), actions)
  for (a in actions) {
    u <- 0
    for (s in states) {
      p_next <- blend_row(s, a, weather, base_target_full, K_weather, k_base, k_eco)
      u <- u + props[[s]] * expected_survival_from_row(p_next, a, stock_survival)
    }
    U_surv[[a]] <- u
  }

  bias_term <- agent$eta_bias * agent$bias[actions]
  bias_term[is.na(bias_term)] <- 0

  U <- w_surv * U_surv + w_bias * bias_term
  p <- .softmax(U, tau = agent$tau)

  list(action = sample(actions, 1, prob = p),
       U = U, U_surv = U_surv, p = p, props = props)
}

#agent decides which cell to go depending on past memory and expected survival
select_cell_weather_memory_kernel <- function(agent,
                                             land_grid,
                                             chosen_action,
                                             weather,
                                             states,
                                             base_target_full, K_weather,
                                             stock_survival,
                                             k_base = 50, k_eco = 8,
                                             base_utility_fun = base_utility_from_spatial_memory,
                                             w_mem = 1.0,
                                             w_surv_cell = 1.0,
                                             w_improve = 0.5) {

  cand <- land_grid[, c("x","y","rangeland")]
  names(cand)[3] <- "state"

  cand$U_mem <- mapply(
    function(xx, yy) base_utility_fun(agent, xx, yy, chosen_action),
    cand$x, cand$y
  )

  pnext_list <- lapply(cand$state, function(s) {
    blend_row(s, chosen_action, weather, base_target_full, K_weather, k_base, k_eco)
  })

  cand$U_surv <- vapply(pnext_list, function(pn)
    expected_survival_from_row(pn, chosen_action, stock_survival),
    numeric(1)
  )

  cand$U_improve <- mapply(function(s, pn) expected_improvement(s, pn),
                           cand$state, pnext_list)

  cand$U_total <- w_mem * cand$U_mem + w_surv_cell * cand$U_surv + w_improve * cand$U_improve
  probs <- .softmax(cand$U_total, tau = agent$tau)

  idx <- sample.int(nrow(cand), 1, prob = probs)
  list(x = cand$x[idx], y = cand$y[idx])
}

select_xy_action_weather <- function(agent,
                                     land_grid,
                                     weather,
                                     states, actions,
                                     base_target_full, K_weather,
                                     stock_survival,
                                     k_base = 50, k_eco = 8) {

  ares <- select_action_weather_landscape(
    agent, land_grid, weather,
    states, actions, base_target_full, K_weather, stock_survival,
    k_base = k_base, k_eco = k_eco
  )

  cres <- select_cell_weather_memory_kernel(
    agent, land_grid,
    chosen_action = ares$action,
    weather = weather,
    states = states,
    base_target_full = base_target_full,
    K_weather = K_weather,
    stock_survival = stock_survival,
    k_base = k_base, k_eco = k_eco
  )

  list(x = cres$x, y = cres$y, action = ares$action)
}

simulate_adaptive_multi <- function(agents,
                                    steps,
                                    land_grid_x,
                                    land_grid_y,
                                    base_target_full,
                                    K_weather,
                                    stock_survival,
                                    initial_land_grid = NULL,
                                    weather_p = c(drought=0.3, normal=0.5, rainy=0.2),
                                    states  = c("intact","good","poor","degraded"),
                                    actions = c("<200","200-400","400-600",">600"),
                                    weathers = c("drought","normal","rainy"),
                                    k_base = 50,
                                    k_eco  = 8) {
  # reward multiplier by action (cash combined with PES benefits)
  action_income <- c("<200"=0.8, "200-400"=2.0, "400-600"=2.5, ">600"=3.5)

  compute_reward <- function(action, same_cell, revisit, cell_condition) {
    r <- action_income[action]
    if (isTRUE(same_cell)) r <- r * 0.7
    if (isTRUE(revisit))   r <- r * 0.7
    r
  }#Condition multiplier ~~~~ removed because it's not doing much. Also, not relevant for action based design. 
    #cond_mult <- dplyr::case_when(
      #cell_condition == "intact"   ~ 1.0,
      #cell_condition == "good"     ~ 0.6,
      #cell_condition == "poor"     ~ 0.1,
      #cell_condition == "degraded" ~ 0,
      #TRUE ~ 1.0
    #)
    #r * cond_mult


  # init land grid (supports carry-over)
  if (is.null(initial_land_grid)) {
    land_grid <- expand.grid(x = 0:(land_grid_x - 1), y = 0:(land_grid_y - 1))
    land_grid$rangeland <- "intact"
  } else {
    land_grid <- initial_land_grid
  }

  logs <- vector("list", steps)
  weather_history <- character(steps)
  agent_ids <- names(agents)

  for (t in 1:steps) {

    weather <- draw_weather(weather_p)
    weather_history[t] <- weather

    # selection: (x,y,action) using new policy
    selections <- lapply(agents, function(ag) {
      select_xy_action_weather(
        agent = ag,
        land_grid = land_grid,
        weather = weather,
        states = states, actions = actions,
        base_target_full = base_target_full,
        K_weather = K_weather,
        stock_survival = stock_survival,
        k_base = k_base, k_eco = k_eco
      )
    })

    coords <- do.call(rbind, lapply(selections, function(s) c(x=s$x, y=s$y)))
    rownames(coords) <- agent_ids

    # co-location
    same_cell_vec <- setNames(rep(FALSE, length(agent_ids)), agent_ids)
    if (nrow(coords) > 1) {
      for (i in seq_along(agent_ids)) {
        others <- coords[-i, , drop = FALSE]
        same_cell_vec[i] <- any(
          others[, "x"] == coords[i, "x"] &
          others[, "y"] == coords[i, "y"]
        )
      }
    }

    end_states <- setNames(character(length(agent_ids)), agent_ids)

    for (id in agent_ids) {
      sel <- selections[[id]]

      idx_cell <- which(land_grid$x == sel$x & land_grid$y == sel$y)
      s <- land_grid$rangeland[idx_cell]

      ag <- agents[[id]]
      revisit <- isTRUE(ag$last_cell["x"] == sel$x &&
                        ag$last_cell["y"] == sel$y)

      r <- compute_reward(
        action = sel$action,
        same_cell = same_cell_vec[id],
        revisit = revisit,
        cell_condition = s
      )

      s_next <- sample_next_state_dynamic(
        current_state    = s,
        action           = sel$action,
        weather          = weather,
        base_target_full = base_target_full,
        K_weather        = K_weather,
        k_base           = k_base,
        k_eco            = k_eco
      )

      land_grid$rangeland[idx_cell] <- s_next

      agents[[id]] <- update_agent_after_step(
        agent         = ag,
        chosen_action = sel$action,
        reward        = r,
        end_state     = s_next,
        cell_xy       = c(x = sel$x, y = sel$y)
      )

      end_states[id] <- s_next
    }
land_counts <- table(factor(land_grid$rangeland, levels = states))
  logs[[t]] <- list(
  step        = t,
  weather     = weather,
  selections  = selections,
  same_cell   = same_cell_vec,
  end_states  = end_states,
  land_counts = land_counts,
  land_grid   = land_grid   # <-- this is slowing things down quite a bit because its saving the step by step result, but REQUIRED for tile background for visualisation steps further down
)
  }

  list(
    logs            = logs,
    weather_history = weather_history,
    agents          = agents,
    land_grid_final = land_grid
  )
}

run_chunked_simulation_multi <- function(
  initial_agents,
  land_grid_x,
  land_grid_y,
  base_target_full,
  K_weather,
  stock_survival,
  n_runs,
  steps_per_run,
  states   = c("intact","good","poor","degraded"),
  actions  = c("<200","200-400","400-600",">600"),
  weathers = c("drought","normal","rainy"),
  k_base   = 50,
  k_eco    = 8,
  carry_land = TRUE,
  verbose = FALSE
) {
  all_logs <- vector("list", n_runs)
  all_weather <- vector("list", n_runs)

  agents <- initial_agents

  # initialize land grid - starts from intact state
  land_grid <- expand.grid(
    x = 0:(land_grid_x - 1),
    y = 0:(land_grid_y - 1)
  )
  land_grid$rangeland <- "intact"

  for (run_id in seq_len(n_runs)) {

    if (verbose) {
      start_counts <- table(factor(land_grid$rangeland, levels = states))
      message(
        sprintf("Chunk %d/%d start: %s",
                run_id, n_runs,
                paste(names(start_counts), as.integer(start_counts), sep = "=", collapse = ", "))
      )
    } else {
      message(sprintf("Running chunk %d / %d", run_id, n_runs))
    }

    res <- simulate_adaptive_multi(
      agents            = agents,
      steps             = steps_per_run,
      land_grid_x        = land_grid_x,
      land_grid_y        = land_grid_y,
      base_target_full   = base_target_full,
      K_weather          = K_weather,
      stock_survival     = stock_survival,
      initial_land_grid  = land_grid,
      states             = states,
      actions            = actions,
      weathers           = weathers,
      k_base             = k_base,
      k_eco              = k_eco
    )

    # run_id to each step log
    run_logs <- lapply(res$logs, function(L) {
      L$run <- run_id
      L
    })

    all_logs[[run_id]] <- run_logs
    all_weather[[run_id]] <- res$weather_history

    # carry forward agent learning
    agents <- res$agents

    # carry forward land grid OR reset it
    if (carry_land) {
      land_grid <- res$land_grid_final
    } else {
      land_grid$rangeland <- "intact" #reset to intact - is this reasonable?
    }

    if (verbose) {
      end_counts <- table(factor(land_grid$rangeland, levels = states))
      message(
        sprintf("Chunk %d/%d end:   %s",
                run_id, n_runs,
                paste(names(end_counts), as.integer(end_counts), sep = "=", collapse = ", "))
      )
    }
  }

  # flatten list-of-runs -> list-of-step-logs
  flat_logs <- unlist(all_logs, recursive = FALSE)

  list(
    logs            = flat_logs,
    weather_history = unlist(all_weather),
    agents_final    = agents,
    land_grid_final = land_grid
  )
}

agents_meta <- read_csv("agents_meta.csv", show_col_types = FALSE)
agent_action_bias <-read_csv("agent_action_bias_all_agents_above_600.csv", show_col_types = FALSE)

agents <- build_agents_from_specs(agents_meta, agent_action_bias, actions)

results_multi <- run_chunked_simulation_multi(
  initial_agents   = agents,
  land_grid_x      = 5,
  land_grid_y      = 5,
  base_target_full = base_target_full,
  K_weather        = K_weather,
  stock_survival   = stock_survival,
  n_runs           = 50,
  steps_per_run    = 100,
  k_base           = 50,
  k_eco            = 8,
  carry_land       = TRUE
)

## Running chunk 1 / 50

## Running chunk 2 / 50

## Running chunk 3 / 50

## Running chunk 4 / 50

## Running chunk 5 / 50

## Running chunk 6 / 50

## Running chunk 7 / 50

## Running chunk 8 / 50

## Running chunk 9 / 50

## Running chunk 10 / 50

## Running chunk 11 / 50

## Running chunk 12 / 50

## Running chunk 13 / 50

## Running chunk 14 / 50

## Running chunk 15 / 50

## Running chunk 16 / 50

## Running chunk 17 / 50

## Running chunk 18 / 50

## Running chunk 19 / 50

## Running chunk 20 / 50

## Running chunk 21 / 50

## Running chunk 22 / 50

## Running chunk 23 / 50

## Running chunk 24 / 50

## Running chunk 25 / 50

## Running chunk 26 / 50

## Running chunk 27 / 50

## Running chunk 28 / 50

## Running chunk 29 / 50

## Running chunk 30 / 50

## Running chunk 31 / 50

## Running chunk 32 / 50

## Running chunk 33 / 50

## Running chunk 34 / 50

## Running chunk 35 / 50

## Running chunk 36 / 50

## Running chunk 37 / 50

## Running chunk 38 / 50

## Running chunk 39 / 50

## Running chunk 40 / 50

## Running chunk 41 / 50

## Running chunk 42 / 50

## Running chunk 43 / 50

## Running chunk 44 / 50

## Running chunk 45 / 50

## Running chunk 46 / 50

## Running chunk 47 / 50

## Running chunk 48 / 50

## Running chunk 49 / 50

## Running chunk 50 / 50

print(table(results_multi$land_grid_final$rangeland))

## 
## degraded     good   intact     poor 
##        7        7        2        9

plot_degradation_overlaid <- function(results_multi, states = c("intact","good","poor","degraded")) {
  library(dplyr)
  library(purrr)
  library(tibble)
  library(ggplot2)

  `%||%` <- function(a, b) if (!is.null(a)) a else b

  deg_df <- purrr::imap_dfr(results_multi$logs, function(L, i) {

    # pull run + step with fallbacks
    run_id <- L$run %||% 1L
    step_i <- L$step %||% i

    # compute pct degraded from land_counts (preferred) or land_grid (fallback)
    if (!is.null(L$land_counts)) {
      lc <- L$land_counts
      degraded_n <- as.numeric(lc["degraded"] %||% 0)
      total <- sum(as.numeric(lc))
      pct_deg <- if (total == 0) NA_real_ else (degraded_n / total) * 100

    } else if (!is.null(L$land_grid)) {
      pct_deg <- mean(L$land_grid$rangeland == "degraded") * 100

    } else {
      stop("No land_counts or land_grid found in results_multi$logs. Add land_counts to logs.")
    }

    tibble(
      run          = as.integer(run_id),
      step         = as.integer(step_i),
      pct_degraded = pct_deg
    )
  })

  deg_df <- deg_df %>%
    group_by(run) %>%
    arrange(step, .by_group = TRUE) %>%
    mutate(global_step = row_number()) %>%
    ungroup()

  mean_curve <- deg_df %>%
    group_by(global_step) %>%
    summarise(mean_deg = mean(pct_degraded, na.rm = TRUE), .groups = "drop")

  ggplot() +
    geom_line(
      data = deg_df,
      aes(x = global_step, y = pct_degraded, group = run),
      color = "grey70", alpha = 0.35, linewidth = 0.5
    ) +
    geom_line(
      data = mean_curve,
      aes(x = global_step, y = mean_deg),
      color = "darkred", linewidth = 1.2
    ) +
    labs(
      x = "Global simulation step",
      y = "% degraded cells",
      title = "Degradation dynamics across runs",
      subtitle = "Thin grey = runs (if available), Red = mean"
    ) +
    theme_minimal() +
    theme(
      legend.position = "none",
      plot.title = element_text(face = "bold")
    )
}
plot_degradation_overlaid(results_multi)

## 
## Attaching package: 'purrr'

## The following object is masked _by_ '.GlobalEnv':
## 
##     %||%

## Warning: package 'tibble' was built under R version 4.3.2

library(dplyr)
library(purrr)
library(tidyr)

## Warning: package 'tidyr' was built under R version 4.3.3

library(tibble)
library(ggplot2)

`%||%` <- function(a, b) if (!is.null(a)) a else b

# extract a long action table from results_multi. One row = (run, step, weather, agent, x, y, action)
action_long_all <- purrr::map_dfr(results_multi$logs, function(L) {

  run_id  <- L$run %||% 1L
  step_id <- L$step
  weather <- L$weather

  # selections is a named list: names are agent ids
  sels <- L$selections
  agent_ids <- names(sels)
  if (is.null(agent_ids)) agent_ids <- paste0("A", seq_along(sels))

  purrr::imap_dfr(sels, function(sel, agent_name) {
    tibble(
      run    = as.integer(run_id),
      step   = as.integer(step_id),
      weather = as.character(weather),
      agent  = as.character(agent_name),
      x      = as.integer(sel$x),
      y      = as.integer(sel$y),
      action = as.character(sel$action)
    )
  })
})

# dominant action per cell per weather per agent
policy_map_weather <- action_long_all %>%
  group_by(agent, x, y, weather, action) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  slice_max(n, n = 1, with_ties = FALSE) %>%
  ungroup()

# summary labels: % high vs low actions per panel (agent × weather)
high_actions <- c("400-600", ">600")
low_actions  <- c("<200", "200-400") 

summary_labels <- policy_map_weather %>%
  group_by(agent, weather) %>%
  summarise(
    n_cells = n_distinct(x, y),
    high = sum(action %in% high_actions),
    low  = sum(action %in% low_actions),
    high_pct = 100 * high / n_cells,
    low_pct  = 100 * low  / n_cells,
    .groups = "drop"
  ) %>%
  mutate(
    label = paste0(
      "High: ", sprintf("%.1f", high_pct), "%\n",
      "Low:  ", sprintf("%.1f", low_pct),  "%"
    )
  )

# panel bounds for label placement
x_min <- min(policy_map_weather$x, na.rm = TRUE)
y_max <- max(policy_map_weather$y, na.rm = TRUE)
 
long_title<-c("Dominant Stocking Rate by Agent \ under different climate conditions")


ggplot(policy_map_weather, aes(x = x, y = y, fill = action)) +
  geom_tile(color = "white") +
  facet_grid(agent ~ weather) +
  scale_fill_brewer(palette = "Set2") +
  coord_fixed(clip = "off") +
  expand_limits(y = y_max + 0.8) +
  labs(
    title =paste(strwrap(long_title, width = 40), collapse = "\n"),
    x = "Grid X", y = "Grid Y"
  ) +
  theme_minimal(base_size = 8) +
  theme(
    plot.margin = margin(10, 20, 10, 10),
    strip.background = element_rect(fill = "grey90", color = NA)
  )

mdp_3agents_new_action_selection

2026-01-24