Data and objective

We fit Poisson GLMs with an offset of log(night_traps) for each guild (response), across all combinations of explanatory variable groups: (1) distance, (2) meteorological variables, (3) pesticide DSA (days since application). We report model selection metrics (AIC, log-likelihood, residual deviance, residual df, McFadden’s pseudo R²) and per-term inference (estimate, SE, z, p-value). All outputs are in English for scientific reporting.

suppressPackageStartupMessages({
  library(readr)
  library(dplyr)
  library(tidyr)
  library(janitor)
  library(purrr)
  library(glue)
})
# Path to the combined file containing predictors and guild abundances
data_path <- file.path("..", "data", "artho_2022_norm4_4guild_abundance_posion_env_20250904.csv")
df_raw <- read_csv(data_path, show_col_types = FALSE) %>% clean_names()


# בדיקה קצרה של שמות העמודות
names(df_raw)
##  [1] "trap_group"                 "date_of_collection"        
##  [3] "date_code"                  "distance"                  
##  [5] "subplot"                    "night_traps"               
##  [7] "temp_avg_avg"               "temp_max_avg"              
##  [9] "rh_avg_avg"                 "rh_max_avg"                
## [11] "days_since_april_1"         "days_since_april_18"       
## [13] "spirodi_polyalky_dsa"       "bt_chlorantraniliprole_dsa"
## [15] "safluf_prop_glyphosate_dsa" "bt_dsa"                    
## [17] "clothianidin_dsa"           "detritivorus"              
## [19] "omnivorous"                 "predacious"                
## [21] "herbivorous"                "tot_abund_4_giuld"         
## [23] "tot_abund_all_giuld"        "phytophagous"              
## [25] "granivorous"                "nectar_feeders"            
## [27] "sap_feeders"
# Keys and normalization
key_cols <- c("date_of_collection","date_code","distance","subplot","night_traps")

# Explanatory variable groups (reduced to mitigate collinearity)
group1_distance <- c("distance")
# Keep concise meteorology summaries and a single day counter
group2_meteo    <- c("temp_avg_avg","rh_avg_avg","days_since_april_18")
# Keep distinct DSA variables; drop perfectly collinear bt_dsa (keep clothianidin_dsa)
group3_dsa      <- c("spirodi_polyalky_dsa","bt_chlorantraniliprole_dsa",
                     "safluf_prop_glyphosate_dsa","clothianidin_dsa")

# Response variables: restrict to the six variables requested
# (after clean_names): tot_abund_all_giuld, tot_abund_4_giuld, herbivorous,
# predacious, omnivorous, detritivorus
response_guilds <- c(
  "tot_abund_all_giuld",
  "tot_abund_4_giuld",
  "herbivorous",
  "predacious",
  "omnivorous",
  "detritivorus"
)

# Keep only existing columns (robust to missing variables)
existing <- function(v) intersect(v, names(df_raw))
group1_distance <- existing(group1_distance)
group2_meteo    <- existing(group2_meteo)
group3_dsa      <- existing(group3_dsa)
response_guilds <- existing(response_guilds)

list(group1_distance = group1_distance,
     group2_meteo = group2_meteo,
     group3_dsa = group3_dsa,
     response_guilds = response_guilds)
## $group1_distance
## [1] "distance"
## 
## $group2_meteo
## [1] "temp_avg_avg"        "rh_avg_avg"          "days_since_april_18"
## 
## $group3_dsa
## [1] "spirodi_polyalky_dsa"       "bt_chlorantraniliprole_dsa"
## [3] "safluf_prop_glyphosate_dsa" "clothianidin_dsa"          
## 
## $response_guilds
## [1] "tot_abund_all_giuld" "tot_abund_4_giuld"   "herbivorous"        
## [4] "predacious"          "omnivorous"          "detritivorus"
df <- df_raw %>%
  mutate(
    distance = suppressWarnings(as.numeric(distance)),
    night_traps = suppressWarnings(as.numeric(night_traps))
  )

# Drop rows without valid night_traps
df <- df %>% filter(!is.na(night_traps) & night_traps > 0)

# Coerce response variables and predictors to numeric where appropriate
df <- df %>%
  mutate(across(all_of(response_guilds), ~ suppressWarnings(as.numeric(.))) ) %>%
  mutate(across(all_of(c(group2_meteo, group3_dsa)), ~ suppressWarnings(as.numeric(.))))

# Residualize DSA on core time/meteo predictors to remove shared time signal
base_time <- intersect(c("days_since_april_18","temp_avg_avg","rh_avg_avg"), names(df))
residualize <- function(y, X, df) {
  if (length(X) == 0 || !all(c(y, X) %in% names(df))) return(df[[y]])
  form <- as.formula(paste(y, "~", paste(X, collapse = " + ")))
  fit <- try(lm(form, data = df), silent = TRUE)
  if (inherits(fit, "try-error")) return(df[[y]])
  residuals(fit)
}
for (v in group3_dsa) {
  newv <- paste0(v, "_res")
  df[[newv]] <- residualize(v, base_time, df)
}
# Use residualized DSA variables going forward
group3_dsa <- paste0(group3_dsa, "_res")

# Center/scale continuous predictors for stability (does not change AIC)
scale_cols <- unique(c(group2_meteo, group3_dsa))
for (sc in scale_cols) {
  if (sc %in% names(df)) df[[sc]] <- as.numeric(scale(df[[sc]]))
}

# Remove rows where all predictors of a group are NA to avoid dropping everything in model frame
drop_all_na_rows <- function(data, predictors) {
  if (length(predictors) == 0) return(data)
  data %>% filter(rowSums(across(all_of(predictors), ~ !is.na(.))) > 0)
}
# All combinations of explanatory groups: 1, 2, 3, 1+2, 1+3, 2+3, 1+2+3
predictor_groups <- list(
  g1 = group1_distance,
  g2 = group2_meteo,
  g3 = group3_dsa,
  g12 = c(group1_distance, group2_meteo),
  g13 = c(group1_distance, group3_dsa),
  g23 = c(group2_meteo, group3_dsa),
  g123 = c(group1_distance, group2_meteo, group3_dsa)
)

fit_poisson_with_offset <- function(data, response, predictors) {
  # Intercept-only model
  if (length(predictors) == 0) {
    fml0 <- as.formula(paste(response, "~ 1 + offset(log(night_traps))"))
    return(glm(fml0, family = poisson(link = "log"), data = data))
  }
  dat <- data[, unique(c(response, predictors, "night_traps")), drop = FALSE]
  dat <- dat[complete.cases(dat[, c(response, "night_traps"), drop = FALSE]), , drop = FALSE]
  if (nrow(dat) == 0) return(NULL)
  ok_rows <- rowSums(!is.na(dat[, predictors, drop = FALSE])) > 0
  dat <- dat[ok_rows, , drop = FALSE]
  # Check rank; if aliased, automatically drop aliased columns and continue
  mm <- model.matrix(as.formula(paste("~", paste(predictors, collapse = " + "))), data = dat)
  q <- qr(mm)
  keep_idx <- sort(q$pivot[seq_len(q$rank)])
  keep_terms <- setdiff(colnames(mm)[keep_idx], "(Intercept)")
  predictors2 <- intersect(predictors, keep_terms)
  if (length(predictors2) == 0) {
    fml0 <- as.formula(paste(response, "~ 1 + offset(log(night_traps))"))
    return(glm(fml0, family = poisson(link = "log"), data = dat))
  }
  fml <- as.formula(paste(response, "~", paste(predictors2, collapse = " + "), "+ offset(log(night_traps))"))
  glm(fml, family = poisson(link = "log"), data = dat)
}

collect_aic <- function(model, response, group_name, predictors) {
  if (is.null(model)) return(NULL)
  tibble(
    response = response,
    model_group = group_name,
    predictors = paste(predictors, collapse = "+"),
    terms_in_model = paste(attr(terms(model), "term.labels"), collapse = "+"),
    n = nobs(model),
    aic = AIC(model)
  )
}
results <- map_dfr(response_guilds, function(resp) {
  map2_dfr(names(predictor_groups), predictor_groups, function(gname, preds) {
    # Use rows with non-missing response
    dat <- df %>% filter(!is.na(.data[[resp]]))
    if (nrow(dat) == 0) return(NULL)
    mod <- fit_poisson_with_offset(dat, resp, preds)
    collect_aic(mod, resp, gname, preds)
  })
})

results <- results %>% arrange(response, aic)
results
best_by_resp <- results %>% group_by(response) %>% slice_min(aic, n = 1, with_ties = FALSE) %>% ungroup()
best_by_resp
out_csv <- file.path("..", "data", "glmm_poisson_offset_aic_results.csv")
readr::write_csv(results, out_csv)
glue::glue("AIC table written to: {normalizePath(out_csv)}")
## AIC table written to: C:\Users\urish\Desktop\R\biodiversity_community\arthopods 2022\guild_abundance\data\glmm_poisson_offset_aic_results.csv

Extended model summaries (model-level and coefficient-level)

library(broom)

# helper to compute model-level statistics
model_stats <- function(model) {
  if (is.null(model)) return(NULL)
  ll <- as.numeric(logLik(model))
  dev <- suppressWarnings(model$deviance)
  df_res <- suppressWarnings(model$df.residual)
  k_used <- tryCatch({
    # number of non-offset coefficients including intercept
    length(coef(model))
  }, error = function(e) NA_integer_)
  # Pearson dispersion (useful diagnostic; for Poisson should be ~1)
  disp <- tryCatch({
    sum(residuals(model, type = "pearson")^2, na.rm = TRUE) / df_res
  }, error = function(e) NA_real_)
  tibble(logLik = ll, deviance = dev, df_residual = df_res, num_coefs = k_used, dispersion_pearson = disp)
}

# Fit null (intercept + offset) for each response to compute McFadden pseudo R2
null_model_for <- function(data, response) {
  f <- as.formula(glue("{response} ~ 1 + offset(log(night_traps))"))
  try(suppressWarnings(glm(f, family = poisson(link = "log"), data = data)), silent = TRUE)
}

# Build comprehensive model-level table
model_level <- map_dfr(response_guilds, function(resp) {
  map2_dfr(names(predictor_groups), predictor_groups, function(gname, preds) {
    dat <- df %>% filter(!is.na(.data[[resp]]))
    if (nrow(dat) == 0) return(NULL)
    mod <- fit_poisson_with_offset(dat, resp, preds)
    if (is.null(mod)) return(NULL)
    nullm <- null_model_for(dat, resp)
    ll_mod <- as.numeric(logLik(mod))
    ll_null <- if (!inherits(nullm, "try-error")) as.numeric(logLik(nullm)) else NA_real_
    pseudo_r2 <- if (!is.na(ll_null) && !is.na(ll_mod) && ll_null != 0) 1 - (ll_mod/ll_null) else NA_real_
    tibble(
      response = resp,
      model_group = gname,
      predictors = paste(preds, collapse = "+"),
      n = nobs(mod),
      aic = AIC(mod)
    ) %>% bind_cols(model_stats(mod)) %>% mutate(pseudo_r2_mcfadden = pseudo_r2)
  })
}) %>% arrange(response, aic)

# Coefficient-level table for all models
coef_level <- map_dfr(response_guilds, function(resp) {
  map2_dfr(names(predictor_groups), predictor_groups, function(gname, preds) {
    dat <- df %>% filter(!is.na(.data[[resp]]))
    if (nrow(dat) == 0) return(NULL)
    mod <- fit_poisson_with_offset(dat, resp, preds)
    if (is.null(mod)) return(NULL)
    broom::tidy(mod) %>%
      mutate(response = resp, model_group = gname, predictors = paste(preds, collapse = "+")) %>%
      relocate(response, model_group, predictors)
  })
})

# Save CSVs
out_model_csv <- file.path("..", "data", "glmm_poisson_offset_model_summary.csv")
out_coef_csv  <- file.path("..", "data", "glmm_poisson_offset_coefficients.csv")
readr::write_csv(model_level, out_model_csv)
readr::write_csv(coef_level, out_coef_csv)

glue::glue("Model-level summary: {normalizePath(out_model_csv)}")
## Model-level summary: C:\Users\urish\Desktop\R\biodiversity_community\arthopods 2022\guild_abundance\data\glmm_poisson_offset_model_summary.csv
glue::glue("Coefficient-level summary: {normalizePath(out_coef_csv)}")
## Coefficient-level summary: C:\Users\urish\Desktop\R\biodiversity_community\arthopods 2022\guild_abundance\data\glmm_poisson_offset_coefficients.csv

Display summary tables

library(knitr)

# Show best model per response
best_model_per_response <- model_level %>% group_by(response) %>% slice_min(aic, n = 1, with_ties = FALSE) %>% ungroup()

# Map model_group to concise human-readable group labels
group_labels <- c(
  g1 = "distance",
  g2 = "meteorological",
  g3 = "pesticide DSA",
  g12 = "distance + meteorological",
  g13 = "distance + pesticide DSA",
  g23 = "meteorological + pesticide DSA",
  g123 = "distance + meteorological + pesticide DSA"
)

best_model_per_response_view <- best_model_per_response %>%
  mutate(predictor_group = dplyr::recode(model_group, !!!group_labels)) %>%
  select(response, predictor_group, n, aic, logLik, deviance, df_residual, pseudo_r2_mcfadden, dispersion_pearson)

kable(best_model_per_response_view, caption = "Best model per response (lowest AIC)")
Best model per response (lowest AIC)
response predictor_group n aic logLik deviance df_residual pseudo_r2_mcfadden dispersion_pearson
detritivorus distance + meteorological + pesticide DSA 53 249.4499 -118.7249 116.2374 47 0.1928797 2.173912
herbivorous distance + meteorological 53 348.1516 -169.0758 230.1806 48 0.3502589 5.174823
omnivorous meteorological + pesticide DSA 53 614.1818 -302.0909 482.4087 48 0.0768007 12.613392
predacious meteorological + pesticide DSA 53 494.4716 -242.2358 255.6632 48 0.2588208 5.366473
tot_abund_4_giuld distance + meteorological + pesticide DSA 53 729.7470 -358.8735 458.4899 47 0.2514380 10.689687
tot_abund_all_giuld distance + meteorological + pesticide DSA 53 713.9048 -350.9524 440.7982 47 0.2623557 10.335330
# Show full model-level table (may be wide); truncate to key columns for display
model_level_view <- model_level %>% select(response, model_group, predictors, n, aic, logLik, deviance, df_residual, num_coefs, pseudo_r2_mcfadden, dispersion_pearson)
kable(model_level_view, caption = "Model-level summary across all predictor group combinations")
Model-level summary across all predictor group combinations
response model_group predictors n aic logLik deviance df_residual num_coefs pseudo_r2_mcfadden dispersion_pearson
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 249.4499 -118.7249 116.2374 47 6 0.1928797 2.173912
detritivorus g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 53 251.8197 -120.9098 120.6072 48 5 0.1780261 2.304039
detritivorus g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 257.0172 -123.5086 125.8048 48 5 0.1603591 2.263380
detritivorus g2 temp_avg_avg+rh_avg_avg+days_since_april_18 53 258.5464 -125.2732 129.3339 49 4 0.1483631 2.384323
detritivorus g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 284.4763 -139.2382 157.2639 50 3 0.0534260 3.027479
detritivorus g1 distance 53 290.2873 -143.1437 165.0749 51 2 0.0268754 3.359326
detritivorus g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 291.2877 -143.6438 166.0752 51 2 0.0234750 3.130862
herbivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 53 348.1516 -169.0758 230.1806 48 5 0.3502589 5.174823
herbivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 350.1506 -169.0753 230.1796 47 6 0.3502609 5.286280
herbivorous g1 distance 53 411.1296 -203.5648 299.1586 51 2 0.2177212 6.977246
herbivorous g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 412.2357 -203.1178 298.2646 50 3 0.2194389 7.167386
herbivorous g2 temp_avg_avg+rh_avg_avg+days_since_april_18 53 454.1497 -223.0748 338.1787 49 4 0.1427461 9.892848
herbivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 455.5560 -222.7780 337.5850 48 5 0.1438868 9.912867
herbivorous g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 522.0195 -259.0098 410.0485 51 2 0.0046519 12.737397
omnivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 614.1818 -302.0909 482.4087 48 5 0.0768007 12.613392
omnivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 615.8041 -301.9021 482.0311 47 6 0.0773778 12.879753
omnivorous g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 620.2954 -308.1477 494.5223 51 2 0.0582910 12.300352
omnivorous g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 621.8499 -307.9249 494.0768 50 3 0.0589717 12.546388
omnivorous g1 distance 53 657.4113 -326.7057 531.6382 51 2 0.0015773 13.280739
omnivorous g2 temp_avg_avg+rh_avg_avg+days_since_april_18 53 659.8535 -325.9268 530.0805 49 4 0.0039576 14.263796
omnivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 53 660.8516 -325.4258 529.0785 48 5 0.0054886 14.411525
predacious g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 494.4716 -242.2358 255.6632 48 5 0.2588208 5.366473
predacious g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 495.1876 -241.5938 254.3792 47 6 0.2607852 5.506423
predacious g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 542.2701 -269.1351 309.4617 51 2 0.1765160 6.554934
predacious g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 542.2749 -268.1374 307.4664 50 3 0.1795685 6.733711
predacious g2 temp_avg_avg+rh_avg_avg+days_since_april_18 53 584.9461 -288.4731 348.1377 49 4 0.1173468 8.329165
predacious g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 53 586.7495 -288.3747 347.9410 48 5 0.1176476 8.541530
predacious g1 distance 53 656.9948 -326.4974 424.1863 51 2 0.0010021 11.089736
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 729.7470 -358.8735 458.4899 47 6 0.2514380 10.689687
tot_abund_4_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 757.7729 -373.8864 488.5157 48 5 0.2201230 10.848563
tot_abund_4_giuld g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 811.8538 -402.9269 546.5967 50 3 0.1595485 11.968254
tot_abund_4_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 53 827.1505 -408.5753 557.8934 48 5 0.1477668 13.606941
tot_abund_4_giuld g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 842.6666 -419.3333 579.4095 51 2 0.1253270 12.155193
tot_abund_4_giuld g2 temp_avg_avg+rh_avg_avg+days_since_april_18 53 848.1212 -420.0606 580.8641 49 4 0.1238099 13.194211
tot_abund_4_giuld g1 distance 53 936.8698 -466.4349 673.6127 51 2 0.0270794 16.423702
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 713.9048 -350.9524 440.7982 47 6 0.2623557 10.335330
tot_abund_all_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 752.1170 -371.0585 481.0104 48 5 0.2200960 10.645047
tot_abund_all_giuld g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 794.4757 -394.2378 527.3690 50 3 0.1713768 11.547085
tot_abund_all_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 53 812.9069 -401.4534 541.8002 48 5 0.1562108 13.323578
tot_abund_all_giuld g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 835.8861 -415.9430 570.7794 51 2 0.1257561 11.870714
tot_abund_all_giuld g2 temp_avg_avg+rh_avg_avg+days_since_april_18 53 842.9170 -417.4585 573.8104 49 4 0.1225708 12.968303
tot_abund_all_giuld g1 distance 53 920.0703 -458.0351 654.9636 51 2 0.0372854 16.036936
# Show coefficients for best models only to keep readable
best_keys <- best_model_per_response %>% select(response, model_group)
coef_best <- coef_level %>% inner_join(best_keys, by = c("response","model_group"))
kable(coef_best, caption = "Coefficient-level summary for best models per response")
Coefficient-level summary for best models per response
response model_group predictors term estimate std.error statistic p.value
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) 0.4977550 0.0386852 12.8668078 0.0000000
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0009811 0.0001529 6.4175690 0.0000000
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.2795197 0.0482458 5.7936646 0.0000000
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.2158489 0.0320422 6.7363904 0.0000000
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 -0.2584309 0.0419881 -6.1548603 0.0000000
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.2432884 0.0255785 -9.5114276 0.0000000
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) 0.4896885 0.0389507 12.5720212 0.0000000
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0008620 0.0001556 5.5408005 0.0000000
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.2635290 0.0488633 5.3931833 0.0000001
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.2231588 0.0324274 6.8817987 0.0000000
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 -0.2533946 0.0427550 -5.9266672 0.0000000
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.2430275 0.0257402 -9.4415488 0.0000000
herbivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 (Intercept) -2.4874568 0.1542785 -16.1231574 0.0000000
herbivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 distance 0.0044413 0.0004474 9.9272593 0.0000000
herbivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 temp_avg_avg 0.8465343 0.1994738 4.2438375 0.0000220
herbivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 rh_avg_avg 0.6045424 0.1117678 5.4089139 0.0000001
herbivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 days_since_april_18 -0.7004636 0.1434267 -4.8837757 0.0000010
predacious g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) 0.0089855 0.0383160 0.2345098 0.8145892
predacious g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.1657244 0.0644627 2.5708587 0.0101447
predacious g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.1564267 0.0430743 3.6315510 0.0002817
predacious g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 -0.3137760 0.0571177 -5.4934988 0.0000000
predacious g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.3202042 0.0361089 -8.8677401 0.0000000
omnivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -1.0482949 0.0733218 -14.2971812 0.0000000
omnivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.1212204 0.0943475 1.2848282 0.1988523
omnivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg -0.1749226 0.0906633 -1.9293657 0.0536855
omnivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 0.0519767 0.0959270 0.5418359 0.5879316
omnivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.5803001 0.1145204 -5.0672220 0.0000004
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -1.9111531 0.1280931 -14.9200283 0.0000000
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0014321 0.0004564 3.1381449 0.0017002
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.5493433 0.1865198 2.9452277 0.0032272
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.6486883 0.1171916 5.5352784 0.0000000
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 -0.0023448 0.1632611 -0.0143621 0.9885411
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.1473424 0.0729328 -2.0202482 0.0433577

Top-3 models per response with Delta AIC

# Compute Delta AIC relative to best AIC within each response
top3 <- model_level %>%
  group_by(response) %>%
  mutate(delta_aic = aic - min(aic, na.rm = TRUE)) %>%
  arrange(aic, .by_group = TRUE) %>%
  slice_head(n = 3) %>%
  ungroup()

# Map concise group labels and format numbers to 3 decimals
top3_view <- top3 %>%
  mutate(predictor_group = dplyr::recode(model_group, !!!group_labels)) %>%
  transmute(
    response,
    predictor_group,
    n,
    aic = sprintf("%.3f", aic),
    delta_aic = sprintf("%.3f", delta_aic),
    logLik = sprintf("%.3f", logLik),
    deviance = sprintf("%.3f", deviance),
    df_residual,
    pseudo_r2_mcfadden = sprintf("%.3f", pseudo_r2_mcfadden),
    dispersion_pearson = sprintf("%.3f", dispersion_pearson)
  )

kable(top3_view, caption = "Top-3 models per response with Delta AIC (3 d.p.)")
Top-3 models per response with Delta AIC (3 d.p.)
response predictor_group n aic delta_aic logLik deviance df_residual pseudo_r2_mcfadden dispersion_pearson
detritivorus distance + meteorological + pesticide DSA 53 249.450 0.000 -118.725 116.237 47 0.193 2.174
detritivorus distance + meteorological 53 251.820 2.370 -120.910 120.607 48 0.178 2.304
detritivorus meteorological + pesticide DSA 53 257.017 7.567 -123.509 125.805 48 0.160 2.263
herbivorous distance + meteorological 53 348.152 0.000 -169.076 230.181 48 0.350 5.175
herbivorous distance + meteorological + pesticide DSA 53 350.151 1.999 -169.075 230.180 47 0.350 5.286
herbivorous distance 53 411.130 62.978 -203.565 299.159 51 0.218 6.977
omnivorous meteorological + pesticide DSA 53 614.182 0.000 -302.091 482.409 48 0.077 12.613
omnivorous distance + meteorological + pesticide DSA 53 615.804 1.622 -301.902 482.031 47 0.077 12.880
omnivorous pesticide DSA 53 620.295 6.114 -308.148 494.522 51 0.058 12.300
predacious meteorological + pesticide DSA 53 494.472 0.000 -242.236 255.663 48 0.259 5.366
predacious distance + meteorological + pesticide DSA 53 495.188 0.716 -241.594 254.379 47 0.261 5.506
predacious pesticide DSA 53 542.270 47.798 -269.135 309.462 51 0.177 6.555
tot_abund_4_giuld distance + meteorological + pesticide DSA 53 729.747 0.000 -358.873 458.490 47 0.251 10.690
tot_abund_4_giuld meteorological + pesticide DSA 53 757.773 28.026 -373.886 488.516 48 0.220 10.849
tot_abund_4_giuld distance + pesticide DSA 53 811.854 82.107 -402.927 546.597 50 0.160 11.968
tot_abund_all_giuld distance + meteorological + pesticide DSA 53 713.905 0.000 -350.952 440.798 47 0.262 10.335
tot_abund_all_giuld meteorological + pesticide DSA 53 752.117 38.212 -371.059 481.010 48 0.220 10.645
tot_abund_all_giuld distance + pesticide DSA 53 794.476 80.571 -394.238 527.369 50 0.171 11.547

Base-R GLM with Negative Binomial (NB2) for comparison

suppressPackageStartupMessages({ library(MASS) })
# Build same predictor groups as Poisson GLMs, but fit with NB2 and the same offset
fit_nb2_with_offset <- function(data, response, predictors) {
  # Intercept-only
  if (length(predictors) == 0) {
    fml0 <- as.formula(paste(response, "~ 1 + offset(log(night_traps))"))
    return(try(MASS::glm.nb(fml0, data = data), silent = TRUE))
  }
  dat <- data[, unique(c(response, predictors, "night_traps")), drop = FALSE]
  dat <- dat[complete.cases(dat[, c(response, "night_traps"), drop = FALSE]), , drop = FALSE]
  if (nrow(dat) == 0) return(NULL)
  ok_rows <- rowSums(!is.na(dat[, predictors, drop = FALSE])) > 0
  dat <- dat[ok_rows, , drop = FALSE]
  mm <- model.matrix(as.formula(paste("~", paste(predictors, collapse = " + "))), data = dat)
  q <- qr(mm)
  keep_idx <- sort(q$pivot[seq_len(q$rank)])
  keep_terms <- setdiff(colnames(mm)[keep_idx], "(Intercept)")
  predictors2 <- intersect(predictors, keep_terms)
  if (length(predictors2) == 0) {
    fml0 <- as.formula(paste(response, "~ 1 + offset(log(night_traps))"))
    return(try(MASS::glm.nb(fml0, data = dat), silent = TRUE))
  }
  fml <- as.formula(paste(response, "~", paste(predictors2, collapse = " + "), "+ offset(log(night_traps))"))
  try(MASS::glm.nb(fml, data = dat), silent = TRUE)
}

nb2_model_stats <- function(model) {
  if (inherits(model, "try-error") || is.null(model)) return(NULL)
  ll <- try(as.numeric(logLik(model)), silent = TRUE); if (inherits(ll, "try-error")) ll <- NA_real_
  dev <- suppressWarnings(model$deviance)
  df_res <- suppressWarnings(model$df.residual)
  k_used <- tryCatch(length(coef(model)), error = function(e) NA_integer_)
  disp <- tryCatch(sum(residuals(model, type = "pearson")^2, na.rm = TRUE) / df_res, error = function(e) NA_real_)
  tibble(logLik = ll, deviance = dev, df_residual = df_res, num_coefs = k_used, dispersion_pearson = disp)
}

glm_nb2_results <- map_dfr(response_guilds, function(resp) {
  map2_dfr(names(predictor_groups), predictor_groups, function(gname, preds) {
    dat <- df %>% filter(!is.na(.data[[resp]]))
    if (nrow(dat) == 0) return(NULL)
    mod <- fit_nb2_with_offset(dat, resp, preds)
    if (is.null(mod)) return(NULL)
    tibble(
      response = resp,
      model_group = gname,
      predictors = paste(preds, collapse = "+"),
      n = nobs(mod),
      aic = AIC(mod)
    ) %>% bind_cols(nb2_model_stats(mod))
  })
}) %>% arrange(response, aic)

# Compute LRT p-values vs null (intercept + offset) without precomputing null in mutate

compute_lrt_p <- function(resp, preds_mod) {
  dat <- df[!is.na(df[[resp]]), , drop = FALSE]
  if (inherits(preds_mod, "try-error") || is.null(preds_mod)) return(NA_real_)
  nullm <- try(MASS::glm.nb(as.formula(glue("{resp} ~ 1 + offset(log(night_traps))")), data = dat), silent = TRUE)
  if (inherits(nullm, "try-error")) return(NA_real_)
  out <- try(anova(nullm, preds_mod, test = "Chisq"), silent = TRUE)
  if (inherits(out, "try-error")) return(NA_real_)
  as.numeric(out$`Pr(>Chi)`[2])
}

# Refit models to get p-values (keep light; uses same formula)
glm_nb2_results <- glm_nb2_results %>%
  rowwise() %>%
  mutate(
    model_refit = list(try(fit_nb2_with_offset(df[!is.na(df[[response]]), , drop = FALSE], response, if (predictors=="") character(0) else strsplit(predictors, "\\+")[[1]]), silent = TRUE)),
    p_lrt_vs_null = list(compute_lrt_p(response, model_refit))
  ) %>%
  ungroup() %>%
  mutate(p_lrt_vs_null = purrr::map_dbl(p_lrt_vs_null, ~ if (length(.x)==1) as.numeric(.x) else NA_real_)) %>%
  dplyr::select(-model_refit)

glm_nb2_results
out_nb2 <- file.path("..","data","glm_nb2_results.csv")
readr::write_csv(glm_nb2_results, out_nb2)
glue::glue("NB2 GLM results written to: {normalizePath(out_nb2)}")
## NB2 GLM results written to: C:\Users\urish\Desktop\R\biodiversity_community\arthopods 2022\guild_abundance\data\glm_nb2_results.csv
library(knitr)
kable(glm_nb2_results, caption = "Base-R NB2 GLM results with offset(log(night_traps))")
Base-R NB2 GLM results with offset(log(night_traps))
response model_group predictors n aic logLik deviance df_residual num_coefs dispersion_pearson p_lrt_vs_null
detritivorus g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 53 230.9179 -109.4590 60.02584 48 5 0.9706842 NA
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 231.5345 -108.7672 60.57380 47 6 0.9568362 NA
detritivorus g2 temp_avg_avg+rh_avg_avg+days_since_april_18 53 232.0046 -111.0023 59.40735 49 4 0.9027055 NA
detritivorus g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 232.7487 -110.3744 59.63609 48 5 0.8764931 NA
detritivorus g1 distance 53 241.2360 -117.6180 59.44970 51 2 0.9915839 NA
detritivorus g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 241.6678 -116.8339 59.91701 50 3 0.9318899 NA
detritivorus g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 242.4919 -118.2460 59.81685 51 2 0.8915851 NA
herbivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 53 235.7324 -111.8662 55.82670 48 5 0.9954796 NA
herbivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 236.9152 -111.4576 55.67343 47 6 0.9960867 NA
herbivorous g1 distance 53 239.5904 -116.7952 55.40234 51 2 1.0184259 NA
herbivorous g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 240.7147 -116.3573 54.92511 50 3 0.9819846 NA
herbivorous g2 temp_avg_avg+rh_avg_avg+days_since_april_18 53 247.2343 -118.6171 54.91735 49 4 1.3009762 NA
herbivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 249.1398 -118.5699 54.91822 48 5 1.2934627 NA
herbivorous g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 253.7983 -123.8992 55.56088 51 2 1.3560450 NA
omnivorous g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 280.2042 -137.1021 56.62689 51 2 1.0387101 NA
omnivorous g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 281.9080 -136.9540 56.48363 50 3 1.0727710 NA
omnivorous g1 distance 53 284.4030 -139.2015 56.78748 51 2 0.9763958 NA
omnivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 284.8078 -136.4039 56.56751 48 5 1.0658923 NA
omnivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 286.4922 -136.2461 56.34836 47 6 1.1132489 NA
omnivorous g2 temp_avg_avg+rh_avg_avg+days_since_april_18 53 288.3379 -139.1690 56.79144 49 4 1.0952212 NA
omnivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 53 290.1964 -139.0982 56.77934 48 5 1.0839004 NA
predacious g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 362.9750 -175.4875 51.13993 48 5 0.9977109 NA
predacious g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 363.6946 -174.8473 50.61627 47 6 1.0369181 NA
predacious g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 370.6088 -182.3044 53.07275 51 2 1.1361113 NA
predacious g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 371.6684 -181.8342 52.89311 50 3 1.2127588 NA
predacious g2 temp_avg_avg+rh_avg_avg+days_since_april_18 53 376.7518 -183.3759 52.35792 49 4 1.2318555 NA
predacious g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 53 378.2916 -183.1458 52.13997 48 5 1.2880138 NA
predacious g1 distance 53 385.8941 -189.9470 54.01921 51 2 1.5979396 NA
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 430.4276 -208.2138 53.78342 47 6 1.2735884 NA
tot_abund_4_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 436.0154 -212.0077 54.82526 48 5 1.1914701 NA
tot_abund_4_giuld g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 436.5277 -214.2639 54.78500 50 3 1.2001258 NA
tot_abund_4_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 53 439.6157 -213.8079 54.48769 48 5 1.3805247 NA
tot_abund_4_giuld g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 440.8688 -217.4344 55.52156 51 2 1.1044851 NA
tot_abund_4_giuld g2 temp_avg_avg+rh_avg_avg+days_since_april_18 53 441.5291 -215.7646 55.00077 49 4 1.2162380 NA
tot_abund_4_giuld g1 distance 53 446.0600 -220.0300 55.56854 51 2 1.4676311 NA
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 430.0797 -208.0399 53.57842 47 6 1.2879081 NA
tot_abund_all_giuld g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 436.5837 -214.2919 54.64063 50 3 1.2055741 NA
tot_abund_all_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 437.5211 -212.7605 54.78928 48 5 1.1859325 NA
tot_abund_all_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 53 440.1614 -214.0807 54.34387 48 5 1.4020052 NA
tot_abund_all_giuld g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res 53 442.3255 -218.1628 55.45618 51 2 1.0892790 NA
tot_abund_all_giuld g2 temp_avg_avg+rh_avg_avg+days_since_april_18 53 443.2336 -216.6168 54.95553 49 4 1.2086818 NA
tot_abund_all_giuld g1 distance 53 446.8842 -220.4421 55.45180 51 2 1.4839060 NA
# Per-term coefficients (NB2 GLM) with p-values
glm_nb2_coefs <- map_dfr(response_guilds, function(resp) {
  map2_dfr(names(predictor_groups), predictor_groups, function(gname, preds) {
    dat <- df[!is.na(df[[resp]]), , drop = FALSE]
    if (nrow(dat) == 0) return(NULL)
    mod <- fit_nb2_with_offset(dat, resp, preds)
    if (inherits(mod, "try-error") || is.null(mod)) return(NULL)
    broom::tidy(mod) %>% mutate(response = resp, model_group = gname, predictors = paste(preds, collapse = "+")) %>%
      relocate(response, model_group, predictors)
  })
})

out_nb2_coef <- file.path("..","data","glm_nb2_coefficients.csv")
readr::write_csv(glm_nb2_coefs, out_nb2_coef)
glue::glue("NB2 GLM coefficients written to: {normalizePath(out_nb2_coef)}")
## NB2 GLM coefficients written to: C:\Users\urish\Desktop\R\biodiversity_community\arthopods 2022\guild_abundance\data\glm_nb2_coefficients.csv
kable(glm_nb2_coefs, caption = "NB2 GLM per-term coefficients (estimate, SE, z, p-value)")
NB2 GLM per-term coefficients (estimate, SE, z, p-value)
response model_group predictors term estimate std.error statistic p.value
tot_abund_all_giuld g1 distance (Intercept) 0.5411765 0.1232218 4.3918883 0.0000112
tot_abund_all_giuld g1 distance distance 0.0010888 0.0005548 1.9627078 0.0496801
tot_abund_all_giuld g2 temp_avg_avg+rh_avg_avg+days_since_april_18 (Intercept) 0.6745199 0.0847988 7.9543603 0.0000000
tot_abund_all_giuld g2 temp_avg_avg+rh_avg_avg+days_since_april_18 temp_avg_avg 0.2859487 0.1354921 2.1104460 0.0348200
tot_abund_all_giuld g2 temp_avg_avg+rh_avg_avg+days_since_april_18 rh_avg_avg 0.2705642 0.1014309 2.6674744 0.0076424
tot_abund_all_giuld g2 temp_avg_avg+rh_avg_avg+days_since_april_18 days_since_april_18 -0.2684130 0.1264664 -2.1224048 0.0338038
tot_abund_all_giuld g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) 0.6847911 0.0867511 7.8937414 0.0000000
tot_abund_all_giuld g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.2331266 0.0881920 -2.6433978 0.0082079
tot_abund_all_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 (Intercept) 0.4778181 0.1115826 4.2821926 0.0000185
tot_abund_all_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 distance 0.0012056 0.0004998 2.4120281 0.0158641
tot_abund_all_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 temp_avg_avg 0.2751004 0.1306431 2.1057400 0.0352269
tot_abund_all_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 rh_avg_avg 0.2899897 0.0975244 2.9735109 0.0029441
tot_abund_all_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 days_since_april_18 -0.2574888 0.1219744 -2.1110067 0.0347717
tot_abund_all_giuld g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) 0.4304645 0.1119377 3.8455710 0.0001203
tot_abund_all_giuld g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0015272 0.0005005 3.0516470 0.0022759
tot_abund_all_giuld g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.2839655 0.0833290 -3.4077626 0.0006550
tot_abund_all_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) 0.6496300 0.0792200 8.2003276 0.0000000
tot_abund_all_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.2806011 0.1265843 2.2167131 0.0266427
tot_abund_all_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.2375815 0.0946020 2.5113785 0.0120261
tot_abund_all_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 -0.2579072 0.1180561 -2.1846149 0.0289171
tot_abund_all_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.2339065 0.0790601 -2.9585900 0.0030905
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) 0.3963995 0.1013970 3.9093811 0.0000925
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0015197 0.0004507 3.3719134 0.0007465
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.2659339 0.1181083 2.2516104 0.0243469
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.2487477 0.0878706 2.8308405 0.0046426
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 -0.2398338 0.1101142 -2.1780457 0.0294026
tot_abund_all_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.2822621 0.0734911 -3.8407650 0.0001227
tot_abund_4_giuld g1 distance (Intercept) 0.5339693 0.1261437 4.2330224 0.0000231
tot_abund_4_giuld g1 distance distance 0.0009794 0.0005686 1.7226018 0.0849606
tot_abund_4_giuld g2 temp_avg_avg+rh_avg_avg+days_since_april_18 (Intercept) 0.6462532 0.0862615 7.4917957 0.0000000
tot_abund_4_giuld g2 temp_avg_avg+rh_avg_avg+days_since_april_18 temp_avg_avg 0.2701882 0.1377747 1.9610867 0.0498689
tot_abund_4_giuld g2 temp_avg_avg+rh_avg_avg+days_since_april_18 rh_avg_avg 0.2781095 0.1031532 2.6960817 0.0070160
tot_abund_4_giuld g2 temp_avg_avg+rh_avg_avg+days_since_april_18 days_since_april_18 -0.2623007 0.1286913 -2.0382167 0.0415283
tot_abund_4_giuld g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) 0.6579119 0.0884181 7.4409199 0.0000000
tot_abund_4_giuld g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.2332490 0.0898806 -2.5950982 0.0094564
tot_abund_4_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 (Intercept) 0.4701347 0.1144823 4.1066147 0.0000402
tot_abund_4_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 distance 0.0010899 0.0005137 2.1217078 0.0338623
tot_abund_4_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 temp_avg_avg 0.2617957 0.1340401 1.9531138 0.0508061
tot_abund_4_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 rh_avg_avg 0.2958435 0.1001335 2.9544913 0.0031318
tot_abund_4_giuld g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 days_since_april_18 -0.2502107 0.1252688 -1.9973900 0.0457828
tot_abund_4_giuld g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) 0.4220806 0.1153385 3.6594940 0.0002527
tot_abund_4_giuld g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0014288 0.0005165 2.7660271 0.0056744
tot_abund_4_giuld g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.2812492 0.0859119 -3.2736944 0.0010615
tot_abund_4_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) 0.6211014 0.0807458 7.6920595 0.0000000
tot_abund_4_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.2647459 0.1289599 2.0529318 0.0400792
tot_abund_4_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.2448595 0.0963925 2.5402341 0.0110778
tot_abund_4_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 -0.2518424 0.1203708 -2.0922224 0.0364186
tot_abund_4_giuld g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.2349707 0.0805186 -2.9182171 0.0035204
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) 0.3881955 0.1047865 3.7046319 0.0002117
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0014107 0.0004669 3.0213294 0.0025167
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.2528964 0.1220841 2.0714943 0.0383126
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.2554420 0.0909292 2.8092401 0.0049659
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 -0.2322386 0.1139659 -2.0377907 0.0415709
tot_abund_4_giuld g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.2801418 0.0760356 -3.6843486 0.0002293
herbivorous g1 distance (Intercept) -2.2202541 0.2718505 -8.1671890 0.0000000
herbivorous g1 distance distance 0.0043612 0.0011470 3.8022640 0.0001434
herbivorous g2 temp_avg_avg+rh_avg_avg+days_since_april_18 (Intercept) -1.5135701 0.2016437 -7.5061607 0.0000000
herbivorous g2 temp_avg_avg+rh_avg_avg+days_since_april_18 temp_avg_avg 0.8846630 0.3461889 2.5554339 0.0106056
herbivorous g2 temp_avg_avg+rh_avg_avg+days_since_april_18 rh_avg_avg 0.5788129 0.2421561 2.3902477 0.0168370
herbivorous g2 temp_avg_avg+rh_avg_avg+days_since_april_18 days_since_april_18 -0.8021195 0.3022580 -2.6537579 0.0079601
herbivorous g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -1.2834987 0.2164412 -5.9300107 0.0000000
herbivorous g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res 0.0944774 0.2174848 0.4344093 0.6639912
herbivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 (Intercept) -2.3405935 0.2564860 -9.1256202 0.0000000
herbivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 distance 0.0039693 0.0010408 3.8137214 0.0001369
herbivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 temp_avg_avg 0.6438682 0.3099020 2.0776514 0.0377415
herbivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 rh_avg_avg 0.5681405 0.2131229 2.6657883 0.0076808
herbivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 days_since_april_18 -0.5550489 0.2707424 -2.0500993 0.0403547
herbivorous g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -2.3001359 0.2740198 -8.3940492 0.0000000
herbivorous g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0047887 0.0011485 4.1696406 0.0000305
herbivorous g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.1572564 0.1932100 -0.8139143 0.4156941
herbivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -1.5149713 0.2014394 -7.5207288 0.0000000
herbivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.8851814 0.3459519 2.5586832 0.0105069
herbivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.5771902 0.2421068 2.3840315 0.0171241
herbivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 -0.8003062 0.3020216 -2.6498309 0.0080532
herbivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res 0.0580329 0.1892697 0.3066148 0.7591366
herbivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -2.4030444 0.2580013 -9.3140789 0.0000000
herbivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0042750 0.0010387 4.1156056 0.0000386
herbivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.6260751 0.3084561 2.0297054 0.0423865
herbivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.5608334 0.2116654 2.6496227 0.0080582
herbivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 -0.5438822 0.2698342 -2.0156159 0.0438402
herbivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.1544292 0.1617046 -0.9550085 0.3395734
predacious g1 distance (Intercept) 0.0596767 0.1281890 0.4655372 0.6415468
predacious g1 distance distance 0.0002489 0.0005798 0.4291877 0.6677867
predacious g2 temp_avg_avg+rh_avg_avg+days_since_april_18 (Intercept) 0.0458654 0.0853388 0.5374503 0.5909566
predacious g2 temp_avg_avg+rh_avg_avg+days_since_april_18 temp_avg_avg 0.1733624 0.1371051 1.2644494 0.2060688
predacious g2 temp_avg_avg+rh_avg_avg+days_since_april_18 rh_avg_avg 0.2362047 0.1014161 2.3290653 0.0198556
predacious g2 temp_avg_avg+rh_avg_avg+days_since_april_18 days_since_april_18 -0.3095009 0.1277065 -2.4235330 0.0153704
predacious g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) 0.0356449 0.0832043 0.4284020 0.6683585
predacious g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.3267050 0.0859123 -3.8027727 0.0001431
predacious g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 (Intercept) -0.0136888 0.1161792 -0.1178245 0.9062067
predacious g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 distance 0.0003876 0.0005224 0.7419902 0.4580933
predacious g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 temp_avg_avg 0.1694000 0.1369662 1.2368010 0.2161610
predacious g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 rh_avg_avg 0.2453683 0.1012444 2.4235253 0.0153707
predacious g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 days_since_april_18 -0.3022335 0.1277248 -2.3662868 0.0179675
predacious g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -0.0476205 0.1129664 -0.4215462 0.6733563
predacious g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0005361 0.0005068 1.0579508 0.2900779
predacious g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.3357372 0.0855122 -3.9261920 0.0000863
predacious g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -0.0013142 0.0750184 -0.0175187 0.9860228
predacious g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.1610528 0.1202531 1.3392823 0.1804788
predacious g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.1747648 0.0884477 1.9759120 0.0481647
predacious g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 -0.2939358 0.1117885 -2.6293910 0.0085538
predacious g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.3257928 0.0742271 -4.3891383 0.0000114
predacious g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -0.0886064 0.1022097 -0.8669079 0.3859924
predacious g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0005596 0.0004554 1.2288584 0.2191249
predacious g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.1553011 0.1196580 1.2978742 0.1943306
predacious g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.1846517 0.0878887 2.1009717 0.0356435
predacious g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 -0.2830411 0.1113669 -2.5415193 0.0110372
predacious g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.3342173 0.0738055 -4.5283554 0.0000059
omnivorous g1 distance (Intercept) -0.8567455 0.2954145 -2.9001470 0.0037299
omnivorous g1 distance distance -0.0005089 0.0013486 -0.3773267 0.7059308
omnivorous g2 temp_avg_avg+rh_avg_avg+days_since_april_18 (Intercept) -0.9340661 0.2180902 -4.2829345 0.0000184
omnivorous g2 temp_avg_avg+rh_avg_avg+days_since_april_18 temp_avg_avg 0.1294605 0.3438297 0.3765249 0.7065267
omnivorous g2 temp_avg_avg+rh_avg_avg+days_since_april_18 rh_avg_avg 0.0482613 0.2614816 0.1845688 0.8535673
omnivorous g2 temp_avg_avg+rh_avg_avg+days_since_april_18 days_since_april_18 -0.0128443 0.3239243 -0.0396523 0.9683703
omnivorous g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -1.0282791 0.2109207 -4.8751931 0.0000011
omnivorous g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.5279825 0.2184018 -2.4174824 0.0156283
omnivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 (Intercept) -0.8513642 0.2950896 -2.8851040 0.0039128
omnivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 distance -0.0005788 0.0013489 -0.4290515 0.6678857
omnivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 temp_avg_avg 0.1001511 0.3429856 0.2919981 0.7702881
omnivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 rh_avg_avg 0.0265707 0.2612859 0.1016919 0.9190013
omnivorous g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 days_since_april_18 0.0289152 0.3238362 0.0892895 0.9288519
omnivorous g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -1.1533367 0.2876593 -4.0093846 0.0000609
omnivorous g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0007957 0.0012975 0.6132732 0.5396957
omnivorous g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.5863773 0.2197654 -2.6681971 0.0076260
omnivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -1.0670524 0.2100974 -5.0788461 0.0000004
omnivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.1123494 0.3241252 0.3466236 0.7288741
omnivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg -0.1247032 0.2524541 -0.4939640 0.6213316
omnivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 0.0758380 0.3076285 0.2465246 0.8052762
omnivorous g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.5858960 0.2283961 -2.5652625 0.0103098
omnivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -1.2026062 0.2870672 -4.1892842 0.0000280
omnivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0008665 0.0012891 0.6722269 0.5014392
omnivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.1543763 0.3245473 0.4756666 0.6343119
omnivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg -0.1195317 0.2530833 -0.4723017 0.6367115
omnivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 0.0328038 0.3079626 0.1065188 0.9151708
omnivorous g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.6466404 0.2310491 -2.7987140 0.0051307
detritivorus g1 distance (Intercept) -1.7798831 0.2036032 -8.7419190 0.0000000
detritivorus g1 distance distance 0.0016384 0.0008778 1.8665075 0.0619704
detritivorus g2 temp_avg_avg+rh_avg_avg+days_since_april_18 (Intercept) -1.6693798 0.1349276 -12.3724123 0.0000000
detritivorus g2 temp_avg_avg+rh_avg_avg+days_since_april_18 temp_avg_avg 0.5417794 0.2421413 2.2374518 0.0252568
detritivorus g2 temp_avg_avg+rh_avg_avg+days_since_april_18 rh_avg_avg 0.6772412 0.1655619 4.0905629 0.0000430
detritivorus g2 temp_avg_avg+rh_avg_avg+days_since_april_18 days_since_april_18 0.0050523 0.2207710 0.0228846 0.9817423
detritivorus g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -1.5230872 0.1466809 -10.3836771 0.0000000
detritivorus g3 spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.1674497 0.1498516 -1.1174369 0.2638076
detritivorus g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 (Intercept) -1.9129764 0.1821863 -10.5001115 0.0000000
detritivorus g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 distance 0.0014534 0.0007482 1.9424915 0.0520776
detritivorus g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 temp_avg_avg 0.5142141 0.2356098 2.1824816 0.0290740
detritivorus g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 rh_avg_avg 0.6683738 0.1607592 4.1576080 0.0000322
detritivorus g12 distance+temp_avg_avg+rh_avg_avg+days_since_april_18 days_since_april_18 0.0354894 0.2158462 0.1644199 0.8694006
detritivorus g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -1.7917597 0.2003361 -8.9437690 0.0000000
detritivorus g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0016088 0.0008623 1.8656033 0.0620969
detritivorus g13 distance+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.1655800 0.1457463 -1.1360837 0.2559215
detritivorus g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -1.6759738 0.1330422 -12.5973119 0.0000000
detritivorus g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.5438661 0.2387946 2.2775477 0.0227535
detritivorus g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.6576709 0.1632185 4.0293901 0.0000559
detritivorus g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 0.0020326 0.2170067 0.0093664 0.9925268
detritivorus g23 temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.1349537 0.1193132 -1.1310874 0.2580183
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res (Intercept) -1.9199736 0.1792139 -10.7133078 0.0000000
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res distance 0.0014551 0.0007346 1.9808765 0.0476051
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res temp_avg_avg 0.5179191 0.2315740 2.2365165 0.0253180
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res rh_avg_avg 0.6479715 0.1577347 4.1079832 0.0000399
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res days_since_april_18 0.0308542 0.2112567 0.1460508 0.8838813
detritivorus g123 distance+temp_avg_avg+rh_avg_avg+days_since_april_18+spirodi_polyalky_dsa_res+bt_chlorantraniliprole_dsa_res+safluf_prop_glyphosate_dsa_res+clothianidin_dsa_res spirodi_polyalky_dsa_res -0.1357530 0.1141076 -1.1896924 0.2341673

GLMM distance structure selection (linear vs spline; random slope; Poisson vs NB)

suppressPackageStartupMessages({
  library(glmmTMB)
  library(splines)
  library(performance)
  library(broom.mixed)
})
# Choose grouping variable: prefer 'subplot' then 'trap_group' then 'site' if present
candidate_groups <- c("subplot","trap_group","site")
group_var <- intersect(candidate_groups, names(df))
group_var <- if (length(group_var) > 0) group_var[[1]] else NA_character_

if (is.na(group_var)) {
  stop("No suitable grouping variable found among: ", paste(candidate_groups, collapse = ", "))
}

# Ensure required columns exist
required_base <- c("distance", group_var, "night_traps")
missing_req <- setdiff(required_base, names(df))
if (length(missing_req) > 0) stop("Missing required columns: ", paste(missing_req, collapse = ", "))

# Model fitting wrappers ----------------------------------------------------
formulas_for <- function(resp, with_nb = FALSE) {
  fam <- if (with_nb) glmmTMB::nbinom2 else poisson
  list(
    M0   = as.formula(paste0(resp, " ~ 1 + offset(log(night_traps)) + (1|", group_var, ")")),
    M1   = as.formula(paste0(resp, " ~ distance + offset(log(night_traps)) + (1|", group_var, ")")),
    M1rs = as.formula(paste0(resp, " ~ distance + offset(log(night_traps)) + (distance|", group_var, ")")),
    M2   = as.formula(paste0(resp, " ~ ns(distance, df=3) + offset(log(night_traps)) + (1|", group_var, ")"))
  ) -> fmls
  list(fmls = fmls, family = fam)
}

fit_models_once <- function(data, resp, with_nb = FALSE) {
  ff <- formulas_for(resp, with_nb)
  fam <- ff$family
  fmls <- ff$fmls
  # Keep rows with needed columns
  keep_cols <- unique(c(resp, "distance", group_var, "night_traps"))
  dat <- data[, keep_cols, drop = FALSE]
  dat <- dat[complete.cases(dat[, c(resp, "distance", "night_traps"), drop = FALSE]), , drop = FALSE]
  if (nrow(dat) == 0) return(NULL)
  # Guard: positive night_traps
  dat <- dat[dat$night_traps > 0, , drop = FALSE]
  # Fit models; use try to continue on failure
  mods <- lapply(fmls, function(f) try(glmmTMB::glmmTMB(f, family = fam, data = dat), silent = TRUE))
  mods
}

pearson_dispersion <- function(model) {
  # For glmmTMB, use Pearson residuals over residual df approx
  if (inherits(model, "try-error") || is.null(model)) return(NA_real_)
  r <- try(residuals(model, type = "pearson"), silent = TRUE)
  if (inherits(r, "try-error")) return(NA_real_)
  n <- length(r)
  # Approx df = n - number of fixed-effect parameters
  k <- tryCatch(length(fixef(model)$cond), error = function(e) NA_integer_)
  df_res <- if (!is.na(k)) max(1L, n - k) else n
  sum(r^2, na.rm = TRUE) / df_res
}

model_metrics <- function(model) {
  if (inherits(model, "try-error") || is.null(model)) {
    return(tibble(aic = NA_real_, logLik = NA_real_, df = NA_integer_, dispersion_pearson = NA_real_, r2_marginal = NA_real_, r2_conditional = NA_real_))
  }
  ll <- try(as.numeric(logLik(model)), silent = TRUE)
  ll <- if (inherits(ll, "try-error")) NA_real_ else ll
  aicv <- try(AIC(model), silent = TRUE)
  aicv <- if (inherits(aicv, "try-error")) NA_real_ else aicv
  dfv <- try(attr(logLik(model), "df"), silent = TRUE)
  dfv <- if (inherits(dfv, "try-error")) NA_integer_ else as.integer(dfv)
  disp <- pearson_dispersion(model)
  r2v <- try(performance::r2_nakagawa(model), silent = TRUE)
  r2m <- r2c <- NA_real_
  if (!inherits(r2v, "try-error")) {
    r2m <- suppressWarnings(r2v$R2_marginal)
    r2c <- suppressWarnings(r2v$R2_conditional)
  }
  tibble(aic = aicv, logLik = ll, df = dfv, dispersion_pearson = disp, r2_marginal = r2m, r2_conditional = r2c)
}

lrt_p <- function(m1, m0) {
  if (inherits(m1, "try-error") || inherits(m0, "try-error")) return(NA_real_)
  out <- try(anova(m0, m1, test = "Chisq"), silent = TRUE)
  if (inherits(out, "try-error")) return(NA_real_)
  p <- try(as.numeric(out$`Pr(>Chisq)`[2]), silent = TRUE)
  if (inherits(p, "try-error")) NA_real_ else p
}

best_family_flag <- function(dispersion_poisson) {
  # decide if NB refit is needed based on M1 dispersion
  if (is.na(dispersion_poisson)) return("poisson")
  if (dispersion_poisson > 1.2) "nbinom2" else "poisson"
}
# Responses to process
responses <- response_guilds

results_list <- list()
recom_list <- list()

for (resp in responses) {
  # First pass: Poisson
  mods_p <- fit_models_once(df, resp, with_nb = FALSE)
  if (is.null(mods_p)) next
  met_p <- lapply(mods_p, model_metrics)
  met_p <- bind_rows(met_p, .id = "model_id") %>% mutate(response = resp, family = "poisson")

  # Overdispersion check on M1
  disp_M1 <- met_p %>% filter(model_id == "M1") %>% pull(dispersion_pearson)
  fam_choice <- best_family_flag(disp_M1)

  if (identical(fam_choice, "nbinom2")) {
    mods_nb <- fit_models_once(df, resp, with_nb = TRUE)
    met_nb <- lapply(mods_nb, model_metrics)
    met_nb <- bind_rows(met_nb, .id = "model_id") %>% mutate(response = resp, family = "nbinom2")
    met_all <- bind_rows(met_p, met_nb)
  } else {
    met_all <- met_p
  }

  # LRTs (use the family actually used for comparison; prefer NB if present)
  use_nb <- any(met_all$family == "nbinom2", na.rm = TRUE)
  mods_use <- if (use_nb) fit_models_once(df, resp, with_nb = TRUE) else mods_p
  p_M1_vs_M0   <- lrt_p(mods_use[["M1"]], mods_use[["M0"]])
  p_M1rs_vs_M1 <- lrt_p(mods_use[["M1rs"]], mods_use[["M1"]])

  # Linear vs spline by AIC within chosen family
  aic_lin <- met_all %>% filter(family == if (use_nb) "nbinom2" else "poisson", model_id == "M1") %>% pull(aic)
  aic_spl <- met_all %>% filter(family == if (use_nb) "nbinom2" else "poisson", model_id == "M2") %>% pull(aic)
  delta_aic_M2_vs_M1 <- if (length(aic_lin) == 1 && length(aic_spl) == 1) aic_lin - aic_spl else NA_real_

  # Recommendation rules
  fam_rec <- if (use_nb) "nbinom2" else "poisson"
  slope_rec <- if (!is.na(p_M1rs_vs_M1) && p_M1rs_vs_M1 <= 0.05) "random slope" else "random intercept only"
  shape_rec <- if (!is.na(delta_aic_M2_vs_M1) && delta_aic_M2_vs_M1 >= 2) "spline (ns(df=3))" else "linear"
  need_dist <- if (!is.na(p_M1_vs_M0) && p_M1_vs_M0 <= 0.05) TRUE else FALSE
  final_rec <- if (need_dist) paste(shape_rec, "+", slope_rec, "+", fam_rec) else paste("no distance effect (", fam_rec, ")")

  # Store recommendation
  recom_list[[resp]] <- tibble(
    response = resp,
    family_recommended = fam_rec,
    distance_effect_present = need_dist,
    prefer_random_slope = identical(slope_rec, "random slope"),
    prefer_spline = identical(shape_rec, "spline (ns(df=3))"),
    delta_aic_spline_vs_linear = delta_aic_M2_vs_M1,
    p_lrt_M1_vs_M0 = p_M1_vs_M0,
    p_lrt_M1rs_vs_M1 = p_M1rs_vs_M1,
    recommendation = final_rec
  )

  # Store model table
  results_list[[resp]] <- met_all %>% mutate(response = resp)
}
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
## Random effect variances not available. Returned R2 does not account for random effects.
glmm_distance_table <- bind_rows(results_list)
glmm_distance_recommend <- bind_rows(recom_list)
out1 <- file.path("..","data","glmm_distance_model_table.csv")
out2 <- file.path("..","data","glmm_distance_recommendation.csv")
readr::write_csv(glmm_distance_table, out1)
readr::write_csv(glmm_distance_recommend, out2)
glue::glue("GLMM distance model table: {normalizePath(out1)}")
## GLMM distance model table: C:\Users\urish\Desktop\R\biodiversity_community\arthopods 2022\guild_abundance\data\glmm_distance_model_table.csv
glue::glue("GLMM distance recommendation: {normalizePath(out2)}")
## GLMM distance recommendation: C:\Users\urish\Desktop\R\biodiversity_community\arthopods 2022\guild_abundance\data\glmm_distance_recommendation.csv
library(knitr)

# Compact view per response: show best AIC within chosen family
best_family <- glmm_distance_recommend %>% transmute(response, family = ifelse(family_recommended=="nbinom2","nbinom2","poisson"))
best_by <- glmm_distance_table %>% inner_join(best_family, by = c("response","family")) %>%
  group_by(response, family) %>% slice_min(aic, n = 1, with_ties = FALSE) %>% ungroup()

kable(best_by, caption = "Best distance structure per response (within recommended family)")
Best distance structure per response (within recommended family)
model_id aic logLik df dispersion_pearson r2_marginal r2_conditional response family
M2 234.2970 -111.1485 6 0.9784877 1.0000000 1 detritivorus nbinom2
M2 237.0952 -112.5476 6 0.8554230 1.0000000 NA herbivorous nbinom2
M0 284.5329 -139.2665 3 0.9788123 0.0000000 1 omnivorous nbinom2
M2 365.1417 -176.5709 6 1.2508474 1.0000000 NA predacious nbinom2
M2 431.3185 -209.6592 6 1.2180694 0.9500276 1 tot_abund_4_giuld nbinom2
M2 433.5504 -210.7752 6 1.3017909 0.9987793 1 tot_abund_all_giuld nbinom2
# Print concise recommendation lines
recs <- glmm_distance_recommend %>% mutate(
  message = sprintf("%s: %s", response, recommendation)
)
cat(paste0(recs$message, collapse = "\n"))
## tot_abund_all_giuld: no distance effect ( nbinom2 )
## tot_abund_4_giuld: no distance effect ( nbinom2 )
## herbivorous: spline (ns(df=3)) + random intercept only + nbinom2
## predacious: no distance effect ( nbinom2 )
## omnivorous: no distance effect ( nbinom2 )
## detritivorus: no distance effect ( nbinom2 )

Data support for distance and partial effect visualization

library(dplyr)
distance_support <- df %>%
  transmute(group = .data[[group_var]], distance = distance) %>%
  filter(!is.na(group), !is.na(distance)) %>%
  group_by(group) %>%
  summarise(n = n(), unique_distance = n_distinct(distance), .groups = "drop") %>%
  arrange(desc(unique_distance))
knitr::kable(distance_support, caption = "Unique distance values and coverage by group")
Unique distance values and coverage by group
group n unique_distance
center 19 4
north 12 3
south 13 3
east 4 1
west 5 1
library(ggplot2)

# Helper: build newdata grid for predictions holding offset and grouping reasonable
make_grid <- function(data, resp, family_use, use_spline = FALSE, grid_n = 100) {
  d <- data %>% filter(!is.na(.data[[resp]]), !is.na(distance), !is.na(.data[[group_var]]), night_traps > 0)
  if (nrow(d) == 0) return(NULL)
  rng <- range(d$distance, na.rm = TRUE)
  grid <- tibble(
    distance = seq(rng[1], rng[2], length.out = grid_n),
    night_traps = median(d$night_traps, na.rm = TRUE)
  )
  # choose most frequent group as representative for conditional predictions
  gstar <- d %>% count(.data[[group_var]], sort = TRUE) %>% slice_head(n = 1) %>% pull(1)
  grid[[group_var]] <- gstar
  grid
}

fit_and_predict <- function(data, resp, family_use, model_type = c("linear","spline"), grid_n = 100) {
  model_type <- match.arg(model_type)
  with_nb <- identical(family_use, "nbinom2")
  mods <- fit_models_once(data, resp, with_nb = with_nb)
  mdl <- if (model_type == "linear") mods[["M1"]] else mods[["M2"]]
  if (inherits(mdl, "try-error")) return(NULL)
  newd <- make_grid(data, resp, family_use, use_spline = model_type=="spline", grid_n = grid_n)
  if (is.null(newd)) return(NULL)
  pr <- predict(mdl, newdata = newd, type = "link", se.fit = TRUE, allow.new.levels = TRUE)
  newd$fit <- pr$fit
  newd$se  <- pr$se.fit
  newd$lo  <- newd$fit - 1.96*newd$se
  newd$hi  <- newd$fit + 1.96*newd$se
  # back-transform from link to response scale
  newd$mu  <- exp(newd$fit)
  newd$mu_lo <- exp(newd$lo)
  newd$mu_hi <- exp(newd$hi)
  newd$model_type <- model_type
  newd$response <- resp
  newd$family <- family_use
  newd
}
# Determine which family to use per response from recommendation
fam_map <- glmm_distance_recommend %>% transmute(response, family = ifelse(family_recommended=="nbinom2","nbinom2","poisson"))

pred_linear <- lapply(fam_map$response, function(r) fit_and_predict(df, r, fam_map$family[fam_map$response==r], model_type = "linear"))
pred_spline <- lapply(fam_map$response, function(r) fit_and_predict(df, r, fam_map$family[fam_map$response==r], model_type = "spline"))
pred_linear <- bind_rows(pred_linear)
pred_spline <- bind_rows(pred_spline)
plot_effect <- function(pred_df, title_suffix) {
  if (is.null(pred_df) || nrow(pred_df) == 0) return(NULL)
  ggplot(pred_df, aes(distance, mu)) +
    geom_ribbon(aes(ymin = mu_lo, ymax = mu_hi), alpha = 0.2) +
    geom_line(size = 1) +
    facet_wrap(~response, scales = "free_y") +
    labs(x = "Distance", y = "Expected count (offset-adjusted)", title = paste("Partial effect of distance -", title_suffix)) +
    theme_minimal(base_size = 12)
}

p_lin <- plot_effect(pred_linear, "linear (M1)")
p_spl <- plot_effect(pred_spline, "spline (M2)")

print(p_lin)

print(p_spl)

# Emphasize herbivorous
fam_h <- fam_map$family[fam_map$response=="herbivorous"]
pred_h_lin <- fit_and_predict(df, "herbivorous", fam_h, model_type = "linear")
pred_h_spl <- fit_and_predict(df, "herbivorous", fam_h, model_type = "spline")

if (!is.null(pred_h_lin)) {
  ggplot(pred_h_lin, aes(distance, mu)) +
    geom_ribbon(aes(ymin = mu_lo, ymax = mu_hi), alpha = 0.2, fill = "steelblue") +
    geom_line(size = 1, color = "steelblue4") +
    labs(x = "Distance", y = "Expected count (offset-adjusted)", title = "Herbivorous: linear (M1)") +
    theme_minimal(base_size = 12) -> p1
  print(p1)
}

if (!is.null(pred_h_spl)) {
  ggplot(pred_h_spl, aes(distance, mu)) +
    geom_ribbon(aes(ymin = mu_lo, ymax = mu_hi), alpha = 0.2, fill = "darkorange") +
    geom_line(size = 1, color = "darkorange4") +
    labs(x = "Distance", y = "Expected count (offset-adjusted)", title = "Herbivorous: spline (M2)") +
    theme_minimal(base_size = 12) -> p2
  print(p2)
}

# Inspect random effects variances and CIs in chosen family
library(broom.mixed)

re_table <- lapply(fam_map$response, function(r) {
  fam_vec <- fam_map$family[fam_map$response==r]
  fam_use <- if (length(fam_vec) >= 1) fam_vec[[1]] else "poisson"
  with_nb <- identical(fam_use, "nbinom2")
  mods <- fit_models_once(df, r, with_nb = with_nb)
  # choose model by recommendation shape and slope
  rec <- glmm_distance_recommend %>% filter(response == r)
  use_model <- if (isTRUE(rec$prefer_spline)) "M2" else "M1"
  use_model <- if (isTRUE(rec$prefer_random_slope)) ifelse(use_model=="M1","M1rs","M1rs") else use_model
  mdl <- mods[[use_model]]
  if (inherits(mdl, "try-error")) return(NULL)
  broom.mixed::tidy(mdl, effects = "ran_pars") %>% mutate(response = r, model_id = use_model)
})
re_table <- bind_rows(re_table)
knitr::kable(re_table, caption = "Random effects variances and CIs (chosen model per response)")
Random effects variances and CIs (chosen model per response)
effect component group term estimate response model_id
ran_pars cond subplot sd__(Intercept) 0.0158589 tot_abund_all_giuld M2
ran_pars cond subplot sd__(Intercept) 0.1097745 tot_abund_4_giuld M2
ran_pars cond subplot sd__(Intercept) 0.0000288 herbivorous M2
ran_pars cond subplot sd__(Intercept) 0.0000196 predacious M2
ran_pars cond subplot sd__(Intercept) 0.0001186 omnivorous M1
ran_pars cond subplot sd__(Intercept) 0.0001179 detritivorus M2
# AIC weights within chosen family per response
aic_weights <- glmm_distance_table %>%
  group_by(response, family) %>%
  mutate(delta = aic - min(aic, na.rm = TRUE)) %>%
  mutate(weight = exp(-0.5*delta) / sum(exp(-0.5*delta), na.rm = TRUE)) %>%
  ungroup()
knitr::kable(aic_weights %>% dplyr::select(response, family, model_id, aic, delta, weight), caption = "AIC weights within family")
AIC weights within family
response family model_id aic delta weight
tot_abund_all_giuld poisson M0 841.1393 105.556906 0.0000000
tot_abund_all_giuld poisson M1 836.5981 101.015728 0.0000000
tot_abund_all_giuld poisson M1rs 837.7858 102.203421 0.0000000
tot_abund_all_giuld poisson M2 735.5824 0.000000 1.0000000
tot_abund_all_giuld nbinom2 M0 448.3860 14.835600 0.0005997
tot_abund_all_giuld nbinom2 M1 448.3884 14.838001 0.0005990
tot_abund_all_giuld nbinom2 M1rs NA NA NA
tot_abund_all_giuld nbinom2 M2 433.5504 0.000000 0.9988012
tot_abund_4_giuld poisson M0 841.1493 108.518967 0.0000000
tot_abund_4_giuld poisson M1 840.4542 107.823954 0.0000000
tot_abund_4_giuld poisson M1rs 842.5865 109.956199 0.0000000
tot_abund_4_giuld poisson M2 732.6303 0.000000 1.0000000
tot_abund_4_giuld nbinom2 M0 446.1718 14.853338 0.0005945
tot_abund_4_giuld nbinom2 M1 446.9762 15.657711 0.0003977
tot_abund_4_giuld nbinom2 M1rs 449.9641 18.645640 0.0000893
tot_abund_4_giuld nbinom2 M2 431.3185 0.000000 0.9989186
herbivorous poisson M0 488.0464 110.349251 0.0000000
herbivorous poisson M1 405.5023 27.805144 0.0000009
herbivorous poisson M1rs 384.8068 7.109681 0.0277915
herbivorous poisson M2 377.6971 0.000000 0.9722076
herbivorous nbinom2 M0 254.0120 16.916775 0.0001918
herbivorous nbinom2 M1 241.5904 4.495170 0.0955397
herbivorous nbinom2 M1rs NA NA NA
herbivorous nbinom2 M2 237.0952 0.000000 0.9042685
predacious poisson M0 602.5644 77.162530 0.0000000
predacious poisson M1 599.0522 73.650323 0.0000000
predacious poisson M1rs 582.2178 56.815877 0.0000000
predacious poisson M2 525.4019 0.000000 1.0000000
predacious nbinom2 M0 384.0768 18.935028 0.0000773
predacious nbinom2 M1 385.9299 20.788214 0.0000306
predacious nbinom2 M1rs 385.6359 20.494144 0.0000355
predacious nbinom2 M2 365.1417 0.000000 0.9998566
omnivorous poisson M0 607.6419 51.666133 0.0000000
omnivorous poisson M1 608.2831 52.307332 0.0000000
omnivorous poisson M1rs 605.8990 49.923185 0.0000000
omnivorous poisson M2 555.9758 0.000000 1.0000000
omnivorous nbinom2 M0 284.5329 0.000000 0.6331801
omnivorous nbinom2 M1 286.4030 1.870046 0.2485719
omnivorous nbinom2 M1rs NA NA NA
omnivorous nbinom2 M2 287.8889 3.355940 0.1182481
detritivorus poisson M0 289.5240 34.781456 0.0000000
detritivorus poisson M1 286.8283 32.085754 0.0000001
detritivorus poisson M1rs NA NA NA
detritivorus poisson M2 254.7425 0.000000 0.9999999
detritivorus nbinom2 M0 244.0422 9.745215 0.0074918
detritivorus nbinom2 M1 243.2360 8.939009 0.0112111
detritivorus nbinom2 M1rs 246.3055 12.008481 0.0024161
detritivorus nbinom2 M2 234.2970 0.000000 0.9788810
# Verdict per guild: meteorology (g2) vs pesticide (g3) vs both (g23) using NB2 GLM AIC

# Helper to pull best of g2/g3/g23 for each response
choose_block <- function(tbl, resp) {
  sub <- tbl %>% filter(response == resp, model_group %in% c("g2","g3","g23"))
  if (nrow(sub) == 0) return(NULL)
  best <- sub %>% slice_min(aic, n = 1, with_ties = FALSE)
  # Compute deltas vs each candidate
  deltas <- sub %>% mutate(delta = aic - min(aic, na.rm = TRUE)) %>% dplyr::select(model_group, delta)
  list(best = best, deltas = deltas)
}

verdict_rows <- lapply(response_guilds, function(resp) {
  pick <- choose_block(glm_nb2_results, resp)
  if (is.null(pick)) return(NULL)
  best_group <- pick$best$model_group
  deltas <- pick$deltas
  # Rule: if g23 beats others by ΔAIC >= 2 -> "both"; else choose min of g2 vs g3; if ΔAIC < 2 -> "comparable"
  d_g2 <- deltas$delta[deltas$model_group=="g2"]; d_g3 <- deltas$delta[deltas$model_group=="g3"]; d_g23 <- deltas$delta[deltas$model_group=="g23"]
  decision <- NA_character_
  if (length(d_g23)==1 && d_g23 == 0 && ((length(d_g2)==1 && d_g2 >= 2) || (length(d_g3)==1 && d_g3 >= 2))) {
    decision <- "both"
  } else {
    # compare g2 vs g3
    if (length(d_g2)==1 && length(d_g3)==1) {
      if (abs(d_g2 - d_g3) < 2) decision <- "comparable" else decision <- if (d_g2 < d_g3) "meteorology" else "pesticide"
    } else if (length(d_g2)==1) {
      decision <- "meteorology"
    } else if (length(d_g3)==1) {
      decision <- "pesticide"
    } else {
      decision <- "insufficient"
    }
  }
  tibble(response = resp, best_group = best_group, decision = decision,
         delta_g2 = ifelse(length(d_g2)==1, d_g2, NA_real_),
         delta_g3 = ifelse(length(d_g3)==1, d_g3, NA_real_),
         delta_g23 = ifelse(length(d_g23)==1, d_g23, NA_real_))
})

verdict_tbl <- bind_rows(verdict_rows)

# Attach significant terms (p < 0.05) from NB2 GLM for the best model per response
sig_terms <- glm_nb2_coefs %>% mutate(term = as.character(term)) %>%
  inner_join(verdict_tbl %>% dplyr::select(response, best_group), by = "response") %>%
  filter(model_group == best_group, !grepl("^(Intercept|offset\\(log\\(night_traps\\)\\))$", term)) %>%
  mutate(significant = ifelse(!is.na(p.value) & p.value < 0.05, TRUE, FALSE)) %>%
  group_by(response) %>% summarise(
    significant_terms = paste(term[significant], collapse = ", "), .groups = "drop"
  )

verdict_tbl <- verdict_tbl %>% left_join(sig_terms, by = "response")

out_verdict <- file.path("..","data","glm_nb2_verdict_meteo_vs_pesticide.csv")
readr::write_csv(verdict_tbl, out_verdict)
glue::glue("NB2 GLM verdict table written to: {normalizePath(out_verdict)}")
## NB2 GLM verdict table written to: C:\Users\urish\Desktop\R\biodiversity_community\arthopods 2022\guild_abundance\data\glm_nb2_verdict_meteo_vs_pesticide.csv
knitr::kable(verdict_tbl, caption = "Per-guild verdict: meteorology vs pesticide vs both (NB2 GLM)")
Per-guild verdict: meteorology vs pesticide vs both (NB2 GLM)
response best_group decision delta_g2 delta_g3 delta_g23 significant_terms
tot_abund_all_giuld g23 both 5.712512 4.804434 0.0000000 (Intercept), temp_avg_avg, rh_avg_avg, days_since_april_18, spirodi_polyalky_dsa_res
tot_abund_4_giuld g23 both 5.513715 4.853373 0.0000000 (Intercept), temp_avg_avg, rh_avg_avg, days_since_april_18, spirodi_polyalky_dsa_res
herbivorous g2 meteorology 0.000000 6.564048 1.9055104 (Intercept), temp_avg_avg, rh_avg_avg, days_since_april_18
predacious g23 both 13.776714 7.633792 0.0000000 rh_avg_avg, days_since_april_18, spirodi_polyalky_dsa_res
omnivorous g3 pesticide 8.133741 0.000000 4.6036563 (Intercept), spirodi_polyalky_dsa_res
detritivorus g2 meteorology 0.000000 10.487319 0.7440909 (Intercept), temp_avg_avg, rh_avg_avg
# Optional: simple K-fold CV for linear vs spline within family (leave as example; set K small if n is small)
set.seed(1)
K <- 5

cv_results <- list()
for (r in fam_map$response) {
  fam_vec <- fam_map$family[fam_map$response==r]
  fam_use <- if (length(fam_vec) >= 1) fam_vec[[1]] else "poisson"
  with_nb <- identical(fam_use, "nbinom2")
  dat <- df %>% filter(!is.na(.data[[r]]), !is.na(distance), night_traps > 0)
  if (nrow(dat) < K) next
  folds <- sample(rep(1:K, length.out = nrow(dat)))
  for (k in 1:K) {
    train <- dat[folds != k, , drop = FALSE]
    test  <- dat[folds == k, , drop = FALSE]
    mods_tr <- fit_models_once(train, r, with_nb = with_nb)
    for (m in c("M1","M2")) {
      mdl <- mods_tr[[m]]
      if (inherits(mdl, "try-error")) next
      pr <- try(predict(mdl, newdata = test, type = "response", allow.new.levels = TRUE), silent = TRUE)
      if (inherits(pr, "try-error")) next
      y <- test[[r]]
      # Use Poisson or NB log-likelihood approximation for scoring
      eps <- 1e-9
      ll <- sum(dpois(y, lambda = pmax(pr, eps), log = TRUE))
      cv_results[[length(cv_results)+1]] <- tibble(response = r, model_id = m, fold = k, logLik = ll)
    }
  }
}
cv_tbl <- bind_rows(cv_results)
cv_summary <- cv_tbl %>% group_by(response, model_id) %>% summarise(mean_logLik = mean(logLik), .groups = "drop")
knitr::kable(cv_summary, caption = "K-fold CV (log-likelihood proxy) for linear vs spline")
K-fold CV (log-likelihood proxy) for linear vs spline
response model_id mean_logLik
detritivorus M1 -30.20069
detritivorus M2 -27.60786
herbivorous M1 -44.36134
herbivorous M2 -42.38044
omnivorous M1 -68.79555
omnivorous M2 -77.73540
predacious M1 -71.70632
predacious M2 -55.75842
tot_abund_4_giuld M1 -104.75498
tot_abund_4_giuld M2 -94.97090
tot_abund_all_giuld M1 -103.45030
tot_abund_all_giuld M2 -82.72418