Linear regression models were fit for all four disaster count
responses (integers >= 0) using polynomial terms in
delta.Temp (dt) and Year (Y) up to full 2nd
order. Plausibility required all non-intercept 95% CIs to
exclude zero. Best models selected are selected by using lowest
AIC and PRESS.
NOAAGISSWD <- read.csv("NOAAGISSWD.csv")
# Shorthand predictors
NOAAGISSWD$dt <- NOAAGISSWD$delta.Temp
NOAAGISSWD$Y <- NOAAGISSWD$year
# Create All.Disaster.Count (sum of all disaster type counts)
NOAAGISSWD$All.Disaster.Count <-
NOAAGISSWD$count_drought +
NOAAGISSWD$count_flooding +
NOAAGISSWD$count_freeze +
NOAAGISSWD$count_severe.storm +
NOAAGISSWD$count_tropical.cyclone +
NOAAGISSWD$count_wildfire +
NOAAGISSWD$count_winter.storm
#Define Candidate Models and Response Variables
# Response variables
responses <- c("count_wildfire",
"count_drought",
"All.Disaster.Count",
"count_severe.storm")
# Candidate right-hand side formulas
rhs_list <- c(
"dt",
"Y",
"dt + Y",
"dt + Y + dt:Y",
"dt + Y + I(dt^2)",
"dt + Y + I(Y^2)",
"dt + Y + I(dt^2) + I(Y^2)",
"dt + Y + dt:Y + I(dt^2)",
"dt + Y + dt:Y + I(Y^2)",
"dt + Y + dt:Y + I(dt^2) + I(Y^2)"
)
#Helper Funcitons
# PRESS statistic
PRESS <- function(mod) {
e <- residuals(mod)
h <- hatvalues(mod)
sum((e / (1 - h))^2)
}
# Plausibility check: all non-intercept 95% CIs must exclude zero
plausible_model <- function(mod) {
ci <- confint(mod, level = 0.95)
ci <- ci[rownames(ci) != "(Intercept)", , drop = FALSE]
if (nrow(ci) == 0) return(FALSE)
all(ci[, 1] > 0 | ci[, 2] < 0)
}
# Plausibility check using bootstrap CIs (excludes intercept)
plausible_boot <- function(boot_result) {
ci <- boot_result$ci
ci <- ci[rownames(ci) != "(Intercept)", , drop = FALSE]
if (nrow(ci) == 0) return(FALSE)
all(ci[, 1] > 0 | ci[, 2] < 0)
}
count_wildfire)resp <- "count_wildfire"
results <- data.frame()
models <- list()
for (rhs in rhs_list) {
f <- as.formula(paste(resp, "~", rhs))
mod <- lm(f, data = NOAAGISSWD)
models[[rhs]] <- mod
results <- rbind(results, data.frame(
Formula = paste(resp, "~", rhs),
Plausible = plausible_model(mod),
AIC = round(AIC(mod), 3),
PRESS = round(PRESS(mod), 3)
))
}
print(results, row.names = FALSE)
## Formula Plausible AIC PRESS
## count_wildfire ~ dt TRUE 53.850 8.126
## count_wildfire ~ Y TRUE 51.303 7.681
## count_wildfire ~ dt + Y FALSE 53.246 7.970
## count_wildfire ~ dt + Y + dt:Y FALSE 55.222 8.235
## count_wildfire ~ dt + Y + I(dt^2) FALSE 55.246 8.244
## count_wildfire ~ dt + Y + I(Y^2) FALSE 55.205 8.220
## count_wildfire ~ dt + Y + I(dt^2) + I(Y^2) FALSE 57.174 8.443
## count_wildfire ~ dt + Y + dt:Y + I(dt^2) FALSE 57.053 8.447
## count_wildfire ~ dt + Y + dt:Y + I(Y^2) FALSE 57.203 8.424
## count_wildfire ~ dt + Y + dt:Y + I(dt^2) + I(Y^2) FALSE 58.567 8.743
plausible_wf <- results[results$Plausible == TRUE, ]
print(plausible_wf, row.names = FALSE)
## Formula Plausible AIC PRESS
## count_wildfire ~ dt TRUE 53.850 8.126
## count_wildfire ~ Y TRUE 51.303 7.681
plausible_wf <- plausible_wf[order(plausible_wf$AIC, plausible_wf$PRESS), ]
best_rhs_wf <- sub(paste0(resp, " ~ "), "", plausible_wf$Formula[1])
best_mod_wf <- models[[best_rhs_wf]]
cat("Best Model Formula:\n")
## Best Model Formula:
print(formula(best_mod_wf))
## count_wildfire ~ Y
summary(best_mod_wf)
##
## Call:
## lm(formula = f, data = NOAAGISSWD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78557 -0.26364 -0.01027 0.27752 0.76503
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -45.419056 9.003535 -5.045 8.32e-06 ***
## Y 0.022942 0.004496 5.103 6.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4048 on 44 degrees of freedom
## Multiple R-squared: 0.3718, Adjusted R-squared: 0.3575
## F-statistic: 26.04 on 1 and 44 DF, p-value: 6.865e-06
confint(best_mod_wf)
## 2.5 % 97.5 %
## (Intercept) -63.56448804 -27.27362482
## Y 0.01388053 0.03200291
cat("AIC :", round(AIC(best_mod_wf), 4), "\n")
## AIC : 51.3034
cat("PRESS :", round(PRESS(best_mod_wf), 4), "\n")
## PRESS : 7.6812
Wildfire Count: Year alone best predicts wildfire
counts (R² = 0.372, AIC = 51.30, PRESS = 7.68), with wildfires
increasing significantly by approximately 0.023 per year (95% CI: 0.014,
0.032), selected over the dt-only model by both lower AIC
and PRESS.
count_drought)resp <- "count_drought"
results <- data.frame()
models <- list()
for (rhs in rhs_list) {
f <- as.formula(paste(resp, "~", rhs))
mod <- lm(f, data = NOAAGISSWD)
models[[rhs]] <- mod
results <- rbind(results, data.frame(
Formula = paste(resp, "~", rhs),
Plausible = plausible_model(mod),
AIC = round(AIC(mod), 3),
PRESS = round(PRESS(mod), 3)
))
}
print(results, row.names = FALSE)
## Formula Plausible AIC PRESS
## count_drought ~ dt TRUE 56.433 8.748
## count_drought ~ Y TRUE 55.658 8.647
## count_drought ~ dt + Y FALSE 57.641 8.950
## count_drought ~ dt + Y + dt:Y FALSE 58.819 9.127
## count_drought ~ dt + Y + I(dt^2) FALSE 58.866 9.140
## count_drought ~ dt + Y + I(Y^2) FALSE 59.348 9.271
## count_drought ~ dt + Y + I(dt^2) + I(Y^2) FALSE 60.857 9.549
## count_drought ~ dt + Y + dt:Y + I(dt^2) FALSE 60.813 9.477
## count_drought ~ dt + Y + dt:Y + I(Y^2) FALSE 60.507 9.534
## count_drought ~ dt + Y + dt:Y + I(dt^2) + I(Y^2) FALSE 60.880 9.834
plausible_dr <- results[results$Plausible == TRUE, ]
print(plausible_dr, row.names = FALSE)
## Formula Plausible AIC PRESS
## count_drought ~ dt TRUE 56.433 8.748
## count_drought ~ Y TRUE 55.658 8.647
plausible_dr <- plausible_dr[order(plausible_dr$AIC, plausible_dr$PRESS), ]
best_rhs_dr <- sub(paste0(resp, " ~ "), "", plausible_dr$Formula[1])
best_mod_dr <- models[[best_rhs_dr]]
cat("Best Model Formula:\n")
## Best Model Formula:
print(formula(best_mod_dr))
## count_drought ~ Y
summary(best_mod_dr)
##
## Call:
## lm(formula = f, data = NOAAGISSWD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9341 -0.4448 0.1250 0.3089 0.5782
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -25.587419 9.439947 -2.711 0.00954 **
## Y 0.013136 0.004714 2.787 0.00783 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4245 on 44 degrees of freedom
## Multiple R-squared: 0.15, Adjusted R-squared: 0.1307
## F-statistic: 7.765 on 1 and 44 DF, p-value: 0.007832
confint(best_mod_dr)
## 2.5 % 97.5 %
## (Intercept) -44.612381637 -6.56245648
## Y 0.003635588 0.02263638
cat("AIC :", round(AIC(best_mod_dr), 4), "\n")
## AIC : 55.6581
cat("PRESS :", round(PRESS(best_mod_dr), 4), "\n")
## PRESS : 8.6469
Drought Count: Year alone best predicts drought
counts (R² = 0.150, AIC = 55.66, PRESS =8.65), with a modest but
significant increase of approximately 0.013 droughts per year (95% CI:
0.004, 0.023), confirmed as the best plausible model by both lower AIC
and PRESS over the dt-only alternative, though the low R²
indicates year explains only a small portion of drought variability.
All.Disaster.Count)resp <- "All.Disaster.Count"
results <- data.frame()
models <- list()
for (rhs in rhs_list) {
f <- as.formula(paste(resp, "~", rhs))
mod <- lm(f, data = NOAAGISSWD)
models[[rhs]] <- mod
results <- rbind(results, data.frame(
Formula = paste(resp, "~", rhs),
Plausible = plausible_model(mod),
AIC = round(AIC(mod), 3),
PRESS = round(PRESS(mod), 3)
))
}
print(results, row.names = FALSE)
## Formula Plausible AIC
## All.Disaster.Count ~ dt TRUE 254.277
## All.Disaster.Count ~ Y TRUE 257.078
## All.Disaster.Count ~ dt + Y FALSE 253.365
## All.Disaster.Count ~ dt + Y + dt:Y FALSE 227.785
## All.Disaster.Count ~ dt + Y + I(dt^2) TRUE 230.310
## All.Disaster.Count ~ dt + Y + I(Y^2) FALSE 232.745
## All.Disaster.Count ~ dt + Y + I(dt^2) + I(Y^2) TRUE 227.422
## All.Disaster.Count ~ dt + Y + dt:Y + I(dt^2) FALSE 229.641
## All.Disaster.Count ~ dt + Y + dt:Y + I(Y^2) FALSE 229.452
## All.Disaster.Count ~ dt + Y + dt:Y + I(dt^2) + I(Y^2) TRUE 224.163
## PRESS
## 656.249
## 701.015
## 649.221
## 371.043
## 387.443
## 417.759
## 360.317
## 378.232
## 380.496
## 359.214
plausible_ad <- results[results$Plausible == TRUE, ]
print(plausible_ad, row.names = FALSE)
## Formula Plausible AIC
## All.Disaster.Count ~ dt TRUE 254.277
## All.Disaster.Count ~ Y TRUE 257.078
## All.Disaster.Count ~ dt + Y + I(dt^2) TRUE 230.310
## All.Disaster.Count ~ dt + Y + I(dt^2) + I(Y^2) TRUE 227.422
## All.Disaster.Count ~ dt + Y + dt:Y + I(dt^2) + I(Y^2) TRUE 224.163
## PRESS
## 656.249
## 701.015
## 387.443
## 360.317
## 359.214
plausible_ad <- plausible_ad[order(plausible_ad$AIC, plausible_ad$PRESS), ]
best_rhs_ad <- sub(paste0(resp, " ~ "), "", plausible_ad$Formula[1])
best_mod_ad <- models[[best_rhs_ad]]
cat("Best Model Formula:\n")
## Best Model Formula:
print(formula(best_mod_ad))
## All.Disaster.Count ~ dt + Y + dt:Y + I(dt^2) + I(Y^2)
summary(best_mod_ad)
##
## Call:
## lm(formula = f, data = NOAAGISSWD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9592 -1.9396 -0.6903 1.3465 6.0609
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.704e+05 6.417e+04 2.656 0.0113 *
## dt 6.673e+03 3.040e+03 2.195 0.0340 *
## Y -1.726e+02 6.496e+01 -2.656 0.0113 *
## I(dt^2) 9.291e+01 3.545e+01 2.621 0.0123 *
## I(Y^2) 4.369e-02 1.644e-02 2.657 0.0113 *
## dt:Y -3.387e+00 1.539e+00 -2.201 0.0335 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.548 on 40 degrees of freedom
## Multiple R-squared: 0.8815, Adjusted R-squared: 0.8666
## F-statistic: 59.49 on 5 and 40 DF, p-value: < 2.2e-16
confint(best_mod_ad)
## 2.5 % 97.5 %
## (Intercept) 4.071427e+04 3.000974e+05
## dt 5.285433e+02 1.281736e+04
## Y -3.038527e+02 -4.126945e+01
## I(dt^2) 2.125804e+01 1.645543e+02
## I(Y^2) 1.045935e-02 7.691500e-02
## dt:Y -6.496731e+00 -2.772369e-01
cat("AIC :", round(AIC(best_mod_ad), 4), "\n")
## AIC : 224.1626
cat("PRESS :", round(PRESS(best_mod_ad), 4), "\n")
## PRESS : 359.2137
All Disaster Count: The full second-order model
dt + Y + dt:Y + I(dt²) + I(Y²) best predicts total disaster
counts (R² = 0.882, AIC = 224.16, PRESS = 359.21), with all coefficients
significant and confidence intervals excluding zero (dt: 95% CI: 529,
12817; Y: 95% CI: −304, −41; I(dt²): 95% CI: 21.3, 164.6; I(Y²): 95% CI:
0.010, 0.077; dt:Y: 95% CI: −6.50, −0.28), selected over all simpler
plausible models by both the lowest AIC and PRESS.
count_severe.storm)resp <- "count_severe.storm"
results <- data.frame()
models <- list()
for (rhs in rhs_list) {
f <- as.formula(paste(resp, "~", rhs))
mod <- lm(f, data = NOAAGISSWD)
models[[rhs]] <- mod
results <- rbind(results, data.frame(
Formula = paste(resp, "~", rhs),
Plausible = plausible_model(mod),
AIC = round(AIC(mod), 3),
PRESS = round(PRESS(mod), 3)
))
}
print(results, row.names = FALSE)
## Formula Plausible AIC
## count_severe.storm ~ dt TRUE 226.474
## count_severe.storm ~ Y TRUE 232.258
## count_severe.storm ~ dt + Y FALSE 226.672
## count_severe.storm ~ dt + Y + dt:Y FALSE 187.047
## count_severe.storm ~ dt + Y + I(dt^2) TRUE 195.593
## count_severe.storm ~ dt + Y + I(Y^2) FALSE 193.923
## count_severe.storm ~ dt + Y + I(dt^2) + I(Y^2) TRUE 186.596
## count_severe.storm ~ dt + Y + dt:Y + I(dt^2) FALSE 188.869
## count_severe.storm ~ dt + Y + dt:Y + I(Y^2) FALSE 187.942
## count_severe.storm ~ dt + Y + dt:Y + I(dt^2) + I(Y^2) FALSE 187.359
## PRESS
## 367.375
## 414.684
## 366.829
## 156.081
## 195.686
## 185.510
## 155.399
## 161.157
## 158.167
## 176.797
plausible_ss <- results[results$Plausible == TRUE, ]
print(plausible_ss, row.names = FALSE)
## Formula Plausible AIC PRESS
## count_severe.storm ~ dt TRUE 226.474 367.375
## count_severe.storm ~ Y TRUE 232.258 414.684
## count_severe.storm ~ dt + Y + I(dt^2) TRUE 195.593 195.686
## count_severe.storm ~ dt + Y + I(dt^2) + I(Y^2) TRUE 186.596 155.399
plausible_ss <- plausible_ss[order(plausible_ss$AIC, plausible_ss$PRESS), ]
best_rhs_ss <- sub(paste0(resp, " ~ "), "", plausible_ss$Formula[1])
best_mod_ss <- models[[best_rhs_ss]]
cat("Best Model Formula:\n")
## Best Model Formula:
print(formula(best_mod_ss))
## count_severe.storm ~ dt + Y + I(dt^2) + I(Y^2)
summary(best_mod_ss)
##
## Call:
## lm(formula = f, data = NOAAGISSWD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5188 -0.9286 -0.0850 0.9595 4.3506
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.318e+04 1.012e+04 3.279 0.00213 **
## dt -1.401e+01 5.518e+00 -2.539 0.01501 *
## Y -3.345e+01 1.013e+01 -3.303 0.00199 **
## I(dt^2) 1.164e+01 3.833e+00 3.036 0.00416 **
## I(Y^2) 8.430e-03 2.533e-03 3.328 0.00186 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.71 on 41 degrees of freedom
## Multiple R-squared: 0.9034, Adjusted R-squared: 0.894
## F-statistic: 95.89 on 4 and 41 DF, p-value: < 2.2e-16
confint(best_mod_ss)
## 2.5 % 97.5 %
## (Intercept) 1.274512e+04 5.361190e+04
## dt -2.515292e+01 -2.865761e+00
## Y -5.389477e+01 -1.299665e+01
## I(dt^2) 3.895148e+00 1.937664e+01
## I(Y^2) 3.313726e-03 1.354565e-02
cat("AIC :", round(AIC(best_mod_ss), 4), "\n")
## AIC : 186.5957
cat("PRESS :", round(PRESS(best_mod_ss), 4), "\n")
## PRESS : 155.3994
Severe Storm Count: The model
dt + Y + I(dt²) + I(Y²) best predicts severe storm counts
(R² = 0.903, AIC = 186.60, PRESS = 155.40), with all coefficients
significant and confidence intervals excluding zero (dt: 95% CI: −25.15,
−2.87; Y: 95% CI: −53.89, −13.00; I(dt²): 95% CI: 3.90, 19.38; I(Y²):
95% CI: 0.003, 0.014), chosen over the full model including
dt:Y because that interaction term’s confidence interval
crossed zero, making it implausible, and confirmed by both lower AIC and
PRESS.
summary_table <- data.frame(
Response = c("count_wildfire",
"count_drought",
"All.Disaster.Count",
"count_severe.storm"),
Best_Model = c(
"~ Y",
"~ Y",
"~ dt + Y + dt:Y + I(dt^2) + I(Y^2)",
"~ dt + Y + I(dt^2) + I(Y^2)"
),
R_Squared = c(
round(summary(best_mod_wf)$r.squared, 3),
round(summary(best_mod_dr)$r.squared, 3),
round(summary(best_mod_ad)$r.squared, 3),
round(summary(best_mod_ss)$r.squared, 3)
),
AIC = c(
round(AIC(best_mod_wf), 3),
round(AIC(best_mod_dr), 3),
round(AIC(best_mod_ad), 3),
round(AIC(best_mod_ss), 3)
),
PRESS = c(
round(PRESS(best_mod_wf), 3),
round(PRESS(best_mod_dr), 3),
round(PRESS(best_mod_ad), 3),
round(PRESS(best_mod_ss), 3)
)
)
print(summary_table, row.names = FALSE)
## Response Best_Model R_Squared AIC
## count_wildfire ~ Y 0.372 51.303
## count_drought ~ Y 0.150 55.658
## All.Disaster.Count ~ dt + Y + dt:Y + I(dt^2) + I(Y^2) 0.881 224.163
## count_severe.storm ~ dt + Y + I(dt^2) + I(Y^2) 0.903 186.596
## PRESS
## 7.681
## 8.647
## 359.214
## 155.399
Wildfire and Drought counts are best explained by Year alone, indicating a simple linear upward trend over time. Temperature adds no significant independent information once Year is included.
All Disaster Count requires the full 2nd
order model including the interaction term dt:Y,
achieving R² = 0.882. The interaction suggests the combined effect of
warming and time is partially offsetting.
Severe Storm Count is best explained by
dt + Y + I(dt²) + I(Y²) (R² = 0.903), showing strong
nonlinear acceleration. The interaction term dt:Y was not
plausible for this response.