R Statistical Programming Code

Author

Troy Hanlon and Garrett Barnachez

#clear the workspace
rm(list = ls()) #clear environment
gc()            #clear unused memory
dev.off         #clear charts
cat("\f")     #clear console

# Set working directory
setwd("~/Desktop/BC/Summer 2025/ADEC7320/Final Project")

#Data provided in Google Drive link above
# 1. Load and process Natural Gas Data
gas_data_raw <- read_csv("Natural_Gas_Consumption_by_End_Use.csv", skip = 6)

gas_data <- gas_data_raw %>%
  rename(
    Commercial_Mcf = `Natural Gas Deliveries to Commercial Consumers (Including Vehicle Fuel through 1996) in Massachusetts MMcf`,
    Residential_Mcf = `Massachusetts Natural Gas Residential Consumption MMcf`,
    Industrial_Mcf = `Massachusetts Natural Gas Industrial Consumption MMcf`
  ) %>%
  filter(!is.na(Month)) %>%
  mutate(
    Month = my(Month),
    Year = year(Month),
    Month_Num = month(Month),
    Date = make_date(Year, Month_Num, 1)  # Explicit Date creation
  )

gas_long <- gas_data %>%
  pivot_longer(cols = c(Residential_Mcf, Commercial_Mcf, Industrial_Mcf),
               names_to = "Sector",
               values_to = "Consumption_Mcf") %>%
  mutate(
    Sector = str_remove(Sector, "_Mcf"),
    Fuel_Type = "Natural_Gas"
  )

# 2. Load and process Electricity Data
raw_data <- read_excel("sales_revenue.xlsx", col_names = FALSE)
cat("First 3 rows of raw data:\n")
print(head(raw_data, 3))

elec_ma <- read_excel("sales_revenue.xlsx", col_names = FALSE) %>%
  select(
    Year = ...1,
    Month = ...2,
    Resi_Sales_MWH = ...6,
    Com_Sales_MWH = ...10,
    Ind_Sales_MWH = ...14
  ) %>%
  filter(!Year %in% c("Year", "YEAR", "year"),
         !is.na(Year),
         !is.na(Month)) %>%
  mutate(
    Year = as.numeric(Year),
    Month = as.numeric(Month),
    Resi_Sales_MWH = as.numeric(gsub(",", "", Resi_Sales_MWH)),
    Com_Sales_MWH = as.numeric(gsub(",", "", Com_Sales_MWH)),
    Ind_Sales_MWH = as.numeric(gsub(",", "", Ind_Sales_MWH)),
    Date = make_date(Year, Month, 1)  # Explicit Date creation
  )

# Aggregate electricity data to monthly
elec_ma <- elec_ma %>%
  group_by(Year, Month) %>%
  summarise(
    Resi_Sales_MWH = sum(Resi_Sales_MWH, na.rm = TRUE),
    Com_Sales_MWH = sum(Com_Sales_MWH, na.rm = TRUE),
    Ind_Sales_MWH = sum(Ind_Sales_MWH, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(Date = make_date(Year, Month, 1))

elec_long <- elec_ma %>%
  pivot_longer(
    cols = c(Resi_Sales_MWH, Com_Sales_MWH, Ind_Sales_MWH),
    names_to = "Sector",
    values_to = "Consumption_MWh"
  ) %>%
  mutate(
    Sector = case_when(
      Sector == "Resi_Sales_MWH" ~ "Residential",
      Sector == "Com_Sales_MWH" ~ "Commercial",
      Sector == "Ind_Sales_MWH" ~ "Industrial"
    ),
    Fuel_Type = "Electricity",
    Month_Num = Month
  )

# 3. Load and process Weather Data
weather <- read_excel("weather_vars.xlsx") %>%
  filter(!is.na(Month), !is.na(Year)) %>%
  mutate(
    Days_in_Month = days_in_month(ymd(paste(Year, Month, "01", sep="-"))),
    HDD = pmax(65 - `Avg Temp (°F)`, 0) * Days_in_Month,
    CDD = pmax(`Avg Temp (°F)` - 65, 0) * Days_in_Month,
    Date = make_date(Year, Month, 1),
    Avg_Temp_F = `Avg Temp (°F)`,
    Humidity_Pct = `Humidity (%)`
  )

# Filter data to 2010-2024 range
start_year <- 2010
end_year <- 2024

weather_filt <- weather %>%
  filter(Year >= start_year & Year <= end_year)

gas_filt <- gas_long %>%
  filter(Year >= start_year & Year <= end_year)

elec_filt <- elec_long %>%
  filter(Year >= start_year & Year <= end_year)

# Combine energy datasets with consistent Date column
energy_data <- bind_rows(
  gas_filt %>% select(Year, Month_Num, Sector, Fuel_Type, Consumption = Consumption_Mcf, Date),
  elec_filt %>% select(Year, Month_Num, Sector, Fuel_Type, Consumption = Consumption_MWh, Date)
)

# Create comprehensive merged dataset
merged_data <- energy_data %>%
  left_join(weather_filt %>% select(Date, Avg_Temp_F, Humidity_Pct, HDD, CDD), 
            by = "Date") %>%
  mutate(
    Season = case_when(
      Month_Num %in% c(12, 1, 2) ~ "Winter",
      Month_Num %in% c(3, 4, 5) ~ "Spring",
      Month_Num %in% c(6, 7, 8) ~ "Summer",
      Month_Num %in% c(9, 10, 11) ~ "Fall"
    ),
    Log_Consumption = log(Consumption + 1)  # Add log-transformed consumption
  )

# Final validation
cat("\nMerged Data Sample:\n")
print(head(merged_data))
cat("\nMissing values by column:\n")
print(colSums(is.na(merged_data)))
cat("\nNumber of observations by sector and fuel type:\n")
print(table(merged_data$Fuel_Type, merged_data$Sector))

# --- build data (same as before) ---
summary_stats <- merged_data %>%
  group_by(Fuel_Type, Sector) %>%
  summarise(
    N = n(),
    Mean_Consumption = mean(Consumption, na.rm = TRUE),
    SD_Consumption   = sd(Consumption,   na.rm = TRUE),
    Min_Consumption  = min(Consumption,  na.rm = TRUE),
    Max_Consumption  = max(Consumption,  na.rm = TRUE),
    Mean_Log_Consumption = mean(Log_Consumption, na.rm = TRUE),
    SD_Log_Consumption   = sd(Log_Consumption,   na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  mutate(
    across(c(Mean_Consumption, SD_Consumption, Min_Consumption, Max_Consumption),
           ~ number(.x, big.mark = ",", accuracy = 0.1)),
    Mean_Log_Consumption = round(Mean_Log_Consumption, 2),
    SD_Log_Consumption   = round(SD_Log_Consumption, 2)
  )

# --- print table: fit-to-width + multipage + smaller font ---
kable(
  summary_stats,
  caption   = "Summary Statistics by Fuel Type and Sector",
  col.names = c("Fuel Type","Sector","N","Mean","SD","Min","Max","Mean (log)","SD (log)"),
  booktabs  = TRUE,
  longtable = TRUE,                 # allow page breaks
  align     = "llrrrrrrr"
) %>%
  kable_styling(
    full_width   = FALSE,
    font_size    = 9,               # slightly smaller
    latex_options = c("hold_position","repeat_header","scale_down")
    # scale_down shrinks to fit \textwidth if still too wide
  )
invisible(NULL)  # prevent accidental printing of last value in chunk

# ---- Weather summary (same idea) ----
weather_summary <- weather_filt %>%
  summarise(
    `Mean Temp (°F)` = round(mean(`Avg Temp (°F)`, na.rm = TRUE), 1),
    `SD Temp (°F)`   = round(sd(`Avg Temp (°F)`,   na.rm = TRUE), 1),
    `Mean Hum (%)`   = round(mean(`Humidity (%)`,  na.rm = TRUE), 1),
    `SD Hum (%)`     = round(sd(`Humidity (%)`,    na.rm = TRUE), 1),
    `Mean HDD`       = round(mean(HDD,             na.rm = TRUE), 1),
    `Mean CDD`       = round(mean(CDD,             na.rm = TRUE), 1)
  )

kable(
  weather_summary,
  caption   = "Weather Variables Summary Statistics (2010–2024)",
  booktabs  = TRUE,
  longtable = FALSE,
  align     = "rrrrrr"
) %>%
  kable_styling(full_width = FALSE, font_size = 9)
invisible(NULL)

# Time series plots for Consumption and Log_Consumption
p1 <- merged_data %>%
  filter(Fuel_Type == "Natural_Gas") %>%
  ggplot(aes(x = Date, y = Consumption, color = Sector)) +
  geom_line(size = 1) +
  labs(title = "Natural Gas Consumption by Sector (2010-2024)",
       x = "Date", y = "Consumption (MMcf)") +
  theme_minimal() +
  theme(legend.position = "bottom")

p1_log <- merged_data %>%
  filter(Fuel_Type == "Natural_Gas") %>%
  ggplot(aes(x = Date, y = Log_Consumption, color = Sector)) +
  geom_line(size = 1) +
  labs(title = "Log Natural Gas Consumption by Sector (2010-2024)",
       x = "Date", y = "Log Consumption") +
  theme_minimal() +
  theme(legend.position = "bottom")

p2 <- merged_data %>%
  filter(Fuel_Type == "Electricity") %>%
  ggplot(aes(x = Date, y = Consumption, color = Sector)) +
  geom_line(size = 1) +
  labs(title = "Electricity Consumption by Sector (2010-2024)",
       x = "Date", y = "Consumption (MWh)") +
  theme_minimal() +
  theme(legend.position = "bottom")

p2_log <- merged_data %>%
  filter(Fuel_Type == "Electricity") %>%
  ggplot(aes(x = Date, y = Log_Consumption, color = Sector)) +
  geom_line(size = 1) +
  labs(title = "Log Electricity Consumption by Sector (2010-2024)",
       x = "Date", y = "Log Consumption") +
  theme_minimal() +
  theme(legend.position = "bottom")

grid.arrange(p1, p1_log, p2, p2_log, ncol = 2, nrow = 2)

# Weather time series
p3 <- weather_filt %>%
  ggplot(aes(x = Date, y = `Avg Temp (°F)`)) +
  geom_line(color = "red", size = 1) +
  labs(title = "Average Monthly Temperature (2010-2024)",
       x = "Date", y = "Temperature (°F)") +
  theme_minimal()

p4 <- weather_filt %>%
  select(Date, HDD, CDD) %>%
  pivot_longer(cols = c(HDD, CDD), names_to = "Variable", values_to = "Value") %>%
  ggplot(aes(x = Date, y = Value, color = Variable)) +
  geom_line(size = 1) +
  labs(title = "Heating and Cooling Degree Days (2010-2024)",
       x = "Date", y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

grid.arrange(p3, p4, ncol = 1)

# Correlation analysis for natural gas
gas_corr_data <- merged_data %>%
  filter(Fuel_Type == "Natural_Gas") %>%
  select(Log_Consumption, Avg_Temp_F, Humidity_Pct, HDD, CDD) %>%
  na.omit()

cor_matrix_gas <- cor(gas_corr_data)

# Correlation analysis for electricity  
elec_corr_data <- merged_data %>%
  filter(Fuel_Type == "Electricity") %>%
  select(Log_Consumption, Avg_Temp_F, Humidity_Pct, HDD, CDD) %>%
  na.omit()

cor_matrix_elec <- cor(elec_corr_data)

par(mfrow = c(1, 2))
corrplot(cor_matrix_gas, method = "color", type = "upper", 
         title = "Natural Gas Correlations (Log-Consumption)", mar = c(0,0,2,0))
corrplot(cor_matrix_elec, method = "color", type = "upper",
         title = "Electricity Correlations (Log-Consumption)", mar = c(0,0,2,0))

# Initialize list to store models and diagnostics
gas_ts_models <- list()
arima_diagnostics <- list()

cat("Checking Natural Gas Data Quality:\n")
for(sector in c("Residential", "Commercial", "Industrial")) {
  sector_data <- merged_data %>%
    filter(Fuel_Type == "Natural_Gas", Sector == sector) %>%
    mutate(
      Trend = 1:n(),
      Log_Consumption = log(Consumption + 1)
    ) %>%
    select(Date, Consumption, Log_Consumption, HDD, CDD, Humidity_Pct, Trend) %>%
    na.omit()
  
  cat("\nSector:", sector, "\n")
  cat("Rows:", nrow(sector_data), "\n")
  cat("Missing values:", sum(is.na(sector_data)), "\n")
  cat("Log_Consumption range:", range(sector_data$Log_Consumption, na.rm = TRUE), "\n")
  adf_result <- adf.test(sector_data$Log_Consumption)
  cat("ADF Test (Log_Consumption, p-value):", adf_result$p.value, "\n")
  
  linear_model <- lm(Log_Consumption ~ HDD + CDD + Humidity_Pct + Trend,
                    data = sector_data)
  
  ts_data <- ts(sector_data$Log_Consumption, frequency = 12, start = c(2010, 1))
  xreg_matrix <- as.matrix(sector_data[, c("HDD", "CDD", "Humidity_Pct", "Trend")])
  
  if (any(is.na(xreg_matrix))) {
    stop("Missing values in xreg_matrix for sector ", sector)
  }
  
  arima_model <- tryCatch(
    {
      auto.arima(ts_data, xreg = xreg_matrix, seasonal = TRUE, 
                 max.p = 2, max.q = 2, max.d = 1, trace = TRUE, 
                 d = ifelse(adf_result$p.value > 0.05, 1, 0))
    },
    error = function(e) {
      message("Error fitting ARIMA for ", sector, ": ", e$message)
      return(NULL)
    }
  )
  
  gas_ts_models[[sector]] <- list(
    Linear = linear_model,
    ARIMA = arima_model
  )
  
  arima_diagnostics[[sector]] <- if (!is.null(arima_model)) {
    list(
      Order = arima_model$arma,
      Coefficients = coef(arima_model),
      AIC = arima_model$aic,
      BIC = BIC(arima_model),
      Residual_ACF = acf(resid(arima_model), plot = FALSE)$acf
    )
  } else {
    list(Error = "ARIMA fitting failed")
  }
}

cat("\nNatural Gas Consumption Models\n")
for(sector in names(gas_ts_models)) {
  cat("\n=== ", sector, " Sector ===\n")
  lin_summary <- summary(gas_ts_models[[sector]]$Linear)
  cat("\nLinear Model Coefficients (Log-Transformed):\n")
  print(coef(lin_summary))
  cat("\nR-squared:", lin_summary$r.squared, "\n")
  
  cat("\nARIMA Model:\n")
  if (!is.null(gas_ts_models[[sector]]$ARIMA)) {
    print(gas_ts_models[[sector]]$ARIMA)
    cat("AIC:", gas_ts_models[[sector]]$ARIMA$aic, "\n")
    cat("BIC:", BIC(gas_ts_models[[sector]]$ARIMA), "\n")
    cat("Residual ACF (first 5 lags):\n")
    print(head(arima_diagnostics[[sector]]$Residual_ACF, 5))
  } else {
    cat("ARIMA fitting failed for ", sector, "\n")
  }
  cat("\n--------------------------------\n")
}

coef_table <- do.call(rbind, lapply(names(gas_ts_models), function(sector) {
  coefs <- coef(summary(gas_ts_models[[sector]]$Linear))
  data.frame(
    Sector = sector,
    Term = rownames(coefs),
    Estimate = coefs[,1],
    Std.Error = coefs[,2],
    p.value = coefs[,4],
    row.names = NULL
  )
}))

kable(coef_table, digits = 4, 
      caption = "Natural Gas Consumption Coefficients (Log-Transformed)")

elec_ts_models <- list()
elec_arima_diagnostics <- list()

cat("Checking Electricity Data Quality:\n")
for(sector in c("Residential", "Commercial", "Industrial")) {
  sector_data <- merged_data %>%
    filter(Fuel_Type == "Electricity", Sector == sector) %>%
    mutate(
      Trend = 1:n(),
      Log_Consumption = log(Consumption + 1)
    ) %>%
    select(Date, Consumption, Log_Consumption, HDD, CDD, Humidity_Pct, Trend) %>%
    na.omit()
  
  cat("\nSector:", sector, "\n")
  cat("Rows:", nrow(sector_data), "\n")
  cat("Missing values:", sum(is.na(sector_data)), "\n")
  cat("Log_Consumption range:", range(sector_data$Log_Consumption, na.rm = TRUE), "\n")
  adf_result <- adf.test(sector_data$Log_Consumption)
  cat("ADF Test (Log_Consumption, p-value):", adf_result$p.value, "\n")
  
  linear_model <- lm(Log_Consumption ~ HDD + CDD + Humidity_Pct + Trend,
                    data = sector_data)
  
  ts_data <- ts(sector_data$Log_Consumption, frequency = 12, start = c(2010, 1))
  xreg_matrix <- as.matrix(sector_data[, c("HDD", "CDD", "Humidity_Pct", "Trend")])
  
  if (any(is.na(xreg_matrix))) {
    stop("Missing values in xreg_matrix for sector ", sector)
  }
  
  arima_model <- tryCatch(
    {
      auto.arima(ts_data, xreg = xreg_matrix, seasonal = TRUE, 
                 max.p = 2, max.q = 2, max.d = 1, trace = TRUE, 
                 d = ifelse(adf_result$p.value > 0.05, 1, 0))
    },
    error = function(e) {
      message("Error fitting ARIMA for ", sector, ": ", e$message)
      return(NULL)
    }
  )
  
  elec_ts_models[[sector]] <- list(
    Linear = linear_model,
    ARIMA = arima_model
  )
  
  elec_arima_diagnostics[[sector]] <- if (!is.null(arima_model)) {
    list(
      Order = arima_model$arma,
      Coefficients = coef(arima_model),
      AIC = arima_model$aic,
      BIC = BIC(arima_model),
      Residual_ACF = acf(resid(arima_model), plot = FALSE)$acf
    )
  } else {
    list(Error = "ARIMA fitting failed")
  }
}

cat("\nElectricity Consumption Models\n")
for(sector in names(elec_ts_models)) {
  cat("\n=== ", sector, " Sector ===\n")
  lin_summary <- summary(elec_ts_models[[sector]]$Linear)
  cat("\nLinear Model Coefficients (Log-Transformed):\n")
  print(coef(lin_summary))
  cat("\nR-squared:", lin_summary$r.squared, "\n")
  
  cat("\nARIMA Model:\n")
  if (!is.null(elec_ts_models[[sector]]$ARIMA)) {
    print(elec_ts_models[[sector]]$ARIMA)
    cat("AIC:", elec_ts_models[[sector]]$ARIMA$aic, "\n")
    cat("BIC:", BIC(elec_ts_models[[sector]]$ARIMA), "\n")
    cat("Residual ACF (first 5 lags):\n")
    print(head(elec_arima_diagnostics[[sector]]$Residual_ACF, 5))
  } else {
    cat("ARIMA fitting failed for ", sector, "\n")
  }
  cat("\n--------------------------------\n")
}

elec_coef_table <- do.call(rbind, lapply(names(elec_ts_models), function(sector) {
  coefs <- coef(summary(elec_ts_models[[sector]]$Linear))
  data.frame(
    Sector = sector,
    Term = rownames(coefs),
    Estimate = coefs[,1],
    Std.Error = coefs[,2],
    p.value = coefs[,4],
    row.names = NULL
  )
}))

kable(elec_coef_table, digits = 4, 
      caption = "Electricity Consumption Coefficients (Log-Transformed)")

# Function for linear model diagnostics (unchanged)
diagnostic_plots_linear <- function(model, title_text) {
  old_par <- par(mfrow = c(2, 2),  
                 mar = c(4, 4, 3, 2),  
                 oma = c(0, 0, 2, 0),  
                 cex.main = 1.1,       
                 font.main = 1,        
                 family = "sans")      
  
  res <- resid(model)
  fit <- fitted(model)
  
  plot(fit, res,
       main = "Residuals vs Fitted",
       xlab = "Fitted Values (Log-Consumption)",
       ylab = "Residuals",
       pch = 19,          
       col = "#1E88E5",   
       cex = 0.7)         
  abline(h = 0, col = "#FF5252", lwd = 2) 
  grid(col = "#E0E0E0", lty = "dotted")
  
  qqnorm(res,
         main = "Normal Q-Q Plot",
         pch = 19,
         col = "#1E88E5")
  qqline(res, col = "#FF5252", lwd = 2)
  grid(col = "#E0E0E0", lty = "dotted")
  
  hist(res, 
       main = "Residual Distribution",
       xlab = "Residuals (Log-Consumption)",
       col = "#1E88E5",
       border = "white",
       freq = FALSE)
  curve(dnorm(x, mean(res), sd(res)), 
        add = TRUE, 
        col = "#FF5252", 
        lwd = 2)
  grid(col = "#E0E0E0", lty = "dotted")
  
  acf(res,
      main = "ACF of Residuals",
      col = "#1E88E5",
      lwd = 2)
  grid(col = "#E0E0E0", lty = "dotted")
  
  mtext(title_text,
        outer = TRUE,
        cex = 1.3,
        font = 2,
        col = "#333333")
  
  par(old_par)
}

# Function for ARIMA diagnostics
diagnostic_plots_arima <- function(model, title_text) {
  old_par <- par(mfrow = c(2, 2),  
                 mar = c(4, 4, 3, 2),  
                 oma = c(0, 0, 2, 0),  
                 cex.main = 1.1,       
                 font.main = 1,        
                 family = "sans")      
  
  res <- resid(model)
  fit <- fitted(model)
  
  plot(fit, res,
       main = "Residuals vs Fitted",
       xlab = "Fitted Values (Consumption)",
       ylab = "Residuals",
       pch = 19,          
       col = "#1E88E5",   
       cex = 0.7)         
  abline(h = 0, col = "#FF5252", lwd = 2) 
  grid(col = "#E0E0E0", lty = "dotted")
  
  qqnorm(res,
         main = "Normal Q-Q Plot",
         pch = 19,
         col = "#1E88E5")
  qqline(res, col = "#FF5252", lwd = 2)
  grid(col = "#E0E0E0", lty = "dotted")
  
  hist(res, 
       main = "Residual Distribution",
       xlab = "Residuals (Consumption)",
       col = "#1E88E5",
       border = "white",
       freq = FALSE)
  curve(dnorm(x, mean(res), sd(res)), 
        add = TRUE, 
        col = "#FF5252", 
        lwd = 2)
  grid(col = "#E0E0E0", lty = "dotted")
  
  acf(res,
      main = "ACF of Residuals",
      col = "#1E88E5",
      lwd = 2)
  grid(col = "#E0E0E0", lty = "dotted")
  
  mtext(title_text,
        outer = TRUE,
        cex = 1.3,
        font = 2,
        col = "#333333")
  
  par(old_par)
}

run_diagnostics <- function(linear_model, arima_model, sector_name, fuel_type) {
  cat("\n", rep("=", 50), "\n", sep="")
  cat("DIAGNOSTICS FOR:", sector_name, "\n")
  cat(rep("=", 50), "\n\n")
  
  # Linear Model Diagnostics
  cat("LINEAR MODEL (LOG-TRANSFORMED):\n")
  print(summary(linear_model))
  
  res_linear <- resid(linear_model)
  
  cat("\nNormality Tests (Linear):\n")
  if(length(res_linear) <= 5000) {
    sw_test <- shapiro.test(res_linear)
    cat("Shapiro-Wilk: p =", format.pval(sw_test$p.value, digits=3), "\n")
  }
  ad_test <- ad.test(res_linear)
  cat("Anderson-Darling: p =", format.pval(ad_test$p.value, digits=3), "\n")
  
  bp_test <- bptest(linear_model)
  cat("\nHeteroskedasticity Test (Linear):\n")
  cat("Breusch-Pagan: p =", format.pval(bp_test$p.value, digits=3), "\n")
  
  dw_test <- dwtest(linear_model)
  cat("\nAutocorrelation Test (Linear):\n")
  cat("Durbin-Watson: p =", format.pval(dw_test$p.value, digits=3), "\n")
  
  cat("\nMulticollinearity Check (VIF):\n")
  vif_values <- vif(linear_model)
  print(vif_values)
  
  diagnostic_plots_linear(linear_model, paste(fuel_type, "-", toupper(sector_name), "(Linear, Log-Transformed)"))
  
  # ARIMA Model Diagnostics
  cat("\nARIMA MODEL:\n")
  if (!is.null(arima_model)) {
    print(arima_model)
    cat("AIC:", arima_model$aic, "\n")
    cat("BIC:", BIC(arima_model), "\n")
    
    res_arima <- resid(arima_model)
    
    cat("\nNormality Tests (ARIMA):\n")
    if(length(res_arima) <= 5000) {
      sw_test_arima <- shapiro.test(res_arima)
      cat("Shapiro-Wilk: p =", format.pval(sw_test_arima$p.value, digits=3), "\n")
    }
    ad_test_arima <- ad.test(res_arima)
    cat("Anderson-Darling: p =", format.pval(ad_test_arima$p.value, digits=3), "\n")
    
    cat("\nAutocorrelation Test (ARIMA):\n")
    lb_test <- Box.test(res_arima, lag = 12, type = "Ljung-Box")
    cat("Ljung-Box: p =", format.pval(lb_test$p.value, digits=3), "\n")
    
    diagnostic_plots_arima(arima_model, paste(fuel_type, "-", toupper(sector_name), "(ARIMA)"))
  } else {
    cat("ARIMA fitting failed for ", sector_name, "\n")
  }
}

cat("\n\nELECTRICITY MODEL DIAGNOSTICS\n")
for(sector in c("Residential", "Commercial", "Industrial")) {
  run_diagnostics(elec_ts_models[[sector]]$Linear, 
                  elec_ts_models[[sector]]$ARIMA, 
                  paste("ELECTRICITY -", toupper(sector)), 
                  "ELECTRICITY")
}

cat("\n\nNATURAL GAS MODEL DIAGNOSTICS\n")
for(sector in c("Residential", "Commercial", "Industrial")) {
  run_diagnostics(gas_ts_models[[sector]]$Linear, 
                  gas_ts_models[[sector]]$ARIMA, 
                  paste("NATURAL GAS -", toupper(sector)), 
                  "NATURAL GAS")
}