#clear the workspacerm(list =ls()) #clear environmentgc() #clear unused memory
used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells 608533 32.5 1380852 73.8 NA 715625 38.3
Vcells 1125889 8.6 8388608 64.0 24576 2010544 15.4
dev.off #clear charts
function (which = dev.cur())
{
if (which == 1)
stop("cannot shut down device 1 (the null device)")
.External(C_devoff, as.integer(which))
dev.cur()
}
<bytecode: 0x12643a4e8>
<environment: namespace:grDevices>
cat("\f") #clear console
# Set working directorysetwd("~/Desktop/BC/Summer 2025/ADEC7320/Final Project")#Data provided in Google Drive link above# 1. Load and process Natural Gas Datagas_data_raw <-read_csv("Natural_Gas_Consumption_by_End_Use.csv", skip =6)gas_data <- gas_data_raw %>%rename(Commercial_Mcf =`Natural Gas Deliveries to Commercial Consumers (Including Vehicle Fuel through 1996) in Massachusetts MMcf`,Residential_Mcf =`Massachusetts Natural Gas Residential Consumption MMcf`,Industrial_Mcf =`Massachusetts Natural Gas Industrial Consumption MMcf` ) %>%filter(!is.na(Month)) %>%mutate(Month =my(Month),Year =year(Month),Month_Num =month(Month),Date =make_date(Year, Month_Num, 1) # Explicit Date creation )gas_long <- gas_data %>%pivot_longer(cols =c(Residential_Mcf, Commercial_Mcf, Industrial_Mcf),names_to ="Sector",values_to ="Consumption_Mcf") %>%mutate(Sector =str_remove(Sector, "_Mcf"),Fuel_Type ="Natural_Gas" )# 2. Load and process Electricity Dataraw_data <-read_excel("sales_revenue.xlsx", col_names =FALSE)cat("First 3 rows of raw data:\n")
# Time series plots for Consumption and Log_Consumptionp1 <- merged_data %>%filter(Fuel_Type =="Natural_Gas") %>%ggplot(aes(x = Date, y = Consumption, color = Sector)) +geom_line(size =1) +labs(title ="Natural Gas Consumption by Sector (2010-2024)",x ="Date", y ="Consumption (MMcf)") +theme_minimal() +theme(legend.position ="bottom")p1_log <- merged_data %>%filter(Fuel_Type =="Natural_Gas") %>%ggplot(aes(x = Date, y = Log_Consumption, color = Sector)) +geom_line(size =1) +labs(title ="Log Natural Gas Consumption by Sector (2010-2024)",x ="Date", y ="Log Consumption") +theme_minimal() +theme(legend.position ="bottom")p2 <- merged_data %>%filter(Fuel_Type =="Electricity") %>%ggplot(aes(x = Date, y = Consumption, color = Sector)) +geom_line(size =1) +labs(title ="Electricity Consumption by Sector (2010-2024)",x ="Date", y ="Consumption (MWh)") +theme_minimal() +theme(legend.position ="bottom")p2_log <- merged_data %>%filter(Fuel_Type =="Electricity") %>%ggplot(aes(x = Date, y = Log_Consumption, color = Sector)) +geom_line(size =1) +labs(title ="Log Electricity Consumption by Sector (2010-2024)",x ="Date", y ="Log Consumption") +theme_minimal() +theme(legend.position ="bottom")grid.arrange(p1, p1_log, p2, p2_log, ncol =2, nrow =2)
# Weather time seriesp3 <- weather_filt %>%ggplot(aes(x = Date, y =`Avg Temp (°F)`)) +geom_line(color ="red", size =1) +labs(title ="Average Monthly Temperature (2010-2024)",x ="Date", y ="Temperature (°F)") +theme_minimal()p4 <- weather_filt %>%select(Date, HDD, CDD) %>%pivot_longer(cols =c(HDD, CDD), names_to ="Variable", values_to ="Value") %>%ggplot(aes(x = Date, y = Value, color = Variable)) +geom_line(size =1) +labs(title ="Heating and Cooling Degree Days (2010-2024)",x ="Date", y ="Value") +theme_minimal() +theme(legend.position ="bottom")grid.arrange(p3, p4, ncol =1)
# Correlation analysis for natural gasgas_corr_data <- merged_data %>%filter(Fuel_Type =="Natural_Gas") %>%select(Log_Consumption, Avg_Temp_F, Humidity_Pct, HDD, CDD) %>%na.omit()cor_matrix_gas <-cor(gas_corr_data)# Correlation analysis for electricity elec_corr_data <- merged_data %>%filter(Fuel_Type =="Electricity") %>%select(Log_Consumption, Avg_Temp_F, Humidity_Pct, HDD, CDD) %>%na.omit()cor_matrix_elec <-cor(elec_corr_data)par(mfrow =c(1, 2))corrplot(cor_matrix_gas, method ="color", type ="upper", title ="Natural Gas Correlations (Log-Consumption)", mar =c(0,0,2,0))corrplot(cor_matrix_elec, method ="color", type ="upper",title ="Electricity Correlations (Log-Consumption)", mar =c(0,0,2,0))
library(forecast)library(ggplot2)library(knitr)# Initialize list to store models and diagnosticsgas_ts_models <-list()arima_diagnostics <-list()cat("Checking Natural Gas Data Quality:\n")
Natural Gas Consumption Models (Linear, Log-Transformed)
Residential
Commercial
Industrial
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept)
8.442***
8.503***
8.073***
(0.182)
(0.164)
(0.108)
HDD
0.002***
0.001***
0.001***
(0.000)
(0.000)
(0.000)
CDD
-0.001***
-0.000***
-0.000**
(0.000)
(0.000)
(0.000)
Humidity_Pct
-0.004
0.000
-0.002
(0.002)
(0.002)
(0.001)
Trend
0.000+
-0.003***
-0.000
(0.000)
(0.000)
(0.000)
Num.Obs.
180
180
180
R2
0.965
0.950
0.920
F
1198.346
824.085
506.411
RMSE
0.15
0.14
0.09
library(modelsummary)library(knitr)elec_ts_models <-list()elec_arima_diagnostics <-list()elec_linear_models <-list() # store linear models for modelsummarycat("Checking Electricity Data Quality:\n")