library(forecast)
## Warning: package 'forecast' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ fma 2.5 ✔ expsmooth 2.3
##
# data loading
file_path <- "/Users/timyang/Downloads/Illinois_Dry_Natural_Gas_Production.csv"
natural_gas_data <- read.csv(file_path, skip = 4)
# Convert the Month column to date format
natural_gas_data$Month <- zoo::as.yearmon(natural_gas_data$Month, format = "%b %Y")
# Create a time series object
# Extract year and month from the first data point
start_year <- as.numeric(format(natural_gas_data$Month[1], "%Y"))
start_month <- as.numeric(format(natural_gas_data$Month[1], "%m"))
ts_data <- ts(natural_gas_data$Illinois.Dry.Natural.Gas.Production.Million.Cubic.Feet, frequency = 12, start = c(start_year, start_month))
# Use only the last 4 years of data for model building
end_year <- start_year + floor(length(ts_data) / 12) - 1
train_data <- window(ts_data, start = c(end_year - 3, 1), end = c(end_year, 12))
# Forecast horizon (12 months for the 5th year)
h <- 12
# Fit Naive model
naive_forecast <- naive(train_data, h = h)
# Fit Drift model
drift_forecast <- rwf(train_data, h = h, drift = TRUE)
# Fit SNAIVE model
snaive_forecast <- snaive(train_data, h = h)
# Fit ETS model
ets_model <- ets(train_data)
ets_forecast <- forecast(ets_model, h = h)
# Plot the forecasts
autoplot(train_data) +
autolayer(naive_forecast, series = "Naive", PI = FALSE) +
autolayer(drift_forecast, series = "Drift", PI = FALSE) +
autolayer(snaive_forecast, series = "SNAIVE", PI = FALSE) +
autolayer(ets_forecast, series = "ETS", PI = FALSE) +
ggtitle("Forecasts from Different Models") +
xlab("Year") + ylab("Production (Million Cubic Feet)") +
guides(colour = guide_legend(title = "Forecast"))
# Calculate performance measures for each model
naive_accuracy <- accuracy(naive_forecast)
drift_accuracy <- accuracy(drift_forecast)
snaive_accuracy <- accuracy(snaive_forecast)
ets_accuracy <- accuracy(ets_forecast)
# Print accuracy measures
print(naive_accuracy)
## ME RMSE MAE MPE MAPE MASE
## Training set -3.148936 25.58091 13.40426 -31.04317 39.26186 0.5772167
## ACF1
## Training set 0.03643154
print(drift_accuracy)
## ME RMSE MAE MPE MAPE MASE
## Training set -5.744814e-15 25.38636 14.04527 -27.49236 38.93524 0.6048202
## ACF1
## Training set 0.03643154
print(snaive_accuracy)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -10.22222 28.56571 23.22222 -37.20881 48.93813 1 0.3915384
print(ets_accuracy)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -2.968299 25.32566 13.24059 -30.32318 38.51753 0.570169 0.02342824
The best model is identified based on the lowest RMSE, but MAE and MAPE are also considered. the lowest RMSE value is ETS model which preforming the best, those values for each model are all super close to each other, which is a little bit hard to statistical tell which one is the best.