library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
file_path <- "C:/Users/james/Downloads/Gold_Prices.xlsx"
sheet_names <- c('Yearly_Avg', 'Quarterly_Avg', 'Monthly_Avg')
# Read each sheet into a separate data frame
Yearly_Avg <- read_excel(file_path, sheet = sheet_names[1]) %>%
rename(Date = 1)
## New names:
## • `` -> `...1`
Quarterly_Avg <- read_excel(file_path, sheet = sheet_names[2]) %>%
rename(Date = 1)
## New names:
## • `` -> `...1`
Monthly_Avg <- read_excel(file_path, sheet = sheet_names[3]) %>%
rename(Date = 1)
## New names:
## • `` -> `...1`
# Convert to Just US Currency
Yearly_Avg_USD <- Yearly_Avg[, 1:2]
Quarterly_Avg_USD <- Quarterly_Avg[, 1:2]
Monthly_Avg_USD <- Monthly_Avg[, 1:2]
Gold.ts <- ts(Monthly_Avg_USD$USD, start = c(1978, 1), end = c(2025, 1), freq = 12)
# Plot
plot(Gold.ts, main = "Monthly Average USD Time Series",
xlab = "Year", ylab = "USD",
col = "blue", lwd = 2)
nValid <- 113
nTrain <- length(Gold.ts) - nValid
train.ts<- window(Gold.ts, end = c(2015,8))
valid.ts <- window(Gold.ts, start=c(2015, 9), end = c(2025, 1))
##ACF Plot
diff.gold.ts <- diff(Gold.ts)
Acf(diff.gold.ts)
##Difference Plot
plot.ts(diff(Monthly_Avg_USD$USD))
##Models
##Polynomial Models
linear_mod <- tslm(train.ts ~ trend)
summary(linear_mod)
##
## Call:
## tslm(formula = train.ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -414.60 -238.08 -16.77 159.31 849.91
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 120.6223 27.5114 4.384 1.45e-05 ***
## trend 1.9786 0.1052 18.799 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 292 on 450 degrees of freedom
## Multiple R-squared: 0.4399, Adjusted R-squared: 0.4386
## F-statistic: 353.4 on 1 and 450 DF, p-value: < 2.2e-16
#print(linear_mod$fitted.values)
linear_mod_pred <- forecast(linear_mod, h= nValid, level = 90)
#print(linear_mod_pred$mean)
# ggplot2 + forecast package
autoplot(linear_mod_pred) +
autolayer(linear_mod_pred$mean, series ="Linear Forecast") +
autolayer(linear_mod$fitted.values, series = "Linear Fit") +
autolayer(valid.ts, series = "Observed") +
xlab("Year") + ylab("Gold Price Per Ounce") +
guides(color = guide_legend(title = ""))
linear.res <- linear_mod$residuals
acf(linear.res, lag.max = 12)
##Exponential Trend
exp_mod <- tslm(train.ts ~ trend , lambda = 0) #lambda = 0 indicates exponential trend
summary(exp_mod)
##
## Call:
## tslm(formula = train.ts ~ trend, lambda = 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.75205 -0.30957 -0.01562 0.29730 0.89712
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.5813940 0.0390323 142.99 <2e-16 ***
## trend 0.0026185 0.0001493 17.54 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4142 on 450 degrees of freedom
## Multiple R-squared: 0.4059, Adjusted R-squared: 0.4046
## F-statistic: 307.5 on 1 and 450 DF, p-value: < 2.2e-16
exp_mod_pred <- forecast(exp_mod, h= nValid, level=0)
autoplot(exp_mod_pred) +
autolayer(exp_mod_pred$mean, series="Exponential Forecast") +
autolayer(exp_mod$fitted.values, series = 'Exponential Fit') +
autolayer(valid.ts, series = "Observed") +
xlab("Year") + ylab("Gold Price Per Ounce") +
guides(color = guide_legend(title = ""))
exp.res <- exp_mod$residuals
acf(exp.res, lag.max = 12)
##Quadratic Trend Model
quad_mod <- tslm(train.ts ~ trend + I(trend^2))
summary(quad_mod)
##
## Call:
## tslm(formula = train.ts ~ trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -427.50 -132.08 -10.55 113.34 625.01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.402e+02 2.527e+01 25.33 <2e-16 ***
## trend -4.888e+00 2.577e-01 -18.97 <2e-16 ***
## I(trend^2) 1.516e-02 5.508e-04 27.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 178.3 on 449 degrees of freedom
## Multiple R-squared: 0.7915, Adjusted R-squared: 0.7906
## F-statistic: 852.4 on 2 and 449 DF, p-value: < 2.2e-16
quad_mod_pred <- forecast(quad_mod, h= nValid, level=90)
Quad_mod_plot <- autoplot(quad_mod_pred) +
autolayer(quad_mod_pred$mean, series ="Quadratic Forecast") +
autolayer(quad_mod$fitted.values, series = "Quadratic Fit") +
autolayer(valid.ts, series = "Observed") +
xlab("Year") + ylab("Gold Price Per Ounce") +
guides(color = guide_legend(title = ""))
plotly::ggplotly(Quad_mod_plot)
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomForecast() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
##Residuals
quad.res <- quad_mod$residuals
acf(quad.res, lag.max = 12)
##AR1 on Residuals to Improve Quadratic Trend
train.res.ar.1 <- Arima(quad.res, order = c(1, 0, 0))
summary(train.res.ar.1)
## Series: quad.res
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.9917 -133.7298
## s.e. 0.0061 150.8811
##
## sigma^2 = 933.6: log likelihood = -2188.04
## AIC=4382.08 AICc=4382.13 BIC=4394.42
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.070233 30.48752 18.68111 11.42805 34.42235 0.2180712 0.1973301
quad.res.for <- forecast(quad.res, h =nValid, level = 90)
autoplot(quad.res)+
autolayer(quad.res.for$fitted)
autoplot(quad_mod_pred)+
autolayer(quad.res.for$mean + quad_mod_pred$mean, series = "Improved Quad. Trend")+
autolayer(valid.ts, series = "Observed") +
xlab("Year") + ylab("Gold Price Per Ounce") +
guides(color = guide_legend(title = ""))
acf(train.res.ar.1$residuals, lag.max = 12)
##Additive Seasonality
season_mod <- tslm(train.ts ~ season)
summary(season_mod)
##
## Call:
## tslm(formula = train.ts ~ season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -359.58 -229.77 -178.26 59.82 1209.34
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 567.4113 63.9951 8.866 <2e-16 ***
## season2 11.2774 90.5028 0.125 0.901
## season3 6.8939 90.5028 0.076 0.939
## season4 -1.9242 90.5028 -0.021 0.983
## season5 -0.8937 90.5028 -0.010 0.992
## season6 -3.8403 90.5028 -0.042 0.966
## season7 -3.9671 90.5028 -0.044 0.965
## season8 2.2255 90.5028 0.025 0.980
## season9 -4.9027 91.1122 -0.054 0.957
## season10 1.3754 91.1122 0.015 0.988
## season11 6.4481 91.1122 0.071 0.944
## season12 3.5981 91.1122 0.039 0.969
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 394.5 on 440 degrees of freedom
## Multiple R-squared: 0.0001504, Adjusted R-squared: -0.02485
## F-statistic: 0.006019 on 11 and 440 DF, p-value: 1
season_mod_pred <- forecast(season_mod, h= nValid, level=0)
autoplot(season_mod_pred) +
autolayer(season_mod_pred$mean, series ="Seasonality Forecast") +
autolayer(valid.ts, series = "Observed") +
autolayer(season_mod$fitted.values, series = "Seasonality Fit") +
guides(color = guide_legend(title = ""))
season.res <- season_mod$residuals
acf(season.res, lag.max = 12)
gold_reg_plot <- autoplot(season_mod_pred) +
autolayer(season_mod_pred$mean, series ="Seasonality Forecast") +
autolayer(quad_mod_pred$mean, series ="Quadratic Forecast") +
autolayer(exp_mod_pred$mean, series="Exponential Forecast") +
autolayer(linear_mod_pred$mean, series = "Linear Forecast") +
autolayer(quad_mod_pred$fitted, series = "Quadratic Fit")+
autolayer(valid.ts, series = "Observed") +
labs(title = "Regression Based Model Forecasts", x = "Year", y = "Gold Price Per Ounce") +
guides(color = guide_legend(title = ""))
plotly::ggplotly(gold_reg_plot)
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomForecast() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
##Residual Plot
autoplot(season_mod_pred$residuals) +
autolayer(season_mod_pred$residuals, series = "Training residuals") +
autolayer(valid.ts - season_mod_pred$mean, series = "Validation residuals")
##Holt Winters ZZZ Model
gold.ets <- ets(train.ts, model = "ZZZ")
gold.ets.forecast <- forecast(gold.ets, h = nValid, level = 0)
autoplot(gold.ets.forecast) +
autolayer(gold.ets.forecast$mean, series = "ETS Forecast")+
autolayer(valid.ts, series = "Observed")+
guides(color = guide_legend(title = ""))
ets.res <- gold.ets$residuals
acf(ets.res, lag.max = 12)
gold.arima <- auto.arima(train.ts)
summary(gold.arima)
## Series: train.ts
## ARIMA(2,1,2)(2,0,0)[12]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sar1 sar2
## 0.3631 0.5549 -0.1334 -0.7133 0.0074 -0.0041
## s.e. 0.1205 0.1022 0.1044 0.0825 0.0494 0.0536
##
## sigma^2 = 870: log likelihood = -2163.31
## AIC=4340.63 AICc=4340.88 BIC=4369.41
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.946834 29.26585 18.08275 0.1589808 3.084795 0.2084232 -0.0042125
gold.arima.forecast <- forecast(gold.arima, h = nValid, level = 0)
gold_arima_plot <- autoplot(gold.arima.forecast)+
autolayer(valid.ts, series = "Observed")+
autolayer(gold.arima.forecast$mean, series ="ARIMA Forecast") +
xlab("Time") + ylab("Gold Price per Ounce") +
guides(color = guide_legend(title = ""))
plotly::ggplotly(gold_arima_plot)
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomForecast() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
train.res.from.arima <- gold.arima$residuals
arima_res <- diff(train.res.from.arima)
acf(arima_res, lag.max = 12)
p <- 10 # Number of previous time steps used for forecast
P <- 8 # Number of previous seasonal values to use
size <- 7 # Number of hidden nodes
gold.nnetar <- nnetar(train.ts, repeats =20, p = p, P = P, size = size)
gold.nnetar.forecast <- forecast(gold.nnetar, h = nValid)
autoplot(gold.nnetar.forecast) +
autolayer(valid.ts, series = "Observed")+
autolayer(gold.nnetar.forecast$mean, series ="NN Forecast") +
autolayer(gold.nnetar.forecast$fitted, series = "NN Fit")+
guides(color = guide_legend(title = ""))
## Warning: Removed 96 rows containing missing values or values outside the scale range
## (`geom_line()`).
##Naive
gold.naive <- naive(train.ts, h = nValid, level = 0)
gold_naive_mod_pred <- forecast(gold.naive, h = nValid)
gold_naive <- autoplot(gold_naive_mod_pred) +
autolayer(valid.ts, series = "Observed")+
autolayer(gold_naive_mod_pred$mean, series ="Naive Forecast") +
xlab("Time") + ylab("Gold Price per Ounce") +
guides(color = guide_legend(title = ""))
plotly::ggplotly(gold_naive)
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomForecast() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
##Holt Winters
gold.holt.mod <- ets(train.ts, model = "AAN")
summary(gold.holt.mod)
## ETS(A,Ad,N)
##
## Call:
## ets(y = train.ts, model = "AAN")
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.0507
## phi = 0.9352
##
## Initial states:
## l = 189.4989
## b = 26.3622
##
## sigma: 30.1581
##
## AIC AICc BIC
## 5849.789 5849.978 5874.471
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.5904294 29.99079 18.19971 0.01909812 3.084645 0.2097713 0.131731
gold.holt.pred <- forecast(gold.holt.mod, h = nValid, level = 0)
autoplot(gold.holt.pred)+
autolayer(valid.ts, series = "Observed")+
autolayer(gold.holt.pred$mean, series = "Holt Winter Forecast") +
guides(color = guide_legend(title = ""))
holt.res <- gold.holt.mod$residuals
acf(holt.res, lag.max = 12)
dual_holt_plot <- autoplot(gold.holt.pred)+
autolayer(valid.ts, series = "Observed")+
autolayer(gold.holt.pred$mean, series = "Holt Winter (AAN) Forecast") +
autolayer(gold.ets.forecast$mean, series = "Holt Winter (ZZZ) Forecast" )+
labs(title = "Forecasts of Holt Winters Models", x = "Year", y = "Price of Gold per Ounce")+
guides(color = guide_legend(title = ""))
plotly:: ggplotly(dual_holt_plot)
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomForecast() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
##Moving Average
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
ma.trailing <- rollmean(train.ts, k = 12, align = "right")
last.ma <- tail(ma.trailing, 1)
ma.trailing.pred <- ts(rep(last.ma, nValid), start = c(1978, nTrain + 1), end = c(1978, nTrain + nValid), freq = 12)
ma_plot <- autoplot(Gold.ts)+
autolayer(ma.trailing, series = "Moving Average") +
autolayer(ma.trailing.pred, series = "MA Forecast")+
xlab("Time") + ylab("Gold Price per Ounce") +
ggtitle("Moving Average Forecast")
plotly::ggplotly(ma_plot)
##Polynomial Performance
# Model 1: Linear Trend Model (linear_mod)
linear_mod_acc <- accuracy(linear_mod_pred$fitted, train.ts)
print(linear_mod_acc)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set -6.443822e-14 291.3189 234.1382 -19.55764 50.3282 0.9943422 13.09904
# Model 2: Exponential Trend Model (exp_model)
exp_mod_acc <- accuracy(exp_mod_pred$fitted, train.ts)
print(exp_mod_acc)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 59.90031 290.955 200.9366 -8.726191 35.37218 0.9936324 9.162622
# Model 3: Quadratic Trend Model (quad_mod)
quad_mod_acc <- accuracy(quad_mod_pred$fitted, train.ts)
print(quad_mod_acc)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set -7.867478e-15 177.7268 139.9698 -6.876854 29.77414 0.9729729 7.736005
# Model 4: Additive Seasonality (season_mod)
season_mod_acc <- accuracy(season_mod_pred$fitted, train.ts)
print(season_mod_acc)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 7.215222e-15 389.2207 299.2125 -33.14014 56.48047 0.9938479 12.68431
##Improved Quadratic Trend With ARIMA
quad_mod_improved <- accuracy(quad.res.for$mean + quad_mod_pred$mean, valid.ts)
print(quad_mod_improved)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 84.19701 173.0776 123.0699 4.157082 6.947738 0.8982532 2.730262
##Naive Model Performance
naive_mod_acc <- accuracy(gold_naive_mod_pred$mean, valid.ts)
print(naive_mod_acc)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 524.5275 659.1947 526.3167 28.08148 28.24723 0.9518827 10.17317
arima_mod_acc <- accuracy(gold.arima.forecast$mean, valid.ts)
print(arima_mod_acc)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 597.6932 726.8177 598.7808 32.55612 32.65718 0.952131 11.41513
##Holt Winter Model Performance:
gold_holt_acc <- accuracy(gold.holt.pred$mean, valid.ts)
print(gold_holt_acc)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 598.8837 726.0852 599.6881 32.67957 32.75444 0.9518448 11.41632
nn_acc <- accuracy(gold.nnetar.forecast$mean, valid.ts)
print(nn_acc)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 420.9287 551.8818 424.3771 22.15393 22.47207 0.9442189 8.307124
ets_acc <- accuracy(gold.ets.forecast$mean, valid.ts)
print(ets_acc)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 1049.727 1254.146 1050.019 57.73315 57.76037 0.9629774 20.04131
ma_acc <- accuracy(ma.trailing.pred, valid.ts)
print(ma_acc)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 448.3692 600.3694 456.7989 23.18005 23.94663 0.9518827 9.088776
##Forecasting Using Improved Quadratic Model on Entire Dataset:
gold.quad <- tslm(Gold.ts ~ trend + I(trend^2))
gold_quad_pred <- forecast(gold.quad, h= nValid, level=95)
final.res <- gold.quad$residuals
quad.res.final <- forecast(final.res, h =nValid, level = 95)
gold_quad_plot <- autoplot(gold_quad_pred) +
autolayer(quad.res.final$mean + gold_quad_pred$mean, series ="Quadratic Forecast") +
labs(title = "Quadratic Forecast Improved Full", x = "Year", y = "Gold Price Per Ounce") +
guides(color = guide_legend(title = ""))
plotly::ggplotly(gold_quad_plot)
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomForecast() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues