Load in Gold Data

library(readxl)

## Warning: package 'readxl' was built under R version 4.4.2

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.4.2

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

file_path <- "C:/Users/james/Downloads/Gold_Prices.xlsx"

sheet_names <- c('Yearly_Avg', 'Quarterly_Avg', 'Monthly_Avg')

# Read each sheet into a separate data frame
Yearly_Avg <- read_excel(file_path, sheet = sheet_names[1]) %>%
  rename(Date = 1)

## New names:
## • `` -> `...1`

Quarterly_Avg <- read_excel(file_path, sheet = sheet_names[2]) %>%
  rename(Date = 1)

## New names:
## • `` -> `...1`

Monthly_Avg <- read_excel(file_path, sheet = sheet_names[3]) %>%
  rename(Date = 1)

## New names:
## • `` -> `...1`

Clean Data

# Convert to Just US Currency
Yearly_Avg_USD <- Yearly_Avg[, 1:2]
Quarterly_Avg_USD <- Quarterly_Avg[, 1:2] 
Monthly_Avg_USD <- Monthly_Avg[, 1:2]

Create Timeseries for Monthly data

Gold.ts <- ts(Monthly_Avg_USD$USD, start = c(1978, 1), end = c(2025, 1), freq = 12)

# Plot
plot(Gold.ts, main = "Monthly Average USD Time Series",
     xlab = "Year", ylab = "USD",
     col = "blue", lwd = 2)

Set Test & Validation Data

nValid <- 113


nTrain <- length(Gold.ts) - nValid

train.ts<- window(Gold.ts, end = c(2015,8))

valid.ts <- window(Gold.ts, start=c(2015, 9), end = c(2025, 1))

##ACF Plot

diff.gold.ts <- diff(Gold.ts)
Acf(diff.gold.ts)

##Difference Plot

plot.ts(diff(Monthly_Avg_USD$USD))

##Models

##Polynomial Models

Linear Trend

linear_mod <- tslm(train.ts ~ trend)
summary(linear_mod)

## 
## Call:
## tslm(formula = train.ts ~ trend)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -414.60 -238.08  -16.77  159.31  849.91 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 120.6223    27.5114   4.384 1.45e-05 ***
## trend         1.9786     0.1052  18.799  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 292 on 450 degrees of freedom
## Multiple R-squared:  0.4399, Adjusted R-squared:  0.4386 
## F-statistic: 353.4 on 1 and 450 DF,  p-value: < 2.2e-16

#print(linear_mod$fitted.values)

linear_mod_pred <- forecast(linear_mod, h= nValid, level = 90)
#print(linear_mod_pred$mean)

# ggplot2 + forecast package 
autoplot(linear_mod_pred) +
  autolayer(linear_mod_pred$mean, series ="Linear Forecast") +
  autolayer(linear_mod$fitted.values, series = "Linear Fit") + 
  autolayer(valid.ts, series = "Observed") + 
  xlab("Year") + ylab("Gold Price Per Ounce") + 
  guides(color = guide_legend(title = ""))

linear.res <- linear_mod$residuals
acf(linear.res, lag.max = 12)

##Exponential Trend

exp_mod <- tslm(train.ts ~ trend , lambda = 0) #lambda = 0 indicates exponential trend
summary(exp_mod)

## 
## Call:
## tslm(formula = train.ts ~ trend, lambda = 0)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.75205 -0.30957 -0.01562  0.29730  0.89712 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.5813940  0.0390323  142.99   <2e-16 ***
## trend       0.0026185  0.0001493   17.54   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4142 on 450 degrees of freedom
## Multiple R-squared:  0.4059, Adjusted R-squared:  0.4046 
## F-statistic: 307.5 on 1 and 450 DF,  p-value: < 2.2e-16

exp_mod_pred <- forecast(exp_mod, h= nValid, level=0)

autoplot(exp_mod_pred) +
  autolayer(exp_mod_pred$mean, series="Exponential Forecast") + 
  autolayer(exp_mod$fitted.values, series = 'Exponential Fit') +
  autolayer(valid.ts, series = "Observed") + 
  xlab("Year") + ylab("Gold Price Per Ounce") + 
  guides(color = guide_legend(title = ""))

exp.res <- exp_mod$residuals
acf(exp.res, lag.max = 12)

##Quadratic Trend Model

quad_mod <- tslm(train.ts ~ trend + I(trend^2))
summary(quad_mod)

## 
## Call:
## tslm(formula = train.ts ~ trend + I(trend^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -427.50 -132.08  -10.55  113.34  625.01 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.402e+02  2.527e+01   25.33   <2e-16 ***
## trend       -4.888e+00  2.577e-01  -18.97   <2e-16 ***
## I(trend^2)   1.516e-02  5.508e-04   27.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 178.3 on 449 degrees of freedom
## Multiple R-squared:  0.7915, Adjusted R-squared:  0.7906 
## F-statistic: 852.4 on 2 and 449 DF,  p-value: < 2.2e-16

quad_mod_pred <- forecast(quad_mod, h= nValid, level=90)

Quad_mod_plot <- autoplot(quad_mod_pred) +
  autolayer(quad_mod_pred$mean, series ="Quadratic Forecast") +
  autolayer(quad_mod$fitted.values, series = "Quadratic Fit") +
  autolayer(valid.ts, series = "Observed") + 
  xlab("Year") + ylab("Gold Price Per Ounce") + 
  guides(color = guide_legend(title = ""))

plotly::ggplotly(Quad_mod_plot)

## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomForecast() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

##Residuals

quad.res <- quad_mod$residuals
acf(quad.res, lag.max = 12)

##AR1 on Residuals to Improve Quadratic Trend

train.res.ar.1 <- Arima(quad.res, order = c(1, 0, 0))


summary(train.res.ar.1)

## Series: quad.res 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1       mean
##       0.9917  -133.7298
## s.e.  0.0061   150.8811
## 
## sigma^2 = 933.6:  log likelihood = -2188.04
## AIC=4382.08   AICc=4382.13   BIC=4394.42
## 
## Training set error measures:
##                    ME     RMSE      MAE      MPE     MAPE      MASE      ACF1
## Training set 1.070233 30.48752 18.68111 11.42805 34.42235 0.2180712 0.1973301

quad.res.for <- forecast(quad.res, h =nValid, level = 90)

autoplot(quad.res)+
  autolayer(quad.res.for$fitted)

autoplot(quad_mod_pred)+
  autolayer(quad.res.for$mean + quad_mod_pred$mean, series = "Improved Quad. Trend")+
  autolayer(valid.ts, series = "Observed") + 
  xlab("Year") + ylab("Gold Price Per Ounce") + 
  guides(color = guide_legend(title = ""))

acf(train.res.ar.1$residuals, lag.max = 12)

##Additive Seasonality

season_mod <- tslm(train.ts ~ season)
summary(season_mod)

## 
## Call:
## tslm(formula = train.ts ~ season)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -359.58 -229.77 -178.26   59.82 1209.34 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 567.4113    63.9951   8.866   <2e-16 ***
## season2      11.2774    90.5028   0.125    0.901    
## season3       6.8939    90.5028   0.076    0.939    
## season4      -1.9242    90.5028  -0.021    0.983    
## season5      -0.8937    90.5028  -0.010    0.992    
## season6      -3.8403    90.5028  -0.042    0.966    
## season7      -3.9671    90.5028  -0.044    0.965    
## season8       2.2255    90.5028   0.025    0.980    
## season9      -4.9027    91.1122  -0.054    0.957    
## season10      1.3754    91.1122   0.015    0.988    
## season11      6.4481    91.1122   0.071    0.944    
## season12      3.5981    91.1122   0.039    0.969    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 394.5 on 440 degrees of freedom
## Multiple R-squared:  0.0001504,  Adjusted R-squared:  -0.02485 
## F-statistic: 0.006019 on 11 and 440 DF,  p-value: 1

season_mod_pred <- forecast(season_mod, h= nValid, level=0)


autoplot(season_mod_pred) +
  autolayer(season_mod_pred$mean, series ="Seasonality Forecast") +
  autolayer(valid.ts, series = "Observed") + 
  autolayer(season_mod$fitted.values, series = "Seasonality Fit") +
  guides(color = guide_legend(title = ""))

season.res <- season_mod$residuals
acf(season.res, lag.max = 12)

gold_reg_plot <- autoplot(season_mod_pred) +
  autolayer(season_mod_pred$mean, series ="Seasonality Forecast") +
  autolayer(quad_mod_pred$mean, series ="Quadratic Forecast") +
  autolayer(exp_mod_pred$mean, series="Exponential Forecast") +
  autolayer(linear_mod_pred$mean, series = "Linear Forecast") + 
  autolayer(quad_mod_pred$fitted, series = "Quadratic Fit")+
  autolayer(valid.ts, series = "Observed") + 
  labs(title = "Regression Based Model Forecasts", x = "Year", y = "Gold Price Per Ounce") + 
  guides(color = guide_legend(title = ""))

plotly::ggplotly(gold_reg_plot)

## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomForecast() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

##Residual Plot

autoplot(season_mod_pred$residuals) + 
  autolayer(season_mod_pred$residuals, series = "Training residuals") + 
  autolayer(valid.ts - season_mod_pred$mean, series = "Validation residuals")

##Holt Winters ZZZ Model

gold.ets <- ets(train.ts, model = "ZZZ")

gold.ets.forecast <- forecast(gold.ets, h = nValid, level = 0)

autoplot(gold.ets.forecast) + 
  autolayer(gold.ets.forecast$mean, series = "ETS Forecast")+
  autolayer(valid.ts, series = "Observed")+
  guides(color = guide_legend(title = ""))

ets.res <- gold.ets$residuals
acf(ets.res, lag.max = 12)

Auto Arima

gold.arima <- auto.arima(train.ts)
summary(gold.arima)

## Series: train.ts 
## ARIMA(2,1,2)(2,0,0)[12] 
## 
## Coefficients:
##          ar1     ar2      ma1      ma2    sar1     sar2
##       0.3631  0.5549  -0.1334  -0.7133  0.0074  -0.0041
## s.e.  0.1205  0.1022   0.1044   0.0825  0.0494   0.0536
## 
## sigma^2 = 870:  log likelihood = -2163.31
## AIC=4340.63   AICc=4340.88   BIC=4369.41
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set 0.946834 29.26585 18.08275 0.1589808 3.084795 0.2084232 -0.0042125

gold.arima.forecast <- forecast(gold.arima, h = nValid, level = 0)

gold_arima_plot <- autoplot(gold.arima.forecast)+
  autolayer(valid.ts, series = "Observed")+
  autolayer(gold.arima.forecast$mean, series ="ARIMA Forecast") +
  xlab("Time") + ylab("Gold Price per Ounce") +
  guides(color = guide_legend(title = ""))

plotly::ggplotly(gold_arima_plot)

## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomForecast() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

train.res.from.arima <- gold.arima$residuals

arima_res <- diff(train.res.from.arima)

acf(arima_res, lag.max = 12)

NN model

p <- 10 # Number of previous time steps used for forecast
  
P <- 8 # Number of previous seasonal values to use
  
size <- 7 # Number of hidden nodes



gold.nnetar <- nnetar(train.ts, repeats =20, p = p, P = P, size = size)

gold.nnetar.forecast <- forecast(gold.nnetar, h = nValid)

autoplot(gold.nnetar.forecast) +
  autolayer(valid.ts, series = "Observed")+
  autolayer(gold.nnetar.forecast$mean, series ="NN Forecast") +
  autolayer(gold.nnetar.forecast$fitted, series = "NN Fit")+
  guides(color = guide_legend(title = ""))

## Warning: Removed 96 rows containing missing values or values outside the scale range
## (`geom_line()`).

##Naive

gold.naive <- naive(train.ts, h = nValid, level = 0)

gold_naive_mod_pred <- forecast(gold.naive, h = nValid)

gold_naive <- autoplot(gold_naive_mod_pred) +
  autolayer(valid.ts, series = "Observed")+
  autolayer(gold_naive_mod_pred$mean, series ="Naive Forecast") +
  xlab("Time") + ylab("Gold Price per Ounce") +
  guides(color = guide_legend(title = ""))

plotly::ggplotly(gold_naive)

## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomForecast() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

##Holt Winters

gold.holt.mod <- ets(train.ts, model = "AAN")
summary(gold.holt.mod)

## ETS(A,Ad,N) 
## 
## Call:
## ets(y = train.ts, model = "AAN")
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.0507 
##     phi   = 0.9352 
## 
##   Initial states:
##     l = 189.4989 
##     b = 26.3622 
## 
##   sigma:  30.1581
## 
##      AIC     AICc      BIC 
## 5849.789 5849.978 5874.471 
## 
## Training set error measures:
##                     ME     RMSE      MAE        MPE     MAPE      MASE     ACF1
## Training set 0.5904294 29.99079 18.19971 0.01909812 3.084645 0.2097713 0.131731

gold.holt.pred <- forecast(gold.holt.mod, h = nValid, level = 0)

autoplot(gold.holt.pred)+
  autolayer(valid.ts, series = "Observed")+
  autolayer(gold.holt.pred$mean, series = "Holt Winter Forecast") +
  guides(color = guide_legend(title = ""))

holt.res <- gold.holt.mod$residuals
acf(holt.res, lag.max = 12)

dual_holt_plot <- autoplot(gold.holt.pred)+
  autolayer(valid.ts, series = "Observed")+
  autolayer(gold.holt.pred$mean, series = "Holt Winter (AAN) Forecast") +
  autolayer(gold.ets.forecast$mean, series = "Holt Winter (ZZZ) Forecast" )+
  labs(title = "Forecasts of Holt Winters Models", x = "Year", y = "Price of Gold per Ounce")+
  guides(color = guide_legend(title = ""))

plotly:: ggplotly(dual_holt_plot)

## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomForecast() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

##Moving Average

library(zoo)

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

ma.trailing <- rollmean(train.ts, k = 12, align = "right")
last.ma <- tail(ma.trailing, 1)

ma.trailing.pred <- ts(rep(last.ma, nValid), start = c(1978, nTrain + 1), end = c(1978, nTrain + nValid), freq = 12)

ma_plot <- autoplot(Gold.ts)+
  autolayer(ma.trailing, series = "Moving Average") +
  autolayer(ma.trailing.pred, series = "MA Forecast")+
  xlab("Time") + ylab("Gold Price per Ounce") +
  ggtitle("Moving Average Forecast")

plotly::ggplotly(ma_plot)

##Polynomial Performance

# Model 1: Linear Trend Model (linear_mod)
linear_mod_acc <- accuracy(linear_mod_pred$fitted, train.ts)
print(linear_mod_acc)

##                     ME     RMSE      MAE       MPE    MAPE      ACF1 Theil's U
## Test set -6.443822e-14 291.3189 234.1382 -19.55764 50.3282 0.9943422  13.09904

# Model 2: Exponential Trend Model (exp_model)
exp_mod_acc <- accuracy(exp_mod_pred$fitted, train.ts)
print(exp_mod_acc)

##                ME    RMSE      MAE       MPE     MAPE      ACF1 Theil's U
## Test set 59.90031 290.955 200.9366 -8.726191 35.37218 0.9936324  9.162622

# Model 3: Quadratic Trend Model (quad_mod)
quad_mod_acc <- accuracy(quad_mod_pred$fitted, train.ts)
print(quad_mod_acc)

##                     ME     RMSE      MAE       MPE     MAPE      ACF1 Theil's U
## Test set -7.867478e-15 177.7268 139.9698 -6.876854 29.77414 0.9729729  7.736005

# Model 4: Additive Seasonality (season_mod)
season_mod_acc <- accuracy(season_mod_pred$fitted, train.ts)
print(season_mod_acc)

##                    ME     RMSE      MAE       MPE     MAPE      ACF1 Theil's U
## Test set 7.215222e-15 389.2207 299.2125 -33.14014 56.48047 0.9938479  12.68431

##Improved Quadratic Trend With ARIMA

quad_mod_improved <- accuracy(quad.res.for$mean + quad_mod_pred$mean, valid.ts)
print(quad_mod_improved)

##                ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
## Test set 84.19701 173.0776 123.0699 4.157082 6.947738 0.8982532  2.730262

##Naive Model Performance

naive_mod_acc <- accuracy(gold_naive_mod_pred$mean, valid.ts)
print(naive_mod_acc)

##                ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
## Test set 524.5275 659.1947 526.3167 28.08148 28.24723 0.9518827  10.17317

ARIMA Model Performance:

arima_mod_acc <- accuracy(gold.arima.forecast$mean, valid.ts)
print(arima_mod_acc)

##                ME     RMSE      MAE      MPE     MAPE     ACF1 Theil's U
## Test set 597.6932 726.8177 598.7808 32.55612 32.65718 0.952131  11.41513

##Holt Winter Model Performance:

gold_holt_acc <- accuracy(gold.holt.pred$mean, valid.ts)
print(gold_holt_acc)

##                ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
## Test set 598.8837 726.0852 599.6881 32.67957 32.75444 0.9518448  11.41632

Neural Net Model Performance

nn_acc <- accuracy(gold.nnetar.forecast$mean, valid.ts)
print(nn_acc)

##                ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
## Test set 420.9287 551.8818 424.3771 22.15393 22.47207 0.9442189  8.307124

ETS ZZZ Model Performance

ets_acc <- accuracy(gold.ets.forecast$mean, valid.ts)
print(ets_acc)

##                ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
## Test set 1049.727 1254.146 1050.019 57.73315 57.76037 0.9629774  20.04131

Moving Average

ma_acc <- accuracy(ma.trailing.pred, valid.ts)
print(ma_acc)

##                ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
## Test set 448.3692 600.3694 456.7989 23.18005 23.94663 0.9518827  9.088776

##Forecasting Using Improved Quadratic Model on Entire Dataset:

gold.quad <- tslm(Gold.ts ~ trend + I(trend^2))

gold_quad_pred <- forecast(gold.quad, h= nValid, level=95)

final.res <- gold.quad$residuals
  
quad.res.final <- forecast(final.res, h =nValid, level = 95)

gold_quad_plot <- autoplot(gold_quad_pred) +
  autolayer(quad.res.final$mean + gold_quad_pred$mean, series ="Quadratic Forecast") +
  labs(title = "Quadratic Forecast Improved Full", x = "Year", y = "Gold Price Per Ounce") + 
  guides(color = guide_legend(title = ""))

plotly::ggplotly(gold_quad_plot)

## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomForecast() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

Gold_Project

James DiLuca, Matt Gutierrez, Mike Joyce

2025-02-18