Moving Average Forecasting

The data used below is the historical price of gold from 30th August 2000 to 21st January 2022. The data had 5449 rows and 7 columns.

df <- read.csv("GOLD.csv")
dim(df)

## [1] 5449    7

head(df)

##            X GC.F.Open GC.F.High GC.F.Low GC.F.Close GC.F.Volume GC.F.Adjusted
## 1 2000-08-30     273.9     273.9    273.9      273.9           0         273.9
## 2 2000-08-31     274.8     278.3    274.8      278.3           0         278.3
## 3 2000-09-01     277.0     277.0    277.0      277.0           0         277.0
## 4 2000-09-04        NA        NA       NA         NA          NA            NA
## 5 2000-09-05     275.8     275.8    275.8      275.8           2         275.8
## 6 2000-09-06     274.2     274.2    274.2      274.2           0         274.2

The forecasting process will require a time-series object. In R, a time-series object can be made by using the ts() function from the “stats” library. In order to reduce the noise from the daily data, we’re going to aggregate the data per 3 months (quarterly).

library(forecast)

## Warning: package 'forecast' was built under R version 4.1.2

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

ts.data <- ts(df$GC.F.Close, start = c(2000, 08, 30), end = c(2022, 01, 21),
              frequency = 4)

length(ts.data)

## [1] 82

Through a simple visualization, we can see that the data contains multiple missing values. Therefore, the missing values require the linear interpolation imputation method from the “imputeTS” library.

## [1] "missing values: 3 observations"

library(imputeTS)

## Warning: package 'imputeTS' was built under R version 4.1.2

ts.data.imp <- na_interpolation(ts.data)

The functions below are defined to return forecasts and metrics for SMA (Simple Moving Average) and DMA (Double Moving Average).

sma.custom <- function(ts.data, m, h){
    library(TTR)
    library(Metrics)
    
    result <- list()
    
    smooth.sma <- SMA(ts.data, n = m)  # SMA smoothing
    forecast.sma <- c(NA, smooth.sma)  # SMA forecast
    
    bind <- cbind(data = c(ts.data, NA), smooth.sma = c(smooth.sma, NA),
                  forecast = forecast.sma)
    
    # SMA forecast for t' > max(t) + 1
    future <- cbind(data = rep(NA, h - 1), smooth.sma = rep(NA, h - 1),
                    rep(forecast.sma[length(forecast.sma)], h - 1))
    
    # Row bind and omit NA to evaluate metrics
    result$data <- rbind(bind, future)
    bind <- as.data.frame(na.omit(bind))
    
    # Forecast metrics evaluation (MAE, MSE, MAPE)
    df.metrics <- data.frame(
        c(mae(bind$data, bind$forecast),
          mse(bind$data, bind$forecast),
          mape(bind$data, bind$forecast))
    )
    
    rownames(df.metrics) <- c("MAE", "MSE", "MAPE")
    colnames(df.metrics) <- c(paste("SMA (m = ", m, ")", sep = ""))
    
    result$metrics <- df.metrics
    return(result)
}

dma.custom <- function(ts.data, m, h) {
    library(TTR)
    library(Metrics)
    
    result <- list()
    smooth.sma <- SMA(ts.data, n = m)  # SMA
    smooth.dma <- SMA(smooth.sma, n = m)  # DMA = SMA of SMA
    
    # DMA forecast formula
    At <- 2*smooth.sma - smooth.dma
    Bt <- 2/(m - 1)*(smooth.sma - smooth.dma)
    
    data.dma <- At + Bt  
    forecast.dma <- c(NA, smooth.dma)
    
    bind <- cbind(data = c(ts.data, NA), smooth.dma = c(smooth.dma, NA),
                  forecast = forecast.dma)
    
    # DMA forecast for t' > max(t) + 1
    f = c()
    for (i in 1:h) {
        f[i] = At[length(At)] + Bt[length(Bt)]*(i)
    }
    
    future <- cbind(data = rep(NA, h - 1), smooth.dma = rep(NA, h - 1), f[-1])
    
    # Row bind and omit NA to evaluate metrics
    result$data <- rbind(bind, future)
    bind <- as.data.frame(na.omit(bind))
    
    df.metrics <- data.frame(
        c(mae(bind$data, bind$forecast),
          mse(bind$data, bind$forecast),
          mape(bind$data, bind$forecast))
    )
    
    # Forecast metrics evaluation (MAE, MSE, MAPE)
    rownames(df.metrics) <- c("MAE", "MSE", "MAPE")
    colnames(df.metrics) <- c(paste("DMA (m = ", m, ")", sep = ""))
    result$metrics <- df.metrics
    
    return(result)
}

Parameter tuning for both SMA and DMA. We will test from m = 2 to m = 4 in order to find the best value for m that will get the best metrics.

sma.2 <- sma.custom(ts.data.imp, m = 2, h = 3); sma.2$metrics

## Warning: package 'TTR' was built under R version 4.1.2

## Warning: package 'Metrics' was built under R version 4.1.2

## 
## Attaching package: 'Metrics'

## The following object is masked from 'package:forecast':
## 
##     accuracy

##      SMA (m = 2)
## MAE  1.258751088
## MSE  2.826194186
## MAPE 0.004645325

sma.3 <- sma.custom(ts.data.imp, m = 3, h = 3); sma.3$metrics

##      SMA (m = 3)
## MAE  1.395640729
## MSE  3.528897165
## MAPE 0.005152418

sma.4 <- sma.custom(ts.data.imp, m = 4, h = 3); sma.4$metrics

##      SMA (m = 4)
## MAE   1.55801380
## MSE   4.23705134
## MAPE  0.00575234

dma.2 <- dma.custom(ts.data.imp, m = 2, h = 3); dma.2$metrics

##      DMA (m = 2)
## MAE  1.438819420
## MSE  3.570693717
## MAPE 0.005311861

dma.3 <- dma.custom(ts.data.imp, m = 3, h = 3); dma.3$metrics

##      DMA (m = 3)
## MAE  1.765850299
## MSE  5.225288425
## MAPE 0.006521221

dma.4 <- dma.custom(ts.data.imp, m = 4, h = 3); dma.4$metrics

##      DMA (m = 4)
## MAE  1.959334743
## MSE  6.367467435
## MAPE 0.007243708

Best metrics for both SMA and DMA is using m = 2. Forecast projection are plotted below.

Moving Average Forecasting

Perisai Zidane Hanapi

1/28/2022