Forecasting using Simple Moving Average Model, Exponential Smoothing and ARIMA(3,0,0)

Objectives

To use various forecasting algorithms to determine the best model for a time series created from the artificially generated data loaded below:

Get Data

newData <-read.csv("~/Desktop/ALL_HW/Project3_practise/OKOILU RUTH.csv", header=TRUE)
head(newData)
##           X
## 1  84.46624
## 2 107.03100
## 3 104.85777
## 4  89.38064
## 5  93.43544
## 6  94.95788
summary(newData)
##        X         
##  Min.   : 67.80  
##  1st Qu.: 86.87  
##  Median : 90.91  
##  Mean   : 90.99  
##  3rd Qu.: 95.15  
##  Max.   :109.24
nrow(newData)
## [1] 2000
plot(newData[,1]) 

From the plot above, the data looks random: no visible correlation and roughly constant variance.

Load required packages

library(forecast)
## Warning: package 'forecast' was built under R version 3.4.2
library(ggplot2)
library(tseries)
newData_ts <- ts(newData)
plot(newData_ts)

How to test if a time series is stationary?

The data looks stationary at first glance. To confirm this, we’ll use the Augmented Dickey-Fuller test (adf.test). A p-value of less than 0.05 from adf.test() indicates that the series is stationary.

adf.test(newData_ts) # p-value < 0.05 indicates the TS is stationary
## Warning in adf.test(newData_ts): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  newData_ts
## Dickey-Fuller = -12.045, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
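
As a complementary check (an addition, not part of the original workflow), the KPSS test from the same tseries package reverses the hypotheses: its null is stationarity, so a p-value above 0.05 agrees with the ADF conclusion. A minimal sketch:

# KPSS test: null hypothesis is level stationarity,
# so a large p-value is consistent with a stationary series
kpss.test(newData_ts, null = "Level")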

Partitioning the dataset into two parts: a training set and a testing set. For this project, the training set is the first 1500 rows and the testing set is the last 500 rows.

training <- newData[1:1500,]
testing <- newData[1501:2000,]
training <- as.data.frame(training)
testing <-as.data.frame(testing)
nrow(training)
## [1] 1500
nrow(testing)
## [1] 500

Tasks

Task 1. Simple moving average model

The simple moving average forecast for period t, as implemented in the function below, is the average of the previous m observations:

$\hat{x}_t = \frac{1}{m} \sum_{i=1}^{m} x_{t-i}$

I have developed a function called D_sma() in R that implements the above formula.

library(Metrics)


D_sma <- function(x, m, df){ # x = first lag (1 here), m = number of periods to average over
  i <- x:m                   # the lags that enter the average
  result <- data.frame()
  for(t in 1:nrow(df)){
    check <- t - i           # row indices of the m previous observations
    if(any(check < 1)){      # not enough history before period t
      result[t, 1] <- NA
    }else{
      result[t, 1] <- sum(df[check, ]) / m
    }
  }
  return(result)
}
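
As a quick sanity check of D_sma() on a toy series (a made-up example, not the project data): with m = 3, the first three predictions are NA because fewer than three earlier values exist.

toy <- data.frame(x = c(10, 20, 30, 40, 50))
D_sma(1, 3, toy)
# rows 1-3: NA; row 4: (10 + 20 + 30)/3 = 20; row 5: (20 + 30 + 40)/3 = 30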

1.1 Applying the simple moving average model above to the training data set, for m = 1 to 30

m_one <- D_sma(1, 1, training)
m_two <- D_sma(1, 2, training)
m_three <- D_sma(1, 3, training)
m_four <- D_sma(1, 4, training)
m_five <- D_sma(1, 5, training)
m_six <- D_sma(1, 6, training)  
m_seven <- D_sma(1, 7, training)
m_eight <- D_sma(1, 8, training)
m_nine <- D_sma(1, 9, training)
m_ten <- D_sma(1, 10, training)
m_eleven <- D_sma(1, 11, training)
m_twelve <- D_sma(1, 12, training)
m_thirteen <- D_sma(1, 13, training)
m_fourteen <- D_sma(1, 14, training)
m_fifteen <- D_sma(1, 15, training)
m_sixteen <- D_sma(1, 16, training)  
m_seventeen <- D_sma(1, 17, training)
m_eighteen <- D_sma(1, 18, training)
m_nineteen <- D_sma(1, 19, training)
m_twenty <- D_sma(1, 20, training)
m_twenty_one <- D_sma(1, 21, training)
m_twenty_two <- D_sma(1, 22, training)
m_twenty_three <- D_sma(1, 23, training)
m_twenty_four <- D_sma(1, 24, training)
m_twenty_five <- D_sma(1, 25, training)
m_twenty_six <- D_sma(1, 26, training)
m_twenty_seven <- D_sma(1, 27, training)
m_twenty_eight <- D_sma(1, 28, training)
m_twenty_nine <- D_sma(1, 29, training)
m_thirty <- D_sma(1, 30, training)
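
The thirty calls above can also be generated in a single pass; a minimal sketch that stores each model’s predictions in a list indexed by m (same D_sma() function, identical results):

sma_predictions <- lapply(1:30, function(m) D_sma(1, m, training))
# e.g. sma_predictions[[8]] is identical to m_eight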

1.2 Calculating the error, i.e., the difference between the original and the predicted value in the training data set

calculate_error <- function(observed, predicted){
  return(observed - predicted)
}


err_one <- calculate_error(training, m_one)
err_two <- calculate_error(training, m_two)
err_three <- calculate_error(training, m_three)
err_four <- calculate_error(training, m_four)
err_five <- calculate_error(training, m_five)
err_six <- calculate_error(training, m_six)
err_seven <- calculate_error(training, m_seven)
err_eight <- calculate_error(training, m_eight)
err_nine <- calculate_error(training, m_nine)
err_ten <- calculate_error(training, m_ten)
err_eleven <- calculate_error(training, m_eleven)
err_twelve <- calculate_error(training, m_twelve)
err_thirteen <- calculate_error(training, m_thirteen)
err_fourteen <- calculate_error(training, m_fourteen)
err_fifteen <- calculate_error(training, m_fifteen)
err_sixteen <- calculate_error(training, m_sixteen)
err_seventeen <- calculate_error(training, m_seventeen)
err_eighteen <- calculate_error(training, m_eighteen)
err_nineteen <- calculate_error(training, m_nineteen)
err_twenty <- calculate_error(training, m_twenty)
err_twenty_one <- calculate_error(training, m_twenty_one)
err_twenty_two <- calculate_error(training, m_twenty_two)
err_twenty_three <- calculate_error(training, m_twenty_three)
err_twenty_four <- calculate_error(training, m_twenty_four)
err_twenty_five <- calculate_error(training, m_twenty_five)
err_twenty_six <- calculate_error(training, m_twenty_six)
err_twenty_seven <- calculate_error(training, m_twenty_seven)
err_twenty_eight <- calculate_error(training, m_twenty_eight)
err_twenty_nine <- calculate_error(training, m_twenty_nine)
err_thirty <- calculate_error(training, m_thirty)

Computing the root mean squared error (RMSE)
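
Over the n periods where a prediction is defined (the leading NA rows are dropped), the RMSE is

$\mathrm{RMSE} = \sqrt{\frac{1}{n} \sum_{t=1}^{n} e_t^2}$

where $e_t$ is the error computed in step 1.2.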

calculate_rmse <- function(err_m){
  err_m <- err_m[!is.na(err_m),]
  sqrt(mean(err_m^2))
}

rmse_one <- calculate_rmse(err_one)
rmse_two <- calculate_rmse(err_two)
rmse_three <- calculate_rmse(err_three)
rmse_four <- calculate_rmse(err_four)
rmse_five <- calculate_rmse(err_five)
rmse_six <- calculate_rmse(err_six)
rmse_seven <- calculate_rmse(err_seven)
rmse_eight <- calculate_rmse(err_eight)
rmse_nine <- calculate_rmse(err_nine)
rmse_ten <- calculate_rmse(err_ten)
rmse_eleven <- calculate_rmse(err_eleven)
rmse_twelve <- calculate_rmse(err_twelve)
rmse_thirteen <- calculate_rmse(err_thirteen)
rmse_fourteen <- calculate_rmse(err_fourteen)
rmse_fifteen <- calculate_rmse(err_fifteen)
rmse_sixteen <- calculate_rmse(err_sixteen)
rmse_seventeen <- calculate_rmse(err_seventeen)
rmse_eighteen <- calculate_rmse(err_eighteen)
rmse_nineteen <- calculate_rmse(err_nineteen)
rmse_twenty <- calculate_rmse(err_twenty)
rmse_twenty_one <- calculate_rmse(err_twenty_one)
rmse_twenty_two <- calculate_rmse(err_twenty_two)
rmse_twenty_three <- calculate_rmse(err_twenty_three)
rmse_twenty_four <- calculate_rmse(err_twenty_four)
rmse_twenty_five <- calculate_rmse(err_twenty_five)
rmse_twenty_six <- calculate_rmse(err_twenty_six)
rmse_twenty_seven <- calculate_rmse(err_twenty_seven)
rmse_twenty_eight <- calculate_rmse(err_twenty_eight)
rmse_twenty_nine <- calculate_rmse(err_twenty_nine)
rmse_thirty <- calculate_rmse(err_thirty)
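
Equivalently, the thirty RMSE values can be collected in one line from the list version sketched after step 1.1 (assuming sma_predictions from that sketch):

sma_rmse_check <- sapply(1:30, function(m) calculate_rmse(calculate_error(training, sma_predictions[[m]])))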

Plotting RMSE vs m and selecting m based on the lowest RMSE value.

m <- 1:30 

sma_rmse <- c(rmse_one,rmse_two, rmse_three, rmse_four, rmse_five, rmse_six, rmse_seven, rmse_eight, rmse_nine, rmse_ten, rmse_eleven,rmse_twelve, rmse_thirteen, rmse_fourteen, rmse_fifteen, rmse_sixteen, rmse_seventeen, rmse_eighteen, rmse_nineteen, rmse_twenty, rmse_twenty_one, rmse_twenty_two, rmse_twenty_three, rmse_twenty_four, rmse_twenty_five, rmse_twenty_six, rmse_twenty_seven, rmse_twenty_eight, rmse_twenty_nine, rmse_thirty)

metrics_m_df <- data.frame(m, sma_rmse)
metrics_m_df
##     m sma_rmse
## 1   1 8.394683
## 2   2 7.035626
## 3   3 7.140791
## 4   4 6.795451
## 5   5 6.658700
## 6   6 6.487049
## 7   7 6.413648
## 8   8 6.339398
## 9   9 6.298913
## 10 10 6.267145
## 11 11 6.248079
## 12 12 6.217875
## 13 13 6.200579
## 14 14 6.174304
## 15 15 6.162268
## 16 16 6.156685
## 17 17 6.149031
## 18 18 6.146965
## 19 19 6.139815
## 20 20 6.137478
## 21 21 6.126631
## 22 22 6.120475
## 23 23 6.111956
## 24 24 6.105540
## 25 25 6.102102
## 26 26 6.099789
## 27 27 6.090181
## 28 28 6.089180
## 29 29 6.076412
## 30 30 6.082634
plot(metrics_m_df, type="o")

From the “metrics_m_df” table above, we can see that our best choices for m are 8, 9 and 10.

For m = 1 to 7 the RMSE is comparatively high, and beyond m = 10 it decreases only very slowly. That leaves a choice between m = 8, 9 and 10, with RMSE 6.339398, 6.298913 and 6.267145 respectively. Because a larger m means slower computation for only a marginal gain, we choose m = 8.

# Selected m

choice <- subset(metrics_m_df, metrics_m_df[,1] == 8)
choice
##   m sma_rmse
## 8 8 6.339398
# choice is m = 8
rmse_eight
## [1] 6.339398
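# The Metrics package loaded earlier also provides rmse() directly; as a quick
# cross-check of this value, a minimal sketch (dropping the NA rows first):
ok <- !is.na(m_eight[, 1])             # periods where the SMA prediction exists
rmse(training[ok, 1], m_eight[ok, 1])  # Metrics::rmse should match rmse_eight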
#make m_eight time series data

m_eight_ts <- ts(m_eight[, 1], start = 1, frequency = 1)

# plot m_eight for each period : 1500 periods in all
plot(m_eight_ts, main ="Predicted values for Simple Moving Average model with m = 8")

The plot above shows the predicted values from the SMA model.

Plotting the predicted values against the original values, for the best value of m (m=8).

data_to_plot <- data.frame(training, m_eight)
colnames(data_to_plot) <- c("training", "m = 8")

# Plot training values vs predicted values
plot(data_to_plot)

Observations

We observe that the plot of training values vs predicted values shows no correlation, positive or negative; the points are simply random scatter. This is not an impressive model. The RMSE is 6.339398.

Task 2. Exponential Smoothing

2.1 Applying the exponential smoothing algorithm, to the training data set, for a given alpha (α).

The exponential smoothing prediction is a weighted combination of the previous observation and the previous prediction, initialized here with the series mean:

$s_1 = \bar{x}, \qquad s_t = \alpha x_{t-1} + (1 - \alpha) s_{t-1} \quad \text{for } t > 1$

2.2 Experimenting with different values of alpha to find out the best value for α.

I developed an R function called exponential_smoothing() to implement the exponential smoothing formula above.

exponential_smoothing <- function(series, alpha){
  result <- data.frame()
  result[1, 1] <- mean(series[, 1]) # first prediction is the mean of the observed series
  for(n in 2:nrow(series)){
    # weighted combination of the previous observation and the previous prediction
    result[n, ] <- alpha * series[n - 1, 1] + (1 - alpha) * result[n - 1, 1]
  }
  return(result)
}

alpha_one <- exponential_smoothing(training, 0.1)
alpha_two <- exponential_smoothing(training, 0.2)
alpha_three <- exponential_smoothing(training, 0.3)
alpha_four <- exponential_smoothing(training, 0.4)
alpha_five <- exponential_smoothing(training, 0.5)
alpha_six <- exponential_smoothing(training, 0.6) 
alpha_seven <- exponential_smoothing(training, 0.7)
alpha_eight <- exponential_smoothing(training, 0.8)
alpha_nine <- exponential_smoothing(training, 0.9)
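
As a cross-check (an addition, not part of the original analysis), forecast::ses() fits simple exponential smoothing with a fixed α. Its fitted values will not match exponential_smoothing() exactly, because ses() initializes the level differently rather than using the series mean:

ses_nine <- ses(training[, 1], alpha = 0.9, initial = "simple") # fix alpha instead of optimizing it
head(fitted(ses_nine))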

2.3 Selecting α based on the lowest RMSE value.

err_alpha_one <- calculate_error(training, alpha_one)
err_alpha_two <- calculate_error(training, alpha_two)
err_alpha_three <- calculate_error(training, alpha_three)
err_alpha_four <- calculate_error(training, alpha_four)
err_alpha_five <- calculate_error(training, alpha_five)
err_alpha_six <- calculate_error(training, alpha_six)
err_alpha_seven <- calculate_error(training, alpha_seven)
err_alpha_eight <- calculate_error(training, alpha_eight)
err_alpha_nine <- calculate_error(training, alpha_nine)


rmse_alpha_one <- calculate_rmse(err_alpha_one)
rmse_alpha_two <- calculate_rmse(err_alpha_two)
rmse_alpha_three <- calculate_rmse(err_alpha_three)
rmse_alpha_four <- calculate_rmse(err_alpha_four)
rmse_alpha_five <- calculate_rmse(err_alpha_five)
rmse_alpha_six <- calculate_rmse(err_alpha_six)
rmse_alpha_seven <- calculate_rmse(err_alpha_seven)
rmse_alpha_eight <- calculate_rmse(err_alpha_eight)
rmse_alpha_nine <- calculate_rmse(err_alpha_nine)

es_rmse <- c(rmse_alpha_one,rmse_alpha_two, rmse_alpha_three, rmse_alpha_four, rmse_alpha_five, rmse_alpha_six, rmse_alpha_seven, rmse_alpha_eight, rmse_alpha_nine)
alpha<- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
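
The nine fits and their RMSE values can also be recomputed in one pass; a minimal sketch:

es_rmse_check <- sapply(alpha, function(a) calculate_rmse(calculate_error(training, exponential_smoothing(training, a))))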

Our best value of α is the one with the lowest RMSE, shown in the table below.

Plotting the predicted values against the original values, for the best value of α

metric_alpha_df <- data.frame(alpha, es_rmse)
metric_alpha_df
##   alpha  es_rmse
## 1   0.1 8.394683
## 2   0.2 7.035626
## 3   0.3 7.140791
## 4   0.4 6.795451
## 5   0.5 6.658700
## 6   0.6 6.487049
## 7   0.7 6.413648
## 8   0.8 6.339398
## 9   0.9 6.298913
plot(metric_alpha_df, type="o")

α = 0.9 has the lowest RMSE of 6.298913, so we plot its predicted values against the original values below.

data_alpha_plot <- data.frame(training, alpha_nine)
colnames(data_alpha_plot) <- c("training", " α = 0.9")

# Plot training values vs predicted values
plot(data_alpha_plot)

Observations

Again, the plot of training values vs predicted values shows no correlation, positive or negative; the points are simply random scatter. This is not an impressive model either. The RMSE is 6.298913, slightly lower than that of SMA.

Task 3. AR(p)

3.1 Applying the autoregressive algorithm AR(p) to the training data set for a given value p

We’ll use the Arima() function from the forecast package with order (p, 0, 0) for this experiment, and run the ACF and PACF functions on the training set to determine p.

3.2 Apply the ACF and PACF functions

par(mfrow = c(1,1))
acf.training = acf(training, main='ACF Plot', lag.max=100)

pacf.training = pacf(training, main='PACF Plot', lag.max=100)

The ACF decays very quickly, which indicates that the series is stationary.

Three factors considered when choosing p

  1. From the PACF, we can spot 3 spikes that cross the significance bounds (the blue dashed lines), so the best candidate is p = 3

  2. Checking the corrected Akaike information criterion (AICc) values for p = 1, 2, 3, 4

# AR(1)
fit <- Arima(training, order = c(1,0,0))
summary(fit)
## Series: training 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     mean
##       0.0153  91.0180
## s.e.  0.0258   0.1568
## 
## sigma^2 estimated as 35.82:  log likelihood=-4811.18
## AIC=9628.37   AICc=9628.38   BIC=9644.31
## 
## Training set error measures:
##                        ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 6.754022e-05 5.980577 4.817361 -0.4369323 5.335187 0.7132365
##                      ACF1
## Training set -0.001575673
# AR(2)
fit <- Arima(training, order = c(2,0,0))
summary(fit)
## Series: training 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2     mean
##       0.0136  0.1030  91.0186
## s.e.  0.0257  0.0257   0.1738
## 
## sigma^2 estimated as 35.46:  log likelihood=-4803.22
## AIC=9614.44   AICc=9614.47   BIC=9635.7
## 
## Training set error measures:
##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.0006807599 5.948878 4.789425 -0.4331384 5.303664 0.7091004
##                    ACF1
## Training set 0.02351946
# AR(3)
fit <- Arima(training, order = c(3,0,0))
summary(fit)
## Series: training 
## ARIMA(3,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2      ar3     mean
##       0.0374  0.1056  -0.2280  91.0150
## s.e.  0.0251  0.0251   0.0252   0.1379
## 
## sigma^2 estimated as 33.65:  log likelihood=-4763.51
## AIC=9537.01   AICc=9537.05   BIC=9563.58
## 
## Training set error measures:
##                       ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.002615907 5.793125 4.631301 -0.407282 5.128272 0.6856893
##                     ACF1
## Training set 0.003546986
# AR(4)
fit <- Arima(training, order = c(4,0,0))
summary(fit)
## Series: training 
## ARIMA(4,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2      ar3     ar4     mean
##       0.0413  0.1037  -0.2286  0.0170  91.0152
## s.e.  0.0258  0.0252   0.0253  0.0259   0.1403
## 
## sigma^2 estimated as 33.66:  log likelihood=-4763.29
## AIC=9538.58   AICc=9538.64   BIC=9570.46
## 
## Training set error measures:
##                       ME    RMSE      MAE        MPE    MAPE      MASE
## Training set 0.002536353 5.79229 4.629502 -0.4072681 5.12633 0.6854229
##                       ACF1
## Training set -0.0004810112

We fit ARIMA(1,0,0) through ARIMA(4,0,0) models above. Of these, ARIMA(3,0,0) has the smallest AICc value.
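This comparison can also be scripted: Arima() stores the AICc in the aicc element of its fit object, so a compact sketch is:

aicc_by_p <- sapply(1:4, function(p) Arima(training, order = c(p, 0, 0))$aicc)
aicc_by_p # p = 3 gives the smallest value, matching the summaries above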

  3. Lastly, we also use the auto.arima() function in R
auto.arima(training)
## Series: training 
## ARIMA(3,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2      ar3     mean
##       0.0374  0.1056  -0.2280  91.0150
## s.e.  0.0251  0.0251   0.0252   0.1379
## 
## sigma^2 estimated as 33.65:  log likelihood=-4763.51
## AIC=9537.01   AICc=9537.05   BIC=9563.58

auto.arima() searches over candidate ARIMA models and returns the combination with the lowest information criterion (AICc by default). The result above confirms that p = 3.

Applying the autoregressive algorithm AR(p) to the training data set for p = 3, i.e., AR(3)

fit1 <- Arima(training, c(3,0,0))
#Plotting the forecasted series in red and the original training series in yellow below
plot(forecast(fit1), col="red")
lines(training, col = "yellow")

3.3 Plotting the predicted values against the original values

data_p_to_plot <- data.frame(training, fit1$fitted)
colnames(data_p_to_plot) <- c("training", "predicted values for p = 3")
plot(data_p_to_plot)

accuracy(fit1)
##                       ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.002615907 5.793125 4.631301 -0.407282 5.128272 0.6856893
##                     ACF1
## Training set 0.003546986

Observations

Here we can see that the training values vs predicted values show a slight positive correlation. This is a better model than the SMA and exponential smoothing models we saw earlier. The RMSE is 5.793125, the lowest so far.

We will now run these models on the test data to see how they perform.

Task 4. Run all three models on the test data, and choose the best one.

From our experiments, the best models are:

For the Simple Moving Average model, using our D_sma() function, the best model is m = 8.

m_testing <- D_sma(1, 8, testing)
err_testing <- calculate_error(testing, m_testing)
rmse_testing <- calculate_rmse(err_testing)
rmse_testing
## [1] 6.619054

Plotting predicted values from the simple moving average model against the testing set

testing_df <- data.frame(testing, m_testing)
plot(testing_df)

Observations

As in the training set, the testing values vs predicted values show no correlation, positive or negative; the points are simply random scatter. This is not an impressive model. The RMSE is 6.619054, worse than when the model was used on the training set.

For Exponential Smoothing, the best model is α = 0.9.

alpha_testing <- exponential_smoothing(testing, 0.9)
err_testing <- calculate_error(testing, alpha_testing)
rmse_testing <- calculate_rmse(err_testing)
rmse_testing
## [1] 7.87134

Plotting predicted values from the exponential smoothing model against the testing set

testing_df <- data.frame(testing, alpha_testing)
plot(testing_df)

Observations

Here we can see that the testing values vs predicted values show a slight positive correlation. However, the RMSE is 7.87134, which is worse than when the model was used on the training set and worse than the SMA model on the testing set.

For the Autoregressive model, the best p = 3.

fit2 <- Arima(testing, c(3,0,0))

#Plotting the forecasted series in red and the original testing series in blue
plot(forecast(fit2), col="red")
lines(testing, col = "blue")

data_arima_plot <- data.frame(testing, fit2$fitted)
colnames(data_arima_plot) <- c("testing", "predicted values for p = 3")
plot(data_arima_plot)

accuracy(fit2)
##                       ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 0.004016223 5.917087 4.738452 -0.4288647 5.272369 0.7116276
##                      ACF1
## Training set -0.001330702
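
As a side note (an addition, not part of the original workflow): instead of re-estimating the AR(3) coefficients on the testing set, the forecast package can apply the model fitted on the training set to the new data, which yields genuine one-step out-of-sample errors. A minimal sketch:

fit_heldout <- Arima(testing, model = fit1) # reuse the coefficients from fit1; no re-estimation
accuracy(fit_heldout)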

Conclusion

Here we can see that the testing values vs predicted values show a positive correlation. This is the best model relative to SMA and exponential smoothing on the testing set. The RMSE is 5.917087, the lowest of the three.

Therefore, based on the RMSE and the correlation between the predicted and observed values, we conclude that the best model is ARIMA(3,0,0).