# Clearing out the R environment
rm(list=ls())

# Uploading required libraries
library(TSA)
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(readr)
## 
## Attaching package: 'readr'
## The following object is masked from 'package:TSA':
## 
##     spec
sort.score <- function(x, score = c("bic", "aic")){
  if (score == "aic"){
    x[with(x, order(AIC)),]
  } else if (score == "bic") {
    x[with(x, order(BIC)),]
  } else {
    warning('score = "x" only accepts valid arguments ("aic","bic")')
  }
}
residual.analysis <- function(model, std = TRUE,start = 2, class = c("ARIMA","GARCH","ARMA-GARCH", "fGARCH")[1]){
  library(TSA)
  library(FitAR)
  if (class == "ARIMA"){
    if (std == TRUE){
      res.model = rstandard(model)
    }else{
      res.model = residuals(model)
    }
  }else if (class == "GARCH"){
    res.model = model$residuals[start:model$n.used]
  }else if (class == "ARMA-GARCH"){
    res.model = model@fit$residuals
  }else if (class == "fGARCH"){
    res.model = model@residuals
  }else {
    stop("The argument 'class' must be either 'ARIMA' or 'GARCH' ")
  }
  par(mfrow=c(3,2))
  plot(res.model,type='o',ylab='Standardised residuals', main="Time series plot of standardised residuals")
  abline(h=0)
  hist(res.model,main="Histogram of standardised residuals")
  qqnorm(res.model,main="QQ plot of standardised residuals")
  qqline(res.model, col = 2)
  acf(res.model,main="ACF of standardised residuals")
  print(shapiro.test(res.model))
  k=0
  LBQPlot(res.model, lag.max = 30, StartLag = k + 1, k = 0, SquaredQ = FALSE)
  par(mfrow=c(1,1))
}

# Loading Data set
df <- read_csv("1990-2021.csv", col_select = c('Date','United States(USD)'))
## Rows: 379 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Date
## dbl (1): United States(USD)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Creating TS object
gold <- ts(df$`United States(USD)`,start = 1990, frequency = 12)

#Descriptive Analysis
plot (gold, ylab = 'Gold Price (USD)', xlab = 'Year', type = 'o', main = "Time Series Plot of monthly Gold Prices in USD (1990-2021)")

# Stationary test
par(mfrow=c(1,2))
acf(gold, main = 'ACF for Gold Prices (USD)')
pacf(gold, main = 'PACF for Gold Prices (USD)')

par(mfrow=c(1,1))
adf.test(gold)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  gold
## Dickey-Fuller = -1.9984, Lag order = 7, p-value = 0.5778
## alternative hypothesis: stationary
pp.test(gold)
## 
##  Phillips-Perron Unit Root Test
## 
## data:  gold
## Dickey-Fuller Z(alpha) = -5.9862, Truncation lag parameter = 5, p-value
## = 0.7752
## alternative hypothesis: stationary
kpss.test(gold)
## Warning in kpss.test(gold): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  gold
## KPSS Level = 5.354, Truncation lag parameter = 5, p-value = 0.01
summary(gold)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   254.8   355.2   473.3   793.5  1258.0  1964.9
# Looking for normality 
qqnorm(y=gold, main = 'QQ plot for Gold Prices (USD)')
qqline(y=gold, col = 2, lwd = 1, lty = 2)

shapiro.test(gold)
## 
##  Shapiro-Wilk normality test
## 
## data:  gold
## W = 0.84341, p-value < 2.2e-16
# Creating scatter plot to observe correlation
plot(y=gold,x=zlag(gold),ylab = 'Gold Price (USD)', xlab = 'Previous Month Gold Price (USD)',main = "Scatter Plot of Gold Prices in USD")

# Calculating correlation
y = gold
x = zlag(gold)
index = 2:length(x)
cor(y[index],x[index])
## [1] 0.9956545
# Dealing With Non-Stationary

BC <- BoxCox.ar(gold)

BC$ci
## [1] -0.3 -0.2
lambda <- BC$lambda[which(max(BC$loglike) == BC$loglike)]
lambda
## [1] -0.2
BC.gold = ((gold^lambda)-1)/lambda
gold.BC <- diff(BC.gold, differences = 1)
plot (gold.BC, ylab = 'Gold Price (USD)', xlab = 'Year', type = 'o', main = "Time Series Plot of  Box-Cox tranformaed Gold Prices after first difference")

par(mfrow=c(1,2))
acf(gold.BC, main = 'ACF for transformed Gold Prices after 
    1st differennce')
pacf(gold.BC, main = 'PACF for transformed Gold Prices after 
     1st differennce')

par(mfrow=c(1,1))
adf.test(gold.BC)
## Warning in adf.test(gold.BC): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  gold.BC
## Dickey-Fuller = -6.3917, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
pp.test(gold.BC)
## Warning in pp.test(gold.BC): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  gold.BC
## Dickey-Fuller Z(alpha) = -400.1, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(gold.BC)
## 
##  KPSS Test for Level Stationarity
## 
## data:  gold.BC
## KPSS Level = 0.38443, Truncation lag parameter = 5, p-value = 0.08387
qqnorm(y=gold.BC, main = 'QQ plot for Box-Cox tranformed Gold Prices after first difference')
qqline(y=gold.BC, col = 2, lwd = 1, lty = 2)

shapiro.test(gold.BC)
## 
##  Shapiro-Wilk normality test
## 
## data:  gold.BC
## W = 0.98932, p-value = 0.007365
gold.BC2 <- diff(BC.gold, differences = 2)
plot (gold.BC2, ylab = 'Gold Price (USD)', xlab = 'Year', type = 'o', main = "Time Series Plot of  Box-Cox tranformaed Gold Prices after second difference")

par(mfrow=c(1,2))
acf(gold.BC2, main = 'ACF for transformed Gold Prices after 
    2nd differennce')
pacf(gold.BC2, main = 'PACF for transformed Gold Prices after 
     2nd differennce')

par(mfrow=c(1,1))
adf.test(gold.BC2)
## Warning in adf.test(gold.BC2): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  gold.BC2
## Dickey-Fuller = -11.393, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
pp.test(gold.BC2)
## Warning in pp.test(gold.BC2): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  gold.BC2
## Dickey-Fuller Z(alpha) = -470.92, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(gold.BC2)
## Warning in kpss.test(gold.BC2): p-value greater than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  gold.BC2
## KPSS Level = 0.0087148, Truncation lag parameter = 5, p-value = 0.1
qqnorm(y=gold.BC2, main = 'QQ plot for Box-Cox tranformed Gold Prices after second difference')
qqline(y=gold.BC2, col = 2, lwd = 1, lty = 2)

shapiro.test(gold.BC2)
## 
##  Shapiro-Wilk normality test
## 
## data:  gold.BC2
## W = 0.98222, p-value = 0.0001345
# Thus use gold.BC because not much improvement in gold2 plus box cox was useless
eacf(gold.BC)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o o o o o o o o o o  o  o  o 
## 1 x o o o o o o o o o o  o  o  o 
## 2 x x o o o o o o o o o  x  o  o 
## 3 x x o o o o o o o o o  o  o  o 
## 4 x x x x o o o o o o o  o  o  o 
## 5 x o o x o o o o o o o  o  o  o 
## 6 x o o o x x o o o o o  o  o  o 
## 7 x o o o x x o o o o o  o  o  o
par(mfrow=c(1,1))
res = armasubsets(y=gold.BC,nar=5,nma=5,y.name='p',ar.method='ols')
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 3 linear dependencies found
## Reordering variables and trying again:
plot(res)

McLeod.Li.test(y=gold.BC, main="McLeod-Li test statistics for first differenced Gold prices")

#ARIMA Modeling 

# ARIMA(0,1,1)
model_011_css = arima(gold,order=c(0,1,1),method='CSS')
lmtest::coeftest(model_011_css)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)   
## ma1 -0.149015   0.052638 -2.8309 0.004641 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_011_ml = arima(gold,order=c(0,1,1),method='ML')
lmtest::coeftest(model_011_ml)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)   
## ma1 -0.148643   0.052591 -2.8264 0.004708 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ARIMA(0,1,2)
model_012_css = arima(gold,order=c(0,1,2),method='CSS')
lmtest::coeftest(model_012_css)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)   
## ma1 -0.136759   0.052754 -2.5924 0.009531 **
## ma2 -0.043632   0.053218 -0.8199 0.412297   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_012_ml = arima(gold,order=c(0,1,2),method='ML')
lmtest::coeftest(model_012_ml)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)   
## ma1 -0.136457   0.052681 -2.5903  0.00959 **
## ma2 -0.043627   0.053124 -0.8212  0.41152   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ARIMA(0,1,3)
model_013_css = arima(gold,order=c(0,1,3),method='CSS')
lmtest::coeftest(model_013_css)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)   
## ma1 -0.136575   0.051830 -2.6351 0.008412 **
## ma2 -0.072097   0.054139 -1.3317 0.182956   
## ma3  0.065595   0.049501  1.3251 0.185125   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_013_ml = arima(gold,order=c(0,1,3),method='ML')
lmtest::coeftest(model_013_ml)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)   
## ma1 -0.136273   0.051776 -2.6320 0.008488 **
## ma2 -0.071772   0.054037 -1.3282 0.184107   
## ma3  0.065129   0.049337  1.3201 0.186807   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ARIMA(0,1,4)
model_014_css = arima(gold,order=c(0,1,4),method='CSS')
lmtest::coeftest(model_014_css)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)  
## ma1 -0.134052   0.053357 -2.5123  0.01199 *
## ma2 -0.067331   0.058969 -1.1418  0.25354  
## ma3  0.063292   0.050517  1.2529  0.21025  
## ma4 -0.012452   0.063048 -0.1975  0.84344  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_014_ml = arima(gold,order=c(0,1,4),method='ML')
lmtest::coeftest(model_014_ml)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)  
## ma1 -0.133716   0.053277 -2.5098  0.01208 *
## ma2 -0.066951   0.058765 -1.1393  0.25458  
## ma3  0.062815   0.050310  1.2486  0.21182  
## ma4 -0.012637   0.062624 -0.2018  0.84007  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ARIMA(1,1,1)
model_111_css = arima(gold,order=c(1,1,1),method='CSS')
lmtest::coeftest(model_111_css)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)
## ar1  0.15337    0.23608  0.6496   0.5159
## ma1 -0.29768    0.22472 -1.3247   0.1853
model_111_ml = arima(gold,order=c(1,1,1),method='ML')
lmtest::coeftest(model_111_ml)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)
## ar1  0.14212    0.23876  0.5952   0.5517
## ma1 -0.28641    0.22871 -1.2523   0.2105
# ARIMA(1,1,2)
model_112_css = arima(gold,order=c(1,1,2),method='CSS')
lmtest::coeftest(model_112_css)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)  
## ar1 -0.517979   0.277230 -1.8684  0.06171 .
## ma1  0.385368   0.276609  1.3932  0.16356  
## ma2 -0.135843   0.054386 -2.4978  0.01250 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_112_ml = arima(gold,order=c(1,1,2),method='ML')
lmtest::coeftest(model_112_ml)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)   
## ar1 -0.53034    0.26267 -2.0191 0.043479 * 
## ma1  0.39866    0.26041  1.5309 0.125802   
## ma2 -0.13893    0.05240 -2.6513 0.008017 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# AIC and BIC values
# you need to source the sort.score() function, which is available in Canvas shell
sort.score(AIC(model_011_ml,model_012_ml,model_013_ml,model_014_ml,model_111_ml,model_112_ml), score = "aic")
##              df      AIC
## model_011_ml  2 3991.943
## model_012_ml  3 3993.270
## model_013_ml  4 3993.538
## model_111_ml  3 3993.594
## model_112_ml  4 3993.728
## model_014_ml  5 3995.498
sort.score(BIC(model_011_ml,model_012_ml,model_013_ml,model_014_ml,model_111_ml,model_112_ml), score = "bic" )
##              df      BIC
## model_011_ml  2 3999.812
## model_012_ml  3 4005.074
## model_111_ml  3 4005.399
## model_013_ml  4 4009.278
## model_112_ml  4 4009.468
## model_014_ml  5 4015.172
# Both AIC and BIC select ARIMA(0,1,1) model for this series.
# ARIMA(1,1,1) and ARIMA(0,1,2) are overfitting models.
# Already tested ARIMA(1,1,1) and ARIMA(0,1,2) and both were not better than ARTIMA (0,1,1)

residual.analysis(model = model_011_ml)
## Loading required package: lattice
## Loading required package: leaps
## Loading required package: ltsa
## Loading required package: bestglm
## 
##  Shapiro-Wilk normality test
## 
## data:  res.model
## W = 0.89018, p-value = 7.527e-16
## Warning in (ra^2)/(n - (1:lag.max)): longer object length is not a multiple of
## shorter object length

library(forecast)
## Registered S3 methods overwritten by 'forecast':
##   method       from
##   fitted.Arima TSA 
##   plot.Arima   TSA 
## 
## Attaching package: 'forecast'
## 
## The following object is masked _by_ '.GlobalEnv':
## 
##     gold
## 
## The following object is masked from 'package:FitAR':
## 
##     BoxCox
fit = Arima(gold,c(0,1,1), method='ML') 
summary(fit)
## Series: gold 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.1486
## s.e.   0.0526
## 
## sigma^2 = 2241:  log likelihood = -1993.97
## AIC=3991.94   AICc=3991.97   BIC=3999.81
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 4.352532 47.21763 29.42404 0.3449972 3.359805 0.2909174
##                      ACF1
## Training set -0.002613669
frc = forecast::forecast(fit,h=20) 
plot(frc)

frc
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Aug 2021       1819.051 1758.379 1879.723 1726.261 1911.841
## Sep 2021       1819.051 1739.369 1898.733 1697.188 1940.914
## Oct 2021       1819.051 1724.091 1914.010 1673.823 1964.279
## Nov 2021       1819.051 1710.952 1927.150 1653.727 1984.374
## Dec 2021       1819.051 1699.245 1938.857 1635.823 2002.278
## Jan 2022       1819.051 1688.584 1949.518 1619.519 2018.583
## Feb 2022       1819.051 1678.731 1959.371 1604.450 2033.652
## Mar 2022       1819.051 1669.525 1968.576 1590.372 2047.730
## Apr 2022       1819.051 1660.855 1977.246 1577.111 2060.990
## May 2022       1819.051 1652.636 1985.466 1564.541 2073.561
## Jun 2022       1819.051 1644.803 1993.298 1552.562 2085.539
## Jul 2022       1819.051 1637.309 2000.793 1541.100 2097.001
## Aug 2022       1819.051 1630.111 2007.991 1530.092 2108.009
## Sep 2022       1819.051 1623.177 2014.924 1519.488 2118.613
## Oct 2022       1819.051 1616.481 2021.620 1509.247 2128.854
## Nov 2022       1819.051 1609.999 2028.102 1499.334 2138.768
## Dec 2022       1819.051 1603.712 2034.389 1489.719 2148.383
## Jan 2023       1819.051 1597.604 2040.498 1480.377 2157.725
## Feb 2023       1819.051 1591.659 2046.442 1471.285 2166.816
## Mar 2023       1819.051 1585.866 2052.235 1462.426 2175.676