# Clearing out the R environment
rm(list=ls())
# Uploading required libraries
library(TSA)
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(readr)
##
## Attaching package: 'readr'
## The following object is masked from 'package:TSA':
##
## spec
sort.score <- function(x, score = c("bic", "aic")){
if (score == "aic"){
x[with(x, order(AIC)),]
} else if (score == "bic") {
x[with(x, order(BIC)),]
} else {
warning('score = "x" only accepts valid arguments ("aic","bic")')
}
}
residual.analysis <- function(model, std = TRUE,start = 2, class = c("ARIMA","GARCH","ARMA-GARCH", "fGARCH")[1]){
library(TSA)
library(FitAR)
if (class == "ARIMA"){
if (std == TRUE){
res.model = rstandard(model)
}else{
res.model = residuals(model)
}
}else if (class == "GARCH"){
res.model = model$residuals[start:model$n.used]
}else if (class == "ARMA-GARCH"){
res.model = model@fit$residuals
}else if (class == "fGARCH"){
res.model = model@residuals
}else {
stop("The argument 'class' must be either 'ARIMA' or 'GARCH' ")
}
par(mfrow=c(3,2))
plot(res.model,type='o',ylab='Standardised residuals', main="Time series plot of standardised residuals")
abline(h=0)
hist(res.model,main="Histogram of standardised residuals")
qqnorm(res.model,main="QQ plot of standardised residuals")
qqline(res.model, col = 2)
acf(res.model,main="ACF of standardised residuals")
print(shapiro.test(res.model))
k=0
LBQPlot(res.model, lag.max = 30, StartLag = k + 1, k = 0, SquaredQ = FALSE)
par(mfrow=c(1,1))
}
# Loading Data set
df <- read_csv("1990-2021.csv", col_select = c('Date','United States(USD)'))
## Rows: 379 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Date
## dbl (1): United States(USD)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Creating TS object
gold <- ts(df$`United States(USD)`,start = 1990, frequency = 12)
#Descriptive Analysis
plot (gold, ylab = 'Gold Price (USD)', xlab = 'Year', type = 'o', main = "Time Series Plot of monthly Gold Prices in USD (1990-2021)")

# Stationary test
par(mfrow=c(1,2))
acf(gold, main = 'ACF for Gold Prices (USD)')
pacf(gold, main = 'PACF for Gold Prices (USD)')

par(mfrow=c(1,1))
adf.test(gold)
##
## Augmented Dickey-Fuller Test
##
## data: gold
## Dickey-Fuller = -1.9984, Lag order = 7, p-value = 0.5778
## alternative hypothesis: stationary
pp.test(gold)
##
## Phillips-Perron Unit Root Test
##
## data: gold
## Dickey-Fuller Z(alpha) = -5.9862, Truncation lag parameter = 5, p-value
## = 0.7752
## alternative hypothesis: stationary
kpss.test(gold)
## Warning in kpss.test(gold): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: gold
## KPSS Level = 5.354, Truncation lag parameter = 5, p-value = 0.01
summary(gold)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 254.8 355.2 473.3 793.5 1258.0 1964.9
# Looking for normality
qqnorm(y=gold, main = 'QQ plot for Gold Prices (USD)')
qqline(y=gold, col = 2, lwd = 1, lty = 2)

shapiro.test(gold)
##
## Shapiro-Wilk normality test
##
## data: gold
## W = 0.84341, p-value < 2.2e-16
# Creating scatter plot to observe correlation
plot(y=gold,x=zlag(gold),ylab = 'Gold Price (USD)', xlab = 'Previous Month Gold Price (USD)',main = "Scatter Plot of Gold Prices in USD")

# Calculating correlation
y = gold
x = zlag(gold)
index = 2:length(x)
cor(y[index],x[index])
## [1] 0.9956545
# Dealing With Non-Stationary
BC <- BoxCox.ar(gold)

BC$ci
## [1] -0.3 -0.2
lambda <- BC$lambda[which(max(BC$loglike) == BC$loglike)]
lambda
## [1] -0.2
BC.gold = ((gold^lambda)-1)/lambda
gold.BC <- diff(BC.gold, differences = 1)
plot (gold.BC, ylab = 'Gold Price (USD)', xlab = 'Year', type = 'o', main = "Time Series Plot of Box-Cox tranformaed Gold Prices after first difference")

par(mfrow=c(1,2))
acf(gold.BC, main = 'ACF for transformed Gold Prices after
1st differennce')
pacf(gold.BC, main = 'PACF for transformed Gold Prices after
1st differennce')

par(mfrow=c(1,1))
adf.test(gold.BC)
## Warning in adf.test(gold.BC): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: gold.BC
## Dickey-Fuller = -6.3917, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
pp.test(gold.BC)
## Warning in pp.test(gold.BC): p-value smaller than printed p-value
##
## Phillips-Perron Unit Root Test
##
## data: gold.BC
## Dickey-Fuller Z(alpha) = -400.1, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(gold.BC)
##
## KPSS Test for Level Stationarity
##
## data: gold.BC
## KPSS Level = 0.38443, Truncation lag parameter = 5, p-value = 0.08387
qqnorm(y=gold.BC, main = 'QQ plot for Box-Cox tranformed Gold Prices after first difference')
qqline(y=gold.BC, col = 2, lwd = 1, lty = 2)

shapiro.test(gold.BC)
##
## Shapiro-Wilk normality test
##
## data: gold.BC
## W = 0.98932, p-value = 0.007365
gold.BC2 <- diff(BC.gold, differences = 2)
plot (gold.BC2, ylab = 'Gold Price (USD)', xlab = 'Year', type = 'o', main = "Time Series Plot of Box-Cox tranformaed Gold Prices after second difference")

par(mfrow=c(1,2))
acf(gold.BC2, main = 'ACF for transformed Gold Prices after
2nd differennce')
pacf(gold.BC2, main = 'PACF for transformed Gold Prices after
2nd differennce')

par(mfrow=c(1,1))
adf.test(gold.BC2)
## Warning in adf.test(gold.BC2): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: gold.BC2
## Dickey-Fuller = -11.393, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
pp.test(gold.BC2)
## Warning in pp.test(gold.BC2): p-value smaller than printed p-value
##
## Phillips-Perron Unit Root Test
##
## data: gold.BC2
## Dickey-Fuller Z(alpha) = -470.92, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(gold.BC2)
## Warning in kpss.test(gold.BC2): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: gold.BC2
## KPSS Level = 0.0087148, Truncation lag parameter = 5, p-value = 0.1
qqnorm(y=gold.BC2, main = 'QQ plot for Box-Cox tranformed Gold Prices after second difference')
qqline(y=gold.BC2, col = 2, lwd = 1, lty = 2)

shapiro.test(gold.BC2)
##
## Shapiro-Wilk normality test
##
## data: gold.BC2
## W = 0.98222, p-value = 0.0001345
# Thus use gold.BC because not much improvement in gold2 plus box cox was useless
eacf(gold.BC)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o o o o o o o o o o o o o
## 1 x o o o o o o o o o o o o o
## 2 x x o o o o o o o o o x o o
## 3 x x o o o o o o o o o o o o
## 4 x x x x o o o o o o o o o o
## 5 x o o x o o o o o o o o o o
## 6 x o o o x x o o o o o o o o
## 7 x o o o x x o o o o o o o o
par(mfrow=c(1,1))
res = armasubsets(y=gold.BC,nar=5,nma=5,y.name='p',ar.method='ols')
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 3 linear dependencies found
## Reordering variables and trying again:
plot(res)

McLeod.Li.test(y=gold.BC, main="McLeod-Li test statistics for first differenced Gold prices")

#ARIMA Modeling
# ARIMA(0,1,1)
model_011_css = arima(gold,order=c(0,1,1),method='CSS')
lmtest::coeftest(model_011_css)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.149015 0.052638 -2.8309 0.004641 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_011_ml = arima(gold,order=c(0,1,1),method='ML')
lmtest::coeftest(model_011_ml)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.148643 0.052591 -2.8264 0.004708 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ARIMA(0,1,2)
model_012_css = arima(gold,order=c(0,1,2),method='CSS')
lmtest::coeftest(model_012_css)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.136759 0.052754 -2.5924 0.009531 **
## ma2 -0.043632 0.053218 -0.8199 0.412297
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_012_ml = arima(gold,order=c(0,1,2),method='ML')
lmtest::coeftest(model_012_ml)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.136457 0.052681 -2.5903 0.00959 **
## ma2 -0.043627 0.053124 -0.8212 0.41152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ARIMA(0,1,3)
model_013_css = arima(gold,order=c(0,1,3),method='CSS')
lmtest::coeftest(model_013_css)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.136575 0.051830 -2.6351 0.008412 **
## ma2 -0.072097 0.054139 -1.3317 0.182956
## ma3 0.065595 0.049501 1.3251 0.185125
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_013_ml = arima(gold,order=c(0,1,3),method='ML')
lmtest::coeftest(model_013_ml)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.136273 0.051776 -2.6320 0.008488 **
## ma2 -0.071772 0.054037 -1.3282 0.184107
## ma3 0.065129 0.049337 1.3201 0.186807
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ARIMA(0,1,4)
model_014_css = arima(gold,order=c(0,1,4),method='CSS')
lmtest::coeftest(model_014_css)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.134052 0.053357 -2.5123 0.01199 *
## ma2 -0.067331 0.058969 -1.1418 0.25354
## ma3 0.063292 0.050517 1.2529 0.21025
## ma4 -0.012452 0.063048 -0.1975 0.84344
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_014_ml = arima(gold,order=c(0,1,4),method='ML')
lmtest::coeftest(model_014_ml)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.133716 0.053277 -2.5098 0.01208 *
## ma2 -0.066951 0.058765 -1.1393 0.25458
## ma3 0.062815 0.050310 1.2486 0.21182
## ma4 -0.012637 0.062624 -0.2018 0.84007
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ARIMA(1,1,1)
model_111_css = arima(gold,order=c(1,1,1),method='CSS')
lmtest::coeftest(model_111_css)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.15337 0.23608 0.6496 0.5159
## ma1 -0.29768 0.22472 -1.3247 0.1853
model_111_ml = arima(gold,order=c(1,1,1),method='ML')
lmtest::coeftest(model_111_ml)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.14212 0.23876 0.5952 0.5517
## ma1 -0.28641 0.22871 -1.2523 0.2105
# ARIMA(1,1,2)
model_112_css = arima(gold,order=c(1,1,2),method='CSS')
lmtest::coeftest(model_112_css)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.517979 0.277230 -1.8684 0.06171 .
## ma1 0.385368 0.276609 1.3932 0.16356
## ma2 -0.135843 0.054386 -2.4978 0.01250 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_112_ml = arima(gold,order=c(1,1,2),method='ML')
lmtest::coeftest(model_112_ml)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.53034 0.26267 -2.0191 0.043479 *
## ma1 0.39866 0.26041 1.5309 0.125802
## ma2 -0.13893 0.05240 -2.6513 0.008017 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# AIC and BIC values
# you need to source the sort.score() function, which is available in Canvas shell
sort.score(AIC(model_011_ml,model_012_ml,model_013_ml,model_014_ml,model_111_ml,model_112_ml), score = "aic")
## df AIC
## model_011_ml 2 3991.943
## model_012_ml 3 3993.270
## model_013_ml 4 3993.538
## model_111_ml 3 3993.594
## model_112_ml 4 3993.728
## model_014_ml 5 3995.498
sort.score(BIC(model_011_ml,model_012_ml,model_013_ml,model_014_ml,model_111_ml,model_112_ml), score = "bic" )
## df BIC
## model_011_ml 2 3999.812
## model_012_ml 3 4005.074
## model_111_ml 3 4005.399
## model_013_ml 4 4009.278
## model_112_ml 4 4009.468
## model_014_ml 5 4015.172
# Both AIC and BIC select ARIMA(0,1,1) model for this series.
# ARIMA(1,1,1) and ARIMA(0,1,2) are overfitting models.
# Already tested ARIMA(1,1,1) and ARIMA(0,1,2) and both were not better than ARTIMA (0,1,1)
residual.analysis(model = model_011_ml)
## Loading required package: lattice
## Loading required package: leaps
## Loading required package: ltsa
## Loading required package: bestglm
##
## Shapiro-Wilk normality test
##
## data: res.model
## W = 0.89018, p-value = 7.527e-16
## Warning in (ra^2)/(n - (1:lag.max)): longer object length is not a multiple of
## shorter object length

library(forecast)
## Registered S3 methods overwritten by 'forecast':
## method from
## fitted.Arima TSA
## plot.Arima TSA
##
## Attaching package: 'forecast'
##
## The following object is masked _by_ '.GlobalEnv':
##
## gold
##
## The following object is masked from 'package:FitAR':
##
## BoxCox
fit = Arima(gold,c(0,1,1), method='ML')
summary(fit)
## Series: gold
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.1486
## s.e. 0.0526
##
## sigma^2 = 2241: log likelihood = -1993.97
## AIC=3991.94 AICc=3991.97 BIC=3999.81
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 4.352532 47.21763 29.42404 0.3449972 3.359805 0.2909174
## ACF1
## Training set -0.002613669
frc = forecast::forecast(fit,h=20)
plot(frc)

frc
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Aug 2021 1819.051 1758.379 1879.723 1726.261 1911.841
## Sep 2021 1819.051 1739.369 1898.733 1697.188 1940.914
## Oct 2021 1819.051 1724.091 1914.010 1673.823 1964.279
## Nov 2021 1819.051 1710.952 1927.150 1653.727 1984.374
## Dec 2021 1819.051 1699.245 1938.857 1635.823 2002.278
## Jan 2022 1819.051 1688.584 1949.518 1619.519 2018.583
## Feb 2022 1819.051 1678.731 1959.371 1604.450 2033.652
## Mar 2022 1819.051 1669.525 1968.576 1590.372 2047.730
## Apr 2022 1819.051 1660.855 1977.246 1577.111 2060.990
## May 2022 1819.051 1652.636 1985.466 1564.541 2073.561
## Jun 2022 1819.051 1644.803 1993.298 1552.562 2085.539
## Jul 2022 1819.051 1637.309 2000.793 1541.100 2097.001
## Aug 2022 1819.051 1630.111 2007.991 1530.092 2108.009
## Sep 2022 1819.051 1623.177 2014.924 1519.488 2118.613
## Oct 2022 1819.051 1616.481 2021.620 1509.247 2128.854
## Nov 2022 1819.051 1609.999 2028.102 1499.334 2138.768
## Dec 2022 1819.051 1603.712 2034.389 1489.719 2148.383
## Jan 2023 1819.051 1597.604 2040.498 1480.377 2157.725
## Feb 2023 1819.051 1591.659 2046.442 1471.285 2166.816
## Mar 2023 1819.051 1585.866 2052.235 1462.426 2175.676