#Import Libraries
library(DT)
library(tidyquant)
## 필요한 패키지를 로딩중입니다: lubridate
##
## 다음의 패키지를 부착합니다: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## 필요한 패키지를 로딩중입니다: PerformanceAnalytics
## 필요한 패키지를 로딩중입니다: xts
## 필요한 패키지를 로딩중입니다: zoo
##
## 다음의 패키지를 부착합니다: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## 다음의 패키지를 부착합니다: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## 필요한 패키지를 로딩중입니다: quantmod
## 필요한 패키지를 로딩중입니다: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(magrittr)
library(tseries)
library(timetk)
library(stats)
library(forecast)
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rsample)
library(ggplot2)
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
## method from
## autoplot.Arima forecast
## autoplot.acf forecast
## autoplot.ar forecast
## autoplot.bats forecast
## autoplot.decomposed.ts forecast
## autoplot.ets forecast
## autoplot.forecast forecast
## autoplot.glmnet parsnip
## autoplot.stl forecast
## autoplot.ts forecast
## fitted.ar forecast
## fortify.ts forecast
## residuals.ar forecast
# Set the start and end dates
start_date <- "2013-01-01"
end_date <- "2023-03-28"
# Get the S&P 500 monthly price data
sp500_data <- tq_get("^GSPC",
from = start_date,
to = end_date,
get = "stock.prices",
periodicity = "monthly")
tail(sp500_data)
## # A tibble: 6 x 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ^GSPC 2022-10-01 3610. 3905. 3492. 3872. 95823760000 3872.
## 2 ^GSPC 2022-11-01 3902. 4080. 3698. 4080. 92671910000 4080.
## 3 ^GSPC 2022-12-01 4087. 4101. 3764. 3840. 85249330000 3840.
## 4 ^GSPC 2023-01-01 3853. 4094. 3794. 4077. 80763810000 4077.
## 5 ^GSPC 2023-02-01 4070. 4195. 3943. 3970. 80392280000 3970.
## 6 ^GSPC 2023-03-01 3963. 4111. 3809. 4109. 113094800000 4109.
#Create a new dataframe for analysis
sp500 <- tibble::tibble(date=sp500_data$date,sp500=sp500_data$adjusted)
head(sp500)
## # A tibble: 6 x 2
## date sp500
## <date> <dbl>
## 1 2013-01-01 1498.
## 2 2013-02-01 1515.
## 3 2013-03-01 1569.
## 4 2013-04-01 1598.
## 5 2013-05-01 1631.
## 6 2013-06-01 1606.
sp500 %>%
ggplot(aes(x=date,y=sp500))+
geom_line(color='#4373B6') +
theme_bw() +
ggtitle('S&P500 from 2013(M1) - 2023(M3) ')+
xlab('Date')+
ylab('Index')
#Create a ts format data
ts_sp500 <- ts(sp500$sp500, start=c(2013,1),frequency = 12)
#Decomposition
fit_sp500 <- stl(ts_sp500,s.window = 'periodic')
#Plot Decomposition
autoplot(fit_sp500, ts.colour = '#4373B6',size=0.7) +
theme_bw()
Reference: https://rh8liuqy.github.io/ACF_PACF_by_ggplot2.html
#Custome code for ACF and PACF Plot
ggplot.corr <- function(data, lag.max = 24, ci = 0.95, large.sample.size = TRUE, horizontal = TRUE,...) {
options(dplyr.banner_expression = FALSE)
require(ggplot2)
require(dplyr)
require(cowplot)
if(horizontal == TRUE) {numofrow <- 1} else {numofrow <- 2}
list.acf <- acf(data, lag.max = lag.max, type = "correlation", plot = FALSE)
N <- as.numeric(list.acf$n.used)
df1 <- data.frame(lag = list.acf$lag, acf = list.acf$acf)
df1$lag.acf <- dplyr::lag(df1$acf, default = 0)
df1$lag.acf[2] <- 0
df1$lag.acf.cumsum <- cumsum((df1$lag.acf)^2)
df1$acfstd <- sqrt(1/N * (1 + 2 * df1$lag.acf.cumsum))
df1$acfstd[1] <- 0
df1 <- dplyr::select(df1, lag, acf, acfstd)
list.pacf <- acf(data, lag.max = lag.max, type = "partial", plot = FALSE)
df2 <- data.frame(lag = list.pacf$lag,pacf = list.pacf$acf)
df2$pacfstd <- sqrt(1/N)
if(large.sample.size == TRUE) {
plot.acf <- ggplot(data = df1, aes( x = lag, y = acf)) +
geom_area(aes(x = lag, y = qnorm((1+ci)/2)*acfstd), fill = "#B9CFE7") +
geom_area(aes(x = lag, y = -qnorm((1+ci)/2)*acfstd), fill = "#B9CFE7") +
geom_col(fill = "#4373B6", width = 0.7) +
scale_x_continuous(breaks = seq(0,max(df1$lag),6)) +
scale_y_continuous(name = element_blank(),
limits = c(min(df1$acf,df2$pacf),1)) +
ggtitle("ACF") +
theme_bw() +
theme(plot.title = element_text(size=10))
plot.pacf <- ggplot(data = df2, aes(x = lag, y = pacf)) +
geom_area(aes(x = lag, y = qnorm((1+ci)/2)*pacfstd), fill = "#B9CFE7") +
geom_area(aes(x = lag, y = -qnorm((1+ci)/2)*pacfstd), fill = "#B9CFE7") +
geom_col(fill = "#4373B6", width = 0.7) +
scale_x_continuous(breaks = seq(0,max(df2$lag, na.rm = TRUE),6)) +
scale_y_continuous(name = element_blank(),
limits = c(min(df1$acf,df2$pacf),1)) +
ggtitle("PACF") +
theme_bw() +
theme(plot.title = element_text(size=10))
}
else {
plot.acf <- ggplot(data = df1, aes( x = lag, y = acf)) +
geom_col(fill = "#4373B6", width = 0.7) +
geom_hline(yintercept = qnorm((1+ci)/2)/sqrt(N),
colour = "sandybrown",
linetype = "dashed",
size=1) +
geom_hline(yintercept = - qnorm((1+ci)/2)/sqrt(N),
colour = "sandybrown",
linetype = "dashed",
size=1) +
scale_x_continuous(breaks = seq(0,max(df1$lag),6)) +
scale_y_continuous(name = element_blank(),
limits = c(min(df1$acf,df2$pacf),1)) +
ggtitle("ACF") +
theme_bw() +
theme(plot.title = element_text(size=10))
plot.pacf <- ggplot(data = df2, aes(x = lag, y = pacf)) +
geom_col(fill = "#4373B6", width = 0.7) +
geom_hline(yintercept = qnorm((1+ci)/2)/sqrt(N),
colour = "sandybrown",
linetype = "dashed",
size=1) +
geom_hline(yintercept = - qnorm((1+ci)/2)/sqrt(N),
colour = "sandybrown",
linetype = "dashed",
size=1) +
scale_x_continuous(breaks = seq(0,max(df2$lag, na.rm = TRUE),6)) +
scale_y_continuous(name = element_blank(),
limits = c(min(df1$acf,df2$pacf),1)) +
ggtitle("PACF") +
theme_bw() +
theme(plot.title = element_text(size=10))
}
cowplot::plot_grid(plot.acf, plot.pacf, nrow = numofrow)
}
#Plot ACF and PACF
#ACF and PACF tell that the data is non-stationary
ggplot.corr(data = sp500$sp500,
lag.max = 24,
ci= 0.95,
large.sample.size = FALSE,
horizontal = TRUE)+
ggtitle("S&P 500") +
theme(plot.title = element_text(hjust = 0.5,size=12,face = 'bold'))
## 필요한 패키지를 로딩중입니다: cowplot
##
## 다음의 패키지를 부착합니다: 'cowplot'
## The following object is masked from 'package:lubridate':
##
## stamp
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## Removed 1 rows containing missing values (`geom_hline()`).
#Conduct adf test
#Accept null = non-stationary
adf.test(sp500$sp500)
##
## Augmented Dickey-Fuller Test
##
## data: sp500$sp500
## Dickey-Fuller = -2.285, Lag order = 4, p-value = 0.4582
## alternative hypothesis: stationary
#Difference the data
diff <- diff(sp500$sp500)
head(diff)
## [1] 16.57007 54.50989 28.38000 33.17004 -24.45996 79.44995
#Add diff_sp500 into sp500 dataframe
diff_sp500 <- c(0,diff)
sp500$dsp500<- diff_sp500
head(sp500)
## # A tibble: 6 x 3
## date sp500 dsp500
## <date> <dbl> <dbl>
## 1 2013-01-01 1498. 0
## 2 2013-02-01 1515. 16.6
## 3 2013-03-01 1569. 54.5
## 4 2013-04-01 1598. 28.4
## 5 2013-05-01 1631. 33.2
## 6 2013-06-01 1606. -24.5
#The graph tells that tells that the variance is not stationary over time
#May not use ARIMA model but applied for coding practice purpose
sp500 %>%
ggplot(aes(x=date,y=dsp500))+
geom_line(color='#4373B6') +
theme_bw() +
ggtitle('S&P500 (1st Order Differencing) ')+
xlab('Date')+
ylab('Value')
#Plot ACF and PACF
ggplot.corr(data = sp500$dsp500,
lag.max = 24,
ci= 0.95,
large.sample.size = FALSE,
horizontal = TRUE)+
ggtitle("S&P 500 (1st Order Differencing)") +
theme(plot.title = element_text(hjust = 0.5,size=12,face = 'bold'))
#Conduct adf test for differenced sp500
#Verified stationarity
adf.test(sp500$dsp500)
## Warning in adf.test(sp500$dsp500): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: sp500$dsp500
## Dickey-Fuller = -4.3071, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#Convert dsp500 to ts format
ts_dsp500 <- ts(sp500$dsp500, start=c(2013,1),frequency = 12)
head(ts_dsp500)
## Jan Feb Mar Apr May Jun
## 2013 0.00000 16.57007 54.50989 28.38000 33.17004 -24.45996
Reference: https://rh8liuqy.github.io/Data_Visulization_of_ARIMA_Model_Selection.html
#Custom code for ARIMA Model ACF PACF reisduals
arima.order <- c(2,0,2)
p <- 0:arima.order[1]
d <- 1 #Revise this part depending on how many differencing was conducted
q <- 0:arima.order[3]
df.arima.order <- data.frame(t(expand.grid(p=p,d=d,q=q)))
df.colnames <- data.frame(expand.grid(p=p,d=d,q=q))
df.colnames$type <- with(df.colnames,paste0
("ARIMA(",p,",",d,",",q,")"))
list.arima.model <- lapply(df.arima.order,
function(data, x, ...) {
arima(x = data, order = x, ...)
},
data = ts_dsp500) # Data has to be in ts format
df.arima.residual <- sapply(list.arima.model,
function(x) {
x$residuals
})
df.arima.residual <- data.frame(df.arima.residual)
colnames(df.arima.residual) <- df.colnames$type
df.arima.residual$time <- time(ts_dsp500) # Data has to be in ts format
df.arima.residual.plot <- gather(data = df.arima.residual, value = "residual", key = "type",-time)
df.acf <- sapply(subset(df.arima.residual, select = -time),
function(x, lag.max,...) {
acf(x = x, lag.max = lag.max, plot = FALSE)$acf
},
lag.max = 24)
df.acf <- data.frame(df.acf)
colnames(df.acf) <- df.colnames$type
df.acf$lag <- 0:(dim(df.acf)[1]-1)
df.acf.plot <- gather(df.acf,
value = "value",
key = "Model",
-lag)
df.pacf <- sapply(subset(df.arima.residual, select = -time),
function(x, lag.max,...) {
acfvalue <- as.numeric(acf(x = x,
lag.max = lag.max,
type = "partial",
plot = FALSE)$acf)
return(c(1,acfvalue))
},
lag.max = 24)
df.pacf <- data.frame(df.pacf)
colnames(df.pacf) <- df.colnames$type
df.pacf$lag <- 0:(dim(df.pacf)[1]-1)
df.pacf.plot <- gather(df.pacf,
value = "value",
key = "Model",
-lag)
fig.acf <- ggplot(data = df.acf.plot, aes(x = lag, y = value)) +
geom_col(width = 0.4, fill = "#667fa2") +
geom_hline(yintercept = qnorm((1+0.95)/2)/sqrt(dim(df.acf.plot)[1]),
linetype = "dashed",
colour = "grey50") +
geom_hline(yintercept = -qnorm((1+0.95)/2)/sqrt(dim(df.acf.plot)[1]),
linetype = "dashed",
colour = "grey50") +
scale_y_continuous(name = element_blank()) +
scale_x_continuous(breaks = seq(0,24,4)) +
ggtitle("ACF of residuals") +
facet_wrap(~Model) +
theme_bw() +
theme(panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "#f8ddc4"))
fig.pacf <- ggplot(data = df.pacf.plot, aes(x = lag, y = value)) +
geom_col(width = 0.4, fill = "#667fa2") +
geom_hline(yintercept = qnorm((1+0.95)/2)/sqrt(dim(df.pacf.plot)[1]),
linetype = "dashed",
colour = "grey50") +
geom_hline(yintercept = -qnorm((1+0.95)/2)/sqrt(dim(df.pacf.plot)[1]),
linetype = "dashed",
colour = "grey50") +
scale_y_continuous(name = element_blank()) +
scale_x_continuous(breaks = seq(0,24,4)) +
ggtitle("PACF of residuals") +
facet_wrap(~Model) +
theme_bw() +
theme(panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "#f8ddc4"))
fig.residual <- ggplot(data = df.arima.residual.plot, aes(x = time, y = residual)) +
geom_line(colour = "#667fa2") +
ggtitle("Residuals") +
facet_wrap(~type) +
theme_bw() +
theme(panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "#f8ddc4"))
#Plot ACF of reiduals
fig.acf
#Plot PACF of reiduals
fig.pacf
#Plot reiduals
fig.residual
##
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
#Custom Code for AIC
aic <- sapply(list.arima.model, function(X) {X$aic})
aic <- round(as.numeric(aic),2)
df.aic.plot <- data.frame(aic = aic, p = df.colnames$p, q = df.colnames$q)
ggplot(data = df.aic.plot, aes(x = p, y = q)) +
geom_raster(aes(fill = aic)) +
geom_text(aes( label = aic), colour = "black") +
scale_x_continuous(expand = c(0.05,0.05)) +
scale_color_continuous( high = "#FFA800", low = "#4AC200") +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_fill_distiller(palette = "Spectral", direction = -1) +
ggtitle('Akaike Information Criterion (AIC)')
# Compare with auto.arima
auto.arima(ts_sp500)
## Series: ts_sp500
## ARIMA(0,1,1) with drift
##
## Coefficients:
## ma1 drift
## -0.2454 21.1996
## s.e. 0.0855 9.1050
##
## sigma^2 = 17960: log likelihood = -769.68
## AIC=1545.37 AICc=1545.57 BIC=1553.78
#Unload ggfority and forecast package
#Reload forecast
#Have to do this since ggfortify & forecast don't work well with visualization when loaded simulatneously
unloadNamespace('ggfortify')
unloadNamespace('forecast')
library('forecast')
## Registered S3 methods overwritten by 'forecast':
## method from
## autoplot.Arima ggfortify
## autoplot.acf ggfortify
## autoplot.ar ggfortify
## autoplot.bats ggfortify
## autoplot.decomposed.ts ggfortify
## autoplot.ets ggfortify
## autoplot.forecast ggfortify
## autoplot.stl ggfortify
## autoplot.ts ggfortify
## fitted.ar ggfortify
## fortify.ts ggfortify
## residuals.ar ggfortify
#Split the data
train <- window(ts_sp500,start=2013,end=c(2022,6))
test <- window(ts_sp500,start=c(2022,7))
#Fit the model and implement 8month forecast
f_fit_naive <- naive(train)
fore_fit_naive <- forecast(f_fit_naive,h=length(test))
f_fit_ses <- ses(train)
fore_fit_ses <- forecast(f_fit_ses,h=length(test))
f_fit_hwa <- hw(train,seasonal='additive')
fore_fit_hwa <- forecast(f_fit_hwa,h=length(test))
f_fit_hwm <- hw(train,seasonal='multiplicative')
fore_fit_hwm <- forecast(f_fit_hwm,h=length(test))
f_fit_ets <- ets(train)
fore_fit_ets <- forecast(f_fit_ets,h=length(test))
f_fit_arima <- auto.arima(train)
fore_fit_arima <- forecast(f_fit_arima,h=length(test))
ff1 <- autoplot(ts_sp500,color='grey40',size=1)+
autolayer(fore_fit_naive,PI=TRUE,series = 'Naive Forecast',size=1.2)+
autolayer(test,series=NA,color='black',linetype='longdash')+
labs(x='Date', y='Index',title='Actual vs Naive Forecast')+
theme_bw()+
scale_colour_manual(name='Method',values=c('#4373B6'))+
theme(legend.position=c(0.001, 0.98),
legend.justification=c('left', 'top'))
ff2 <- autoplot(ts_sp500,color='grey40',size=1)+
autolayer(fore_fit_ses,PI=TRUE,series = 'Simple Exponential Smoothing',size=1.2)+
autolayer(test,series=NA,color='black',linetype='longdash')+
labs(x='Date', y='Index',title='Actual vs Simple Exponenetial Smoothing')+
theme_bw()+
scale_colour_manual(name='Method',values=c('orangered'))+
theme(legend.position=c(0.001, 0.98),
legend.justification=c('left', 'top'))
ff3 <- autoplot(ts_sp500,color='grey40',size=1)+
autolayer(fore_fit_hwa,PI=TRUE,series = 'Holt-Winter Additive',size=1.2)+
autolayer(test,series=NA,color='black',linetype='longdash')+
labs(x='Date', y='Index',title='Actual vs Holt-Winter Additive')+
theme_bw()+
scale_colour_manual(name='Method',values=c('deepskyblue4'))+
theme(legend.position=c(0.001, 0.98),
legend.justification=c('left', 'top'))
ff4 <- autoplot(ts_sp500,color='grey40',size=1)+
autolayer(fore_fit_hwm,PI=TRUE,series = 'Holt-Winter Multiplicative',size=1.2)+
autolayer(test,series=NA,color='black',linetype='longdash')+
labs(x='Date', y='Index',title='Actual vs Holt-Winter Multiplicative')+
theme_bw()+
scale_colour_manual(name='Method',values=c('purple'))+
theme(legend.position=c(0.001, 0.98),
legend.justification=c('left', 'top'))
ff5 <- autoplot(ts_sp500,color='grey40',size=1)+
autolayer(fore_fit_ets,PI=TRUE,series = 'ETS',size=1.2)+
autolayer(test,series=NA,color='black',linetype='longdash')+
labs(x='Date', y='Index',title='Actual vs ETS')+
theme_bw()+
scale_colour_manual(name='Method',values=c('brown4'))+
theme(legend.position=c(0.001, 0.98),
legend.justification=c('left', 'top'))
ff6 <- autoplot(ts_sp500,color='grey40',size=1)+
autolayer(fore_fit_arima,PI=TRUE,series = 'ARIMA',size=1.2)+
autolayer(test,series=NA,color='black',linetype='longdash')+
labs(x='Date', y='Index',title='Actual vs ARIMA')+
theme_bw()+
scale_colour_manual(name='Method',values=c('red'))+
theme(legend.position=c(0.001, 0.98),
legend.justification=c('left', 'top'))
library(gridExtra)
##
## 다음의 패키지를 부착합니다: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
grid.arrange(ff1, ff2, ff3, ff4, ff5,ff6, ncol=2)
#Fit several forecasting methods into S&P 500
library(prophet)
## 필요한 패키지를 로딩중입니다: Rcpp
##
## 다음의 패키지를 부착합니다: 'Rcpp'
## The following object is masked from 'package:rsample':
##
## populate
## 필요한 패키지를 로딩중입니다: rlang
##
## 다음의 패키지를 부착합니다: 'rlang'
## The following object is masked from 'package:magrittr':
##
## set_names
#Naive Forecast
fit_naive <- naive(ts_sp500)
#Simple Exponential Smoothing
fit_ses <- ses(ts_sp500)
#Holt-Winter method
fit_hwa <- hw(ts_sp500,seasonal='additive')
fit_hwm <- hw(ts_sp500,seasonal='multiplicative')
#ETS
fit_ets <- ets(ts_sp500)
fit_ets1 <- fit_ets%>%forecast()
#ARIMA(0,1,1)
fit_arima <- auto.arima(ts_sp500)
fit_arima1 <- fit_arima%>%forecast()
#Plot the result
f1 <- autoplot(ts_sp500,color='grey40',size=1) +
autolayer(fit_naive$fitted,series = 'Naive Forecast',size=1)+
autolayer(fit_naive,series = 'Naive Forecast',size=1) +
labs(x='Time', y='Index', title='Naive Forecast') +
theme_bw() +
scale_colour_manual(name='Method', values=c('#4373B6'))+
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
f2 <- autoplot(ts_sp500,color='grey40',size=1) +
autolayer(fit_ses$fitted,series = 'Simple Exponential Smoothing',size=1)+
autolayer(fit_ses,series = 'Simple Exponential Smoothing',size=1) +
labs(x='Time', y='Index', title='Simple Exponential Smoothing') +
theme_bw() +
scale_colour_manual(name='Method', values=c('orangered1'))+
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
f3 <- autoplot(ts_sp500,color='grey40',size=1) +
autolayer(fit_hwa$fitted,series = 'Holt-Winter Additive',size=1)+
autolayer(fit_hwa,series = 'Holt-Winter Additive',size=1) +
labs(x='Time', y='Index', title='Holt-Winter Additive') +
theme_bw() +
scale_colour_manual(name='Method', values=c('deepskyblue4'))+
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
f4 <- autoplot(ts_sp500,color='grey40',size=1) +
autolayer(fit_hwm$fitted,series = 'Holt-Winter Multiplicative',size=1)+
autolayer(fit_hwm,series = 'Holt-Winter Multiplicative',size=1) +
labs(x='Time', y='Index', title='Holt-Winter Multiplicative') +
theme_bw() +
scale_colour_manual(name='Method', values=c('purple'))+
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
f5 <- autoplot(ts_sp500,color='grey40',size=1) +
autolayer(fit_ets1$fitted,series = 'ETS',size=1)+
autolayer(fit_ets1,series = 'ETS',size=1) +
labs(x='Time', y='Index', title='ETS') +
theme_bw() +
scale_colour_manual(name='Method', values=c('brown4'))+
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
f6 <- autoplot(ts_sp500,color='grey40',size=1) +
autolayer(fit_arima1$fitted,series = 'ARIMA',size=1)+
autolayer(fit_arima1,series = 'ARIMA',size=1) +
labs(x='Time', y='Index', title='ARIMA') +
theme_bw() +
scale_colour_manual(name='Method', values=c('red'))+
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
library(gridExtra)
grid.arrange(f1, f2, f3, f4, f5,f6, ncol=2)
## Warning: Removed 1 row containing missing values (`geom_line()`).
grid.arrange(ff6,f6, ncol=1)
#Expected Index from ARIMA
fore_fit_arima$mean
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 2022 3805.621 3825.863 3846.104 3866.345
## 2023 3927.069 3947.310 3967.552
## Nov Dec
## 2022 3886.587 3906.828
## 2023
#Actual Index
test
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 2022 4130.29 3955.00 3585.62 3871.98
## 2023 4076.60 3970.15 4109.31
## Nov Dec
## 2022 4080.11 3839.50
## 2023
Since we had to difference S&P 500 to make it stationary, we are going to use the log return so that we don’t have to difference the data and forecast under parsimony principle
unloadNamespace('ggfortify')
unloadNamespace('forecast')
library(forecast)
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
## method from
## autoplot.Arima forecast
## autoplot.acf forecast
## autoplot.ar forecast
## autoplot.bats forecast
## autoplot.decomposed.ts forecast
## autoplot.ets forecast
## autoplot.forecast forecast
## autoplot.stl forecast
## autoplot.ts forecast
## fitted.ar forecast
## fortify.ts forecast
## residuals.ar forecast
#Compute log return of S&P 500
sp500_return <- sp500_data %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
type = 'log',
col_rename = 'returns')
head(sp500_return)
## # A tibble: 6 x 2
## date returns
## <date> <dbl>
## 1 2013-01-01 0
## 2 2013-02-01 0.0110
## 3 2013-03-01 0.0354
## 4 2013-04-01 0.0179
## 5 2013-05-01 0.0206
## 6 2013-06-01 -0.0151
#Graph the return
sp500_return %>%
ggplot(aes(x=date,y=returns))+
geom_line(color='#4373B6') +
theme_bw() +
ggtitle('S&P500 Returns from 2013(M1) - 2023(M3) ')+
xlab('Date')+
ylab('Returns')+
scale_y_continuous(labels = scales::percent)
#Plot ACF and PACF of S&P 500 return
ggplot.corr(data = sp500_return$returns,
lag.max = 24,
ci= 0.95,
large.sample.size = FALSE,
horizontal = TRUE)+
ggtitle("S&P 500 Log Return") +
theme(plot.title = element_text(hjust = 0.5,size=12,face = 'bold'))
#adf test
#Verified Stationarity
adf.test(sp500_return$returns)
## Warning in adf.test(sp500_return$returns): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: sp500_return$returns
## Dickey-Fuller = -5.0361, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#Convert the return into ts format
ts_dsp500_return <- ts(sp500_return$returns,start = c(2013,1),frequency = 12)
head(ts_dsp500_return)
## Jan Feb Mar Apr May Jun
## 2013 0.00000000 0.01099993 0.03535529 0.01792417 0.02055020 -0.01511293
#Custom code for ARIMA Model ACF PACF reisduals
arima.order <- c(2,0,2)
p <- 0:arima.order[1]
d <- 0:arima.order[2] #Revise this part depending on how many differencing was conducted
q <- 0:arima.order[3]
df.arima.order <- data.frame(t(expand.grid(p=p,d=d,q=q)))
df.colnames <- data.frame(expand.grid(p=p,d=d,q=q))
df.colnames$type <- with(df.colnames,paste0
("ARIMA(",p,",",d,",",q,")"))
list.arima.model <- lapply(df.arima.order,
function(data, x, ...) {
arima(x = data, order = x, ...)
},
data = ts_dsp500_return) # Data has to be in ts format
df.arima.residual <- sapply(list.arima.model,
function(x) {
x$residuals
})
df.arima.residual <- data.frame(df.arima.residual)
colnames(df.arima.residual) <- df.colnames$type
df.arima.residual$time <- time(ts_dsp500_return) # Data has to be in ts format
df.arima.residual.plot <- gather(data = df.arima.residual, value = "residual", key = "type",-time)
df.acf <- sapply(subset(df.arima.residual, select = -time),
function(x, lag.max,...) {
acf(x = x, lag.max = lag.max, plot = FALSE)$acf
},
lag.max = 24)
df.acf <- data.frame(df.acf)
colnames(df.acf) <- df.colnames$type
df.acf$lag <- 0:(dim(df.acf)[1]-1)
df.acf.plot <- gather(df.acf,
value = "value",
key = "Model",
-lag)
df.pacf <- sapply(subset(df.arima.residual, select = -time),
function(x, lag.max,...) {
acfvalue <- as.numeric(acf(x = x,
lag.max = lag.max,
type = "partial",
plot = FALSE)$acf)
return(c(1,acfvalue))
},
lag.max = 24)
df.pacf <- data.frame(df.pacf)
colnames(df.pacf) <- df.colnames$type
df.pacf$lag <- 0:(dim(df.pacf)[1]-1)
df.pacf.plot <- gather(df.pacf,
value = "value",
key = "Model",
-lag)
fig.acf <- ggplot(data = df.acf.plot, aes(x = lag, y = value)) +
geom_col(width = 0.4, fill = "#667fa2") +
geom_hline(yintercept = qnorm((1+0.95)/2)/sqrt(dim(df.acf.plot)[1]),
linetype = "dashed",
colour = "grey50") +
geom_hline(yintercept = -qnorm((1+0.95)/2)/sqrt(dim(df.acf.plot)[1]),
linetype = "dashed",
colour = "grey50") +
scale_y_continuous(name = element_blank()) +
scale_x_continuous(breaks = seq(0,24,4)) +
ggtitle("ACF of residuals") +
facet_wrap(~Model) +
theme_bw() +
theme(panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "#f8ddc4"))
fig.pacf <- ggplot(data = df.pacf.plot, aes(x = lag, y = value)) +
geom_col(width = 0.4, fill = "#667fa2") +
geom_hline(yintercept = qnorm((1+0.95)/2)/sqrt(dim(df.pacf.plot)[1]),
linetype = "dashed",
colour = "grey50") +
geom_hline(yintercept = -qnorm((1+0.95)/2)/sqrt(dim(df.pacf.plot)[1]),
linetype = "dashed",
colour = "grey50") +
scale_y_continuous(name = element_blank()) +
scale_x_continuous(breaks = seq(0,24,4)) +
ggtitle("PACF of residuals") +
facet_wrap(~Model) +
theme_bw() +
theme(panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "#f8ddc4"))
fig.residual <- ggplot(data = df.arima.residual.plot, aes(x = time, y = residual)) +
geom_line(colour = "#667fa2") +
ggtitle("Residuals") +
facet_wrap(~type) +
theme_bw() +
theme(panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "#f8ddc4"))
fig.acf
fig.pacf
fig.residual
##
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
#Custom Code for AIC
aic <- sapply(list.arima.model, function(X) {X$aic})
aic <- round(as.numeric(aic),2)
df.aic.plot <- data.frame(aic = aic, p = df.colnames$p, q = df.colnames$q)
ggplot(data = df.aic.plot, aes(x = p, y = q)) +
geom_raster(aes(fill = aic)) +
geom_text(aes( label = aic), colour = "black") +
scale_x_continuous(expand = c(0.05,0.05)) +
scale_color_continuous( high = "#FFA800", low = "#4AC200") +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_fill_distiller(palette = "Spectral", direction = -1) +
ggtitle('Akaike Information Criterion (AIC)')
auto.arima(ts_dsp500_return)
## Series: ts_dsp500_return
## ARIMA(0,0,1) with non-zero mean
##
## Coefficients:
## ma1 mean
## -0.2445 0.0082
## s.e. 0.0941 0.0028
##
## sigma^2 = 0.001732: log likelihood = 217.51
## AIC=-429.01 AICc=-428.81 BIC=-420.58
#Unload ggfority and forecast package
#Reload forecast
#Have to do this since ggfortify & forecast don't work well with visualization when loaded simulatneously
unloadNamespace('ggfortify')
unloadNamespace('forecast')
library('forecast')
## Registered S3 methods overwritten by 'forecast':
## method from
## autoplot.Arima ggfortify
## autoplot.acf ggfortify
## autoplot.ar ggfortify
## autoplot.bats ggfortify
## autoplot.decomposed.ts ggfortify
## autoplot.ets ggfortify
## autoplot.forecast ggfortify
## autoplot.stl ggfortify
## autoplot.ts ggfortify
## fitted.ar ggfortify
## fortify.ts ggfortify
## residuals.ar ggfortify
#Split the data
dtrain <- window(ts_dsp500_return,start=2013,end=c(2022,6))
dtest <- window(ts_dsp500_return,start=c(2022,7))
#Fit the model and implement 8month forecast
fd_fit_naive <- naive(dtrain)
dfore_fit_naive <- forecast(fd_fit_naive,h=length(dtest))
fd_fit_ses <- ses(dtrain)
dfore_fit_ses <- forecast(fd_fit_ses,h=length(dtest))
fd_fit_hwa <- hw(dtrain,seasonal='additive')
dfore_fit_hwa <- forecast(fd_fit_hwa,h=length(dtest))
fd_fit_ets <- ets(dtrain)
dfore_fit_ets <- forecast(fd_fit_ets,h=length(dtest))
fd_fit_arima <- auto.arima(dtrain)
dfore_fit_arima <- forecast(fd_fit_arima,h=length(dtest))
#Plot the result
fdd1 <- autoplot(ts_dsp500_return,color='grey30') +
autolayer(dfore_fit_naive,PI=TRUE,series='Naive Forecast',size=1.2)+
autolayer(dtest,series=NA,color='black',linetype='longdash')+
labs(x='Time', y='Return', title='Actual vs Naive Forecast') +
theme_bw() +
scale_y_continuous(labels = scales::percent,
breaks = seq(-0.2,0.2,0.1)) +
scale_colour_manual(name='Method', values=c('#4373B6'))+
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
fdd2 <- autoplot(ts_dsp500_return,color='grey30') +
autolayer(dfore_fit_ses,PI=TRUE,series='Simple Exponential Smoothing',size=1.2)+
autolayer(dtest,series=NA,color='black',linetype='longdash')+
labs(x='Time', y='Return', title='Actual vs Simple Exponential Smoothing') +
theme_bw() +
scale_y_continuous(labels = scales::percent) +
scale_colour_manual(name='Method', values=c('orangered1'))+
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
fdd3 <- autoplot(ts_dsp500_return,color='grey30') +
autolayer(dfore_fit_hwa,PI=TRUE,series='Holt-Winter Additive',size=1.2)+
autolayer(dtest,series=NA,color='black',linetype='longdash')+
labs(x='Time', y='Return', title='Actual vs Holt-Winter Additive') +
theme_bw() +
scale_y_continuous(labels = scales::percent) +
scale_colour_manual(name='Method', values=c('deepskyblue4'))+
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
fdd5 <- autoplot(ts_dsp500_return,color='grey30') +
autolayer(dfore_fit_ets,PI=TRUE,series='ETS',size=1.2)+
autolayer(dtest,series=NA,color='black',linetype='longdash')+
labs(x='Time', y='Return', title='Actual vs ETS') +
theme_bw() +
scale_y_continuous(labels = scales::percent) +
scale_colour_manual(name='Method', values=c('brown4'))+
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
fdd6 <- autoplot(ts_dsp500_return,color='grey30') +
autolayer(dfore_fit_arima,PI=TRUE,series='ARIMA',size=1.2)+
autolayer(dtest,series=NA,color='black',linetype='longdash')+
labs(x='Time', y='Return', title='Actual vs ARIMA') +
theme_bw() +
scale_y_continuous(labels = scales::percent) +
scale_colour_manual(name='Method', values=c('red'))+
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
library(gridExtra)
grid.arrange(fdd1, fdd2, fdd3, fdd5,fdd6, ncol=2)
#Fit several forecasting methods into S&P 500
#Naive Forecast
fit_dnaive <- naive(ts_dsp500_return)
#Simple Exponential Smoothing
fit_dses <- ses(ts_dsp500_return)
#Holt-Winter method
fit_dhwa <- hw(ts_dsp500_return,seasonal='additive')
#ETS
fit_dets <- ets(ts_dsp500_return)
fit_dets1 <- fit_dets%>%forecast()
#ARIMA(0,1,1)
fit_darima <- auto.arima(ts_dsp500_return)
fit_darima1 <- fit_darima%>%forecast()
#Plot the result
fd1 <- autoplot(ts_dsp500_return,color='grey30') +
autolayer(fit_dnaive$fitted,series = 'Naive Forecast')+
autolayer(fit_dnaive,series = 'Naive Forecast') +
labs(x='Time', y='Return', title='Naive Forecast') +
theme_bw() +
scale_colour_manual(name='Method', values=c('#4373B6'))+
scale_y_continuous(labels = scales::percent) +
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
fd2 <- autoplot(ts_dsp500_return,color='grey30') +
autolayer(fit_dses$fitted,series = 'Simple Exponential Smoothing')+
autolayer(fit_dses,series = 'Simple Exponential Smoothing') +
labs(x='Time', y='Return', title='Simple Exponential Smoothing') +
theme_bw() +
scale_colour_manual(name='Method', values=c('orangered1'))+
scale_y_continuous(labels = scales::percent) +
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
fd3 <- autoplot(ts_dsp500_return,color='grey30') +
autolayer(fit_dhwa$fitted,series = 'Holt-Winter Additive')+
autolayer(fit_dhwa,series = 'Holt-Winter Additive') +
labs(x='Time', y='Return', title='Holt-Winter Additive') +
theme_bw() +
scale_colour_manual(name='Method', values=c('deepskyblue4'))+
scale_y_continuous(labels = scales::percent) +
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
fd5 <- autoplot(ts_dsp500_return,color='grey30') +
autolayer(fit_dets1$fitted,series = 'ETS')+
autolayer(fit_dets1,series = 'ETS') +
labs(x='Time', y='Return', title='ETS') +
theme_bw() +
scale_colour_manual(name='Method', values=c('brown4'))+
scale_y_continuous(labels = scales::percent) +
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
fd6 <- autoplot(ts_dsp500_return,color='grey30') +
autolayer(fit_darima1$fitted,series = 'ARIMA')+
autolayer(fit_darima1,series = 'ARIMA') +
labs(x='Time', y='Return', title='ARIMA') +
theme_bw() +
scale_colour_manual(name='Method', values=c('red'))+
scale_y_continuous(labels = scales::percent) +
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))
grid.arrange(fd1, fd2, fd3,fd5,fd6, ncol=2)
## Warning: Removed 1 row containing missing values (`geom_line()`).
grid.arrange(fdd6,fd6, ncol=1)
# Forecasted return from Arima
# 2.3%
dfore_fit_arima$mean
## Jan Feb Mar Apr May Jun Jul Aug
## 2022 0.023658837 0.008275137
## 2023 0.008275137 0.008275137 0.008275137
## Sep Oct Nov Dec
## 2022 0.008275137 0.008275137 0.008275137 0.008275137
## 2023
# Actual Return
# 8.7%
dtest
## Jan Feb Mar Apr May Jun Jul Aug
## 2022 0.08720138 -0.04336703
## 2023 0.05992118 -0.02645948 0.03445129
## Sep Oct Nov Dec
## 2022 -0.09804917 0.07683456 0.05235798 -0.06078183
## 2023
#ARIMA model forecasts that the return for Aprial will be 0.3%
fit_darima1[4]
## $mean
## Jan Feb Mar Apr May Jun
## 2023 0.003262986 0.008174623 0.008174623
## 2024 0.008174623 0.008174623 0.008174623 0.008174623 0.008174623 0.008174623
## 2025 0.008174623 0.008174623 0.008174623
## Jul Aug Sep Oct Nov Dec
## 2023 0.008174623 0.008174623 0.008174623 0.008174623 0.008174623 0.008174623
## 2024 0.008174623 0.008174623 0.008174623 0.008174623 0.008174623 0.008174623
## 2025
fit_darima$fitted %>% tk_tbl
## # A tibble: 123 x 2
## index value
## <yearmon> <dbl>
## 1 1 2013 0.00794
## 2 2 2013 0.0101
## 3 3 2013 0.00795
## 4 4 2013 0.00147
## 5 5 2013 0.00415
## 6 6 2013 0.00417
## 7 7 2013 0.0129
## 8 8 2013 -0.000479
## 9 9 2013 0.0158
## 10 10 2013 0.00488
## # ... with 113 more rows
ts_dsp500_return %>% tk_tbl
## # A tibble: 123 x 2
## index value
## <yearmon> <dbl>
## 1 1 2013 0
## 2 2 2013 0.0110
## 3 3 2013 0.0354
## 4 4 2013 0.0179
## 5 5 2013 0.0206
## 6 6 2013 -0.0151
## 7 7 2013 0.0483
## 8 8 2013 -0.0318
## 9 9 2013 0.0293
## 10 10 2013 0.0436
## # ... with 113 more rows