library(tidyquant)
## 필요한 패키지를 로딩중입니다: lubridate
##
## 다음의 패키지를 부착합니다: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## 필요한 패키지를 로딩중입니다: PerformanceAnalytics
## 필요한 패키지를 로딩중입니다: xts
## 필요한 패키지를 로딩중입니다: zoo
##
## 다음의 패키지를 부착합니다: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## 다음의 패키지를 부착합니다: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## 필요한 패키지를 로딩중입니다: quantmod
## 필요한 패키지를 로딩중입니다: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(DT)
library(magrittr)
library(tseries)
library(timetk)
library(stats)
library(forecast)
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rsample)
library(ggplot2)
library(stats)
# Set the start and end dates
start_date <- "2013-01-01"
end_date <- "2023-04-21"
# Get the S&P 500 monthly price data
spy_data <- tq_get("SPY",
from = start_date,
to = end_date,
get = "stock.prices",
periodicity = "monthly")
tail(spy_data)
## # A tibble: 6 x 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SPY 2022-11-01 390. 408. 369. 408. 1745985300 404.
## 2 SPY 2022-12-01 409. 410. 375. 382. 1735973600 379.
## 3 SPY 2023-01-01 384. 408. 378. 406. 1575450100 405.
## 4 SPY 2023-02-01 405. 418. 394. 396. 1603094700 395.
## 5 SPY 2023-03-01 395. 410. 381. 409. 2515907800 408.
## 6 SPY 2023-04-01 409. 416. 406. 406. 1132698300 406.
spy <- tibble::tibble(date=spy_data$date,spy=spy_data$close)
head(spy)
## # A tibble: 6 x 2
## date spy
## <date> <dbl>
## 1 2013-01-01 150.
## 2 2013-02-01 152.
## 3 2013-03-01 157.
## 4 2013-04-01 160.
## 5 2013-05-01 163.
## 6 2013-06-01 160.
spy %>%
ggplot(aes(x=date,y=spy))+
geom_line(color='#4373B6') +
theme_bw() +
ggtitle('SPY from 2013(M1) - 2023(M4)')+
xlab('Date')+
ylab('Close Price')

#Compute log return of S&P 500
spy_return <- spy_data %>%
tq_transmute(select = close,
mutate_fun = periodReturn,
type = 'log',
col_rename = 'returns')
head(spy_return)
## # A tibble: 6 x 2
## date returns
## <date> <dbl>
## 1 2013-01-01 0
## 2 2013-02-01 0.0127
## 3 2013-03-01 0.0328
## 4 2013-04-01 0.0190
## 5 2013-05-01 0.0233
## 6 2013-06-01 -0.0187
#Graph the return
spy_return %>%
ggplot(aes(x=date,y=returns))+
geom_line(color='#4373B6') +
theme_bw() +
ggtitle('SPY Returns from 2013(M1) - 2023(M4) ')+
xlab('Date')+
ylab('Returns')+
scale_y_continuous(labels = scales::percent)

ts_spy_return <- ts(spy_return$returns,start=c(2013,1),frequency = 12)
head(ts_spy_return)
## Jan Feb Mar Apr May Jun
## 2013 0.00000000 0.01267817 0.03283023 0.01903010 0.02333535 -0.01871175
train <- window(ts_spy_return,start=2013,end=c(2021,12))
train1 <- window(ts_spy_return,start=c(2013,1),end=c(2022,1))
train2 <- window(ts_spy_return,start=c(2013,1),end=c(2022,2))
train3 <- window(ts_spy_return,start=c(2013,1),end=c(2022,3))
train4 <- window(ts_spy_return,start=c(2013,1),end=c(2022,4))
train5 <- window(ts_spy_return,start=c(2013,1),end=c(2022,5))
train6 <- window(ts_spy_return,start=c(2013,1),end=c(2022,6))
train7 <- window(ts_spy_return,start=c(2013,1),end=c(2022,7))
train8 <- window(ts_spy_return,start=c(2013,1),end=c(2022,8))
test <- window(ts_spy_return,start=c(2022,1))
tail(train)
## Jul Aug Sep Oct Nov
## 2021 0.024119271 0.029325611 -0.050925009 0.067811483 -0.008067297
## Dec
## 2021 0.041703144
test
## Jan Feb Mar Apr May
## 2022 -0.054182999 -0.029961406 0.033799270 -0.091862086 0.002254721
## 2023 0.060989143 -0.025464168 0.032597697 -0.008118131
## Jun Jul Aug Sep Oct
## 2022 -0.090369972 0.088090955 -0.041657713 -0.101101503 0.078141403
## 2023
## Nov Dec
## 2022 0.054101296 -0.063936924
## 2023
#Custome code for ACF and PACF Plot
ggplot.corr <- function(data, lag.max = 24, ci = 0.95, large.sample.size = TRUE, horizontal = TRUE,...) {
options(dplyr.banner_expression = FALSE)
require(ggplot2)
require(dplyr)
require(cowplot)
if(horizontal == TRUE) {numofrow <- 1} else {numofrow <- 2}
list.acf <- acf(data, lag.max = lag.max, type = "correlation", plot = FALSE)
N <- as.numeric(list.acf$n.used)
df1 <- data.frame(lag = list.acf$lag, acf = list.acf$acf)
df1$lag.acf <- dplyr::lag(df1$acf, default = 0)
df1$lag.acf[2] <- 0
df1$lag.acf.cumsum <- cumsum((df1$lag.acf)^2)
df1$acfstd <- sqrt(1/N * (1 + 2 * df1$lag.acf.cumsum))
df1$acfstd[1] <- 0
df1 <- dplyr::select(df1, lag, acf, acfstd)
list.pacf <- acf(data, lag.max = lag.max, type = "partial", plot = FALSE)
df2 <- data.frame(lag = list.pacf$lag,pacf = list.pacf$acf)
df2$pacfstd <- sqrt(1/N)
if(large.sample.size == TRUE) {
plot.acf <- ggplot(data = df1, aes( x = lag, y = acf)) +
geom_area(aes(x = lag, y = qnorm((1+ci)/2)*acfstd), fill = "#B9CFE7") +
geom_area(aes(x = lag, y = -qnorm((1+ci)/2)*acfstd), fill = "#B9CFE7") +
geom_col(fill = "#4373B6", width = 0.7) +
scale_x_continuous(breaks = seq(0,max(df1$lag),6)) +
scale_y_continuous(name = element_blank(),
limits = c(min(df1$acf,df2$pacf),1)) +
ggtitle("ACF") +
theme_bw() +
theme(plot.title = element_text(size=10))
plot.pacf <- ggplot(data = df2, aes(x = lag, y = pacf)) +
geom_area(aes(x = lag, y = qnorm((1+ci)/2)*pacfstd), fill = "#B9CFE7") +
geom_area(aes(x = lag, y = -qnorm((1+ci)/2)*pacfstd), fill = "#B9CFE7") +
geom_col(fill = "#4373B6", width = 0.7) +
scale_x_continuous(breaks = seq(0,max(df2$lag, na.rm = TRUE),6)) +
scale_y_continuous(name = element_blank(),
limits = c(min(df1$acf,df2$pacf),1)) +
ggtitle("PACF") +
theme_bw() +
theme(plot.title = element_text(size=10))
}
else {
plot.acf <- ggplot(data = df1, aes( x = lag, y = acf)) +
geom_col(fill = "#4373B6", width = 0.7) +
geom_hline(yintercept = qnorm((1+ci)/2)/sqrt(N),
colour = "sandybrown",
linetype = "dashed",
size=1) +
geom_hline(yintercept = - qnorm((1+ci)/2)/sqrt(N),
colour = "sandybrown",
linetype = "dashed",
size=1) +
scale_x_continuous(breaks = seq(0,max(df1$lag),6)) +
scale_y_continuous(name = element_blank(),
limits = c(min(df1$acf,df2$pacf),1)) +
ggtitle("ACF") +
theme_bw() +
theme(plot.title = element_text(size=10))
plot.pacf <- ggplot(data = df2, aes(x = lag, y = pacf)) +
geom_col(fill = "#4373B6", width = 0.7) +
geom_hline(yintercept = qnorm((1+ci)/2)/sqrt(N),
colour = "sandybrown",
linetype = "dashed",
size=1) +
geom_hline(yintercept = - qnorm((1+ci)/2)/sqrt(N),
colour = "sandybrown",
linetype = "dashed",
size=1) +
scale_x_continuous(breaks = seq(0,max(df2$lag, na.rm = TRUE),6)) +
scale_y_continuous(name = element_blank(),
limits = c(min(df1$acf,df2$pacf),1)) +
ggtitle("PACF") +
theme_bw() +
theme(plot.title = element_text(size=10))
}
cowplot::plot_grid(plot.acf, plot.pacf, nrow = numofrow)
}
#Custom code for ARIMA Model ACF PACF reisduals
arima.order <- c(2,0,2)
p <- 0:arima.order[1]
d <- 0:arima.order[2] #Revise this part depending on how many differencing was conducted
q <- 0:arima.order[3]
df.arima.order <- data.frame(t(expand.grid(p=p,d=d,q=q)))
df.colnames <- data.frame(expand.grid(p=p,d=d,q=q))
df.colnames$type <- with(df.colnames,paste0
("ARIMA(",p,",",d,",",q,")"))
list.arima.model <- lapply(df.arima.order,
function(data, x, ...) {
arima(x = data, order = x, ...)
},
data = train) # Data has to be in ts format
df.arima.residual <- sapply(list.arima.model,
function(x) {
x$residuals
})
df.arima.residual <- data.frame(df.arima.residual)
colnames(df.arima.residual) <- df.colnames$type
df.arima.residual$time <- time(train) # Data has to be in ts format
df.arima.residual.plot <- gather(data = df.arima.residual, value = "residual", key = "type",-time)
df.acf <- sapply(subset(df.arima.residual, select = -time),
function(x, lag.max,...) {
acf(x = x, lag.max = lag.max, plot = FALSE)$acf
},
lag.max = 24)
df.acf <- data.frame(df.acf)
colnames(df.acf) <- df.colnames$type
df.acf$lag <- 0:(dim(df.acf)[1]-1)
df.acf.plot <- gather(df.acf,
value = "value",
key = "Model",
-lag)
df.pacf <- sapply(subset(df.arima.residual, select = -time),
function(x, lag.max,...) {
acfvalue <- as.numeric(acf(x = x,
lag.max = lag.max,
type = "partial",
plot = FALSE)$acf)
return(c(1,acfvalue))
},
lag.max = 24)
df.pacf <- data.frame(df.pacf)
colnames(df.pacf) <- df.colnames$type
df.pacf$lag <- 0:(dim(df.pacf)[1]-1)
df.pacf.plot <- gather(df.pacf,
value = "value",
key = "Model",
-lag)
fig.acf <- ggplot(data = df.acf.plot, aes(x = lag, y = value)) +
geom_col(width = 0.4, fill = "#667fa2") +
geom_hline(yintercept = qnorm((1+0.95)/2)/sqrt(dim(df.acf.plot)[1]),
linetype = "dashed",
colour = "grey50") +
geom_hline(yintercept = -qnorm((1+0.95)/2)/sqrt(dim(df.acf.plot)[1]),
linetype = "dashed",
colour = "grey50") +
scale_y_continuous(name = element_blank()) +
scale_x_continuous(breaks = seq(0,24,4)) +
ggtitle("ACF of residuals") +
facet_wrap(~Model) +
theme_bw() +
theme(panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "#f8ddc4"))
fig.pacf <- ggplot(data = df.pacf.plot, aes(x = lag, y = value)) +
geom_col(width = 0.4, fill = "#667fa2") +
geom_hline(yintercept = qnorm((1+0.95)/2)/sqrt(dim(df.pacf.plot)[1]),
linetype = "dashed",
colour = "grey50") +
geom_hline(yintercept = -qnorm((1+0.95)/2)/sqrt(dim(df.pacf.plot)[1]),
linetype = "dashed",
colour = "grey50") +
scale_y_continuous(name = element_blank()) +
scale_x_continuous(breaks = seq(0,24,4)) +
ggtitle("PACF of residuals") +
facet_wrap(~Model) +
theme_bw() +
theme(panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "#f8ddc4"))
fig.residual <- ggplot(data = df.arima.residual.plot, aes(x = time, y = residual)) +
geom_line(colour = "#667fa2") +
ggtitle("Residuals") +
facet_wrap(~type) +
theme_bw() +
theme(panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "#f8ddc4"))
ggplot.corr(spy_return[1:108,2],large.sample.size = FALSE)
## 필요한 패키지를 로딩중입니다: cowplot
##
## 다음의 패키지를 부착합니다: 'cowplot'
## The following object is masked from 'package:lubridate':
##
## stamp
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## Removed 1 rows containing missing values (`geom_hline()`).

fig.acf

fig.pacf

fig.residual
##
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

#Custom Code for AIC
aic <- sapply(list.arima.model, function(X) {X$aic})
aic <- round(as.numeric(aic),2)
df.aic.plot <- data.frame(aic = aic, p = df.colnames$p, q = df.colnames$q)
ggplot(data = df.aic.plot, aes(x = p, y = q)) +
geom_raster(aes(fill = aic)) +
geom_text(aes( label = aic), colour = "black") +
scale_x_continuous(expand = c(0.05,0.05)) +
scale_color_continuous( high = "#FFA800", low = "#4AC200") +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_fill_distiller(palette = "Spectral", direction = -1) +
ggtitle('Akaike Information Criterion (AIC)')

#From the Plot, it seems that MA(1) model is suitable.
#However auto.arima gives MA(2)
#We won't use auto arima
auto.arima(train)
## Series: train
## ARIMA(0,0,2) with non-zero mean
##
## Coefficients:
## ma1 ma2 mean
## -0.1995 -0.1745 0.0106
## s.e. 0.0947 0.0925 0.0023
##
## sigma^2 = 0.001437: log likelihood = 201.66
## AIC=-395.32 AICc=-394.93 BIC=-384.59
fit_train <- arima(train,c(0,0,1))%>% forecast()
fit_train
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2022 0.004094099 -0.04456373 0.05275193 -0.07032163 0.07850982
## Feb 2022 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Mar 2022 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Apr 2022 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## May 2022 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Jun 2022 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Jul 2022 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Aug 2022 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Sep 2022 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Oct 2022 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Nov 2022 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Dec 2022 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Jan 2023 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Feb 2023 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Mar 2023 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Apr 2023 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## May 2023 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Jun 2023 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Jul 2023 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Aug 2023 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Sep 2023 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Oct 2023 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Nov 2023 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
## Dec 2023 0.010650051 -0.03922811 0.06052821 -0.06563201 0.08693212
train
## Jan Feb Mar Apr May
## 2013 0.0000000000 0.0126781693 0.0328302290 0.0190300991 0.0233353461
## 2014 -0.0358845187 0.0445103257 0.0038575029 0.0069274658 0.0229412151
## 2015 -0.0300770648 0.0546819151 -0.0202841404 0.0097858719 0.0127742059
## 2016 -0.0510686886 -0.0008262911 0.0599558407 0.0039334662 0.0168684780
## 2017 0.0177364647 0.0385392608 -0.0030918302 0.0098772360 0.0140142433
## 2018 0.0548282567 -0.0370379259 -0.0317902619 0.0051549066 0.0240183060
## 2019 0.0770217815 0.0319015049 0.0135436455 0.0400399643 -0.0658953606
## 2020 -0.0004039031 -0.0824752378 -0.1392473580 0.1195446421 0.0465450374
## 2021 -0.0102427078 0.0274259374 0.0411290315 0.0515581941 0.0065446074
## Jun Jul Aug Sep Oct
## 2013 -0.0187117513 0.0503859426 -0.0304513418 0.0262935074 0.0452665893
## 2014 0.0156543261 -0.0135286908 0.0387047343 -0.0185558483 0.0232778641
## 2015 -0.0253736385 0.0223378758 -0.0628866896 -0.0310325079 0.0816349954
## 2016 -0.0017170662 0.0358219227 0.0011968241 -0.0049806503 -0.0174890779
## 2017 0.0014899473 0.0203457943 0.0029134524 0.0149986409 0.0232907106
## 2018 0.0012540926 0.0363768172 0.0314209994 0.0014112974 -0.0716080153
## 2019 0.0624202148 0.0150062706 -0.0168851267 0.0146636380 0.0218638880
## 2020 0.0131880806 0.0572232859 0.0674686055 -0.0421576099 -0.0252496842
## 2021 0.0189134017 0.0241192714 0.0293256106 -0.0509250090 0.0678114831
## Nov Dec
## 2013 0.0292069703 0.0201817235
## 2014 0.0271013651 -0.0080438678
## 2015 0.0036484559 -0.0233673956
## 2016 0.0361760828 0.0141922770
## 2017 0.0301080500 0.0069565212
## 2018 0.0183793305 -0.0979910646
## 2019 0.0355584748 0.0237368681
## 2020 0.1032574582 0.0321249653
## 2021 -0.0080672971 0.0417031440
#Plot ARIMA model prediction
autoplot(train,color='grey30') +
autolayer(fit_train$fitted,series = 'ARIMA')+
autolayer(fit_train,series = 'ARIMA') +
labs(x='Time', y='Return', title='ARIMA') +
theme_bw() +
scale_colour_manual(name='Method', values=c('red'))+
scale_y_continuous(labels = scales::percent) +
theme(legend.position=c(0.02, 0.98),
legend.justification=c('left', 'top'))

fit_train$mean
## Jan Feb Mar Apr May Jun
## 2022 0.004094099 0.010650051 0.010650051 0.010650051 0.010650051 0.010650051
## 2023 0.010650051 0.010650051 0.010650051 0.010650051 0.010650051 0.010650051
## Jul Aug Sep Oct Nov Dec
## 2022 0.010650051 0.010650051 0.010650051 0.010650051 0.010650051 0.010650051
## 2023 0.010650051 0.010650051 0.010650051 0.010650051 0.010650051 0.010650051
# define start and end dates
start_date <- c(2013,1)
end_dates1 <- c(2022,1:12)
end_dates2 <- c(2023,1:3)
# create a list to store the forecasts
forecasts <- list()
# loop over end dates and fit ARIMA models
for (i in 1:length(end_dates1)) {
train <- window(ts_spy_return, start = start_date, end = c(2022, end_dates1[i]))
fit <- arima(train, c(0,0,1)) %>% forecast()
forecasts[[i]] <- fit$mean[1]
}
## Warning in window.default(x, ...): 'end' value not changed
for (i in 1:length(end_dates2)) {
train <- window(ts_spy_return, start = start_date, end = c(2023,end_dates2[i]))
fit <- arima(train, c(0,0,1)) %>% forecast()
forecasts[[i + length(end_dates1)]] <- fit$mean[1]
}
## Warning in window.default(x, ...): 'end' value not changed
# combine results into a data frame
results <- data.frame(end_date = c(end_dates1, end_dates2), forecast = unlist(forecasts))
results
## end_date forecast
## 1 2022 0.011033389
## 2 1 0.024064490
## 3 2 0.021081002
## 4 3 0.007241615
## 5 4 0.030936546
## 6 5 0.014279113
## 7 6 0.025400795
## 8 7 -0.003371496
## 9 8 0.016683643
## 10 9 0.030475868
## 11 10 -0.001945132
## 12 11 -0.004081110
## 13 12 0.022091153
## 14 2023 0.011033389
## 15 1 -0.001435208
## 16 2 0.014098649
## 17 3 0.003444871
predicted_return <- c(fit_train$mean[1], results[c(-1,-14),]$forecast)
predicted_return
## [1] 0.004094099 0.024064490 0.021081002 0.007241615 0.030936546
## [6] 0.014279113 0.025400795 -0.003371496 0.016683643 0.030475868
## [11] -0.001945132 -0.004081110 0.022091153 -0.001435208 0.014098649
## [16] 0.003444871
library(tibble)
test_df <- tibble(date=spy[109:124,]$date,spy=spy[109:124,]$spy,return=spy_return[109:124,]$returns,predict=predicted_return)
test_df
## # A tibble: 16 x 4
## date spy return predict
## <date> <dbl> <dbl> <dbl>
## 1 2022-01-01 450. -0.0542 0.00409
## 2 2022-02-01 437. -0.0300 0.0241
## 3 2022-03-01 452. 0.0338 0.0211
## 4 2022-04-01 412 -0.0919 0.00724
## 5 2022-05-01 413. 0.00225 0.0309
## 6 2022-06-01 377. -0.0904 0.0143
## 7 2022-07-01 412. 0.0881 0.0254
## 8 2022-08-01 395. -0.0417 -0.00337
## 9 2022-09-01 357. -0.101 0.0167
## 10 2022-10-01 386. 0.0781 0.0305
## 11 2022-11-01 408. 0.0541 -0.00195
## 12 2022-12-01 382. -0.0639 -0.00408
## 13 2023-01-01 406. 0.0610 0.0221
## 14 2023-02-01 396. -0.0255 -0.00144
## 15 2023-03-01 409. 0.0326 0.0141
## 16 2023-04-01 406. -0.00812 0.00344
# Initialize signal vector
signal1 <- rep("Hold", nrow(test_df))
# Implement signal rules
for(i in 1:nrow(test_df)) {
if(i == 1) {
if(test_df$predict[i] > 0) {
signal1[i] <- "Buy"
}
} else {
if(test_df$predict[i] > 0) {
if(test_df$predict[i-1] < 0) {
signal1[i] <- "Buy"
} else {
signal1[i] <- "Hold"
}
} else if(test_df$predict[i] < 0) {
if(test_df$predict[i-1] > 0) {
signal1[i] <- "Sell"
} else {
signal1[i] <- "Hold"
}
}
}
}
signal1
## [1] "Buy" "Hold" "Hold" "Hold" "Hold" "Hold" "Hold" "Sell" "Buy" "Hold"
## [11] "Sell" "Hold" "Buy" "Sell" "Buy" "Hold"
test_df$signal1 <- signal1
# create a new column 'signal2' based on the predict column
test_df$signal2 <- ifelse(test_df$predict > 0, "Long", "Short")
datatable(test_df)
test_df$signal2_direction <- ifelse(test_df$signal2 == "Short", -1, 1)
test_df$trading_return <- test_df$return*test_df$signal2_direction
datatable(test_df)
return_df <- test_df[,c(1,3,8)]
return_df
## # A tibble: 16 x 3
## date return trading_return
## <date> <dbl> <dbl>
## 1 2022-01-01 -0.0542 -0.0542
## 2 2022-02-01 -0.0300 -0.0300
## 3 2022-03-01 0.0338 0.0338
## 4 2022-04-01 -0.0919 -0.0919
## 5 2022-05-01 0.00225 0.00225
## 6 2022-06-01 -0.0904 -0.0904
## 7 2022-07-01 0.0881 0.0881
## 8 2022-08-01 -0.0417 0.0417
## 9 2022-09-01 -0.101 -0.101
## 10 2022-10-01 0.0781 0.0781
## 11 2022-11-01 0.0541 -0.0541
## 12 2022-12-01 -0.0639 0.0639
## 13 2023-01-01 0.0610 0.0610
## 14 2023-02-01 -0.0255 0.0255
## 15 2023-03-01 0.0326 0.0326
## 16 2023-04-01 -0.00812 -0.00812
return_df_pivot <- return_df %>%
tidyr::pivot_longer(cols=-date,
names_to = 'type',
values_to = 'returns')
head(return_df_pivot)
## # A tibble: 6 x 3
## date type returns
## <date> <chr> <dbl>
## 1 2022-01-01 return -0.0542
## 2 2022-01-01 trading_return -0.0542
## 3 2022-02-01 return -0.0300
## 4 2022-02-01 trading_return -0.0300
## 5 2022-03-01 return 0.0338
## 6 2022-03-01 trading_return 0.0338
# Plot bar graph
a1 <- ggplot(return_df, aes(x = date, y = trading_return)) +
geom_bar(position = "dodge",stat='identity',fill='#4373B6') +
labs(x = "Date", y = "Returns",
title = "ARIMA Trading Returns") +
scale_y_continuous(labels = scales::percent_format()) +
scale_x_date(breaks='2 month')+
theme_bw()+
theme(legend.position = "top")
a2 <- ggplot(return_df, aes(x = date, y = return)) +
geom_bar(position = "dodge",stat='identity',fill='grey30') +
labs(x = "Date", y = "Returns",
title = "Buy&Hold") +
scale_y_continuous(labels = scales::percent_format()) +
scale_x_date(breaks='2 month')+
theme_bw()+
theme(legend.position = "top")
gridExtra::grid.arrange(a1,a2)

return_df
## # A tibble: 16 x 3
## date return trading_return
## <date> <dbl> <dbl>
## 1 2022-01-01 -0.0542 -0.0542
## 2 2022-02-01 -0.0300 -0.0300
## 3 2022-03-01 0.0338 0.0338
## 4 2022-04-01 -0.0919 -0.0919
## 5 2022-05-01 0.00225 0.00225
## 6 2022-06-01 -0.0904 -0.0904
## 7 2022-07-01 0.0881 0.0881
## 8 2022-08-01 -0.0417 0.0417
## 9 2022-09-01 -0.101 -0.101
## 10 2022-10-01 0.0781 0.0781
## 11 2022-11-01 0.0541 -0.0541
## 12 2022-12-01 -0.0639 0.0639
## 13 2023-01-01 0.0610 0.0610
## 14 2023-02-01 -0.0255 0.0255
## 15 2023-03-01 0.0326 0.0326
## 16 2023-04-01 -0.00812 -0.00812
return_df$total_return <- cumprod(1 + return_df$return)
return_df$total_trading_return <- cumprod(1 + return_df$trading_return)
# plot the total returns on a graph
ggplot(return_df, aes(x = date)) +
geom_line(aes(y = total_return, color = "Buy&Hold"),size=1) +
geom_line(aes(y = total_trading_return, color = "Trading Return"),size=1) +
scale_color_manual(values = c("Buy&Hold" = "grey30", "Trading Return" = '#4373B6')) +
labs(x = "Date", y = "Cumulative Return", color = "",title='Buy&Hold vs ARIMA') +
scale_x_date(breaks='2 month')+
theme_bw()+
theme(legend.position = "top")

return_df
## # A tibble: 16 x 5
## date return trading_return total_return total_trading_return
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2022-01-01 -0.0542 -0.0542 0.946 0.946
## 2 2022-02-01 -0.0300 -0.0300 0.917 0.917
## 3 2022-03-01 0.0338 0.0338 0.948 0.948
## 4 2022-04-01 -0.0919 -0.0919 0.861 0.861
## 5 2022-05-01 0.00225 0.00225 0.863 0.863
## 6 2022-06-01 -0.0904 -0.0904 0.785 0.785
## 7 2022-07-01 0.0881 0.0881 0.854 0.854
## 8 2022-08-01 -0.0417 0.0417 0.819 0.890
## 9 2022-09-01 -0.101 -0.101 0.736 0.800
## 10 2022-10-01 0.0781 0.0781 0.794 0.863
## 11 2022-11-01 0.0541 -0.0541 0.837 0.816
## 12 2022-12-01 -0.0639 0.0639 0.783 0.868
## 13 2023-01-01 0.0610 0.0610 0.831 0.921
## 14 2023-02-01 -0.0255 0.0255 0.810 0.944
## 15 2023-03-01 0.0326 0.0326 0.836 0.975
## 16 2023-04-01 -0.00812 -0.00812 0.829 0.967
# calculate total return for each trading method
buy_hold_total_return <- tail(return_df$total_return,1)-1
trading_total_return <- tail(return_df$total_trading_return,1)-1
# create a data frame with the total returns for each method
total_returns <- data.frame(method = c("Buy&Hold", "ARIMA Trading"),
total_return = c(buy_hold_total_return, trading_total_return))
# create bar graph
library(ggplot2)
ggplot(total_returns, aes(x = method,y=total_return,fill=method)) +
geom_bar(stat = "identity",width = 0.3)+
ggtitle("Total Return by Trading Method") +
scale_fill_manual(values = c('#4373B6',"grey30")) +
xlab("Trading Method") +
ylab("Total Return")+
theme_bw()+
theme(legend.position = "top") +
geom_text(aes(label=scales::percent(total_return)), vjust=1.2, size=5)
