# Load the daily gold futures history from the Nasdaq CSV export; the Date
# column is parsed as month/day/year and the other columns are type-guessed.
# NOTE(review): the absolute path only resolves on the original author's
# machine; consider a project-relative path.
#Gold_Price <- Quandl('LBMA/GOLD')
Gold_Price <- read_csv(
  "/Users/datascience/Desktop/Time Series Data Science/Time Series Project/Gold_Nasdaq_Max.csv",
  col_types = cols(Date = col_date(format = "%m/%d/%Y"))
)
# The file lists the newest session first (the joined results printed later in
# this script run from 2022-11-16 backwards). ts() indexes observations in row
# order, so sort oldest-first to keep every series built from this table
# running forward in time.
Gold_Price <- Gold_Price[order(Gold_Price$Date), ]
# Time Series Data of Closing Price
Close.TS <- ts(Gold_Price$`Close/Last`)
autoplot(Close.TS, ylab = "Close Price", main = "Time Series Plot of Gold Prices") +
  theme_classic()
# Check for any NA Values: count the missing entries in each column and show
# the counts as a one-column table.
cbind(lapply(Gold_Price, function(column) sum(is.na(column))))
## [,1]
## Date 0
## Close/Last 0
## Volume 0
## Open 0
## High 0
## Low 0
# Interactive candlestick chart of the whole price table. Each range-selector
# button zooms the x-axis to a trailing window (3/6 months, 1/2/3/5 years);
# the final button shows everything. The range slider is hidden.
Price_plot <- plot_ly(
  Gold_Price,
  x = ~Date,
  type = "candlestick",
  open = ~Open,
  close = ~`Close/Last`,
  high = ~High,
  low = ~Low,
  name = "price"
) %>%
  layout(
    xaxis = list(
      rangeselector = list(
        buttons = c(
          # One "backward" button per (count, label, step) triple.
          Map(
            function(count, label, step) {
              list(count = count, label = label, step = step, stepmode = "backward")
            },
            c(3, 6, 1, 2, 3, 5),
            c("3 mo", "6 mo", "1 yr", "2 yr", "3 yr", "5 yr"),
            c("month", "month", "year", "year", "year", "year")
          ),
          list(list(step = "all"))
        )
      ),
      rangeslider = list(visible = FALSE)
    ),
    yaxis = list(
      title = "Price ($)",
      showgrid = TRUE,
      showticklabels = TRUE
    )
  )
# Daily traded volume, rescaled to thousands, drawn as a bar chart.
# Date is already parsed as a Date by the import step, so it is used as-is.
Volume <- Gold_Price %>%
  select(Date, Volume)
Volume$Vol <- as.numeric(as.character(Volume$Volume)) / 1000
Volume_plot <- plot_ly(Volume, x = ~Date, y = ~Vol, type = "bar", name = "Volume") %>%
  layout(yaxis = list(title = "Volume (Units of Thousand)"))

# Stack the price chart (top 70%) over the volume chart (bottom 30%) on a
# shared date axis.
# NOTE(review): the variable name `plot` is the same as the base graphics
# generic; function calls to plot(...) still resolve, but a distinct name
# would be clearer.
plot <- subplot(
  Price_plot, Volume_plot,
  nrows = 2, heights = c(0.7, 0.3),
  shareX = TRUE, titleY = TRUE
) %>%
  layout(title = "GC:CMX")
plot
# Subset Date: keep sessions after 2019-01-01. The rows are ordered
# oldest-first before building the series because ts() indexes observations
# in row order and the source table may be stored newest-first.
Gold_Price_Subset <- subset(Gold_Price, Date > as.Date("2019-01-01"))
Gold_Price_Subset <- Gold_Price_Subset[order(Gold_Price_Subset$Date), ]
Close.TS_1 <- ts(Gold_Price_Subset$`Close/Last`)
autoplot(Close.TS_1, ylab = "Close Price", main = "Time Series Plot of Gold Prices") +
  theme_classic()
# Random Walk Test: fit an AR(1) model with intercept to the full closing
# price series; an ar1 coefficient close to 1 is consistent with a random walk.
random_walk_test <- arima(Close.TS, order = c(1, 0, 0))
summary(random_walk_test)
##
## Call:
## arima(x = Close.TS, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.9985 1439.0070
## s.e. 0.0011 177.2876
##
## sigma^2 estimated as 218.5: log likelihood = -10481.13, aic = 20968.25
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.00646364 14.78174 9.986105 -0.00971381 0.6854392 0.9984929
## ACF1
## Training set -0.04134238
# Autocorrelation of the first differences of the post-2019 closing prices.
Acf(
  x = diff(Close.TS_1),
  main = "Autocorrelation Plot of differenced series"
)
### Several different time periods were tested (max, 5 years, 3 years, 2 years) in an attempt to find the largest sample size that does not result in a random walk series.
# Traditionally, some people who believe the efficient market hypothesis theory [1] argue that the future stock price is predictable based on the historical stock data. Others who trust the random walk theory believe that the future stock price does not depend on the historical stock data, and hence no useful patterns could be found in the historical data to reflect the pattern of the upcoming stock sequences (https://www.mdpi.com/2227-7390/8/9/1441)
# Based on the experiment, data science is very useful for visualization data and our proposed method using Long Short-Term Memory (LSTM) can be used as predictor in short term data with accuracy 94.57% comes from the short term (1 year) with high epoch in training phase rather than using 3 years training data. (https://journalofbigdata.springeropen.com/articles/10.1186/s40537-021-00430-0)
# Economic Indicators ----
# Google Search Interest ----
# Worldwide Google search interest for the keyword "Gold" over the study
# window, reduced to the date, hits and keyword columns.
#trends <- gtrends(keyword = "Gold", geo = "US", onlyInterest = TRUE) # US Search
trends <- gtrends(keyword = "Gold", onlyInterest = TRUE, time = "2016-01-01 2022-11-17") # World Search
trends <- trends$interest_over_time %>%
  as_tibble() %>%
  select(date, hits, keyword)
# ISSUE: Google returns weekly rather than daily points for this time span.
# Daily data is only available for roughly the past 30-90 days; longer spans
# come back weekly (past 5 years) or monthly with a specific date.
# Round each timestamp up to a whole day and store it as a plain Date so it
# has the same type as the Date key of the price table it is joined to later.
trends$Date <- as_date(ceiling_date(trends$date, unit = "day"))
trends %>%
  plot_ly(type = "scatter", x = ~Date, y = ~hits, mode = "lines", name = "Google Search Trends") %>%
  layout(title = paste0("Interest over Time: ", "Gold"), yaxis = list(title = "hits"))
# Candlestick chart of the sessions after 2016-01-01 (the start of the search
# interest window), without a range slider.
Gold_Price_Subset_2 <- subset(Gold_Price, Date > as.Date("2016-01-01"))
# DDate is kept as a plain copy of Date. The earlier as.Date(..., format =
# "%m/%y") call returned the column unchanged, because format is ignored when
# the input is already a Date.
Gold_Price_Subset_2$DDate <- Gold_Price_Subset_2$Date
Gold_Subset_Plot <- Gold_Price_Subset_2 %>%
  plot_ly(
    x = ~Date,
    type = "candlestick",
    open = ~Open,
    close = ~`Close/Last`,
    high = ~High,
    low = ~Low,
    name = "price"
  ) %>%
  layout(
    xaxis = list(rangeslider = list(visible = FALSE)),
    yaxis = list(title = "Gold Price")
  )
Gold_Subset_Plot
# Expose the closing price under a syntactic column name for the later steps.
Gold_Price$Close <- Gold_Price$`Close/Last`
# Pair each search-interest reading with the close recorded on the same date,
# drop unmatched rows, and plot close against hits with a loess smoother.
# all_of() replaces the superseded one_of() and fails loudly if a column is
# missing instead of only warning.
trends %>%
  left_join(Gold_Price, by = "Date") %>%
  select(all_of(c("Date", "hits", "Close"))) %>%
  drop_na() %>%
  ggplot(aes(hits, Close)) +
  geom_point(color = "red") +
  geom_smooth(method = "loess") +
  labs(title = paste0("Gold", ": Relationship between World Interest (Hits) and Close Price (Gold)"))
## `geom_smooth()` using formula = 'y ~ x'
# Test/Train split: sessions after 2017-11-17 are used; those before
# 2021-11-17 train the model and the remaining final year is held out.
Gold_Price_Subset_3 <- subset(Gold_Price, Date > as.Date("2017-11-17"))
# Order oldest-first: ts() and window() work on row positions, so the training
# window must be the earliest rows and the test window the latest ones.
Gold_Price_Subset_3 <- Gold_Price_Subset_3[order(Gold_Price_Subset_3$Date), ]
Gold.Train <- subset(Gold_Price_Subset_3, Date < as.Date("2021-11-17"))
Gold.Test <- subset(Gold_Price_Subset_3, Date >= as.Date("2021-11-17"))
# Index-based series split at the same boundary as the date-based split. The
# cut point is taken from the number of training rows rather than a fixed
# position, so both splits stay aligned if the data is refreshed.
Gold.ts <- ts(Gold_Price_Subset_3$Close)
Gold.Train.ts <- window(Gold.ts, end = nrow(Gold.Train))
Gold.Test.ts <- window(Gold.ts, start = nrow(Gold.Train) + 1)
# Prophet expects the columns ds (date) and y (value).
Gold_Train_df <- Gold.Train %>%
  select(ds = Date, y = Close)
Gold_Test_df <- Gold.Test %>%
  select(Date, Close)
# Predictions using Prophet: fit on the training closes with a 95% uncertainty
# interval.
# NOTE(review): daily.seasonality = TRUE is set although the training data has
# one observation per day; confirm this component is intended.
Prophet <- prophet(Gold_Train_df, interval.width = 0.95, daily.seasonality = TRUE)
# Extend the training dates by 365 calendar days, then drop the rows whose
# weekday number is 1 or 7 to account for the regular gaps on weekends.
future <- make_future_dataframe(Prophet, periods = 365)
future <- filter(future, !wday(ds) %in% c(1, 7))
Prophet_Forecast <- predict(Prophet, future)
# Point forecast plus interval bounds under presentation-friendly names.
Forecast_subset <- Prophet_Forecast %>%
  select(
    Date = ds,
    ClosePrice = yhat,
    ClosePrice_lower = yhat_lower,
    ClosePrice_upper = yhat_upper
  )
datatable(Forecast_subset)
plot(Prophet, Prophet_Forecast, xlabel = "Date", ylabel = "Gold Close Price ($)") +
  ggtitle(paste0("Gold", ": Price Prediction"))
prophet_plot_components(Prophet, Prophet_Forecast)
#autoplot(apple, series = "actual") +
# autolayer(apple.pred, series = "predicted", alpha = .4) +
# theme_classic() +
# coord_cartesian(xlim = c(210, 251))
# Keep only the forecast date and point estimate, renamed to Date and Close.
Prophet_Results <- select(Prophet_Forecast, Date = ds, Close = yhat)
tail(Prophet_Results)
## Date Close
## 1262 2022-11-09 1723.936
## 1263 2022-11-10 1722.952
## 1264 2022-11-11 1720.154
## 1265 2022-11-14 1718.374
## 1266 2022-11-15 1716.655
## 1267 2022-11-16 1715.051
# Hold-out forecasts: convert the forecast timestamps to calendar dates and
# keep those on or after 2021-11-17.
Prophet_Results$Date <- as.Date(Prophet_Results$Date)
Prophet_Results <- subset(Prophet_Results, Date >= as.Date("2021-11-17"))
# Hold-out actuals: observed closes over the same period. Date is already a
# Date in the price table, so no conversion is needed.
Gold_Results <- Gold_Price %>%
  select(Date, Close_Actual = Close) %>%
  subset(Date >= as.Date("2021-11-17"))
# Match forecast and actual by date, then score the forecast (first argument)
# against the observed closes (second argument).
Results <- inner_join(Gold_Results, Prophet_Results, by = "Date")
accuracy(Results$Close, Results$Close_Actual)
## ME RMSE MAE MPE MAPE
## Test set 62.18735 113.3095 85.21852 3.197662 4.572932
#Predicted.ts <- ts(Results$Close)
#Actual.ts <- ts(Results$Close_Actual)
#autoplot(Predicted.ts, series ='predicted') +
# autolayer(Actual.ts, series = 'actual')
# Joined table of observed (Close_Actual) and forecast (Close) closing prices.
Results
## # A tibble: 252 × 3
## Date Close_Actual Close
## <date> <dbl> <dbl>
## 1 2022-11-16 1776. 1715.
## 2 2022-11-15 1775. 1717.
## 3 2022-11-14 1774. 1718.
## 4 2022-11-11 1774. 1720.
## 5 2022-11-10 1754. 1723.
## 6 2022-11-09 1716. 1724.
## 7 2022-11-08 1716 1725.
## 8 2022-11-07 1680. 1726.
## 9 2022-11-04 1686. 1727.
## 10 2022-11-03 1638. 1729.
## # … with 242 more rows
# Observed closing price over the hold-out year.
p <- ggplot(Results, aes(Date, Close_Actual)) +
  geom_line() +
  theme_light() +
  ggtitle("Actual 1 Year Gold Price")
p
# Forecast closing price over the same period (title typo corrected).
p2 <- ggplot(Results, aes(Date, Close)) +
  geom_line() +
  theme_light() +
  ggtitle("Predicted 1 Year Gold Price")
p2
# Notes for myself:
# * Add in external regressors (price of silver, interest rate, dollar index,
#   and so on); example: https://rpubs.com/mpleo/timeseries_prophet
# * Restart the Prophet method or try a different method.