The objective of this project is to produce a set of models to predict the returns of a given set of stocks. ARIMA (and variations thereof), the Fama-French three-factor model, Holt-Winters/triple exponential smoothing, neural network autoregression, and KNN regression are considered.
Install (if needed) and load the required packages.
#install.packages("tidyverse")
#install.packages("gridExtra")
#install.packages("GGally")
#install.packages("forecast")
#install.packages("tseries")
#install.packages("vrtest")
#install.packages("stats")
#install.packages("quantmod")
#install.packages("dyn")
#install.packages("Metrics")
#install.packages("rugarch")
#install.packages("fGarch")
#install.packages("tsfknn")
library(tidyverse)
library(gridExtra)
library(GGally)
library(forecast)
library(tseries)
library(vrtest)
library(stats)
library(quantmod)
library(dyn)
library(Metrics)
library(rugarch)
library(fGarch)
library(tsfknn)
For the chosen stocks and time frame, generate a dataset containing the log returns of adjusted closing prices as well as the three Fama-French factors. Data sources are Yahoo Finance and Dartmouth College (Kenneth French's data library).
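For reference, the daily log return used throughout is r_t = ln(P_t / P_(t-1)). A minimal sketch of the equivalence between quantmod's Delt(type = "log") and computing it by hand, using an illustrative price vector:
# Hedged sketch: log returns by hand vs. quantmod::Delt (prices is illustrative data)
prices <- c(100, 101, 99.5, 102)
all.equal(as.vector(Delt(prices, type = "log"))[-1], diff(log(prices)))  # TRUE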
stockTickers <- c("AAPL","FB","GOOG","GE","PG","AMZN","TSLA")
end <- as.Date("2020-06-01")
start <- end - 365
url = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip"
download.file(url, "F-F_Research_Data_Factors_daily_CSV.zip")
unzip("F-F_Research_Data_Factors_daily_CSV.zip")
file.remove("F-F_Research_Data_Factors_daily_CSV.zip")
## [1] TRUE
ff <- read.csv("F-F_Research_Data_Factors_daily.CSV", header=T, skip=4)
colnames(ff) <- c("date", "zm","smb","hml","rf")
ff_factors <- c("zm","smb","hml")
ff[,1] <- as.Date(ff[["date"]], "%Y%m%d")
ff <- ff[ff$date >= start & ff$date <= end,]
priceData <- as.data.frame(ff[,"date"])
colnames(priceData) <- "date"
for (i in seq(length(stockTickers))){
ticker = stockTickers[i]
getSymbols(ticker, verbose = FALSE, src = "yahoo",from=start,to=end)
temp_df <- select(as.data.frame(get(ticker)),contains(".Adjusted"))
temp_df <- rownames_to_column(temp_df,"date")
temp_df[,1] <- as.Date(temp_df[["date"]],"%Y-%m-%d")
priceData <- semi_join(priceData,temp_df,by="date")
priceData <- left_join(priceData,temp_df,by="date")
}
colnames(priceData) <- c("date",stockTickers)
returnsData <- priceData
LNreturnsData <- priceData
for (stock in stockTickers)
{
returnsData[[stock]] <- Delt(priceData[[stock]],type = "arithmetic")
LNreturnsData[[stock]] <- Delt(priceData[[stock]],type = "log")
}
returnsData <- semi_join(returnsData,ff,by="date")
returnsData <- left_join(returnsData,ff,by="date")
returnsData <- na.omit(returnsData)
row.names(returnsData) <- (1:nrow(returnsData))
LNreturnsData <- semi_join(LNreturnsData,ff,by="date")
LNreturnsData <- left_join(LNreturnsData,ff,by="date")
LNreturnsData <- na.omit(LNreturnsData)
row.names(LNreturnsData) <- (1:nrow(LNreturnsData))
head(LNreturnsData)
## date AAPL FB GOOG GE PG
## 1 2019-06-04 0.03593075 0.0202027439 0.016101654 0.048140540 0.0084419911
## 2 2019-06-05 0.01601431 0.0039920094 -0.010337736 -0.011060917 0.0193943609
## 3 2019-06-06 0.01457514 0.0009509897 0.002032049 0.003028749 0.0060715364
## 4 2019-06-07 0.02626885 0.0293863564 0.020565811 0.006030015 0.0128615304
## 5 2019-06-10 0.01269847 0.0084442065 0.013361951 0.006989517 -0.0004596271
## 6 2019-06-11 0.01151302 0.0185883108 -0.001537710 0.006941102 0.0060522448
## AMZN TSLA zm smb hml rf
## 1 0.021548130 0.078576006 2.33 0.45 -0.17 0.009
## 2 0.005155597 0.015326116 0.69 -1.01 -0.91 0.009
## 3 0.009081437 0.046513085 0.55 -1.02 -0.01 0.009
## 4 0.027918958 -0.007065431 1.04 -0.01 -1.15 0.009
## 5 0.030892075 0.040160675 0.53 0.00 -0.07 0.009
## 6 0.001648590 0.019629454 -0.12 -0.26 0.59 0.009
tail(LNreturnsData)
## date AAPL FB GOOG GE
## 245 2020-05-21 -0.0074833859 0.006155724 -0.0027904585 0.009302365
## 246 2020-05-22 0.0064177681 0.015097885 0.0054172894 -0.010861207
## 247 2020-05-26 -0.0067965521 -0.011603423 0.0046685255 0.059063318
## 248 2020-05-27 0.0043474771 -0.013265890 0.0005784737 0.069581011
## 249 2020-05-28 0.0004400497 -0.016190375 -0.0007831778 -0.072526512
## 250 2020-05-29 -0.0009745454 -0.001642486 0.0085675599 -0.031463327
## PG AMZN TSLA zm smb hml rf
## 245 -0.014762349 -0.020709846 0.014654925 -0.70 0.65 0.46 0
## 246 0.008741426 -0.004038038 -0.013037706 0.27 0.49 -0.88 0
## 247 -0.005075014 -0.006182600 0.002433124 1.23 0.07 4.63 0
## 248 0.016466378 -0.004747368 0.001659429 1.54 0.64 3.60 0
## 249 0.018874220 -0.003861509 -0.017736784 -0.41 -1.46 -2.44 0
## 250 -0.001207001 0.017041921 0.035583745 0.59 -0.31 -1.97 0
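As a quick sanity check on the assembled dataset, per-stock summary statistics of the daily log returns can be computed; a minimal sketch:
# Hedged sketch: mean and standard deviation of each stock's daily log returns
sapply(stockTickers, function(s) {
  x <- as.vector(LNreturnsData[[s]])
  c(mean = mean(x), sd = sd(x))
})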
Plot time series and box plots of the stock returns (simple and log versions) and the Fama-French factors.
dataSets <- list("Returns" = returnsData, "Log Returns" = LNreturnsData, "Fama-French Factors" = ff)
plots <- list()
for (i in 1:length(dataSets))
{
name <- names(dataSets[i])
df <- dataSets[[i]]
if (i == 3) {columns = c("date",ff_factors)} else {columns = c("date",stockTickers)}
temp_df <- df %>%
select(columns) %>%
gather(key = "Series", value = "Daily_Change", -date)
plot <- ggplot(temp_df, aes(x = date, y = Daily_Change,color=Series)) +
geom_line() +
ggtitle(name)
label <- paste("LinePlot_",name,sep = "")
plots[[label]] <- plot
plot <- ggplot(temp_df, aes(x = Series, y = Daily_Change,color=Series)) +
geom_boxplot() +
ggtitle(name)
label <- paste("BoxPlot_",name,sep = "")
plots[[label]] <- plot
}
Test the log returns of each stock for normality using the Shapiro-Wilk test, comparing each empirical density to a fitted normal curve.
plots <- list()
results <- list()
for (stock in c(stockTickers))
{
x <- LNreturnsData[[stock]]
avg <- mean(x)
sdev <- sd(x)
result = shapiro.test(x)
x <- as.data.frame(x)
densityPlot <- ggplot(x, aes(x=V1)) +
geom_density() +
stat_function(fun = dnorm, args = list(mean = avg, sd = sdev),colour = "red") +
labs(title= stock,x="Log Returns", y = "Density")
name <- paste(stock)
plots[[name]] <- densityPlot
results[[name]] <- result
}
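The stored density plots can be laid out together with gridExtra (loaded above); a minimal sketch:
# Hedged sketch: arrange the stored ggplot objects in a two-column grid
gridExtra::grid.arrange(grobs = plots, ncol = 2)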
print(results)
## $AAPL
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.87443, p-value = 1.771e-13
##
##
## $FB
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.89756, p-value = 5.288e-12
##
##
## $GOOG
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.86856, p-value = 8.008e-14
##
##
## $GE
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.91933, p-value = 2.152e-10
##
##
## $PG
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.84683, p-value = 5.199e-15
##
##
## $AMZN
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.93025, p-value = 1.772e-09
##
##
## $TSLA
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.8783, p-value = 3.03e-13
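Every Shapiro-Wilk p-value is far below 0.05, so normality is rejected for all seven stocks, consistent with the heavy tails typical of daily returns. As a cross-check, the Jarque-Bera test from the already-loaded tseries package assesses normality via skewness and kurtosis; a minimal sketch:
# Hedged sketch: cross-check normality with the Jarque-Bera test (tseries)
for (stock in stockTickers)
{
  print(stock)
  print(jarque.bera.test(as.vector(LNreturnsData[[stock]])))
}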
Apply the Augmented Dickey-Fuller test to each stock's log returns and identify the required order of differencing.
adfTests <- list()
ARIMA_Orders <- as.data.frame(stockTickers)
colnames(ARIMA_Orders) <- "Stock"
ARIMA_Orders$p <- NA
ARIMA_Orders$d <- NA
ARIMA_Orders$q <- NA
for (stock in c(stockTickers))
{
returns <- ts(LNreturnsData[,stock])
adfTests[[stock]] <- adf.test(returns)
NumDiff <- 0
if (adf.test(returns)$p.value > 0.05)
{
for (d in c(1:3))
{
diff_returns <- diff(returns, lag = 1, differences = d)
if (adf.test(diff_returns)$p.value < 0.05)
{
NumDiff <- d
break
}
}
}
ARIMA_Orders[which(ARIMA_Orders$Stock == stock),3] <- NumDiff
if (NumDiff > 0)
{
label <- paste(stock,"_D_order_",NumDiff,sep = "")
adfTests[[label]] <- adf.test(diff(returns, lag = 1, differences = NumDiff))
}
}
print(adfTests)
## $AAPL
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -5.3359, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $FB
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -4.628, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $GOOG
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -4.7999, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $GE
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -4.9234, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $PG
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -6.6662, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $AMZN
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -4.988, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $TSLA
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -5.8391, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
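All seven series reject the unit-root null at the 5% level without differencing, so d = 0 throughout. As a cross-check, forecast::ndiffs estimates the required differencing order directly; a minimal sketch:
# Hedged sketch: estimate the required differencing order (0 means already stationary)
for (stock in stockTickers)
{
  print(paste(stock, "suggested d:", ndiffs(ts(as.vector(LNreturnsData[[stock]])), test = "adf")))
}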
Inspect the ACF and PACF of the (appropriately differenced) log returns to identify the range of orders p and q to consider (see https://online.stat.psu.edu/stat510/lesson/3/3.1 for reference). Then select the combination that yields the ARIMA model with the lowest AIC, where AIC = 2k - 2*ln(L) trades off the k estimated parameters against the maximized likelihood L (lower is better).
plots <- list()
for (stock in c(stockTickers))
{
d <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),3]
returns <- LNreturnsData[,stock]
train <- head(returns,length(returns)-10)
title1 <- paste("ACF_",stock,sep = "")
label1 <- paste(stock,"_ACF",sep = "")
title2 <- paste("PACF_",stock,sep = "")
label2 <- paste(stock,"_PACF",sep = "")
if (d>0)
{
returns <- diff(returns, lag = 1, differences = d)
title1 <- paste("ACF_",stock,"_Diff_",d,sep = "")
title2 <- paste("PACF_",stock,"_Diff_",d,sep = "")
}
plot <- ggAcf(returns,lag.max=15,type="correlation",plot=TRUE,main=title1)
plots[[label1]] <- plot
plot <- ggAcf(returns,lag.max=15,type="partial",plot=TRUE,main=title2)
plots[[label2]] <- plot
acf <- acf(returns,lag.max=15,type="correlation",plot=FALSE)
clim1 <- qnorm((1 + .95)/2)/sqrt(acf$n.used)
pacf <- acf(returns,lag.max=15,type="partial",plot=FALSE)
clim2 <- qnorm((1 + .95)/2)/sqrt(pacf$n.used)
# Find the lag at which the significant ACF spikes cut off (candidate MA order)
MaxSigLag_ACF <- 1
sigACFs <- as.array(abs(acf$acf[-1]) > clim1)
for (i in (2:(length(sigACFs)-1)))  # stop before the last element to avoid indexing past the array
{
if (sigACFs[i] & !sigACFs[i+1])
{
MaxSigLag_ACF <- i
break
}
}
# Find the lag at which the significant PACF spikes cut off (candidate AR order)
MaxSigLag_PACF <- 1
sigPACFs <- as.array(abs(pacf$acf) > clim2)
for (i in (2:(length(sigPACFs)-1)))  # stop before the last element to avoid indexing past the array
{
if (sigPACFs[i] & !sigPACFs[i+1])
{
MaxSigLag_PACF <- i
break
}
}
CountSigLag_ACF <- sum(abs(acf$acf[-1]) > clim1)
CountSigLag_PACF <- sum(abs(pacf$acf) > clim2)
P_Order <- MaxSigLag_PACF
Q_Order <- MaxSigLag_ACF
# Heuristic: an ACF that tails off relative to a sharper PACF cut-off suggests a pure AR model (q = 0);
# the reverse pattern suggests a pure MA model (p = 0)
if (CountSigLag_ACF > CountSigLag_PACF + 1)
{
Q_Order <- 0
}
if (CountSigLag_ACF < CountSigLag_PACF - 1)
{
P_Order <- 0
}
# Grid-search (p,q) up to the identified maxima and keep the AIC-minimizing combination
AIC_Table <- data.frame("p"=as.numeric(),"q"=as.numeric(),"aic"=as.numeric())
for (i in c(0:P_Order))
{
for (j in c(0:Q_Order))
{
aic <- AIC(Arima(train, order=c(i,d,j),method="ML"))
AIC_Table[nrow(AIC_Table) + 1,] = c(i,j,aic)
}
}
P_Order <- AIC_Table[which(AIC_Table$aic == min(AIC_Table$aic)),1]
Q_Order <- AIC_Table[which(AIC_Table$aic == min(AIC_Table$aic)),2]
ARIMA_Orders[which(ARIMA_Orders$Stock == stock),2] <- P_Order
ARIMA_Orders[which(ARIMA_Orders$Stock == stock),4] <- Q_Order
}
print(ARIMA_Orders)
## Stock p d q
## 1 AAPL 9 0 0
## 2 FB 7 0 0
## 3 GOOG 9 0 0
## 4 GE 4 0 7
## 5 PG 4 0 3
## 6 AMZN 8 0 0
## 7 TSLA 0 0 0
Fit an ARIMA model for each stock using the selected (p,d,q) orders, holding out the last 50 observations as a test set.
arima_models <- list()
arima_titles <- list()
arima_plots <- list()
arima_resfits <- list()
arima_aic <- list()
arima_rmse <- list()
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- nrow(LNreturnsData)
for (stock in c(stockTickers))
{
p <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),2]
d <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),3]
q <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),4]
title <- paste("ARIMA Model For Stock:", stock,"; (p,d,q)=", p,d,q)
log_returns <- ts(LNreturnsData[,stock])
train <- head(log_returns,length(log_returns)-50)
test <- tail(log_returns,50)
model <- Arima(train, order=c(p,d,q),method="ML")
forecast <- forecast(model, h = 50)
plot <- autoplot(forecast,main = title,xlab = "Days",ylab = "Log Returns") +
autolayer(train, series = 'Train') +
autolayer(fitted(model), series = 'Fitted') +
autolayer(forecast$mean, series = 'Forecast') +
autolayer(test, series = 'Test') +
coord_cartesian(xlim = c(plot_midpoint-125,plot_midpoint+10)) +
labs(caption = paste("From:",start,"to",end,"(last 125 days shown)"))
arima_models[[stock]] <- model
label <- paste(stock,"_ARIMA_Model_(p,d,q)=", p,d,q)
arima_titles[[stock]] <- label
arima_plots[[label]] <- plot
label <- paste(stock,"_ARIMA_Model_Residuals",sep = "")
arima_resfits[[label]] <- residuals(model)
label <- stock
arima_aic[[label]] <- round(AIC(model),4)
arima_rmse[[label]] <- round(rmse(test, predict(model,n.ahead=50)$pred),4)
}
print(arima_models)
## $AAPL
## Series: train
## ARIMA(9,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## -0.2494 -0.0296 0.1059 -0.0231 0.0605 0.0117 0.2328 -0.2182
## s.e. 0.0690 0.0705 0.0737 0.0758 0.0804 0.0792 0.0770 0.0810
## ar9 mean
## 0.2815 0.0017
## s.e. 0.0837 0.0018
##
## sigma^2 estimated as 0.0004563: log likelihood=488.88
## AIC=-955.77 AICc=-954.36 BIC=-919.48
##
## $FB
## Series: train
## ARIMA(7,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 mean
## -0.3173 0.096 0.2382 -0.0038 0.0650 0.0894 0.3172 -0.0011
## s.e. 0.0680 0.071 0.0764 0.0800 0.0839 0.0842 0.0846 0.0027
##
## sigma^2 estimated as 0.0004334: log likelihood=494.1
## AIC=-970.19 AICc=-969.24 BIC=-940.51
##
## $GOOG
## Series: train
## ARIMA(9,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## -0.2201 0.0208 0.2416 -0.0331 0.0011 -0.1021 0.3162 -0.0925
## s.e. 0.0684 0.0699 0.0707 0.0736 0.0771 0.0763 0.0754 0.0808
## ar9 mean
## 0.2903 -0.0002
## s.e. 0.0823 0.0021
##
## sigma^2 estimated as 0.0003242: log likelihood=523.17
## AIC=-1024.33 AICc=-1022.93 BIC=-988.05
##
## $GE
## Series: train
## ARIMA(4,0,7) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4 ma5
## -1.108 -0.1093 0.9239 0.7457 0.9316 0.0458 -0.6998 -0.3782 0.2436
## s.e. 0.105 0.1589 0.1509 0.0964 0.1253 0.1735 0.1613 0.1309 0.1046
## ma6 ma7 mean
## -0.0153 0.1261 -0.0027
## s.e. 0.1084 0.0859 0.0048
##
## sigma^2 estimated as 0.0009511: log likelihood=415.67
## AIC=-805.34 AICc=-803.38 BIC=-762.46
##
## $PG
## Series: train
## ARIMA(4,0,3) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 mean
## 0.1664 0.7075 -0.4427 -0.4754 -0.4814 -0.6140 0.6430 5e-04
## s.e. 0.1165 0.1088 0.1217 0.0745 0.1198 0.1103 0.1292 6e-04
##
## sigma^2 estimated as 0.0002832: log likelihood=536.43
## AIC=-1054.87 AICc=-1053.92 BIC=-1025.18
##
## $AMZN
## Series: train
## ARIMA(8,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8 mean
## -0.1384 0.1634 0.1528 -0.1085 0.0151 0.0168 0.1640 -0.2597 0.0004
## s.e. 0.0694 0.0721 0.0730 0.0753 0.0822 0.0821 0.0827 0.0852 0.0012
##
## sigma^2 estimated as 0.0002854: log likelihood=536.41
## AIC=-1052.82 AICc=-1051.65 BIC=-1019.83
##
## $TSLA
## Series: train
## ARIMA(0,0,0) with non-zero mean
##
## Coefficients:
## mean
## 0.0035
## s.e. 0.0033
##
## sigma^2 estimated as 0.002136: log likelihood=331.58
## AIC=-659.16 AICc=-659.09 BIC=-652.56
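Beyond the RMSE stored above, forecast::accuracy reports a fuller set of error measures on both training and test sets. A minimal sketch, using the forecast and test objects left over from the loop's final iteration (TSLA):
# Hedged sketch: richer error measures (ME, RMSE, MAE, MAPE, MASE, ...) for one model
accuracy(forecast, test)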
Check whether the residuals of each model resemble white noise, and test them for autocorrelation and normality.
plots <- list()
for (i in c(1:length(arima_models)))
{
title <- arima_titles[[i]]
model <- arima_models[[i]]
df <- as.data.frame(cbind(fitted(model),model$residuals))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
scale_x_log10() +
ggtitle(label=title)
plots[[i]] <- plot
}
for (i in c(1:length(arima_models)))
{
title <- arima_titles[[i]]
print(title)
model <- arima_models[[i]]
checkresiduals(model)
residuals <- arima_resfits[[i]]
print(shapiro.test(residuals))
}## [1] "AAPL _ARIMA_Model_(p,d,q)= 9 0 0"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(9,0,0) with non-zero mean
## Q* = 4.1784, df = 3, p-value = 0.2428
##
## Model df: 10. Total lags used: 13
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.91972, p-value = 5.548e-09
##
## [1] "FB _ARIMA_Model_(p,d,q)= 7 0 0"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(7,0,0) with non-zero mean
## Q* = 1.4316, df = 3, p-value = 0.6982
##
## Model df: 8. Total lags used: 11
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.94633, p-value = 8.496e-07
##
## [1] "GOOG _ARIMA_Model_(p,d,q)= 9 0 0"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(9,0,0) with non-zero mean
## Q* = 4.9247, df = 3, p-value = 0.1774
##
## Model df: 10. Total lags used: 13
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.91877, p-value = 4.735e-09
##
## [1] "GE _ARIMA_Model_(p,d,q)= 4 0 7"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,0,7) with non-zero mean
## Q* = 2.504, df = 3, p-value = 0.4746
##
## Model df: 12. Total lags used: 15
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.92376, p-value = 1.109e-08
##
## [1] "PG _ARIMA_Model_(p,d,q)= 4 0 3"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,0,3) with non-zero mean
## Q* = 5.8278, df = 3, p-value = 0.1203
##
## Model df: 8. Total lags used: 11
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.88032, p-value = 1.66e-11
##
## [1] "AMZN _ARIMA_Model_(p,d,q)= 8 0 0"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(8,0,0) with non-zero mean
## Q* = 4.6516, df = 3, p-value = 0.1992
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.93782, p-value = 1.488e-07
##
## [1] "TSLA _ARIMA_Model_(p,d,q)= 0 0 0"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,0) with non-zero mean
## Q* = 16.533, df = 9, p-value = 0.05655
##
## Model df: 1. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.83769, p-value = 1.128e-13
Also test each model's squared residuals for autocorrelation; significant autocorrelation there indicates ARCH effects (volatility clustering).
for (stock in c(stockTickers))
{
model <- arima_models[[stock]]
squaredResiduals <- (model$residuals)^2
title <- paste("ARIMA Squared Residuals For Stock:",stock)
plot(ggAcf(squaredResiduals) + ggtitle(title))
print(title)
print(Box.test(squaredResiduals,type = "Ljung-Box"))
}## [1] "ARIMA Squared Residuals For Stock: AAPL"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 0.81273, df = 1, p-value = 0.3673
## [1] "ARIMA Squared Residuals For Stock: FB"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 16.785, df = 1, p-value = 4.186e-05
## [1] "ARIMA Squared Residuals For Stock: GOOG"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 1.8204, df = 1, p-value = 0.1773
## [1] "ARIMA Squared Residuals For Stock: GE"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 17.457, df = 1, p-value = 2.939e-05
## [1] "ARIMA Squared Residuals For Stock: PG"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 60.983, df = 1, p-value = 5.773e-15
## [1] "ARIMA Squared Residuals For Stock: AMZN"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 3.1168, df = 1, p-value = 0.07749
## [1] "ARIMA Squared Residuals For Stock: TSLA"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 2.414, df = 1, p-value = 0.1203
The Ljung-Box tests on squared residuals show significant autocorrelation for FB, GE, and PG, indicating ARCH effects and motivating ARIMA-GARCH models. Note: only the simple GARCH(1,1) case is considered, and if d > 0, the differencing is not reversed at the end.
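Should reversing the differencing be needed, base R's diffinv can rebuild the original scale from differenced forecasts. A minimal sketch, where fc_diff (differenced forecasts) and original_series are hypothetical placeholders:
# Hedged sketch: invert order-d differencing; fc_diff and original_series are hypothetical
d <- 1                                 # differencing order that was applied
last_vals <- tail(original_series, d)  # last observed value(s) before the forecast window
fc_levels <- diffinv(fc_diff, lag = 1, differences = d, xi = last_vals)[-(1:d)]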
arima_garch_models <- list()
arima_garch_model_sums <- list()
arima_garch_titles <- list()
arima_garch_plots <- list()
arima_garch_resfits <- list()
arima_garch_aic <- list()
arima_garch_rmse <- list()
for (stock in c(stockTickers))
{
p <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),2]
d <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),3]
q <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),4]
returns <- ts(LNreturnsData[,stock])
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
train_date <- head(date,length(date)-50)
test_date <- tail(date,50)
if (d>0)
{
returns <- diff(returns, lag = 1, differences = d)
train_date <- tail(train_date,-d)
}
train <- ts(head(returns,length(returns)-50))
test <- ts(tail(returns,50))
spec <- ugarchspec(variance.model = list(model="sGARCH",garchOrder=c(1,1)),
mean.model = list(armaOrder=c(p,q)),
distribution.model = "std")
model <- ugarchfit(spec=spec, data = train)
fitted <- as.data.frame(fitted(model))
train <- as.data.frame(cbind(train_date,train,fitted))
train$train_date <- as.Date(train$train_date)
colnames(train) <- c("date","train","fitted")
forecast <- ugarchroll(spec,data=returns,
forecast.length=50,
n.start=200,
refit.every=5,
refit.window="moving")
forecastMU <- as.data.frame(forecast@forecast[["density"]][["Mu"]])
forecastSigma <- as.data.frame(forecast@forecast[["density"]][["Sigma"]])
test <- cbind(as.data.frame(test_date),as.data.frame(test),forecastMU,forecastSigma)
colnames(test) <- c("date","test","forecast","sigma")
test$lwr <- test$forecast - 1.96*test$sigma
test$upr <- test$forecast + 1.96*test$sigma
row.names(test) <- (nrow(train)+1):nrow(returns)
colors <- c("Train"="black","Fitted"="red","Test"="green4","Forecast"="blue")
subtitle <- ""
ylab <- "Log-Returns"
if (d > 0)
{
subtitle <- paste("Note: differencing of order",d,"applied and not reversed in plot")
ylab <- "Differenced Log-Returns"
}
title <- paste("ARIMA-GARCH Model For Stock:", stock,"; (p,d,q)=", p,d,q)
plot <- ggplot(train, aes(x = date, y = train,color="Train")) +
geom_line() +
geom_line(aes(y=fitted,color = "Fitted")) +
geom_line(data=test,aes(y=test, color = "Test")) +
geom_line(data=test,aes(y=forecast, color = "Forecast")) +
geom_ribbon(data=test,aes(x=date,ymin=lwr,ymax=upr),fill="grey",inherit.aes=FALSE,alpha=0.5) +
coord_cartesian(xlim = c(max(date)-125,max(date)+10)) +
scale_x_date(date_labels ="%Y/%m/%d") +
scale_color_manual(values = colors) +
labs(x = "Date",y = ylab,color = "Legend") +
ggtitle(label = title) +
labs(caption = paste("From:",start,"to",end,"(last 125 days shown)",paste("\n", subtitle)))
arima_garch_models[[stock]] <- model
arima_garch_model_sums[[stock]] <- model@fit[["matcoef"]]
label <- paste(stock,"_ARIMA_GARCH")
arima_garch_titles[[stock]] <- label
arima_garch_plots[[stock]] <- plot
label <- paste(stock,"_ARIMA_GARCH_Residuals",sep = "")
arima_garch_resfits[[stock]] <- model@fit[["residuals"]]
arima_garch_aic[[stock]] <- round(infocriteria(model)[1],4)
arima_garch_rmse[[stock]] <- round(rmse(test$test, test$forecast),4)
}
plots <- list()
for (stock in c(stockTickers))
{
print(stock)
model <- arima_garch_model_sums[[stock]]
show(model)
}## [1] "AAPL"
## Estimate Std. Error t value Pr(>|t|)
## mu 3.649621e-03 7.365424e-04 4.95507229 7.230340e-07
## ar1 -2.352217e-01 7.671176e-02 -3.06630585 2.167215e-03
## ar2 -2.071692e-01 7.783390e-02 -2.66168392 7.775085e-03
## ar3 -1.656015e-01 7.579359e-02 -2.18490076 2.889612e-02
## ar4 1.478404e-02 7.704716e-02 0.19188295 8.478339e-01
## ar5 1.732262e-02 7.521528e-02 0.23030723 8.178530e-01
## ar6 1.079708e-01 7.402707e-02 1.45853195 1.446940e-01
## ar7 8.828972e-02 7.429979e-02 1.18829035 2.347190e-01
## ar8 1.258571e-03 7.624262e-02 0.01650744 9.868296e-01
## ar9 8.768097e-02 7.153007e-02 1.22579176 2.202770e-01
## omega 2.450037e-05 2.924416e-05 0.83778682 4.021504e-01
## alpha1 2.337583e-01 1.287765e-01 1.81522418 6.948946e-02
## beta1 7.652392e-01 1.569318e-01 4.87625373 1.081195e-06
## shape 3.761687e+00 1.343719e+00 2.79945869 5.118837e-03
## [1] "FB"
## Estimate Std. Error t value Pr(>|t|)
## mu 1.103197e-03 1.060005e-03 1.0407470 0.29799298
## ar1 -1.100828e-01 8.148102e-02 -1.3510239 0.17668777
## ar2 7.269209e-02 7.695702e-02 0.9445803 0.34487316
## ar3 -1.408833e-02 7.692144e-02 -0.1831522 0.85467862
## ar4 -4.056923e-02 7.421605e-02 -0.5466369 0.58462825
## ar5 -3.475349e-02 7.106246e-02 -0.4890555 0.62480238
## ar6 4.739023e-02 7.493545e-02 0.6324141 0.52711634
## ar7 9.515701e-02 6.638571e-02 1.4333960 0.15174469
## omega 4.901197e-05 3.633246e-05 1.3489855 0.17734161
## alpha1 4.601461e-01 2.789960e-01 1.6492929 0.09908764
## beta1 4.692428e-01 2.838560e-01 1.6531016 0.09831020
## shape 3.261638e+01 1.178421e+02 0.2767804 0.78194874
## [1] "GOOG"
## Estimate Std. Error t value Pr(>|t|)
## mu 1.745419e-03 7.981142e-04 2.1869291 0.0287477032
## ar1 -1.144491e-01 7.382990e-02 -1.5501728 0.1211000457
## ar2 -8.131998e-02 7.209612e-02 -1.1279384 0.2593459302
## ar3 -4.538006e-02 7.119346e-02 -0.6374190 0.5238519788
## ar4 -6.434872e-02 7.357059e-02 -0.8746528 0.3817628574
## ar5 -2.209055e-02 7.488251e-02 -0.2950029 0.7679916956
## ar6 2.846210e-02 7.498092e-02 0.3795912 0.7042488606
## ar7 1.150059e-01 7.297966e-02 1.5758626 0.1150574659
## ar8 -2.009201e-02 6.466469e-02 -0.3107107 0.7560205737
## ar9 1.496309e-01 6.767711e-02 2.2109521 0.0270391523
## omega 2.591566e-05 2.783158e-05 0.9311600 0.3517707830
## alpha1 2.563903e-01 1.669178e-01 1.5360269 0.1245317725
## beta1 7.426097e-01 2.007569e-01 3.6990488 0.0002164090
## shape 3.188464e+00 9.023965e-01 3.5333298 0.0004103602
## [1] "GE"
## Estimate Std. Error t value Pr(>|t|)
## mu -0.002117824 2.022519e-06 -1047.122080 0.00000000
## ar1 -0.062999743 5.438901e-05 -1158.317500 0.00000000
## ar2 0.600303162 2.412883e-04 2487.908535 0.00000000
## ar3 0.186027399 8.462096e-05 2198.360660 0.00000000
## ar4 -0.745290245 2.702428e-04 -2757.853935 0.00000000
## ma1 -0.207893770 1.253322e-04 -1658.741984 0.00000000
## ma2 -0.586410998 2.446813e-04 -2396.632225 0.00000000
## ma3 0.110922135 1.282175e-04 865.109062 0.00000000
## ma4 1.008121396 3.230569e-04 3120.569494 0.00000000
## ma5 -0.057857052 3.236202e-05 -1787.807037 0.00000000
## ma6 0.087868058 4.373392e-05 2009.151372 0.00000000
## ma7 0.099609033 4.782429e-05 2082.812419 0.00000000
## omega 0.000226115 2.016024e-04 1.121589 0.26203740
## alpha1 0.455903004 4.232426e-01 1.077167 0.28140561
## beta1 0.455371496 1.803986e-01 2.524252 0.01159448
## shape 3.092001859 1.670667e+00 1.850759 0.06420418
## [1] "PG"
## Estimate Std. Error t value Pr(>|t|)
## mu 1.341153e-04 6.444471e-07 208.109150 0.000000e+00
## ar1 7.421954e-01 1.261071e-04 5885.435988 0.000000e+00
## ar2 7.114814e-01 1.206413e-04 5897.492285 0.000000e+00
## ar3 -5.371457e-01 9.107051e-05 -5898.129292 0.000000e+00
## ar4 4.719911e-02 2.041247e-05 2312.268038 0.000000e+00
## ma1 -9.402452e-01 6.142530e-04 -1530.713228 0.000000e+00
## ma2 -7.027761e-01 4.020936e-04 -1747.792318 0.000000e+00
## ma3 6.218166e-01 1.882717e-04 3302.762500 0.000000e+00
## omega 8.385115e-06 1.182808e-05 0.708916 4.783766e-01
## alpha1 2.625797e-01 7.863126e-02 3.339380 8.396549e-04
## beta1 7.363399e-01 1.030964e-01 7.142244 9.181544e-13
## shape 5.078784e+00 7.305070e-01 6.952410 3.590905e-12
## [1] "AMZN"
## Estimate Std. Error t value Pr(>|t|)
## mu 7.633696e-04 8.270722e-04 0.9229782 3.560186e-01
## ar1 -9.423775e-02 7.816358e-02 -1.2056477 2.279534e-01
## ar2 1.392131e-01 7.430129e-02 1.8736299 6.098145e-02
## ar3 4.932394e-02 7.444118e-02 0.6625894 5.075936e-01
## ar4 -6.420778e-02 7.550703e-02 -0.8503550 3.951277e-01
## ar5 -1.266740e-02 7.153865e-02 -0.1770708 8.594528e-01
## ar6 5.212815e-02 7.263238e-02 0.7176985 4.729432e-01
## ar7 9.246147e-02 6.942511e-02 1.3318160 1.829207e-01
## ar8 -1.234066e-01 6.989609e-02 -1.7655730 7.746752e-02
## omega 1.111474e-05 8.156383e-06 1.3627045 1.729757e-01
## alpha1 1.994853e-01 7.735557e-02 2.5788094 9.914147e-03
## beta1 7.995145e-01 2.635150e-02 30.3403805 0.000000e+00
## shape 4.457273e+00 9.002633e-01 4.9510766 7.380405e-07
## [1] "TSLA"
## Estimate Std. Error t value Pr(>|t|)
## mu 6.157080e-03 0.0017476314 3.523100 4.265308e-04
## omega 8.486799e-05 0.0001538637 0.551579 5.812368e-01
## alpha1 1.113458e-01 0.0848657045 1.312023 1.895123e-01
## beta1 8.876542e-01 0.1493125393 5.944941 2.765565e-09
## shape 2.584444e+00 0.3988481236 6.479770 9.186230e-11
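As an alternative to the rolling re-estimation performed with ugarchroll above, a single fixed-origin forecast can be produced with rugarch's ugarchforecast; a minimal sketch for one fitted model:
# Hedged sketch: 50-step fixed-origin forecast from a fitted ugarchfit object
fc <- ugarchforecast(arima_garch_models[["TSLA"]], n.ahead = 50)
head(fitted(fc))  # conditional mean forecasts
head(sigma(fc))   # conditional volatility forecasts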
Check whether the residuals of each model resemble white noise, and test them for autocorrelation and normality.
plots <- list()
for (i in c(1:length(arima_garch_models)))
{
title <- arima_garch_titles[[i]]
model <- arima_garch_models[[i]]
fitted <- model@fit[["fitted.values"]]
residuals <- model@fit[["residuals"]]
df <- as.data.frame(cbind(fitted,residuals))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
scale_x_log10() +
ggtitle(label=title)
plots[[i]] <- plot
}
for (i in c(1:length(arima_garch_models)))
{
title <- arima_garch_titles[[i]]
print(title)
residuals <- arima_garch_resfits[[i]]
checkresiduals(residuals)
print(shapiro.test(residuals))
}## [1] "AAPL _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.81542, p-value = 1.187e-14
##
## [1] "FB _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.86325, p-value = 1.983e-12
##
## [1] "GOOG _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.82706, p-value = 3.755e-14
##
## [1] "GE _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.85561, p-value = 8.121e-13
##
## [1] "PG _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.77116, p-value = 2.273e-16
##
## [1] "AMZN _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.91646, p-value = 3.227e-09
##
## [1] "TSLA _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.83769, p-value = 1.128e-13
Before fitting factor models, examine the correlation between each stock's excess log returns and the three Fama-French factors.
for (stock in c(stockTickers))
{
print(paste("For Stock:",stock))
temp_df <- as.data.frame(LNreturnsData[,c(stock,ff_factors,"rf")])
temp_df[,stock] <- temp_df[,stock] - temp_df$rf
temp_df <- temp_df[,1:4]
plot(ggcorr(temp_df,
method = c("everything", "pearson"),
label = TRUE,
label_round= 3))
}## [1] "For Stock: AAPL"
## [1] "For Stock: FB"
## [1] "For Stock: GOOG"
## [1] "For Stock: GE"
## [1] "For Stock: PG"
## [1] "For Stock: AMZN"
## [1] "For Stock: TSLA"
Fit a dynamic linear Fama-French model for each stock, regressing the excess log return z = r - rf on lags 1 through 5 of the market excess return (zm), size (smb), and value (hml) factors.
famafrench_models <- list()
famafrench_titles <- list()
famafrench_plots <- list()
famafrench_model_sums <- list()
famafrench_resfits <- list()
famafrench_aic <- list()
famafrench_rmse <- list()
rf <- log(LNreturnsData[,"rf"]+1)
factors <- LNreturnsData[,ff_factors]
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- max(date)
for (stock in c(stockTickers))
{
r <- LNreturnsData[,stock]
z <- r-rf
df <- cbind.data.frame(date,z,factors)
colnames(df) <- c("date","z","zm","smb","hml")
train <- head(df,nrow(df)-50)
test <- tail(df,50)
model <- dyn$lm(z ~
lag(zm,-1) +
lag(zm,-2) +
lag(zm,-3) +
lag(zm,-4) +
lag(zm,-5) +
lag(smb,-1) +
lag(smb,-2) +
lag(smb,-3) +
lag(smb,-4) +
lag(smb,-5) +
lag(hml,-1) +
lag(hml,-2) +
lag(hml,-3) +
lag(hml,-4) +
lag(hml,-5),
data=ts(train))
fitted <- as.data.frame(c(rep(NA, 5), as.vector(fitted(model))))  # pad with NAs for the 5 observations lost to lagging
train <- cbind(train,fitted)
colnames(train) <- c("date","z","zm","smb","hml","fitted")
predictInt <- as.data.frame(predict(model,newdata = ts(tail(df,54)),interval = "prediction"))
predictInt <- na.omit(predictInt)
row.names(predictInt) <- (nrow(df)-49):nrow(df)
test <- cbind(test,predictInt)
test <- rename(test,predicted=fit)
colors <- c("Train"="black","Fitted"="red","Test"="green4","Forecast"="blue")
plot <- ggplot(train, aes(x = date, y = z,color="Train")) +
geom_line() +
geom_line(aes(y=fitted,color = "Fitted")) +
geom_line(data=test,aes(y=z, color = "Test")) +
geom_line(data=test,aes(y=predicted, color = "Forecast")) +
geom_ribbon(data=test,aes(ymin=lwr,ymax=upr),fill="grey",alpha=0.5) +
coord_cartesian(xlim = c(max(date)-125,max(date)+10)) +
scale_x_date(date_labels ="%m/%d/%Y") +
scale_color_manual(values = colors) +
labs(x = "Date",y = "Log Return",color = "Legend") +
ggtitle(label = paste("Fama-French model for Stock:",stock)) +
labs(caption = paste("From:",start,"to",end,"(last 125 days shown)"))
famafrench_models[[stock]] <- model
label <- paste(stock,"_FamaFrench")
famafrench_titles[[stock]] <- label
famafrench_plots[[label]] <- plot
famafrench_model_sums[[label]] <- summary(model)
label <- paste(stock,"_FamaFrench_Residuals",sep = "")
famafrench_resfits[[label]] <- residuals(model)
label <- stock
famafrench_aic[[label]] <- round(AIC(model),4)
famafrench_rmse[[label]] <- round(rmse(test$z, predict(model, test)),4)
}
print(famafrench_model_sums)
## $`AAPL _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.096531 -0.008097 0.001547 0.011401 0.083056
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0051758 0.0016507 -3.136 0.00201 **
## lag(zm, -1) -0.0046312 0.0011841 -3.911 0.00013 ***
## lag(zm, -2) 0.0002498 0.0012688 0.197 0.84414
## lag(zm, -3) 0.0017815 0.0013077 1.362 0.17481
## lag(zm, -4) -0.0005875 0.0013315 -0.441 0.65960
## lag(zm, -5) 0.0001734 0.0013954 0.124 0.90126
## lag(smb, -1) 0.0082301 0.0032226 2.554 0.01149 *
## lag(smb, -2) 0.0018676 0.0033225 0.562 0.57474
## lag(smb, -3) 0.0008159 0.0033238 0.245 0.80636
## lag(smb, -4) -0.0011517 0.0033251 -0.346 0.72947
## lag(smb, -5) 0.0005752 0.0033121 0.174 0.86232
## lag(hml, -1) -0.0020547 0.0023763 -0.865 0.38837
## lag(hml, -2) 0.0001039 0.0024554 0.042 0.96629
## lag(hml, -3) 0.0040521 0.0024486 1.655 0.09970 .
## lag(hml, -4) -0.0034198 0.0026233 -1.304 0.19404
## lag(hml, -5) 0.0038567 0.0026290 1.467 0.14414
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02164 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.2798, Adjusted R-squared: 0.2195
## F-statistic: 4.637 on 15 and 179 DF, p-value: 1.82e-07
##
##
## $`FB _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.084136 -0.009460 0.001651 0.012612 0.045797
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.479e-03 1.468e-03 -5.094 8.83e-07 ***
## lag(zm, -1) -5.073e-03 1.053e-03 -4.817 3.10e-06 ***
## lag(zm, -2) 2.245e-03 1.128e-03 1.990 0.04814 *
## lag(zm, -3) 3.158e-03 1.163e-03 2.715 0.00728 **
## lag(zm, -4) -2.508e-04 1.184e-03 -0.212 0.83251
## lag(zm, -5) 3.035e-04 1.241e-03 0.245 0.80710
## lag(smb, -1) 5.008e-03 2.866e-03 1.747 0.08230 .
## lag(smb, -2) 4.509e-03 2.955e-03 1.526 0.12880
## lag(smb, -3) -2.946e-03 2.956e-03 -0.997 0.32030
## lag(smb, -4) -1.128e-03 2.957e-03 -0.382 0.70327
## lag(smb, -5) -1.951e-05 2.946e-03 -0.007 0.99472
## lag(hml, -1) -2.602e-03 2.113e-03 -1.231 0.21980
## lag(hml, -2) -2.790e-03 2.184e-03 -1.278 0.20301
## lag(hml, -3) 5.396e-03 2.178e-03 2.478 0.01415 *
## lag(hml, -4) -9.101e-04 2.333e-03 -0.390 0.69695
## lag(hml, -5) 3.625e-03 2.338e-03 1.550 0.12281
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01925 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.3581, Adjusted R-squared: 0.3043
## F-statistic: 6.657 on 15 and 179 DF, p-value: 2.877e-11
##
##
## $`GOOG _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.066802 -0.007592 0.001034 0.008129 0.083094
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.022e-03 1.360e-03 -4.426 1.66e-05 ***
## lag(zm, -1) -4.703e-03 9.759e-04 -4.819 3.07e-06 ***
## lag(zm, -2) 1.701e-03 1.046e-03 1.626 0.10566
## lag(zm, -3) 2.060e-03 1.078e-03 1.911 0.05755 .
## lag(zm, -4) -4.265e-04 1.097e-03 -0.389 0.69801
## lag(zm, -5) -1.105e-03 1.150e-03 -0.961 0.33787
## lag(smb, -1) 4.582e-03 2.656e-03 1.725 0.08627 .
## lag(smb, -2) 3.406e-03 2.738e-03 1.244 0.21516
## lag(smb, -3) 1.194e-03 2.739e-03 0.436 0.66339
## lag(smb, -4) -5.089e-05 2.741e-03 -0.019 0.98521
## lag(smb, -5) 7.943e-04 2.730e-03 0.291 0.77140
## lag(hml, -1) -1.382e-03 1.959e-03 -0.706 0.48121
## lag(hml, -2) 1.144e-04 2.024e-03 0.057 0.95500
## lag(hml, -3) 5.920e-03 2.018e-03 2.934 0.00379 **
## lag(hml, -4) -1.808e-03 2.162e-03 -0.836 0.40406
## lag(hml, -5) 2.289e-03 2.167e-03 1.057 0.29216
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01784 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.3339, Adjusted R-squared: 0.278
## F-statistic: 5.981 on 15 and 179 DF, p-value: 5.095e-10
##
##
## $`GE _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.138256 -0.011334 -0.000839 0.010854 0.109396
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0074231 0.0023828 -3.115 0.00214 **
## lag(zm, -1) -0.0052737 0.0017093 -3.085 0.00236 **
## lag(zm, -2) 0.0013234 0.0018316 0.723 0.47089
## lag(zm, -3) 0.0016665 0.0018877 0.883 0.37851
## lag(zm, -4) 0.0010255 0.0019221 0.534 0.59434
## lag(zm, -5) 0.0005717 0.0020143 0.284 0.77689
## lag(smb, -1) 0.0033377 0.0046520 0.717 0.47401
## lag(smb, -2) 0.0043733 0.0047961 0.912 0.36308
## lag(smb, -3) 0.0012808 0.0047980 0.267 0.78981
## lag(smb, -4) -0.0008123 0.0048000 -0.169 0.86580
## lag(smb, -5) -0.0032222 0.0047811 -0.674 0.50122
## lag(hml, -1) 0.0004706 0.0034303 0.137 0.89103
## lag(hml, -2) 0.0007205 0.0035444 0.203 0.83915
## lag(hml, -3) 0.0063964 0.0035346 1.810 0.07203 .
## lag(hml, -4) -0.0011070 0.0037868 -0.292 0.77038
## lag(hml, -5) 0.0080490 0.0037950 2.121 0.03531 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03124 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.1871, Adjusted R-squared: 0.119
## F-statistic: 2.747 on 15 and 179 DF, p-value: 0.0007871
##
##
## $`PG _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.089711 -0.004965 0.000685 0.008010 0.051285
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0065327 0.0012147 -5.378 2.33e-07 ***
## lag(zm, -1) -0.0040107 0.0008713 -4.603 7.87e-06 ***
## lag(zm, -2) 0.0012282 0.0009337 1.315 0.19006
## lag(zm, -3) 0.0009051 0.0009623 0.941 0.34820
## lag(zm, -4) -0.0013258 0.0009798 -1.353 0.17775
## lag(zm, -5) -0.0004770 0.0010269 -0.464 0.64286
## lag(smb, -1) 0.0035648 0.0023714 1.503 0.13454
## lag(smb, -2) 0.0023225 0.0024449 0.950 0.34344
## lag(smb, -3) -0.0014090 0.0024459 -0.576 0.56529
## lag(smb, -4) -0.0017078 0.0024469 -0.698 0.48611
## lag(smb, -5) 0.0001877 0.0024373 0.077 0.93870
## lag(hml, -1) -0.0020262 0.0017487 -1.159 0.24811
## lag(hml, -2) 0.0029394 0.0018069 1.627 0.10553
## lag(hml, -3) 0.0054876 0.0018019 3.046 0.00267 **
## lag(hml, -4) -0.0026290 0.0019304 -1.362 0.17495
## lag(hml, -5) -0.0016634 0.0019346 -0.860 0.39104
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01592 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.3452, Adjusted R-squared: 0.2903
## F-statistic: 6.29 on 15 and 179 DF, p-value: 1.357e-10
##
##
## $`AMZN _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.074965 -0.006987 0.000583 0.008321 0.073823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.213e-03 1.254e-03 -5.752 3.76e-08 ***
## lag(zm, -1) -3.143e-03 8.996e-04 -3.494 0.000599 ***
## lag(zm, -2) 1.273e-03 9.640e-04 1.321 0.188315
## lag(zm, -3) 1.355e-03 9.935e-04 1.364 0.174202
## lag(zm, -4) -1.149e-03 1.012e-03 -1.136 0.257440
## lag(zm, -5) -6.959e-04 1.060e-03 -0.656 0.512399
## lag(smb, -1) 2.445e-03 2.448e-03 0.999 0.319320
## lag(smb, -2) 9.119e-05 2.524e-03 0.036 0.971223
## lag(smb, -3) -1.506e-03 2.525e-03 -0.596 0.551606
## lag(smb, -4) -1.998e-03 2.526e-03 -0.791 0.430070
## lag(smb, -5) -7.390e-04 2.516e-03 -0.294 0.769343
## lag(hml, -1) -7.241e-04 1.805e-03 -0.401 0.688841
## lag(hml, -2) 2.148e-04 1.865e-03 0.115 0.908456
## lag(hml, -3) 4.911e-03 1.860e-03 2.640 0.009031 **
## lag(hml, -4) -1.356e-03 1.993e-03 -0.680 0.497276
## lag(hml, -5) -8.584e-05 1.997e-03 -0.043 0.965769
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01644 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.223, Adjusted R-squared: 0.1578
## F-statistic: 3.424 on 15 and 179 DF, p-value: 4.044e-05
##
##
## $`TSLA _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.163220 -0.018067 0.000805 0.018476 0.172205
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0017733 0.0033424 -0.531 0.596
## lag(zm, -1) 0.0001504 0.0023977 0.063 0.950
## lag(zm, -2) 0.0042193 0.0025692 1.642 0.102
## lag(zm, -3) 0.0034362 0.0026480 1.298 0.196
## lag(zm, -4) 0.0040320 0.0026962 1.495 0.137
## lag(zm, -5) 0.0017572 0.0028256 0.622 0.535
## lag(smb, -1) -0.0016971 0.0065255 -0.260 0.795
## lag(smb, -2) -0.0046190 0.0067277 -0.687 0.493
## lag(smb, -3) 0.0109193 0.0067304 1.622 0.106
## lag(smb, -4) 0.0078797 0.0067331 1.170 0.243
## lag(smb, -5) 0.0097444 0.0067067 1.453 0.148
## lag(hml, -1) -0.0051803 0.0048118 -1.077 0.283
## lag(hml, -2) 0.0048022 0.0049719 0.966 0.335
## lag(hml, -3) 0.0054301 0.0049582 1.095 0.275
## lag(hml, -4) -0.0063700 0.0053119 -1.199 0.232
## lag(hml, -5) 0.0072876 0.0053235 1.369 0.173
##
## Residual standard error: 0.04382 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.1728, Adjusted R-squared: 0.1034
## F-statistic: 2.492 on 15 and 179 DF, p-value: 0.002338
Check whether the residuals of each model resemble white noise, and test them for autocorrelation and normality.
plots <- list()
for (i in c(1:length(famafrench_models)))
{
title <- famafrench_titles[[i]]
model <- famafrench_models[[i]]
df <- as.data.frame(cbind(fitted(model),model$residuals))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
scale_x_log10() +
ggtitle(label=title)
plots[[i]] <- plot
}
for (i in c(1:length(famafrench_models)))
{
title <- famafrench_titles[[i]]
print(title)
model <- famafrench_models[[i]]
checkresiduals(model)
residuals <- famafrench_resfits[[i]]
print(shapiro.test(residuals))
}## [1] "AAPL _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.92188, p-value = 1.12e-08
##
## [1] "FB _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.96406, p-value = 6.999e-05
##
## [1] "GOOG _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.93153, p-value = 6.136e-08
##
## [1] "GE _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.89841, p-value = 2.897e-10
##
## [1] "PG _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.90096, p-value = 4.192e-10
##
## [1] "AMZN _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.92828, p-value = 3.411e-08
##
## [1] "TSLA _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.9139, p-value = 3.016e-09
Next, fit ARIMAX models: ARIMA with the previously selected orders and the three Fama-French factors as exogenous regressors, applied to excess log returns.
arimax_models <- list()
arimax_titles <- list()
arimax_plots <- list()
arimax_resfits <- list()
arimax_aic <- list()
arimax_rmse <- list()
r <- LNreturnsData[,stockTickers]
rf <- LNreturnsData[,"rf"]
z <- as.data.frame(r-rf)
xregs <- as.matrix(LNreturnsData[,ff_factors])
xregs_train <- head(xregs,nrow(xregs)-50)
xregs_test <- tail(xregs,50)
row.names(xregs_test) <- (nrow(xregs)-49):nrow(xregs)
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- nrow(LNreturnsData)
for (stock in c(stockTickers))
{
p <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),2]
d <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),3]
q <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),4]
title <- paste("ARIMAX Model For Stock:", stock,"; (p,d,q)=", p,d,q)
log_returns <- ts(z[,stock])
train <- head(log_returns,length(log_returns)-50)
test <- tail(log_returns,50)
model <- Arima(train, order=c(p,d,q),xreg = xregs_train,method="ML")
forecast <- forecast(model, h = 50,xreg = xregs_test)
plot <- autoplot(forecast,main = title,xlab = "Days",ylab = "Log Returns") +
autolayer(train, series = 'Train') +
autolayer(fitted(model), series = 'Fitted') +
autolayer(forecast$mean, series = 'Forecast') +
autolayer(test, series = 'Test') +
coord_cartesian(xlim = c(plot_midpoint-125,plot_midpoint+10)) +
labs(caption = paste("From:",start,"to",end,"(last 125 days shown)"))
arimax_models[[stock]] <- model
label <- paste(stock,"_ARIMAX_Model_(p,d,q)=", p,d,q)
arimax_titles[[stock]] <- label
arimax_plots[[label]] <- plot
label <- paste(stock,"_ARIMAX_Model_Residuals",sep = "")
arimax_resfits[[label]] <- residuals(model)
label <- stock
arimax_aic[[label]] <- round(AIC(model),4)
arimax_rmse[[label]] <- round(rmse(test, forecast$mean),4)
}
print(arimax_models)
## $AAPL
## Series: train
## Regression with ARIMA(9,0,0) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8 ar9
## 0.0186 -0.1638 -0.1269 0.0792 0.0144 0.0101 0.0164 0.0012 0.0169
## s.e. 0.0722 0.0749 0.0743 0.0750 0.0758 0.0762 0.0741 0.0741 0.0746
## intercept zm smb hml
## -0.0054 0.0119 -0.0050 -0.0018
## s.e. 0.0006 0.0004 0.0012 0.0010
##
## sigma^2 estimated as 9.69e-05: log likelihood=647.04
## AIC=-1266.09 AICc=-1263.82 BIC=-1219.91
##
## $FB
## Series: train
## Regression with ARIMA(7,0,0) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 intercept
## -0.0759 -0.1005 -0.1503 -0.0825 -0.0500 0.0513 -0.0589 -0.0077
## s.e. 0.0724 0.0756 0.0789 0.0765 0.0768 0.0783 0.0766 0.0006
## zm smb hml
## 0.0113 0.0006 -0.0051
## s.e. 0.0005 0.0015 0.0012
##
## sigma^2 estimated as 0.0001496: log likelihood=602.55
## AIC=-1181.11 AICc=-1179.44 BIC=-1141.53
##
## $GOOG
## Series: train
## Regression with ARIMA(9,0,0) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## -0.0945 -0.1786 -0.0044 -0.0578 -0.0894 -0.0431 -0.0250 0.0399
## s.e. 0.0720 0.0789 0.0768 0.0748 0.0744 0.0743 0.0766 0.0733
## ar9 intercept zm smb hml
## 0.0745 -0.0067 0.0099 -0.0012 -0.0021
## s.e. 0.0731 0.0005 0.0004 0.0013 0.0011
##
## sigma^2 estimated as 0.0001062: log likelihood=637.89
## AIC=-1247.79 AICc=-1245.52 BIC=-1201.61
##
## $GE
## Series: train
## Regression with ARIMA(4,0,7) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## 0.0505 0.2695 -0.3801 -0.512 -0.1727 -0.2452 0.4001 0.5801
## s.e. NaN NaN 0.2405 NaN NaN NaN NaN NaN
## ma5 ma6 ma7 intercept zm smb hml
## -0.0228 0.0546 0.0441 -0.0071 0.0122 0.0047 0.0056
## s.e. NaN NaN NaN 0.0015 0.0008 0.0025 0.0022
##
## sigma^2 estimated as 0.000462: log likelihood=491.46
## AIC=-950.93 AICc=-947.96 BIC=-898.16
##
## $PG
## Series: train
## Regression with ARIMA(4,0,3) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 intercept
## 0.5738 0.4030 -0.7824 0.1069 -0.3860 -0.5248 0.4852 -0.0071
## s.e. 0.3931 0.1962 0.3106 0.1622 0.3854 0.1835 0.3751 0.0006
## zm smb hml
## 0.0081 -0.0061 -0.0026
## s.e. 0.0004 0.0012 0.0010
##
## sigma^2 estimated as 0.0001109: log likelihood=632.29
## AIC=-1240.57 AICc=-1238.9 BIC=-1200.99
##
## $AMZN
## Series: train
## Regression with ARIMA(8,0,0) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.0692 0.1293 -0.0523 -0.0130 0.0787 0.1269 -0.0183 -0.0789
## s.e. 0.0716 0.0729 0.0760 0.0743 0.0771 0.0761 0.0757 0.0766
## intercept zm smb hml
## -0.0072 8e-03 -0.0036 -0.0039
## s.e. 0.0010 4e-04 0.0013 0.0011
##
## sigma^2 estimated as 0.0001172: log likelihood=627.46
## AIC=-1228.92 AICc=-1226.97 BIC=-1186.05
##
## $TSLA
## Series: train
## Regression with ARIMA(0,0,0) errors
##
## Coefficients:
## intercept zm smb hml
## -0.0023 0.0137 0.0120 -0.0036
## s.e. 0.0028 0.0016 0.0047 0.0038
##
## sigma^2 estimated as 0.001513: log likelihood=367.57
## AIC=-725.14 AICc=-724.83 BIC=-708.65
Check whether the residuals of each model resemble white noise, and test them for autocorrelation and normality.
plots <- list()
for (i in c(1:length(arimax_models)))
{
title <- arimax_titles[[i]]
model <- arimax_models[[i]]
df <- as.data.frame(cbind(fitted(model),residuals(model)))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
scale_x_log10() +
ggtitle(label=title)
plots[[i]] <- plot
}
for (i in c(1:length(arimax_models)))
{
title <- arimax_titles[[i]]
print(title)
model <- arimax_models[[i]]
checkresiduals(model)
residuals <- arimax_resfits[[i]]
print(shapiro.test(residuals))
}## [1] "AAPL _ARIMAX_Model_(p,d,q)= 9 0 0"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(9,0,0) errors
## Q* = 2.3656, df = 3, p-value = 0.5001
##
## Model df: 13. Total lags used: 16
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.99483, p-value = 0.7244
##
## [1] "FB _ARIMAX_Model_(p,d,q)= 7 0 0"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(7,0,0) errors
## Q* = 5.736, df = 3, p-value = 0.1252
##
## Model df: 11. Total lags used: 14
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.95826, p-value = 1.287e-05
##
## [1] "GOOG _ARIMAX_Model_(p,d,q)= 9 0 0"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(9,0,0) errors
## Q* = 5.6505, df = 3, p-value = 0.1299
##
## Model df: 13. Total lags used: 16
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.78345, p-value = 6.423e-16
##
## [1] "GE _ARIMAX_Model_(p,d,q)= 4 0 7"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(4,0,7) errors
## Q* = 1.2632, df = 3, p-value = 0.7379
##
## Model df: 15. Total lags used: 18
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.84548, p-value = 2.607e-13
##
## [1] "PG _ARIMAX_Model_(p,d,q)= 4 0 3"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(4,0,3) errors
## Q* = 12.371, df = 3, p-value = 0.006215
##
## Model df: 11. Total lags used: 14
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.96221, p-value = 3.456e-05
##
## [1] "AMZN _ARIMAX_Model_(p,d,q)= 8 0 0"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(8,0,0) errors
## Q* = 4.6201, df = 3, p-value = 0.2018
##
## Model df: 12. Total lags used: 15
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.80593, p-value = 4.822e-15
##
## [1] "TSLA _ARIMAX_Model_(p,d,q)= 0 0 0"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,0) errors
## Q* = 10.185, df = 6, p-value = 0.1171
##
## Model df: 4. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.88076, p-value = 1.758e-11
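Four model families have now been fitted, each accumulating per-stock AIC and test-set RMSE lists, which can be assembled into a comparison table. A minimal sketch (note the targets differ slightly: raw log returns for ARIMA and ARIMA-GARCH versus excess log returns for the factor models):
# Hedged sketch: side-by-side test-set RMSE for the model families fitted so far
rmse_table <- data.frame(ARIMA = unlist(arima_rmse),
                         ARIMA_GARCH = unlist(arima_garch_rmse),
                         FamaFrench = unlist(famafrench_rmse),
                         ARIMAX = unlist(arimax_rmse))
print(rmse_table)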
Use the auto.arima function to automatically select the optimal orders as well as seasonal components (the series are given a weekly frequency of 5 trading days).
auto_sarimax_models <- list()
auto_sarimax_titles <- list()
auto_sarimax_plots <- list()
auto_sarimax_resfits <- list()
auto_sarimax_aic <- list()
auto_sarimax_rmse <- list()
r <- LNreturnsData[,stockTickers]
rf <- LNreturnsData[,"rf"]
z <- as.data.frame(r-rf)
xregs <- as.matrix(LNreturnsData[,ff_factors])
xregs_train <- head(xregs,nrow(xregs)-50)
xregs_test <- tail(xregs,50)
row.names(xregs_test) <- (nrow(xregs)-49):nrow(xregs)
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- nrow(LNreturnsData)/5
for (stock in c(stockTickers))
{
title <- paste("SARIMAX Model For Stock:", stock)
log_returns <- ts(z[,stock],frequency = 5)
train <- head(log_returns,length(log_returns)-50)
test <- tail(log_returns,50)
model <- auto.arima(train,xreg = xregs_train)
forecast <- forecast(model, h = 50,xreg = xregs_test)
plot <- autoplot(forecast,main = title,xlab = "Weeks",ylab = "Log Returns") +
autolayer(train, series = 'Train') +
autolayer(fitted(model), series = 'Fitted') +
autolayer(forecast$mean, series = 'Forecast') +
autolayer(test, series = 'Test') +
coord_cartesian(xlim = c(plot_midpoint-25,plot_midpoint+1)) +
labs(caption = paste("From:",start,"to",end,"(last 25 weeks shown)"))
auto_sarimax_models[[stock]] <- model
label <- paste(stock,"_Auto_SARIMAX_Model=")
auto_sarimax_titles[[stock]] <- label
auto_sarimax_plots[[label]] <- plot
label <- paste(stock,"_Auto_SARIMAX_Model_Residuals",sep = "")
auto_sarimax_resfits[[label]] <- residuals(model)
label <- stock
auto_sarimax_aic[[label]] <- round(AIC(model),4)
auto_sarimax_rmse[[label]] <- round(rmse(test, forecast$mean),4)
}
print(auto_sarimax_models)
## $AAPL
## Series: train
## Regression with ARIMA(0,0,0) errors
##
## Coefficients:
## intercept zm smb hml
## -0.0054 0.0121 -0.0045 -0.002
## s.e. 0.0007 0.0004 0.0012 0.001
##
## sigma^2 estimated as 9.772e-05: log likelihood=641.57
## AIC=-1273.14 AICc=-1272.83 BIC=-1256.65
##
## $FB
## Series: train
## Regression with ARIMA(0,0,0)(2,0,0)[5] errors
##
## Coefficients:
## sar1 sar2 intercept zm smb hml
## -0.0170 0.0339 -0.0078 0.0111 0.0004 -0.0054
## s.e. 0.0764 0.0794 0.0009 0.0005 0.0015 0.0012
##
## sigma^2 estimated as 0.0001517: log likelihood=598.63
## AIC=-1183.27 AICc=-1182.68 BIC=-1160.18
##
## $GOOG
## Series: train
## Regression with ARIMA(0,0,2)(1,0,0)[5] errors
##
## Coefficients:
## ma1 ma2 sar1 intercept zm smb hml
## -0.0897 -0.1831 -0.0864 -0.0067 0.0099 -0.0010 -0.0021
## s.e. 0.0721 0.0804 0.0735 0.0005 0.0004 0.0013 0.0010
##
## sigma^2 estimated as 0.0001039: log likelihood=636.94
## AIC=-1257.89 AICc=-1257.14 BIC=-1231.5
##
## $GE
## Series: train
## Regression with ARIMA(0,0,0) errors
##
## Coefficients:
## intercept zm smb hml
## -0.0072 0.0122 0.0041 0.0054
## s.e. 0.0015 0.0009 0.0026 0.0021
##
## sigma^2 estimated as 0.0004645: log likelihood=485.69
## AIC=-961.38 AICc=-961.07 BIC=-944.89
##
## $PG
## Series: train
## Regression with ARIMA(2,0,2)(0,0,1)[5] errors
##
## Coefficients:
## ar1 ar2 ma1 ma2 sma1 intercept zm smb
## 0.1781 -0.8144 0.0503 0.8977 -0.3504 -0.0071 0.0082 -0.0062
## s.e. 0.1314 0.0652 0.1026 0.0639 0.0775 0.0006 0.0004 0.0012
## hml
## -0.0024
## s.e. 0.0010
##
## sigma^2 estimated as 0.0001062: log likelihood=635.18
## AIC=-1250.36 AICc=-1249.19 BIC=-1217.37
##
## $AMZN
## Series: train
## Regression with ARIMA(0,0,0)(1,0,0)[5] errors
##
## Coefficients:
## sar1 intercept zm smb hml
## 0.0667 -0.0073 8e-03 -0.0037 -0.0044
## s.e. 0.0764 0.0008 5e-04 0.0013 0.0011
##
## sigma^2 estimated as 0.0001177: log likelihood=623.47
## AIC=-1234.94 AICc=-1234.51 BIC=-1215.15
##
## $TSLA
## Series: train
## Regression with ARIMA(1,0,1) errors
##
## Coefficients:
## ar1 ma1 zm smb hml
## -0.5505 0.7375 0.0129 0.0124 -0.0036
## s.e. 0.1677 0.1330 0.0015 0.0045 0.0037
##
## sigma^2 estimated as 0.001456: log likelihood=371.91
## AIC=-731.81 AICc=-731.38 BIC=-712.02
Check whether the residuals of each model resemble white noise, and test them for autocorrelation and normality.
plots <- list()
for (stock in c(stockTickers))
{
title <- auto_sarimax_titles[[stock]]
model <- auto_sarimax_models[[stock]]
df <- as.data.frame(cbind(fitted(model),model$residuals))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
scale_x_log10() +
ggtitle(label=title)
plots[[stock]] <- plot
}
for (i in c(1:length(auto_sarimax_models)))
{
title <- auto_sarimax_titles[[i]]
print(title)
model <- auto_sarimax_models[[i]]
checkresiduals(model)
residuals <- auto_sarimax_resfits[[i]]
print(shapiro.test(residuals))
}## [1] "AAPL _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,0) errors
## Q* = 12.493, df = 6, p-value = 0.05183
##
## Model df: 4. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.99562, p-value = 0.8359
##
## [1] "FB _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,0)(2,0,0)[5] errors
## Q* = 8.5698, df = 4, p-value = 0.0728
##
## Model df: 6. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.95517, p-value = 6.146e-06
##
## [1] "GOOG _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,2)(1,0,0)[5] errors
## Q* = 3.063, df = 3, p-value = 0.382
##
## Model df: 7. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.78414, p-value = 6.819e-16
##
## [1] "GE _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,0) errors
## Q* = 6.0952, df = 6, p-value = 0.4126
##
## Model df: 4. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.83498, p-value = 8.481e-14
##
## [1] "PG _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,0,2)(0,0,1)[5] errors
## Q* = 7.0682, df = 3, p-value = 0.06976
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.95991, p-value = 1.932e-05
##
## [1] "AMZN _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,0)(1,0,0)[5] errors
## Q* = 8.4427, df = 5, p-value = 0.1335
##
## Model df: 5. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.8191, p-value = 1.698e-14
##
## [1] "TSLA _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,0,1) errors
## Q* = 3.8256, df = 5, p-value = 0.5748
##
## Model df: 5. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.88644, p-value = 3.735e-11
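With seven pairs of tests to read off, a compact p-value table can help. A sketch under the assumption that the residual series stored in auto_sarimax_resfits above are used (note this plain Box.test call does not apply the model-df adjustment that checkresiduals uses):
# Tabulate lag-10 Ljung-Box and Shapiro-Wilk p-values per model
diag_pvals <- t(sapply(auto_sarimax_resfits, function(r) c(
  ljung_box = Box.test(r, lag = 10, type = "Ljung-Box")$p.value,
  shapiro   = shapiro.test(r)$p.value)))
round(diag_pvals, 4)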
For each stock, plot the STL decomposition of the log-return time series to assess whether seasonality (specifically, weekly) is present.
plots <- list()
for (stock in c(stockTickers))
{
print(paste("For Stock:",stock))
returns <- LNreturnsData[,stock]
returns <- ts(as.vector(returns),frequency = 5)
plot <- autoplot(stl(returns, s.window = "periodic")) + ggtitle(label=stock)
plots[[stock]] <- plot
}## [1] "For Stock: AAPL"
## [1] "For Stock: FB"
## [1] "For Stock: GOOG"
## [1] "For Stock: GE"
## [1] "For Stock: PG"
## [1] "For Stock: AMZN"
## [1] "For Stock: TSLA"
hw_models <- list()
hw_titles <- list()
hw_plots <- list()
hw_resfits <- list()
hw_aic <- list()
hw_rmse <- list()
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- nrow(LNreturnsData)/5
for (stock in c(stockTickers))
{
title <- paste("HW Model For Stock:", stock)
log_returns <- ts(LNreturnsData[,stock],frequency = 5)
train <- head(log_returns,length(log_returns)-50)
test <- tail(log_returns,50)
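# hw() fits level, trend and a 5-day (weekly) additive seasonal component,
# then produces the 50-step forecast for the holdout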
model <- hw(train,h=50, seasonal='additive')
plot <- autoplot(model,main = title,xlab = "Weeks",ylab = "Log Returns") +
autolayer(train, series = 'Train') +
autolayer(fitted(model), series = 'Fitted') +
autolayer(model$mean, series = 'Forecast') +
autolayer(test, series = 'Test') +
coord_cartesian(xlim = c(plot_midpoint-25,plot_midpoint+1)) +
labs(caption = paste("From:",start,"to",end,"(last 25 weeks shown)"))
hw_models[[stock]] <- model
label <- paste(stock,"_HW_Model")
hw_titles[[stock]] <- label
hw_plots[[label]] <- plot
label <- paste(stock,"_HW_Model_Residuals",sep = "")
hw_resfits[[label]] <- residuals(model)
label <- stock
hw_aic[[label]] <- round(model$model$aic,4)
hw_rmse[[label]] <- round(rmse(test, model$mean),4)
}
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0198
## beta = 0.0088
## gamma = 1e-04
##
## Initial states:
## l = 0.0099
## b = -0.0011
## s = 0.0048 -0.0014 -0.0042 0.0032 -0.0024
##
## sigma: 0.0247
##
## AIC AICc BIC
## -410.7185 -409.5545 -377.7353
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 4.905666e-05 0.02409009 0.01535927 102.6073 316.7456 0.7808198
## ACF1
## Training set -0.3812695
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.02435048 -0.05594216 0.0072412008 -0.07266579 0.023964827
## 41.20 -0.01976881 -0.05137333 0.0118357113 -0.06810376 0.028566135
## 41.40 -0.02829765 -0.05992411 0.0033288018 -0.07666614 0.020070836
## 41.60 -0.02652078 -0.05818066 0.0051391072 -0.07494039 0.021898837
## 41.80 -0.02135821 -0.05306540 0.0103489814 -0.06985017 0.027133753
## 42.00 -0.02955841 -0.06132933 0.0022125103 -0.07814784 0.019031019
## 42.20 -0.02497674 -0.05682973 0.0068762481 -0.07369168 0.023738201
## 42.40 -0.03350558 -0.06546143 -0.0015497355 -0.08237783 0.015366668
## 42.60 -0.03172870 -0.06381039 0.0003529815 -0.08079341 0.017336000
## 42.80 -0.02656614 -0.05879875 0.0056664806 -0.07586167 0.022729397
## 43.00 -0.03476634 -0.06717732 -0.0023553573 -0.08433465 0.014801979
## 43.20 -0.03018467 -0.06280269 0.0024333552 -0.08006963 0.019700293
## 43.40 -0.03871351 -0.07156937 -0.0058576512 -0.08896221 0.011535190
## 43.60 -0.03693663 -0.07006277 -0.0038104931 -0.08759869 0.013725426
## 43.80 -0.03177406 -0.06520444 0.0016563081 -0.08290140 0.019353278
## 44.00 -0.03997427 -0.07374462 -0.0062039130 -0.09162156 0.011673032
## 44.20 -0.03539260 -0.06953897 -0.0012462186 -0.08761497 0.016829781
## 44.40 -0.04392144 -0.07848136 -0.0093615169 -0.09677628 0.008933400
## 44.60 -0.04214456 -0.07715641 -0.0071327154 -0.09569056 0.011401436
## 44.80 -0.03698199 -0.07248484 -0.0014791482 -0.09127891 0.017314922
## 45.00 -0.04518219 -0.08121618 -0.0091482095 -0.10029142 0.009927030
## 45.20 -0.04060052 -0.07720508 -0.0039959717 -0.09658236 0.015381308
## 45.40 -0.04912937 -0.08634468 -0.0119140515 -0.10604528 0.007786546
## 45.60 -0.04735249 -0.08521884 -0.0094861372 -0.10526408 0.010559099
## 45.80 -0.04218992 -0.08074753 -0.0036323082 -0.10115870 0.016778859
## 46.00 -0.05039012 -0.08967965 -0.0111005942 -0.11047827 0.009698026
## 46.20 -0.04580845 -0.08586905 -0.0057478558 -0.10707585 0.015458943
## 46.40 -0.05433729 -0.09520835 -0.0134662363 -0.11684418 0.008169595
## 46.60 -0.05256042 -0.09428087 -0.0108399616 -0.11636635 0.011245514
## 46.80 -0.04739785 -0.09000611 -0.0047895887 -0.11256156 0.017765862
## 47.00 -0.05559805 -0.09913258 -0.0120635229 -0.12217837 0.010982264
## 47.20 -0.05101638 -0.09551369 -0.0065190669 -0.11906915 0.017036388
## 47.40 -0.05954522 -0.10504183 -0.0140486203 -0.12912627 0.010035826
## 47.60 -0.05776835 -0.10430004 -0.0112366551 -0.12893243 0.013395734
## 47.80 -0.05260578 -0.10020763 -0.0050039237 -0.12540653 0.020194975
## 48.00 -0.06080598 -0.10951300 -0.0120989595 -0.13529694 0.013684979
## 48.20 -0.05622431 -0.10606940 -0.0063792158 -0.13245580 0.020007183
## 48.40 -0.06475315 -0.11576916 -0.0137371398 -0.14277541 0.013269105
## 48.60 -0.06297627 -0.11519531 -0.0107572407 -0.14283840 0.016885846
## 48.80 -0.05781371 -0.11126713 -0.0043602791 -0.13956367 0.023936255
## 49.00 -0.06601391 -0.12073305 -0.0112947663 -0.14969961 0.017671797
## 49.20 -0.06143224 -0.11744634 -0.0054181321 -0.14709842 0.024233944
## 49.40 -0.06996108 -0.12729940 -0.0126227628 -0.15765247 0.017730309
## 49.60 -0.06818420 -0.12687531 -0.0094930997 -0.15794450 0.021576094
## 49.80 -0.06302163 -0.12309344 -0.0029498244 -0.15489354 0.028850270
## 50.00 -0.07122184 -0.13270231 -0.0097413634 -0.16524810 0.022804433
## 50.20 -0.06664017 -0.12955529 -0.0037250451 -0.16286054 0.029580208
## 50.40 -0.07516901 -0.13954486 -0.0107931605 -0.17362337 0.023285355
## 50.60 -0.07339213 -0.13925421 -0.0075300481 -0.17411949 0.027335233
## 50.80 -0.06822956 -0.13560284 -0.0008562858 -0.17126810 0.034808972
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0078
## beta = 0.0077
## gamma = 0.0515
##
## Initial states:
## l = 0.0171
## b = -9e-04
## s = 0.0091 -1e-04 -0.0045 -7e-04 -0.0038
##
## sigma: 0.023
##
## AIC AICc BIC
## -439.0239 -437.8599 -406.0407
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.000337886 0.0224443 0.01491014 -Inf Inf 0.7265193 -0.3744474
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.03069492 -0.06012832 -0.001261513 -0.07570943 0.0143195906
## 41.20 -0.02050976 -0.04994668 0.008927151 -0.06552964 0.0245101115
## 41.40 -0.03760641 -0.06705117 -0.008161649 -0.08263829 0.0074254667
## 41.60 -0.02451170 -0.05397038 0.004946973 -0.06956486 0.0205414538
## 41.80 -0.02487569 -0.05435606 0.004604671 -0.06996202 0.0202106338
## 42.00 -0.03796778 -0.06758764 -0.008347919 -0.08326745 0.0073318878
## 42.20 -0.02778262 -0.05744467 0.001879427 -0.07314681 0.0175815670
## 42.40 -0.04487927 -0.07459630 -0.015162240 -0.09032755 0.0005690067
## 42.60 -0.03178456 -0.06157101 -0.001998118 -0.07733900 0.0137698726
## 42.80 -0.03214855 -0.06202044 -0.002276670 -0.07783366 0.0135365503
## 43.00 -0.04524064 -0.07537892 -0.015102358 -0.09133316 0.0008518838
## 43.20 -0.03505548 -0.06531523 -0.004795740 -0.08133377 0.0112228007
## 43.40 -0.05215213 -0.08255378 -0.021750487 -0.09864743 -0.0056568289
## 43.60 -0.03905742 -0.06962278 -0.008492058 -0.08580311 0.0076882673
## 43.80 -0.03942141 -0.07017362 -0.008669211 -0.08645285 0.0076100213
## 44.00 -0.05251350 -0.08368989 -0.021337109 -0.10019367 -0.0048333257
## 44.20 -0.04232834 -0.07373979 -0.010916898 -0.09036800 0.0057113158
## 44.40 -0.05942499 -0.09109786 -0.027752127 -0.10786446 -0.0109855254
## 44.60 -0.04633028 -0.07829187 -0.014368688 -0.09521132 0.0025507575
## 44.80 -0.04669427 -0.07897275 -0.014415802 -0.09605994 0.0026713893
## 45.00 -0.05978636 -0.09266481 -0.026907911 -0.11006960 -0.0095031124
## 45.20 -0.04960120 -0.08285206 -0.016350340 -0.10045401 0.0012516029
## 45.40 -0.06669785 -0.10035101 -0.033044688 -0.11816592 -0.0152297798
## 45.60 -0.05360314 -0.08768892 -0.019517358 -0.10573285 -0.0014734340
## 45.80 -0.05396713 -0.08851619 -0.019418079 -0.10680535 -0.0011289143
## 46.00 -0.06705922 -0.10238758 -0.031730852 -0.12108929 -0.0130291452
## 46.20 -0.05687406 -0.09272343 -0.021024691 -0.11170094 -0.0020471811
## 46.40 -0.07397071 -0.11037205 -0.037569373 -0.12964175 -0.0182996682
## 46.60 -0.06087600 -0.09786026 -0.023891744 -0.11743854 -0.0043134607
## 46.80 -0.06123999 -0.09883803 -0.023641955 -0.11874123 -0.0037387553
## 47.00 -0.07433208 -0.11288040 -0.035783758 -0.13328665 -0.0153775104
## 47.20 -0.06414692 -0.10336495 -0.024928893 -0.12412572 -0.0041681229
## 47.40 -0.08124357 -0.12116147 -0.041325670 -0.14229273 -0.0201944098
## 47.60 -0.06814886 -0.10879649 -0.027501232 -0.13031404 -0.0059836773
## 47.80 -0.06851285 -0.10991971 -0.027105992 -0.13183918 -0.0051865246
## 48.00 -0.08160494 -0.12411768 -0.039092196 -0.14662257 -0.0165873103
## 48.20 -0.07141978 -0.11474358 -0.028095985 -0.13767781 -0.0051617525
## 48.40 -0.08851643 -0.13267956 -0.044353303 -0.15605811 -0.0209747559
## 48.60 -0.07542172 -0.12045200 -0.030391439 -0.14428959 -0.0065538480
## 48.80 -0.07578571 -0.12171050 -0.029860924 -0.14602162 -0.0055498089
## 49.00 -0.08887780 -0.13604639 -0.041709201 -0.16101594 -0.0167396550
## 49.20 -0.07869264 -0.12680264 -0.030582638 -0.15227054 -0.0051147408
## 49.40 -0.09578929 -0.14486667 -0.046711915 -0.17084666 -0.0207319218
## 49.60 -0.08269458 -0.13276480 -0.032624358 -0.15927038 -0.0061187827
## 49.80 -0.08305857 -0.13414663 -0.031970518 -0.16119101 -0.0049261359
## 50.00 -0.09615066 -0.14860354 -0.043697773 -0.17637042 -0.0159308934
## 50.20 -0.08596550 -0.13947830 -0.032452706 -0.16780626 -0.0041247427
## 50.40 -0.10306215 -0.15765851 -0.048465795 -0.18656007 -0.0195642302
## 50.60 -0.08996744 -0.14567054 -0.034264344 -0.17515797 -0.0047769064
## 50.80 -0.09033143 -0.14716399 -0.033498875 -0.17724933 -0.0034135370
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0083
## beta = 0.0083
## gamma = 0.059
##
## Initial states:
## l = 0.0046
## b = 1e-04
## s = 0.0029 -0.0012 1e-04 4e-04 -0.0021
##
## sigma: 0.0212
##
## AIC AICc BIC
## -471.2123 -470.0482 -438.2291
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.001010463 0.02070895 0.0127571 100.9621 206.8857 0.7523486
## ACF1
## Training set -0.3466156
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.03256451 -0.05972218 -0.0054068398 -0.07409858 0.0089695613
## 41.20 -0.01731358 -0.04447503 0.0098478704 -0.05885343 0.0242262729
## 41.40 -0.03484109 -0.06201105 -0.0076711384 -0.07639395 0.0067117662
## 41.60 -0.02245107 -0.04963614 0.0047339955 -0.06402704 0.0191249005
## 41.80 -0.02805733 -0.05526599 -0.0008486651 -0.06966939 0.0135547316
## 42.00 -0.04067780 -0.06804708 -0.0133085070 -0.08253551 0.0011799189
## 42.20 -0.02542687 -0.05284207 0.0019883434 -0.06735481 0.0165010784
## 42.40 -0.04295438 -0.07042945 -0.0154793075 -0.08497388 -0.0009348831
## 42.60 -0.03056436 -0.05811501 -0.0030137093 -0.07269944 0.0115707233
## 42.80 -0.03617062 -0.06381429 -0.0085269469 -0.07844796 0.0061067277
## 43.00 -0.04879108 -0.07673623 -0.0208459321 -0.09152950 -0.0060526631
## 43.20 -0.03354015 -0.06161727 -0.0054630330 -0.07648040 0.0094000965
## 43.40 -0.05106767 -0.07929888 -0.0228364525 -0.09424358 -0.0078917505
## 43.60 -0.03867765 -0.06708653 -0.0102687656 -0.08212528 0.0047699871
## 43.80 -0.04428390 -0.07289538 -0.0156724303 -0.08804138 -0.0005264313
## 44.00 -0.05690437 -0.08598933 -0.0278194097 -0.10138598 -0.0124227620
## 44.20 -0.04165344 -0.07099237 -0.0123145119 -0.08652346 0.0032165786
## 44.40 -0.05918095 -0.08880202 -0.0295598887 -0.10448246 -0.0138794439
## 44.60 -0.04679093 -0.07672322 -0.0168586437 -0.09256842 -0.0010134469
## 44.80 -0.05239719 -0.08267059 -0.0221237914 -0.09869636 -0.0060980218
## 45.00 -0.06501766 -0.09595184 -0.0340834715 -0.11232741 -0.0177079029
## 45.20 -0.04976673 -0.08109999 -0.0184334652 -0.09768682 -0.0018466380
## 45.40 -0.06729424 -0.09905795 -0.0355305321 -0.11587264 -0.0187158402
## 45.60 -0.05490422 -0.08713008 -0.0226783585 -0.10418942 -0.0056190178
## 45.80 -0.06051048 -0.09323042 -0.0277905320 -0.11055132 -0.0104696388
## 46.00 -0.07313094 -0.10669780 -0.0395640899 -0.12446702 -0.0217948705
## 46.20 -0.05788001 -0.09199983 -0.0237601967 -0.11006177 -0.0056982561
## 46.40 -0.07540753 -0.11011220 -0.0407028593 -0.12848374 -0.0223313171
## 46.60 -0.06301751 -0.09833878 -0.0276962303 -0.11703674 -0.0089982755
## 46.80 -0.06862377 -0.10459320 -0.0326543288 -0.12363427 -0.0136132592
## 47.00 -0.08124423 -0.11823342 -0.0442550436 -0.13781431 -0.0246741503
## 47.20 -0.06599330 -0.10368646 -0.0283001439 -0.12364001 -0.0083465913
## 47.40 -0.08352082 -0.12194848 -0.0450931494 -0.14229086 -0.0247507716
## 47.60 -0.07113079 -0.11032310 -0.0319384893 -0.13107025 -0.0111913359
## 47.80 -0.07673705 -0.11672368 -0.0367504214 -0.13789133 -0.0155827774
## 48.00 -0.08935752 -0.13051767 -0.0481973651 -0.15230654 -0.0264084959
## 48.20 -0.07410659 -0.11611189 -0.0321012877 -0.13834815 -0.0098650242
## 48.40 -0.09163410 -0.13451281 -0.0487554005 -0.15721142 -0.0260567866
## 48.60 -0.07924408 -0.12302392 -0.0354642442 -0.14619957 -0.0122885979
## 48.80 -0.08485034 -0.12955852 -0.0401421606 -0.15322560 -0.0164750800
## 49.00 -0.09747081 -0.14348650 -0.0514551144 -0.16784573 -0.0270958786
## 49.20 -0.08221988 -0.12920935 -0.0352303984 -0.15408408 -0.0103556712
## 49.40 -0.09974739 -0.14773637 -0.0517584092 -0.17314020 -0.0263545772
## 49.60 -0.08735737 -0.13637104 -0.0383436949 -0.16231732 -0.0123974229
## 49.80 -0.09296363 -0.14302666 -0.0429005899 -0.16952844 -0.0163988188
## 50.00 -0.10558409 -0.15707091 -0.0540972757 -0.18432639 -0.0268418008
## 50.20 -0.09033316 -0.14290984 -0.0377564891 -0.17074225 -0.0099240786
## 50.40 -0.10786068 -0.16155051 -0.0541708409 -0.18997220 -0.0257491582
## 50.60 -0.09547066 -0.15029648 -0.0406448366 -0.17931951 -0.0116218006
## 50.80 -0.10107691 -0.15706107 -0.0450927616 -0.18669729 -0.0154565411
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0601
## beta = 0.0035
## gamma = 1e-04
##
## Initial states:
## l = 0.0098
## b = -5e-04
## s = 0.0059 -7e-04 -0.0023 -3e-04 -0.0027
##
## sigma: 0.0334
##
## AIC AICc BIC
## -288.8528 -287.6887 -255.8696
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.001208304 0.03267024 0.02170976 NaN Inf 0.7921311 -0.248789
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.03822031 -0.08106399 0.0046233723 -0.1037441 0.0273034467
## 41.20 -0.03715664 -0.08008683 0.0057735404 -0.1028127 0.0284994064
## 41.40 -0.04053480 -0.08356106 0.0024914610 -0.1063378 0.0252681867
## 41.60 -0.04023551 -0.08336788 0.0028968520 -0.1062008 0.0257297469
## 41.80 -0.03498703 -0.07823598 0.0082619155 -0.1011306 0.0311565250
## 42.00 -0.04489562 -0.08827239 -0.0015188579 -0.1112347 0.0214434147
## 42.20 -0.04383196 -0.08734754 -0.0003163746 -0.1103833 0.0227193836
## 42.40 -0.04721011 -0.09087625 -0.0035439778 -0.1139917 0.0195714779
## 42.60 -0.04691083 -0.09073964 -0.0030820150 -0.1139412 0.0201195572
## 42.80 -0.04166235 -0.08566634 0.0023416464 -0.1089606 0.0256359531
## 43.00 -0.05157094 -0.09576337 -0.0073785102 -0.1191574 0.0160155482
## 43.20 -0.05050727 -0.09490094 -0.0061136027 -0.1184015 0.0173869866
## 43.40 -0.05388543 -0.09849387 -0.0092769887 -0.1221081 0.0143372927
## 43.60 -0.05358614 -0.09842319 -0.0087491006 -0.1221585 0.0149861961
## 43.80 -0.04833766 -0.09341743 -0.0032578940 -0.1172812 0.0206058935
## 44.00 -0.05824625 -0.10358359 -0.0129089155 -0.1275837 0.0110912216
## 44.20 -0.05718259 -0.10279168 -0.0115734992 -0.1269357 0.0125704940
## 44.40 -0.06056074 -0.10645645 -0.0146650363 -0.1307522 0.0096306835
## 44.60 -0.06026146 -0.10645886 -0.0140640562 -0.1309143 0.0103913714
## 44.80 -0.05501298 -0.10152734 -0.0084986129 -0.1261506 0.0161246041
## 45.00 -0.06492157 -0.11176884 -0.0180742981 -0.1365683 0.0067251490
## 45.20 -0.06385790 -0.11105315 -0.0166626541 -0.1360368 0.0083210017
## 45.40 -0.06723606 -0.11479500 -0.0196771161 -0.1399712 0.0054990672
## 45.60 -0.06693677 -0.11487524 -0.0189983125 -0.1402523 0.0063787761
## 45.80 -0.06168829 -0.11002219 -0.0133543945 -0.1356086 0.0122320253
## 46.00 -0.07159688 -0.12034276 -0.0228510042 -0.1461473 0.0029535058
## 46.20 -0.07053322 -0.11970657 -0.0213598712 -0.1457374 0.0046709264
## 46.40 -0.07391137 -0.12352827 -0.0242944760 -0.1497939 0.0019711231
## 46.60 -0.07361209 -0.12368864 -0.0235355364 -0.1501976 0.0029733893
## 46.80 -0.06836361 -0.11891592 -0.0178112907 -0.1456767 0.0089494891
## 47.00 -0.07827220 -0.12931697 -0.0272274270 -0.1563384 -0.0002059569
## 47.20 -0.07720853 -0.12876124 -0.0256558320 -0.1560516 0.0016345198
## 47.40 -0.08058669 -0.13266335 -0.0285100242 -0.1602311 -0.0009423031
## 47.60 -0.08028740 -0.13290401 -0.0276707948 -0.1607576 0.0001827557
## 47.80 -0.07503892 -0.12821139 -0.0218664515 -0.1563592 0.0062813542
## 48.00 -0.08494751 -0.13869231 -0.0312027181 -0.1671431 -0.0027519415
## 48.20 -0.08388385 -0.13821609 -0.0295516033 -0.1669778 -0.0007898499
## 48.40 -0.08726200 -0.14219736 -0.0323266525 -0.1712784 -0.0032456340
## 48.60 -0.08696272 -0.14251673 -0.0314087104 -0.1719252 -0.0020001945
## 48.80 -0.08171424 -0.13790234 -0.0255261341 -0.1676465 0.0042180512
## 49.00 -0.09162283 -0.14846099 -0.0347846703 -0.1785493 -0.0046963662
## 49.20 -0.09055916 -0.14806191 -0.0330564153 -0.1785020 -0.0026162989
## 49.40 -0.09393732 -0.15211971 -0.0357549290 -0.1829196 -0.0049550321
## 49.60 -0.09363803 -0.15251498 -0.0347610902 -0.1836826 -0.0035935185
## 49.80 -0.08838955 -0.14797582 -0.0288032866 -0.1795189 0.0027397773
## 50.00 -0.09829814 -0.15860901 -0.0379872760 -0.1905357 -0.0060606307
## 50.20 -0.09723448 -0.15828375 -0.0361852112 -0.1906013 -0.0038676810
## 50.40 -0.10061263 -0.16241461 -0.0388106562 -0.1951306 -0.0060946652
## 50.60 -0.10031335 -0.16288219 -0.0377445075 -0.1960041 -0.0046225631
## 50.80 -0.09506487 -0.15841457 -0.0317151676 -0.1919499 0.0018201380
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0419
## beta = 1e-04
## gamma = 0.0543
##
## Initial states:
## l = 0.0057
## b = 0
## s = -5e-04 8e-04 -0.0016 0.0062 -0.0049
##
## sigma: 0.0194
##
## AIC AICc BIC
## -506.0220 -504.8580 -473.0389
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -4.789381e-05 0.01898296 0.01105661 -Inf Inf 0.7571336 -0.2609261
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.011266993 -0.03616120 0.01362721 -0.04933939 0.02680541
## 41.20 0.007386652 -0.01752953 0.03230283 -0.03071936 0.04549266
## 41.40 -0.006676565 -0.03161480 0.01826167 -0.04481631 0.03146318
## 41.60 0.004055481 -0.02090490 0.02901587 -0.03411813 0.04222909
## 41.80 -0.002242529 -0.02722514 0.02274009 -0.04045014 0.03596508
## 42.00 -0.011429258 -0.03652761 0.01366909 -0.04981388 0.02695536
## 42.20 0.007224387 -0.01789628 0.03234506 -0.03119436 0.04564314
## 42.40 -0.006838830 -0.03198190 0.01830424 -0.04529184 0.03161418
## 42.60 0.003893216 -0.02127235 0.02905878 -0.03459419 0.04238062
## 42.80 -0.002404794 -0.02759293 0.02278334 -0.04092672 0.03611714
## 43.00 -0.011591523 -0.03689565 0.01371260 -0.05029084 0.02710779
## 43.20 0.007062122 -0.01826466 0.03238891 -0.03167185 0.04579609
## 43.40 -0.007001095 -0.03235062 0.01834843 -0.04576985 0.03176766
## 43.60 0.003730951 -0.02164141 0.02910331 -0.03507273 0.04253463
## 43.80 -0.002567059 -0.02796234 0.02282822 -0.04140578 0.03627166
## 44.00 -0.011753788 -0.03726529 0.01375772 -0.05077027 0.02726269
## 44.20 0.006899856 -0.01863465 0.03243436 -0.03215180 0.04595152
## 44.40 -0.007163361 -0.03272096 0.01839423 -0.04625033 0.03192361
## 44.60 0.003568686 -0.02201208 0.02914945 -0.03555372 0.04269109
## 44.80 -0.002729324 -0.02833335 0.02287470 -0.04188730 0.03642865
## 45.00 -0.011916053 -0.03763655 0.01380444 -0.05125215 0.02742005
## 45.20 0.006737591 -0.01900624 0.03248143 -0.03263421 0.04610939
## 45.40 -0.007325626 -0.03309289 0.01844164 -0.04673325 0.03208200
## 45.60 0.003406420 -0.02238436 0.02919720 -0.03603717 0.04285001
## 45.80 -0.002891589 -0.02870596 0.02292278 -0.04237127 0.03658809
## 46.00 -0.012078318 -0.03800939 0.01385276 -0.05173647 0.02757984
## 46.20 0.006575326 -0.01937943 0.03253008 -0.03311905 0.04626970
## 46.40 -0.007487891 -0.03346642 0.01849063 -0.04721862 0.03224283
## 46.60 0.003244155 -0.02275822 0.02924653 -0.03652305 0.04301136
## 46.80 -0.003053854 -0.02908017 0.02297246 -0.04285767 0.03674996
## 47.00 -0.012240584 -0.03838383 0.01390266 -0.05222322 0.02774206
## 47.20 0.006413061 -0.01975420 0.03258032 -0.03360632 0.04643244
## 47.40 -0.007650156 -0.03384153 0.01854121 -0.04770640 0.03240609
## 47.60 0.003081890 -0.02313367 0.02929745 -0.03701135 0.04317513
## 47.80 -0.003216120 -0.02945596 0.02302372 -0.04334649 0.03691425
## 48.00 -0.012402849 -0.03875984 0.01395414 -0.05271238 0.02790669
## 48.20 0.006250796 -0.02013055 0.03263214 -0.03409599 0.04659758
## 48.40 -0.007812421 -0.03421821 0.01859337 -0.04819659 0.03257175
## 48.60 0.002919625 -0.02351069 0.02934994 -0.03750206 0.04334131
## 48.80 -0.003378385 -0.02983332 0.02307655 -0.04383771 0.03708094
## 49.00 -0.012565114 -0.03913742 0.01400719 -0.05320394 0.02807371
## 49.20 0.006088531 -0.02050847 0.03268553 -0.03458807 0.04676513
## 49.40 -0.007974686 -0.03459646 0.01864709 -0.04868918 0.03273981
## 49.60 0.002757360 -0.02388928 0.02940400 -0.03799516 0.04350988
## 49.80 -0.003540650 -0.03021224 0.02313094 -0.04433133 0.03725003
## 50.00 -0.012727379 -0.03951655 0.01406180 -0.05369789 0.02824313
## 50.20 0.005926265 -0.02088794 0.03274047 -0.03508252 0.04693506
## 50.40 -0.008136952 -0.03497627 0.01870237 -0.04918415 0.03291025
## 50.60 0.002595095 -0.02426943 0.02945962 -0.03849065 0.04368084
## 50.80 -0.003702915 -0.03059272 0.02318689 -0.04482733 0.03742150
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0451
## beta = 1e-04
## gamma = 0.0549
##
## Initial states:
## l = 0.009
## b = -1e-04
## s = 0.0042 -4e-04 -0.003 4e-04 -0.0011
##
## sigma: 0.0185
##
## AIC AICc BIC
## -525.7990 -524.6349 -492.8158
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -3.201684e-05 0.01806722 0.01221663 83.02854 167.6833 0.7674483
## ACF1
## Training set -0.2268501
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.0132419835 -0.03693529 0.010451325 -0.04947777 0.02299380
## 41.20 -0.0024396228 -0.02615712 0.021277879 -0.03871241 0.03383317
## 41.40 -0.0058435715 -0.02958535 0.017898205 -0.04215349 0.03046634
## 41.60 0.0015962986 -0.02216984 0.025362433 -0.03475087 0.03794346
## 41.80 0.0013313447 -0.02245923 0.025121919 -0.03505320 0.03771589
## 42.00 -0.0135288835 -0.03743831 0.010380541 -0.05009519 0.02303743
## 42.20 -0.0027265227 -0.02666046 0.021207410 -0.03933031 0.03387727
## 42.40 -0.0061304714 -0.03008899 0.017828051 -0.04277187 0.03051093
## 42.60 0.0013093987 -0.02267380 0.025292593 -0.03536973 0.03798853
## 42.80 0.0010444447 -0.02296350 0.025052393 -0.03567254 0.03776143
## 43.00 -0.0138157834 -0.03794268 0.010311116 -0.05071469 0.02308313
## 43.20 -0.0030134226 -0.02716514 0.021138298 -0.03995029 0.03392345
## 43.40 -0.0064173714 -0.03059399 0.017759252 -0.04339233 0.03055758
## 43.60 0.0010224988 -0.02317911 0.025224107 -0.03599067 0.03803566
## 43.80 0.0007575448 -0.02346913 0.024984220 -0.03629396 0.03780905
## 44.00 -0.0141026833 -0.03844841 0.010243042 -0.05133626 0.02313089
## 44.20 -0.0033003226 -0.02767118 0.021070536 -0.04057233 0.03397169
## 44.40 -0.0067042713 -0.03110034 0.017691802 -0.04401485 0.03060630
## 44.60 0.0007355988 -0.02368577 0.025156968 -0.03661366 0.03808486
## 44.80 0.0004706449 -0.02397610 0.024917392 -0.03691743 0.03785872
## 45.00 -0.0143895833 -0.03895548 0.010176311 -0.05195988 0.02318071
## 45.20 -0.0035872225 -0.02817856 0.021004115 -0.04119643 0.03402198
## 45.40 -0.0069911712 -0.03160803 0.017625692 -0.04463941 0.03065707
## 45.60 0.0004486989 -0.02419377 0.025091169 -0.03723871 0.03813610
## 45.80 0.0001837449 -0.02448441 0.024851903 -0.03754295 0.03791044
## 46.00 -0.0146764832 -0.03946388 0.010110915 -0.05258554 0.02323257
## 46.20 -0.0038741224 -0.02868727 0.020939029 -0.04182256 0.03407432
## 46.40 -0.0072780712 -0.03211706 0.017560915 -0.04526602 0.03070988
## 46.60 0.0001617990 -0.02470310 0.025026702 -0.03786579 0.03818939
## 46.80 -0.0001031550 -0.02499405 0.024787745 -0.03817050 0.03796419
## 47.00 -0.0149633831 -0.03997361 0.010046848 -0.05321323 0.02328646
## 47.20 -0.0041610223 -0.02919732 0.020875270 -0.04245073 0.03412868
## 47.40 -0.0075649711 -0.03262741 0.017497465 -0.04589466 0.03076472
## 47.60 -0.0001251010 -0.02521376 0.024963559 -0.03849489 0.03824469
## 47.80 -0.0003900549 -0.02550502 0.024724910 -0.03880008 0.03801997
## 48.00 -0.0152502830 -0.04048467 0.009984101 -0.05384294 0.02334238
## 48.20 -0.0044479223 -0.02970868 0.020812831 -0.04308091 0.03418507
## 48.40 -0.0078518710 -0.03313907 0.017435332 -0.04652531 0.03082157
## 48.60 -0.0004120009 -0.02572573 0.024901733 -0.03912602 0.03830201
## 48.80 -0.0006769549 -0.02601730 0.024663390 -0.03943167 0.03807776
## 49.00 -0.0155371830 -0.04099703 0.009922668 -0.05447467 0.02340030
## 49.20 -0.0047348222 -0.03022135 0.020751704 -0.04371310 0.03424346
## 49.40 -0.0081387709 -0.03365205 0.017374511 -0.04715797 0.03088043
## 49.60 -0.0006989008 -0.02623902 0.024841217 -0.03975914 0.03836134
## 49.80 -0.0009638548 -0.02653089 0.024603179 -0.04006526 0.03813755
## 50.00 -0.0158240829 -0.04151071 0.009862542 -0.05510839 0.02346022
## 50.20 -0.0050217221 -0.03073533 0.020691882 -0.04434729 0.03430384
## 50.40 -0.0084256709 -0.03416633 0.017314993 -0.04779262 0.03094128
## 50.60 -0.0009858008 -0.02675360 0.024782003 -0.04039426 0.03842265
## 50.80 -0.0012507547 -0.02704578 0.024544269 -0.04070084 0.03819933
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0127
## beta = 0.0126
## gamma = 0.0364
##
## Initial states:
## l = 0.0212
## b = -0.0019
## s = 0.0037 0.0065 0.0033 -0.0043 -0.0092
##
## sigma: 0.0452
##
## AIC AICc BIC
## -168.4221 -167.2581 -135.4389
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.002427766 0.04414772 0.02874494 137.9406 210.527 0.7050566
## ACF1
## Training set 0.05347147
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.09656497 -0.1544602 -0.03866974 -0.1851081 -0.008021866
## 41.20 -0.08965143 -0.1475652 -0.03173765 -0.1782229 -0.001079941
## 41.40 -0.10847006 -0.1664255 -0.05051462 -0.1971053 -0.019834869
## 41.60 -0.09694867 -0.1549780 -0.03891935 -0.1856969 -0.008200478
## 41.80 -0.10728662 -0.1654311 -0.04914210 -0.1962110 -0.018362259
## 42.00 -0.13688970 -0.1953958 -0.07838359 -0.2263671 -0.047412323
## 42.20 -0.12997616 -0.1887058 -0.07124651 -0.2197954 -0.040156907
## 42.40 -0.14879479 -0.2078151 -0.08977451 -0.2390585 -0.058531055
## 42.60 -0.13727340 -0.1966594 -0.07788740 -0.2280965 -0.046450348
## 42.80 -0.14761134 -0.2074457 -0.08777699 -0.2391201 -0.056102589
## 43.00 -0.17721443 -0.2379032 -0.11652565 -0.2700299 -0.084398952
## 43.20 -0.17030089 -0.2316204 -0.10898138 -0.2640810 -0.076520790
## 43.40 -0.18911952 -0.2511710 -0.12706801 -0.2840191 -0.094219925
## 43.60 -0.17759813 -0.2404879 -0.11470841 -0.2737797 -0.081416603
## 43.80 -0.18793607 -0.2517744 -0.12409775 -0.2855684 -0.090303780
## 44.00 -0.21753916 -0.2828522 -0.15222610 -0.3174269 -0.117651456
## 44.20 -0.21062562 -0.2771101 -0.14414113 -0.3123049 -0.108946361
## 44.40 -0.22944425 -0.2972179 -0.16167057 -0.3330951 -0.125793357
## 44.60 -0.21792286 -0.2871046 -0.14874113 -0.3237272 -0.112118533
## 44.80 -0.22826080 -0.2989699 -0.15755169 -0.3364011 -0.120120547
## 45.00 -0.25786388 -0.3306953 -0.18503245 -0.3692500 -0.146477811
## 45.20 -0.25095035 -0.3255355 -0.17636524 -0.3650184 -0.136882264
## 45.40 -0.26976898 -0.3462248 -0.19331319 -0.3866980 -0.152839936
## 45.60 -0.25824759 -0.3366895 -0.17980568 -0.3782141 -0.138281046
## 45.80 -0.26858553 -0.3491271 -0.18804398 -0.3917632 -0.145407861
## 46.00 -0.29818861 -0.3814499 -0.21492729 -0.4255258 -0.170851410
## 46.20 -0.29127508 -0.3768425 -0.20570764 -0.4221392 -0.160410977
## 46.40 -0.31009371 -0.3980738 -0.22211365 -0.4446476 -0.175539815
## 46.60 -0.29857232 -0.3890689 -0.20807575 -0.4369749 -0.160169753
## 46.80 -0.30891026 -0.4020246 -0.21579597 -0.4513163 -0.166504248
## 47.00 -0.33851334 -0.4348634 -0.24216329 -0.4858680 -0.191158646
## 47.20 -0.33159981 -0.4307471 -0.23245254 -0.4832325 -0.179967152
## 47.40 -0.35041844 -0.4524563 -0.24838055 -0.5064719 -0.194364949
## 47.60 -0.33889705 -0.4439164 -0.23387775 -0.4995102 -0.178283885
## 47.80 -0.34923499 -0.4573239 -0.24114606 -0.5145427 -0.183927238
## 48.00 -0.37883807 -0.4905991 -0.26707705 -0.5497618 -0.207914335
## 48.20 -0.37192454 -0.4869096 -0.25693945 -0.5477791 -0.196070015
## 48.40 -0.39074317 -0.5090337 -0.27245260 -0.5716530 -0.209833354
## 48.60 -0.37922178 -0.5008970 -0.25754657 -0.5653080 -0.193135610
## 48.80 -0.38955972 -0.5146966 -0.26442287 -0.5809400 -0.198179428
## 49.00 -0.41916280 -0.5483428 -0.28998278 -0.6167266 -0.221599012
## 49.20 -0.41224927 -0.5450251 -0.27947348 -0.6153123 -0.209186221
## 49.40 -0.43106790 -0.5675110 -0.29462478 -0.6397396 -0.222396158
## 49.60 -0.41954651 -0.5597267 -0.27936632 -0.6339336 -0.205159411
## 49.80 -0.42988445 -0.5738697 -0.28589917 -0.6500909 -0.209677960
## 50.00 -0.45948753 -0.6078372 -0.31113782 -0.6863688 -0.232606231
## 50.20 -0.45257400 -0.6048473 -0.30030073 -0.6854559 -0.219692134
## 50.40 -0.47139263 -0.6276532 -0.31513202 -0.7103726 -0.232412650
## 50.60 -0.45987124 -0.6201815 -0.29956094 -0.7050447 -0.214697788
## 50.80 -0.47020918 -0.6346302 -0.30578819 -0.7216694 -0.218748972
Check whether the residuals for each model look like white noise, and test for autocorrelation (Ljung-Box) and normality (Shapiro-Wilk).
plots <- list()
for (i in c(1:length(hw_models)))
{
title <- hw_titles[[i]]
model <- hw_models[[i]]
df <- as.data.frame(cbind(fitted(model),model$residuals))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
# (log returns take negative fitted values, so a log10 x-axis would silently drop points; plot on the raw scale)
ggtitle(label=title)
plots[[i]] <- plot
}
for (i in c(1:length(hw_models)))
{
title <- hw_titles[[i]]
print(title)
model <- hw_models[[i]]
checkresiduals(hw_models[[i]])
residuals <- hw_resfits[[i]]
print(shapiro.test(residuals))
}## [1] "AAPL _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 64.716, df = 3, p-value = 5.773e-14
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.85085, p-value = 4.731e-13
##
## [1] "FB _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 46.579, df = 3, p-value = 4.272e-10
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.88079, p-value = 1.765e-11
##
## [1] "GOOG _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 67.521, df = 3, p-value = 1.454e-14
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.85237, p-value = 5.612e-13
##
## [1] "GE _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 22.939, df = 3, p-value = 4.158e-05
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.89468, p-value = 1.167e-10
##
## [1] "PG _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 46.635, df = 3, p-value = 4.157e-10
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.7815, p-value = 5.431e-16
##
## [1] "AMZN _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 33.619, df = 3, p-value = 2.384e-07
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.90376, p-value = 4.389e-10
##
## [1] "TSLA _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 9.1889, df = 3, p-value = 0.02688
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.90039, p-value = 2.66e-10
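Fit a Neural Network Autoregression (NNAR) model for each stock, again with the three Fama-French factors as external regressors and the last 50 observations held out as a test set. The hidden-layer size hn is set from the length of the training series (see the comment in the loop below), and lambda = "auto" asks nnetar to choose a Box-Cox transformation automatically.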
nnet_models <- list()
nnet_titles <- list()
nnet_plots <- list()
nnet_resfits <- list()
nnet_aic <- list()
nnet_rmse <- list()
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- nrow(LNreturnsData)
xregs <- as.matrix(LNreturnsData[,ff_factors])
xregs_train <- head(xregs,nrow(xregs)-50)
xregs_test <- tail(xregs,50)
row.names(xregs_test) <- (nrow(xregs)-49):nrow(xregs)
for (stock in c(stockTickers))
{
title <- paste("Neural Network Autoregression Model For Stock:", stock)
log_returns <- ts(LNreturnsData[,stock])
train <- head(log_returns,length(log_returns)-50)
test <- tail(log_returns,50)
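# Hidden-layer size from a rule-of-thumb scaling of the training length:
# with alpha = 1.5^-10 and the 200-observation training set used here,
# hn evaluates to about 46.13, matching the NNAR(p, 46.132...) summaries below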
alpha <- 1.5^(-10)
hn <- length(train)/(alpha*(length(train)+50))
model <- nnetar(train,size = hn, xreg = xregs_train, lambda = "auto")
forecast <- forecast(model,PI=TRUE,h=50,xreg=xregs_test,npaths=100)
colors <- c("Train"="black","Fitted"="red","Test"="green4","Forecast"="blue")
plot <- autoplot(forecast,main = title,xlab = "Days",ylab = "Log Returns") +
autolayer(train, series = 'Train') +
autolayer(fitted(model), series = 'Fitted') +
autolayer(test, series = 'Test') +
autolayer(forecast$mean, series = 'Forecast') +
coord_cartesian(xlim = c(plot_midpoint-125,plot_midpoint+10)) +
labs(caption = paste("From:",start,"to",end,"(last 125 days shown)")) +
scale_color_manual(values=colors)
nnet_models[[stock]] <- model
label <- paste(stock,"_NNET_Model")
nnet_titles[[stock]] <- label
nnet_plots[[label]] <- plot
label <- paste(stock,"_NNET_Model_Residuals",sep = "")
nnet_resfits[[label]] <- model[["residuals"]]
label <- stock
nnet_aic[[label]] <- NULL
nnet_rmse[[label]] <- round(rmse(test, forecast$mean),4)
}## [1] "For Stock: AAPL"
## Series: train
## Model: NNAR(1,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 4-46.13203-1 network with 277 weights
## options were - linear output units
##
## sigma^2 estimated as 0.0003572
## [1] "For Stock: FB"
## Series: train
## Model: NNAR(7,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 10-46.13203-1 network with 553 weights
## options were - linear output units
##
## sigma^2 estimated as 3.257e-08
## [1] "For Stock: GOOG"
## Series: train
## Model: NNAR(11,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 14-46.13203-1 network with 737 weights
## options were - linear output units
##
## sigma^2 estimated as 7.543e-09
## [1] "For Stock: GE"
## Series: train
## Model: NNAR(5,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 8-46.13203-1 network with 461 weights
## options were - linear output units
##
## sigma^2 estimated as 2.333e-06
## [1] "For Stock: PG"
## Series: train
## Model: NNAR(1,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 4-46.13203-1 network with 277 weights
## options were - linear output units
##
## sigma^2 estimated as 0.002344
## [1] "For Stock: AMZN"
## Series: train
## Model: NNAR(3,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 6-46.13203-1 network with 369 weights
## options were - linear output units
##
## sigma^2 estimated as 2.898e-05
## [1] "For Stock: TSLA"
## Series: train
## Model: NNAR(1,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 4-46.13203-1 network with 277 weights
## options were - linear output units
##
## sigma^2 estimated as 0.004328
Check whether the residuals for each model look like white noise, and test for autocorrelation and normality.
plots <- list()
for (i in c(1:length(nnet_models)))
{
title <- nnet_titles[[i]]
model <- nnet_models[[i]]
df <- as.data.frame(cbind(fitted(model),model$residuals))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
# (log returns take negative fitted values, so a log10 x-axis would silently drop points; plot on the raw scale)
ggtitle(label=title)
plots[[i]] <- plot
}
for (i in c(1:length(nnet_models)))
{
title <- nnet_titles[[i]]
print(title)
residuals <- nnet_resfits[[i]]
checkresiduals(residuals)
print(shapiro.test(residuals))
}## [1] "AAPL _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.97418, p-value = 0.0009938
##
## [1] "FB _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.79197, p-value = 2.677e-15
##
## [1] "GOOG _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.77342, p-value = 8.316e-16
##
## [1] "GE _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.83572, p-value = 1.435e-13
##
## [1] "PG _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.93246, p-value = 5.663e-08
##
## [1] "AMZN _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.87817, p-value = 1.598e-11
##
## [1] "TSLA _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.87167, p-value = 5.987e-12
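Finally, fit a k-nearest-neighbours time-series regression (tsfknn) for each stock, using the first 10 lags as features, k of roughly the square root of the training length, and the MIMO strategy to produce the 50-day forecast in one shot.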
knn_models <- list()
knn_titles <- list()
knn_plots <- list()
knn_resfits <- list()
knn_aic <- list()
knn_rmse <- list()
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- nrow(LNreturnsData)
for (stock in c(stockTickers))
{
title <- paste("KNN TS Regression For Stock:", stock)
log_returns <- ts(LNreturnsData[,stock])
train <- head(log_returns,length(log_returns)-50)
test <- tail(log_returns,50)
k <- round(sqrt(NROW(train)))  # NROW() gives the series length whether train is a plain vector or a one-column matrix; nrow() returns NULL for a vector
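# msas = "MIMO": one multi-output model emits the whole 50-step path at once,
# rather than iterating one-step-ahead forecasts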
model <- knn_forecasting(train,h=50,lags=1:10,k=k,msas="MIMO")
plot <- autoplot(model,highlight = "neighbors",faceting = FALSE) +
autolayer(test, series = 'Test') +
labs(x = "Date",y = "Log Return",color = "Legend") +
ggtitle(label=title) +
labs(caption = paste("From:",start,"to",end)) +
scale_color_manual(values=c("blue","black","red","green","purple"))
knn_models[[stock]] <- model
label <- paste(stock,"_KNN_Model")
knn_titles[[stock]] <- label
knn_plots[[label]] <- plot
label <- paste(stock,"_KNN_Model_Residuals",sep = "")
knn_resfits[[label]] <- (model$prediction - test)
label <- stock
knn_aic[[label]] <- NULL
knn_rmse[[label]] <- round(rmse(test, model$prediction),4)
}
for (i in c(1:length(knn_models)))
{
title <- knn_titles[[i]]
print(title)
residuals <- knn_resfits[[i]]
checkresiduals(residuals)
print(shapiro.test(residuals))
}## [1] "AAPL _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.93019, p-value = 0.00562
##
## [1] "FB _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.96266, p-value = 0.1149
##
## [1] "GOOG _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.94396, p-value = 0.01939
##
## [1] "GE _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.96907, p-value = 0.2122
##
## [1] "PG _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.96346, p-value = 0.1241
##
## [1] "AMZN _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.95416, p-value = 0.05071
##
## [1] "TSLA _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.9688, p-value = 0.2067
For each stock, compare the RMSE and AIC values of each model. (AIC is not reported for the neural network and KNN models, so those columns are absent from the AIC table.)
library(knitr)
library(kableExtra)
RMSE <- as.matrix(cbind(stockTickers,
arima_rmse,
arima_garch_rmse,
famafrench_rmse,
arimax_rmse,
auto_sarimax_rmse,
hw_rmse,
nnet_rmse,
knn_rmse))
AIC <- as.matrix(cbind(stockTickers,
arima_aic,
arima_garch_aic,
famafrench_aic,
arimax_aic,
auto_sarimax_aic,
hw_aic,
nnet_aic,
knn_aic))
kable(RMSE, caption = 'RMSE Table') %>%
kable_styling(full_width = FALSE,
latex_options = c('hold_position', 'striped'))

| stockTickers | arima_rmse | arima_garch_rmse | famafrench_rmse | arimax_rmse | auto_sarimax_rmse | hw_rmse | nnet_rmse | knn_rmse |
|---|---|---|---|---|---|---|---|---|
| AAPL | 0.037 | 0.0277 | 0.0392 | 0.0135 | 0.0132 | 0.062 | 0.0268 | 0.0278 |
| FB | 0.0311 | 0.0308 | 0.0322 | 0.0206 | 0.0211 | 0.0778 | 0.0259 | 0.0304 |
| GOOG | 0.0291 | 0.0249 | 0.0363 | 0.015 | 0.015 | 0.0781 | 0.02 | 0.0277 |
| GE | 0.0538 | 0.0531 | 0.041 | 0.0333 | 0.0335 | 0.0875 | 0.0414 | 0.0542 |
| PG | 0.0267 | 0.0305 | 0.0356 | 0.0184 | 0.0183 | 0.0297 | 0.0449 | 0.027 |
| AMZN | 0.0227 | 0.0243 | 0.0325 | 0.0224 | 0.0219 | 0.0264 | 0.0293 | 0.0245 |
| TSLA | 0.0555 | 0.0549 | 0.0554 | 0.0462 | 0.0455 | 0.3193 | 0.0808 | 0.0573 |
kable(AIC, caption = 'AIC Table') %>%
kable_styling(full_width = FALSE,
latex_options = c('hold_position', 'striped'))

| stockTickers | arima_aic | arima_garch_aic | famafrench_aic | arimax_aic | auto_sarimax_aic | hw_aic |
|---|---|---|---|---|---|---|
| AAPL | -955.7663 | -5.261 | -924.2631 | -1266.0871 | -1273.1428 | -410.7185 |
| FB | -970.1904 | -5.1951 | -969.9724 | -1181.106 | -1183.266 | -439.0239 |
| GOOG | -1024.3302 | -5.5749 | -999.6686 | -1247.7873 | -1257.8897 | -471.2123 |
| GE | -805.3401 | -4.5859 | -781.0967 | -950.9291 | -961.377 | -288.8528 |
| PG | -1054.8698 | -6.0227 | -1043.8743 | -1240.5704 | -1250.3551 | -506.022 |
| AMZN | -1052.8152 | -5.6238 | -1031.4278 | -1228.9241 | -1234.9415 | -525.799 |
| TSLA | -659.1552 | -3.8891 | -649.11 | -725.1371 | -731.814 | -168.4221 |
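To summarise, one might read off the lowest-RMSE model for each stock; a sketch assuming the per-model *_rmse lists built above (the rmse_num helper name is illustrative):
# Lowest test-set RMSE per stock across all candidate models
rmse_num <- sapply(list(arima = arima_rmse, arima_garch = arima_garch_rmse,
                        famafrench = famafrench_rmse, arimax = arimax_rmse,
                        auto_sarimax = auto_sarimax_rmse, hw = hw_rmse,
                        nnet = nnet_rmse, knn = knn_rmse),
                   function(x) unlist(x[stockTickers]))
data.frame(stock = stockTickers,
           best_model = colnames(rmse_num)[apply(rmse_num, 1, which.min)])
By this criterion the exogenous-regressor ARIMA variants dominate: for every stock in the table, either the ARIMAX or the auto-SARIMAX model posts the lowest out-of-sample RMSE.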