The objective of this project is to produce a set of models to predict the returns of a given set of stocks. ARIMA (and variations thereof), the Fama-French three-factor model, Holt-Winters/triple exponential smoothing, neural network autoregression, and KNN regression are considered.
Install (if needed) and load the required packages.
#install.packages("tidyverse")
#install.packages("gridExtra")
#install.packages("GGally")
#install.packages("forecast")
#install.packages("tseries")
#install.packages("vrtest")
#install.packages("stats")
#install.packages("quantmod")
#install.packages("dyn")
#install.packages("Metrics")
#install.packages("rugarch")
#install.packages("fGarch")
#install.packages("tsfknn")
library(tidyverse)
library(gridExtra)
library(GGally)
library(forecast)
library(tseries)
library(vrtest)
library(stats)
library(quantmod)
library(dyn)
library(Metrics)
library(rugarch)
library(fGarch)
library(tsfknn)
For the chosen stocks and time frame, generate a dataset containing the log returns of adjusted closing prices as well as the three Fama-French factors. Data sources are Yahoo Finance and Dartmouth College (Kenneth French's data library).
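For reference, the daily log return used throughout is r_t = ln(P_t / P_(t-1)). A minimal sketch of the equivalence between quantmod's Delt(type = "log") and computing it by hand, using an illustrative price vector:
# Hedged sketch: log returns by hand vs. quantmod::Delt (prices is illustrative data)
prices <- c(100, 101, 99.5, 102)
all.equal(as.vector(Delt(prices, type = "log"))[-1], diff(log(prices)))  # TRUE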
stockTickers <- c("AAPL","FB","GOOG","GE","PG","AMZN","TSLA")
end <- as.Date("2020-06-01")
start <- end - 365
url = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip"
download.file(url, "F-F_Research_Data_Factors_daily_CSV.zip")
unzip("F-F_Research_Data_Factors_daily_CSV.zip")
file.remove("F-F_Research_Data_Factors_daily_CSV.zip")
## [1] TRUE
ff <- read.csv("F-F_Research_Data_Factors_daily.CSV", header=T, skip=4)
colnames(ff) <- c("date", "zm","smb","hml","rf")
ff_factors <- c("zm","smb","hml")
ff[,1] <- as.Date(ff[["date"]], "%Y%m%d")
ff <- ff[ff$date >= start & ff$date <= end,]
priceData <- as.data.frame(ff[,"date"])
colnames(priceData) <- "date"
for (i in seq(length(stockTickers))){
ticker = stockTickers[i]
getSymbols(ticker, verbose = FALSE, src = "yahoo",from=start,to=end)
temp_df <- select(as.data.frame(get(ticker)),contains(".Adjusted"))
temp_df <- rownames_to_column(temp_df,"date")
temp_df[,1] <- as.Date(temp_df[["date"]],"%Y-%m-%d")
priceData <- semi_join(priceData,temp_df,by="date")
priceData <- left_join(priceData,temp_df,by="date")
}
colnames(priceData) <- c("date",stockTickers)
returnsData <- priceData
LNreturnsData <- priceData
for (stock in stockTickers)
{
returnsData[[stock]] <- Delt(priceData[[stock]],type = "arithmetic")
LNreturnsData[[stock]] <- Delt(priceData[[stock]],type = "log")
}
returnsData <- semi_join(returnsData,ff,by="date")
returnsData <- left_join(returnsData,ff,by="date")
returnsData <- na.omit(returnsData)
row.names(returnsData) <- (1:nrow(returnsData))
LNreturnsData <- semi_join(LNreturnsData,ff,by="date")
LNreturnsData <- left_join(LNreturnsData,ff,by="date")
LNreturnsData <- na.omit(LNreturnsData)
row.names(LNreturnsData) <- (1:nrow(LNreturnsData))
head(LNreturnsData)
## date AAPL FB GOOG GE PG
## 1 2019-06-04 0.03593075 0.0202027439 0.016101654 0.048140540 0.0084419911
## 2 2019-06-05 0.01601431 0.0039920094 -0.010337736 -0.011060917 0.0193943609
## 3 2019-06-06 0.01457514 0.0009509897 0.002032049 0.003028749 0.0060715364
## 4 2019-06-07 0.02626885 0.0293863564 0.020565811 0.006030015 0.0128615304
## 5 2019-06-10 0.01269847 0.0084442065 0.013361951 0.006989517 -0.0004596271
## 6 2019-06-11 0.01151302 0.0185883108 -0.001537710 0.006941102 0.0060522448
## AMZN TSLA zm smb hml rf
## 1 0.021548130 0.078576006 2.33 0.45 -0.17 0.009
## 2 0.005155597 0.015326116 0.69 -1.01 -0.91 0.009
## 3 0.009081437 0.046513085 0.55 -1.02 -0.01 0.009
## 4 0.027918958 -0.007065431 1.04 -0.01 -1.15 0.009
## 5 0.030892075 0.040160675 0.53 0.00 -0.07 0.009
## 6 0.001648590 0.019629454 -0.12 -0.26 0.59 0.009
tail(LNreturnsData)
## date AAPL FB GOOG GE
## 245 2020-05-21 -0.0074833859 0.006155724 -0.0027904585 0.009302365
## 246 2020-05-22 0.0064177681 0.015097885 0.0054172894 -0.010861207
## 247 2020-05-26 -0.0067965521 -0.011603423 0.0046685255 0.059063318
## 248 2020-05-27 0.0043474771 -0.013265890 0.0005784737 0.069581011
## 249 2020-05-28 0.0004400497 -0.016190375 -0.0007831778 -0.072526512
## 250 2020-05-29 -0.0009745454 -0.001642486 0.0085675599 -0.031463327
## PG AMZN TSLA zm smb hml rf
## 245 -0.014762349 -0.020709846 0.014654925 -0.70 0.65 0.46 0
## 246 0.008741426 -0.004038038 -0.013037706 0.27 0.49 -0.88 0
## 247 -0.005075014 -0.006182600 0.002433124 1.23 0.07 4.63 0
## 248 0.016466378 -0.004747368 0.001659429 1.54 0.64 3.60 0
## 249 0.018874220 -0.003861509 -0.017736784 -0.41 -1.46 -2.44 0
## 250 -0.001207001 0.017041921 0.035583745 0.59 -0.31 -1.97 0
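As a quick sanity check on the assembled dataset, per-stock summary statistics of the daily log returns can be computed; a minimal sketch:
# Hedged sketch: mean and standard deviation of each stock's daily log returns
sapply(stockTickers, function(s) {
  x <- as.vector(LNreturnsData[[s]])
  c(mean = mean(x), sd = sd(x))
})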
Plot time series and box plots of the stock returns (simple and log versions) and the Fama-French factors.
dataSets <- list("Returns" = returnsData, "Log Returns" = LNreturnsData, "Fama-French Factors" = ff)
plots <- list()
for (i in 1:length(dataSets))
{
name <- names(dataSets[i])
df <- dataSets[[i]]
if (i == 3) {columns = c("date",ff_factors)} else {columns = c("date",stockTickers)}
temp_df <- df %>%
select(columns) %>%
gather(key = "Series", value = "Daily_Change", -date)
plot <- ggplot(temp_df, aes(x = date, y = Daily_Change,color=Series)) +
geom_line() +
ggtitle(name)
label <- paste("LinePlot_",name,sep = "")
plots[[label]] <- plot
plot <- ggplot(temp_df, aes(x = Series, y = Daily_Change,color=Series)) +
geom_boxplot() +
ggtitle(name)
label <- paste("BoxPlot_",name,sep = "")
plots[[label]] <- plot
}
Test the log returns of each stock for normality using the Shapiro-Wilk test, comparing each empirical density to a fitted normal curve.
plots <- list()
results <- list()
for (stock in c(stockTickers))
{
x <- LNreturnsData[[stock]]
avg <- mean(x)
sdev <- sd(x)
result = shapiro.test(x)
x <- as.data.frame(x)
densityPlot <- ggplot(x, aes(x=V1)) +
geom_density() +
stat_function(fun = dnorm, args = list(mean = avg, sd = sdev),colour = "red") +
labs(title= stock,x="Log Returns", y = "Density")
name <- paste(stock)
plots[[name]] <- densityPlot
results[[name]] <- result
}
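The stored density plots can be laid out together with gridExtra (loaded above); a minimal sketch:
# Hedged sketch: arrange the stored ggplot objects in a two-column grid
gridExtra::grid.arrange(grobs = plots, ncol = 2)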
print(results)
## $AAPL
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.87443, p-value = 1.771e-13
##
##
## $FB
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.89756, p-value = 5.288e-12
##
##
## $GOOG
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.86856, p-value = 8.008e-14
##
##
## $GE
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.91933, p-value = 2.152e-10
##
##
## $PG
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.84683, p-value = 5.199e-15
##
##
## $AMZN
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.93025, p-value = 1.772e-09
##
##
## $TSLA
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.8783, p-value = 3.03e-13
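Every Shapiro-Wilk p-value is far below 0.05, so normality is rejected for all seven stocks, consistent with the heavy tails typical of daily returns. As a cross-check, the Jarque-Bera test from the already-loaded tseries package assesses normality via skewness and kurtosis; a minimal sketch:
# Hedged sketch: cross-check normality with the Jarque-Bera test (tseries)
for (stock in stockTickers)
{
  print(stock)
  print(jarque.bera.test(as.vector(LNreturnsData[[stock]])))
}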
Apply the Augmented Dickey-Fuller test to each stock's log returns and identify the required order of differencing.
adfTests <- list()
ARIMA_Orders <- as.data.frame(stockTickers)
colnames(ARIMA_Orders) <- "Stock"
ARIMA_Orders$p <- NA
ARIMA_Orders$d <- NA
ARIMA_Orders$q <- NA
for (stock in c(stockTickers))
{
returns <- ts(LNreturnsData[,stock])
adfTests[[stock]] <- adf.test(returns)
NumDiff <- 0
if (adf.test(returns)$p.value > 0.05)
{
for (d in c(1:3))
{
diff_returns <- diff(returns, lag = 1, differences = d)
if (adf.test(diff_returns)$p.value < 0.05)
{
NumDiff <- d
break
}
}
}
ARIMA_Orders[which(ARIMA_Orders$Stock == stock),3] <- NumDiff
if (NumDiff > 0)
{
label <- paste(stock,"_D_order_",NumDiff,sep = "")
adfTests[[label]] <- adf.test(diff(returns, lag = 1, differences = NumDiff))
}
}
print(adfTests)
## $AAPL
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -5.3359, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $FB
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -4.628, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $GOOG
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -4.7999, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $GE
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -4.9234, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $PG
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -6.6662, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $AMZN
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -4.988, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $TSLA
##
## Augmented Dickey-Fuller Test
##
## data: returns
## Dickey-Fuller = -5.8391, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
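All seven series reject the unit-root null at the 5% level without differencing, so d = 0 throughout. As a cross-check, forecast::ndiffs estimates the required differencing order directly; a minimal sketch:
# Hedged sketch: estimate the required differencing order (0 means already stationary)
for (stock in stockTickers)
{
  print(paste(stock, "suggested d:", ndiffs(ts(as.vector(LNreturnsData[[stock]])), test = "adf")))
}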
Inspect the ACF and PACF of the (appropriately differenced) log returns to identify the range of orders p and q to consider (see https://online.stat.psu.edu/stat510/lesson/3/3.1 for reference). Then select the combination that yields the ARIMA model with the lowest AIC, where AIC = 2k - 2*ln(L) trades off the k estimated parameters against the maximized likelihood L (lower is better).
plots <- list()
for (stock in c(stockTickers))
{
d <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),3]
returns <- LNreturnsData[,stock]
train <- head(returns,length(returns)-10)
title1 <- paste("ACF_",stock,sep = "")
label1 <- paste(stock,"_ACF",sep = "")
title2 <- paste("PACF_",stock,sep = "")
label2 <- paste(stock,"_PACF",sep = "")
if (d>0)
{
returns <- diff(returns, lag = 1, differences = d)
title1 <- paste("ACF_",stock,"_Diff_",d,sep = "")
title2 <- paste("PACF_",stock,"_Diff_",d,sep = "")
}
plot <- ggAcf(returns,lag.max=15,type="correlation",plot=TRUE,main=title1)
plots[[label1]] <- plot
plot <- ggAcf(returns,lag.max=15,type="partial",plot=TRUE,main=title2)
plots[[label2]] <- plot
acf <- acf(returns,lag.max=15,type="correlation",plot=FALSE)
clim1 <- qnorm((1 + .95)/2)/sqrt(acf$n.used)
pacf <- acf(returns,lag.max=15,type="partial",plot=FALSE)
clim2 <- qnorm((1 + .95)/2)/sqrt(pacf$n.used)
# Find the lag at which the significant ACF spikes cut off (candidate MA order)
MaxSigLag_ACF <- 1
sigACFs <- as.array(abs(acf$acf[-1]) > clim1)
for (i in (2:(length(sigACFs)-1)))  # stop before the last element to avoid indexing past the array
{
if (sigACFs[i] & !sigACFs[i+1])
{
MaxSigLag_ACF <- i
break
}
}
# Find the lag at which the significant PACF spikes cut off (candidate AR order)
MaxSigLag_PACF <- 1
sigPACFs <- as.array(abs(pacf$acf) > clim2)
for (i in (2:(length(sigPACFs)-1)))  # stop before the last element to avoid indexing past the array
{
if (sigPACFs[i] & !sigPACFs[i+1])
{
MaxSigLag_PACF <- i
break
}
}
CountSigLag_ACF <- sum(abs(acf$acf[-1]) > clim1)
CountSigLag_PACF <- sum(abs(pacf$acf) > clim2)
P_Order <- MaxSigLag_PACF
Q_Order <- MaxSigLag_ACF
# Heuristic: an ACF that tails off relative to a sharper PACF cut-off suggests a pure AR model (q = 0);
# the reverse pattern suggests a pure MA model (p = 0)
if (CountSigLag_ACF > CountSigLag_PACF + 1)
{
Q_Order <- 0
}
if (CountSigLag_ACF < CountSigLag_PACF - 1)
{
P_Order <- 0
}
# Grid-search (p,q) up to the identified maxima and keep the AIC-minimizing combination
AIC_Table <- data.frame("p"=as.numeric(),"q"=as.numeric(),"aic"=as.numeric())
for (i in c(0:P_Order))
{
for (j in c(0:Q_Order))
{
aic <- AIC(Arima(train, order=c(i,d,j),method="ML"))
AIC_Table[nrow(AIC_Table) + 1,] = c(i,j,aic)
}
}
P_Order <- AIC_Table[which(AIC_Table$aic == min(AIC_Table$aic)),1]
Q_Order <- AIC_Table[which(AIC_Table$aic == min(AIC_Table$aic)),2]
ARIMA_Orders[which(ARIMA_Orders$Stock == stock),2] <- P_Order
ARIMA_Orders[which(ARIMA_Orders$Stock == stock),4] <- Q_Order
}
print(ARIMA_Orders)
## Stock p d q
## 1 AAPL 9 0 0
## 2 FB 7 0 0
## 3 GOOG 9 0 0
## 4 GE 4 0 7
## 5 PG 4 0 3
## 6 AMZN 8 0 0
## 7 TSLA 0 0 0
Fit an ARIMA model for each stock using the selected (p,d,q) orders, holding out the last 50 observations as a test set.
arima_models <- list()
arima_titles <- list()
arima_plots <- list()
arima_resfits <- list()
arima_aic <- list()
arima_rmse <- list()
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- nrow(LNreturnsData)
for (stock in c(stockTickers))
{
p <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),2]
d <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),3]
q <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),4]
title <- paste("ARIMA Model For Stock:", stock,"; (p,d,q)=", p,d,q)
log_returns <- ts(LNreturnsData[,stock])
train <- head(log_returns,length(log_returns)-50)
test <- tail(log_returns,50)
model <- Arima(train, order=c(p,d,q),method="ML")
forecast <- forecast(model, h = 50)
plot <- autoplot(forecast,main = title,xlab = "Days",ylab = "Log Returns") +
autolayer(train, series = 'Train') +
autolayer(fitted(model), series = 'Fitted') +
autolayer(forecast$mean, series = 'Forecast') +
autolayer(test, series = 'Test') +
coord_cartesian(xlim = c(plot_midpoint-125,plot_midpoint+10)) +
labs(caption = paste("From:",start,"to",end,"(last 125 days shown)"))
arima_models[[stock]] <- model
label <- paste(stock,"_ARIMA_Model_(p,d,q)=", p,d,q)
arima_titles[[stock]] <- label
arima_plots[[label]] <- plot
label <- paste(stock,"_ARIMA_Model_Residuals",sep = "")
arima_resfits[[label]] <- residuals(model)
label <- stock
arima_aic[[label]] <- round(AIC(model),4)
arima_rmse[[label]] <- round(rmse(test, predict(model,n.ahead=50)$pred),4)
}
print(arima_models)
## $AAPL
## Series: train
## ARIMA(9,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## -0.2494 -0.0296 0.1059 -0.0231 0.0605 0.0117 0.2328 -0.2182
## s.e. 0.0690 0.0705 0.0737 0.0758 0.0804 0.0792 0.0770 0.0810
## ar9 mean
## 0.2815 0.0017
## s.e. 0.0837 0.0018
##
## sigma^2 estimated as 0.0004563: log likelihood=488.88
## AIC=-955.77 AICc=-954.36 BIC=-919.48
##
## $FB
## Series: train
## ARIMA(7,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 mean
## -0.3173 0.096 0.2382 -0.0038 0.0650 0.0894 0.3172 -0.0011
## s.e. 0.0680 0.071 0.0764 0.0800 0.0839 0.0842 0.0846 0.0027
##
## sigma^2 estimated as 0.0004334: log likelihood=494.1
## AIC=-970.19 AICc=-969.24 BIC=-940.51
##
## $GOOG
## Series: train
## ARIMA(9,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## -0.2201 0.0208 0.2416 -0.0331 0.0011 -0.1021 0.3162 -0.0925
## s.e. 0.0684 0.0699 0.0707 0.0736 0.0771 0.0763 0.0754 0.0808
## ar9 mean
## 0.2903 -0.0002
## s.e. 0.0823 0.0021
##
## sigma^2 estimated as 0.0003242: log likelihood=523.17
## AIC=-1024.33 AICc=-1022.93 BIC=-988.05
##
## $GE
## Series: train
## ARIMA(4,0,7) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4 ma5
## -1.108 -0.1093 0.9239 0.7457 0.9316 0.0458 -0.6998 -0.3782 0.2436
## s.e. 0.105 0.1589 0.1509 0.0964 0.1253 0.1735 0.1613 0.1309 0.1046
## ma6 ma7 mean
## -0.0153 0.1261 -0.0027
## s.e. 0.1084 0.0859 0.0048
##
## sigma^2 estimated as 0.0009511: log likelihood=415.67
## AIC=-805.34 AICc=-803.38 BIC=-762.46
##
## $PG
## Series: train
## ARIMA(4,0,3) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 mean
## 0.1664 0.7075 -0.4427 -0.4754 -0.4814 -0.6140 0.6430 5e-04
## s.e. 0.1165 0.1088 0.1217 0.0745 0.1198 0.1103 0.1292 6e-04
##
## sigma^2 estimated as 0.0002832: log likelihood=536.43
## AIC=-1054.87 AICc=-1053.92 BIC=-1025.18
##
## $AMZN
## Series: train
## ARIMA(8,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8 mean
## -0.1384 0.1634 0.1528 -0.1085 0.0151 0.0168 0.1640 -0.2597 0.0004
## s.e. 0.0694 0.0721 0.0730 0.0753 0.0822 0.0821 0.0827 0.0852 0.0012
##
## sigma^2 estimated as 0.0002854: log likelihood=536.41
## AIC=-1052.82 AICc=-1051.65 BIC=-1019.83
##
## $TSLA
## Series: train
## ARIMA(0,0,0) with non-zero mean
##
## Coefficients:
## mean
## 0.0035
## s.e. 0.0033
##
## sigma^2 estimated as 0.002136: log likelihood=331.58
## AIC=-659.16 AICc=-659.09 BIC=-652.56
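Beyond the RMSE stored above, forecast::accuracy reports a fuller set of error measures on both training and test sets. A minimal sketch, using the forecast and test objects left over from the loop's final iteration (TSLA):
# Hedged sketch: richer error measures (ME, RMSE, MAE, MAPE, MASE, ...) for one model
accuracy(forecast, test)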
Check whether the residuals of each model resemble white noise, and test them for autocorrelation and normality.
plots <- list()
for (i in c(1:length(arima_models)))
{
title <- arima_titles[[i]]
model <- arima_models[[i]]
df <- as.data.frame(cbind(fitted(model),model$residuals))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
scale_x_log10() +
ggtitle(label=title)
plots[[i]] <- plot
}
for (i in c(1:length(arima_models)))
{
title <- arima_titles[[i]]
print(title)
model <- arima_models[[i]]
checkresiduals(model)
residuals <- arima_resfits[[i]]
print(shapiro.test(residuals))
}## [1] "AAPL _ARIMA_Model_(p,d,q)= 9 0 0"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(9,0,0) with non-zero mean
## Q* = 4.1784, df = 3, p-value = 0.2428
##
## Model df: 10. Total lags used: 13
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.91972, p-value = 5.548e-09
##
## [1] "FB _ARIMA_Model_(p,d,q)= 7 0 0"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(7,0,0) with non-zero mean
## Q* = 1.4316, df = 3, p-value = 0.6982
##
## Model df: 8. Total lags used: 11
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.94633, p-value = 8.496e-07
##
## [1] "GOOG _ARIMA_Model_(p,d,q)= 9 0 0"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(9,0,0) with non-zero mean
## Q* = 4.9247, df = 3, p-value = 0.1774
##
## Model df: 10. Total lags used: 13
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.91877, p-value = 4.735e-09
##
## [1] "GE _ARIMA_Model_(p,d,q)= 4 0 7"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,0,7) with non-zero mean
## Q* = 2.504, df = 3, p-value = 0.4746
##
## Model df: 12. Total lags used: 15
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.92376, p-value = 1.109e-08
##
## [1] "PG _ARIMA_Model_(p,d,q)= 4 0 3"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,0,3) with non-zero mean
## Q* = 5.8278, df = 3, p-value = 0.1203
##
## Model df: 8. Total lags used: 11
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.88032, p-value = 1.66e-11
##
## [1] "AMZN _ARIMA_Model_(p,d,q)= 8 0 0"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(8,0,0) with non-zero mean
## Q* = 4.6516, df = 3, p-value = 0.1992
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.93782, p-value = 1.488e-07
##
## [1] "TSLA _ARIMA_Model_(p,d,q)= 0 0 0"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,0) with non-zero mean
## Q* = 16.533, df = 9, p-value = 0.05655
##
## Model df: 1. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.83769, p-value = 1.128e-13
Also test each model's squared residuals for autocorrelation; significant autocorrelation there indicates ARCH effects (volatility clustering).
for (stock in c(stockTickers))
{
model <- arima_models[[stock]]
squaredResiduals <- (model$residuals)^2
title <- paste("ARIMA Squared Residuals For Stock:",stock)
plot(ggAcf(squaredResiduals) + ggtitle(title))
print(title)
print(Box.test(squaredResiduals,type = "Ljung-Box"))
}## [1] "ARIMA Squared Residuals For Stock: AAPL"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 0.81273, df = 1, p-value = 0.3673
## [1] "ARIMA Squared Residuals For Stock: FB"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 16.785, df = 1, p-value = 4.186e-05
## [1] "ARIMA Squared Residuals For Stock: GOOG"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 1.8204, df = 1, p-value = 0.1773
## [1] "ARIMA Squared Residuals For Stock: GE"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 17.457, df = 1, p-value = 2.939e-05
## [1] "ARIMA Squared Residuals For Stock: PG"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 60.983, df = 1, p-value = 5.773e-15
## [1] "ARIMA Squared Residuals For Stock: AMZN"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 3.1168, df = 1, p-value = 0.07749
## [1] "ARIMA Squared Residuals For Stock: TSLA"
##
## Box-Ljung test
##
## data: squaredResiduals
## X-squared = 2.414, df = 1, p-value = 0.1203
The Ljung-Box tests on squared residuals show significant autocorrelation for FB, GE, and PG, indicating ARCH effects and motivating ARIMA-GARCH models. Note: only the simple GARCH(1,1) case is considered, and if d > 0, the differencing is not reversed at the end.
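Should reversing the differencing be needed, base R's diffinv can rebuild the original scale from differenced forecasts. A minimal sketch, where fc_diff (differenced forecasts) and original_series are hypothetical placeholders:
# Hedged sketch: invert order-d differencing; fc_diff and original_series are hypothetical
d <- 1                                 # differencing order that was applied
last_vals <- tail(original_series, d)  # last observed value(s) before the forecast window
fc_levels <- diffinv(fc_diff, lag = 1, differences = d, xi = last_vals)[-(1:d)]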
arima_garch_models <- list()
arima_garch_model_sums <- list()
arima_garch_titles <- list()
arima_garch_plots <- list()
arima_garch_resfits <- list()
arima_garch_aic <- list()
arima_garch_rmse <- list()
for (stock in c(stockTickers))
{
p <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),2]
d <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),3]
q <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),4]
returns <- ts(LNreturnsData[,stock])
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
train_date <- head(date,length(date)-50)
test_date <- tail(date,50)
if (d>0)
{
returns <- diff(returns, lag = 1, differences = d)
train_date <- tail(train_date,-d)
}
train <- ts(head(returns,length(returns)-50))
test <- ts(tail(returns,50))
spec <- ugarchspec(variance.model = list(model="sGARCH",garchOrder=c(1,1)),
mean.model = list(armaOrder=c(p,q)),
distribution.model = "std")
model <- ugarchfit(spec=spec, data = train)
fitted <- as.data.frame(fitted(model))
train <- as.data.frame(cbind(train_date,train,fitted))
train$train_date <- as.Date(train$train_date)
colnames(train) <- c("date","train","fitted")
forecast <- ugarchroll(spec,data=returns,
forecast.length=50,
n.start=200,
refit.every=5,
refit.window="moving")
forecastMU <- as.data.frame(forecast@forecast[["density"]][["Mu"]])
forecastSigma <- as.data.frame(forecast@forecast[["density"]][["Sigma"]])
test <- cbind(as.data.frame(test_date),as.data.frame(test),forecastMU,forecastSigma)
colnames(test) <- c("date","test","forecast","sigma")
test$lwr <- test$forecast - 1.96*test$sigma
test$upr <- test$forecast + 1.96*test$sigma
row.names(test) <- (nrow(train)+1):nrow(returns)
colors <- c("Train"="black","Fitted"="red","Test"="green4","Forecast"="blue")
subtitle <- ""
ylab <- "Log-Returns"
if (d > 0)
{
subtitle <- paste("Note: differencing of order",d,"applied and not reversed in plot")
ylab <- "Differenced Log-Returns"
}
title <- paste("ARIMA-GARCH Model For Stock:", stock,"; (p,d,q)=", p,d,q)
plot <- ggplot(train, aes(x = date, y = train,color="Train")) +
geom_line() +
geom_line(aes(y=fitted,color = "Fitted")) +
geom_line(data=test,aes(y=test, color = "Test")) +
geom_line(data=test,aes(y=forecast, color = "Forecast")) +
geom_ribbon(data=test,aes(x=date,ymin=lwr,ymax=upr),fill="grey",inherit.aes=FALSE,alpha=0.5) +
coord_cartesian(xlim = c(max(date)-125,max(date)+10)) +
scale_x_date(date_labels ="%Y/%m/%d") +
scale_color_manual(values = colors) +
labs(x = "Date",y = ylab,color = "Legend") +
ggtitle(label = title) +
labs(caption = paste("From:",start,"to",end,"(last 125 days shown)",paste("\n", subtitle)))
arima_garch_models[[stock]] <- model
arima_garch_model_sums[[stock]] <- model@fit[["matcoef"]]
label <- paste(stock,"_ARIMA_GARCH")
arima_garch_titles[[stock]] <- label
arima_garch_plots[[stock]] <- plot
label <- paste(stock,"_ARIMA_GARCH_Residuals",sep = "")
arima_garch_resfits[[stock]] <- model@fit[["residuals"]]
arima_garch_aic[[stock]] <- round(infocriteria(model)[1],4)
arima_garch_rmse[[stock]] <- round(rmse(test$test, test$forecast),4)
}
plots <- list()
for (stock in c(stockTickers))
{
print(stock)
model <- arima_garch_model_sums[[stock]]
show(model)
}## [1] "AAPL"
## Estimate Std. Error t value Pr(>|t|)
## mu 3.649621e-03 7.365424e-04 4.95507229 7.230340e-07
## ar1 -2.352217e-01 7.671176e-02 -3.06630585 2.167215e-03
## ar2 -2.071692e-01 7.783390e-02 -2.66168392 7.775085e-03
## ar3 -1.656015e-01 7.579359e-02 -2.18490076 2.889612e-02
## ar4 1.478404e-02 7.704716e-02 0.19188295 8.478339e-01
## ar5 1.732262e-02 7.521528e-02 0.23030723 8.178530e-01
## ar6 1.079708e-01 7.402707e-02 1.45853195 1.446940e-01
## ar7 8.828972e-02 7.429979e-02 1.18829035 2.347190e-01
## ar8 1.258571e-03 7.624262e-02 0.01650744 9.868296e-01
## ar9 8.768097e-02 7.153007e-02 1.22579176 2.202770e-01
## omega 2.450037e-05 2.924416e-05 0.83778682 4.021504e-01
## alpha1 2.337583e-01 1.287765e-01 1.81522418 6.948946e-02
## beta1 7.652392e-01 1.569318e-01 4.87625373 1.081195e-06
## shape 3.761687e+00 1.343719e+00 2.79945869 5.118837e-03
## [1] "FB"
## Estimate Std. Error t value Pr(>|t|)
## mu 1.103197e-03 1.060005e-03 1.0407470 0.29799298
## ar1 -1.100828e-01 8.148102e-02 -1.3510239 0.17668777
## ar2 7.269209e-02 7.695702e-02 0.9445803 0.34487316
## ar3 -1.408833e-02 7.692144e-02 -0.1831522 0.85467862
## ar4 -4.056923e-02 7.421605e-02 -0.5466369 0.58462825
## ar5 -3.475349e-02 7.106246e-02 -0.4890555 0.62480238
## ar6 4.739023e-02 7.493545e-02 0.6324141 0.52711634
## ar7 9.515701e-02 6.638571e-02 1.4333960 0.15174469
## omega 4.901197e-05 3.633246e-05 1.3489855 0.17734161
## alpha1 4.601461e-01 2.789960e-01 1.6492929 0.09908764
## beta1 4.692428e-01 2.838560e-01 1.6531016 0.09831020
## shape 3.261638e+01 1.178421e+02 0.2767804 0.78194874
## [1] "GOOG"
## Estimate Std. Error t value Pr(>|t|)
## mu 1.745419e-03 7.981142e-04 2.1869291 0.0287477032
## ar1 -1.144491e-01 7.382990e-02 -1.5501728 0.1211000457
## ar2 -8.131998e-02 7.209612e-02 -1.1279384 0.2593459302
## ar3 -4.538006e-02 7.119346e-02 -0.6374190 0.5238519788
## ar4 -6.434872e-02 7.357059e-02 -0.8746528 0.3817628574
## ar5 -2.209055e-02 7.488251e-02 -0.2950029 0.7679916956
## ar6 2.846210e-02 7.498092e-02 0.3795912 0.7042488606
## ar7 1.150059e-01 7.297966e-02 1.5758626 0.1150574659
## ar8 -2.009201e-02 6.466469e-02 -0.3107107 0.7560205737
## ar9 1.496309e-01 6.767711e-02 2.2109521 0.0270391523
## omega 2.591566e-05 2.783158e-05 0.9311600 0.3517707830
## alpha1 2.563903e-01 1.669178e-01 1.5360269 0.1245317725
## beta1 7.426097e-01 2.007569e-01 3.6990488 0.0002164090
## shape 3.188464e+00 9.023965e-01 3.5333298 0.0004103602
## [1] "GE"
## Estimate Std. Error t value Pr(>|t|)
## mu -0.002117824 2.022519e-06 -1047.122080 0.00000000
## ar1 -0.062999743 5.438901e-05 -1158.317500 0.00000000
## ar2 0.600303162 2.412883e-04 2487.908535 0.00000000
## ar3 0.186027399 8.462096e-05 2198.360660 0.00000000
## ar4 -0.745290245 2.702428e-04 -2757.853935 0.00000000
## ma1 -0.207893770 1.253322e-04 -1658.741984 0.00000000
## ma2 -0.586410998 2.446813e-04 -2396.632225 0.00000000
## ma3 0.110922135 1.282175e-04 865.109062 0.00000000
## ma4 1.008121396 3.230569e-04 3120.569494 0.00000000
## ma5 -0.057857052 3.236202e-05 -1787.807037 0.00000000
## ma6 0.087868058 4.373392e-05 2009.151372 0.00000000
## ma7 0.099609033 4.782429e-05 2082.812419 0.00000000
## omega 0.000226115 2.016024e-04 1.121589 0.26203740
## alpha1 0.455903004 4.232426e-01 1.077167 0.28140561
## beta1 0.455371496 1.803986e-01 2.524252 0.01159448
## shape 3.092001859 1.670667e+00 1.850759 0.06420418
## [1] "PG"
## Estimate Std. Error t value Pr(>|t|)
## mu 1.341153e-04 6.444471e-07 208.109150 0.000000e+00
## ar1 7.421954e-01 1.261071e-04 5885.435988 0.000000e+00
## ar2 7.114814e-01 1.206413e-04 5897.492285 0.000000e+00
## ar3 -5.371457e-01 9.107051e-05 -5898.129292 0.000000e+00
## ar4 4.719911e-02 2.041247e-05 2312.268038 0.000000e+00
## ma1 -9.402452e-01 6.142530e-04 -1530.713228 0.000000e+00
## ma2 -7.027761e-01 4.020936e-04 -1747.792318 0.000000e+00
## ma3 6.218166e-01 1.882717e-04 3302.762500 0.000000e+00
## omega 8.385115e-06 1.182808e-05 0.708916 4.783766e-01
## alpha1 2.625797e-01 7.863126e-02 3.339380 8.396549e-04
## beta1 7.363399e-01 1.030964e-01 7.142244 9.181544e-13
## shape 5.078784e+00 7.305070e-01 6.952410 3.590905e-12
## [1] "AMZN"
## Estimate Std. Error t value Pr(>|t|)
## mu 7.633696e-04 8.270722e-04 0.9229782 3.560186e-01
## ar1 -9.423775e-02 7.816358e-02 -1.2056477 2.279534e-01
## ar2 1.392131e-01 7.430129e-02 1.8736299 6.098145e-02
## ar3 4.932394e-02 7.444118e-02 0.6625894 5.075936e-01
## ar4 -6.420778e-02 7.550703e-02 -0.8503550 3.951277e-01
## ar5 -1.266740e-02 7.153865e-02 -0.1770708 8.594528e-01
## ar6 5.212815e-02 7.263238e-02 0.7176985 4.729432e-01
## ar7 9.246147e-02 6.942511e-02 1.3318160 1.829207e-01
## ar8 -1.234066e-01 6.989609e-02 -1.7655730 7.746752e-02
## omega 1.111474e-05 8.156383e-06 1.3627045 1.729757e-01
## alpha1 1.994853e-01 7.735557e-02 2.5788094 9.914147e-03
## beta1 7.995145e-01 2.635150e-02 30.3403805 0.000000e+00
## shape 4.457273e+00 9.002633e-01 4.9510766 7.380405e-07
## [1] "TSLA"
## Estimate Std. Error t value Pr(>|t|)
## mu 6.157080e-03 0.0017476314 3.523100 4.265308e-04
## omega 8.486799e-05 0.0001538637 0.551579 5.812368e-01
## alpha1 1.113458e-01 0.0848657045 1.312023 1.895123e-01
## beta1 8.876542e-01 0.1493125393 5.944941 2.765565e-09
## shape 2.584444e+00 0.3988481236 6.479770 9.186230e-11
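As an alternative to the rolling re-estimation performed with ugarchroll above, a single fixed-origin forecast can be produced with rugarch's ugarchforecast; a minimal sketch for one fitted model:
# Hedged sketch: 50-step fixed-origin forecast from a fitted ugarchfit object
fc <- ugarchforecast(arima_garch_models[["TSLA"]], n.ahead = 50)
head(fitted(fc))  # conditional mean forecasts
head(sigma(fc))   # conditional volatility forecasts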
Check whether the residuals of each model resemble white noise, and test them for autocorrelation and normality.
plots <- list()
for (i in c(1:length(arima_garch_models)))
{
title <- arima_garch_titles[[i]]
model <- arima_garch_models[[i]]
fitted <- model@fit[["fitted.values"]]
residuals <- model@fit[["residuals"]]
df <- as.data.frame(cbind(fitted,residuals))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
scale_x_log10() +
ggtitle(label=title)
plots[[i]] <- plot
}
for (i in c(1:length(arima_garch_models)))
{
title <- arima_garch_titles[[i]]
print(title)
residuals <- arima_garch_resfits[[i]]
checkresiduals(residuals)
print(shapiro.test(residuals))
}## [1] "AAPL _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.81542, p-value = 1.187e-14
##
## [1] "FB _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.86325, p-value = 1.983e-12
##
## [1] "GOOG _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.82706, p-value = 3.755e-14
##
## [1] "GE _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.85561, p-value = 8.121e-13
##
## [1] "PG _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.77116, p-value = 2.273e-16
##
## [1] "AMZN _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.91646, p-value = 3.227e-09
##
## [1] "TSLA _ARIMA_GARCH"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.83769, p-value = 1.128e-13
Before fitting factor models, examine the correlation between each stock's excess log returns and the three Fama-French factors.
for (stock in c(stockTickers))
{
print(paste("For Stock:",stock))
temp_df <- as.data.frame(LNreturnsData[,c(stock,ff_factors,"rf")])
temp_df[,stock] <- temp_df[,stock] - temp_df$rf
temp_df <- temp_df[,1:4]
plot(ggcorr(temp_df,
method = c("everything", "pearson"),
label = TRUE,
label_round= 3))
}## [1] "For Stock: AAPL"
## [1] "For Stock: FB"
## [1] "For Stock: GOOG"
## [1] "For Stock: GE"
## [1] "For Stock: PG"
## [1] "For Stock: AMZN"
## [1] "For Stock: TSLA"
Fit a dynamic linear Fama-French model for each stock, regressing the excess log return z = r - rf on lags 1 through 5 of the market excess return (zm), size (smb), and value (hml) factors.
famafrench_models <- list()
famafrench_titles <- list()
famafrench_plots <- list()
famafrench_model_sums <- list()
famafrench_resfits <- list()
famafrench_aic <- list()
famafrench_rmse <- list()
rf <- log(LNreturnsData[,"rf"]+1)
factors <- LNreturnsData[,ff_factors]
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- max(date)
for (stock in c(stockTickers))
{
r <- LNreturnsData[,stock]
z <- r-rf
df <- cbind.data.frame(date,z,factors)
colnames(df) <- c("date","z","zm","smb","hml")
train <- head(df,nrow(df)-50)
test <- tail(df,50)
model <- dyn$lm(z ~
lag(zm,-1) +
lag(zm,-2) +
lag(zm,-3) +
lag(zm,-4) +
lag(zm,-5) +
lag(smb,-1) +
lag(smb,-2) +
lag(smb,-3) +
lag(smb,-4) +
lag(smb,-5) +
lag(hml,-1) +
lag(hml,-2) +
lag(hml,-3) +
lag(hml,-4) +
lag(hml,-5),
data=ts(train))
fitted <- as.data.frame(c(rep(NA, 5), as.vector(fitted(model))))  # pad with NAs for the 5 observations lost to lagging
train <- cbind(train,fitted)
colnames(train) <- c("date","z","zm","smb","hml","fitted")
predictInt <- as.data.frame(predict(model,newdata = ts(tail(df,54)),interval = "prediction"))
predictInt <- na.omit(predictInt)
row.names(predictInt) <- (nrow(df)-49):nrow(df)
test <- cbind(test,predictInt)
test <- rename(test,predicted=fit)
colors <- c("Train"="black","Fitted"="red","Test"="green4","Forecast"="blue")
plot <- ggplot(train, aes(x = date, y = z,color="Train")) +
geom_line() +
geom_line(aes(y=fitted,color = "Fitted")) +
geom_line(data=test,aes(y=z, color = "Test")) +
geom_line(data=test,aes(y=predicted, color = "Forecast")) +
geom_ribbon(data=test,aes(ymin=lwr,ymax=upr),fill="grey",alpha=0.5) +
coord_cartesian(xlim = c(max(date)-125,max(date)+10)) +
scale_x_date(date_labels ="%m/%d/%Y") +
scale_color_manual(values = colors) +
labs(x = "Date",y = "Log Return",color = "Legend") +
ggtitle(label = paste("Fama-French model for Stock:",stock)) +
labs(caption = paste("From:",start,"to",end,"(last 125 days shown)"))
famafrench_models[[stock]] <- model
label <- paste(stock,"_FamaFrench")
famafrench_titles[[stock]] <- label
famafrench_plots[[label]] <- plot
famafrench_model_sums[[label]] <- summary(model)
label <- paste(stock,"_FamaFrench_Residuals",sep = "")
famafrench_resfits[[label]] <- residuals(model)
label <- stock
famafrench_aic[[label]] <- round(AIC(model),4)
famafrench_rmse[[label]] <- round(rmse(test$z, predict(model, test)),4)
}
print(famafrench_model_sums)
## $`AAPL _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.096531 -0.008097 0.001547 0.011401 0.083056
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0051758 0.0016507 -3.136 0.00201 **
## lag(zm, -1) -0.0046312 0.0011841 -3.911 0.00013 ***
## lag(zm, -2) 0.0002498 0.0012688 0.197 0.84414
## lag(zm, -3) 0.0017815 0.0013077 1.362 0.17481
## lag(zm, -4) -0.0005875 0.0013315 -0.441 0.65960
## lag(zm, -5) 0.0001734 0.0013954 0.124 0.90126
## lag(smb, -1) 0.0082301 0.0032226 2.554 0.01149 *
## lag(smb, -2) 0.0018676 0.0033225 0.562 0.57474
## lag(smb, -3) 0.0008159 0.0033238 0.245 0.80636
## lag(smb, -4) -0.0011517 0.0033251 -0.346 0.72947
## lag(smb, -5) 0.0005752 0.0033121 0.174 0.86232
## lag(hml, -1) -0.0020547 0.0023763 -0.865 0.38837
## lag(hml, -2) 0.0001039 0.0024554 0.042 0.96629
## lag(hml, -3) 0.0040521 0.0024486 1.655 0.09970 .
## lag(hml, -4) -0.0034198 0.0026233 -1.304 0.19404
## lag(hml, -5) 0.0038567 0.0026290 1.467 0.14414
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02164 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.2798, Adjusted R-squared: 0.2195
## F-statistic: 4.637 on 15 and 179 DF, p-value: 1.82e-07
##
##
## $`FB _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.084136 -0.009460 0.001651 0.012612 0.045797
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.479e-03 1.468e-03 -5.094 8.83e-07 ***
## lag(zm, -1) -5.073e-03 1.053e-03 -4.817 3.10e-06 ***
## lag(zm, -2) 2.245e-03 1.128e-03 1.990 0.04814 *
## lag(zm, -3) 3.158e-03 1.163e-03 2.715 0.00728 **
## lag(zm, -4) -2.508e-04 1.184e-03 -0.212 0.83251
## lag(zm, -5) 3.035e-04 1.241e-03 0.245 0.80710
## lag(smb, -1) 5.008e-03 2.866e-03 1.747 0.08230 .
## lag(smb, -2) 4.509e-03 2.955e-03 1.526 0.12880
## lag(smb, -3) -2.946e-03 2.956e-03 -0.997 0.32030
## lag(smb, -4) -1.128e-03 2.957e-03 -0.382 0.70327
## lag(smb, -5) -1.951e-05 2.946e-03 -0.007 0.99472
## lag(hml, -1) -2.602e-03 2.113e-03 -1.231 0.21980
## lag(hml, -2) -2.790e-03 2.184e-03 -1.278 0.20301
## lag(hml, -3) 5.396e-03 2.178e-03 2.478 0.01415 *
## lag(hml, -4) -9.101e-04 2.333e-03 -0.390 0.69695
## lag(hml, -5) 3.625e-03 2.338e-03 1.550 0.12281
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01925 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.3581, Adjusted R-squared: 0.3043
## F-statistic: 6.657 on 15 and 179 DF, p-value: 2.877e-11
##
##
## $`GOOG _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.066802 -0.007592 0.001034 0.008129 0.083094
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.022e-03 1.360e-03 -4.426 1.66e-05 ***
## lag(zm, -1) -4.703e-03 9.759e-04 -4.819 3.07e-06 ***
## lag(zm, -2) 1.701e-03 1.046e-03 1.626 0.10566
## lag(zm, -3) 2.060e-03 1.078e-03 1.911 0.05755 .
## lag(zm, -4) -4.265e-04 1.097e-03 -0.389 0.69801
## lag(zm, -5) -1.105e-03 1.150e-03 -0.961 0.33787
## lag(smb, -1) 4.582e-03 2.656e-03 1.725 0.08627 .
## lag(smb, -2) 3.406e-03 2.738e-03 1.244 0.21516
## lag(smb, -3) 1.194e-03 2.739e-03 0.436 0.66339
## lag(smb, -4) -5.089e-05 2.741e-03 -0.019 0.98521
## lag(smb, -5) 7.943e-04 2.730e-03 0.291 0.77140
## lag(hml, -1) -1.382e-03 1.959e-03 -0.706 0.48121
## lag(hml, -2) 1.144e-04 2.024e-03 0.057 0.95500
## lag(hml, -3) 5.920e-03 2.018e-03 2.934 0.00379 **
## lag(hml, -4) -1.808e-03 2.162e-03 -0.836 0.40406
## lag(hml, -5) 2.289e-03 2.167e-03 1.057 0.29216
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01784 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.3339, Adjusted R-squared: 0.278
## F-statistic: 5.981 on 15 and 179 DF, p-value: 5.095e-10
##
##
## $`GE _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.138256 -0.011334 -0.000839 0.010854 0.109396
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0074231 0.0023828 -3.115 0.00214 **
## lag(zm, -1) -0.0052737 0.0017093 -3.085 0.00236 **
## lag(zm, -2) 0.0013234 0.0018316 0.723 0.47089
## lag(zm, -3) 0.0016665 0.0018877 0.883 0.37851
## lag(zm, -4) 0.0010255 0.0019221 0.534 0.59434
## lag(zm, -5) 0.0005717 0.0020143 0.284 0.77689
## lag(smb, -1) 0.0033377 0.0046520 0.717 0.47401
## lag(smb, -2) 0.0043733 0.0047961 0.912 0.36308
## lag(smb, -3) 0.0012808 0.0047980 0.267 0.78981
## lag(smb, -4) -0.0008123 0.0048000 -0.169 0.86580
## lag(smb, -5) -0.0032222 0.0047811 -0.674 0.50122
## lag(hml, -1) 0.0004706 0.0034303 0.137 0.89103
## lag(hml, -2) 0.0007205 0.0035444 0.203 0.83915
## lag(hml, -3) 0.0063964 0.0035346 1.810 0.07203 .
## lag(hml, -4) -0.0011070 0.0037868 -0.292 0.77038
## lag(hml, -5) 0.0080490 0.0037950 2.121 0.03531 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03124 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.1871, Adjusted R-squared: 0.119
## F-statistic: 2.747 on 15 and 179 DF, p-value: 0.0007871
##
##
## $`PG _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.089711 -0.004965 0.000685 0.008010 0.051285
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0065327 0.0012147 -5.378 2.33e-07 ***
## lag(zm, -1) -0.0040107 0.0008713 -4.603 7.87e-06 ***
## lag(zm, -2) 0.0012282 0.0009337 1.315 0.19006
## lag(zm, -3) 0.0009051 0.0009623 0.941 0.34820
## lag(zm, -4) -0.0013258 0.0009798 -1.353 0.17775
## lag(zm, -5) -0.0004770 0.0010269 -0.464 0.64286
## lag(smb, -1) 0.0035648 0.0023714 1.503 0.13454
## lag(smb, -2) 0.0023225 0.0024449 0.950 0.34344
## lag(smb, -3) -0.0014090 0.0024459 -0.576 0.56529
## lag(smb, -4) -0.0017078 0.0024469 -0.698 0.48611
## lag(smb, -5) 0.0001877 0.0024373 0.077 0.93870
## lag(hml, -1) -0.0020262 0.0017487 -1.159 0.24811
## lag(hml, -2) 0.0029394 0.0018069 1.627 0.10553
## lag(hml, -3) 0.0054876 0.0018019 3.046 0.00267 **
## lag(hml, -4) -0.0026290 0.0019304 -1.362 0.17495
## lag(hml, -5) -0.0016634 0.0019346 -0.860 0.39104
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01592 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.3452, Adjusted R-squared: 0.2903
## F-statistic: 6.29 on 15 and 179 DF, p-value: 1.357e-10
##
##
## $`AMZN _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.074965 -0.006987 0.000583 0.008321 0.073823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.213e-03 1.254e-03 -5.752 3.76e-08 ***
## lag(zm, -1) -3.143e-03 8.996e-04 -3.494 0.000599 ***
## lag(zm, -2) 1.273e-03 9.640e-04 1.321 0.188315
## lag(zm, -3) 1.355e-03 9.935e-04 1.364 0.174202
## lag(zm, -4) -1.149e-03 1.012e-03 -1.136 0.257440
## lag(zm, -5) -6.959e-04 1.060e-03 -0.656 0.512399
## lag(smb, -1) 2.445e-03 2.448e-03 0.999 0.319320
## lag(smb, -2) 9.119e-05 2.524e-03 0.036 0.971223
## lag(smb, -3) -1.506e-03 2.525e-03 -0.596 0.551606
## lag(smb, -4) -1.998e-03 2.526e-03 -0.791 0.430070
## lag(smb, -5) -7.390e-04 2.516e-03 -0.294 0.769343
## lag(hml, -1) -7.241e-04 1.805e-03 -0.401 0.688841
## lag(hml, -2) 2.148e-04 1.865e-03 0.115 0.908456
## lag(hml, -3) 4.911e-03 1.860e-03 2.640 0.009031 **
## lag(hml, -4) -1.356e-03 1.993e-03 -0.680 0.497276
## lag(hml, -5) -8.584e-05 1.997e-03 -0.043 0.965769
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01644 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.223, Adjusted R-squared: 0.1578
## F-statistic: 3.424 on 15 and 179 DF, p-value: 4.044e-05
##
##
## $`TSLA _FamaFrench`
##
## Call:
## lm(formula = dyn(z ~ lag(zm, -1) + lag(zm, -2) + lag(zm, -3) +
## lag(zm, -4) + lag(zm, -5) + lag(smb, -1) + lag(smb, -2) +
## lag(smb, -3) + lag(smb, -4) + lag(smb, -5) + lag(hml, -1) +
## lag(hml, -2) + lag(hml, -3) + lag(hml, -4) + lag(hml, -5)),
## data = ts(train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.163220 -0.018067 0.000805 0.018476 0.172205
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0017733 0.0033424 -0.531 0.596
## lag(zm, -1) 0.0001504 0.0023977 0.063 0.950
## lag(zm, -2) 0.0042193 0.0025692 1.642 0.102
## lag(zm, -3) 0.0034362 0.0026480 1.298 0.196
## lag(zm, -4) 0.0040320 0.0026962 1.495 0.137
## lag(zm, -5) 0.0017572 0.0028256 0.622 0.535
## lag(smb, -1) -0.0016971 0.0065255 -0.260 0.795
## lag(smb, -2) -0.0046190 0.0067277 -0.687 0.493
## lag(smb, -3) 0.0109193 0.0067304 1.622 0.106
## lag(smb, -4) 0.0078797 0.0067331 1.170 0.243
## lag(smb, -5) 0.0097444 0.0067067 1.453 0.148
## lag(hml, -1) -0.0051803 0.0048118 -1.077 0.283
## lag(hml, -2) 0.0048022 0.0049719 0.966 0.335
## lag(hml, -3) 0.0054301 0.0049582 1.095 0.275
## lag(hml, -4) -0.0063700 0.0053119 -1.199 0.232
## lag(hml, -5) 0.0072876 0.0053235 1.369 0.173
##
## Residual standard error: 0.04382 on 179 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.1728, Adjusted R-squared: 0.1034
## F-statistic: 2.492 on 15 and 179 DF, p-value: 0.002338
Check whether the residuals of each model resemble white noise, and test them for autocorrelation and normality.
plots <- list()
for (i in c(1:length(famafrench_models)))
{
title <- famafrench_titles[[i]]
model <- famafrench_models[[i]]
df <- as.data.frame(cbind(fitted(model),model$residuals))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
scale_x_log10() +
ggtitle(label=title)
plots[[i]] <- plot
}
for (i in c(1:length(famafrench_models)))
{
title <- famafrench_titles[[i]]
print(title)
model <- famafrench_models[[i]]
checkresiduals(model)
residuals <- famafrench_resfits[[i]]
print(shapiro.test(residuals))
}## [1] "AAPL _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.92188, p-value = 1.12e-08
##
## [1] "FB _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.96406, p-value = 6.999e-05
##
## [1] "GOOG _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.93153, p-value = 6.136e-08
##
## [1] "GE _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.89841, p-value = 2.897e-10
##
## [1] "PG _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.90096, p-value = 4.192e-10
##
## [1] "AMZN _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.92828, p-value = 3.411e-08
##
## [1] "TSLA _FamaFrench"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.9139, p-value = 3.016e-09
Next, fit ARIMAX models: ARIMA with the previously selected orders and the three Fama-French factors as exogenous regressors, applied to excess log returns.
arimax_models <- list()
arimax_titles <- list()
arimax_plots <- list()
arimax_resfits <- list()
arimax_aic <- list()
arimax_rmse <- list()
r <- LNreturnsData[,stockTickers]
rf <- LNreturnsData[,"rf"]
z <- as.data.frame(r-rf)
xregs <- as.matrix(LNreturnsData[,ff_factors])
xregs_train <- head(xregs,nrow(xregs)-50)
xregs_test <- tail(xregs,50)
row.names(xregs_test) <- (nrow(xregs)-49):nrow(xregs)
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- nrow(LNreturnsData)
for (stock in c(stockTickers))
{
p <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),2]
d <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),3]
q <- ARIMA_Orders[which(ARIMA_Orders$Stock == stock),4]
title <- paste("ARIMAX Model For Stock:", stock,"; (p,d,q)=", p,d,q)
log_returns <- ts(z[,stock])
train <- head(log_returns,length(log_returns)-50)
test <- tail(log_returns,50)
model <- Arima(train, order=c(p,d,q),xreg = xregs_train,method="ML")
forecast <- forecast(model, h = 50,xreg = xregs_test)
plot <- autoplot(forecast,main = title,xlab = "Days",ylab = "Log Returns") +
autolayer(train, series = 'Train') +
autolayer(fitted(model), series = 'Fitted') +
autolayer(forecast$mean, series = 'Forecast') +
autolayer(test, series = 'Test') +
coord_cartesian(xlim = c(plot_midpoint-125,plot_midpoint+10)) +
labs(caption = paste("From:",start,"to",end,"(last 125 days shown)"))
arimax_models[[stock]] <- model
label <- paste(stock,"_ARIMAX_Model_(p,d,q)=", p,d,q)
arimax_titles[[stock]] <- label
arimax_plots[[label]] <- plot
label <- paste(stock,"_ARIMAX_Model_Residuals",sep = "")
arimax_resfits[[label]] <- residuals(model)
label <- stock
arimax_aic[[label]] <- round(AIC(model),4)
arimax_rmse[[label]] <- round(rmse(test, forecast$mean),4)
}
print(arimax_models)
## $AAPL
## Series: train
## Regression with ARIMA(9,0,0) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8 ar9
## 0.0186 -0.1638 -0.1269 0.0792 0.0144 0.0101 0.0164 0.0012 0.0169
## s.e. 0.0722 0.0749 0.0743 0.0750 0.0758 0.0762 0.0741 0.0741 0.0746
## intercept zm smb hml
## -0.0054 0.0119 -0.0050 -0.0018
## s.e. 0.0006 0.0004 0.0012 0.0010
##
## sigma^2 estimated as 9.69e-05: log likelihood=647.04
## AIC=-1266.09 AICc=-1263.82 BIC=-1219.91
##
## $FB
## Series: train
## Regression with ARIMA(7,0,0) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 intercept
## -0.0759 -0.1005 -0.1503 -0.0825 -0.0500 0.0513 -0.0589 -0.0077
## s.e. 0.0724 0.0756 0.0789 0.0765 0.0768 0.0783 0.0766 0.0006
## zm smb hml
## 0.0113 0.0006 -0.0051
## s.e. 0.0005 0.0015 0.0012
##
## sigma^2 estimated as 0.0001496: log likelihood=602.55
## AIC=-1181.11 AICc=-1179.44 BIC=-1141.53
##
## $GOOG
## Series: train
## Regression with ARIMA(9,0,0) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## -0.0945 -0.1786 -0.0044 -0.0578 -0.0894 -0.0431 -0.0250 0.0399
## s.e. 0.0720 0.0789 0.0768 0.0748 0.0744 0.0743 0.0766 0.0733
## ar9 intercept zm smb hml
## 0.0745 -0.0067 0.0099 -0.0012 -0.0021
## s.e. 0.0731 0.0005 0.0004 0.0013 0.0011
##
## sigma^2 estimated as 0.0001062: log likelihood=637.89
## AIC=-1247.79 AICc=-1245.52 BIC=-1201.61
##
## $GE
## Series: train
## Regression with ARIMA(4,0,7) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## 0.0505 0.2695 -0.3801 -0.512 -0.1727 -0.2452 0.4001 0.5801
## s.e. NaN NaN 0.2405 NaN NaN NaN NaN NaN
## ma5 ma6 ma7 intercept zm smb hml
## -0.0228 0.0546 0.0441 -0.0071 0.0122 0.0047 0.0056
## s.e. NaN NaN NaN 0.0015 0.0008 0.0025 0.0022
##
## sigma^2 estimated as 0.000462: log likelihood=491.46
## AIC=-950.93 AICc=-947.96 BIC=-898.16
##
## $PG
## Series: train
## Regression with ARIMA(4,0,3) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 intercept
## 0.5738 0.4030 -0.7824 0.1069 -0.3860 -0.5248 0.4852 -0.0071
## s.e. 0.3931 0.1962 0.3106 0.1622 0.3854 0.1835 0.3751 0.0006
## zm smb hml
## 0.0081 -0.0061 -0.0026
## s.e. 0.0004 0.0012 0.0010
##
## sigma^2 estimated as 0.0001109: log likelihood=632.29
## AIC=-1240.57 AICc=-1238.9 BIC=-1200.99
##
## $AMZN
## Series: train
## Regression with ARIMA(8,0,0) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.0692 0.1293 -0.0523 -0.0130 0.0787 0.1269 -0.0183 -0.0789
## s.e. 0.0716 0.0729 0.0760 0.0743 0.0771 0.0761 0.0757 0.0766
## intercept zm smb hml
## -0.0072 8e-03 -0.0036 -0.0039
## s.e. 0.0010 4e-04 0.0013 0.0011
##
## sigma^2 estimated as 0.0001172: log likelihood=627.46
## AIC=-1228.92 AICc=-1226.97 BIC=-1186.05
##
## $TSLA
## Series: train
## Regression with ARIMA(0,0,0) errors
##
## Coefficients:
## intercept zm smb hml
## -0.0023 0.0137 0.0120 -0.0036
## s.e. 0.0028 0.0016 0.0047 0.0038
##
## sigma^2 estimated as 0.001513: log likelihood=367.57
## AIC=-725.14 AICc=-724.83 BIC=-708.65
Check whether the residuals of each model resemble white noise, and test them for autocorrelation and normality.
plots <- list()
for (i in c(1:length(arimax_models)))
{
title <- arimax_titles[[i]]
model <- arimax_models[[i]]
df <- as.data.frame(cbind(fitted(model),residuals(model)))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
scale_x_log10() +
ggtitle(label=title)
plots[[i]] <- plot
}
for (i in c(1:length(arimax_models)))
{
title <- arimax_titles[[i]]
print(title)
model <- arimax_models[[i]]
checkresiduals(model)
residuals <- arimax_resfits[[i]]
print(shapiro.test(residuals))
}## [1] "AAPL _ARIMAX_Model_(p,d,q)= 9 0 0"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(9,0,0) errors
## Q* = 2.3656, df = 3, p-value = 0.5001
##
## Model df: 13. Total lags used: 16
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.99483, p-value = 0.7244
##
## [1] "FB _ARIMAX_Model_(p,d,q)= 7 0 0"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(7,0,0) errors
## Q* = 5.736, df = 3, p-value = 0.1252
##
## Model df: 11. Total lags used: 14
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.95826, p-value = 1.287e-05
##
## [1] "GOOG _ARIMAX_Model_(p,d,q)= 9 0 0"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(9,0,0) errors
## Q* = 5.6505, df = 3, p-value = 0.1299
##
## Model df: 13. Total lags used: 16
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.78345, p-value = 6.423e-16
##
## [1] "GE _ARIMAX_Model_(p,d,q)= 4 0 7"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(4,0,7) errors
## Q* = 1.2632, df = 3, p-value = 0.7379
##
## Model df: 15. Total lags used: 18
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.84548, p-value = 2.607e-13
##
## [1] "PG _ARIMAX_Model_(p,d,q)= 4 0 3"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(4,0,3) errors
## Q* = 12.371, df = 3, p-value = 0.006215
##
## Model df: 11. Total lags used: 14
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.96221, p-value = 3.456e-05
##
## [1] "AMZN _ARIMAX_Model_(p,d,q)= 8 0 0"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(8,0,0) errors
## Q* = 4.6201, df = 3, p-value = 0.2018
##
## Model df: 12. Total lags used: 15
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.80593, p-value = 4.822e-15
##
## [1] "TSLA _ARIMAX_Model_(p,d,q)= 0 0 0"
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,0) errors
## Q* = 10.185, df = 6, p-value = 0.1171
##
## Model df: 4. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.88076, p-value = 1.758e-11
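Four model families have now been fitted, each accumulating per-stock AIC and test-set RMSE lists, which can be assembled into a comparison table. A minimal sketch (note the targets differ slightly: raw log returns for ARIMA and ARIMA-GARCH versus excess log returns for the factor models):
# Hedged sketch: side-by-side test-set RMSE for the model families fitted so far
rmse_table <- data.frame(ARIMA = unlist(arima_rmse),
                         ARIMA_GARCH = unlist(arima_garch_rmse),
                         FamaFrench = unlist(famafrench_rmse),
                         ARIMAX = unlist(arimax_rmse))
print(rmse_table)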
Use the auto.arima function to automatically select the optimal orders as well as seasonal components (the series are given a weekly frequency of 5 trading days).
auto_sarimax_models <- list()
auto_sarimax_titles <- list()
auto_sarimax_plots <- list()
auto_sarimax_resfits <- list()
auto_sarimax_aic <- list()
auto_sarimax_rmse <- list()
r <- LNreturnsData[,stockTickers]
rf <- LNreturnsData[,"rf"]
z <- as.data.frame(r-rf)
xregs <- as.matrix(LNreturnsData[,ff_factors])
xregs_train <- head(xregs,nrow(xregs)-50)
xregs_test <- tail(xregs,50)
row.names(xregs_test) <- (nrow(xregs)-49):nrow(xregs)
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- nrow(LNreturnsData)/5
for (stock in c(stockTickers))
{
title <- paste("SARIMAX Model For Stock:", stock)
log_returns <- ts(z[,stock],frequency = 5)
train <- head(log_returns,length(log_returns)-50)
test <- tail(log_returns,50)
model <- auto.arima(train,xreg = xregs_train)
forecast <- forecast(model, h = 50,xreg = xregs_test)
plot <- autoplot(forecast,main = title,xlab = "Weeks",ylab = "Log Returns") +
autolayer(train, series = 'Train') +
autolayer(fitted(model), series = 'Fitted') +
autolayer(forecast$mean, series = 'Forecast') +
autolayer(test, series = 'Test') +
coord_cartesian(xlim = c(plot_midpoint-25,plot_midpoint+1)) +
labs(caption = paste("From:",start,"to",end,"(last 25 weeks shown)"))
auto_sarimax_models[[stock]] <- model
label <- paste(stock,"_Auto_SARIMAX_Model=")
auto_sarimax_titles[[stock]] <- label
auto_sarimax_plots[[label]] <- plot
label <- paste(stock,"_Auto_SARIMAX_Model_Residuals",sep = "")
auto_sarimax_resfits[[label]] <- residuals(model)
label <- stock
auto_sarimax_aic[[label]] <- round(AIC(model),4)
auto_sarimax_rmse[[label]] <- round(rmse(test, forecast$mean),4)
}
print(auto_sarimax_models)
## $AAPL
## Series: train
## Regression with ARIMA(0,0,0) errors
##
## Coefficients:
## intercept zm smb hml
## -0.0054 0.0121 -0.0045 -0.002
## s.e. 0.0007 0.0004 0.0012 0.001
##
## sigma^2 estimated as 9.772e-05: log likelihood=641.57
## AIC=-1273.14 AICc=-1272.83 BIC=-1256.65
##
## $FB
## Series: train
## Regression with ARIMA(0,0,0)(2,0,0)[5] errors
##
## Coefficients:
## sar1 sar2 intercept zm smb hml
## -0.0170 0.0339 -0.0078 0.0111 0.0004 -0.0054
## s.e. 0.0764 0.0794 0.0009 0.0005 0.0015 0.0012
##
## sigma^2 estimated as 0.0001517: log likelihood=598.63
## AIC=-1183.27 AICc=-1182.68 BIC=-1160.18
##
## $GOOG
## Series: train
## Regression with ARIMA(0,0,2)(1,0,0)[5] errors
##
## Coefficients:
## ma1 ma2 sar1 intercept zm smb hml
## -0.0897 -0.1831 -0.0864 -0.0067 0.0099 -0.0010 -0.0021
## s.e. 0.0721 0.0804 0.0735 0.0005 0.0004 0.0013 0.0010
##
## sigma^2 estimated as 0.0001039: log likelihood=636.94
## AIC=-1257.89 AICc=-1257.14 BIC=-1231.5
##
## $GE
## Series: train
## Regression with ARIMA(0,0,0) errors
##
## Coefficients:
## intercept zm smb hml
## -0.0072 0.0122 0.0041 0.0054
## s.e. 0.0015 0.0009 0.0026 0.0021
##
## sigma^2 estimated as 0.0004645: log likelihood=485.69
## AIC=-961.38 AICc=-961.07 BIC=-944.89
##
## $PG
## Series: train
## Regression with ARIMA(2,0,2)(0,0,1)[5] errors
##
## Coefficients:
## ar1 ar2 ma1 ma2 sma1 intercept zm smb
## 0.1781 -0.8144 0.0503 0.8977 -0.3504 -0.0071 0.0082 -0.0062
## s.e. 0.1314 0.0652 0.1026 0.0639 0.0775 0.0006 0.0004 0.0012
## hml
## -0.0024
## s.e. 0.0010
##
## sigma^2 estimated as 0.0001062: log likelihood=635.18
## AIC=-1250.36 AICc=-1249.19 BIC=-1217.37
##
## $AMZN
## Series: train
## Regression with ARIMA(0,0,0)(1,0,0)[5] errors
##
## Coefficients:
## sar1 intercept zm smb hml
## 0.0667 -0.0073 8e-03 -0.0037 -0.0044
## s.e. 0.0764 0.0008 5e-04 0.0013 0.0011
##
## sigma^2 estimated as 0.0001177: log likelihood=623.47
## AIC=-1234.94 AICc=-1234.51 BIC=-1215.15
##
## $TSLA
## Series: train
## Regression with ARIMA(1,0,1) errors
##
## Coefficients:
## ar1 ma1 zm smb hml
## -0.5505 0.7375 0.0129 0.0124 -0.0036
## s.e. 0.1677 0.1330 0.0015 0.0045 0.0037
##
## sigma^2 estimated as 0.001456: log likelihood=371.91
## AIC=-731.81 AICc=-731.38 BIC=-712.02
Check whether the residuals of each model resemble white noise, and test them for autocorrelation and normality.
plots <- list()
for (stock in c(stockTickers))
{
title <- auto_sarimax_titles[[stock]]
model <- auto_sarimax_models[[stock]]
df <- as.data.frame(cbind(fitted(model),model$residuals))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
scale_x_log10() +
ggtitle(label=title)
plots[[stock]] <- plot
}
for (i in c(1:length(auto_sarimax_models)))
{
title <- auto_sarimax_titles[[i]]
print(title)
model <- auto_sarimax_models[[i]]
checkresiduals(model)
residuals <- auto_sarimax_resfits[[i]]
print(shapiro.test(residuals))
}## [1] "AAPL _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,0) errors
## Q* = 12.493, df = 6, p-value = 0.05183
##
## Model df: 4. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.99562, p-value = 0.8359
##
## [1] "FB _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,0)(2,0,0)[5] errors
## Q* = 8.5698, df = 4, p-value = 0.0728
##
## Model df: 6. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.95517, p-value = 6.146e-06
##
## [1] "GOOG _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,2)(1,0,0)[5] errors
## Q* = 3.063, df = 3, p-value = 0.382
##
## Model df: 7. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.78414, p-value = 6.819e-16
##
## [1] "GE _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,0) errors
## Q* = 6.0952, df = 6, p-value = 0.4126
##
## Model df: 4. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.83498, p-value = 8.481e-14
##
## [1] "PG _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,0,2)(0,0,1)[5] errors
## Q* = 7.0682, df = 3, p-value = 0.06976
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.95991, p-value = 1.932e-05
##
## [1] "AMZN _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,0)(1,0,0)[5] errors
## Q* = 8.4427, df = 5, p-value = 0.1335
##
## Model df: 5. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.8191, p-value = 1.698e-14
##
## [1] "TSLA _Auto_SARIMAX_Model="
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,0,1) errors
## Q* = 3.8256, df = 5, p-value = 0.5748
##
## Model df: 5. Total lags used: 10
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.88644, p-value = 3.735e-11
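With seven pairs of tests to read off, a compact p-value table can help. A sketch under the assumption that the residual series stored in auto_sarimax_resfits above are used (note this plain Box.test call does not apply the model-df adjustment that checkresiduals uses):
# Tabulate lag-10 Ljung-Box and Shapiro-Wilk p-values per model
diag_pvals <- t(sapply(auto_sarimax_resfits, function(r) c(
  ljung_box = Box.test(r, lag = 10, type = "Ljung-Box")$p.value,
  shapiro   = shapiro.test(r)$p.value)))
round(diag_pvals, 4)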
For each stock, plot the STL decomposition of the log-return time series to assess whether seasonality (specifically, weekly) is present.
plots <- list()
for (stock in c(stockTickers))
{
print(paste("For Stock:",stock))
returns <- LNreturnsData[,stock]
returns <- ts(as.vector(returns),frequency = 5)
plot <- autoplot(stl(returns, s.window = "periodic")) + ggtitle(label=stock)
plots[[stock]] <- plot
}## [1] "For Stock: AAPL"
## [1] "For Stock: FB"
## [1] "For Stock: GOOG"
## [1] "For Stock: GE"
## [1] "For Stock: PG"
## [1] "For Stock: AMZN"
## [1] "For Stock: TSLA"
hw_models <- list()
hw_titles <- list()
hw_plots <- list()
hw_resfits <- list()
hw_aic <- list()
hw_rmse <- list()
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- nrow(LNreturnsData)/5
for (stock in c(stockTickers))
{
title <- paste("HW Model For Stock:", stock)
log_returns <- ts(LNreturnsData[,stock],frequency = 5)
train <- head(log_returns,length(log_returns)-50)
test <- tail(log_returns,50)
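# hw() fits level, trend and a 5-day (weekly) additive seasonal component,
# then produces the 50-step forecast for the holdout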
model <- hw(train,h=50, seasonal='additive')
plot <- autoplot(model,main = title,xlab = "Weeks",ylab = "Log Returns") +
autolayer(train, series = 'Train') +
autolayer(fitted(model), series = 'Fitted') +
autolayer(model$mean, series = 'Forecast') +
autolayer(test, series = 'Test') +
coord_cartesian(xlim = c(plot_midpoint-25,plot_midpoint+1)) +
labs(caption = paste("From:",start,"to",end,"(last 25 weeks shown)"))
hw_models[[stock]] <- model
label <- paste(stock,"_HW_Model")
hw_titles[[stock]] <- label
hw_plots[[label]] <- plot
label <- paste(stock,"_HW_Model_Residuals",sep = "")
hw_resfits[[label]] <- residuals(model)
label <- stock
hw_aic[[label]] <- round(model$model$aic,4)
hw_rmse[[label]] <- round(rmse(test, model$mean),4)
}
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0198
## beta = 0.0088
## gamma = 1e-04
##
## Initial states:
## l = 0.0099
## b = -0.0011
## s = 0.0048 -0.0014 -0.0042 0.0032 -0.0024
##
## sigma: 0.0247
##
## AIC AICc BIC
## -410.7185 -409.5545 -377.7353
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 4.905666e-05 0.02409009 0.01535927 102.6073 316.7456 0.7808198
## ACF1
## Training set -0.3812695
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.02435048 -0.05594216 0.0072412008 -0.07266579 0.023964827
## 41.20 -0.01976881 -0.05137333 0.0118357113 -0.06810376 0.028566135
## 41.40 -0.02829765 -0.05992411 0.0033288018 -0.07666614 0.020070836
## 41.60 -0.02652078 -0.05818066 0.0051391072 -0.07494039 0.021898837
## 41.80 -0.02135821 -0.05306540 0.0103489814 -0.06985017 0.027133753
## 42.00 -0.02955841 -0.06132933 0.0022125103 -0.07814784 0.019031019
## 42.20 -0.02497674 -0.05682973 0.0068762481 -0.07369168 0.023738201
## 42.40 -0.03350558 -0.06546143 -0.0015497355 -0.08237783 0.015366668
## 42.60 -0.03172870 -0.06381039 0.0003529815 -0.08079341 0.017336000
## 42.80 -0.02656614 -0.05879875 0.0056664806 -0.07586167 0.022729397
## 43.00 -0.03476634 -0.06717732 -0.0023553573 -0.08433465 0.014801979
## 43.20 -0.03018467 -0.06280269 0.0024333552 -0.08006963 0.019700293
## 43.40 -0.03871351 -0.07156937 -0.0058576512 -0.08896221 0.011535190
## 43.60 -0.03693663 -0.07006277 -0.0038104931 -0.08759869 0.013725426
## 43.80 -0.03177406 -0.06520444 0.0016563081 -0.08290140 0.019353278
## 44.00 -0.03997427 -0.07374462 -0.0062039130 -0.09162156 0.011673032
## 44.20 -0.03539260 -0.06953897 -0.0012462186 -0.08761497 0.016829781
## 44.40 -0.04392144 -0.07848136 -0.0093615169 -0.09677628 0.008933400
## 44.60 -0.04214456 -0.07715641 -0.0071327154 -0.09569056 0.011401436
## 44.80 -0.03698199 -0.07248484 -0.0014791482 -0.09127891 0.017314922
## 45.00 -0.04518219 -0.08121618 -0.0091482095 -0.10029142 0.009927030
## 45.20 -0.04060052 -0.07720508 -0.0039959717 -0.09658236 0.015381308
## 45.40 -0.04912937 -0.08634468 -0.0119140515 -0.10604528 0.007786546
## 45.60 -0.04735249 -0.08521884 -0.0094861372 -0.10526408 0.010559099
## 45.80 -0.04218992 -0.08074753 -0.0036323082 -0.10115870 0.016778859
## 46.00 -0.05039012 -0.08967965 -0.0111005942 -0.11047827 0.009698026
## 46.20 -0.04580845 -0.08586905 -0.0057478558 -0.10707585 0.015458943
## 46.40 -0.05433729 -0.09520835 -0.0134662363 -0.11684418 0.008169595
## 46.60 -0.05256042 -0.09428087 -0.0108399616 -0.11636635 0.011245514
## 46.80 -0.04739785 -0.09000611 -0.0047895887 -0.11256156 0.017765862
## 47.00 -0.05559805 -0.09913258 -0.0120635229 -0.12217837 0.010982264
## 47.20 -0.05101638 -0.09551369 -0.0065190669 -0.11906915 0.017036388
## 47.40 -0.05954522 -0.10504183 -0.0140486203 -0.12912627 0.010035826
## 47.60 -0.05776835 -0.10430004 -0.0112366551 -0.12893243 0.013395734
## 47.80 -0.05260578 -0.10020763 -0.0050039237 -0.12540653 0.020194975
## 48.00 -0.06080598 -0.10951300 -0.0120989595 -0.13529694 0.013684979
## 48.20 -0.05622431 -0.10606940 -0.0063792158 -0.13245580 0.020007183
## 48.40 -0.06475315 -0.11576916 -0.0137371398 -0.14277541 0.013269105
## 48.60 -0.06297627 -0.11519531 -0.0107572407 -0.14283840 0.016885846
## 48.80 -0.05781371 -0.11126713 -0.0043602791 -0.13956367 0.023936255
## 49.00 -0.06601391 -0.12073305 -0.0112947663 -0.14969961 0.017671797
## 49.20 -0.06143224 -0.11744634 -0.0054181321 -0.14709842 0.024233944
## 49.40 -0.06996108 -0.12729940 -0.0126227628 -0.15765247 0.017730309
## 49.60 -0.06818420 -0.12687531 -0.0094930997 -0.15794450 0.021576094
## 49.80 -0.06302163 -0.12309344 -0.0029498244 -0.15489354 0.028850270
## 50.00 -0.07122184 -0.13270231 -0.0097413634 -0.16524810 0.022804433
## 50.20 -0.06664017 -0.12955529 -0.0037250451 -0.16286054 0.029580208
## 50.40 -0.07516901 -0.13954486 -0.0107931605 -0.17362337 0.023285355
## 50.60 -0.07339213 -0.13925421 -0.0075300481 -0.17411949 0.027335233
## 50.80 -0.06822956 -0.13560284 -0.0008562858 -0.17126810 0.034808972
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0078
## beta = 0.0077
## gamma = 0.0515
##
## Initial states:
## l = 0.0171
## b = -9e-04
## s = 0.0091 -1e-04 -0.0045 -7e-04 -0.0038
##
## sigma: 0.023
##
## AIC AICc BIC
## -439.0239 -437.8599 -406.0407
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.000337886 0.0224443 0.01491014 -Inf Inf 0.7265193 -0.3744474
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.03069492 -0.06012832 -0.001261513 -0.07570943 0.0143195906
## 41.20 -0.02050976 -0.04994668 0.008927151 -0.06552964 0.0245101115
## 41.40 -0.03760641 -0.06705117 -0.008161649 -0.08263829 0.0074254667
## 41.60 -0.02451170 -0.05397038 0.004946973 -0.06956486 0.0205414538
## 41.80 -0.02487569 -0.05435606 0.004604671 -0.06996202 0.0202106338
## 42.00 -0.03796778 -0.06758764 -0.008347919 -0.08326745 0.0073318878
## 42.20 -0.02778262 -0.05744467 0.001879427 -0.07314681 0.0175815670
## 42.40 -0.04487927 -0.07459630 -0.015162240 -0.09032755 0.0005690067
## 42.60 -0.03178456 -0.06157101 -0.001998118 -0.07733900 0.0137698726
## 42.80 -0.03214855 -0.06202044 -0.002276670 -0.07783366 0.0135365503
## 43.00 -0.04524064 -0.07537892 -0.015102358 -0.09133316 0.0008518838
## 43.20 -0.03505548 -0.06531523 -0.004795740 -0.08133377 0.0112228007
## 43.40 -0.05215213 -0.08255378 -0.021750487 -0.09864743 -0.0056568289
## 43.60 -0.03905742 -0.06962278 -0.008492058 -0.08580311 0.0076882673
## 43.80 -0.03942141 -0.07017362 -0.008669211 -0.08645285 0.0076100213
## 44.00 -0.05251350 -0.08368989 -0.021337109 -0.10019367 -0.0048333257
## 44.20 -0.04232834 -0.07373979 -0.010916898 -0.09036800 0.0057113158
## 44.40 -0.05942499 -0.09109786 -0.027752127 -0.10786446 -0.0109855254
## 44.60 -0.04633028 -0.07829187 -0.014368688 -0.09521132 0.0025507575
## 44.80 -0.04669427 -0.07897275 -0.014415802 -0.09605994 0.0026713893
## 45.00 -0.05978636 -0.09266481 -0.026907911 -0.11006960 -0.0095031124
## 45.20 -0.04960120 -0.08285206 -0.016350340 -0.10045401 0.0012516029
## 45.40 -0.06669785 -0.10035101 -0.033044688 -0.11816592 -0.0152297798
## 45.60 -0.05360314 -0.08768892 -0.019517358 -0.10573285 -0.0014734340
## 45.80 -0.05396713 -0.08851619 -0.019418079 -0.10680535 -0.0011289143
## 46.00 -0.06705922 -0.10238758 -0.031730852 -0.12108929 -0.0130291452
## 46.20 -0.05687406 -0.09272343 -0.021024691 -0.11170094 -0.0020471811
## 46.40 -0.07397071 -0.11037205 -0.037569373 -0.12964175 -0.0182996682
## 46.60 -0.06087600 -0.09786026 -0.023891744 -0.11743854 -0.0043134607
## 46.80 -0.06123999 -0.09883803 -0.023641955 -0.11874123 -0.0037387553
## 47.00 -0.07433208 -0.11288040 -0.035783758 -0.13328665 -0.0153775104
## 47.20 -0.06414692 -0.10336495 -0.024928893 -0.12412572 -0.0041681229
## 47.40 -0.08124357 -0.12116147 -0.041325670 -0.14229273 -0.0201944098
## 47.60 -0.06814886 -0.10879649 -0.027501232 -0.13031404 -0.0059836773
## 47.80 -0.06851285 -0.10991971 -0.027105992 -0.13183918 -0.0051865246
## 48.00 -0.08160494 -0.12411768 -0.039092196 -0.14662257 -0.0165873103
## 48.20 -0.07141978 -0.11474358 -0.028095985 -0.13767781 -0.0051617525
## 48.40 -0.08851643 -0.13267956 -0.044353303 -0.15605811 -0.0209747559
## 48.60 -0.07542172 -0.12045200 -0.030391439 -0.14428959 -0.0065538480
## 48.80 -0.07578571 -0.12171050 -0.029860924 -0.14602162 -0.0055498089
## 49.00 -0.08887780 -0.13604639 -0.041709201 -0.16101594 -0.0167396550
## 49.20 -0.07869264 -0.12680264 -0.030582638 -0.15227054 -0.0051147408
## 49.40 -0.09578929 -0.14486667 -0.046711915 -0.17084666 -0.0207319218
## 49.60 -0.08269458 -0.13276480 -0.032624358 -0.15927038 -0.0061187827
## 49.80 -0.08305857 -0.13414663 -0.031970518 -0.16119101 -0.0049261359
## 50.00 -0.09615066 -0.14860354 -0.043697773 -0.17637042 -0.0159308934
## 50.20 -0.08596550 -0.13947830 -0.032452706 -0.16780626 -0.0041247427
## 50.40 -0.10306215 -0.15765851 -0.048465795 -0.18656007 -0.0195642302
## 50.60 -0.08996744 -0.14567054 -0.034264344 -0.17515797 -0.0047769064
## 50.80 -0.09033143 -0.14716399 -0.033498875 -0.17724933 -0.0034135370
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0083
## beta = 0.0083
## gamma = 0.059
##
## Initial states:
## l = 0.0046
## b = 1e-04
## s = 0.0029 -0.0012 1e-04 4e-04 -0.0021
##
## sigma: 0.0212
##
## AIC AICc BIC
## -471.2123 -470.0482 -438.2291
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.001010463 0.02070895 0.0127571 100.9621 206.8857 0.7523486
## ACF1
## Training set -0.3466156
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.03256451 -0.05972218 -0.0054068398 -0.07409858 0.0089695613
## 41.20 -0.01731358 -0.04447503 0.0098478704 -0.05885343 0.0242262729
## 41.40 -0.03484109 -0.06201105 -0.0076711384 -0.07639395 0.0067117662
## 41.60 -0.02245107 -0.04963614 0.0047339955 -0.06402704 0.0191249005
## 41.80 -0.02805733 -0.05526599 -0.0008486651 -0.06966939 0.0135547316
## 42.00 -0.04067780 -0.06804708 -0.0133085070 -0.08253551 0.0011799189
## 42.20 -0.02542687 -0.05284207 0.0019883434 -0.06735481 0.0165010784
## 42.40 -0.04295438 -0.07042945 -0.0154793075 -0.08497388 -0.0009348831
## 42.60 -0.03056436 -0.05811501 -0.0030137093 -0.07269944 0.0115707233
## 42.80 -0.03617062 -0.06381429 -0.0085269469 -0.07844796 0.0061067277
## 43.00 -0.04879108 -0.07673623 -0.0208459321 -0.09152950 -0.0060526631
## 43.20 -0.03354015 -0.06161727 -0.0054630330 -0.07648040 0.0094000965
## 43.40 -0.05106767 -0.07929888 -0.0228364525 -0.09424358 -0.0078917505
## 43.60 -0.03867765 -0.06708653 -0.0102687656 -0.08212528 0.0047699871
## 43.80 -0.04428390 -0.07289538 -0.0156724303 -0.08804138 -0.0005264313
## 44.00 -0.05690437 -0.08598933 -0.0278194097 -0.10138598 -0.0124227620
## 44.20 -0.04165344 -0.07099237 -0.0123145119 -0.08652346 0.0032165786
## 44.40 -0.05918095 -0.08880202 -0.0295598887 -0.10448246 -0.0138794439
## 44.60 -0.04679093 -0.07672322 -0.0168586437 -0.09256842 -0.0010134469
## 44.80 -0.05239719 -0.08267059 -0.0221237914 -0.09869636 -0.0060980218
## 45.00 -0.06501766 -0.09595184 -0.0340834715 -0.11232741 -0.0177079029
## 45.20 -0.04976673 -0.08109999 -0.0184334652 -0.09768682 -0.0018466380
## 45.40 -0.06729424 -0.09905795 -0.0355305321 -0.11587264 -0.0187158402
## 45.60 -0.05490422 -0.08713008 -0.0226783585 -0.10418942 -0.0056190178
## 45.80 -0.06051048 -0.09323042 -0.0277905320 -0.11055132 -0.0104696388
## 46.00 -0.07313094 -0.10669780 -0.0395640899 -0.12446702 -0.0217948705
## 46.20 -0.05788001 -0.09199983 -0.0237601967 -0.11006177 -0.0056982561
## 46.40 -0.07540753 -0.11011220 -0.0407028593 -0.12848374 -0.0223313171
## 46.60 -0.06301751 -0.09833878 -0.0276962303 -0.11703674 -0.0089982755
## 46.80 -0.06862377 -0.10459320 -0.0326543288 -0.12363427 -0.0136132592
## 47.00 -0.08124423 -0.11823342 -0.0442550436 -0.13781431 -0.0246741503
## 47.20 -0.06599330 -0.10368646 -0.0283001439 -0.12364001 -0.0083465913
## 47.40 -0.08352082 -0.12194848 -0.0450931494 -0.14229086 -0.0247507716
## 47.60 -0.07113079 -0.11032310 -0.0319384893 -0.13107025 -0.0111913359
## 47.80 -0.07673705 -0.11672368 -0.0367504214 -0.13789133 -0.0155827774
## 48.00 -0.08935752 -0.13051767 -0.0481973651 -0.15230654 -0.0264084959
## 48.20 -0.07410659 -0.11611189 -0.0321012877 -0.13834815 -0.0098650242
## 48.40 -0.09163410 -0.13451281 -0.0487554005 -0.15721142 -0.0260567866
## 48.60 -0.07924408 -0.12302392 -0.0354642442 -0.14619957 -0.0122885979
## 48.80 -0.08485034 -0.12955852 -0.0401421606 -0.15322560 -0.0164750800
## 49.00 -0.09747081 -0.14348650 -0.0514551144 -0.16784573 -0.0270958786
## 49.20 -0.08221988 -0.12920935 -0.0352303984 -0.15408408 -0.0103556712
## 49.40 -0.09974739 -0.14773637 -0.0517584092 -0.17314020 -0.0263545772
## 49.60 -0.08735737 -0.13637104 -0.0383436949 -0.16231732 -0.0123974229
## 49.80 -0.09296363 -0.14302666 -0.0429005899 -0.16952844 -0.0163988188
## 50.00 -0.10558409 -0.15707091 -0.0540972757 -0.18432639 -0.0268418008
## 50.20 -0.09033316 -0.14290984 -0.0377564891 -0.17074225 -0.0099240786
## 50.40 -0.10786068 -0.16155051 -0.0541708409 -0.18997220 -0.0257491582
## 50.60 -0.09547066 -0.15029648 -0.0406448366 -0.17931951 -0.0116218006
## 50.80 -0.10107691 -0.15706107 -0.0450927616 -0.18669729 -0.0154565411
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0601
## beta = 0.0035
## gamma = 1e-04
##
## Initial states:
## l = 0.0098
## b = -5e-04
## s = 0.0059 -7e-04 -0.0023 -3e-04 -0.0027
##
## sigma: 0.0334
##
## AIC AICc BIC
## -288.8528 -287.6887 -255.8696
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.001208304 0.03267024 0.02170976 NaN Inf 0.7921311 -0.248789
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.03822031 -0.08106399 0.0046233723 -0.1037441 0.0273034467
## 41.20 -0.03715664 -0.08008683 0.0057735404 -0.1028127 0.0284994064
## 41.40 -0.04053480 -0.08356106 0.0024914610 -0.1063378 0.0252681867
## 41.60 -0.04023551 -0.08336788 0.0028968520 -0.1062008 0.0257297469
## 41.80 -0.03498703 -0.07823598 0.0082619155 -0.1011306 0.0311565250
## 42.00 -0.04489562 -0.08827239 -0.0015188579 -0.1112347 0.0214434147
## 42.20 -0.04383196 -0.08734754 -0.0003163746 -0.1103833 0.0227193836
## 42.40 -0.04721011 -0.09087625 -0.0035439778 -0.1139917 0.0195714779
## 42.60 -0.04691083 -0.09073964 -0.0030820150 -0.1139412 0.0201195572
## 42.80 -0.04166235 -0.08566634 0.0023416464 -0.1089606 0.0256359531
## 43.00 -0.05157094 -0.09576337 -0.0073785102 -0.1191574 0.0160155482
## 43.20 -0.05050727 -0.09490094 -0.0061136027 -0.1184015 0.0173869866
## 43.40 -0.05388543 -0.09849387 -0.0092769887 -0.1221081 0.0143372927
## 43.60 -0.05358614 -0.09842319 -0.0087491006 -0.1221585 0.0149861961
## 43.80 -0.04833766 -0.09341743 -0.0032578940 -0.1172812 0.0206058935
## 44.00 -0.05824625 -0.10358359 -0.0129089155 -0.1275837 0.0110912216
## 44.20 -0.05718259 -0.10279168 -0.0115734992 -0.1269357 0.0125704940
## 44.40 -0.06056074 -0.10645645 -0.0146650363 -0.1307522 0.0096306835
## 44.60 -0.06026146 -0.10645886 -0.0140640562 -0.1309143 0.0103913714
## 44.80 -0.05501298 -0.10152734 -0.0084986129 -0.1261506 0.0161246041
## 45.00 -0.06492157 -0.11176884 -0.0180742981 -0.1365683 0.0067251490
## 45.20 -0.06385790 -0.11105315 -0.0166626541 -0.1360368 0.0083210017
## 45.40 -0.06723606 -0.11479500 -0.0196771161 -0.1399712 0.0054990672
## 45.60 -0.06693677 -0.11487524 -0.0189983125 -0.1402523 0.0063787761
## 45.80 -0.06168829 -0.11002219 -0.0133543945 -0.1356086 0.0122320253
## 46.00 -0.07159688 -0.12034276 -0.0228510042 -0.1461473 0.0029535058
## 46.20 -0.07053322 -0.11970657 -0.0213598712 -0.1457374 0.0046709264
## 46.40 -0.07391137 -0.12352827 -0.0242944760 -0.1497939 0.0019711231
## 46.60 -0.07361209 -0.12368864 -0.0235355364 -0.1501976 0.0029733893
## 46.80 -0.06836361 -0.11891592 -0.0178112907 -0.1456767 0.0089494891
## 47.00 -0.07827220 -0.12931697 -0.0272274270 -0.1563384 -0.0002059569
## 47.20 -0.07720853 -0.12876124 -0.0256558320 -0.1560516 0.0016345198
## 47.40 -0.08058669 -0.13266335 -0.0285100242 -0.1602311 -0.0009423031
## 47.60 -0.08028740 -0.13290401 -0.0276707948 -0.1607576 0.0001827557
## 47.80 -0.07503892 -0.12821139 -0.0218664515 -0.1563592 0.0062813542
## 48.00 -0.08494751 -0.13869231 -0.0312027181 -0.1671431 -0.0027519415
## 48.20 -0.08388385 -0.13821609 -0.0295516033 -0.1669778 -0.0007898499
## 48.40 -0.08726200 -0.14219736 -0.0323266525 -0.1712784 -0.0032456340
## 48.60 -0.08696272 -0.14251673 -0.0314087104 -0.1719252 -0.0020001945
## 48.80 -0.08171424 -0.13790234 -0.0255261341 -0.1676465 0.0042180512
## 49.00 -0.09162283 -0.14846099 -0.0347846703 -0.1785493 -0.0046963662
## 49.20 -0.09055916 -0.14806191 -0.0330564153 -0.1785020 -0.0026162989
## 49.40 -0.09393732 -0.15211971 -0.0357549290 -0.1829196 -0.0049550321
## 49.60 -0.09363803 -0.15251498 -0.0347610902 -0.1836826 -0.0035935185
## 49.80 -0.08838955 -0.14797582 -0.0288032866 -0.1795189 0.0027397773
## 50.00 -0.09829814 -0.15860901 -0.0379872760 -0.1905357 -0.0060606307
## 50.20 -0.09723448 -0.15828375 -0.0361852112 -0.1906013 -0.0038676810
## 50.40 -0.10061263 -0.16241461 -0.0388106562 -0.1951306 -0.0060946652
## 50.60 -0.10031335 -0.16288219 -0.0377445075 -0.1960041 -0.0046225631
## 50.80 -0.09506487 -0.15841457 -0.0317151676 -0.1919499 0.0018201380
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0419
## beta = 1e-04
## gamma = 0.0543
##
## Initial states:
## l = 0.0057
## b = 0
## s = -5e-04 8e-04 -0.0016 0.0062 -0.0049
##
## sigma: 0.0194
##
## AIC AICc BIC
## -506.0220 -504.8580 -473.0389
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -4.789381e-05 0.01898296 0.01105661 -Inf Inf 0.7571336 -0.2609261
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.011266993 -0.03616120 0.01362721 -0.04933939 0.02680541
## 41.20 0.007386652 -0.01752953 0.03230283 -0.03071936 0.04549266
## 41.40 -0.006676565 -0.03161480 0.01826167 -0.04481631 0.03146318
## 41.60 0.004055481 -0.02090490 0.02901587 -0.03411813 0.04222909
## 41.80 -0.002242529 -0.02722514 0.02274009 -0.04045014 0.03596508
## 42.00 -0.011429258 -0.03652761 0.01366909 -0.04981388 0.02695536
## 42.20 0.007224387 -0.01789628 0.03234506 -0.03119436 0.04564314
## 42.40 -0.006838830 -0.03198190 0.01830424 -0.04529184 0.03161418
## 42.60 0.003893216 -0.02127235 0.02905878 -0.03459419 0.04238062
## 42.80 -0.002404794 -0.02759293 0.02278334 -0.04092672 0.03611714
## 43.00 -0.011591523 -0.03689565 0.01371260 -0.05029084 0.02710779
## 43.20 0.007062122 -0.01826466 0.03238891 -0.03167185 0.04579609
## 43.40 -0.007001095 -0.03235062 0.01834843 -0.04576985 0.03176766
## 43.60 0.003730951 -0.02164141 0.02910331 -0.03507273 0.04253463
## 43.80 -0.002567059 -0.02796234 0.02282822 -0.04140578 0.03627166
## 44.00 -0.011753788 -0.03726529 0.01375772 -0.05077027 0.02726269
## 44.20 0.006899856 -0.01863465 0.03243436 -0.03215180 0.04595152
## 44.40 -0.007163361 -0.03272096 0.01839423 -0.04625033 0.03192361
## 44.60 0.003568686 -0.02201208 0.02914945 -0.03555372 0.04269109
## 44.80 -0.002729324 -0.02833335 0.02287470 -0.04188730 0.03642865
## 45.00 -0.011916053 -0.03763655 0.01380444 -0.05125215 0.02742005
## 45.20 0.006737591 -0.01900624 0.03248143 -0.03263421 0.04610939
## 45.40 -0.007325626 -0.03309289 0.01844164 -0.04673325 0.03208200
## 45.60 0.003406420 -0.02238436 0.02919720 -0.03603717 0.04285001
## 45.80 -0.002891589 -0.02870596 0.02292278 -0.04237127 0.03658809
## 46.00 -0.012078318 -0.03800939 0.01385276 -0.05173647 0.02757984
## 46.20 0.006575326 -0.01937943 0.03253008 -0.03311905 0.04626970
## 46.40 -0.007487891 -0.03346642 0.01849063 -0.04721862 0.03224283
## 46.60 0.003244155 -0.02275822 0.02924653 -0.03652305 0.04301136
## 46.80 -0.003053854 -0.02908017 0.02297246 -0.04285767 0.03674996
## 47.00 -0.012240584 -0.03838383 0.01390266 -0.05222322 0.02774206
## 47.20 0.006413061 -0.01975420 0.03258032 -0.03360632 0.04643244
## 47.40 -0.007650156 -0.03384153 0.01854121 -0.04770640 0.03240609
## 47.60 0.003081890 -0.02313367 0.02929745 -0.03701135 0.04317513
## 47.80 -0.003216120 -0.02945596 0.02302372 -0.04334649 0.03691425
## 48.00 -0.012402849 -0.03875984 0.01395414 -0.05271238 0.02790669
## 48.20 0.006250796 -0.02013055 0.03263214 -0.03409599 0.04659758
## 48.40 -0.007812421 -0.03421821 0.01859337 -0.04819659 0.03257175
## 48.60 0.002919625 -0.02351069 0.02934994 -0.03750206 0.04334131
## 48.80 -0.003378385 -0.02983332 0.02307655 -0.04383771 0.03708094
## 49.00 -0.012565114 -0.03913742 0.01400719 -0.05320394 0.02807371
## 49.20 0.006088531 -0.02050847 0.03268553 -0.03458807 0.04676513
## 49.40 -0.007974686 -0.03459646 0.01864709 -0.04868918 0.03273981
## 49.60 0.002757360 -0.02388928 0.02940400 -0.03799516 0.04350988
## 49.80 -0.003540650 -0.03021224 0.02313094 -0.04433133 0.03725003
## 50.00 -0.012727379 -0.03951655 0.01406180 -0.05369789 0.02824313
## 50.20 0.005926265 -0.02088794 0.03274047 -0.03508252 0.04693506
## 50.40 -0.008136952 -0.03497627 0.01870237 -0.04918415 0.03291025
## 50.60 0.002595095 -0.02426943 0.02945962 -0.03849065 0.04368084
## 50.80 -0.003702915 -0.03059272 0.02318689 -0.04482733 0.03742150
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0451
## beta = 1e-04
## gamma = 0.0549
##
## Initial states:
## l = 0.009
## b = -1e-04
## s = 0.0042 -4e-04 -0.003 4e-04 -0.0011
##
## sigma: 0.0185
##
## AIC AICc BIC
## -525.7990 -524.6349 -492.8158
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -3.201684e-05 0.01806722 0.01221663 83.02854 167.6833 0.7674483
## ACF1
## Training set -0.2268501
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.0132419835 -0.03693529 0.010451325 -0.04947777 0.02299380
## 41.20 -0.0024396228 -0.02615712 0.021277879 -0.03871241 0.03383317
## 41.40 -0.0058435715 -0.02958535 0.017898205 -0.04215349 0.03046634
## 41.60 0.0015962986 -0.02216984 0.025362433 -0.03475087 0.03794346
## 41.80 0.0013313447 -0.02245923 0.025121919 -0.03505320 0.03771589
## 42.00 -0.0135288835 -0.03743831 0.010380541 -0.05009519 0.02303743
## 42.20 -0.0027265227 -0.02666046 0.021207410 -0.03933031 0.03387727
## 42.40 -0.0061304714 -0.03008899 0.017828051 -0.04277187 0.03051093
## 42.60 0.0013093987 -0.02267380 0.025292593 -0.03536973 0.03798853
## 42.80 0.0010444447 -0.02296350 0.025052393 -0.03567254 0.03776143
## 43.00 -0.0138157834 -0.03794268 0.010311116 -0.05071469 0.02308313
## 43.20 -0.0030134226 -0.02716514 0.021138298 -0.03995029 0.03392345
## 43.40 -0.0064173714 -0.03059399 0.017759252 -0.04339233 0.03055758
## 43.60 0.0010224988 -0.02317911 0.025224107 -0.03599067 0.03803566
## 43.80 0.0007575448 -0.02346913 0.024984220 -0.03629396 0.03780905
## 44.00 -0.0141026833 -0.03844841 0.010243042 -0.05133626 0.02313089
## 44.20 -0.0033003226 -0.02767118 0.021070536 -0.04057233 0.03397169
## 44.40 -0.0067042713 -0.03110034 0.017691802 -0.04401485 0.03060630
## 44.60 0.0007355988 -0.02368577 0.025156968 -0.03661366 0.03808486
## 44.80 0.0004706449 -0.02397610 0.024917392 -0.03691743 0.03785872
## 45.00 -0.0143895833 -0.03895548 0.010176311 -0.05195988 0.02318071
## 45.20 -0.0035872225 -0.02817856 0.021004115 -0.04119643 0.03402198
## 45.40 -0.0069911712 -0.03160803 0.017625692 -0.04463941 0.03065707
## 45.60 0.0004486989 -0.02419377 0.025091169 -0.03723871 0.03813610
## 45.80 0.0001837449 -0.02448441 0.024851903 -0.03754295 0.03791044
## 46.00 -0.0146764832 -0.03946388 0.010110915 -0.05258554 0.02323257
## 46.20 -0.0038741224 -0.02868727 0.020939029 -0.04182256 0.03407432
## 46.40 -0.0072780712 -0.03211706 0.017560915 -0.04526602 0.03070988
## 46.60 0.0001617990 -0.02470310 0.025026702 -0.03786579 0.03818939
## 46.80 -0.0001031550 -0.02499405 0.024787745 -0.03817050 0.03796419
## 47.00 -0.0149633831 -0.03997361 0.010046848 -0.05321323 0.02328646
## 47.20 -0.0041610223 -0.02919732 0.020875270 -0.04245073 0.03412868
## 47.40 -0.0075649711 -0.03262741 0.017497465 -0.04589466 0.03076472
## 47.60 -0.0001251010 -0.02521376 0.024963559 -0.03849489 0.03824469
## 47.80 -0.0003900549 -0.02550502 0.024724910 -0.03880008 0.03801997
## 48.00 -0.0152502830 -0.04048467 0.009984101 -0.05384294 0.02334238
## 48.20 -0.0044479223 -0.02970868 0.020812831 -0.04308091 0.03418507
## 48.40 -0.0078518710 -0.03313907 0.017435332 -0.04652531 0.03082157
## 48.60 -0.0004120009 -0.02572573 0.024901733 -0.03912602 0.03830201
## 48.80 -0.0006769549 -0.02601730 0.024663390 -0.03943167 0.03807776
## 49.00 -0.0155371830 -0.04099703 0.009922668 -0.05447467 0.02340030
## 49.20 -0.0047348222 -0.03022135 0.020751704 -0.04371310 0.03424346
## 49.40 -0.0081387709 -0.03365205 0.017374511 -0.04715797 0.03088043
## 49.60 -0.0006989008 -0.02623902 0.024841217 -0.03975914 0.03836134
## 49.80 -0.0009638548 -0.02653089 0.024603179 -0.04006526 0.03813755
## 50.00 -0.0158240829 -0.04151071 0.009862542 -0.05510839 0.02346022
## 50.20 -0.0050217221 -0.03073533 0.020691882 -0.04434729 0.03430384
## 50.40 -0.0084256709 -0.03416633 0.017314993 -0.04779262 0.03094128
## 50.60 -0.0009858008 -0.02675360 0.024782003 -0.04039426 0.03842265
## 50.80 -0.0012507547 -0.02704578 0.024544269 -0.04070084 0.03819933
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = train, h = 50, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0127
## beta = 0.0126
## gamma = 0.0364
##
## Initial states:
## l = 0.0212
## b = -0.0019
## s = 0.0037 0.0065 0.0033 -0.0043 -0.0092
##
## sigma: 0.0452
##
## AIC AICc BIC
## -168.4221 -167.2581 -135.4389
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.002427766 0.04414772 0.02874494 137.9406 210.527 0.7050566
## ACF1
## Training set 0.05347147
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 41.00 -0.09656497 -0.1544602 -0.03866974 -0.1851081 -0.008021866
## 41.20 -0.08965143 -0.1475652 -0.03173765 -0.1782229 -0.001079941
## 41.40 -0.10847006 -0.1664255 -0.05051462 -0.1971053 -0.019834869
## 41.60 -0.09694867 -0.1549780 -0.03891935 -0.1856969 -0.008200478
## 41.80 -0.10728662 -0.1654311 -0.04914210 -0.1962110 -0.018362259
## 42.00 -0.13688970 -0.1953958 -0.07838359 -0.2263671 -0.047412323
## 42.20 -0.12997616 -0.1887058 -0.07124651 -0.2197954 -0.040156907
## 42.40 -0.14879479 -0.2078151 -0.08977451 -0.2390585 -0.058531055
## 42.60 -0.13727340 -0.1966594 -0.07788740 -0.2280965 -0.046450348
## 42.80 -0.14761134 -0.2074457 -0.08777699 -0.2391201 -0.056102589
## 43.00 -0.17721443 -0.2379032 -0.11652565 -0.2700299 -0.084398952
## 43.20 -0.17030089 -0.2316204 -0.10898138 -0.2640810 -0.076520790
## 43.40 -0.18911952 -0.2511710 -0.12706801 -0.2840191 -0.094219925
## 43.60 -0.17759813 -0.2404879 -0.11470841 -0.2737797 -0.081416603
## 43.80 -0.18793607 -0.2517744 -0.12409775 -0.2855684 -0.090303780
## 44.00 -0.21753916 -0.2828522 -0.15222610 -0.3174269 -0.117651456
## 44.20 -0.21062562 -0.2771101 -0.14414113 -0.3123049 -0.108946361
## 44.40 -0.22944425 -0.2972179 -0.16167057 -0.3330951 -0.125793357
## 44.60 -0.21792286 -0.2871046 -0.14874113 -0.3237272 -0.112118533
## 44.80 -0.22826080 -0.2989699 -0.15755169 -0.3364011 -0.120120547
## 45.00 -0.25786388 -0.3306953 -0.18503245 -0.3692500 -0.146477811
## 45.20 -0.25095035 -0.3255355 -0.17636524 -0.3650184 -0.136882264
## 45.40 -0.26976898 -0.3462248 -0.19331319 -0.3866980 -0.152839936
## 45.60 -0.25824759 -0.3366895 -0.17980568 -0.3782141 -0.138281046
## 45.80 -0.26858553 -0.3491271 -0.18804398 -0.3917632 -0.145407861
## 46.00 -0.29818861 -0.3814499 -0.21492729 -0.4255258 -0.170851410
## 46.20 -0.29127508 -0.3768425 -0.20570764 -0.4221392 -0.160410977
## 46.40 -0.31009371 -0.3980738 -0.22211365 -0.4446476 -0.175539815
## 46.60 -0.29857232 -0.3890689 -0.20807575 -0.4369749 -0.160169753
## 46.80 -0.30891026 -0.4020246 -0.21579597 -0.4513163 -0.166504248
## 47.00 -0.33851334 -0.4348634 -0.24216329 -0.4858680 -0.191158646
## 47.20 -0.33159981 -0.4307471 -0.23245254 -0.4832325 -0.179967152
## 47.40 -0.35041844 -0.4524563 -0.24838055 -0.5064719 -0.194364949
## 47.60 -0.33889705 -0.4439164 -0.23387775 -0.4995102 -0.178283885
## 47.80 -0.34923499 -0.4573239 -0.24114606 -0.5145427 -0.183927238
## 48.00 -0.37883807 -0.4905991 -0.26707705 -0.5497618 -0.207914335
## 48.20 -0.37192454 -0.4869096 -0.25693945 -0.5477791 -0.196070015
## 48.40 -0.39074317 -0.5090337 -0.27245260 -0.5716530 -0.209833354
## 48.60 -0.37922178 -0.5008970 -0.25754657 -0.5653080 -0.193135610
## 48.80 -0.38955972 -0.5146966 -0.26442287 -0.5809400 -0.198179428
## 49.00 -0.41916280 -0.5483428 -0.28998278 -0.6167266 -0.221599012
## 49.20 -0.41224927 -0.5450251 -0.27947348 -0.6153123 -0.209186221
## 49.40 -0.43106790 -0.5675110 -0.29462478 -0.6397396 -0.222396158
## 49.60 -0.41954651 -0.5597267 -0.27936632 -0.6339336 -0.205159411
## 49.80 -0.42988445 -0.5738697 -0.28589917 -0.6500909 -0.209677960
## 50.00 -0.45948753 -0.6078372 -0.31113782 -0.6863688 -0.232606231
## 50.20 -0.45257400 -0.6048473 -0.30030073 -0.6854559 -0.219692134
## 50.40 -0.47139263 -0.6276532 -0.31513202 -0.7103726 -0.232412650
## 50.60 -0.45987124 -0.6201815 -0.29956094 -0.7050447 -0.214697788
## 50.80 -0.47020918 -0.6346302 -0.30578819 -0.7216694 -0.218748972
Check whether the residuals for each model look like white noise, and test for autocorrelation (Ljung-Box) and normality (Shapiro-Wilk).
plots <- list()
for (i in c(1:length(hw_models)))
{
title <- hw_titles[[i]]
model <- hw_models[[i]]
df <- as.data.frame(cbind(fitted(model),model$residuals))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
# (log returns take negative fitted values, so a log10 x-axis would silently drop points; plot on the raw scale)
ggtitle(label=title)
plots[[i]] <- plot
}
for (i in c(1:length(hw_models)))
{
title <- hw_titles[[i]]
print(title)
model <- hw_models[[i]]
checkresiduals(hw_models[[i]])
residuals <- hw_resfits[[i]]
print(shapiro.test(residuals))
}## [1] "AAPL _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 64.716, df = 3, p-value = 5.773e-14
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.85085, p-value = 4.731e-13
##
## [1] "FB _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 46.579, df = 3, p-value = 4.272e-10
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.88079, p-value = 1.765e-11
##
## [1] "GOOG _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 67.521, df = 3, p-value = 1.454e-14
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.85237, p-value = 5.612e-13
##
## [1] "GE _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 22.939, df = 3, p-value = 4.158e-05
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.89468, p-value = 1.167e-10
##
## [1] "PG _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 46.635, df = 3, p-value = 4.157e-10
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.7815, p-value = 5.431e-16
##
## [1] "AMZN _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 33.619, df = 3, p-value = 2.384e-07
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.90376, p-value = 4.389e-10
##
## [1] "TSLA _HW_Model"
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 9.1889, df = 3, p-value = 0.02688
##
## Model df: 9. Total lags used: 12
##
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.90039, p-value = 2.66e-10
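Fit a Neural Network Autoregression (NNAR) model for each stock, again with the three Fama-French factors as external regressors and the last 50 observations held out as a test set. The hidden-layer size hn is set from the length of the training series (see the comment in the loop below), and lambda = "auto" asks nnetar to choose a Box-Cox transformation automatically.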
nnet_models <- list()
nnet_titles <- list()
nnet_plots <- list()
nnet_resfits <- list()
nnet_aic <- list()
nnet_rmse <- list()
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- nrow(LNreturnsData)
xregs <- as.matrix(LNreturnsData[,ff_factors])
xregs_train <- head(xregs,nrow(xregs)-50)
xregs_test <- tail(xregs,50)
row.names(xregs_test) <- (nrow(xregs)-49):nrow(xregs)
for (stock in c(stockTickers))
{
title <- paste("Neural Network Autoregression Model For Stock:", stock)
log_returns <- ts(LNreturnsData[,stock])
train <- head(log_returns,length(log_returns)-50)
test <- tail(log_returns,50)
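# Hidden-layer size from a rule-of-thumb scaling of the training length:
# with alpha = 1.5^-10 and the 200-observation training set used here,
# hn evaluates to about 46.13, matching the NNAR(p, 46.132...) summaries below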
alpha <- 1.5^(-10)
hn <- length(train)/(alpha*(length(train)+50))
model <- nnetar(train,size = hn, xreg = xregs_train, lambda = "auto")
forecast <- forecast(model,PI=TRUE,h=50,xreg=xregs_test,npaths=100)
colors <- c("Train"="black","Fitted"="red","Test"="green4","Forecast"="blue")
plot <- autoplot(forecast,main = title,xlab = "Days",ylab = "Log Returns") +
autolayer(train, series = 'Train') +
autolayer(fitted(model), series = 'Fitted') +
autolayer(test, series = 'Test') +
autolayer(forecast$mean, series = 'Forecast') +
coord_cartesian(xlim = c(plot_midpoint-125,plot_midpoint+10)) +
labs(caption = paste("From:",start,"to",end,"(last 125 days shown)")) +
scale_color_manual(values=colors)
nnet_models[[stock]] <- model
label <- paste(stock,"_NNET_Model")
nnet_titles[[stock]] <- label
nnet_plots[[label]] <- plot
label <- paste(stock,"_NNET_Model_Residuals",sep = "")
nnet_resfits[[label]] <- model[["residuals"]]
label <- stock
nnet_aic[[label]] <- NULL
nnet_rmse[[label]] <- round(rmse(test, forecast$mean),4)
}## [1] "For Stock: AAPL"
## Series: train
## Model: NNAR(1,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 4-46.13203-1 network with 277 weights
## options were - linear output units
##
## sigma^2 estimated as 0.0003572
## [1] "For Stock: FB"
## Series: train
## Model: NNAR(7,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 10-46.13203-1 network with 553 weights
## options were - linear output units
##
## sigma^2 estimated as 3.257e-08
## [1] "For Stock: GOOG"
## Series: train
## Model: NNAR(11,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 14-46.13203-1 network with 737 weights
## options were - linear output units
##
## sigma^2 estimated as 7.543e-09
## [1] "For Stock: GE"
## Series: train
## Model: NNAR(5,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 8-46.13203-1 network with 461 weights
## options were - linear output units
##
## sigma^2 estimated as 2.333e-06
## [1] "For Stock: PG"
## Series: train
## Model: NNAR(1,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 4-46.13203-1 network with 277 weights
## options were - linear output units
##
## sigma^2 estimated as 0.002344
## [1] "For Stock: AMZN"
## Series: train
## Model: NNAR(3,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 6-46.13203-1 network with 369 weights
## options were - linear output units
##
## sigma^2 estimated as 2.898e-05
## [1] "For Stock: TSLA"
## Series: train
## Model: NNAR(1,46.13203125)
## Call: nnetar(y = train, size = hn, xreg = xregs_train, lambda = "auto")
##
## Average of 20 networks, each of which is
## a 4-46.13203-1 network with 277 weights
## options were - linear output units
##
## sigma^2 estimated as 0.004328
Check whether the residuals for each model look like white noise, and test for autocorrelation and normality.
plots <- list()
for (i in c(1:length(nnet_models)))
{
title <- nnet_titles[[i]]
model <- nnet_models[[i]]
df <- as.data.frame(cbind(fitted(model),model$residuals))
colnames(df) <- c("fitted", "residuals")
plot <- ggplot(data = df, aes(x = fitted, y = residuals)) +
geom_point(size = 1.5) +
geom_smooth(aes(colour = fitted, fill = fitted)) +
geom_hline(yintercept=0,color = "red") +
# (log returns take negative fitted values, so a log10 x-axis would silently drop points; plot on the raw scale)
ggtitle(label=title)
plots[[i]] <- plot
}
for (i in c(1:length(nnet_models)))
{
title <- nnet_titles[[i]]
print(title)
residuals <- nnet_resfits[[i]]
checkresiduals(residuals)
print(shapiro.test(residuals))
}## [1] "AAPL _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.97418, p-value = 0.0009938
##
## [1] "FB _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.79197, p-value = 2.677e-15
##
## [1] "GOOG _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.77342, p-value = 8.316e-16
##
## [1] "GE _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.83572, p-value = 1.435e-13
##
## [1] "PG _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.93246, p-value = 5.663e-08
##
## [1] "AMZN _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.87817, p-value = 1.598e-11
##
## [1] "TSLA _NNET_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.87167, p-value = 5.987e-12
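Finally, fit a k-nearest-neighbours time-series regression (tsfknn) for each stock, using the first 10 lags as features, k of roughly the square root of the training length, and the MIMO strategy to produce the 50-day forecast in one shot.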
knn_models <- list()
knn_titles <- list()
knn_plots <- list()
knn_resfits <- list()
knn_aic <- list()
knn_rmse <- list()
date <- as.Date(LNreturnsData$date, "%Y-%m-%d")
start <- min(date)
end <- max(date)
plot_midpoint <- nrow(LNreturnsData)
for (stock in c(stockTickers))
{
title <- paste("KNN TS Regression For Stock:", stock)
log_returns <- ts(LNreturnsData[,stock])
train <- head(log_returns,length(log_returns)-50)
test <- tail(log_returns,50)
k <- round(sqrt(NROW(train)))  # NROW() gives the series length whether train is a plain vector or a one-column matrix; nrow() returns NULL for a vector
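# msas = "MIMO": one multi-output model emits the whole 50-step path at once,
# rather than iterating one-step-ahead forecasts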
model <- knn_forecasting(train,h=50,lags=1:10,k=k,msas="MIMO")
plot <- autoplot(model,highlight = "neighbors",faceting = FALSE) +
autolayer(test, series = 'Test') +
labs(x = "Date",y = "Log Return",color = "Legend") +
ggtitle(label=title) +
labs(caption = paste("From:",start,"to",end)) +
scale_color_manual(values=c("blue","black","red","green","purple"))
knn_models[[stock]] <- model
label <- paste(stock,"_KNN_Model")
knn_titles[[stock]] <- label
knn_plots[[label]] <- plot
label <- paste(stock,"_KNN_Model_Residuals",sep = "")
knn_resfits[[label]] <- (model$prediction - test)
label <- stock
knn_aic[[label]] <- NULL
knn_rmse[[label]] <- round(rmse(test, model$prediction),4)
}
for (i in c(1:length(knn_models)))
{
title <- knn_titles[[i]]
print(title)
residuals <- knn_resfits[[i]]
checkresiduals(residuals)
print(shapiro.test(residuals))
}## [1] "AAPL _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.93019, p-value = 0.00562
##
## [1] "FB _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.96266, p-value = 0.1149
##
## [1] "GOOG _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.94396, p-value = 0.01939
##
## [1] "GE _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.96907, p-value = 0.2122
##
## [1] "PG _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.96346, p-value = 0.1241
##
## [1] "AMZN _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.95416, p-value = 0.05071
##
## [1] "TSLA _KNN_Model"
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.9688, p-value = 0.2067
For each stock, compare the RMSE and AIC values of each model. (AIC is not reported for the neural network and KNN models, so those columns are absent from the AIC table.)
library(knitr)
library(kableExtra)
RMSE <- as.matrix(cbind(stockTickers,
arima_rmse,
arima_garch_rmse,
famafrench_rmse,
arimax_rmse,
auto_sarimax_rmse,
hw_rmse,
nnet_rmse,
knn_rmse))
AIC <- as.matrix(cbind(stockTickers,
arima_aic,
arima_garch_aic,
famafrench_aic,
arimax_aic,
auto_sarimax_aic,
hw_aic,
nnet_aic,
knn_aic))
kable(RMSE, caption = 'RMSE Table') %>%
kable_styling(full_width = FALSE,
latex_options = c('hold_position', 'striped'))

| stockTickers | arima_rmse | arima_garch_rmse | famafrench_rmse | arimax_rmse | auto_sarimax_rmse | hw_rmse | nnet_rmse | knn_rmse |
|---|---|---|---|---|---|---|---|---|
| AAPL | 0.037 | 0.0277 | 0.0392 | 0.0135 | 0.0132 | 0.062 | 0.0268 | 0.0278 |
| FB | 0.0311 | 0.0308 | 0.0322 | 0.0206 | 0.0211 | 0.0778 | 0.0259 | 0.0304 |
| GOOG | 0.0291 | 0.0249 | 0.0363 | 0.015 | 0.015 | 0.0781 | 0.02 | 0.0277 |
| GE | 0.0538 | 0.0531 | 0.041 | 0.0333 | 0.0335 | 0.0875 | 0.0414 | 0.0542 |
| PG | 0.0267 | 0.0305 | 0.0356 | 0.0184 | 0.0183 | 0.0297 | 0.0449 | 0.027 |
| AMZN | 0.0227 | 0.0243 | 0.0325 | 0.0224 | 0.0219 | 0.0264 | 0.0293 | 0.0245 |
| TSLA | 0.0555 | 0.0549 | 0.0554 | 0.0462 | 0.0455 | 0.3193 | 0.0808 | 0.0573 |
kable(AIC, caption = 'AIC Table') %>%
kable_styling(full_width = FALSE,
latex_options = c('hold_position', 'striped'))

| stockTickers | arima_aic | arima_garch_aic | famafrench_aic | arimax_aic | auto_sarimax_aic | hw_aic |
|---|---|---|---|---|---|---|
| AAPL | -955.7663 | -5.261 | -924.2631 | -1266.0871 | -1273.1428 | -410.7185 |
| FB | -970.1904 | -5.1951 | -969.9724 | -1181.106 | -1183.266 | -439.0239 |
| GOOG | -1024.3302 | -5.5749 | -999.6686 | -1247.7873 | -1257.8897 | -471.2123 |
| GE | -805.3401 | -4.5859 | -781.0967 | -950.9291 | -961.377 | -288.8528 |
| PG | -1054.8698 | -6.0227 | -1043.8743 | -1240.5704 | -1250.3551 | -506.022 |
| AMZN | -1052.8152 | -5.6238 | -1031.4278 | -1228.9241 | -1234.9415 | -525.799 |
| TSLA | -659.1552 | -3.8891 | -649.11 | -725.1371 | -731.814 | -168.4221 |
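To summarise, one might read off the lowest-RMSE model for each stock; a sketch assuming the per-model *_rmse lists built above (the rmse_num helper name is illustrative):
# Lowest test-set RMSE per stock across all candidate models
rmse_num <- sapply(list(arima = arima_rmse, arima_garch = arima_garch_rmse,
                        famafrench = famafrench_rmse, arimax = arimax_rmse,
                        auto_sarimax = auto_sarimax_rmse, hw = hw_rmse,
                        nnet = nnet_rmse, knn = knn_rmse),
                   function(x) unlist(x[stockTickers]))
data.frame(stock = stockTickers,
           best_model = colnames(rmse_num)[apply(rmse_num, 1, which.min)])
By this criterion the exogenous-regressor ARIMA variants dominate: for every stock in the table, either the ARIMAX or the auto-SARIMAX model posts the lowest out-of-sample RMSE.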