For this assignments, I used the data sets from Nintendo and Sony:

https://finance.yahoo.com/quote/NTDOY/history?period1=1438732800&period2=1596585600&interval=1mo&filter=history&frequency=1mo

https://finance.yahoo.com/quote/SNE/history?period1=1438732800&period2=1596585600&interval=1mo&filter=history&frequency=1mo

Both companies are famous for their high-quality video games on the Nintendo Switch for Nintendo and the Playstation 4 for Sony. Since they are direct competitors, they must certainly have a lot of similarties since they target the same audience(the gamers).

#Reading our data sets

#Summary of our data sets

str(nintendo)
## 'data.frame':    60 obs. of  7 variables:
##  $ Date     : chr  "2015-09-01" "2015-10-01" "2015-11-01" "2015-12-01" ...
##  $ Open     : num  24.1 21.5 20 19.4 17.2 ...
##  $ High     : num  25 24.9 21 19.7 17.6 ...
##  $ Low      : num  20.2 20 19 16.6 15.3 ...
##  $ Close    : num  21 20.2 19.1 17.3 17.6 ...
##  $ Adj.Close: num  19.2 18.5 17.5 15.8 16.1 ...
##  $ Volume   : int  1292500 3185600 890500 1353700 2312000 2475500 1536000 1484400 1235300 1022700 ...
head(nintendo)
##         Date  Open  High   Low Close Adj.Close  Volume
## 1 2015-09-01 24.10 24.95 20.21 20.97  19.16429 1292500
## 2 2015-10-01 21.52 24.88 20.05 20.22  18.50571 3185600
## 3 2015-11-01 20.02 21.03 19.05 19.13  17.50812  890500
## 4 2015-12-01 19.42 19.67 16.61 17.26  15.79667 1353700
## 5 2016-01-01 17.24 17.58 15.34 17.57  16.08038 2312000
## 6 2016-02-01 17.75 18.80 16.06 17.39  15.91564 2475500
tail(nintendo)
##          Date  Open  High   Low Close Adj.Close   Volume
## 55 2020-03-01 42.01 48.75 35.82 48.28  47.27861  9859900
## 56 2020-04-01 47.62 55.92 47.61 51.44  51.44000 10586000
## 57 2020-05-01 51.82 54.84 50.50 50.84  50.84000  6537600
## 58 2020-06-01 51.30 58.95 50.43 55.90  55.90000 10004600
## 59 2020-07-01 55.58 60.00 54.72 55.01  55.01000  3932800
## 60 2020-08-01 55.49 57.25 55.00 57.05  57.05000   289000
str(sony)
## 'data.frame':    61 obs. of  7 variables:
##  $ Date     : chr  "2015-08-01" "2015-09-01" "2015-10-01" "2015-11-01" ...
##  $ Open     : num  25.5 25.1 24.8 28.5 26.2 ...
##  $ High     : num  26 26.9 29.1 29.1 26.4 ...
##  $ Low      : num  25.5 23.3 24.5 25.8 24 ...
##  $ Close    : num  25.9 25.4 28.4 25.9 24.6 ...
##  $ Adj.Close: num  25.1 24.6 27.6 25.2 23.9 ...
##  $ Volume   : int  1866700 43220300 24156900 13899900 25763800 45561800 27120900 32082900 39773500 35696500 ...
head(sony)
##         Date  Open  High   Low Close Adj.Close   Volume
## 1 2015-08-01 25.53 25.95 25.52 25.86  25.05767  1866700
## 2 2015-09-01 25.13 26.86 23.26 25.41  24.62163 43220300
## 3 2015-10-01 24.80 29.08 24.53 28.40  27.60809 24156900
## 4 2015-11-01 28.54 29.08 25.83 25.92  25.19724 13899900
## 5 2015-12-01 26.20 26.36 23.97 24.61  23.92377 25763800
## 6 2016-01-01 24.45 25.72 19.90 23.88  23.21413 45561800
tail(sony)
##          Date  Open  High   Low Close Adj.Close   Volume
## 56 2020-03-01 63.60 64.95 50.94 59.18  58.95736 38973100
## 57 2020-04-01 57.93 64.85 56.65 64.25  64.25000 31255800
## 58 2020-05-01 63.23 66.40 61.35 64.76  64.76000 26341100
## 59 2020-06-01 64.91 72.34 64.56 69.13  69.13000 29662000
## 60 2020-07-01 69.17 78.76 68.98 77.96  77.96000 21469000
## 61 2020-08-01 78.71 84.15 78.67 83.37  83.37000  3829900

#Creating our time series - VAR model style

ts(nintendo['Adj.Close']) %>% autoplot(series='nintendo')+
   ts(sony['Adj.Close']) %>% autolayer(series='sony') +xlab("Time") + ylab("Adjusting Closing Price in USD")

#As we can see both companies have the same tendencies; it appears that sony has the upper hand.

#Building a data set containing stock prices of both companies

apple = read.csv("C:\\Users\\student\\Desktop\\Predictive Analytics - Forecasting\\Discussion 6 - VAR model\\APPLE.csv")

price = cbind(apple['Adj.Close'],sony['Adj.Close']) #Adj.Close is our variable of interest

##For the previous code, the data sets did not have the same length and therefore, I had to use a different data set. 

names(price) = c('apple','sony')

price.var = VAR(price, type='const', lag.max=1, ic='AIC') #let's try lag 1.
price.var
## 
## VAR Estimation Results:
## ======================= 
## 
## Estimated coefficients for equation apple: 
## ========================================== 
## Call:
## apple = apple.l1 + sony.l1 + const 
## 
##   apple.l1    sony.l1      const 
##  1.1502023 -0.4113169 -2.3918840 
## 
## 
## Estimated coefficients for equation sony: 
## ========================================= 
## Call:
## sony = apple.l1 + sony.l1 + const 
## 
##   apple.l1    sony.l1      const 
## 0.04908489 0.78706918 1.81518838
#Reminder:
##For the previous code, the data sets (Nintendo) did not have the same length as Sony and therefore, I had to use a different data set(Apple).
##It is just to demonstrate that the code is working.

#Fitted values

ts(nintendo['Adj.Close']) %>% autoplot(series='nintendo')+
   ts(fitted(price.var)[,2]) %>% autolayer(series='nintendo.fit')

#When we enter the value 2 for the predictor, the mean response values are somehow simular to the actual data set.
ts(sony['Adj.Close']) %>% autoplot(series='sony')+
   ts(fitted(price.var)[,2]) %>% autolayer(series='sony.fit')

#When we enter the value 2 for the predictor, the mean response values are simular to the actual data set; it is actually fitting perfectly the data set. Inferring that eventually the predictions for sony may be better than nintendo.

#Selecting a VAR model

ts = ts(nintendo, frequency = 12, start = c(2015,8))
ts = ts[,-1]

train = as.ts(head(ts, n = 48))
test = as.ts(tail(ts, n = 12))
select  = VARselect(train[,1:3], lag.max=8,
  type="const")[["selection"]]
select
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      8      1      1      1
#Based on the results, I will fit a VAR(8) and a VAR(1) model

#VAR model (8)

var1 = VAR(train[,1:3], p=8, type="const")
serial.test(var1, lags.pt=10, type="PT.asymptotic")
## 
##  Portmanteau Test (asymptotic)
## 
## data:  Residuals of VAR object var1
## Chi-squared = 62.591, df = 18, p-value = 7.751e-07

#VAR model (1)

var2 = VAR(train[,1:3], p=1, type="const")
serial.test(var2, lags.pt=10, type="PT.asymptotic")
## 
##  Portmanteau Test (asymptotic)
## 
## data:  Residuals of VAR object var2
## Chi-squared = 85.917, df = 81, p-value = 0.3333
#The Var model (8) appears to be the best model

#Forecasting var model (8)

fore.var1 = forecast(var1,h=12)
autoplot(fore.var1)

checkresiduals(fore.var1$forecast$Open) #Le open est recommandé par ce programme.

#For some reasons it does not let met knit the Close version so I take the open version instead.
accuracy(fore.var1$forecast$Open, test[1])
##                        ME      RMSE       MAE        MPE     MAPE      MASE
## Training set 8.880632e-17 0.8969521 0.7035678 -0.1168626 1.864784 0.2627756
## Test set     6.094267e-01 0.6094267 0.6094267  1.2741515 1.274151 0.2276149
##                    ACF1
## Training set 0.03381969
## Test set             NA
accuracy(fore.var1$forecast$Open, test[2])
##                        ME      RMSE       MAE        MPE      MAPE       MASE
## Training set 8.880632e-17 0.8969521 0.7035678 -0.1168626 1.8647842 0.26277563
## Test set     1.094267e-01 0.1094267 0.1094267  0.2311994 0.2311994 0.04086978
##                    ACF1
## Training set 0.03381969
## Test set             NA
#The second test provides more accurate metrics since the margin of error is smaller.