For this assignments, I used the data sets from Nintendo and Sony:
Both companies are famous for their high-quality video games on the Nintendo Switch for Nintendo and the Playstation 4 for Sony. Since they are direct competitors, they must certainly have a lot of similarties since they target the same audience(the gamers).
#Reading our data sets
#Summary of our data sets
str(nintendo)
## 'data.frame': 60 obs. of 7 variables:
## $ Date : chr "2015-09-01" "2015-10-01" "2015-11-01" "2015-12-01" ...
## $ Open : num 24.1 21.5 20 19.4 17.2 ...
## $ High : num 25 24.9 21 19.7 17.6 ...
## $ Low : num 20.2 20 19 16.6 15.3 ...
## $ Close : num 21 20.2 19.1 17.3 17.6 ...
## $ Adj.Close: num 19.2 18.5 17.5 15.8 16.1 ...
## $ Volume : int 1292500 3185600 890500 1353700 2312000 2475500 1536000 1484400 1235300 1022700 ...
head(nintendo)
## Date Open High Low Close Adj.Close Volume
## 1 2015-09-01 24.10 24.95 20.21 20.97 19.16429 1292500
## 2 2015-10-01 21.52 24.88 20.05 20.22 18.50571 3185600
## 3 2015-11-01 20.02 21.03 19.05 19.13 17.50812 890500
## 4 2015-12-01 19.42 19.67 16.61 17.26 15.79667 1353700
## 5 2016-01-01 17.24 17.58 15.34 17.57 16.08038 2312000
## 6 2016-02-01 17.75 18.80 16.06 17.39 15.91564 2475500
tail(nintendo)
## Date Open High Low Close Adj.Close Volume
## 55 2020-03-01 42.01 48.75 35.82 48.28 47.27861 9859900
## 56 2020-04-01 47.62 55.92 47.61 51.44 51.44000 10586000
## 57 2020-05-01 51.82 54.84 50.50 50.84 50.84000 6537600
## 58 2020-06-01 51.30 58.95 50.43 55.90 55.90000 10004600
## 59 2020-07-01 55.58 60.00 54.72 55.01 55.01000 3932800
## 60 2020-08-01 55.49 57.25 55.00 57.05 57.05000 289000
str(sony)
## 'data.frame': 61 obs. of 7 variables:
## $ Date : chr "2015-08-01" "2015-09-01" "2015-10-01" "2015-11-01" ...
## $ Open : num 25.5 25.1 24.8 28.5 26.2 ...
## $ High : num 26 26.9 29.1 29.1 26.4 ...
## $ Low : num 25.5 23.3 24.5 25.8 24 ...
## $ Close : num 25.9 25.4 28.4 25.9 24.6 ...
## $ Adj.Close: num 25.1 24.6 27.6 25.2 23.9 ...
## $ Volume : int 1866700 43220300 24156900 13899900 25763800 45561800 27120900 32082900 39773500 35696500 ...
head(sony)
## Date Open High Low Close Adj.Close Volume
## 1 2015-08-01 25.53 25.95 25.52 25.86 25.05767 1866700
## 2 2015-09-01 25.13 26.86 23.26 25.41 24.62163 43220300
## 3 2015-10-01 24.80 29.08 24.53 28.40 27.60809 24156900
## 4 2015-11-01 28.54 29.08 25.83 25.92 25.19724 13899900
## 5 2015-12-01 26.20 26.36 23.97 24.61 23.92377 25763800
## 6 2016-01-01 24.45 25.72 19.90 23.88 23.21413 45561800
tail(sony)
## Date Open High Low Close Adj.Close Volume
## 56 2020-03-01 63.60 64.95 50.94 59.18 58.95736 38973100
## 57 2020-04-01 57.93 64.85 56.65 64.25 64.25000 31255800
## 58 2020-05-01 63.23 66.40 61.35 64.76 64.76000 26341100
## 59 2020-06-01 64.91 72.34 64.56 69.13 69.13000 29662000
## 60 2020-07-01 69.17 78.76 68.98 77.96 77.96000 21469000
## 61 2020-08-01 78.71 84.15 78.67 83.37 83.37000 3829900
#Creating our time series - VAR model style
ts(nintendo['Adj.Close']) %>% autoplot(series='nintendo')+
ts(sony['Adj.Close']) %>% autolayer(series='sony') +xlab("Time") + ylab("Adjusting Closing Price in USD")
#As we can see both companies have the same tendencies; it appears that sony has the upper hand.
#Building a data set containing stock prices of both companies
apple = read.csv("C:\\Users\\student\\Desktop\\Predictive Analytics - Forecasting\\Discussion 6 - VAR model\\APPLE.csv")
price = cbind(apple['Adj.Close'],sony['Adj.Close']) #Adj.Close is our variable of interest
##For the previous code, the data sets did not have the same length and therefore, I had to use a different data set.
names(price) = c('apple','sony')
price.var = VAR(price, type='const', lag.max=1, ic='AIC') #let's try lag 1.
price.var
##
## VAR Estimation Results:
## =======================
##
## Estimated coefficients for equation apple:
## ==========================================
## Call:
## apple = apple.l1 + sony.l1 + const
##
## apple.l1 sony.l1 const
## 1.1502023 -0.4113169 -2.3918840
##
##
## Estimated coefficients for equation sony:
## =========================================
## Call:
## sony = apple.l1 + sony.l1 + const
##
## apple.l1 sony.l1 const
## 0.04908489 0.78706918 1.81518838
#Reminder:
##For the previous code, the data sets (Nintendo) did not have the same length as Sony and therefore, I had to use a different data set(Apple).
##It is just to demonstrate that the code is working.
#Fitted values
ts(nintendo['Adj.Close']) %>% autoplot(series='nintendo')+
ts(fitted(price.var)[,2]) %>% autolayer(series='nintendo.fit')
#When we enter the value 2 for the predictor, the mean response values are somehow simular to the actual data set.
ts(sony['Adj.Close']) %>% autoplot(series='sony')+
ts(fitted(price.var)[,2]) %>% autolayer(series='sony.fit')
#When we enter the value 2 for the predictor, the mean response values are simular to the actual data set; it is actually fitting perfectly the data set. Inferring that eventually the predictions for sony may be better than nintendo.
#Selecting a VAR model
ts = ts(nintendo, frequency = 12, start = c(2015,8))
ts = ts[,-1]
train = as.ts(head(ts, n = 48))
test = as.ts(tail(ts, n = 12))
select = VARselect(train[,1:3], lag.max=8,
type="const")[["selection"]]
select
## AIC(n) HQ(n) SC(n) FPE(n)
## 8 1 1 1
#Based on the results, I will fit a VAR(8) and a VAR(1) model
#VAR model (8)
var1 = VAR(train[,1:3], p=8, type="const")
serial.test(var1, lags.pt=10, type="PT.asymptotic")
##
## Portmanteau Test (asymptotic)
##
## data: Residuals of VAR object var1
## Chi-squared = 62.591, df = 18, p-value = 7.751e-07
#VAR model (1)
var2 = VAR(train[,1:3], p=1, type="const")
serial.test(var2, lags.pt=10, type="PT.asymptotic")
##
## Portmanteau Test (asymptotic)
##
## data: Residuals of VAR object var2
## Chi-squared = 85.917, df = 81, p-value = 0.3333
#The Var model (8) appears to be the best model
#Forecasting var model (8)
fore.var1 = forecast(var1,h=12)
autoplot(fore.var1)
checkresiduals(fore.var1$forecast$Open) #Le open est recommandé par ce programme.
#For some reasons it does not let met knit the Close version so I take the open version instead.
accuracy(fore.var1$forecast$Open, test[1])
## ME RMSE MAE MPE MAPE MASE
## Training set 8.880632e-17 0.8969521 0.7035678 -0.1168626 1.864784 0.2627756
## Test set 6.094267e-01 0.6094267 0.6094267 1.2741515 1.274151 0.2276149
## ACF1
## Training set 0.03381969
## Test set NA
accuracy(fore.var1$forecast$Open, test[2])
## ME RMSE MAE MPE MAPE MASE
## Training set 8.880632e-17 0.8969521 0.7035678 -0.1168626 1.8647842 0.26277563
## Test set 1.094267e-01 0.1094267 0.1094267 0.2311994 0.2311994 0.04086978
## ACF1
## Training set 0.03381969
## Test set NA
#The second test provides more accurate metrics since the margin of error is smaller.