The two stocks I decided to work on this week are Nestle and Unilever. Source: Yahoo Finance.
#Packages
library(readr)
library(ggplot2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
library(vars)
## Loading required package: MASS
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: urca
## Loading required package: lmtest
Importing Data
#Monthly frequency: 08/04/2015-08/04/2020
# Nestle: monthly price history exported from Yahoo Finance
Nest <- read.csv(file = "/Users/dorothymensah/Desktop/NESTLE.csv")
# Inspect column names and types of the imported table
str(object = Nest)
## 'data.frame': 61 obs. of 7 variables:
## $ Date : Factor w/ 61 levels "2015-08-01","2015-09-01",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Open : num 73.9 72.4 75.5 76.1 74.1 ...
## $ High : num 74.2 76.9 79.4 76.7 76.1 ...
## $ Low : num 73.2 72.1 74.6 72.6 72.2 ...
## $ Close : num 73.6 75.2 76.2 73.9 74.4 ...
## $ Adj.Close: num 63.7 65.1 65.9 64 64.4 ...
## $ Volume : int 950900 10645300 8623100 6782700 9049700 13460500 13297900 12406100 13215300 10262900 ...
# Unilever: monthly price history exported from Yahoo Finance
Un <- read.csv(file = "/Users/dorothymensah/Desktop//UN.csv")
# Inspect column names and types of the imported table
str(object = Un)
## 'data.frame': 60 obs. of 7 variables:
## $ Date : Factor w/ 60 levels "2015-09-01","2015-10-01",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Open : num 39.2 40.4 45.4 43.8 42.2 ...
## $ High : num 41 46.3 45.5 44.5 44.5 ...
## $ Low : num 37.9 39.9 42.6 42 40 ...
## $ Close : num 40.2 45 43.7 43.3 44.4 ...
## $ Adj.Close: num 34.6 38.7 37.9 37.5 38.5 ...
## $ Volume : int 49330600 46840100 38221600 42093300 48549800 54009500 44953600 39851500 34445700 65260500 ...
#DATA
# Time series for the Adj.Close variable.
#
# The two files do not start in the same month (Nestle's first row is
# 2015-08-01, Unilever's is 2015-09-01), so each series takes its start
# period from its own Date column instead of a shared, hard-coded c(2015, 1).

# Return c(year, month) for the first entry of a Date column.
# Works whether the column was read as factor or character.
first_month <- function(dates) {
  stopifnot(length(dates) > 0)
  d <- as.Date(as.character(dates[1]))
  c(as.integer(format(d, "%Y")), as.integer(format(d, "%m")))
}

Nest.ts <- ts(Nest$Adj.Close, frequency = 12, start = first_month(Nest$Date))
Un.ts <- ts(Un$Adj.Close, frequency = 12, start = first_month(Un$Date))

## Plotting
# Plot the dated series (not un-indexed copies of the data frames) so the
# x-axis shows calendar time and both stocks are aligned on the same months.
autoplot(Nest.ts, series = "Nestle") +
  autolayer(Un.ts, series = "Unilever")
## Although the two companies sell broadly similar products, Nestle shows a stronger upward trend than Unilever.
### Residuals
# checkresiduals() expects a fitted model or forecast object; a bare time
# series is treated as if it already were a residual series. Passing the raw
# prices therefore tested the prices themselves for white noise and raised
# "Could not find appropriate degrees of freedom for this model".
# Check the residuals of a naive (random-walk) benchmark fit instead, so the
# diagnostics describe what is left over after a model has been fitted.
checkresiduals(naive(Nest.ts))
checkresiduals(naive(Un.ts))
VAR MODEL
## Combining both data sets into one
# The files cover different spans (Nestle: 61 rows from 2015-08-01,
# Unilever: 60 rows from 2015-09-01). A plain cbind() recycles Unilever's
# first value into the last row and pairs every Nestle month with the
# *following* Unilever month, so the series are joined on Date instead and
# only months present in both files are kept.
nestle_adj <- data.frame(
  Date = as.Date(as.character(Nest$Date)),
  Nestle = Nest$Adj.Close
)
unilever_adj <- data.frame(
  Date = as.Date(as.character(Un$Date)),
  Unilever = Un$Adj.Close
)
aligned <- merge(nestle_adj, unilever_adj, by = "Date")  # sorted by Date
stopifnot(nrow(aligned) > 0)

# Column names are carried over by as.matrix(); names() on a matrix would not
# set them, which is why the model previously reported Series.1 / Series.2.
v1 <- as.matrix(aligned[, c("Nestle", "Unilever")])

## To forecast, v1 has to be a ts object.
# Start at the first month the two series share instead of a hard-coded date.
first_date <- aligned$Date[1]
v1.ts <- ts(
  v1,
  frequency = 12,
  start = c(
    as.integer(format(first_date, "%Y")),
    as.integer(format(first_date, "%m"))
  )
)
# NOTE: the model and forecast output recorded further down was produced from
# the misaligned, mis-dated data and must be regenerated after this change.
Using VARselect() to determine which VAR lag order best fits the data set
# Lag order preferred by each information criterion, searching up to
# 8 lags with a constant term in the model.
select <- VARselect(y = v1.ts, lag.max = 8, type = "const")$selection
print(select)
## AIC(n) HQ(n) SC(n) FPE(n)
## 2 2 2 2
# Every criterion reported by VARselect picks 2 lags, so fit a VAR(2)
# with a constant term and print the estimation summary.
var1 <- VAR(y = v1.ts, p = 2, type = "const")
print(summary(var1))
##
## VAR Estimation Results:
## =========================
## Endogenous variables: Series.1, Series.2
## Deterministic variables: const
## Sample size: 59
## Log Likelihood: -303.188
## Roots of the characteristic polynomial:
## 0.967 0.967 0.2412 0.2412
## Call:
## VAR(y = v1.ts, p = 2, type = "const")
##
##
## Estimation results for equation Series.1:
## =========================================
## Series.1 = Series.1.l1 + Series.2.l1 + Series.1.l2 + Series.2.l2 + const
##
## Estimate Std. Error t value Pr(>|t|)
## Series.1.l1 1.01191 0.11587 8.733 6.58e-12 ***
## Series.2.l1 0.84585 0.15984 5.292 2.26e-06 ***
## Series.1.l2 -0.01763 0.11570 -0.152 0.879
## Series.2.l2 -0.76086 0.16128 -4.718 1.73e-05 ***
## const -3.15234 2.44334 -1.290 0.202
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 2.723 on 54 degrees of freedom
## Multiple R-Squared: 0.9744, Adjusted R-squared: 0.9725
## F-statistic: 514.6 on 4 and 54 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation Series.2:
## =========================================
## Series.2 = Series.1.l1 + Series.2.l1 + Series.1.l2 + Series.2.l2 + const
##
## Estimate Std. Error t value Pr(>|t|)
## Series.1.l1 -0.14348 0.17237 -0.832 0.408877
## Series.2.l1 0.92292 0.23780 3.881 0.000285 ***
## Series.1.l2 0.07313 0.17212 0.425 0.672614
## Series.2.l2 0.06911 0.23993 0.288 0.774417
## const 6.12339 3.63492 1.685 0.097838 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 4.051 on 54 degrees of freedom
## Multiple R-Squared: 0.7373, Adjusted R-squared: 0.7179
## F-statistic: 37.9 on 4 and 54 DF, p-value: 4.407e-15
##
##
##
## Covariance matrix of residuals:
## Series.1 Series.2
## Series.1 7.415 1.65
## Series.2 1.650 16.41
##
## Correlation matrix of residuals:
## Series.1 Series.2
## Series.1 1.0000 0.1495
## Series.2 0.1495 1.0000
## Fit and residual diagrams for each equation of the VAR model
plot(x = var1)
FORECAST
# Forecast both series 12 months ahead from the fitted VAR and print the
# point forecasts with their 80% / 95% intervals.
var1.fc <- forecast(object = var1, h = 12)
print(var1.fc)
## Series.1
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## May 2018 98.79872 95.30892 102.2885 93.46153 104.1359
## Jun 2018 96.92192 89.95260 103.8913 86.26326 107.5806
## Jul 2018 96.44026 87.33097 105.5495 82.50881 110.3717
## Aug 2018 94.90212 84.22347 105.5808 78.57053 111.2337
## Sep 2018 93.30672 81.30184 105.3116 74.94684 111.6666
## Oct 2018 91.80948 78.65105 104.9679 71.68539 111.9336
## Nov 2018 90.35972 76.18872 104.5307 68.68705 112.0324
## Dec 2018 88.95406 73.88740 104.0207 65.91159 111.9965
## Jan 2019 87.60072 71.73826 103.4632 63.34118 111.8603
## Feb 2019 86.30456 69.73384 102.8753 60.96183 111.6473
## Mar 2019 85.06912 67.86810 102.2701 58.76243 111.3758
## Apr 2019 83.89750 66.13630 101.6587 56.73409 111.0609
##
## Series.2
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## May 2018 33.69203 28.50030 38.88377 25.75196 41.63210
## Jun 2018 34.15842 27.12663 41.19021 23.40422 44.91262
## Jul 2018 33.29660 25.15415 41.43906 20.84379 45.74942
## Aug 2018 32.46531 23.47972 41.45089 18.72304 46.20758
## Sep 2018 31.82399 22.17229 41.47569 17.06299 46.58500
## Oct 2018 31.29107 21.10855 41.47360 15.71824 46.86390
## Nov 2018 30.85305 20.24239 41.46372 14.62544 47.08067
## Dec 2018 30.51048 19.55054 41.47043 13.74869 47.27227
## Jan 2019 30.25970 19.01154 41.50785 13.05712 47.46227
## Feb 2019 30.09594 18.60668 41.58520 12.52464 47.66724
## Mar 2019 30.01447 18.31999 41.70895 12.12931 47.89963
## Apr 2019 30.01043 18.13745 41.88340 11.85228 48.16857
# Plot the forecasts for both series
autoplot(object = var1.fc)
Both graphs look somewhat similar; however, Unilever (series 2) seems to forecast poorly, which we also observed in the diagram of fit and which corresponds with the residual graphs from earlier.