library(quantmod)
library(dygraphs)
library(PerformanceAnalytics)
library(moments)
library(MASS)
library(metRology)
library(rugarch)
library(TSstudio)
library(forecast)
library(TSA)
library(aTSA)
library(fBasics)
library(bayesforecast)
library(lmtest)
GFBanorte <- getSymbols("GFNORTEO.MX", src="yahoo", auto.assign = FALSE) # Retreaving historical data for Grupo Financiero Banorte, S.A.B. de C.V.
GFBanorte <- GFBanorte[,4] # Getting the daily closed prices
GFBanorte <- na.omit(GFBanorte) # Removing NA'S
GFBanorte <- GFBanorte["2018-01-02/2023-06-30"] # Getting the data from 2018-01-02 to 2023-06-30
dygraph(GFBanorte, main = "Daily closing prices for Grupo Financiero Banorte Stock <br> January 2018-June 2023")
The plot shows that the value of this stock has increased during this period and it is possible to see a trend in the time series which could indicate that it is not stationary.
logret <- diff(log(GFBanorte$GFNORTEO.MX.Close))[-1] # Calculating the continuous returns
names(logret) <- "logret"
dygraph(logret, main = "Grupo Financiero Banorte Stock Daily Log Returns <br> January 2007-June 2023") # Visualizing the Daily Log Returns for the stock
The plot shows that there are clusters of volatility, that is high volatility days tend be followed by high volatility days and low volatility days tend to be followed by low volatility days.
Because the Value at Risk and Expected Shortfall are related to the shape of the returns distribution it’s important to check if the returns are normally distributed.
chart.Histogram(logret, methods = c("add.normal", "add.density"), colorset = c("gray", "blue", "red"))
legend("topright", legend = c("Hist-logret", "logret dist", "dnorm logret"), col = c("gray", "blue", "red"), lty = 1)
The plot shows that the empirical distribution of the logreturns have a higher peak in the middle and heavier tails in comparison to the normal distribution, with that in mind the VaR and Expected Shorfall could be underestimated assuming normality for the returns. With that in mind, the Jarque Bera test is calculated to test formally for normality:
rvec <- as.vector(logret)
jarque.test(rvec)
##
## Jarque-Bera Normality Test
##
## data: rvec
## JB = 923.63, p-value < 2.2e-16
## alternative hypothesis: greater
The result is confirmed by the Jarque-Bera Normality Test since the P-Value is less than the 5% significance level, we reject the null hypothesis that the data is normally distributed.
Since the distribution of the Daily Log Returns is not normally distributed we could try to estimate the VaR and ES using Monte Carlo Simulation using the empirical distribution fo the data as follows:
alpha <- 0.05
set.seed(1996)
rvec <- sample(as.vector(logret), 100000, replace = TRUE)
VaR <- quantile(rvec, alpha)
ES <- mean(rvec[rvec<VaR])
round(VaR, 6)
## 5%
## -0.037806
round(ES, 6)
## [1] -0.057109
# How money can a hedge fund lose if they invest 100 million dollars in the stock
100*(exp(VaR)-1)
## 5%
## -3.710043
100*(exp(ES)-1)
## [1] -5.550874
Simulating from the empirical distribution gives a Var of 3.78% and a Expected Shorfall of 5.71%, meaning that if a hedge fund invest 100 million dollars in the stock, it could expect to loose 3.71 million dollars 5% of the time. On the other hand, if the returns are less than the Value at Risk the expected loss is about 5.55 million dollars.
rvec <- as.vector(logret)
t.fit <- fitdistr(rvec, "t") # Estimating the parameters for a T Distribution Distribution
alpha <- 0.05
set.seed(1996)
rvec <- rt.scaled(100000, mean = t.fit$estimate[1], sd=t.fit$estimate[2], df=t.fit$estimate[3])
VaR <- quantile(rvec, alpha)
ES <- mean(rvec[rvec<VaR])
round(VaR,6)
## 5%
## -0.036144
round(ES, 6)
## [1] -0.053792
# How money can a hedge fund lose if they invest 100 million dollars in the stock
100*(exp(VaR)-1)
## 5%
## -3.549828
100*(exp(ES)-1)
## [1] -5.237074
Thus, simulating from a parametric distribution and using a 95% confidence level, we get a VaR of -3.61% an Expected Shortfall of -5.37%, meaning that if a hedge fund invest 100 million dollars in the stock, it could expect to loose 3.54 million dollars 5% of the time. On the other hand, if the returns are less than the Value at Risk the expected loss is about 5.23 million dollars.
ggAcf(logret, main = "Serial Correlation for Banorte Stock", plot =TRUE)
The plot shows that there is not strong evidence of serial correlation, meaning that is not possible to predict the direction of stock prices for this period in particular.
ggacf(abs(logret))
The Autocorrelation function of the absolute value of the log returns shows that log returns whether positive of negative tend to be followed by large returns. On the other hand, for test if there are ARCH effects an ARMA model is estimated as follows:
eacf(logret) # The extended acf for the time series suggest an ARMA(3,0)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o o o o o o x o o o o o o o
## 1 x o o o o o x o o o o o o o
## 2 x x o o o o x o o o o o o o
## 3 x x x o o o x o x x o o o o
## 4 o x x o o o o o o o o o o o
## 5 o o x o x o o o o o o o o o
## 6 x x x x x o o o o o o o o o
## 7 o x x x x x o o o o o o o o
auto.arima(logret) # The auto.arima function suggest an ARMA(2,0,2)
## Series: logret
## ARIMA(0,0,0) with zero mean
##
## sigma^2 = 0.0005609: log likelihood = 3211.81
## AIC=-6421.63 AICc=-6421.62 BIC=-6416.4
m1 = arima(logret, order=c(3,0,3))
m1
##
## Call:
## arima(x = logret, order = c(3, 0, 3))
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3 intercept
## 0.5738 0.5727 -0.8430 -0.5845 -0.6238 0.8558 2e-04
## s.e. 0.1144 0.0685 0.1092 0.1085 0.0591 0.1143 6e-04
##
## sigma^2 estimated as 0.0005537: log likelihood = 3220.73, aic = -6427.47
coeftest(m1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.57376340 0.11439141 5.0158 5.282e-07 ***
## ar2 0.57272183 0.06845522 8.3664 < 2.2e-16 ***
## ar3 -0.84296256 0.10921911 -7.7181 1.181e-14 ***
## ma1 -0.58453247 0.10850082 -5.3874 7.150e-08 ***
## ma2 -0.62379910 0.05906820 -10.5607 < 2.2e-16 ***
## ma3 0.85576698 0.11433726 7.4846 7.177e-14 ***
## intercept 0.00019231 0.00058901 0.3265 0.744
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tsdiag(m1) # The residuals seems be correct since there´s no serial correlation in the residuals
arch.test(m1)
## ARCH heteroscedasticity test for residuals
## alternative: heteroscedastic
##
## Portmanteau-Q test:
## order PQ p.value
## [1,] 4 120 0
## [2,] 8 268 0
## [3,] 12 357 0
## [4,] 16 388 0
## [5,] 20 399 0
## [6,] 24 404 0
## Lagrange-Multiplier test:
## order LM p.value
## [1,] 4 1048 0.00e+00
## [2,] 8 441 0.00e+00
## [3,] 12 254 0.00e+00
## [4,] 16 186 0.00e+00
## [5,] 20 144 0.00e+00
## [6,] 24 117 1.28e-14
Since the P-Value is less than the 5% significance level, we reject the null hypothesis that the residuals are homocedastic even for higher lags of the residuals. With that in mind, a GARCH model is more appropriate to forecast the future volatility of the daily log returns.
ModelMean = list(armaOrder = c(3,3), include.mean=FALSE) # Specifying the mean equation
ModelVar = list(model = "sGARCH", garchOrder = c(1,1)) # Specifying the Variance equation
garch.N <- ugarchspec(variance.model = ModelVar, mean.model = ModelMean, distribution.model = "std")
fit.garch.N <- ugarchfit(spec=garch.N, data=logret)
fit.garch.N
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(3,0,3)
## Distribution : std
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## ar1 -0.243033 0.044677 -5.4398 0.000000
## ar2 0.521313 0.024708 21.0993 0.000000
## ar3 -0.195704 0.030529 -6.4105 0.000000
## ma1 0.213044 0.034301 6.2111 0.000000
## ma2 -0.597582 0.002575 -232.0690 0.000000
## ma3 0.165992 0.024333 6.8216 0.000000
## omega 0.000035 0.000012 2.8843 0.003923
## alpha1 0.135863 0.031157 4.3605 0.000013
## beta1 0.804819 0.043206 18.6275 0.000000
## shape 6.326925 1.066181 5.9342 0.000000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## ar1 -0.243033 0.031798 -7.6431 0.000000
## ar2 0.521313 0.026023 20.0326 0.000000
## ar3 -0.195704 0.024753 -7.9064 0.000000
## ma1 0.213044 0.012115 17.5854 0.000000
## ma2 -0.597582 0.002660 -224.6198 0.000000
## ma3 0.165992 0.008215 20.2064 0.000000
## omega 0.000035 0.000014 2.4761 0.013282
## alpha1 0.135863 0.037686 3.6051 0.000312
## beta1 0.804819 0.053477 15.0499 0.000000
## shape 6.326925 1.146686 5.5176 0.000000
##
## LogLikelihood : 3353.808
##
## Information Criteria
## ------------------------------------
##
## Akaike -4.8391
## Bayes -4.8012
## Shibata -4.8392
## Hannan-Quinn -4.8249
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.2283 0.6328
## Lag[2*(p+q)+(p+q)-1][17] 5.8386 1.0000
## Lag[4*(p+q)+(p+q)-1][29] 10.4740 0.9548
## d.o.f=6
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.1198 0.7293
## Lag[2*(p+q)+(p+q)-1][5] 0.9561 0.8695
## Lag[4*(p+q)+(p+q)-1][9] 3.7335 0.6343
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.1144 0.500 2.000 0.7352
## ARCH Lag[5] 1.6316 1.440 1.667 0.5584
## ARCH Lag[7] 4.3342 2.315 1.543 0.3009
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 2.3179
## Individual Statistics:
## ar1 0.25404
## ar2 0.16276
## ar3 0.07243
## ma1 0.15594
## ma2 0.10522
## ma3 0.04806
## omega 0.52452
## alpha1 0.24756
## beta1 0.32956
## shape 0.49340
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 2.29 2.54 3.05
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 0.4453 0.6562
## Negative Sign Bias 0.7018 0.4829
## Positive Sign Bias 0.4103 0.6816
## Joint Effect 0.6932 0.8748
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 26.71 0.11154
## 2 30 39.94 0.08494
## 3 40 50.24 0.10716
## 4 50 56.93 0.20384
##
##
## Elapsed time : 0.513335
save1 <- cbind( logret[,1], fit.garch.N@fit$sigma, fit.garch.N@fit$z )
names(save1) <- c( "logret", "s", "z" )
ggAcf(save1$z, main = "Serial Correlation for the residuals", plot =TRUE)
ggAcf(abs(save1$z), main = "Serial Correlation for the absolute value of the residuals", plot =TRUE)
As we can see the GARCH model has captured the volatility clustering observed in the data that result is confirmed by the Ljung-Box Test on the Standardized Residuals showed in the output of the GARCH Model, since there is not serial correlation in the residuals the model can be trusted.
meanReturns <- fitted(fit.garch.N)
autoplot(meanReturns)
volatility <- sigma(fit.garch.N)
autoplot(volatility)
forc1<-ugarchforecast(fitORspec = fit.garch.N, n.ahead=50)
plot(forc1, which=1)
plot(forc1, which=3)
# Simulation of the volatility
plot(fit.garch.N, which = "all")
##
## please wait...calculating quantiles...
meanGFB<-fitted(fit.garch.N)
plot(meanGFB)
VolatilityGFB<-sigma(fit.garch.N)
windows(width = 10, height = 8)
simulation <- ugarchsim(fit.garch.N, n.sim=100, m.sim =25,
startMethod = "sample")
plot(simulation, which = "all", cex=0.05)