Loading data and libraries

library(quantmod)
library(dygraphs)
library(PerformanceAnalytics)
library(moments)
library(MASS)
library(metRology)
library(rugarch)
library(TSstudio)
library(forecast)
library(TSA)
library(aTSA)
library(fBasics)
library(bayesforecast)
library(lmtest)
GFBanorte <- getSymbols("GFNORTEO.MX", src="yahoo", auto.assign = FALSE) # Retreaving historical data for Grupo Financiero Banorte, S.A.B. de C.V.

Preparing and visualizing the data

GFBanorte <- GFBanorte[,4] # Getting the daily closed prices
GFBanorte <- na.omit(GFBanorte) # Removing NA'S
GFBanorte <- GFBanorte["2018-01-02/2023-06-30"] # Getting the data from 2018-01-02 to 2023-06-30
dygraph(GFBanorte, main = "Daily closing prices for Grupo Financiero Banorte Stock <br> January 2018-June 2023")

The plot shows that the value of this stock has increased during this period and it is possible to see a trend in the time series which could indicate that it is not stationary.

Calculating and analyzing the returns for the stock

logret <- diff(log(GFBanorte$GFNORTEO.MX.Close))[-1] # Calculating the continuous returns
names(logret) <- "logret"
dygraph(logret, main = "Grupo Financiero Banorte Stock Daily Log Returns <br> January 2007-June 2023") # Visualizing the Daily Log Returns for the stock

The plot shows that there are clusters of volatility, that is high volatility days tend be followed by high volatility days and low volatility days tend to be followed by low volatility days.

Calculating basic statistics for the logreturns

Because the Value at Risk and Expected Shortfall are related to the shape of the returns distribution it’s important to check if the returns are normally distributed.

chart.Histogram(logret, methods = c("add.normal", "add.density"), colorset = c("gray", "blue", "red"))
legend("topright", legend = c("Hist-logret", "logret dist", "dnorm logret"), col = c("gray", "blue", "red"), lty = 1)

The plot shows that the empirical distribution of the logreturns have a higher peak in the middle and heavier tails in comparison to the normal distribution, with that in mind the VaR and Expected Shorfall could be underestimated assuming normality for the returns. With that in mind, the Jarque Bera test is calculated to test formally for normality:

rvec <- as.vector(logret)
jarque.test(rvec)
## 
##  Jarque-Bera Normality Test
## 
## data:  rvec
## JB = 923.63, p-value < 2.2e-16
## alternative hypothesis: greater

The result is confirmed by the Jarque-Bera Normality Test since the P-Value is less than the 5% significance level, we reject the null hypothesis that the data is normally distributed.

Calculating VaR and Expected Shorfall with Monte Carlo Simulation using the empirical distribution

Since the distribution of the Daily Log Returns is not normally distributed we could try to estimate the VaR and ES using Monte Carlo Simulation using the empirical distribution fo the data as follows:

alpha <- 0.05
set.seed(1996)
rvec <- sample(as.vector(logret), 100000, replace = TRUE)
VaR <- quantile(rvec, alpha)
ES <- mean(rvec[rvec<VaR])
round(VaR, 6)
##        5% 
## -0.037806
round(ES, 6)
## [1] -0.057109
# How money can a hedge fund lose if  they invest 100 million dollars in the stock
100*(exp(VaR)-1)
##        5% 
## -3.710043
100*(exp(ES)-1)
## [1] -5.550874

Simulating from the empirical distribution gives a Var of 3.78% and a Expected Shorfall of 5.71%, meaning that if a hedge fund invest 100 million dollars in the stock, it could expect to loose 3.71 million dollars 5% of the time. On the other hand, if the returns are less than the Value at Risk the expected loss is about 5.55 million dollars.

Calculating the Var and ES with Monte Carlo Simulation using a parametric distribution

rvec <- as.vector(logret)
t.fit <- fitdistr(rvec, "t") # Estimating the parameters for a T Distribution Distribution
alpha <- 0.05
set.seed(1996)
rvec <- rt.scaled(100000, mean = t.fit$estimate[1], sd=t.fit$estimate[2], df=t.fit$estimate[3])
VaR <- quantile(rvec, alpha)
ES <- mean(rvec[rvec<VaR])
round(VaR,6)
##        5% 
## -0.036144
round(ES, 6)
## [1] -0.053792
# How money can a hedge fund lose if  they invest 100 million dollars in the stock
100*(exp(VaR)-1)
##        5% 
## -3.549828
100*(exp(ES)-1)
## [1] -5.237074

Thus, simulating from a parametric distribution and using a 95% confidence level, we get a VaR of -3.61% an Expected Shortfall of -5.37%, meaning that if a hedge fund invest 100 million dollars in the stock, it could expect to loose 3.54 million dollars 5% of the time. On the other hand, if the returns are less than the Value at Risk the expected loss is about 5.23 million dollars.

Serial correlation, Volatility clustering and GARCH Model for banorte stock

ggAcf(logret, main = "Serial Correlation for Banorte Stock", plot =TRUE)

The plot shows that there is not strong evidence of serial correlation, meaning that is not possible to predict the direction of stock prices for this period in particular.

Testing for ARCH Effects

ggacf(abs(logret))

The Autocorrelation function of the absolute value of the log returns shows that log returns whether positive of negative tend to be followed by large returns. On the other hand, for test if there are ARCH effects an ARMA model is estimated as follows:

eacf(logret)      # The extended acf for the time series suggest an ARMA(3,0)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o o o o o o x o o o o  o  o  o 
## 1 x o o o o o x o o o o  o  o  o 
## 2 x x o o o o x o o o o  o  o  o 
## 3 x x x o o o x o x x o  o  o  o 
## 4 o x x o o o o o o o o  o  o  o 
## 5 o o x o x o o o o o o  o  o  o 
## 6 x x x x x o o o o o o  o  o  o 
## 7 o x x x x x o o o o o  o  o  o
auto.arima(logret) # The auto.arima function suggest an ARMA(2,0,2)
## Series: logret 
## ARIMA(0,0,0) with zero mean 
## 
## sigma^2 = 0.0005609:  log likelihood = 3211.81
## AIC=-6421.63   AICc=-6421.62   BIC=-6416.4
m1 =  arima(logret, order=c(3,0,3))
m1
## 
## Call:
## arima(x = logret, order = c(3, 0, 3))
## 
## Coefficients:
##          ar1     ar2      ar3      ma1      ma2     ma3  intercept
##       0.5738  0.5727  -0.8430  -0.5845  -0.6238  0.8558      2e-04
## s.e.  0.1144  0.0685   0.1092   0.1085   0.0591  0.1143      6e-04
## 
## sigma^2 estimated as 0.0005537:  log likelihood = 3220.73,  aic = -6427.47
coeftest(m1)
## 
## z test of coefficients:
## 
##              Estimate  Std. Error  z value  Pr(>|z|)    
## ar1        0.57376340  0.11439141   5.0158 5.282e-07 ***
## ar2        0.57272183  0.06845522   8.3664 < 2.2e-16 ***
## ar3       -0.84296256  0.10921911  -7.7181 1.181e-14 ***
## ma1       -0.58453247  0.10850082  -5.3874 7.150e-08 ***
## ma2       -0.62379910  0.05906820 -10.5607 < 2.2e-16 ***
## ma3        0.85576698  0.11433726   7.4846 7.177e-14 ***
## intercept  0.00019231  0.00058901   0.3265     0.744    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tsdiag(m1) # The residuals seems be correct since there´s no serial correlation in the residuals 

arch.test(m1)
## ARCH heteroscedasticity test for residuals 
## alternative: heteroscedastic 
## 
## Portmanteau-Q test: 
##      order  PQ p.value
## [1,]     4 120       0
## [2,]     8 268       0
## [3,]    12 357       0
## [4,]    16 388       0
## [5,]    20 399       0
## [6,]    24 404       0
## Lagrange-Multiplier test: 
##      order   LM  p.value
## [1,]     4 1048 0.00e+00
## [2,]     8  441 0.00e+00
## [3,]    12  254 0.00e+00
## [4,]    16  186 0.00e+00
## [5,]    20  144 0.00e+00
## [6,]    24  117 1.28e-14

Since the P-Value is less than the 5% significance level, we reject the null hypothesis that the residuals are homocedastic even for higher lags of the residuals. With that in mind, a GARCH model is more appropriate to forecast the future volatility of the daily log returns.

Especifying a GARCH Model

ModelMean = list(armaOrder = c(3,3), include.mean=FALSE) # Specifying the mean equation
ModelVar = list(model = "sGARCH", garchOrder = c(1,1)) # Specifying the Variance equation
garch.N <- ugarchspec(variance.model = ModelVar, mean.model = ModelMean, distribution.model = "std")
fit.garch.N <- ugarchfit(spec=garch.N, data=logret)
fit.garch.N
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : sGARCH(1,1)
## Mean Model   : ARFIMA(3,0,3)
## Distribution : std 
## 
## Optimal Parameters
## ------------------------------------
##         Estimate  Std. Error   t value Pr(>|t|)
## ar1    -0.243033    0.044677   -5.4398 0.000000
## ar2     0.521313    0.024708   21.0993 0.000000
## ar3    -0.195704    0.030529   -6.4105 0.000000
## ma1     0.213044    0.034301    6.2111 0.000000
## ma2    -0.597582    0.002575 -232.0690 0.000000
## ma3     0.165992    0.024333    6.8216 0.000000
## omega   0.000035    0.000012    2.8843 0.003923
## alpha1  0.135863    0.031157    4.3605 0.000013
## beta1   0.804819    0.043206   18.6275 0.000000
## shape   6.326925    1.066181    5.9342 0.000000
## 
## Robust Standard Errors:
##         Estimate  Std. Error   t value Pr(>|t|)
## ar1    -0.243033    0.031798   -7.6431 0.000000
## ar2     0.521313    0.026023   20.0326 0.000000
## ar3    -0.195704    0.024753   -7.9064 0.000000
## ma1     0.213044    0.012115   17.5854 0.000000
## ma2    -0.597582    0.002660 -224.6198 0.000000
## ma3     0.165992    0.008215   20.2064 0.000000
## omega   0.000035    0.000014    2.4761 0.013282
## alpha1  0.135863    0.037686    3.6051 0.000312
## beta1   0.804819    0.053477   15.0499 0.000000
## shape   6.326925    1.146686    5.5176 0.000000
## 
## LogLikelihood : 3353.808 
## 
## Information Criteria
## ------------------------------------
##                     
## Akaike       -4.8391
## Bayes        -4.8012
## Shibata      -4.8392
## Hannan-Quinn -4.8249
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                          statistic p-value
## Lag[1]                      0.2283  0.6328
## Lag[2*(p+q)+(p+q)-1][17]    5.8386  1.0000
## Lag[4*(p+q)+(p+q)-1][29]   10.4740  0.9548
## d.o.f=6
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                     0.1198  0.7293
## Lag[2*(p+q)+(p+q)-1][5]    0.9561  0.8695
## Lag[4*(p+q)+(p+q)-1][9]    3.7335  0.6343
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[3]    0.1144 0.500 2.000  0.7352
## ARCH Lag[5]    1.6316 1.440 1.667  0.5584
## ARCH Lag[7]    4.3342 2.315 1.543  0.3009
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  2.3179
## Individual Statistics:              
## ar1    0.25404
## ar2    0.16276
## ar3    0.07243
## ma1    0.15594
## ma2    0.10522
## ma3    0.04806
## omega  0.52452
## alpha1 0.24756
## beta1  0.32956
## shape  0.49340
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          2.29 2.54 3.05
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value   prob sig
## Sign Bias           0.4453 0.6562    
## Negative Sign Bias  0.7018 0.4829    
## Positive Sign Bias  0.4103 0.6816    
## Joint Effect        0.6932 0.8748    
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     26.71      0.11154
## 2    30     39.94      0.08494
## 3    40     50.24      0.10716
## 4    50     56.93      0.20384
## 
## 
## Elapsed time : 0.513335

Dianostics test

save1 <- cbind( logret[,1], fit.garch.N@fit$sigma, fit.garch.N@fit$z )
names(save1) <- c( "logret", "s", "z" ) 
ggAcf(save1$z, main = "Serial Correlation for the residuals", plot =TRUE)

ggAcf(abs(save1$z), main = "Serial Correlation for the absolute value of the residuals", plot =TRUE)

As we can see the GARCH model has captured the volatility clustering observed in the data that result is confirmed by the Ljung-Box Test on the Standardized Residuals showed in the output of the GARCH Model, since there is not serial correlation in the residuals the model can be trusted.

meanReturns <- fitted(fit.garch.N)
autoplot(meanReturns)

volatility <- sigma(fit.garch.N)
autoplot(volatility)

Forecast of the volatility of the next 15 days

forc1<-ugarchforecast(fitORspec = fit.garch.N, n.ahead=50)
plot(forc1, which=1)

plot(forc1, which=3)

# Simulation of the volatility

plot(fit.garch.N, which = "all")
## 
## please wait...calculating quantiles...

meanGFB<-fitted(fit.garch.N)
plot(meanGFB)

VolatilityGFB<-sigma(fit.garch.N)

windows(width = 10, height = 8)
simulation <- ugarchsim(fit.garch.N, n.sim=100, m.sim =25,
                        startMethod = "sample")
plot(simulation, which = "all", cex=0.05)