We install all the packages that we will need.

library(xts)
library(readxl)
library(quantmod)
library(zoo)
library(tseries)
library(forecast)
library(astsa)
library(lmtest)
library(rugarch)
library(moments)
library(astsa)
library(PerformanceAnalytics)
library(lmtest)

Forecasting the Mexican and the US economies under the COVID post-crisis

We download the GDP from US and Mexican

library(quantmod)
getSymbols.FRED(Symbols = c("GDPC1", "NAEXKP01MXQ189S"), env = (.GlobalEnv), return.class = "xts")
## [1] "GDPC1"           "NAEXKP01MXQ189S"

I merge the data,

dataset <- na.omit(merge(GDPC1, NAEXKP01MXQ189S))

We calculate index

dataset$indexusa = dataset$GDPC1 /as.numeric( dataset$GDPC1[1])
dataset$indexmxx = dataset$NAEXKP01MXQ189S /as.numeric( dataset$NAEXKP01MXQ189S[1])
dataset$lngdpus <- log(dataset$indexusa)
dataset$lngdpmxx <- log(dataset$indexmxx)

Then we plot the data to visualize the GDP or both countries.

plot(dataset$indexusa)

plot(dataset$indexmxx)

## Calibration of the ARIMA-SARIMA model

USA ARIMA-SARIMA calibration model

We start by checking stationarity of the series

lnusarate = log(dataset$GDPC1)
plot(diff(lnusarate,lag=4))

By looking at the graph we can not be sure of the stationarity of the series so we apply the Dicky-Fuller test.

adf.test(na.omit(diff(lnusarate,lag=4)),alternative="stationary",k=0)
## Warning in adf.test(na.omit(diff(lnusarate, lag = 4)), alternative =
## "stationary", : p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(diff(lnusarate, lag = 4))
## Dickey-Fuller = -4.0975, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary

Since the p-value is less than 0.05 we can conclude that the series of the real GDP is stationarity.

acf2(diff(lnusarate,lag=4,))

##      [,1]  [,2]  [,3]  [,4] [,5] [,6]  [,7]  [,8] [,9] [,10] [,11] [,12] [,13]
## ACF  0.78  0.59  0.40  0.23 0.16 0.12  0.09  0.06 0.05  0.03  0.02  0.01  0.01
## PACF 0.78 -0.03 -0.14 -0.06 0.12 0.03 -0.06 -0.02 0.06 -0.01 -0.03  0.02  0.02
##      [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21]
## ACF   0.01  0.02  0.04  0.02  0.01 -0.01 -0.06 -0.08
## PACF  0.00  0.02  0.02 -0.04  0.00 -0.02 -0.10  0.01

As we can look, the ACF plot show a slow decline of the autocorrelations, and the PACF shows a fast decline of autocorrelations too. Since this, we can assume this si going to be a AR signature.

We create the COVID variable

dataset$gdpccus <- diff(log(dataset$GDPC1),lag=4)
dataset$covidusdummy <- ifelse( dataset$gdpccus < 0 ,1,0) 
tworstus <- as.numeric((min(na.omit(dataset$gdpccus))))
dataset$covidusshock <- dataset$covidusdummy * (dataset$gdpccus/tworstus)
dataset$s4shockus <- diff(dataset$covidusshock,lag=4)

So we design our first ARIMA-SARIMA(1,0,0)(0,1,0) with the independant variable of the covid shocks.

m1 <- Arima(dataset$indexusa, order = c(1,0,0), 
            seasonal = list(order=c(0,1,0),period=4),
            include.constant = TRUE,
            xreg = dataset$covidusshock, 
            lambda = 0) 
coeftest(m1)
## 
## z test of coefficients:
## 
##                Estimate Std. Error  z value  Pr(>|z|)    
## ar1           0.8955190  0.0420974  21.2726 < 2.2e-16 ***
## drift         0.0056676  0.0014849   3.8168 0.0001352 ***
## covidusshock -0.0853762  0.0048849 -17.4776 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf2(m1$residuals,max.lag = 24)

##      [,1] [,2] [,3]  [,4] [,5] [,6] [,7]  [,8]  [,9] [,10] [,11] [,12] [,13]
## ACF  0.09 0.08 0.01 -0.21 0.20 0.04 0.11 -0.25 -0.18 -0.01 -0.07  0.11  0.01
## PACF 0.09 0.07 0.00 -0.22 0.25 0.03 0.07 -0.38 -0.01  0.04  0.02 -0.12  0.09
##      [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF  -0.01 -0.07  0.03  0.03 -0.06  0.01 -0.19 -0.10 -0.07  0.01  0.02
## PACF  0.07 -0.03 -0.04 -0.03 -0.05 -0.05 -0.19 -0.04 -0.02  0.08 -0.08

There is a significant seasonal autorregresion so we decide to add a new MA term in the model

m1_1 <- Arima(dataset$indexusa, order = c(1,0,0), 
            seasonal = list(order=c(0,1,1),period=4),
            include.constant = TRUE,
            xreg = dataset$covidusshock, 
            lambda = 0) 
coeftest(m1_1)
## 
## z test of coefficients:
## 
##                Estimate Std. Error  z value  Pr(>|z|)    
## ar1           0.9909215  0.0114472  86.5644 < 2.2e-16 ***
## sma1         -0.8311524  0.0910016  -9.1334 < 2.2e-16 ***
## drift         0.0059995  0.0016585   3.6174 0.0002976 ***
## covidusshock -0.0905223  0.0045397 -19.9402 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We check the residual to see a behave of withe noise.

acf2(m1_1$residuals,max.lag = 24)

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7]  [,8]  [,9] [,10] [,11] [,12] [,13]
## ACF  0.19 0.17 0.15 0.17 0.20 0.14 0.15 -0.18 -0.08  0.03 -0.03 -0.03 -0.05
## PACF 0.19 0.14 0.10 0.11 0.13 0.05 0.06 -0.30 -0.12  0.04 -0.03  0.02  0.05
##      [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF  -0.05 -0.10 -0.09 -0.03 -0.13 -0.04 -0.20 -0.08 -0.15  0.00 -0.02
## PACF  0.03 -0.01 -0.12 -0.06 -0.10  0.02 -0.18  0.04 -0.04  0.14  0.03

Now we can forecast the real GDP of USA

forecast_usgdp <- forecast(m1_1,xreg=dataset$lngdpus)
## Warning in forecast.forecast_ARIMA(m1_1, xreg = dataset$lngdpus): xreg contains
## different column names from the xreg used in training. Please check that the
## regressors are in the same order.
autoplot(forecast_usgdp)

Mexico ARIMA-SARIMA calibration model

We start by checking stationarity of the series

lnmxxrate = log(dataset$indexmxx)
plot(diff(lnmxxrate,lag=4))

By looking at the graph we can not be sure of the stationarity of the series so we apply the Dicky-Fuller test.

adf.test(na.omit(diff(lnmxxrate,lag=4)),alternative="stationary",k=0)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(diff(lnmxxrate, lag = 4))
## Dickey-Fuller = -4.0126, Lag order = 0, p-value = 0.0112
## alternative hypothesis: stationary

Since the p-value is less than 0.05 we can conclude that the series of the real GDP is stationarity.

acf2(diff(lnmxxrate,lag=4,))

##      [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12]
## ACF  0.74  0.45  0.18 -0.04 -0.06 -0.06 -0.05 -0.05 -0.08 -0.09 -0.09 -0.09
## PACF 0.74 -0.21 -0.16 -0.10  0.20 -0.06 -0.08 -0.03 -0.02  0.00 -0.04 -0.03
##      [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21]
## ACF  -0.06 -0.03  0.00  0.00 -0.05 -0.10 -0.16 -0.16 -0.11
## PACF  0.03  0.01  0.01 -0.08 -0.07 -0.05 -0.05  0.02  0.00

As we can look, the ACF plot show a slow decline of the autocorrelations, and the PACF shows a fast decline of autocorrelations too. Since this, we can assume this si going to be a AR signature.

We create the COVID variable

dataset$gdpccmxx <- diff(log(dataset$NAEXKP01MXQ189S),lag=4)
dataset$covidmxxdummy <- ifelse( dataset$gdpccmxx < 0 ,1,0) 
tworstmxx <- as.numeric((min(na.omit(dataset$gdpccmxx))))
dataset$covidmxxshock <- dataset$covidmxxdummy * (dataset$gdpccmxx/tworstmxx)
m2 <- Arima(dataset$indexmxx, order = c(1,0,0), 
            seasonal = list(order=c(0,1,0),period=4),
            include.constant = TRUE,
            xreg = dataset[,c("covidmxxshock", "indexusa")],
            lambda = 0) 
coeftest(m2)
## 
## z test of coefficients:
## 
##                 Estimate Std. Error  z value  Pr(>|z|)    
## ar1            0.8954421  0.0458553  19.5276 < 2.2e-16 ***
## drift          0.0023570  0.0020333   1.1592 0.2463784    
## covidmxxshock -0.1637264  0.0099501 -16.4547 < 2.2e-16 ***
## indexusa       0.1954151  0.0569298   3.4326 0.0005979 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf2(m2$residuals,max.lag = 24)

##      [,1] [,2]  [,3]  [,4] [,5]  [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12] [,13]
## ACF  0.29 0.14  0.00 -0.21 0.00 -0.13 -0.25 -0.05 -0.09  0.06  0.03  0.00  0.14
## PACF 0.29 0.06 -0.06 -0.22 0.14 -0.14 -0.24  0.07 -0.01  0.02 -0.10  0.06  0.10
##      [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF   0.01  0.02 -0.05 -0.15 -0.01 -0.11 -0.17 -0.06 -0.10  0.09  0.12
## PACF -0.12 -0.02 -0.04 -0.06  0.00 -0.09 -0.14  0.00 -0.04  0.04  0.00

It seems to be that the there is a significant seasonal MA and term so we decide to calibrate that MA term

m2_1 <- Arima(dataset$indexmxx, order = c(2,0,0), 
            seasonal = list(order=c(0,1,0),period=4),
            include.constant = TRUE,
            xreg = dataset[,c("covidmxxshock", "lngdpus")],
            lambda = 0) 
coeftest(m2_1)
## 
## z test of coefficients:
## 
##                 Estimate Std. Error  z value  Pr(>|z|)    
## ar1            1.2058191  0.0958198  12.5842 < 2.2e-16 ***
## ar2           -0.3625642  0.0974869  -3.7191 0.0001999 ***
## drift          0.0030592  0.0013930   2.1961 0.0280815 *  
## covidmxxshock -0.1729748  0.0097829 -17.6813 < 2.2e-16 ***
## lngdpus        0.2724355  0.0921057   2.9579 0.0030979 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

All the coefficientas are significant so we check the residuals

acf2(m2_1$residuals,max.lag = 24)

##       [,1] [,2] [,3]  [,4] [,5]  [,6]  [,7] [,8]  [,9] [,10] [,11] [,12] [,13]
## ACF  -0.05  0.1 0.05 -0.20 0.16 -0.04 -0.21 0.07 -0.12  0.10 -0.01 -0.08  0.15
## PACF -0.05  0.1 0.06 -0.21 0.14  0.01 -0.25 0.02  0.01  0.07 -0.08 -0.01  0.13
##      [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF  -0.08  0.03 -0.07 -0.17  0.06 -0.06 -0.12  0.03 -0.15  0.10  0.04
## PACF -0.08 -0.04 -0.08 -0.08 -0.01 -0.02 -0.13 -0.03 -0.06  0.01  0.00

We calculate the aic criteria to see what model we should use

m2$aic
## [1] -681.4289
m2_1$aic
## [1] -693.1338

Now we forecast the real GDP of México

forecast_mxxgdp <- forecast(m2_1,xreg=dataset[,c("covidmxxshock", "lngdpus")])
## Warning in forecast.forecast_ARIMA(m2_1, xreg = dataset[, c("covidmxxshock", :
## Upper prediction intervals are not finite.
autoplot(forecast_mxxgdp)

Portfolio Design

Introduction to Portfolio Theory

Instructions You have to explain what is a Financial portfolio, how you can calculate histor- ical portfolio returns, and also how to estimate expected returns and risk of a portfolio.

A Financial portfolio is the collection of all kind of financial assets such as stocks, bonds, commodities, cash, ETF, cryptocurrecies, etc. And this Financial portfolios have a lot of innovation tools to examined this prtfolios such as the Ph. D Markowitz when we examined how the weights of the assets in the financial portfolio can determine the minimum risk and miximize the return.

There are many ways to calculate or estimate the expected return for a portfolio such as Geometric Algebra, CAPM model, ARIMA-SARIMA models, etc. There

Lets calculate the Global Minimum Variance Portfolio

Install the IntroCompFinR package.

Load the package and download stock data

library(IntroCompFinR)
library(quantmod)

We select 5 assets from the US stock market.

tickers = c("CX","PFE", "SPCE", "NVDA", "GOOGL")
getSymbols(Symbols = tickers, from="2018-01-01", to = "2021-05-01", periodicity="monthly")
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
## 
## This message is shown once per session and may be disabled by setting 
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
## [1] "CX"    "PFE"   "SPCE"  "NVDA"  "GOOGL"

Calculating cc monthly returns

prices <- Ad(merge(CX,PFE,NVDA,SPCE,GOOGL))
returns <- na.omit(diff(log(prices)))

Now I calculate the expectef return for each assets:

ER = exp(colMeans(returns)) -1 
ER
##    CX.Adjusted   PFE.Adjusted  NVDA.Adjusted  SPCE.Adjusted GOOGL.Adjusted 
##  -0.0009206507   0.0055939579   0.0233904599   0.0204438715   0.0178108464

I can get descriptive statistics of the asset return

library(psych)
describe(returns)
##                vars  n mean   sd median trimmed  mad   min  max range  skew
## CX.Adjusted       1 39 0.00 0.13   0.02    0.01 0.11 -0.44 0.18  0.62 -1.28
## PFE.Adjusted      2 39 0.01 0.07  -0.01    0.00 0.06 -0.15 0.16  0.31  0.31
## NVDA.Adjusted     3 39 0.02 0.12   0.03    0.03 0.09 -0.29 0.23  0.52 -0.94
## SPCE.Adjusted     4 39 0.02 0.22   0.00    0.01 0.05 -0.51 0.62  1.13  0.61
## GOOGL.Adjusted    5 39 0.02 0.07   0.03    0.02 0.07 -0.14 0.15  0.29 -0.29
##                kurtosis   se
## CX.Adjusted        1.55 0.02
## PFE.Adjusted      -0.02 0.01
## NVDA.Adjusted      0.76 0.02
## SPCE.Adjusted      1.02 0.03
## GOOGL.Adjusted    -0.60 0.01

From this descriptive statistics we can say that the stock with high volatility is Virgin Galactic “SPCE” with a standar deviation of 22%, followed by CEMEX with a standar deviation of 12%. With this volatility we can conclude that Virgin calactic losess a but gain a lot despite CEMEX that losees a lot and the gains are not profitable.

With this graph we can ilustrate how a $ 1.00 invested in these assets main behave.

library(PerformanceAnalytics)
charts.PerformanceSummary(returns, 
                          main = "Performance of $1.00 over time",
                          wealth.index = TRUE)

By looking at the graph we can see the behavior of the stacks we are analysing. We are going to start saying that Virgin Galactic is the most volatile a fact that has been seeing coming since the Standard Deviation was to high. The Next stock is NVidia with a decrease in 2018 nevertheless in the beginnigs of 2020 the stock increase in an huge way and starts to look like a profitable stock. Cemex, Pfizer and Google provide linear returns.

Before doing portfolio optimization, I need to calculate the VAR - COV matrix:

COV = var(returns)
COV 
##                CX.Adjusted PFE.Adjusted NVDA.Adjusted SPCE.Adjusted
## CX.Adjusted    0.018154489  0.001221957  0.0046398058  0.0055140532
## PFE.Adjusted   0.001221957  0.004668978  0.0008646110  0.0016226379
## NVDA.Adjusted  0.004639806  0.000864611  0.0148332741  0.0007804118
## SPCE.Adjusted  0.005514053  0.001622638  0.0007804118  0.0470865957
## GOOGL.Adjusted 0.004952910  0.001272390  0.0038300218  0.0015442821
##                GOOGL.Adjusted
## CX.Adjusted       0.004952910
## PFE.Adjusted      0.001272390
## NVDA.Adjusted     0.003830022
## SPCE.Adjusted     0.001544282
## GOOGL.Adjusted    0.004751728

Now I get the correlation matrix to have an idea about how each pair of asset will contribute to the portfolio risk:

CORR = cor(returns)
CORR
##                CX.Adjusted PFE.Adjusted NVDA.Adjusted SPCE.Adjusted
## CX.Adjusted      1.0000000    0.1327250    0.28274131    0.18859505
## PFE.Adjusted     0.1327250    1.0000000    0.10389421    0.10943647
## NVDA.Adjusted    0.2827413    0.1038942    1.00000000    0.02952954
## SPCE.Adjusted    0.1885950    0.1094365    0.02952954    1.00000000
## GOOGL.Adjusted   0.5332644    0.2701367    0.45620173    0.10324100
##                GOOGL.Adjusted
## CX.Adjusted         0.5332644
## PFE.Adjusted        0.2701367
## NVDA.Adjusted       0.4562017
## SPCE.Adjusted       0.1032410
## GOOGL.Adjusted      1.0000000

After analysing the matrix of correlation we can canclude that these stocks are not so correlated, there are a porcentage of correlation between them but there are not significant.

Calculating the Global Minimum Variance Portfolio

gmv = globalMin.portfolio(ER, COV, )
gmv
## Call:
## globalMin.portfolio(er = ER, cov.mat = COV)
## 
## Portfolio expected return:     0.01262516 
## Portfolio standard deviation:  0.05392271 
## Portfolio weights:
##    CX.Adjusted   PFE.Adjusted  NVDA.Adjusted  SPCE.Adjusted GOOGL.Adjusted 
##        -0.0179         0.4847         0.0569         0.0316         0.4447

Efficient Frontier

efrontier = efficient.frontier(ER,COV,nport = 100, 
                               alpha.min = -0.5,
                               alpha.max = 1.5,)
plot(efrontier, plot.assets = TRUE, col= "green")

plot(efrontier, col= "green")

The optimal/tangent portfolio

rf = 0.0
optport = tangency.portfolio(ER,COV,rf, )
optport
## Call:
## tangency.portfolio(er = ER, cov.mat = COV, risk.free = rf)
## 
## Portfolio expected return:     0.02633147 
## Portfolio standard deviation:  0.07787371 
## Portfolio weights:
##    CX.Adjusted   PFE.Adjusted  NVDA.Adjusted  SPCE.Adjusted GOOGL.Adjusted 
##        -0.3875         0.0123         0.2029         0.1066         1.0657
opweights = getPortfolio(ER,COV,weights = optport$weights)
opweights
## Call:
## getPortfolio(er = ER, cov.mat = COV, weights = optport$weights)
## 
## Portfolio expected return:     0.02633147 
## Portfolio standard deviation:  0.07787371 
## Portfolio weights:
##    CX.Adjusted   PFE.Adjusted  NVDA.Adjusted  SPCE.Adjusted GOOGL.Adjusted 
##        -0.3875         0.0123         0.2029         0.1066         1.0657
plot(opweights)

### Capital market line:

The slope of the capital market line is the sharpe ratio

plot(efrontier)
# primero se pone el eje x
points(gmv$sd , gmv$er, col="green")
# ahora va el tangete portafolio 
points(optport$sd, optport$er, col="red", pch = 16, cex = 2)

sharper = (optport$er - rf) / optport$sd
sharper
## [1] 0.3381304
# que tantos premium returns te da un punto de riesgo 

abline(a=rf, b=sharper, col="blue")