We install all the packages that we will need.
library(xts)
library(readxl)
library(quantmod)
library(zoo)
library(tseries)
library(forecast)
library(astsa)
library(lmtest)
library(rugarch)
library(moments)
library(astsa)
library(PerformanceAnalytics)
library(lmtest)
We download the GDP from US and Mexican
library(quantmod)
getSymbols.FRED(Symbols = c("GDPC1", "NAEXKP01MXQ189S"), env = (.GlobalEnv), return.class = "xts")
## [1] "GDPC1" "NAEXKP01MXQ189S"
I merge the data,
dataset <- na.omit(merge(GDPC1, NAEXKP01MXQ189S))
We calculate index
dataset$indexusa = dataset$GDPC1 /as.numeric( dataset$GDPC1[1])
dataset$indexmxx = dataset$NAEXKP01MXQ189S /as.numeric( dataset$NAEXKP01MXQ189S[1])
dataset$lngdpus <- log(dataset$indexusa)
dataset$lngdpmxx <- log(dataset$indexmxx)
Then we plot the data to visualize the GDP or both countries.
plot(dataset$indexusa)
plot(dataset$indexmxx)
## Calibration of the ARIMA-SARIMA model
We start by checking stationarity of the series
lnusarate = log(dataset$GDPC1)
plot(diff(lnusarate,lag=4))
By looking at the graph we can not be sure of the stationarity of the series so we apply the Dicky-Fuller test.
adf.test(na.omit(diff(lnusarate,lag=4)),alternative="stationary",k=0)
## Warning in adf.test(na.omit(diff(lnusarate, lag = 4)), alternative =
## "stationary", : p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(diff(lnusarate, lag = 4))
## Dickey-Fuller = -4.0975, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary
Since the p-value is less than 0.05 we can conclude that the series of the real GDP is stationarity.
acf2(diff(lnusarate,lag=4,))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF 0.78 0.59 0.40 0.23 0.16 0.12 0.09 0.06 0.05 0.03 0.02 0.01 0.01
## PACF 0.78 -0.03 -0.14 -0.06 0.12 0.03 -0.06 -0.02 0.06 -0.01 -0.03 0.02 0.02
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21]
## ACF 0.01 0.02 0.04 0.02 0.01 -0.01 -0.06 -0.08
## PACF 0.00 0.02 0.02 -0.04 0.00 -0.02 -0.10 0.01
As we can look, the ACF plot show a slow decline of the autocorrelations, and the PACF shows a fast decline of autocorrelations too. Since this, we can assume this si going to be a AR signature.
We create the COVID variable
dataset$gdpccus <- diff(log(dataset$GDPC1),lag=4)
dataset$covidusdummy <- ifelse( dataset$gdpccus < 0 ,1,0)
tworstus <- as.numeric((min(na.omit(dataset$gdpccus))))
dataset$covidusshock <- dataset$covidusdummy * (dataset$gdpccus/tworstus)
dataset$s4shockus <- diff(dataset$covidusshock,lag=4)
So we design our first ARIMA-SARIMA(1,0,0)(0,1,0) with the independant variable of the covid shocks.
m1 <- Arima(dataset$indexusa, order = c(1,0,0),
seasonal = list(order=c(0,1,0),period=4),
include.constant = TRUE,
xreg = dataset$covidusshock,
lambda = 0)
coeftest(m1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.8955190 0.0420974 21.2726 < 2.2e-16 ***
## drift 0.0056676 0.0014849 3.8168 0.0001352 ***
## covidusshock -0.0853762 0.0048849 -17.4776 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf2(m1$residuals,max.lag = 24)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF 0.09 0.08 0.01 -0.21 0.20 0.04 0.11 -0.25 -0.18 -0.01 -0.07 0.11 0.01
## PACF 0.09 0.07 0.00 -0.22 0.25 0.03 0.07 -0.38 -0.01 0.04 0.02 -0.12 0.09
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF -0.01 -0.07 0.03 0.03 -0.06 0.01 -0.19 -0.10 -0.07 0.01 0.02
## PACF 0.07 -0.03 -0.04 -0.03 -0.05 -0.05 -0.19 -0.04 -0.02 0.08 -0.08
There is a significant seasonal autorregresion so we decide to add a new MA term in the model
m1_1 <- Arima(dataset$indexusa, order = c(1,0,0),
seasonal = list(order=c(0,1,1),period=4),
include.constant = TRUE,
xreg = dataset$covidusshock,
lambda = 0)
coeftest(m1_1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.9909215 0.0114472 86.5644 < 2.2e-16 ***
## sma1 -0.8311524 0.0910016 -9.1334 < 2.2e-16 ***
## drift 0.0059995 0.0016585 3.6174 0.0002976 ***
## covidusshock -0.0905223 0.0045397 -19.9402 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We check the residual to see a behave of withe noise.
acf2(m1_1$residuals,max.lag = 24)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF 0.19 0.17 0.15 0.17 0.20 0.14 0.15 -0.18 -0.08 0.03 -0.03 -0.03 -0.05
## PACF 0.19 0.14 0.10 0.11 0.13 0.05 0.06 -0.30 -0.12 0.04 -0.03 0.02 0.05
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF -0.05 -0.10 -0.09 -0.03 -0.13 -0.04 -0.20 -0.08 -0.15 0.00 -0.02
## PACF 0.03 -0.01 -0.12 -0.06 -0.10 0.02 -0.18 0.04 -0.04 0.14 0.03
Now we can forecast the real GDP of USA
forecast_usgdp <- forecast(m1_1,xreg=dataset$lngdpus)
## Warning in forecast.forecast_ARIMA(m1_1, xreg = dataset$lngdpus): xreg contains
## different column names from the xreg used in training. Please check that the
## regressors are in the same order.
autoplot(forecast_usgdp)
We start by checking stationarity of the series
lnmxxrate = log(dataset$indexmxx)
plot(diff(lnmxxrate,lag=4))
By looking at the graph we can not be sure of the stationarity of the series so we apply the Dicky-Fuller test.
adf.test(na.omit(diff(lnmxxrate,lag=4)),alternative="stationary",k=0)
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(diff(lnmxxrate, lag = 4))
## Dickey-Fuller = -4.0126, Lag order = 0, p-value = 0.0112
## alternative hypothesis: stationary
Since the p-value is less than 0.05 we can conclude that the series of the real GDP is stationarity.
acf2(diff(lnmxxrate,lag=4,))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## ACF 0.74 0.45 0.18 -0.04 -0.06 -0.06 -0.05 -0.05 -0.08 -0.09 -0.09 -0.09
## PACF 0.74 -0.21 -0.16 -0.10 0.20 -0.06 -0.08 -0.03 -0.02 0.00 -0.04 -0.03
## [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21]
## ACF -0.06 -0.03 0.00 0.00 -0.05 -0.10 -0.16 -0.16 -0.11
## PACF 0.03 0.01 0.01 -0.08 -0.07 -0.05 -0.05 0.02 0.00
As we can look, the ACF plot show a slow decline of the autocorrelations, and the PACF shows a fast decline of autocorrelations too. Since this, we can assume this si going to be a AR signature.
We create the COVID variable
dataset$gdpccmxx <- diff(log(dataset$NAEXKP01MXQ189S),lag=4)
dataset$covidmxxdummy <- ifelse( dataset$gdpccmxx < 0 ,1,0)
tworstmxx <- as.numeric((min(na.omit(dataset$gdpccmxx))))
dataset$covidmxxshock <- dataset$covidmxxdummy * (dataset$gdpccmxx/tworstmxx)
m2 <- Arima(dataset$indexmxx, order = c(1,0,0),
seasonal = list(order=c(0,1,0),period=4),
include.constant = TRUE,
xreg = dataset[,c("covidmxxshock", "indexusa")],
lambda = 0)
coeftest(m2)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.8954421 0.0458553 19.5276 < 2.2e-16 ***
## drift 0.0023570 0.0020333 1.1592 0.2463784
## covidmxxshock -0.1637264 0.0099501 -16.4547 < 2.2e-16 ***
## indexusa 0.1954151 0.0569298 3.4326 0.0005979 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf2(m2$residuals,max.lag = 24)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF 0.29 0.14 0.00 -0.21 0.00 -0.13 -0.25 -0.05 -0.09 0.06 0.03 0.00 0.14
## PACF 0.29 0.06 -0.06 -0.22 0.14 -0.14 -0.24 0.07 -0.01 0.02 -0.10 0.06 0.10
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF 0.01 0.02 -0.05 -0.15 -0.01 -0.11 -0.17 -0.06 -0.10 0.09 0.12
## PACF -0.12 -0.02 -0.04 -0.06 0.00 -0.09 -0.14 0.00 -0.04 0.04 0.00
It seems to be that the there is a significant seasonal MA and term so we decide to calibrate that MA term
m2_1 <- Arima(dataset$indexmxx, order = c(2,0,0),
seasonal = list(order=c(0,1,0),period=4),
include.constant = TRUE,
xreg = dataset[,c("covidmxxshock", "lngdpus")],
lambda = 0)
coeftest(m2_1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 1.2058191 0.0958198 12.5842 < 2.2e-16 ***
## ar2 -0.3625642 0.0974869 -3.7191 0.0001999 ***
## drift 0.0030592 0.0013930 2.1961 0.0280815 *
## covidmxxshock -0.1729748 0.0097829 -17.6813 < 2.2e-16 ***
## lngdpus 0.2724355 0.0921057 2.9579 0.0030979 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
All the coefficientas are significant so we check the residuals
acf2(m2_1$residuals,max.lag = 24)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF -0.05 0.1 0.05 -0.20 0.16 -0.04 -0.21 0.07 -0.12 0.10 -0.01 -0.08 0.15
## PACF -0.05 0.1 0.06 -0.21 0.14 0.01 -0.25 0.02 0.01 0.07 -0.08 -0.01 0.13
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF -0.08 0.03 -0.07 -0.17 0.06 -0.06 -0.12 0.03 -0.15 0.10 0.04
## PACF -0.08 -0.04 -0.08 -0.08 -0.01 -0.02 -0.13 -0.03 -0.06 0.01 0.00
We calculate the aic criteria to see what model we should use
m2$aic
## [1] -681.4289
m2_1$aic
## [1] -693.1338
Now we forecast the real GDP of México
forecast_mxxgdp <- forecast(m2_1,xreg=dataset[,c("covidmxxshock", "lngdpus")])
## Warning in forecast.forecast_ARIMA(m2_1, xreg = dataset[, c("covidmxxshock", :
## Upper prediction intervals are not finite.
autoplot(forecast_mxxgdp)
Instructions You have to explain what is a Financial portfolio, how you can calculate histor- ical portfolio returns, and also how to estimate expected returns and risk of a portfolio.
A Financial portfolio is the collection of all kind of financial assets such as stocks, bonds, commodities, cash, ETF, cryptocurrecies, etc. And this Financial portfolios have a lot of innovation tools to examined this prtfolios such as the Ph. D Markowitz when we examined how the weights of the assets in the financial portfolio can determine the minimum risk and miximize the return.
There are many ways to calculate or estimate the expected return for a portfolio such as Geometric Algebra, CAPM model, ARIMA-SARIMA models, etc. There
Lets calculate the Global Minimum Variance Portfolio
Install the IntroCompFinR package.
Load the package and download stock data
library(IntroCompFinR)
library(quantmod)
We select 5 assets from the US stock market.
tickers = c("CX","PFE", "SPCE", "NVDA", "GOOGL")
getSymbols(Symbols = tickers, from="2018-01-01", to = "2021-05-01", periodicity="monthly")
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
## [1] "CX" "PFE" "SPCE" "NVDA" "GOOGL"
Calculating cc monthly returns
prices <- Ad(merge(CX,PFE,NVDA,SPCE,GOOGL))
returns <- na.omit(diff(log(prices)))
Now I calculate the expectef return for each assets:
ER = exp(colMeans(returns)) -1
ER
## CX.Adjusted PFE.Adjusted NVDA.Adjusted SPCE.Adjusted GOOGL.Adjusted
## -0.0009206507 0.0055939579 0.0233904599 0.0204438715 0.0178108464
I can get descriptive statistics of the asset return
library(psych)
describe(returns)
## vars n mean sd median trimmed mad min max range skew
## CX.Adjusted 1 39 0.00 0.13 0.02 0.01 0.11 -0.44 0.18 0.62 -1.28
## PFE.Adjusted 2 39 0.01 0.07 -0.01 0.00 0.06 -0.15 0.16 0.31 0.31
## NVDA.Adjusted 3 39 0.02 0.12 0.03 0.03 0.09 -0.29 0.23 0.52 -0.94
## SPCE.Adjusted 4 39 0.02 0.22 0.00 0.01 0.05 -0.51 0.62 1.13 0.61
## GOOGL.Adjusted 5 39 0.02 0.07 0.03 0.02 0.07 -0.14 0.15 0.29 -0.29
## kurtosis se
## CX.Adjusted 1.55 0.02
## PFE.Adjusted -0.02 0.01
## NVDA.Adjusted 0.76 0.02
## SPCE.Adjusted 1.02 0.03
## GOOGL.Adjusted -0.60 0.01
From this descriptive statistics we can say that the stock with high volatility is Virgin Galactic “SPCE” with a standar deviation of 22%, followed by CEMEX with a standar deviation of 12%. With this volatility we can conclude that Virgin calactic losess a but gain a lot despite CEMEX that losees a lot and the gains are not profitable.
With this graph we can ilustrate how a $ 1.00 invested in these assets main behave.
library(PerformanceAnalytics)
charts.PerformanceSummary(returns,
main = "Performance of $1.00 over time",
wealth.index = TRUE)
By looking at the graph we can see the behavior of the stacks we are analysing. We are going to start saying that Virgin Galactic is the most volatile a fact that has been seeing coming since the Standard Deviation was to high. The Next stock is NVidia with a decrease in 2018 nevertheless in the beginnigs of 2020 the stock increase in an huge way and starts to look like a profitable stock. Cemex, Pfizer and Google provide linear returns.
Before doing portfolio optimization, I need to calculate the VAR - COV matrix:
COV = var(returns)
COV
## CX.Adjusted PFE.Adjusted NVDA.Adjusted SPCE.Adjusted
## CX.Adjusted 0.018154489 0.001221957 0.0046398058 0.0055140532
## PFE.Adjusted 0.001221957 0.004668978 0.0008646110 0.0016226379
## NVDA.Adjusted 0.004639806 0.000864611 0.0148332741 0.0007804118
## SPCE.Adjusted 0.005514053 0.001622638 0.0007804118 0.0470865957
## GOOGL.Adjusted 0.004952910 0.001272390 0.0038300218 0.0015442821
## GOOGL.Adjusted
## CX.Adjusted 0.004952910
## PFE.Adjusted 0.001272390
## NVDA.Adjusted 0.003830022
## SPCE.Adjusted 0.001544282
## GOOGL.Adjusted 0.004751728
Now I get the correlation matrix to have an idea about how each pair of asset will contribute to the portfolio risk:
CORR = cor(returns)
CORR
## CX.Adjusted PFE.Adjusted NVDA.Adjusted SPCE.Adjusted
## CX.Adjusted 1.0000000 0.1327250 0.28274131 0.18859505
## PFE.Adjusted 0.1327250 1.0000000 0.10389421 0.10943647
## NVDA.Adjusted 0.2827413 0.1038942 1.00000000 0.02952954
## SPCE.Adjusted 0.1885950 0.1094365 0.02952954 1.00000000
## GOOGL.Adjusted 0.5332644 0.2701367 0.45620173 0.10324100
## GOOGL.Adjusted
## CX.Adjusted 0.5332644
## PFE.Adjusted 0.2701367
## NVDA.Adjusted 0.4562017
## SPCE.Adjusted 0.1032410
## GOOGL.Adjusted 1.0000000
After analysing the matrix of correlation we can canclude that these stocks are not so correlated, there are a porcentage of correlation between them but there are not significant.
gmv = globalMin.portfolio(ER, COV, )
gmv
## Call:
## globalMin.portfolio(er = ER, cov.mat = COV)
##
## Portfolio expected return: 0.01262516
## Portfolio standard deviation: 0.05392271
## Portfolio weights:
## CX.Adjusted PFE.Adjusted NVDA.Adjusted SPCE.Adjusted GOOGL.Adjusted
## -0.0179 0.4847 0.0569 0.0316 0.4447
efrontier = efficient.frontier(ER,COV,nport = 100,
alpha.min = -0.5,
alpha.max = 1.5,)
plot(efrontier, plot.assets = TRUE, col= "green")
plot(efrontier, col= "green")
rf = 0.0
optport = tangency.portfolio(ER,COV,rf, )
optport
## Call:
## tangency.portfolio(er = ER, cov.mat = COV, risk.free = rf)
##
## Portfolio expected return: 0.02633147
## Portfolio standard deviation: 0.07787371
## Portfolio weights:
## CX.Adjusted PFE.Adjusted NVDA.Adjusted SPCE.Adjusted GOOGL.Adjusted
## -0.3875 0.0123 0.2029 0.1066 1.0657
opweights = getPortfolio(ER,COV,weights = optport$weights)
opweights
## Call:
## getPortfolio(er = ER, cov.mat = COV, weights = optport$weights)
##
## Portfolio expected return: 0.02633147
## Portfolio standard deviation: 0.07787371
## Portfolio weights:
## CX.Adjusted PFE.Adjusted NVDA.Adjusted SPCE.Adjusted GOOGL.Adjusted
## -0.3875 0.0123 0.2029 0.1066 1.0657
plot(opweights)
### Capital market line:
The slope of the capital market line is the sharpe ratio
plot(efrontier)
# primero se pone el eje x
points(gmv$sd , gmv$er, col="green")
# ahora va el tangete portafolio
points(optport$sd, optport$er, col="red", pch = 16, cex = 2)
sharper = (optport$er - rf) / optport$sd
sharper
## [1] 0.3381304
# que tantos premium returns te da un punto de riesgo
abline(a=rf, b=sharper, col="blue")