Research Methods in Finance

Instructor: Dr. Nguyen Phuong Anh


1 Libraries

library(rio)
# Panel Data
library(plm)
library(lmtest)
# Limited Dependent Variable Models
library(stargazer)
library(mfx)
# Vector Autoregressive Models
library(vars)
library(tseries)
# ARIMA
library(forecast)
# ARCH - GARCH
library(fGarch)
library(rugarch)
library(FinTS)

2 Panel Data

The data include annual returns and second pass betas for 11 years on 2500 UK firms.

url = 'https://raw.githubusercontent.com/QuanNguyenIU/Res_Med_Fin/main/panelx.xls'
panelx = rio::import(file = url, na = 'NA')
head(panelx, 6)
##   firm_ident       return     beta year
## 1          1 -0.004812734 1.501050 1996
## 2          2           NA 0.771553 1996
## 3          3 -0.004812734 1.116419 1996
## 4          4 -0.004812734 1.128749 1996
## 5          5 -0.001598855 1.244929 1996
## 6          6 -0.004812734 1.611615 1996
data = pdata.frame(panelx, index = c('firm_ident', 'year'))
pdim(data)
## Balanced Panel: n = 2500, T = 11, N = 27500
summary(data[c('return', 'beta')])
##      return            beta      
##  Min.   :-1.005   Min.   :0.661  
##  1st Qu.:-0.005   1st Qu.:0.941  
##  Median :-0.004   Median :1.082  
##  Mean   :-0.002   Mean   :1.105  
##  3rd Qu.:-0.003   3rd Qu.:1.249  
##  Max.   : 0.706   Max.   :1.612  
##  NA's   :3409     NA's   :18427
pooled = plm(return ~ beta, model = 'pooling', data = data)
summary(pooled)
## Pooling Model
## 
## Call:
## plm(formula = return ~ beta, data = data, model = "pooling")
## 
## Unbalanced Panel: n = 1734, T = 1-11, N = 8856
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -1.0073394 -0.0133705 -0.0017671  0.0201393  0.7041667 
## 
## Coefficients:
##               Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 0.00184254 0.00307461  0.5993    0.549
## beta        0.00045439 0.00273471  0.1662    0.868
## 
## Total Sum of Squares:    24.205
## Residual Sum of Squares: 24.204
## R-Squared:      3.1181e-06
## Adj. R-Squared: -0.00010982
## F-statistic: 0.0276078 on 1 and 8854 DF, p-value: 0.86804

For the pooled regression (PR) model, neither the intercept nor the slope is statistically significant.

fixed = plm(return ~ beta, model = 'within', data = data)
summary(fixed)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = return ~ beta, data = data, model = "within")
## 
## Unbalanced Panel: n = 1734, T = 1-11, N = 8856
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -0.891228 -0.015834  0.000000  0.016408  0.653811 
## 
## Coefficients:
##        Estimate Std. Error t-value Pr(>|t|)   
## beta -0.0118931  0.0041139 -2.8909 0.003852 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    18.371
## Residual Sum of Squares: 18.35
## R-Squared:      0.0011723
## Adj. R-Squared: -0.24205
## F-statistic: 8.35756 on 1 and 7121 DF, p-value: 0.0038525

For the fixed effects (FE) model, the estimate on the beta parameter is negative and statistically significant.

random = plm(return ~ beta, model = 'random', data = data)
summary(random)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = return ~ beta, data = data, model = "random")
## 
## Unbalanced Panel: n = 1734, T = 1-11, N = 8856
## 
## Effects:
##                     var   std.dev share
## idiosyncratic 0.0025768 0.0507625 0.944
## individual    0.0001529 0.0123657 0.056
## theta:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.02841 0.12183 0.19262 0.16455 0.22215 0.22215 
## 
## Residuals:
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.98141 -0.01314 -0.00156  0.00031  0.01972  0.69480 
## 
## Coefficients:
##               Estimate Std. Error z-value Pr(>|z|)
## (Intercept)  0.0032809  0.0032887  0.9976   0.3185
## beta        -0.0014994  0.0029132 -0.5147   0.6068
## 
## Total Sum of Squares:    23.117
## Residual Sum of Squares: 23.125
## R-Squared:      0.00048503
## Adj. R-Squared: 0.00037214
## Chisq: 0.264896 on 1 DF, p-value: 0.60678

For the random effects (RE) model, neither the intercept nor the slope is statistically significant. Hence at first glance, the FE model ‘looks’ better than the PR and RE models.

phtest(fixed, random)
## 
##  Hausman Test
## 
## data:  return ~ beta
## chisq = 12.804, df = 1, p-value = 0.0003459
## alternative hypothesis: one model is inconsistent

The Hausman test confirms that FE model is better than RE model.

plmtest(pooled, type = 'bp')
## 
##  Lagrange Multiplier Test - (Breusch-Pagan) for unbalanced panels
## 
## data:  return ~ beta
## chisq = 1.5486, df = 1, p-value = 0.2133
## alternative hypothesis: significant effects

The Breusch\(-\)Pagan Lagrange Multiplier test indicates that RE model is better than PR model. Hence FE model is the best model.

pFtest(fixed, pooled)
## 
##  F test for individual effects
## 
## data:  return ~ beta
## F = 1.3111, df1 = 1733, df2 = 7121, p-value = 9.795e-14
## alternative hypothesis: significant effects

The \(F-\)test agrees that FE model is better than PR model, following the above conclusion.

bptest(fixed)
## 
##  studentized Breusch-Pagan test
## 
## data:  fixed
## BP = 0.95725, df = 1, p-value = 0.3279

There is no problem with heteroscedasticity.

pbgtest(fixed)
## 
##  Breusch-Godfrey/Wooldridge test for serial correlation in panel models
## 
## data:  return ~ beta
## chisq = 49.74, df = 1, p-value = 1.755e-12
## alternative hypothesis: serial correlation in idiosyncratic errors

Autocorrelation exists. Hence we need to ajust the standard errors by using robust regression.

coeftest(fixed, vcovSCC)
## 
## t test of coefficients:
## 
##       Estimate Std. Error t value Pr(>|t|)  
## beta -0.011893   0.006098 -1.9503  0.05118 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The robust regression using Driscoll and Kraay robust covariance matrix estimator returns a sufficiently large \(p-\)value.


3 Limited Dependent Variable Models

The data comprise a sample from the actual records of failure rates for \(5\) years of M.Sc. students at the ICMA Center, University of Reading. A sample of \(100\) students is included for each of the \(5\) years who completed (or failed) their degrees in the years \(2003-2007\) inclusive.

url = 'https://raw.githubusercontent.com/QuanNguyenIU/Res_Med_Fin/main/MSc_fail.xls'
MSc_fail = rio::import(file = url, na = 'NA')
head(MSc_fail, 6)
##   Age Female Fail Work Experience English Country Code PG Degree Agrade
## 1  22      1    0               0       0           10         0      0
## 2  24      0    1               0       1            8         0      0
## 3  28      0    0               0       1           10         0      0
## 4  23      0    0               0       1            8         0      0
## 5  23      0    0               1       1            8         0      0
## 6  22      1    0               1       1            8         0      0
##   BelowBGrade Year2004 Year2005 Year2006 Year2007
## 1           0        0        0        0        0
## 2           0        0        0        0        0
## 3           0        0        0        0        0
## 4           0        0        0        0        0
## 5           0        0        0        0        0
## 6           0        0        0        0        0
h = glm(MSc_fail$Fail ~ MSc_fail$Age + MSc_fail$English +
          MSc_fail$Female + MSc_fail$`Work Experience` +
          MSc_fail$Agrade + MSc_fail$BelowBGrade +
          MSc_fail$'PG Degree' + MSc_fail$Year2004 +
          MSc_fail$Year2005 + MSc_fail$Year2006 + MSc_fail$Year2007,
        family = binomial(link = 'logit'))
summary(h)
## 
## Call:
## glm(formula = MSc_fail$Fail ~ MSc_fail$Age + MSc_fail$English + 
##     MSc_fail$Female + MSc_fail$`Work Experience` + MSc_fail$Agrade + 
##     MSc_fail$BelowBGrade + MSc_fail$"PG Degree" + MSc_fail$Year2004 + 
##     MSc_fail$Year2005 + MSc_fail$Year2006 + MSc_fail$Year2007, 
##     family = binomial(link = "logit"))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2275  -0.5870  -0.4228  -0.2980   2.5579  
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                -2.25637    1.07300  -2.103  0.03548 * 
## MSc_fail$Age                0.01101    0.03813   0.289  0.77275   
## MSc_fail$English           -0.16512    0.28295  -0.584  0.55952   
## MSc_fail$Female            -0.33389    0.34923  -0.956  0.33902   
## MSc_fail$`Work Experience` -0.56877    0.28847  -1.972  0.04865 * 
## MSc_fail$Agrade            -1.08503    0.49110  -2.209  0.02715 * 
## MSc_fail$BelowBGrade        0.56235    0.37351   1.506  0.13217   
## MSc_fail$"PG Degree"        0.21208    0.41990   0.505  0.61350   
## MSc_fail$Year2004           0.65321    0.50092   1.304  0.19223   
## MSc_fail$Year2005          -0.18382    0.58794  -0.313  0.75454   
## MSc_fail$Year2006           1.24658    0.47365   2.632  0.00849 **
## MSc_fail$Year2007           0.85042    0.49705   1.711  0.08710 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 393.92  on 499  degrees of freedom
## Residual deviance: 359.43  on 488  degrees of freedom
## AIC: 383.43
## 
## Number of Fisher Scoring iterations: 5

The significant independent variables are Work Experience, Agrade, Year2006 and Year2007. Some insights: - Students having work experience and A grades have lower chance of failure. - Students in 2006 and 2007 have higher chance of failure.

stargazer(h, type = "text", out = "logit.htm")
## 
## =============================================
##                       Dependent variable:    
##                   ---------------------------
##                              Fail            
## ---------------------------------------------
## Age                          0.011           
##                             (0.038)          
##                                              
## English                     -0.165           
##                             (0.283)          
##                                              
## Female                      -0.334           
##                             (0.349)          
##                                              
## `Work Experience`          -0.569**          
##                             (0.288)          
##                                              
## Agrade                     -1.085**          
##                             (0.491)          
##                                              
## BelowBGrade                  0.562           
##                             (0.374)          
##                                              
## "PG Degree"                  0.212           
##                             (0.420)          
##                                              
## Year2004                     0.653           
##                             (0.501)          
##                                              
## Year2005                    -0.184           
##                             (0.588)          
##                                              
## Year2006                   1.247***          
##                             (0.474)          
##                                              
## Year2007                    0.850*           
##                             (0.497)          
##                                              
## Constant                   -2.256**          
##                             (1.073)          
##                                              
## ---------------------------------------------
## Observations                  500            
## Log Likelihood             -179.717          
## Akaike Inf. Crit.           383.433          
## =============================================
## Note:             *p<0.1; **p<0.05; ***p<0.01
logitor(MSc_fail$Fail ~ MSc_fail$Age + MSc_fail$English + 
           MSc_fail$Female + MSc_fail$`Work Experience` +
           MSc_fail$Agrade + MSc_fail$BelowBGrade +
           MSc_fail$'PG Degree' + MSc_fail$Year2004 +
           MSc_fail$Year2005 + MSc_fail$Year2006 + MSc_fail$Year2007,
         data = MSc_fail)
## Call:
## logitor(formula = MSc_fail$Fail ~ MSc_fail$Age + MSc_fail$English + 
##     MSc_fail$Female + MSc_fail$`Work Experience` + MSc_fail$Agrade + 
##     MSc_fail$BelowBGrade + MSc_fail$"PG Degree" + MSc_fail$Year2004 + 
##     MSc_fail$Year2005 + MSc_fail$Year2006 + MSc_fail$Year2007, 
##     data = MSc_fail)
## 
## Odds Ratio:
##                            OddsRatio Std. Err.       z    P>|z|   
## MSc_fail$Age                1.011072  0.038553  0.2888 0.772746   
## MSc_fail$English            0.847794  0.239887 -0.5835 0.559523   
## MSc_fail$Female             0.716130  0.250092 -0.9561 0.339025   
## MSc_fail$`Work Experience`  0.566222  0.163340 -1.9716 0.048650 * 
## MSc_fail$Agrade             0.337891  0.165940 -2.2094 0.027149 * 
## MSc_fail$BelowBGrade        1.754793  0.655425  1.5056 0.132169   
## MSc_fail$"PG Degree"        1.236252  0.519106  0.5051 0.613503   
## MSc_fail$Year2004           1.921693  0.962606  1.3040 0.192225   
## MSc_fail$Year2005           0.832082  0.489211 -0.3127 0.754538   
## MSc_fail$Year2006           3.478413  1.647564  2.6318 0.008493 **
## MSc_fail$Year2007           2.340634  1.163423  1.7109 0.087095 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
logitmfx(MSc_fail$Fail ~ MSc_fail$Age + MSc_fail$English + 
           MSc_fail$Female + MSc_fail$`Work Experience` +
           MSc_fail$Agrade + MSc_fail$BelowBGrade +
           MSc_fail$'PG Degree' + MSc_fail$Year2004 +
           MSc_fail$Year2005 + MSc_fail$Year2006 + MSc_fail$Year2007,
         data = MSc_fail)
## Call:
## logitmfx(formula = MSc_fail$Fail ~ MSc_fail$Age + MSc_fail$English + 
##     MSc_fail$Female + MSc_fail$`Work Experience` + MSc_fail$Agrade + 
##     MSc_fail$BelowBGrade + MSc_fail$"PG Degree" + MSc_fail$Year2004 + 
##     MSc_fail$Year2005 + MSc_fail$Year2006 + MSc_fail$Year2007, 
##     data = MSc_fail)
## 
## Marginal Effects:
##                                 dF/dx  Std. Err.       z    P>|z|   
## MSc_fail$Age                0.0010592  0.0036673  0.2888 0.772713   
## MSc_fail$English           -0.0159936  0.0276090 -0.5793 0.562394   
## MSc_fail$Female            -0.0305298  0.0302175 -1.0103 0.312335   
## MSc_fail$`Work Experience` -0.0568921  0.0297591 -1.9118 0.055908 . 
## MSc_fail$Agrade            -0.0826957  0.0279924 -2.9542 0.003135 **
## MSc_fail$BelowBGrade        0.0642508  0.0500076  1.2848 0.198855   
## MSc_fail$"PG Degree"        0.0216560  0.0454387  0.4766 0.633648   
## MSc_fail$Year2004           0.0734098  0.0642854  1.1419 0.253481   
## MSc_fail$Year2005          -0.0169395  0.0518026 -0.3270 0.743667   
## MSc_fail$Year2006           0.1606837  0.0757138  2.1223 0.033817 * 
## MSc_fail$Year2007           0.1001326  0.0691120  1.4488 0.147381   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## dF/dx is for discrete change for the following variables:
## 
##  [1] "MSc_fail$English"           "MSc_fail$Female"           
##  [3] "MSc_fail$`Work Experience`" "MSc_fail$Agrade"           
##  [5] "MSc_fail$BelowBGrade"       "MSc_fail$\"PG Degree\""    
##  [7] "MSc_fail$Year2004"          "MSc_fail$Year2005"         
##  [9] "MSc_fail$Year2006"          "MSc_fail$Year2007"

Only the marginal effects of Work Experience, Agrade and Year2006 are significant.

M_Sc_conf_matrix = function(threshold = 0.5) {
  p = predict(h, newdata = MSc_fail, type = 'response')
  predicted_values = ifelse(p > threshold, 1, 0)
  actual_values = MSc_fail$Fail
  return(table(predicted_values, actual_values))
}

M_Sc_conf_matrix()
##                 actual_values
## predicted_values   0   1
##                0 432  67
##                1   1   0

All predictions are false for failed students, which is not balanced at all.

M_Sc_conf_matrix(0.134)
##                 actual_values
## predicted_values   0   1
##                0 279  24
##                1 154  43

Although accuracy for passed students falls, the results are now more balanced. Hence \(0.134\) seems to be a better threshold for the logit model.


4 Vector Autoregressive Models

The data are daily returns to \(3\) exchange rates (the euro, the British pound and the Japanese yen) against the US dollar, running from \(14/12/1998\) to \(03/07/2018,\) giving a total of \(7142\) observations.

url = 'https://raw.githubusercontent.com/QuanNguyenIU/Res_Med_Fin/main/currencies.xls'
currencies = rio::import(file = url, na = 'NA')
head(currencies, 6)
##         Date      EUR      GBP     JPY
## 1 1998-12-14 0.846806 0.593014 116.600
## 2 1998-12-15 0.849082 0.595344 115.920
## 3 1998-12-16 0.850526 0.596641 116.100
## 4 1998-12-17 0.848249 0.594495 115.145
## 5 1998-12-18 0.850146 0.594477 116.225
## 6 1998-12-19 0.848608 0.593824 115.675
currencies$reur = c(NA, 100 * diff(log(currencies$EUR)))
currencies$rgbp = c(NA, 100 * diff(log(currencies$GBP)))
currencies$rjpy = c(NA, 100 * diff(log(currencies$JPY)))
currencies = currencies[-1,]
adf.test(currencies$reur)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  currencies$reur
## Dickey-Fuller = -18.535, Lag order = 19, p-value = 0.01
## alternative hypothesis: stationary
adf.test(currencies$rgbp)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  currencies$rgbp
## Dickey-Fuller = -19.246, Lag order = 19, p-value = 0.01
## alternative hypothesis: stationary
adf.test(currencies$rjpy)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  currencies$rjpy
## Dickey-Fuller = -18.927, Lag order = 19, p-value = 0.01
## alternative hypothesis: stationary

All the returns are stationary, so we can now run the VAR model.

VAR(currencies[c('reur', 'rgbp', 'rjpy')], p = 2)
## 
## VAR Estimation Results:
## ======================= 
## 
## Estimated coefficients for equation reur: 
## ========================================= 
## Call:
## reur = reur.l1 + rgbp.l1 + rjpy.l1 + reur.l2 + rgbp.l2 + rjpy.l2 + const 
## 
##       reur.l1       rgbp.l1       rjpy.l1       reur.l2       rgbp.l2 
##  0.1474965506 -0.0183557359 -0.0070978299 -0.0118082131  0.0066228786 
##       rjpy.l2         const 
## -0.0054272893  0.0001368541 
## 
## 
## Estimated coefficients for equation rgbp: 
## ========================================= 
## Call:
## rgbp = reur.l1 + rgbp.l1 + rjpy.l1 + reur.l2 + rgbp.l2 + rjpy.l2 + const 
## 
##      reur.l1      rgbp.l1      rjpy.l1      reur.l2      rgbp.l2      rjpy.l2 
## -0.025270542  0.221361804 -0.039016040  0.046926637 -0.067794083  0.003286917 
##        const 
##  0.002825689 
## 
## 
## Estimated coefficients for equation rjpy: 
## ========================================= 
## Call:
## rjpy = reur.l1 + rgbp.l1 + rjpy.l1 + reur.l2 + rgbp.l2 + rjpy.l2 + const 
## 
##       reur.l1       rgbp.l1       rjpy.l1       reur.l2       rgbp.l2 
##  0.0410612623 -0.0708456993  0.1324572148 -0.0188915075  0.0249075437 
##       rjpy.l2         const 
##  0.0149565492 -0.0004125908
VARselect(currencies[c('reur', 'rgbp', 'rjpy')], lag.max = 10)
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      4      2      1      4 
## 
## $criteria
##                   1           2            3            4            5
## AIC(n) -5.433453428 -5.43726462 -5.437666647 -5.438909241 -5.438464346
## HQ(n)  -5.429472196 -5.43029747 -5.427713567 -5.425970237 -5.422539418
## SC(n)  -5.421888924 -5.41702674 -5.408755386 -5.401324601 -5.392206329
## FPE(n)  0.004367985  0.00435137  0.004349621  0.004344219  0.004346152
##                   6            7            8            9           10
## AIC(n) -5.437389700 -5.436794113 -5.435808057 -5.435335655 -5.433668221
## HQ(n)  -5.418478849 -5.414897337 -5.410925357 -5.407467031 -5.402813673
## SC(n)  -5.382458304 -5.373189339 -5.363529904 -5.354384124 -5.344043311
## FPE(n)  0.004350826  0.004353418  0.004357713  0.004359772  0.004367048

According to the Schwarz Criterion, lag \(1\) is the optimal choice.

var = VAR(currencies[c('reur', 'rgbp', 'rjpy')], p = 1)
causality(var, cause = c('reur', 'rgbp'))$Granger
## 
##  Granger causality H0: reur rgbp do not Granger-cause rjpy
## 
## data:  VAR object var
## F-Test = 7.7511, df1 = 2, df2 = 21408, p-value = 0.0004315
causality(var, cause = c('reur', 'rjpy'))$Granger
## 
##  Granger causality H0: reur rjpy do not Granger-cause rgbp
## 
## data:  VAR object var
## F-Test = 7.7007, df1 = 2, df2 = 21408, p-value = 0.0004537
causality(var, cause = c('rgbp', 'rjpy'))$Granger
## 
##  Granger causality H0: rgbp rjpy do not Granger-cause reur
## 
## data:  VAR object var
## F-Test = 0.72376, df1 = 2, df2 = 21408, p-value = 0.4849
causality(var, cause = 'rgbp')$Granger
## 
##  Granger causality H0: rgbp do not Granger-cause reur rjpy
## 
## data:  VAR object var
## F-Test = 7.7523, df1 = 2, df2 = 21408, p-value = 0.000431
causality(var, cause = 'reur')$Granger
## 
##  Granger causality H0: reur do not Granger-cause rgbp rjpy
## 
## data:  VAR object var
## F-Test = 4.1141, df1 = 2, df2 = 21408, p-value = 0.01635
causality(var, cause = 'rjpy')$Granger
## 
##  Granger causality H0: rjpy do not Granger-cause reur rgbp
## 
## data:  VAR object var
## F-Test = 7.5659, df1 = 2, df2 = 21408, p-value = 0.0005192

The null hypothesis of no Granger\(-\)causality is only accepted for the third case of GBP and JPY Granger\(-\)causing EUR.

ir = irf(var, n.ahead = 20)
plot(ir)

Only a few linkages between the series are established here. The responses to the shocks are very small, except for the response of a variable to its own shock, and they die down to almost \(0\) after the first lag.

vd = fevd(var, n.ahead = 10) 
plot(vd)

The percentage of the errors that is attributable to own shocks is \(100\%\) in the case of the euro rate (top graph), for the pound, the euro series explains around \(43\%\) of the variation in returns (middle graph), and for the yen, the euro series explains around \(7\%\) of the variation.


5 ARIMA Models

url = 'https://raw.githubusercontent.com/QuanNguyenIU/Res_Med_Fin/main/UKHP.xls'
UKHP = rio::import(file = url)
head(UKHP, 6)
##        Month Average House Price
## 1 1991-01-01            53051.72
## 2 1991-02-01            53496.80
## 3 1991-03-01            52892.86
## 4 1991-04-01            53677.44
## 5 1991-05-01            54385.73
## 6 1991-06-01            55107.38
Z = ts(UKHP$`Average House Price`)
DHP = 100 * diff(Z) / lag(Z, -1)
summary(DHP)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -3.4047 -0.2557  0.4483  0.4315  1.1522  3.8022
plot(UKHP$`Month`[-(1:1)], DHP, type = 'l', 
     xlab = '', ylab = '', main = 'DHP')

par(mfrow = c(1, 2))
plot(acf(DHP, plot = F)[1:12])
pacf(DHP, lag = 12)

The ACF dies away rather slowly, while only the first two PACF values seem strongly significant.

adf.test(DHP)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  DHP
## Dickey-Fuller = -5.1732, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary

The small \(p-\)value returned by the Dickey-Fuller test suggests that our data is stationary.

auto.arima(DHP, max.order = 10)
## Series: DHP 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2    mean
##       0.2361  0.3340  0.4275
## s.e.  0.0521  0.0523  0.1259
## 
## sigma^2 = 0.9756:  log likelihood = -457.23
## AIC=922.46   AICc=922.58   BIC=937.61

The best ARIMA model suggested by R is ARMA\((2,0)\).

fit = auto.arima(DHP, max.order = 10)
checkresiduals(fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,0) with non-zero mean
## Q* = 5.1323, df = 8, p-value = 0.7433
## 
## Model df: 2.   Total lags used: 10

The residuals seem to follow a white noise process.

coeftest(fit)
## 
## z test of coefficients:
## 
##           Estimate Std. Error z value  Pr(>|z|)    
## ar1       0.236107   0.052112  4.5308 5.877e-06 ***
## ar2       0.334036   0.052308  6.3859 1.704e-10 ***
## intercept 0.427546   0.125874  3.3966 0.0006822 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

All parameters are significant, i.e. the model fits well to the data.

fcast = forecast(fit, h = 2)
plot(fcast)


6 ARCH/GARCH Models with fGarch

g1 = garchFit(formula = ~garch(1, 1), data = DHP, trace = FALSE)
summary(g1)
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(1, 1), data = DHP, trace = FALSE) 
## 
## Mean and Variance Equation:
##  data ~ garch(1, 1)
## <environment: 0x000000002c4de6d0>
##  [data = DHP]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##       mu     omega    alpha1     beta1  
## 0.475607  0.049521  0.142394  0.819234  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##         Estimate  Std. Error  t value Pr(>|t|)    
## mu       0.47561     0.05962    7.978 1.55e-15 ***
## omega    0.04952     0.04052    1.222  0.22170    
## alpha1   0.14239     0.04831    2.947  0.00321 ** 
## beta1    0.81923     0.06646   12.327  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  -477.0586    normalized:  -1.46337 
## 
## Description:
##  Wed Aug 10 22:20:19 2022 by user: count 
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value  
##  Jarque-Bera Test   R    Chi^2  0.2716565 0.8729925
##  Shapiro-Wilk Test  R    W      0.997917  0.9591372
##  Ljung-Box Test     R    Q(10)  103.1381  0        
##  Ljung-Box Test     R    Q(15)  176.5068  0        
##  Ljung-Box Test     R    Q(20)  182.8047  0        
##  Ljung-Box Test     R^2  Q(10)  7.707323  0.6574007
##  Ljung-Box Test     R^2  Q(15)  10.3927   0.794338 
##  Ljung-Box Test     R^2  Q(20)  12.77499  0.886828 
##  LM Arch Test       R    TR^2   10.53314  0.5692942
## 
## Information Criterion Statistics:
##      AIC      BIC      SIC     HQIC 
## 2.951280 2.997745 2.950983 2.969822

3/4 parameters are significant. Results from Standardised Residuals Tests show that the residuals follow a normal distribution, with no autocorrelation for squared residuals.

g2 = garchFit(formula = ~arma(1, 1) + garch(1, 1), 
              data = DHP, trace = FALSE)
summary(g2)
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~arma(1, 1) + garch(1, 1), data = DHP, trace = FALSE) 
## 
## Mean and Variance Equation:
##  data ~ arma(1, 1) + garch(1, 1)
## <environment: 0x00000000273e87a0>
##  [data = DHP]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##        mu        ar1        ma1      omega     alpha1      beta1  
##  0.110124   0.771887  -0.470166   0.022934   0.074244   0.901314  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##         Estimate  Std. Error  t value Pr(>|t|)    
## mu       0.11012     0.04342    2.536  0.01120 *  
## ar1      0.77189     0.07160   10.781  < 2e-16 ***
## ma1     -0.47017     0.09632   -4.882 1.05e-06 ***
## omega    0.02293     0.01802    1.272  0.20322    
## alpha1   0.07424     0.02440    3.042  0.00235 ** 
## beta1    0.90131     0.03103   29.042  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  -449.7309    normalized:  -1.379543 
## 
## Description:
##  Wed Aug 10 22:20:19 2022 by user: count 
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value     
##  Jarque-Bera Test   R    Chi^2  4.561172  0.1022243   
##  Shapiro-Wilk Test  R    W      0.9914341 0.05573989  
##  Ljung-Box Test     R    Q(10)  9.620899  0.4743583   
##  Ljung-Box Test     R    Q(15)  47.10555  3.541328e-05
##  Ljung-Box Test     R    Q(20)  57.07719  1.997997e-05
##  Ljung-Box Test     R^2  Q(10)  10.07508  0.4339309   
##  Ljung-Box Test     R^2  Q(15)  14.49067  0.4886876   
##  Ljung-Box Test     R^2  Q(20)  16.28947  0.6985115   
##  LM Arch Test       R    TR^2   13.25202  0.350992    
## 
## Information Criterion Statistics:
##      AIC      BIC      SIC     HQIC 
## 2.795895 2.865593 2.795234 2.823708

Now the Ljung-Box test on the residuals returns better result.

g3 = garchFit(formula = ~arma(1, 1) + garch(1, 1),
              cond.dist = 'std', data = DHP, trace = FALSE)
summary(g3)
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~arma(1, 1) + garch(1, 1), data = DHP, cond.dist = "std", 
##     trace = FALSE) 
## 
## Mean and Variance Equation:
##  data ~ arma(1, 1) + garch(1, 1)
## <environment: 0x00000000412e49c0>
##  [data = DHP]
## 
## Conditional Distribution:
##  std 
## 
## Coefficient(s):
##        mu        ar1        ma1      omega     alpha1      beta1      shape  
##  0.106181   0.759713  -0.432559   0.021294   0.069514   0.907741   9.027749  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##         Estimate  Std. Error  t value Pr(>|t|)    
## mu       0.10618     0.04208    2.523   0.0116 *  
## ar1      0.75971     0.06868   11.062  < 2e-16 ***
## ma1     -0.43256     0.09133   -4.736 2.18e-06 ***
## omega    0.02129     0.01931    1.103   0.2700    
## alpha1   0.06951     0.02665    2.608   0.0091 ** 
## beta1    0.90774     0.03394   26.746  < 2e-16 ***
## shape    9.02775     4.71959    1.913   0.0558 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  -447.5294    normalized:  -1.372789 
## 
## Description:
##  Wed Aug 10 22:20:20 2022 by user: count 
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value     
##  Jarque-Bera Test   R    Chi^2  5.343427  0.06913365  
##  Shapiro-Wilk Test  R    W      0.9909293 0.04212616  
##  Ljung-Box Test     R    Q(10)  10.29695  0.4148389   
##  Ljung-Box Test     R    Q(15)  45.16766  7.204429e-05
##  Ljung-Box Test     R    Q(20)  54.55139  4.790228e-05
##  Ljung-Box Test     R^2  Q(10)  10.27432  0.4167646   
##  Ljung-Box Test     R^2  Q(15)  14.6317   0.4782571   
##  Ljung-Box Test     R^2  Q(20)  16.38522  0.6924567   
##  LM Arch Test       R    TR^2   13.70538  0.3199166   
## 
## Information Criterion Statistics:
##      AIC      BIC      SIC     HQIC 
## 2.788524 2.869837 2.787627 2.820973

The performance is now worse compared to the usual conditional distribution.


7 Time Series with GARCH/EGARCH/TGARCH/GJRGARCH

z = ts(currencies$JPY)
return = 100 * diff(z) / lag(z, -1)
adf.test(return)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  return
## Dickey-Fuller = -19.013, Lag order = 19, p-value = 0.01
## alternative hypothesis: stationary
par(mfrow = c(1, 2))
plot(acf(return, plot = F)[1:12])
pacf(return, lag = 12)

ArchTest(return, lags = 1, demean = TRUE)
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  return
## Chi-squared = 108.94, df = 1, p-value < 2.2e-16

The data exhibits ARCH effects.

fit = auto.arima(return, max.order = 10)
fit
## Series: return 
## ARIMA(5,0,1) with zero mean 
## 
## Coefficients:
##           ar1     ar2     ar3      ar4      ar5     ma1
##       -0.6776  0.1246  0.0006  -0.0435  -0.0428  0.8100
## s.e.   0.2109  0.0314  0.0148   0.0148   0.0118  0.2107
## 
## sigma^2 = 0.2177:  log likelihood = -4685.19
## AIC=9384.39   AICc=9384.4   BIC=9432.5
checkresiduals(fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(5,0,1) with zero mean
## Q* = 1.2105, df = 4, p-value = 0.8764
## 
## Model df: 6.   Total lags used: 10

The residuals from ARIMA model exhibits autocorrelation and non-normality.

sgar_spec = ugarchspec(variance.model = list(model = 'sGARCH',
                                             garchOrder = c(1, 1)),
                       mean.model = list(armaOrder = c(1, 1)))
sfit = ugarchfit(spec = sgar_spec, data = return,
                 out.sample = 0, solver = 'hybrid')
sfit
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : sGARCH(1,1)
## Mean Model   : ARFIMA(1,0,1)
## Distribution : norm 
## 
## Optimal Parameters
## ------------------------------------
##         Estimate  Std. Error    t value Pr(>|t|)
## mu      0.008976    0.005812   1.544333 0.122508
## ar1     0.175968    0.064515   2.727533 0.006381
## ma1     0.005331    0.065549   0.081325 0.935184
## omega   0.001517    0.000201   7.534589 0.000000
## alpha1  0.038070    0.001724  22.084123 0.000000
## beta1   0.955720    0.001284 744.168205 0.000000
## 
## Robust Standard Errors:
##         Estimate  Std. Error    t value Pr(>|t|)
## mu      0.008976    0.006199 1.4480e+00 0.147628
## ar1     0.175968    0.058711 2.9972e+00 0.002725
## ma1     0.005331    0.062984 8.4637e-02 0.932550
## omega   0.001517    0.000609 2.4885e+00 0.012827
## alpha1  0.038070    0.003916 9.7211e+00 0.000000
## beta1   0.955720    0.000849 1.1251e+03 0.000000
## 
## LogLikelihood : -4242.425 
## 
## Information Criteria
## ------------------------------------
##                    
## Akaike       1.1900
## Bayes        1.1958
## Shibata      1.1900
## Hannan-Quinn 1.1920
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                     0.0268  0.8700
## Lag[2*(p+q)+(p+q)-1][5]    2.5182  0.7690
## Lag[4*(p+q)+(p+q)-1][9]    4.4387  0.5861
## d.o.f=2
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic   p-value
## Lag[1]                      16.65 4.484e-05
## Lag[2*(p+q)+(p+q)-1][5]     20.30 1.608e-05
## Lag[4*(p+q)+(p+q)-1][9]     24.38 1.432e-05
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[3]     2.891 0.500 2.000 0.08908
## ARCH Lag[5]     6.487 1.440 1.667 0.04610
## ARCH Lag[7]     9.621 2.315 1.543 0.02252
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  12.1472
## Individual Statistics:              
## mu     0.08015
## ar1    7.99040
## ma1    9.16043
## omega  0.30760
## alpha1 0.42778
## beta1  0.42549
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          1.49 1.68 2.12
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value      prob sig
## Sign Bias           1.0888 2.763e-01    
## Negative Sign Bias  5.3730 7.988e-08 ***
## Positive Sign Bias  0.8881 3.745e-01    
## Joint Effect       31.4191 6.937e-07 ***
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     897.4   3.942e-178
## 2    30     955.5   2.180e-182
## 3    40     993.8   4.424e-183
## 4    50    1018.3   2.598e-181
## 
## 
## Elapsed time : 0.5841742

The AR(1) and GARCH coefficients are significant. The Weighted Ljung-Box Test is good on standardized residuals but not their squared. ARCH test is rather good for lag 3. We have a negative sign bias but not the positive one.

egar_spec = ugarchspec(variance.model = list(model = 'eGARCH',
                                             garchOrder = c(1, 1)),
                       mean.model = list(armaOrder = c(1, 1)))
efit = ugarchfit(spec = egar_spec, data = return,
                 out.sample = 0, solver = 'hybrid')
efit
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : eGARCH(1,1)
## Mean Model   : ARFIMA(1,0,1)
## Distribution : norm 
## 
## Optimal Parameters
## ------------------------------------
##         Estimate  Std. Error   t value Pr(>|t|)
## mu      0.002162    0.003350   0.64536 0.518692
## ar1     0.204993    0.025118   8.16137 0.000000
## ma1    -0.019240    0.024128  -0.79744 0.425196
## omega  -0.012847    0.005767  -2.22783 0.025892
## alpha1 -0.038792    0.006457  -6.00801 0.000000
## beta1   0.986123    0.004411 223.56575 0.000000
## gamma1  0.106860    0.013068   8.17699 0.000000
## 
## Robust Standard Errors:
##         Estimate  Std. Error  t value Pr(>|t|)
## mu      0.002162    0.002783  0.77699  0.43716
## ar1     0.204993    0.014931 13.72892  0.00000
## ma1    -0.019240    0.012742 -1.50995  0.13106
## omega  -0.012847    0.026963 -0.47647  0.63374
## alpha1 -0.038792    0.032954 -1.17716  0.23913
## beta1   0.986123    0.019759 49.90823  0.00000
## gamma1  0.106860    0.066890  1.59754  0.11014
## 
## LogLikelihood : -4215.051 
## 
## Information Criteria
## ------------------------------------
##                    
## Akaike       1.1826
## Bayes        1.1894
## Shibata      1.1826
## Hannan-Quinn 1.1850
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                     0.2484  0.6182
## Lag[2*(p+q)+(p+q)-1][5]    2.4974  0.7801
## Lag[4*(p+q)+(p+q)-1][9]    4.3004  0.6194
## d.o.f=2
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic   p-value
## Lag[1]                      11.48 7.040e-04
## Lag[2*(p+q)+(p+q)-1][5]     16.50 1.754e-04
## Lag[4*(p+q)+(p+q)-1][9]     22.02 6.006e-05
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale  P-Value
## ARCH Lag[3]     3.982 0.500 2.000 0.045989
## ARCH Lag[5]     8.447 1.440 1.667 0.015659
## ARCH Lag[7]    12.791 2.315 1.543 0.004003
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  13.8855
## Individual Statistics:             
## mu     0.1279
## ar1    7.5089
## ma1    8.8590
## omega  0.7094
## alpha1 0.2018
## beta1  0.5217
## gamma1 0.4660
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          1.69 1.9 2.35
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value      prob sig
## Sign Bias           0.5237 6.005e-01    
## Negative Sign Bias  3.9915 6.633e-05 ***
## Positive Sign Bias  1.4674 1.423e-01    
## Joint Effect       18.8354 2.957e-04 ***
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     867.3   1.034e-171
## 2    30     929.0   8.529e-177
## 3    40     938.1   1.948e-171
## 4    50     962.7   8.145e-170
## 
## 
## Elapsed time : 1.239805
gjrgar_spec = ugarchspec(variance.model = list(model = 'gjrGARCH',
                                               garchOrder = c(1, 1)),
                         mean.model = list(armaOrder = c(1, 1)))
gjrfit = ugarchfit(spec = gjrgar_spec, data = return,
                   out.sample = 0, solver = 'hybrid')
gjrfit
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : gjrGARCH(1,1)
## Mean Model   : ARFIMA(1,0,1)
## Distribution : norm 
## 
## Optimal Parameters
## ------------------------------------
##         Estimate  Std. Error    t value Pr(>|t|)
## mu      0.003666    0.005881   0.623434 0.532999
## ar1     0.188476    0.063307   2.977181 0.002909
## ma1    -0.001247    0.064347  -0.019374 0.984543
## omega   0.001931    0.000344   5.613735 0.000000
## alpha1  0.025477    0.003511   7.255702 0.000000
## beta1   0.948465    0.004872 194.692838 0.000000
## gamma1  0.036696    0.006601   5.559404 0.000000
## 
## Robust Standard Errors:
##         Estimate  Std. Error   t value Pr(>|t|)
## mu      0.003666    0.006192  0.592113 0.553775
## ar1     0.188476    0.058422  3.226094 0.001255
## ma1    -0.001247    0.062847 -0.019837 0.984174
## omega   0.001931    0.000861  2.242890 0.024904
## alpha1  0.025477    0.007159  3.558913 0.000372
## beta1   0.948465    0.009746 97.317963 0.000000
## gamma1  0.036696    0.014566  2.519229 0.011761
## 
## LogLikelihood : -4220.421 
## 
## Information Criteria
## ------------------------------------
##                    
## Akaike       1.1842
## Bayes        1.1909
## Shibata      1.1841
## Hannan-Quinn 1.1865
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                    0.09231  0.7613
## Lag[2*(p+q)+(p+q)-1][5]   2.44637  0.8063
## Lag[4*(p+q)+(p+q)-1][9]   4.32832  0.6127
## d.o.f=2
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic   p-value
## Lag[1]                      10.82 0.0010068
## Lag[2*(p+q)+(p+q)-1][5]     17.02 0.0001269
## Lag[4*(p+q)+(p+q)-1][9]     22.34 0.0000496
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale  P-Value
## ARCH Lag[3]     4.883 0.500 2.000 0.027121
## ARCH Lag[5]    10.086 1.440 1.667 0.006264
## ARCH Lag[7]    13.648 2.315 1.543 0.002479
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  13.4239
## Individual Statistics:              
## mu     0.06278
## ar1    8.33142
## ma1    9.54232
## omega  0.38609
## alpha1 0.55682
## beta1  0.58579
## gamma1 0.41758
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          1.69 1.9 2.35
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value      prob sig
## Sign Bias           0.3912 6.956e-01    
## Negative Sign Bias  3.9586 7.612e-05 ***
## Positive Sign Bias  1.4369 1.508e-01    
## Joint Effect       18.8153 2.985e-04 ***
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     868.1   7.062e-172
## 2    30     939.4   5.391e-179
## 3    40     947.6   1.985e-173
## 4    50     987.2   6.947e-175
## 
## 
## Elapsed time : 1.322361
par(mfrow = c(2, 2))
ni.garch11 = newsimpact(efit)
plot(ni.garch11$zx, ni.garch11$zy, 
     type = 'l', lwd = 2, col = 'blue', 
     main = 'eGARCH(1,1)-ARMA(1,1) - News Impact Curve', 
     xlab = ni.garch11$xexpr, ylab = ni.garch11$yexpr)
ni.garch11 = newsimpact(gjrfit)
plot(ni.garch11$zx, ni.garch11$zy,
     type = 'l', lwd = 2, col = 'blue', 
     main = 'GJR-ARMA(1,1) - News Impact Curve', 
     xlab = ni.garch11$xexpr, ylab = ni.garch11$yexpr)


8 Dickey-Fuller Simulation

set.seed(123456)
tnone = tconst = ttrend = NULL
for (i in 1:50000){
  y = c(0, cumsum(rnorm(1199)))
  dy = c(NA, diff(y))
  lagy = c(NA, y[1:1199])
  # Model with no intercept
  lmnone = lm(dy[-(1:200)] ~ 0 + lagy[-(1:200)])
  tnone = c(tnone, coef(summary(lmnone))[1] / coef(summary(lmnone))[2])
  # Model with intercept, no trend
  lmconst = lm(dy[-(1:200)] ~ 1 + lagy[-(1:200)])
  tconst = c(tconst, coef(summary(lmconst))[2,1] / coef(summary(lmconst))[2,2])
  # Model with intercept and trend
  lmtrend = lm(dy[-(1:200)] ~ 1 + lagy[-(1:200)] + c(1:1000))
  ttrend = c(ttrend, coef(summary(lmtrend))[2,1] / coef(summary(lmtrend))[2,2])
} 
quantile(tnone, c(0.01, 0.05, 0.1))
##        1%        5%       10% 
## -2.586771 -1.944655 -1.621259
quantile(tconst, c(0.01, 0.05, 0.1))
##        1%        5%       10% 
## -3.430630 -2.859899 -2.565954
quantile(ttrend, c(0.01, 0.05, 0.1))
##        1%        5%       10% 
## -3.968928 -3.414297 -3.130338

9 Pricing Asian Options

set.seed (123456)
# Initial values and derived constants
K = 6500; iv = 0.2652 # Simulation one
s0 = 6289.7; rf = 0.0624; dy = 0.0242; ttm = 0.5; obs = 125
dt = ttm / obs
drift = (rf - dy - iv ^ 2 / 2) * dt
vsqrdt = iv * dt ^ 0.5
putval = callval = NULL
for (i in 1:25000){
  random = rnorm(obs)
  # create cumulative sum of random numbers
  # Spot price evolution for positive and negative random numbers
  spot = s0 * exp(drift * c(1:obs) + vsqrdt * cumsum(random))
  spot_neg = s0 * exp(drift * c(1:obs) + vsqrdt * cumsum(-random))
  # Compute call values 
  callval = c(callval, max(mean(spot) - K, 0) * exp(-rf * ttm))
  callval = c(callval, max(mean(spot_neg) - K, 0) * exp(-rf * ttm))
  # Compute Put values 
  putval = c(putval, max(K - mean(spot), 0) * exp(-rf * ttm))
  putval = c(putval, max(K - mean(spot_neg), 0) * exp(-rf * ttm))
}
mean(callval)
## [1] 204.6794
mean(putval)
## [1] 349.6241
K = 5500; iv = 0.3433 # Simulation two
s0 = 6289.7; rf = 0.0624; dy = 0.0242; ttm = 0.5; obs = 125
dt = ttm / obs
drift = (rf - dy - iv ^ 2 / 2) * dt
vsqrdt = iv * dt ^ 0.5
putval = callval = NULL
for (i in 1:25000){
  random = rnorm(obs)
  spot = s0 * exp(drift * c(1:obs) + vsqrdt * cumsum(random))
  spot_neg = s0 * exp(drift * c(1:obs) + vsqrdt * cumsum(-random))
  callval = c(callval, max(mean(spot) - K, 0) * exp(-rf * ttm))
  callval = c(callval, max(mean(spot_neg) - K, 0) * exp(-rf * ttm))
  putval = c(putval, max(K - mean(spot), 0) * exp(-rf * ttm))
  putval = c(putval, max(K - mean(spot_neg), 0) * exp(-rf * ttm))
}
mean(callval)
## [1] 886.031
mean(putval)
## [1] 61.86626