library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
#Import file data
macro <- read_excel("D:/Financial Econometrics/macro.xls" ,col_types = c("date", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric"))
# Create variables (new attributes)
macro$dspread = c (NA , diff ( macro$BMINUSA ) )
macro$dcredit = c (NA , diff ( macro$CCREDIT ) )
macro$dprod = c (NA , diff ( macro$INDPRO ) )
macro$dmoney = c (NA , diff ( macro$M1SUPPLY ) )
macro$inflation = c (NA , diff ( log ( macro$CPI ) ) )
macro$rterm = c (NA , diff ( macro$USTB10Y - macro$USTB3M ) )
macro$dinflation = c (NA ,100* diff ( macro$inflation ) )
macro$rsandp = c (NA ,100* diff ( log ( macro$SANDP ) ) )
macro$ermsoft = c (NA ,100* diff ( log ( macro$MICROSOFT ) ) ) - macro$USTB3M /12
macro$ersandp = macro$rsandp - macro$USTB3M /12
# Linear Regression Model
lm_msoft = lm ( ermsoft ~ ersandp + dprod + dcredit + dinflation + dmoney +
dspread + rterm , data = macro )
summary(lm_msoft)
##
## Call:
## lm(formula = ermsoft ~ ersandp + dprod + dcredit + dinflation +
## dmoney + dspread + rterm, data = macro)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.075 -4.440 -0.403 4.616 24.480
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.326002 0.475481 2.789 0.00556 **
## ersandp 1.280799 0.094354 13.574 < 2e-16 ***
## dprod -0.303032 0.736881 -0.411 0.68113
## dcredit -0.025364 0.027149 -0.934 0.35078
## dinflation 2.194670 1.264299 1.736 0.08341 .
## dmoney -0.006871 0.015568 -0.441 0.65919
## dspread 2.260064 4.140284 0.546 0.58548
## rterm 4.733069 1.715814 2.758 0.00609 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.845 on 375 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.3452, Adjusted R-squared: 0.333
## F-statistic: 28.24 on 7 and 375 DF, p-value: < 2.2e-16
# Hypothesis testing: H0: all beta_i = 0
library (car)
## Warning: package 'car' was built under R version 4.3.3
## Loading required package: carData
linearHypothesis (lm_msoft , c("dprod=0"," dcredit=0"," dmoney=0"," dspread=0"))
## Linear hypothesis test
##
## Hypothesis:
## dprod = 0
## dcredit = 0
## dmoney = 0
## dspread = 0
##
## Model 1: restricted model
## Model 2: ermsoft ~ ersandp + dprod + dcredit + dinflation + dmoney + dspread +
## rterm
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 379 23180
## 2 375 23078 4 101.88 0.4139 0.7986
lm_start = lm (ermsoft~1 , data=macro[-2,])
step(lm_start,direction="forward",scope=formula(lm_msoft))
## Start: AIC=1733.94
## ermsoft ~ 1
##
## Df Sum of Sq RSS AIC
## + ersandp 1 11367.2 23878 1586.8
## + dspread 1 366.8 34878 1731.9
## + rterm 1 303.0 34942 1732.6
## + dinflation 1 219.9 35025 1733.5
## <none> 35245 1733.9
## + dprod 1 78.8 35166 1735.1
## + dcredit 1 10.8 35234 1735.8
## + dmoney 1 1.0 35244 1735.9
##
## Step: AIC=1586.81
## ermsoft ~ ersandp
##
## Df Sum of Sq RSS AIC
## + rterm 1 497.12 23381 1580.8
## + dinflation 1 227.20 23650 1585.2
## <none> 23878 1586.8
## + dcredit 1 70.09 23808 1587.7
## + dprod 1 50.24 23827 1588.0
## + dmoney 1 24.10 23854 1588.4
## + dspread 1 0.13 23878 1588.8
##
## Step: AIC=1580.75
## ermsoft ~ ersandp + rterm
##
## Df Sum of Sq RSS AIC
## + dinflation 1 200.479 23180 1579.5
## <none> 23381 1580.8
## + dcredit 1 69.399 23311 1581.6
## + dprod 1 37.212 23343 1582.1
## + dmoney 1 25.316 23355 1582.3
## + dspread 1 0.048 23380 1582.8
##
## Step: AIC=1579.45
## ermsoft ~ ersandp + rterm + dinflation
##
## Df Sum of Sq RSS AIC
## <none> 23180 1579.5
## + dcredit 1 63.785 23116 1580.4
## + dprod 1 17.067 23163 1581.2
## + dmoney 1 13.831 23166 1581.2
## + dspread 1 12.788 23167 1581.2
##
## Call:
## lm(formula = ermsoft ~ ersandp + rterm + dinflation, data = macro[-2,
## ])
##
## Coefficients:
## (Intercept) ersandp rterm dinflation
## 1.021 1.266 4.739 2.187
plot (macro$Date [ -(1:2) ] , lm_msoft$residuals , type = "l", xlab ="", ylab ="")
\text{Using function linearHypothesis to testing the null hypothesis
that the parameters (‘dprod’, ‘dcredit’, ‘dmoney’ , ‘dspread’ variables)
on these four variables are jointly zero using an F-test.
Result: The resulting F-test statistic follows an F(4, 375) distribution as there are 4 restrictions, 383 usable observations and eight parameters to estimate in the unrestricted regression. The F-statistic value is 0.4139 with p-value 0.7986, suggesting that the null hypothesis cannot be rejected. The parameters on ‘rterm’ and ‘dinflation’ are significant at the 10% level. Hence they are not included in this F-test and the variables are retained.}
#Diagnostic test
##Multicollinearity problem
cor(macro [-(1:2), c("dprod", "dcredit", "dinflation", "dmoney", "dspread", "rterm")])
## dprod dcredit dinflation dmoney dspread
## dprod 1.00000000 0.094273354 -0.14355079 -0.052514358 -0.05275628
## dcredit 0.09427335 1.000000000 -0.02460369 0.150165099 0.06281801
## dinflation -0.14355079 -0.024603694 1.00000000 -0.093571291 -0.22710010
## dmoney -0.05251436 0.150165099 -0.09357129 1.000000000 0.17069868
## dspread -0.05275628 0.062818012 -0.22710010 0.170698675 1.00000000
## rterm -0.04375067 -0.004029469 0.04160626 0.003800624 -0.01762237
## rterm
## dprod -0.043750669
## dcredit -0.004029469
## dinflation 0.041606256
## dmoney 0.003800624
## dspread -0.017622374
## rterm 1.000000000
vif(lm_msoft) #smaller than 10 is good
## ersandp dprod dcredit dinflation dmoney dspread rterm
## 1.045330 1.047018 1.039351 1.090154 1.062975 1.132181 1.005900
To generate the correlation matrix, we use the function cor() of the stats package. Result: the largest observed correlations (in absolute value) are 0.17 between the money supply and spread variables, and –0.23 between the spread and unexpected inflation. These figures are probably sufficiently small that they can reasonably be ignored.
## Heteroskedasticity problem
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(zoo)
bptest (formula ( lm_msoft ) , data = macro , studentize = FALSE) #Norm_dist
##
## Breusch-Pagan test
##
## data: formula(lm_msoft)
## BP = 6.3131, df = 7, p-value = 0.5037
bptest ( formula ( lm_msoft ) , data = macro , studentize = TRUE ) #T_dist
##
## studentized Breusch-Pagan test
##
## data: formula(lm_msoft)
## BP = 3.1607, df = 7, p-value = 0.8698
Testing Heteroskedasticity by Breusch-Pagan test Result: As can be seen, the null hypothesis is one of constant variance, i.e., homoscedasticity. With a chi square-value of 6.31 and a corresponding p-value of 0.5037, the Breusch–Pagan test suggests not to reject the null hypothesis of constant variance of the residuals. This result is robust to the alternative distributional assumption, since the test statistic and p-value also suggest that there is not a problem of heteroscedastic errors for the APT-style model.
### If Heterosk. problem => Using White's Modified Standard Error Estimates
library(sandwich)
## Warning: package 'sandwich' was built under R version 4.3.3
coeftest ( lm_msoft , vcov. = vcovHC ( lm_msoft , type ="HC1"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.3260025 0.4590678 2.8885 0.004096 **
## ersandp 1.2807988 0.0929615 13.7777 < 2.2e-16 ***
## dprod -0.3030317 0.6345495 -0.4776 0.633246
## dcredit -0.0253637 0.0208151 -1.2185 0.223790
## dinflation 2.1946700 1.3068027 1.6794 0.093903 .
## dmoney -0.0068714 0.0109006 -0.6304 0.528835
## dspread 2.2600645 3.4278130 0.6593 0.510088
## rterm 4.7330689 1.7265468 2.7413 0.006412 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Result: - With p-value is 2.2e-16, the null hypothesis is rejected, it indicate that ‘ersandp’ variable extremely significant suggesting a strong positive relationship with the dependent variable. -The independent variable ‘rterm’, ‘dinflation’, has p-value are 0.006412 and 0.093903 respectively, it indicate that the null hypothesis is rejected at significant level 0.1 that is suggesting some evidence that this variable might affect the dependent variable.. -The variables dprod, dcredit, dmoney, and dspread have p-values greater than the significance level of 0.1, suggesting that they do not have a significant impact on the dependent variable in the model.
## Autocorrelation problem
dwtest(lm_msoft) #Durbin-Watson test
##
## Durbin-Watson test
##
## data: lm_msoft
## DW = 2.0974, p-value = 0.8176
## alternative hypothesis: true autocorrelation is greater than 0
bgtest(lm_msoft, order=10) #Breusch-Godfrey test.
##
## Breusch-Godfrey test for serial correlation of order up to 10
##
## data: lm_msoft
## LM test = 4.7666, df = 10, p-value = 0.9062
Based on the Durbin-Watson test result (DW = 2.0974, p-value = 0.8176): - The DW statistic is very close to 2, suggesting no significant autocorrelation in the residuals. - The high p-value indicates that we cannot reject the null hypothesis of no autocorrelation.
Based on the Breusch-Godfrey test result (LM test = 4.7666, df = 10, p-value = 0.9062) with null hypothesis is no serial correlation of any order up to the specified lag: - High p-value (0.9062): The p-value is much higher than 0.05, indicating a lack of evidence to reject the null hypothesis.
### If violation (both Heterosk. and Autocorr.) => Newey-West Procedure for Estimating Standard Errors
coeftest ( lm_msoft , vcov. = NeweyWest ( lm_msoft , lag = 6, adjust =T , prewhite = F ) )
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.3260025 0.5027048 2.6377 0.008694 **
## ersandp 1.2807988 0.0998922 12.8218 < 2.2e-16 ***
## dprod -0.3030317 0.5216170 -0.5809 0.561625
## dcredit -0.0253637 0.0223410 -1.1353 0.256975
## dinflation 2.1946700 1.3136172 1.6707 0.095614 .
## dmoney -0.0068714 0.0109474 -0.6277 0.530596
## dspread 2.2600645 2.8418132 0.7953 0.426948
## rterm 4.7330689 1.7585143 2.6915 0.007431 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Testing for Non-Normality
library(moments)
skewness(lm_msoft$residuals)
## [1] -0.005612677
kurtosis(lm_msoft$residuals)
## [1] 4.994826
hist ( lm_msoft$residuals , main = "")
box ()
Looking at the histogram plot, we see that the distribution of the
residuals roughly assembles a bellshape, although we also find that
there are some large negative outliers which might lead to a
considerable negative skewness of the data series.
#A formal normality test
jarque.test(lm_msoft$residuals)
##
## Jarque-Bera Normality Test
##
## data: lm_msoft$residuals
## JB = 63.505, p-value = 1.621e-14
## alternative hypothesis: greater
agostino.test(lm_msoft$residuals)
##
## D'Agostino skewness test
##
## data: lm_msoft$residuals
## skew = -0.0056127, z = -0.0456821, p-value = 0.9636
## alternative hypothesis: data have a skewness
anscombe.test(lm_msoft$residuals)
##
## Anscombe-Glynn kurtosis test
##
## data: lm_msoft$residuals
## kurt = 4.9948, z = 4.5984, p-value = 4.258e-06
## alternative hypothesis: kurtosis is not equal to 3
D’Agostino test does not reject the null hypothesis of zero skewness, but the Anscombe–Glynn test rejects the null hypothesis of a kurtosis of 3. The joint Jarque–Bera test also rejects the normality assumption.
#Dummy Variable Construction and Application
plot ( macro$Date [-(1:2)], lm_msoft$residuals , type = "l", col ="red ",xlab ="",
ylab ="")
lines ( macro$Date [-(1:2)], lm_msoft$fitted.values )
legend ("bottomright",c("Residuals","Fitted"), col = c("red","black"),lty =1)
The graph shows that there are several significant (negative) outliers,
with 2000 having the largest of them. The months with a large residual
are all associated with actual returns that were significantly lower
(i.e., more negative) than those anticipated by the model. It’s
interesting to note that the October 1987 residual is not as noticeable
since, despite the stock price decline, the market index value also
declined, indicating that the stock price decline was, at least in part,
anticipated.
macro$Date = as.Date ( macro$Date )
macro$APR00DUM = as.integer ( macro$Date == as.Date ("2000-04-01"))
macro$DEC00DUM = as.integer ( macro$Date == as.Date ("2000-12-01"))
lm_dummy = lm(ermsoft ~ ersandp + dprod + dcredit + dinflation + dmoney + dspread + rterm + APR00DUM + DEC00DUM, data = macro)
summary(lm_dummy)
##
## Call:
## lm(formula = ermsoft ~ ersandp + dprod + dcredit + dinflation +
## dmoney + dspread + rterm + APR00DUM + DEC00DUM, data = macro)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.9130 -4.3820 -0.4323 4.2993 25.0430
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.419760 0.454388 3.125 0.001920 **
## ersandp 1.253853 0.090222 13.897 < 2e-16 ***
## dprod -0.321131 0.704945 -0.456 0.648985
## dcredit -0.015718 0.026044 -0.603 0.546548
## dinflation 1.442064 1.214637 1.187 0.235889
## dmoney -0.005696 0.014868 -0.383 0.701875
## dspread 1.869289 3.955274 0.473 0.636770
## rterm 4.264174 1.640514 2.599 0.009712 **
## APR00DUM -37.028834 7.576018 -4.888 1.52e-06 ***
## DEC00DUM -28.729950 7.546383 -3.807 0.000164 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.492 on 373 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.406, Adjusted R-squared: 0.3917
## F-statistic: 28.33 on 9 and 373 DF, p-value: < 2.2e-16
We re-examine the normality test results of the residuals based on this new model specification by applying the functions to the residuals of lm_dummy. We see that now also the skewness test strongly rejects the normality assumption with a p-value of 0.0016. The residuals are still a long way from following a normal distribution, and the joint null hypothesis of normality is still strongly rejected, probably because there are still several very large outliers.