knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
The following work in progress is for my Honors Thesis in Economics. The dataset “FinCrisis” consists of economic variables that affect the number of housing starts in the US.
library(readxl)
library(readr)
library(dplyr)
library(ggplot2)
library(GGally) # creates scatterplot matrices
library(gridExtra)
library(purrr)
library(glmnet)
library(tidyverse)
library(caret)
library(data.table)
library("NeuralNetTools") # plots neural nets using nnet package
library(nnet)
library(forecast)
library(LiblineaR) # estimates predictive linear models for classification and regression
library(broom) # has tidy function to reshape, combine untidy to cleaner formats
library(sarima) # has whiteNoise and autocorrelations functions
library(nlme) # has the function gls
library(vars) # estimate standard errors of fitted VAR parameters.
library(tseries) # for GARCH models and unit root tests
library(fastmatch) # to get column number
library(urca) # for unit root test
library(stats)
library(dynlm) # Dynamic Linear Models and Time Series Regression
library(kableExtra) # generates customized tables
library(stargazer) # reports summary statistics, results of VAR, and regression estimation
library(tsDyn) # has VECM function
library(fGarch) # fits GARCH models
library(egcm) # for Engle-Granger two-step procedure for identifying pairs of cointegrated series.
library(Rmisc)
library(sweep)
FinCrisis = read_xlsx("FinCrisis.xlsx")
# convert the data frame to a monthly ts object (June 1976 through December 2018)
time_ser = ts(FinCrisis, frequency = 12, start = c(1976, 6), end = c(2018, 12))
time_ser.df = as.data.frame(time_ser)
# Making a train and test split for the estimation
train_fin = slice(time_ser.df, 1:409)
test_fin = slice(time_ser.df, -(1:409))
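As a quick check, the monthly series from June 1976 through December 2018 has 511 observations, so this chronological split reserves roughly the first 80% of the sample for training:
nrow(train_fin) # 409
nrow(test_fin)  # 102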
cor_matrix = cor(FinCrisis[, unlist(lapply(FinCrisis, is.numeric))])
# rank variable pairs from most positively to most negatively correlated,
# keeping one row per unique correlation value
cor_matrix %>%
  as.data.frame() %>%
  mutate(var1 = rownames(.)) %>%
  gather(var2, value, -var1) %>%
  arrange(desc(value)) %>%
  group_by(value) %>%
  dplyr::filter(row_number() == 1)
## # A tibble: 56 x 3
## # Groups: value [56]
## var1 var2 value
## <chr> <chr> <dbl>
## 1 hous_st hous_st 1
## 2 sec_conL income 1.000
## 3 CPI income 0.999
## 4 CPI sec_conL 0.999
## 5 real_estL CPI 0.997
## 6 real_estL sec_conL 0.997
## 7 real_estL income 0.996
## 8 mortgR fed_fundsR 0.989
## 9 pvt_house_comp hous_st 0.981
## 10 real_estL yield_sp 0.794
## # … with 46 more rows
crossval_fold_inds <- caret::createFolds(
y = train_fin$hous_st,
k = 10
)
# Note: despite its name, this function returns the mean absolute error (MAE),
# which is the error metric reported for the validation and test sets below
calc_mse <- function(observed, predicted) {
  return(mean(abs(observed - predicted)))
}
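For example, with observations (10, 12) and predictions (11, 15) the residuals are -1 and -3, so the function returns 2:
calc_mse(observed = c(10, 12), predicted = c(11, 15)) # 2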
get_val_mse <- function(formula, train_fin, cross_fold_inds, val_fold){
  val_data <- train_fin %>% slice(cross_fold_inds[[val_fold]])     # held-out validation fold
  train2_data <- train_fin %>% slice(-cross_fold_inds[[val_fold]]) # remaining folds
  fitmod <- lm(formula, data = train2_data)
  val_preds <- predict(fitmod, val_data)
  calc_mse(observed = val_data$hous_st, predicted = val_preds)
}
mse_results <- expand.grid(val_fold = 1:10)
get_crossval_mse <- function(formula, train_fin, crossval_fold_inds){
  xval_mse_results <- mse_results %>%
    mutate(
      val_mse = mse_results %>%
        pmap_dbl(
          get_val_mse,
          formula = formula,
          cross_fold_inds = crossval_fold_inds,
          train_fin = train_fin
        )
    )
  mean(xval_mse_results$val_mse)
}
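A minimal usage sketch of the cross-validation helper; the formula below is only illustrative, and any combination of FinCrisis predictors could be supplied:
# hypothetical candidate model: housing starts explained by CPI and the mortgage rate
get_crossval_mse(
  formula = hous_st ~ CPI + mortgR,
  train_fin = train_fin,
  crossval_fold_inds = crossval_fold_inds
)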
As the above models had high test-set errors, we suspected that this may be due to multicollinearity among the independent variables in the dataset, so we chose to analyze the data using principal component analysis (PCA).
FinCrisis.pca <-
  prcomp(time_ser.df[,-c(1,2)], # numeric explanatory variables only (drop the index and response columns)
         center = TRUE, # shift each variable to mean zero
         scale. = TRUE) # rescale each variable to unit variance, advisable since the variables are on very different scales
summary(FinCrisis.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.4645 1.3724 1.0866 0.65482 0.49411 0.36791 0.1731
## Proportion of Variance 0.6074 0.1883 0.1181 0.04288 0.02441 0.01354 0.0030
## Cumulative Proportion 0.6074 0.7957 0.9138 0.95669 0.98111 0.99464 0.9976
## PC8 PC9 PC10
## Standard deviation 0.13519 0.06179 0.03869
## Proportion of Variance 0.00183 0.00038 0.00015
## Cumulative Proportion 0.99947 0.99985 1.00000
Proportion of variance shows how much of the total variance is explained by each principal component. Components are always ordered by the variance they explain, so the most important ones come first: here the first four components account for about 96% of the variance and the first six for over 99%.
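These proportions can be recomputed directly from the component standard deviations returned by prcomp:
# each PC's share of the total variance is sdev^2 / sum(sdev^2)
FinCrisis.pca$sdev^2 / sum(FinCrisis.pca$sdev^2)
# cumulative shares, matching the last row of the summary above
cumsum(FinCrisis.pca$sdev^2) / sum(FinCrisis.pca$sdev^2)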
# selecting the first 6 principal components to use in the model
x_pca = FinCrisis.pca$x[, 1:6]
x_pca %>% head() %>%
kable("html") %>%
kable_styling(bootstrap_options = c("striped", "hover", "responsive"), full_width = FALSE)
| PC1 | PC2 | PC3 | PC4 | PC5 | PC6 |
|---|---|---|---|---|---|
| -2.532253 | 0.9589985 | -0.6856539 | -0.4194933 | 0.6971565 | -0.9574271 |
| -2.417471 | 1.1089015 | -0.9856733 | -0.2229814 | 0.6730305 | -0.8642424 |
| -2.397437 | 1.0653462 | -1.1664993 | -0.3159122 | 0.5733262 | -0.7703319 |
| -2.369972 | 0.9977712 | -1.1642373 | -0.3433385 | 0.6511079 | -0.7066144 |
| -2.267717 | 1.0412444 | -1.4965310 | -0.3725687 | 0.5711167 | -0.6006079 |
| -2.265047 | 1.0854381 | -1.4703869 | -0.5285890 | 0.5150894 | -0.6393254 |
# combine PC1-PC6 as explanatory variables with hous_st as the response variable in a new dataset:
new_data = cbind(time_ser.df, as.data.frame(x_pca))
new_data = new_data[, -c(1, 3:12)] # keep only hous_st and the six principal components
# Making a train and test split for the estimation
train_data <- slice(new_data, 1:409)
test_data <- slice(new_data, -(1:409))
We next fit and compare the following methods: k-nearest neighbors, a regression tree, ridge/LASSO regression, artificial neural networks, and a support vector machine.
# Creating time slices of the training set (used in place of ordinary CV folds)
timeSlices = createTimeSlices(train_data$hous_st, # vector of outcomes in chronological order
                    initialWindow = 180, # initial training window: 15 years x 12 months
                    horizon = 12, # number of consecutive values in each validation sample: 12 months
                    fixedWindow = FALSE) # logical; if FALSE, all training samples start at 1
train_fold_inds <- timeSlices$train[seq(from = 1, to = 211, by = 12)]
val_fold_inds <- timeSlices$test[seq(from = 1, to = 211, by = 12)]
# Function to calculate the mean absolute error (redefined here for convenience)
calc_mse <- function(observed, predicted) {
  mean(abs(observed - predicted))
}
knn_fit <- caret::train(
form = hous_st ~ .,
data = train_data,
method = "knn",
preProcess = c("scale", "center"),
trControl = trainControl(method = "cv",
number = 10,
index = train_fold_inds, # I'm specifying which folds to use, for consistency across methods
indexOut = val_fold_inds, # I'm specifying which folds to use, for consistency across methods
returnResamp = "all",
savePredictions = TRUE),
tuneGrid = data.frame(k = 1:10)
)
mean(abs(test_data$hous_st - predict(knn_fit, newdata = test_data))) # MAE in the test set
## [1] 287.4804
#Output of kNN fit
knn_fit
## k-Nearest Neighbors
##
## 409 samples
## 6 predictor
##
## Pre-processing: scaled (6), centered (6)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 180, 192, 204, 216, 228, 240, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 1 160.8808 0.1875936 137.2130
## 2 151.9892 0.1520998 131.1968
## 3 154.6955 0.1541260 132.6327
## 4 160.9234 0.1306035 139.3553
## 5 161.3336 0.2314432 139.9667
## 6 160.6328 0.2051346 139.4336
## 7 161.7521 0.2503193 141.7870
## 8 163.1922 0.2509895 144.2205
## 9 164.3352 0.2324320 146.3585
## 10 167.7617 0.2027130 149.1916
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 2.
# Plotting the number of neighbors k against the cross-validated RMSE
plot(knn_fit)
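The selected tuning value can also be read off programmatically from the caret fit object:
knn_fit$bestTune # k = 2, matching the resampling summary above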
rpart_fit <- caret::train(
form = hous_st ~ .,
data = train_data,
method = "rpart",
trControl = trainControl(method = "cv",
number = 10,
index = train_fold_inds, # I'm specifying which folds to use, for consistency across methods
indexOut = val_fold_inds, # I'm specifying which folds to use, for consistency across methods
returnResamp = "all",
savePredictions = TRUE),
tuneLength = 10
)
mean(abs(test_data$hous_st - predict(rpart_fit, newdata = test_data))) # MAE in the test set
## [1] 267.5523
rpart_fit
## CART
##
## 409 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 180, 192, 204, 216, 228, 240, ...
## Resampling results across tuning parameters:
##
## cp RMSE Rsquared MAE
## 0.009542403 214.5948 0.1989273 183.0732
## 0.011132371 213.8626 0.1885151 181.4543
## 0.020995939 231.1790 0.1894002 200.1020
## 0.033556184 243.0073 0.1973972 215.0034
## 0.036199468 245.1995 0.1915208 216.6570
## 0.040026184 261.0502 0.1996859 234.7980
## 0.077193980 244.9059 0.2719042 217.4738
## 0.096501590 252.2932 0.2507244 228.4847
## 0.123447812 256.4299 0.2396500 232.4073
## 0.399577721 274.5872 NaN 259.6067
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.01113237.
plot(rpart_fit)
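As a quick diagnostic of which principal components drive the tree's splits, caret's varImp() can be applied to the fit (output omitted here):
caret::varImp(rpart_fit)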
To reduce multicollinearity among the independent variables, we can use ridge regression. When predictors in a regression are strongly correlated, the estimated coefficient of each variable depends on which other predictors are included in the model, so an individual coefficient no longer cleanly reflects that predictor's effect on the response; it captures only a partial or marginal effect conditional on the other predictors. Ridge regression adds a small amount of bias to alleviate multicollinearity.
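To make the shrinkage concrete, here is a minimal glmnet sketch outside of caret, showing that ridge coefficients shrink toward zero as lambda grows; the object names x_train, y_train, and ridge_direct are illustrative:
x_train <- as.matrix(train_data[, -1]) # PC1-PC6
y_train <- train_data$hous_st
ridge_direct <- glmnet(x_train, y_train, alpha = 0, lambda = 2^seq(10, -4)) # alpha = 0 selects the ridge penalty; same lambda grid as below
coef(ridge_direct, s = c(1024, 0.0625)) # compare coefficients at a large and a small lambda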
ridge_fit <- caret::train(
form = hous_st ~ .,
data = train_data,
method = "glmnet", # method for fit
tuneGrid = expand.grid(
alpha = 0,
lambda = 2^seq(from = -4, to = 10) # plug in different values of lambda to see how test RMSE changes
),
trControl = trainControl(method = "cv", # evaluate method performance via cross-validation
number = 10, # number of folds for cross-validation
index = train_fold_inds, # I'm specifying which folds to use, for consistency across methods
indexOut = val_fold_inds, # I'm specifying which folds to use, for consistency across methods
returnResamp = "all", # return information from cross-validation
savePredictions = TRUE) # return validation set predictions from cross-validation
)
mean(abs(test_data$hous_st - predict(ridge_fit, newdata = test_data))) # MAE in the test set
## [1] 99.16544
ridge_fit
## glmnet
##
## 409 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 180, 192, 204, 216, 228, 240, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.0625 125.1038 0.2564421 105.6468
## 0.1250 125.1038 0.2564421 105.6468
## 0.2500 125.1038 0.2564421 105.6468
## 0.5000 125.1038 0.2564421 105.6468
## 1.0000 125.1038 0.2564421 105.6468
## 2.0000 125.1038 0.2564421 105.6468
## 4.0000 125.1038 0.2564421 105.6468
## 8.0000 125.1038 0.2564421 105.6468
## 16.0000 124.6471 0.2566134 105.2336
## 32.0000 123.0606 0.2547423 104.0415
## 64.0000 130.8146 0.2522117 113.9690
## 128.0000 150.6686 0.2493908 135.2232
## 256.0000 179.9229 0.2451270 163.4515
## 512.0000 210.8423 0.2400159 193.8696
## 1024.0000 235.9032 0.2353817 220.1529
##
## Tuning parameter 'alpha' was held constant at a value of 0
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 0 and lambda = 32.
plot(ridge_fit)
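The coefficients of the selected ridge fit can be extracted at the chosen lambda:
coef(ridge_fit$finalModel, s = ridge_fit$bestTune$lambda)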
set.seed(123)
svm_fit <- caret::train(
form = hous_st ~ .,
data = train_data,
method = "svmLinear3", # method for fit
tuneGrid = expand.grid(
cost = 2^(2:5), # inverse of the regularization constant: specifies the cost of a margin violation. When the cost is small, the margins are wide and there are many support vectors.
Loss = c("L1", "L2") # include both loss functions in the tuning grid
),
trControl = trainControl(method = "cv", # evaluate method performance via cross-validation
number = 10, # number of folds for cross-validation
index = train_fold_inds, # I'm specifying which folds to use, for consistency across methods
indexOut = val_fold_inds, # I'm specifying which folds to use, for consistency across methods
returnResamp = "all", # return information from cross-validation
savePredictions = TRUE) # return validation set predictions from cross-validation
)
mean(abs(test_data$hous_st - predict(svm_fit, newdata = test_data))) # MAE in the test set
## [1] 96.70616
plot(svm_fit)
Prior to constructing the regression ANN, we must first split the new_data dataset into training and test sets and then scale only the explanatory variables. Among the choices of activation function, we use the logistic function in our model, so each feature of new_data is rescaled to fall in the [0, 1] interval.
# Scale the data:
# scale01() maps each value onto the [0, 1] interval; it is applied to the
# principal components below via dplyr's mutate_at()
scale01 = function(x){
  (x - min(x)) / (max(x) - min(x))
}
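A quick sanity check of the rescaling: the minimum maps to 0, the midpoint to 0.5, and the maximum to 1:
scale01(c(2, 4, 6)) # 0.0 0.5 1.0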
# note: only PC1-PC4 are rescaled; PC5 and PC6 are left on their original scale
new_data_preds_01 = new_data %>% mutate_at(c("PC1", "PC2", "PC3", "PC4"), scale01)
head(new_data_preds_01) # the full data frame has 511 rows; only the first six are shown
##   hous_st        PC1        PC2        PC3        PC4       PC5        PC6
## 1    1495 0.28533738 0.58966544 0.24103450 0.47238151 0.6971565 -0.9574271
## 2    1401 0.29827298 0.61509659 0.18637345 0.52564365 0.6730305 -0.8642424
## 3    1550 0.30053074 0.60770741 0.15342846 0.50045591 0.5733262 -0.7703319
## 4    1720 0.30362597 0.59624325 0.15384057 0.49302233 0.6511079 -0.7066144
## 5    1629 0.31514969 0.60361852 0.09329941 0.48509986 0.5711167 -0.6006079
## 6    1641 0.31545060 0.61111602 0.09806266 0.44281243 0.5150894 -0.6393254
# Split new_data_preds_01 into training and test sets
train_set_preds_01 = slice(new_data_preds_01, 1:407)
test_set_preds_01 = slice(new_data_preds_01, -(1:407))
# caret::createDataPartition does stratified sampling so that the training and test sets
# have similar distributions of the outcome; it returns a list of row indices for the training split
train_inds = caret::createDataPartition(new_data_preds_01$hous_st, p = 0.8)
# Creating time slices of the training set after scaling
timeSlices = createTimeSlices(train_set_preds_01$hous_st, # outcome vector in chronological order, used in place of ordinary CV
initialWindow = 180, # initial training window: 15 years x 12 months
horizon = 12, # number of consecutive values in each validation sample: 12 months
fixedWindow = FALSE) # logical; if FALSE, all training samples start at observation 1
train_fold_inds_01 <- timeSlices$train[seq(from = 1, to = 211, by = 12)]
val_fold_inds_01 <- timeSlices$test[seq(from = 1, to = 211, by = 12)]
The createTimeSlices procedure creates 211 folds. Modeling on 211 datasets is computationally expensive, so we reduce that number by telling R to skip steps: instead of advancing the validation window by one month, we advance it by 12 months. This yields 211/12 ≈ 17.6, i.e. 18 cross-validated folds (18 time slices).
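As a sketch (assuming the fold lists above are the ones used for tuning), the slices can be passed to caret::trainControl() so that every component model is tuned on the same chronological folds; savePredictions retains the validation predictions needed for the stacking step later.
# A minimal sketch: hand the time-slice folds to caret so all models share them
ts_ctrl = caret::trainControl(method = "cv",
index = train_fold_inds_01, # training rows for each of the 18 slices
indexOut = val_fold_inds_01, # 12-month validation window for each slice
savePredictions = TRUE) # keep validation predictions for stacking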
# Test set errors of each component model, using the calc_mse() function defined earlier
# Prediction from KNN model
knn_preds <- predict(knn_fit, newdata = test_data)
calc_mse(test_data$hous_st, knn_preds)
## [1] 287.4804
#Prediction from the tree model
rpart_preds <- predict(rpart_fit, newdata = test_data)
calc_mse(test_data$hous_st, rpart_preds)
## [1] 267.5523
#Prediction from Ridge regression model
ridge_preds <- predict(ridge_fit, newdata = test_data)
calc_mse(test_data$hous_st, ridge_preds)
## [1] 99.16544
#Prediction from ANN model
ann_preds <- predict(nn_fit, newdata = test_data)
calc_mse(test_data$hous_st, ann_preds)
## [1] 100.4438
#Prediction from SVM model
svm_preds <- predict(svm_fit, newdata = test_data)
calc_mse(test_data$hous_st, svm_preds)
## [1] 96.70616
The plotnet function plots a neural interpretation diagram (NID). Black lines indicate positive weights between layers and grey lines indicate negative weights; line thickness increases with the magnitude of the weight.
The first layer consists of the input variables, whose nodes are labelled I1 through I4 for the 4 explanatory variables.
The plot below has one hidden layer with 10 hidden nodes, labelled H1 through H10, and an output layer labelled O1. It also shows the bias nodes, which are connected to the hidden and output layers.
# To view a diagram of the fitted neural network, use the plotnet() function.
plotnet(nn_fit, x_names = c("P1", "P2", "P3", "P4"), y_names = "hous_st")
# Step 1: Validation-fold predictions from component models
#Ridge
ridge_val_pred <- ridge_fit$pred %>%
dplyr::filter(lambda == ridge_fit$bestTune$lambda) %>%
arrange(rowIndex) %>%
pull(pred)
#Tree
rpart_val_pred <- rpart_fit$pred %>%
dplyr::filter(cp == rpart_fit$bestTune$cp) %>%
arrange(rowIndex) %>%
pull(pred)
# KNN
knn_val_pred <- knn_fit$pred %>%
dplyr::filter(k == knn_fit$bestTune$k) %>%
arrange(rowIndex) %>%
pull(pred)
# ANN
ann_val_pred = nn_fit$pred %>%
dplyr::filter(decay == nn_fit$bestTune$decay, size == nn_fit$bestTune$size) %>%
arrange(rowIndex) %>%
pull(pred)
# SVM
svm_val_pred = svm_fit$pred %>%
dplyr::filter(cost == svm_fit$bestTune$cost, Loss == svm_fit$bestTune$Loss) %>%
arrange(rowIndex) %>%
pull(pred)
# Step 2: data set with validation-set component model predictions as explanatory variables
val_data <- train_data %>%
slice(val_fold_inds %>% unlist()) %>%
mutate(
ridge_pred = ridge_val_pred,
knn_pred = knn_val_pred,
rpart_pred = rpart_val_pred,
ann_pred = ann_val_pred,
svm_pred = svm_val_pred
)
# Step 3: fit model using component model predictions as explanatory variables
stacking_fit <- caret::train(
form = hous_st ~ knn_pred + rpart_pred + ridge_pred + ann_pred + svm_pred,
data = val_data,
method = "glmnet",
tuneGrid = expand.grid(
alpha = 0,
lambda = 2^seq(from = -4, to = 10)
)
)
coef(stacking_fit$finalModel, stacking_fit$bestTune$lambda) %>% t() # transposes the result to occupy less space
## 1 x 6 sparse Matrix of class "dgCMatrix"
## (Intercept) knn_pred rpart_pred ridge_pred ann_pred svm_pred
## 1 -10.57982 0.2463322 -0.1663135 0.382326 0.2657742 0.2854406
# Step 4 (both cross-validation and refitting to the full training set were already done
# as part of obtaining ridge_fit, knn_fit, and rpart_fit above)
knn_test_pred <- predict(knn_fit, newdata = test_data)
rpart_test_pred <- predict(rpart_fit, newdata = test_data)
ridge_test_pred <- predict(ridge_fit, newdata = test_data)
ann_test_pred = predict(nn_fit, newdata = test_data)
svm_test_pred = predict(svm_fit, newdata = test_data)
# Step 5: Assemble data frame of test set predictions from each component model
stacking_test_x = data.frame(
ridge_pred = ridge_test_pred,
knn_pred = knn_test_pred,
rpart_pred = rpart_test_pred,
ann_pred = ann_test_pred,
svm_pred = svm_test_pred
)
# Step 6: Stacked model predictions
stacking_preds = predict(stacking_fit, stacking_test_x)
as.data.frame(stacking_preds)
## stacking_preds
## 1 567.2119
## 2 587.2577
## 3 664.0555
## 4 616.9417
## 5 590.9819
## 6 662.9442
## ... (rows 7-102 omitted for brevity)
# Calculate the test set error of the stacked model
calc_mse(test_data$hous_st, stacking_preds)
## [1] 75.4695
The stacked ensemble produces a test set error of 75.47, lower than that of every component model.
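For reference, the test set errors computed above can be collected into a single comparison table; this sketch only reuses objects already created in this document.
# Gather the test set errors of all models into one data frame
data.frame(
model = c("KNN", "Tree", "Ridge", "ANN", "SVM", "Stacked ensemble"),
test_error = c(calc_mse(test_data$hous_st, knn_preds),
calc_mse(test_data$hous_st, rpart_preds),
calc_mse(test_data$hous_st, ridge_preds),
calc_mse(test_data$hous_st, ann_preds),
calc_mse(test_data$hous_st, svm_preds),
calc_mse(test_data$hous_st, stacking_preds))
)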
#Plots comparing Predicted hous_st from ensemble model vs actual hous_st from test set
df_ensemble = data.frame(hous_st = test_data$hous_st, pred_hous_st_stack_ridge = stacking_preds)
# bind the last 102 dates of the FinCrisis dataset to the predictions:
# a test set data frame with 3 columns: Date, actual and predicted hous_st
series_ensemble = cbind(tail(FinCrisis[,1], 102) , df_ensemble)
head(series_ensemble)
## Date hous_st pred_hous_st_stack_ridge
## 1 2010-07-01 546 567.2119
## 2 2010-08-01 599 587.2577
## 3 2010-09-01 594 664.0555
## 4 2010-10-01 543 616.9417
## 5 2010-11-01 545 590.9819
## 6 2010-12-01 539 662.9442
# Melt the "series" data to a long form so that we can pass the data for all 3 variables (hous_st and pred_hous_st) to one aesthetic:
series_ensemble_melt = reshape2::melt(series_ensemble, id.var = 'Date')
ggplot(series_ensemble_melt, aes(x = Date, y = value, colour = variable)) + geom_point() + xlab('Date')
#Plots comparing Predicted hous_st from individual component model vs actual hous_st from test set
df_comp = data.frame(hous_st = test_data$hous_st, knn_preds, ridge_preds, rpart_preds, ann_preds)
# bind the last 102 dates of the FinCrisis dataset to the predictions:
# a test set data frame with Date, actual hous_st, and each component model's predictions
series_comp = cbind(tail(FinCrisis[,1], 102) , df_comp)
head(series_comp)
## Date hous_st knn_preds ridge_preds rpart_preds ann_preds
## 1 2010-07-01 546 593.5 592.0258 580.5 605.1938
## 2 2010-08-01 599 584.0 626.0560 580.5 605.1938
## 3 2010-09-01 594 559.5 748.1920 580.5 605.1938
## 4 2010-10-01 543 559.5 678.9041 580.5 605.1938
## 5 2010-11-01 545 559.5 642.1370 580.5 605.1938
## 6 2010-12-01 539 661.5 708.9837 580.5 605.1938
# Melt the "series" data to a long form so that we can pass the data for all 5 variables (hous_st predictions from 4 models plus actual house_st) to one aesthetic:
series_comp_melt = reshape2::melt(series_comp, id.var = 'Date')
ggplot(series_comp_melt, aes(x = Date, y = value, colour = variable)) + geom_point() + xlab('Date')
FinCrisis = read_xlsx("FinCrisis.xlsx") # read and import an xlsx file as an R object
FinCrisis = FinCrisis[,-1] # drops the 1st column: Date
time_ser=ts(FinCrisis,frequency=12,start=c(1976,6), end=c(2018,12))
Autocorrelation measures the linear relationship between lagged values of a time series. The ACF plot shows the autocorrelation coefficients: \(r_1\) measures the relationship between y_t and y_(t-1), \(r_2\) measures the relationship between y_t and y_(t-2), and so on.
Partial autocorrelations measure the relationship between y_t and y_(t-k) after removing the effects of lags 1, 2, …, (k-1). The first partial autocorrelation is therefore identical to the first autocorrelation, because there is nothing between them to remove. Each partial autocorrelation can be estimated as the last coefficient in an autoregressive model.
ggAcf(time_ser[,1], lag=75) + ggtitle("Autocorrelation function of monthly US housing starts")
When a series has a trend, the ACF decreases gradually as the lags increase. Because the housing starts series is autocorrelated, it is not white noise. A time series is white noise if all the observations are independently and identically distributed with a mean of 0 and a constant variance. The blue lines in the plot indicate the significance bounds; spikes that exceed these bounds imply that the current level of housing starts is significantly autocorrelated with its lagged values.
The partial autocorrelation function measures the correlation between the current and a lagged value after eliminating the correlation contributed by the intermediate lags; in simple terms, the PACF strips out the shorter lags that cause autocorrelation.
From the plot below, we will include 3 lags. Adding more lags costs degrees of freedom and power, as each lag adds another regressor to the model.
ggPacf(time_ser[,1], lag=20) + ggtitle("PACF of monthly US housing starts")
ARIMA models explain or capture serial correlation present within a time series.
We test whether the first h autocorrelations are significantly different from what would be expected of a white noise process. A test for a group of autocorrelations is called a portmanteau test; here we use the Ljung-Box test.
H0: at each lag, the time series data points are i.i.d., i.e. there is no autocorrelation.
Ha: the data points at each lag are not i.i.d. and are serially correlated.
A time series is stationary when its properties are independent of the time at which we observe the data. A series with trend or seasonality is therefore non-stationary, since trend and seasonality change the values of the series at different points in time. A white noise series, by contrast, is stationary: it looks the same at any time. Generally, stationary time series have no predictable patterns in the long run; their time plots are roughly horizontal with constant mean and variance, although they may be cyclical.
When the distribution of the elements x_(t_1), …, x_(t_n) equals that of x_(t_1+m), …, x_(t_n+m) for all t_i and m, the time series model {x_t} is strictly stationary: its distribution is unchanged by an arbitrary shift in time.
We can make a non-stationary time series stationary by differencing consecutive observations. Logarithmic transformations stabilize the variance of a time series, and differencing stabilizes its mean; differencing also eliminates seasonality and trend.
We can also use the ACF plot to identify non-stationary time series. For stationary data, the ACF drops to zero quickly; for a unit root series, it decreases relatively slowly, and the value of r_1 is often large and positive.
Box.test(diff(time_ser[,1]), lag=10, type="Ljung-Box")
##
## Box-Ljung test
##
## data: diff(time_ser[, 1])
## X-squared = 70.293, df = 10, p-value = 3.892e-11
The null hypothesis is that the series is i.i.d., i.e. has no serial correlation.
The ACF of the differenced housing starts does not look like that of a white noise series: several autocorrelations lie outside the 95% limits, and the Ljung-Box Q* statistic has a very small p-value of 3.892e-11 (for h = 10). This suggests that the monthly change in US housing starts is not random; it is correlated with the changes in previous months.
Random walk models are used for non-stationary economic and financial data: they show long periods of trend and can change direction unpredictably. Their forecasts equal the last observation, since values are equally likely to move up or down.
Let a time series be {w_t : t = 1, …, n}. If the elements of the series, w_i, are independent and identically distributed (i.i.d.) with zero mean, variance σ^2 and no serial correlation (i.e. Cor(w_i, w_j) = 0, ∀ i ≠ j), then the time series is discrete white noise (DWN).
In particular, if the values w_i are drawn from a normal distribution, w_t ∼ N(0, σ^2), then the series is known as Gaussian white noise.
In a random walk, each term, x_t depends entirely on the previous term, x_(t−1) and a stochastic white noise term, w_t:
x_t = x_(t−1) + w_t
An extension of the random walk is the autoregressive model, which incorporates terms further back in time; the AR model is thus linearly dependent on its own previous values.
A time series model, {x_t}, is an autoregressive model of order p, AR(p), if:
x_t = α_1 * x_(t−1) +… + α_p * x_(t−p) + w_t, where {w_t} is white noise.
MA is a linear combination of the past white noise terms.
Intuitively, this means that the MA model sees such random white noise “shocks” directly at each current value of the model. This is in contrast to an AR(p) model, where the white noise “shocks” are only seen indirectly, via regression onto previous terms of the series.
A time series model, {x_t}, is a moving average model of order q, MA(q), if:
x_t = w_t + β_1 * w_(t−1) +…+ β_q * w_(t−q), where {w_t} is white noise
A time series model, {x_t}, is an autoregressive moving average model of order p,q, ARMA(p,q), if:
x_t = α_1 * x_(t−1) + α_2 * x_(t−2) + … + α_p * x_(t−p) + w_t + β_1 * w_(t−1) + β_2 * w_(t−2) + … + β_q * w_(t−q)
Where {w_t} is white noise with E(w_t) = 0 and variance σ^2.
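To make these definitions concrete, the processes can be simulated with stats::arima.sim(); this is a sketch, and the coefficients below are arbitrary illustrations rather than estimates from our data.
set.seed(1)
rw = cumsum(rnorm(500)) # random walk: cumulative sum of Gaussian white noise
ar2 = arima.sim(model = list(ar = c(0.5, -0.25)), n = 500) # AR(2)
ma2 = arima.sim(model = list(ma = c(0.4, 0.3)), n = 500) # MA(2)
arma11 = arima.sim(model = list(ar = 0.5, ma = 0.4), n = 500) # ARMA(1,1)
sims = ts(data.frame(rw = rw, ar2 = ar2, ma2 = ma2, arma11 = arma11))
autoplot(sims, facets = TRUE) + ggtitle("Simulated RW, AR(2), MA(2) and ARMA(1,1) series")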
The former AR model considers its own past behaviour as inputs for the model and as such attempts to capture market participant effects, such as momentum and mean-reversion in stock trading.
The latter model is used to characterise “shock” information to a series, such as a surprise earnings announcement or unexpected event (such as the BP Deepwater Horizon oil spill).
Applying the difference operator to a non-stationary series {x_t}, such as a random walk, yields a stationary series {w_t}:
∇x_t = x_t − x_(t−1) = w_t
An ARIMA model applies the difference operator d times, repeatedly if needed, to obtain a stationary series.
We can use statistical unit root tests to determine objectively whether the series requires differencing. In our analysis, we use the Augmented Dickey-Fuller (ADF) test.
The time series is modeled as z_t = α * z_(t−1) + w_t, where w_t is discrete white noise. The null hypothesis is that α = 1, i.e. the data have a unit root and are non-stationary; the alternative hypothesis is that α < 1. We look for evidence against the null hypothesis: a small p-value (e.g., < 0.05) indicates stationarity, while a large p-value (e.g., > 0.05) suggests that differencing is required.
# The ADF test can be computed using the adf.test() function from the tseries package.
adf.test(time_ser[,1])
##
## Augmented Dickey-Fuller Test
##
## data: time_ser[, 1]
## Dickey-Fuller = -2.4706, Lag order = 7, p-value = 0.3791
## alternative hypothesis: stationary
The p-value of 0.3791 is much larger than 0.05, so we fail to reject the null hypothesis: the data are not stationary. We can difference the data once and apply the test again. Occasionally the differenced data will still not appear stationary, and it may be necessary to difference a second time to obtain a stationary series.
adf.test(diff(time_ser[,1]))
##
## Augmented Dickey-Fuller Test
##
## data: diff(time_ser[, 1])
## Dickey-Fuller = -7.7858, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
This time the p-value is 0.01, well within the range we would expect for stationary data. So we can conclude that the differenced data are stationary.
This computes the Phillips-Perron test of the null hypothesis that pvt_house_comp has a unit root. Because the p-value of 0.63 > 0.05, we fail to reject the null hypothesis of a unit root in the series. Therefore, the series is non-stationary and requires differencing.
pp.test(time_ser[,8], alternative = "stationary")
##
## Phillips-Perron Unit Root Test
##
## data: time_ser[, 8]
## Dickey-Fuller Z(alpha) = -8.6233, Truncation lag parameter = 6,
## p-value = 0.6287
## alternative hypothesis: stationary
Combining autoregression with differencing and a moving average model yields the non-seasonal Autoregressive Integrated Moving Average (ARIMA) model. A series with ARIMA(0,0,0) is a white noise series.
Intuitively, the AR order p denotes the number of previous time steps that the current value of our variable depends on. For example, if at time t our variable X_t depends linearly on X_(t−1) and X_(t−2), we have 2 AR terms and hence p = 2.
The MA order q counts how many past white noise shocks we average over. For example, if the value at the current time step incorporates the shocks from the previous 2 time steps, the number of MA terms is q = 2.
R estimates the ARIMA model using maximum likelihood estimation (MLE). This approach finds the parameter values that maximize the probability of obtaining the observed data.
R will report the value of the log likelihood of the data; that is, the logarithm of the probability of the observed data coming from the estimated model. For given values of p,d and q, R will try to maximise the log likelihood when finding parameter estimates.
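As an illustration (the order here is arbitrary, chosen by hand), a manually specified model can be fit with forecast::Arima() and compared by AIC against the automatic search below:
fit_manual = forecast::Arima(time_ser[,1], order = c(2, 1, 2)) # illustrative order
AIC(fit_manual) # compare against the automatically selected model below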
# The default procedure uses some approximations to speed up the search. These approximations can be avoided with the argument approximation=FALSE.
# A much larger set of models will be searched if the argument stepwise=FALSE is used.
fit_original = auto.arima(time_ser[,1], seasonal=FALSE, stepwise = FALSE, approximation = FALSE)
summary(fit_original)
## Series: time_ser[, 1]
## ARIMA(2,1,3)
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3
## 1.3944 -0.7655 -1.8145 1.3736 -0.2945
## s.e. 0.1623 0.1049 0.1751 0.2062 0.0930
##
## sigma^2 estimated as 10659: log likelihood=-3086.27
## AIC=6184.53 AICc=6184.7 BIC=6209.94
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.6161137 102.6342 76.59767 -0.4025837 5.75423 0.3953275
## ACF1
## Training set 0.003641176
This is an ARIMA(2,1,3) model:
y_t = c + 1.3944 y_(t−1) − 0.7655 y_(t−2) + ε_t − 1.8145 ε_(t−1) + 1.3736 ε_(t−2) − 0.2945 ε_(t−3)
where c is a constant and ε_t is white noise with standard deviation sqrt(10659) ≈ 103.24.
The constant c has an important effect on the long-term forecasts obtained from these models. If c ≠ 0 and d = 0, the long-term forecasts will go to the mean of the data.
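As a quick sketch of how these forecasts behave, we can forecast 24 months ahead from the fitted model; with d = 1 and no constant, the point forecasts level off rather than trend:
# 24-month-ahead forecasts with 80/95% prediction intervals
fc_original = forecast::forecast(fit_original, h = 24)
autoplot(fc_original) + xlab("Year") + ylab("Housing starts")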
Below I search for the model with the lowest AIC by looping over several combinations of p, d and q and fitting ARIMA(p,d,q) for each. If the current AIC is lower than the best AIC found so far, the current order becomes the new best. Upon termination of the loop, the selected order is stored in house_final.order and the corresponding fit in house_final.arima:
house_final.aic = Inf
house_final.order = c(0,0,0)
for (p in 1:4) for (d in 0:2) for (q in 1:4) {
house_current.aic = AIC(arima(time_ser[,1], order=c(p, d, q)))
if (house_current.aic < house_final.aic) {
house_final.aic = house_current.aic
house_final.order = c(p, d, q)
house_final.arima = arima(time_ser[,1], order=house_final.order)
}
}
# outputs the AIC, order and ARIMA coefficients:
house_final.aic
## [1] 6177.282
house_final.order
## [1] 4 2 4
house_final.arima
##
## Call:
## arima(x = time_ser[, 1], order = house_final.order)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## 0.0022 0.5383 -0.5267 -0.1795 -1.3846 -0.1697 1.3837 -0.8293
## s.e. 0.1021 0.1199 0.0883 0.0572 0.0899 0.0708 0.0947 0.0613
##
## sigma^2 estimated as 10340: log likelihood = -3079.64, aic = 6177.28
After fitting the ARIMA(4,2,4) model, the estimated equation is:
y_t = c + 0.0022 y_(t−1) + 0.5383 y_(t−2) − 0.5267 y_(t−3) − 0.1795 y_(t−4) + ε_t − 1.3846 ε_(t−1) − 0.1697 ε_(t−2) + 1.3837 ε_(t−3) − 0.8293 ε_(t−4)
where ε_t is white noise with standard deviation sqrt(10340) ≈ 101.69.
Below is the plot of the residuals of the model, to check whether they resemble discrete white noise (DWN):
ggAcf(resid(house_final.arima), na.action = na.omit) + ggtitle("Residuals of an ARIMA(4,2,4) fit to the differenced housing starts")
The correlogram appears consistent with discrete white noise.
I have squared the residuals and plotted their correlogram to test for conditional heteroskedasticity.
ggAcf(resid(house_final.arima)^2, na.action = na.omit) + ggtitle("Squared residuals of an ARIMA(4,2,4) fit to the differenced housing starts")
The squared residuals are autocorrelated, which means the differenced housing starts series exhibits conditional heteroskedasticity.
# fit a GARCH model using the tseries library.
# fits an appropriate GARCH model, with the trace=F parameter telling R to suppress excessive output.
(ft.garch = garch(diff(time_ser[,1]), trace=F))
##
## Call:
## garch(x = diff(time_ser[, 1]), trace = F)
##
## Coefficient(s):
## a0 a1 b1
## 1.131e+04 1.836e-01 2.834e-11
# removes the first element of the residuals, since it is NA:
ft.res = ft.garch$res[-1]
ggAcf(ft.res) + ggtitle("Residuals of a GARCH(p,q) fit to the differenced housing starts")
The correlogram looks like a realisation of a discrete white noise process, indicating a good fit. Let’s now try the squared residuals:
ggAcf(ft.res^2) + ggtitle("Squared residuals of a GARCH(p,q) fit to the differenced housing starts")
Again, the ACF plot of the squared residuals indicates that the series is a white noise process. Hence, the combination of ARIMA(p,d,q) and GARCH(p,q) has "explained" the autocorrelation found in the squared residuals.
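As a cross-check (a sketch using the already loaded fGarch package), the same GARCH(1,1) specification can be re-estimated with garchFit(), which also reports standard errors for the coefficients:
# Refit GARCH(1,1) on the differenced series with fGarch
ft_fgarch = fGarch::garchFit(~ garch(1, 1), data = as.numeric(diff(time_ser[,1])), trace = FALSE)
round(coef(ft_fgarch), 4)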
Finally, I have performed the Ljung-Box test for 20 lags:
Box.test(resid(house_final.arima), lag=20, type="Ljung-Box")
##
## Box-Ljung test
##
## data: resid(house_final.arima)
## X-squared = 18.964, df = 20, p-value = 0.5242
Notice that the p-value is greater than 0.05, indicating that the residuals are independent at the 95% level; the ARIMA(4,2,4) model therefore provides a good fit.
autoplot(time_ser[,2:4], facets=TRUE) + xlab("Year") + ylab("") + ggtitle("Monthly Changes in Values of Economic Predictors")
autoplot(time_ser[,5:7], facets=TRUE) + xlab("Year") + ylab("") + ggtitle("Monthly Changes in Values of Economic Predictors")
autoplot(time_ser[,8:11], facets=TRUE) + xlab("Year") + ylab("") + ggtitle("Monthly Changes in Values of Economic Predictors")
Graphically, all the predictors are non-stationary. Another check is the Augmented Dickey-Fuller unit root test for stationarity, whose null hypothesis is that the series has a unit root, i.e. is non-stationary.
The order of integration is another concept closely associated with stationarity: it is the number of times we must difference the series to make it stationary. An I(0) series has order 0, requires no differencing, and is already stationary. A series is of order 1, or I(1), if it is non-stationary in levels but its first difference is stationary. An I(0) series frequently crosses its mean, whereas I(1) and I(2) series can wander far from their mean value and rarely cross it.
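A quick cross-check (a sketch) of the order of integration is forecast::ndiffs(), which estimates the number of differences needed to render a series stationary (by default via the KPSS test):
# Estimated number of differences required for each of the 11 series
sapply(1:ncol(time_ser), function(j) forecast::ndiffs(time_ser[, j]))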
(unit_tests = rbind(
Housing_Starts = tidy(adf.test(time_ser[,1])),
Income = tidy(adf.test(time_ser[,2])),
Federal_funds_rate = tidy(adf.test(time_ser[,3])),
Yield_spread = tidy(adf.test(time_ser[,4])),
Securitized_consumer_loans = tidy(adf.test(time_ser[,5])),
Unemployment_rate = tidy(adf.test(time_ser[,6])),
CPI = tidy(adf.test(time_ser[,7])),
Private_house_completed = tidy(adf.test(time_ser[,8])),
Mortgage_rate = tidy(adf.test(time_ser[,9])),
Real_estate_loans = tidy(adf.test(time_ser[,10])),
House_supply = tidy(adf.test(time_ser[,11]))
) %>%
kable("html") %>%
kable_styling(bootstrap_options = c("striped", "hover", "responsive"), full_width = FALSE)
)
| statistic | p.value | parameter | method | alternative | |
|---|---|---|---|---|---|
| Housing_Starts | -2.4706305 | 0.3790981 | 7 | Augmented Dickey-Fuller Test | stationary |
| Income | -1.8670599 | 0.6345780 | 7 | Augmented Dickey-Fuller Test | stationary |
| Federal_funds_rate | -3.0956900 | 0.1145223 | 7 | Augmented Dickey-Fuller Test | stationary |
| Yield_spread | -2.7597494 | 0.2567196 | 7 | Augmented Dickey-Fuller Test | stationary |
| Securitized_consumer_loans | -0.6996825 | 0.9705642 | 7 | Augmented Dickey-Fuller Test | stationary |
| Unemployment_rate | -3.2116336 | 0.0859251 | 7 | Augmented Dickey-Fuller Test | stationary |
| CPI | -2.7648175 | 0.2545744 | 7 | Augmented Dickey-Fuller Test | stationary |
| Private_house_completed | -1.6420346 | 0.7298269 | 7 | Augmented Dickey-Fuller Test | stationary |
| Mortgage_rate | -3.3027467 | 0.0702159 | 7 | Augmented Dickey-Fuller Test | stationary |
| Real_estate_loans | -1.9457001 | 0.6012911 | 7 | Augmented Dickey-Fuller Test | stationary |
| House_supply | -2.5655219 | 0.3389323 | 7 | Augmented Dickey-Fuller Test | stationary |
All the series become stationary after differencing up to 3 times.
(unit_tests = rbind(
Housing_Starts = tidy(adf.test(diff(time_ser[,1]))),
Income = tidy(adf.test(diff(diff(time_ser[,2])))),
Federal_funds_rate = tidy(adf.test(diff(time_ser[,3]))),
Yield_spread = tidy(adf.test(diff(time_ser[,4]))),
Securitized_consumer_loans = tidy(adf.test(diff(diff(time_ser[,5])))),
Unemployment_rate = tidy(adf.test(diff(time_ser[,6]))),
CPI = tidy(adf.test(diff(diff(diff(time_ser[,7]))))),
Private_house_completed = tidy(adf.test(diff(time_ser[,8]))),
Mortgage_rate = tidy(adf.test(diff(time_ser[,9]))),
Real_estate_loans = tidy(adf.test(diff(diff(time_ser[,10])))),
House_supply = tidy(adf.test(diff(time_ser[,11])))
) %>%
kable("html") %>%
kable_styling(bootstrap_options = c("striped", "hover", "responsive"), full_width = FALSE)
)
| statistic | p.value | parameter | method | alternative | |
|---|---|---|---|---|---|
| Housing_Starts | -7.785817 | 0.01 | 7 | Augmented Dickey-Fuller Test | stationary |
| Income | -15.757528 | 0.01 | 7 | Augmented Dickey-Fuller Test | stationary |
| Federal_funds_rate | -7.247981 | 0.01 | 7 | Augmented Dickey-Fuller Test | stationary |
| Yield_spread | -7.480514 | 0.01 | 7 | Augmented Dickey-Fuller Test | stationary |
| Securitized_consumer_loans | -12.691731 | 0.01 | 7 | Augmented Dickey-Fuller Test | stationary |
| Unemployment_rate | -4.733799 | 0.01 | 7 | Augmented Dickey-Fuller Test | stationary |
| CPI | -15.485510 | 0.01 | 7 | Augmented Dickey-Fuller Test | stationary |
| Private_house_completed | -6.393007 | 0.01 | 7 | Augmented Dickey-Fuller Test | stationary |
| Mortgage_rate | -7.348586 | 0.01 | 7 | Augmented Dickey-Fuller Test | stationary |
| Real_estate_loans | -13.229879 | 0.01 | 7 | Augmented Dickey-Fuller Test | stationary |
| House_supply | -8.092979 | 0.01 | 7 | Augmented Dickey-Fuller Test | stationary |
# store the differenced variables as new columns and combine them with the time_ser dataset
# cbind() (like ts.union()) binds two or more time series that share a common frequency
time_ser_diff = cbind(time_ser,
hous_st_d1 = diff(time_ser[,1]),
income_d2 = diff(diff(time_ser[,2])),
fed_fundsR_d1 = diff(time_ser[,3]),
yield_sp_d1 = diff(time_ser[,4]),
sec_conL_d2 = diff(diff(time_ser[,5])),
unempR_d1 = diff(time_ser[,6]),
CPI_d3 = diff(diff(diff(time_ser[,7]))),
pvt_house_comp_d1 = diff(time_ser[,8]),
mortgR_d1 = diff(time_ser[,9]),
real_estL_d2 = diff(diff(time_ser[,10])),
house_supply_d1 = (diff(time_ser[,11]))
)
time_ser_diff = time_ser_diff %>% as.data.frame() %>%
plyr::rename(c("time_ser.hous_st" = "hous_st", "time_ser.income" = "income",
"time_ser.fed_fundsR" = "fed_fundsR", "time_ser.yield_sp" = "yield_sp",
"time_ser.sec_conL" = "sec_conL", "time_ser.unempR" = "unempR",
"time_ser.CPI" = "CPI", "time_ser.pvt_house_comp" = "pvt_house_comp",
"time_ser.mortgR" = "mortgR", "time_ser.real_estL" = "real_estL",
"time_ser.house_supply" = "house_supply")) %>%
ts(frequency = 12, start = c(1976,6), end =c(2018,12))
When time series data are non-stationary, i.e. the values do not fluctuate around a constant mean or variance, regression models can produce "spurious regressions." Spurious regressions have a high R^2 and high residual autocorrelation. However, if Y_t and X_t are cointegrated, spurious regressions no longer arise.
To avoid spurious regressions, I have regressed housing starts on the stationary (differenced) variables as the independent variables.
mod_ts = tslm(hous_st ~ fed_fundsR_d1 + yield_sp_d1 + sec_conL_d2 + unempR_d1 + CPI_d3 + pvt_house_comp + mortgR_d1 + real_estL_d2 + house_supply + house_supply_d1 + trend + hous_st_d1 , data=na.omit(time_ser_diff))
mod_lm = lm(hous_st ~ fed_fundsR_d1 + yield_sp_d1 + sec_conL_d2 + unempR_d1 + CPI_d3 + pvt_house_comp + mortgR_d1 + real_estL_d2 + house_supply + house_supply_d1 + hous_st_d1 , data=na.omit(time_ser_diff))
stargazer(mod_lm, type ="text",
dep.var.labels=c("Housing Starts"),
covariate.labels=c("Federal Funds Rate.d1","Yield Spread.d1", "Securitized Consumer Loans.d2",
"Unemployment Rate.d1", "CPI.d3", "Private House Completed", "Mortgage Rate.d1",
"Real Estate Loans.d2", "House Supply", "House Supply.d1", "Housing Starts.d1"), out="models.txt")
##
## =========================================================
## Dependent variable:
## ---------------------------
## Housing Starts
## ---------------------------------------------------------
## Federal Funds Rate.d1 14.759
## (14.706)
##
## Yield Spread.d1 -34.682
## (36.146)
##
## Securitized Consumer Loans.d2 -0.154
## (0.429)
##
## Unemployment Rate.d1 -137.934***
## (35.581)
##
## CPI.d3 -7.428
## (7.593)
##
## Private House Completed 0.910***
## (0.015)
##
## Mortgage Rate.d1 -11.005
## (23.443)
##
## Real Estate Loans.d2 0.141
## (0.344)
##
## House Supply -61.204***
## (3.663)
##
## House Supply.d1 42.231***
## (11.825)
##
## Housing Starts.d1 0.501***
## (0.052)
##
## Constant 522.618***
## (34.314)
##
## ---------------------------------------------------------
## Observations 508
## R2 0.908
## Adjusted R2 0.906
## Residual Std. Error 124.611 (df = 496)
## F Statistic 445.757*** (df = 11; 496)
## =========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
In a time series, a variable's current value is usually similar to its value in the previous period and the one before that, which causes autocorrelation in the residuals. Such models violate the assumption of zero autocorrelation in the errors. This results in inefficient forecasts, because the residuals contain information that the model should account for: while the forecasts remain unbiased, their prediction intervals are wider than necessary. Thus, we should look at the ACF plot of the residuals.
A good forecasting method will yield residuals with the following properties:
1. The residuals are uncorrelated. If there are correlations between residuals, then there is information left in the residuals which should be used in computing forecasts.
2. The residuals have zero mean. If the residuals have a mean other than zero, then the forecasts are biased.
In addition to these essential properties, it is useful (but not necessary) for the residuals to also have the following two properties:
3. The residuals have constant variance.
4. The residuals are normally distributed.
The Breusch-Godfrey, or Lagrange Multiplier (LM), test also checks for autocorrelation in the residuals of a regression model. The null hypothesis is that serial correlation is absent in the residuals up to a specified order. The very small p-value of < 2.2e-16 below therefore indicates significant autocorrelation remaining in the residuals of the model.
# Analysing the residuals from a regression model for US housing starts.
checkresiduals(mod_lm)
##
## Breusch-Godfrey test for serial correlation of order up to 15
##
## data: Residuals
## LM test = 239.93, df = 15, p-value < 2.2e-16
The coverage of the prediction interval is inaccurate due to heteroscedasticity.
Statistically significant peaks are present at k = 1, …, 15, so the series contains unexplained serial correlation.
From the histogram, the residuals appear slightly skewed, which may also affect the coverage probability of the prediction intervals. There is significant autocorrelation: the spikes diminish only gradually and lie far beyond the blue significance lines.
Ideally, the residuals should be randomly scattered without displaying any systematic patterns. To examine this, we can look at scatterplots of the residuals against each of the predictor variables. Scatterplots that show patterns indicate a non-linear relationship; in that case we would have to modify the model.
df = as.data.frame(na.omit(time_ser_diff))
df[,"Residuals"] = as.numeric(residuals(mod_lm))
p1 = ggplot(df, aes(x=unempR_d1, y=Residuals)) +
geom_point()
p2 = ggplot(df, aes(x=pvt_house_comp, y=Residuals)) +
geom_point()
p3 = ggplot(df, aes(x=house_supply, y=Residuals)) +
geom_point()
p4 = ggplot(df, aes(x=hous_st_d1, y=Residuals)) +
geom_point()
gridExtra::grid.arrange(p1, p2, p3, p4, nrow=2)
cbind(Fitted = fitted(mod_lm),Residuals=residuals(mod_lm)) %>%
as.data.frame() %>%
ggplot(aes(x=Fitted, y=Residuals)) + geom_point() + ggtitle("Residual vs Fitted")
The random scatter suggests the errors are homoscedastic.
The standard ARIMA models forecast solely from the past values of housing starts and include no covariates: they assume future values depend linearly on past values and previous stochastic shocks. The ARIMAX model is similar to ARIMA combined with a multivariate regression, with covariates added to the right-hand side. Below is an ARIMAX model where x_t is a covariate at time t and a is its coefficient:
y_t = a * x_t + α_1 * y_(t−1) + … + α_p * y_(t−p) + w_t + β_1 * w_(t−1) + β_2 * w_(t−2) + … + β_q * w_(t−q)
where {w_t} is white noise with E(w_t) = 0 and variance σ^2.
# The auto.arima() function handles regression terms via the xreg argument.
# specify the predictor variables to include, but auto.arima() will select the best ARIMA model for the errors
fit_xreg = auto.arima(time_ser_diff[,1],xreg=time_ser_diff[,13:22],seasonal=FALSE,stepwise=FALSE, approximation=FALSE)
summary(fit_xreg)
## Series: time_ser_diff[, 1]
## Regression with ARIMA(2,1,3) errors
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 income_d2 fed_fundsR_d1
## 1.3321 -0.7435 -1.7905 1.3678 -0.2923 0.0106 17.7702
## s.e. 0.1249 0.0828 0.1369 0.1697 0.0828 0.0314 9.7085
## yield_sp_d1 sec_conL_d2 unempR_d1 CPI_d3 pvt_house_comp_d1
## -13.8636 0.1510 -71.5963 -6.6441 0.0988
## s.e. 24.1674 0.2591 24.6546 4.6276 0.0379
## mortgR_d1 real_estL_d2 house_supply_d1
## 10.0606 -0.1279 -10.3668
## s.e. 16.7195 0.2122 7.2123
##
## sigma^2 estimated as 10131: log likelihood=-3051.75
## AIC=6135.5 AICc=6136.6 BIC=6203.25
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -1.407159 99.35808 74.00433 -0.4363343 5.510389 0.381943
## ACF1
## Training set 1.687727e-06
y_t = c + 1.3321 y_(t−1) − 0.7435 y_(t−2) + ε_t − 1.7905 ε_(t−1) + 1.3678 ε_(t−2) − 0.2923 ε_(t−3) + 0.0106 * income_d2 + 17.7702 * fed_fundsR_d1 − 13.8636 * yield_sp_d1 + 0.1510 * sec_conL_d2 − 71.5963 * unempR_d1 − 6.6441 * CPI_d3 + 0.0988 * pvt_house_comp_d1 + 10.0606 * mortgR_d1 − 0.1279 * real_estL_d2 − 10.3668 * house_supply_d1
cbind("Regression Errors" = residuals(fit_xreg, type="regression"),
"ARIMA errors" = residuals(fit_xreg, type="innovation")) %>% autoplot(facets=TRUE)
It is the ARIMA errors, not the regression errors, that should resemble a white noise series.
checkresiduals(fit_xreg)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,1,3) errors
## Q* = 28.112, df = 9, p-value = 0.0009134
##
## Model df: 15. Total lags used: 24
H0: there is no autocorrelation in the residuals.
The results of the Ljung-Box test are significant (the p-value is small), so we conclude that the residuals are distinguishable from a white noise series.
The time plot and histogram of the residuals show that the variance of the residuals is almost constant.
However, the residuals are autocorrelated, which produces imprecise coverage of the prediction intervals. The histogram also shows one positive outlier, which will further affect the coverage of the prediction intervals.
# re-reads the dataset called FinCrisis
FinCrisis = read_xlsx("FinCrisis.xlsx")
FinCrisis = FinCrisis[,-1] # drops the 1st column: Date
# All the series are stationary after differencing up to 3 times.
# converts from class data.frame to ts
time_ser=ts(FinCrisis,frequency=12,start=c(1976,6), end=c(2018,12))
# Data cleaning: creating a new data frame with the differenced variables
# store the differenced variables as new columns and combine them with the time_ser dataset
# ts.union() and cbind() bind two or more time series that share a common frequency
time_ser_diff = cbind(time_ser,
hous_st_d1 = diff(time_ser[,1]),
income_d2 = diff(diff(time_ser[,2])),
fed_fundsR_d1 = diff(time_ser[,3]),
yield_sp_d1 = diff(time_ser[,4]),
sec_conL_d2 = diff(diff(time_ser[,5])),
unempR_d1 = diff(time_ser[,6]),
CPI_d3 = diff(diff(diff(time_ser[,7]))),
pvt_house_comp_d1 = diff(time_ser[,8]),
mortgR_d1 = diff(time_ser[,9]),
real_estL_d2 = diff(diff(time_ser[,10])),
house_supply_d1 = (diff(time_ser[,11]))
)
# rename variables or column names using plyr
time_ser_diff = time_ser_diff %>% as.data.frame() %>%
plyr::rename(c("time_ser.hous_st" = "hous_st",
"time_ser.income" = "income",
"time_ser.fed_fundsR" = "fed_fundsR",
"time_ser.yield_sp" = "yield_sp",
"time_ser.sec_conL" = "sec_conL",
"time_ser.unempR" = "unempR",
"time_ser.CPI" = "CPI",
"time_ser.pvt_house_comp" = "pvt_house_comp",
"time_ser.mortgR" = "mortgR",
"time_ser.real_estL" = "real_estL",
"time_ser.house_supply" = "house_supply")) %>%
ts(frequency = 12, start = c(1976,6), end =c(2018,12))
# Training and test sets of the time_ser_diff data frame
train_set = FinCrisis[1:409, ]
test_set = FinCrisis[-(1:409),]
# Training and test sets of the time_ser_diff data frame
train_set_diff = ts(time_ser_diff[1:409, ], frequency=12, start = c(1976,6))
test_set_diff = ts(time_ser_diff[-(1:409),], frequency = 12, start = c(2010,7))
PCA reduces a p-dimensional dataset to an m-dimensional one, where p > m: it describes the original data using fewer variables, or dimensions, than initially measured. We project the original time_ser_diff data onto a new, orthogonal basis, which removes multicollinearity. R computes PCA via a singular value decomposition of the scaled and centered data matrix (rather than an eigendecomposition of the covariance matrix), as this provides better numerical accuracy.
time_ser.pca = prcomp(time_ser_diff[,2:11], # numerical explanatory variables only, no response
center = TRUE, # shift the variables to be zero centered
scale. = TRUE) # scale the variables to unit variance before the analysis; generally advisable
summary(time_ser.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.4645 1.3724 1.0866 0.65482 0.49411 0.36791 0.1731
## Proportion of Variance 0.6074 0.1883 0.1181 0.04288 0.02441 0.01354 0.0030
## Cumulative Proportion 0.6074 0.7957 0.9138 0.95669 0.98111 0.99464 0.9976
## PC8 PC9 PC10
## Standard deviation 0.13519 0.06179 0.03869
## Proportion of Variance 0.00183 0.00038 0.00015
## Cumulative Proportion 0.99947 0.99985 1.00000
The proportion of variance shows how much of the total variance each principal component explains. The components are sorted by importance, so the most important components always come first.
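A scree plot (a sketch using the base screeplot() function) visualizes the same information, making it easy to see where the explained variance levels off:
# Scree plot: variances of the principal components in decreasing order
screeplot(time_ser.pca, type = "lines", main = "Scree plot of the principal components")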
# selecting the first 6 principal components to use in the model
x_pca <- time_ser.pca$x[, 1:6]
head(x_pca)
## PC1 PC2 PC3 PC4 PC5 PC6
## [1,] -2.532253 0.9589985 -0.6856539 -0.4194933 0.6971565 -0.9574271
## [2,] -2.417471 1.1089015 -0.9856733 -0.2229814 0.6730305 -0.8642424
## [3,] -2.397437 1.0653462 -1.1664993 -0.3159122 0.5733262 -0.7703319
## [4,] -2.369972 0.9977712 -1.1642373 -0.3433385 0.6511079 -0.7066144
## [5,] -2.267717 1.0412444 -1.4965310 -0.3725687 0.5711167 -0.6006079
## [6,] -2.265047 1.0854381 -1.4703869 -0.5285890 0.5150894 -0.6393254
class(x_pca)
## [1] "matrix"
# combine PC1-PC6 as explanatory variables with hous_st as the response variable in a new dataset:
pca_data = ts.union(time_ser[,1], x_pca)
# renaming variable names
pca_data = pca_data %>% as.data.frame() %>%
plyr::rename(c("time_ser[, 1]" = "hous_st", "x_pca.PC1" = "PC1",
"x_pca.PC2" = "PC2", "x_pca.PC3" = "PC3", "x_pca.PC4" = "PC4",
"x_pca.PC5" = "PC5", "x_pca.PC6" = "PC6")) %>%
ts(frequency = 12, start = c(1976,6), end =c(2018,12))
# Training and test sets of the pca_data
train_pca.ts = ts(pca_data[1:409, ], frequency=12, start = c(1976,6))
test_pca.ts = ts(pca_data[-(1:409),], frequency = 12, start = c(2010,7))
When the trends and patterns of two series move together, they may be cointegrated. The cointegration test checks whether the residuals from a regression of one series on the other are stationary: if they are, the series are cointegrated, and the correlation between the time series is statistically meaningful rather than due to chance. It is a Dickey-Fuller-type stationarity test on the residuals in which the null hypothesis is that the series are not cointegrated, so to conclude cointegration we must reject that null hypothesis.
The concept of cointegrated time series arises from the idea that housing prices, securities' prices, interest rates and other economic indicators return to their long-term average levels after significant short-term movements. Beyond imbalances in the demand and supply of houses, prices revert to their means because housing prices are highly correlated with inflation, and inflation rates are in turn highly correlated with wages, i.e. real disposable income.
Given two series x(t) and y(t), R will search for parameters α, β, and ρ such that
y(t) = α + β * x(t) + r(t), r(t) = ρ * r(t−1) + ϵ(t)
where r(t) is the residual and ϵ(t) is a series of independently and identically distributed (i.i.d.) innovations with mean 0.
If |ρ| < 1, then x(t) and y(t) are cointegrated (i.e., r(t) does not contain a unit root). If |ρ| = 1, then the residual series r(t) has a unit root and follows a random walk.
# Constructs an Engle-Granger cointegration model from pvt_house_comp (X) and hous_st (Y)
# identifies whether the pair of series is cointegrated
# the procedure selects the values of α, β, and ρ that best fit the model above:
fit_egcm = egcm(time_ser_diff[,8], time_ser_diff[,1])
summary(fit_egcm)
## Y[i] = 0.9736 X[i] + 66.8147 + R[i], R[i] = 0.7482 R[i-1] + eps[i], eps ~ N(0,125.2861^2)
## (0.0215) (69.6048) (0.0314)
##
## R[511] = -864.9835 (t = -4.788)
##
## WARNING: The series seem cointegrated but the residuals are not AR(1).
##
## Unit Root Tests of Residuals
## Statistic p-value
## Augmented Dickey Fuller (ADF) -4.276 0.00583
## Phillips-Perron (PP) -133.946 0.00010
## Pantula, Gonzales-Farias and Fuller (PGFF) 0.736 0.00010
## Elliott, Rothenberg and Stock DF-GLS (ERSD) -4.261 0.00014
## Johansen's Trace Test (JOT) -79.007 0.00010
## Schmidt and Phillips Rho (SPR) -86.676 0.00010
##
## Variances
## SD(diff(X)) = 95.289345
## SD(diff(Y)) = 112.092429
## SD(diff(residuals)) = 133.568139
## SD(residuals) = 180.670994
## SD(innovations) = 125.286102
##
## Half life = 2.389727
## R[last] = -864.983531 (t=-4.79)
plot(fit_egcm)
This test is different from the Augmented Dickey-Fuller and Phillips-Perron unit root tests: it measures the evidence of cointegration in the residuals of two time series. In this case, I have regressed housing starts on the private houses completed series.
We cannot simply apply the ADF critical values to these residuals, because test statistics computed on the residuals of an estimated cointegrating regression do not follow Dickey-Fuller distributions; instead, they follow Phillips-Ouliaris distributions.
# Engle-Granger two-step with pvt_house_comp regressed on hous_st:
# Available in po.test() from tseries
# The main argument of the function is a matrix having in its first column the dependent variable of the cointegration equation and the independent variables in the other columns.
po.test(time_ser_diff[,c(1,8)])
##
## Phillips-Ouliaris Cointegration Test
##
## data: time_ser_diff[, c(1, 8)]
## Phillips-Ouliaris demeaned = -126.95, Truncation lag parameter =
## 5, p-value = 0.01
H0: the 2 series are not cointegrated. Ha: the 2 series are cointegrated.
The PO test rejects the null of no cointegration at the 5 percent level, so the series are cointegrated. With cointegrated series we can construct a VEC model to better understand the causal relationship between the two variables.
http://blog.mindymallory.com/2018/02/basic-time-series-analysis-the-var-model-explained/
Variables that are I(0) have a short-run relationship, whereas cointegrated I(1) variables have a long-run relationship. Both short- and long-run effects are present in the error correction model. The first equation below is the ARDL(1,1) model, which presumes a long-run or steady-state association between x and y. When deriving the error correction model, we can add further lagged differences of the regressor x to remove serial correlation.
y_t = δ + θ_1* y_(t−1) + δ_0 * x_t + δ_1 * x_(t−1) + ν_t,
Δy_t = −α * [y_(t−1) − β_1 − β_2 * x_(t−1)] + δ_0 * Δx_t + ν_t
We can construct the ECM for US housing starts and housing supply as:
Δb_t = −α * [b_(t−1) − β_1 − β_2 * f_(t−1)] + (δ_0 * Δf_t) + [δ_1 * Δf_(t−1)] + ν_t (estimated using codes below)
We can estimate a vector autoregression model of order 1, VAR(1), if both series are I(0). If they are I(1), we can estimate the same equations by taking first differences.
y_t = β_10 + β_11 * y_(t−1) + β_12 * x_(t−1) + ν
x_t = β_20 + β_21 * y_(t−1) + β_22 * x_(t−1) + ν
If both the variables in the above equations are cointegrated, we have to include the cointegration relationship in the model. This model is known as the vector error correction model. The equation below displays the cointegration relationship with stationary error terms.
y_t = β_0 + β_1 * x_t + e_t
The stationarity tests indicate that both series are I(1). To check for cointegration, I estimate the cointegrating regression below and test its residuals for stationarity:
hous_st = β_1 * house_supply + e_t, so e_t = hous_st − β_1 * house_supply
house1_dyn = dynlm(hous_st ~ house_supply , data= train_set_diff) #The results of the cointegration equation
summary(house1_dyn)
##
## Time series regression with "ts" data:
## Start = 1976(6), End = 2010(6)
##
## Call:
## dynlm(formula = hous_st ~ house_supply, data = train_set_diff)
##
## Residuals:
## Min 1Q Median 3Q Max
## -806.12 -187.17 -10.31 201.78 659.48
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2322.583 50.110 46.35 <2e-16 ***
## house_supply -133.785 7.787 -17.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 283.7 on 407 degrees of freedom
## Multiple R-squared: 0.4204, Adjusted R-squared: 0.4189
## F-statistic: 295.2 on 1 and 407 DF, p-value: < 2.2e-16
resid_house1_dyn <- resid(house1_dyn)
house2_dyn <- dynlm(d(resid_house1_dyn)~L(resid_house1_dyn))
summary(house2_dyn)
##
## Time series regression with "ts" data:
## Start = 1976(7), End = 2010(6)
##
## Call:
## dynlm(formula = d(resid_house1_dyn) ~ L(resid_house1_dyn))
##
## Residuals:
## Min 1Q Median 3Q Max
## -417.99 -74.60 -6.48 74.53 383.16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.74156 6.03229 -0.289 0.773
## L(resid_house1_dyn) -0.09043 0.02144 -4.218 3.04e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 121.8 on 406 degrees of freedom
## Multiple R-squared: 0.04199, Adjusted R-squared: 0.03963
## F-statistic: 17.79 on 1 and 406 DF, p-value: 3.038e-05
Our test rejects the null of no cointegration, meaning that the series are cointegrated. With cointegrated series we can construct a VEC model to better understand the causal relationship between the two variables.
vec_hous_st<- dynlm(d(hous_st)~L(resid_house1_dyn), data = train_set_diff)
vec_house_supply <- dynlm(d(house_supply)~L(resid_house1_dyn), data = train_set_diff)
# generates the results from both equations
tidy(vec_hous_st)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -2.17 5.63 -0.386 0.700
## 2 L(resid_house1_dyn) -0.108 0.0200 -5.39 0.000000120
tidy(vec_house_supply)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.00322 0.0272 0.118 0.906
## 2 L(resid_house1_dyn) 0.000131 0.0000968 1.35 0.178
In the hous_st equation, the coefficient on the error correction term e_(t−1) is significant, implying that housing starts adjust to deviations from the long-run relationship, i.e. changes in the housing supply influence housing starts.
In the house_supply equation, by contrast, the error correction coefficient is statistically insignificant, indicating that housing supply does not adjust to deviations from the long-run relationship.
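The two-step estimates above can be cross-checked (a sketch) with the one-step VECM() function from the already loaded tsDyn package; lag = 1 and the two-step OLS estimator are illustrative choices:
# One-step VECM estimate for hous_st and house_supply
vecm_fit = tsDyn::VECM(train_set_diff[, c("hous_st", "house_supply")], lag = 1, r = 1, estim = "2OLS")
summary(vecm_fit)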
This test measures whether three or more time series are cointegrated; if they are, a linear combination of the underlying series forms a stationary series. A VAR(p) without drift is of the form:
x_t = μ + A_1 * x_(t−1) + … + A_p * x_(t−p) + w_t
μ = vector-valued mean of the series, A_i = coefficient matrices for each lag, w_t = multivariate Gaussian noise term with mean zero.
By differencing the series, we can form a Vector Error Correction model (VECM):
Δx_t = μ + A * x_(t−1) + Γ_1 * Δx_(t−1) +…+ Γ_p * Δx_(t−p) + w_t
Δx_t = x_t − x_(t−1) : differencing operator, A = coefficient matrix for the first lag, Γ_i = matrices for each differenced lag.
When the matrix A = 0, the series are not cointegrated.
We perform an eigenvalue decomposition of A. The rank r of the matrix A is the number of cointegrating relationships, and the Johansen test checks the hypotheses r = 0, r ≤ 1, and so on, up to r ≤ n − 1, where n is the number of time series under test.
H0: r = 0 implies that no cointegration is present. When the rank r > 0, there is a cointegrating relationship between at least two of the time series.
The eigenvalue decomposition outputs a set of eigenvectors; the components of the eigenvector associated with the largest eigenvalue supply the coefficients of the stationary linear combination of the time series. We should run the Johansen test of cointegration on variables that are I(1) before estimating an ECM; if the series are not cointegrated, we do not need to perform the ECM.
fit_jo = ca.jo(time_ser_diff[,c(1,8:11)],
ecdet = "none", # whether to use constant or drift term in the model
type = "trace", # use the trace test statistic or the maximum eigenvalue test statistic
spec = "longrun")
summary(fit_jo)
##
## ######################
## # Johansen-Procedure #
## ######################
##
## Test type: trace statistic , with linear trend
##
## Eigenvalues (lambda):
## [1] 0.153066962 0.142880756 0.070036417 0.015608783 0.001257115
##
## Values of teststatistic and critical values of test:
##
## test 10pct 5pct 1pct
## r <= 4 | 0.64 6.50 8.18 11.65
## r <= 3 | 8.65 15.66 17.95 23.52
## r <= 2 | 45.61 28.71 31.52 37.22
## r <= 1 | 124.08 45.23 48.28 55.43
## r = 0 | 208.64 66.49 70.60 78.87
##
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
##
## hous_st.l2 pvt_house_comp.l2 mortgR.l2
## hous_st.l2 1.0000000000 1.0000000 1.00000000
## pvt_house_comp.l2 -1.0098454639 1.0424300 -0.57269343
## mortgR.l2 -2.7489510983 48.8810518 -43.81129888
## real_estL.l2 -0.0008109261 0.5289490 -0.02919225
## house_supply.l2 46.1616988216 0.7031595 290.75075321
## real_estL.l2 house_supply.l2
## hous_st.l2 1.000000 1.000000
## pvt_house_comp.l2 -2.479229 2.187444
## mortgR.l2 2728.186555 264.462972
## real_estL.l2 5.389685 -1.462174
## house_supply.l2 -1325.584712 -226.501618
##
## Weights W:
## (This is the loading matrix)
##
## hous_st.l2 pvt_house_comp.l2 mortgR.l2
## hous_st.d -6.167287e-02 -2.077764e-02 -0.0715103915
## pvt_house_comp.d 2.515696e-01 4.212425e-04 -0.0238280145
## mortgR.d 9.762531e-05 2.810376e-05 -0.0000180714
## real_estL.d -4.589921e-03 8.162843e-03 -0.0006669728
## house_supply.d 1.355016e-04 1.162172e-04 -0.0001178565
## real_estL.l2 house_supply.l2
## hous_st.d -4.594538e-04 -3.452476e-04
## pvt_house_comp.d -1.086997e-04 -2.145177e-04
## mortgR.d -6.340109e-06 1.503845e-06
## real_estL.d -6.554387e-05 -4.430796e-05
## house_supply.d 4.468994e-06 4.035547e-06
The Johansen procedure, as implemented in the function ca.jo, helps determine the number of cointegrating vectors. Take the output of ca.jo, start with r = 0 and check whether you can reject the null hypothesis of r = 0 using the test statistic and the critical values reported in the output. If you reject, move to r = 1 and upwards until you cannot reject. The first rank that you cannot reject is the number of cointegrating vectors. If you can reject all of them, all of the series appear to be stationary.
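This decision rule is easy to automate. Below is a minimal sketch (an illustration, not part of the original analysis); it assumes the teststat and cval slots of the ca.jo object are ordered as in the printed table, from r <= n−1 down to r = 0.
# sketch: return the first rank whose trace statistic fails to exceed the
# chosen critical value (default: the 5% level)
johansen_rank = function(jo, level = "5pct") {
  stats = rev(jo@teststat)        # index 1 now corresponds to H0: r = 0
  crits = rev(jo@cval[, level])
  r = 0
  while (r < length(stats) && stats[r + 1] > crits[r + 1]) {
    r = r + 1                     # reject H0: rank <= r, so move up
  }
  r                               # the first rank we cannot reject
}
johansen_rank(fit_jo)             # 3 for the output above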
In the Johansen framework, the maximum-eigenvalue test pits the null hypothesis of r cointegrating relations against the alternative of r + 1. The largest eigenvalue generated by the test here is 0.15306696.
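For comparison, the maximum-eigenvalue variant of the test can be run by switching the type argument of ca.jo; a sketch (its output is not reproduced here):
# sketch: maximal-eigenvalue version of the Johansen test on the same series
fit_jo_eigen = ca.jo(time_ser_diff[,c(1,8:11)], ecdet = "none", type = "eigen", spec = "longrun")
summary(fit_jo_eigen)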
Next, the output shows the trace test statistic for the five hypotheses r = 0 through r ≤ 4. From r = 0 to r ≤ 2, the test statistic exceeds even the 1% critical value; for instance, when r = 0, 208.64 > 78.87. Similarly, we test the null hypothesis r ≤ 1 against the alternative r > 1: as 124.08 > 55.43, we reject r ≤ 1. However, for r ≤ 3 we fail to reject the null, as 8.65 < 17.95 at the 5% level. Thus the rank of the matrix is 3: there are three cointegrating (stationary) linear combinations of the five series.
We can form a stationary linear combination using the components of the eigenvector associated with the largest eigenvalue of 0.15306696. Correspondingly, we use the first three entries of the hous_st.l2 column, (1.0000000000, −1.0098454639, −2.7489510983), which correspond to hous_st, pvt_house_comp and mortgR.
The linear series is: 1 * hous_st − 1.0098454639 * pvt_house_comp − 2.7489510983 * mortgR
linear_series = 1.000*time_ser_diff[,1] - 1.0098454639*time_ser_diff[,8] - 2.7489510983*time_ser_diff[,9]
autoplot(linear_series, type="l") + xlab("Years") + ggtitle("Linear Combination of hous_st, pvt_house_comp & mortgR")
adf.test(linear_series)
##
## Augmented Dickey-Fuller Test
##
## data: linear_series
## Dickey-Fuller = -4.2891, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
The p-value in the Dickey-Fuller test is 0.01 < 0.05. So, we reject the null hypothesis of unit root and conclude that the series formed from the linear combination is stationary.
We treat all variables symmetrically in a VAR, i.e. we model them so that every endogenous variable can affect every other.
As a generalization of the univariate autoregressive model, a VAR forecasts a vector of time series. The system has one equation per variable, and the right-hand side of each equation contains a constant and lags of all the variables.
For a stationary series, we can fit a VAR directly to the data and forecast; this is called "VAR in levels." Otherwise, we difference the non-stationary data first and then fit the model, giving a "VAR in differences." Using non-stationary level variables in a VAR can result in spurious regression, but differencing the variables remedies the problem. In both instances, we use least squares to estimate the model.
Moreover, a non-stationary series could be cointegrated, which implies that there is a linear combination of the variables that is stationary. In that scenario, we should build a vector error correction model, i.e. a VAR model with an error correction mechanism.
The VAR model can be used when the variables under study are I(1) but not cointegrated.
Δy_t = [β_11 * Δy_(t−1)] + [β_12 * Δx_(t−1)] + ν^y_t
Δx_t = [β_21 * Δy_(t−1)] + [β_22 * Δx_(t−1)] + ν^x_t
A VAR with exogenous variables is called a VARX model.
# The R output shows the lag length selected by each of the information criteria available in the vars package.
# selection criteria summary
# using levelled variables
VARselect(na.omit(time_ser_diff[,c(1,3)]), # Data item containing the endogenous variables
type="both", # Type of deterministic regressors to include: constant, trend, both or none
exogen = na.omit(time_ser_diff[,c(3:7)]))[["selection"]]
## AIC(n) HQ(n) SC(n) FPE(n)
## 3 3 3 3
# using differenced variables to have stationarity
VARselect(na.omit(time_ser_diff[,c(12,14)]), # Data item containing the endogenous variables
type="both", # Type of deterministic regressors to include: constant, trend, both or none
exogen = na.omit(time_ser_diff[,c(15, 17, 20)]))[["selection"]]
## AIC(n) HQ(n) SC(n) FPE(n)
## 9 2 2 9
The R output shows the lag length selected by each of the information criteria available in the vars package. For the levels specification we choose p = 3, since it minimizes AIC, HQ, SC and FPE, and construct a multivariate VAR model of order 3. (For the differenced specification, HQ and SC favor p = 2, which is the order used for var2 below.)
R estimates the VAR by OLS, equation by equation, where the model is of the form: y_t = A_1 * y_(t−1) + … + A_p * y_(t−p) + C * D_t + u_t
where y_t is a K × 1 vector of endogenous variables, u_t is a spherical disturbance term of the same dimension, D_t collects the deterministic regressors, and the coefficient matrices A_1, …, A_p are of dimension K × K.
# estimates VAR by using OLS per equation
var1 = VAR(time_ser_diff[,c(1,3)], p=3, type="both", exogen = time_ser_diff[, 4:7])
stargazer(var1$varresult, type="text", dep.var.labels = c("Housing Starts, Federal Funds Rate"), align = TRUE)
##
## ==================================================================
## Dependent variable:
## -----------------------------------
## Housing Starts, Federal Funds Rate
## (1) (2)
## ------------------------------------------------------------------
## hous_st.l1 0.548*** 0.0001
## (0.044) (0.0002)
##
## fed_fundsR.l1 -18.162* 1.244***
## (10.099) (0.044)
##
## hous_st.l2 0.237*** 0.0005**
## (0.049) (0.0002)
##
## fed_fundsR.l2 -5.808 -0.592***
## (14.988) (0.066)
##
## hous_st.l3 0.190*** -0.0005**
## (0.044) (0.0002)
##
## fed_fundsR.l3 16.275* 0.187***
## (9.283) (0.041)
##
## const 219.046* -0.575
## (131.389) (0.575)
##
## trend 0.514 -0.016***
## (1.114) (0.005)
##
## yield_sp -0.003 -0.472***
## (11.133) (0.049)
##
## sec_conL -0.005 0.0002*
## (0.029) (0.0001)
##
## unempR 2.928 0.062***
## (5.228) (0.023)
##
## CPI -1.803 0.031***
## (2.391) (0.010)
##
## ------------------------------------------------------------------
## Observations 508 508
## R2 0.939 0.988
## Adjusted R2 0.938 0.988
## Residual Std. Error (df = 496) 101.439 0.444
## F Statistic (df = 11; 496) 695.626*** 3,842.526***
## ==================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# estimate VAR using differenced variables to eliminate non-stationarity
var2 = VAR(na.omit(time_ser_diff[,c(12,14)]), p=2, type="both", exogen = na.omit(time_ser_diff[,c(15,17,20)]))
stargazer(var2$varresult, type="text", dep.var.labels = c("Housing Starts.d1, Federal Funds Rate.d1"), align = TRUE)
##
## ========================================================================
## Dependent variable:
## -----------------------------------------
## Housing Starts.d1, Federal Funds Rate.d1
## (1) (2)
## ------------------------------------------------------------------------
## hous_st_d1.l1 -0.421*** -0.0002
## (0.043) (0.0002)
##
## fed_fundsR_d1.l1 -6.091 0.271***
## (9.546) (0.034)
##
## hous_st_d1.l2 -0.180*** 0.0001
## (0.044) (0.0002)
##
## fed_fundsR_d1.l2 -26.946*** -0.156***
## (9.032) (0.032)
##
## const -0.242 0.006
## (9.068) (0.032)
##
## trend -0.008 -0.00004
## (0.031) (0.0001)
##
## yield_sp_d1 -10.872 -1.392***
## (23.662) (0.083)
##
## unempR_d1 -104.370*** -0.165*
## (27.507) (0.097)
##
## mortgR_d1 -46.284*** 0.508***
## (17.193) (0.060)
##
## ------------------------------------------------------------------------
## Observations 508 508
## R2 0.193 0.587
## Adjusted R2 0.180 0.580
## Residual Std. Error (df = 499) 101.428 0.356
## F Statistic (df = 8; 499) 14.954*** 88.500***
## ========================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# function computes the multivariate Portmanteau- and Breusch-Godfrey test for serially correlated errors.
serial.test(var1, lags.pt=10)
##
## Portmanteau Test (asymptotic)
##
## data: Residuals of VAR object var1
## Chi-squared = 103.05, df = 28, p-value = 1.615e-10
serial.test(var2, lags.pt=10)
##
## Portmanteau Test (asymptotic)
##
## data: Residuals of VAR object var2
## Chi-squared = 95.235, df = 32, p-value = 3.378e-08
H0: no serial correlation; H1: serial correlation is present.
The residuals of both models fail this test: the p-values are very small, so we reject the null of no serial correlation.
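One possible remedy, sketched below (not part of the original estimation), is to refit the differenced VAR with the longer lag length suggested by AIC and re-run the test:
# sketch: refit with p = 9 (the AIC choice from VARselect) and re-test
var2_p9 = VAR(na.omit(time_ser_diff[,c(12,14)]), p = 9, type = "both",
              exogen = na.omit(time_ser_diff[,c(15,17,20)]))
serial.test(var2_p9, lags.pt = 10)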
We implement Granger causality with an F-test on the lags of the other variable: it tests whether those lags are useful in forecasting housing starts, and vice versa.
For instance, fed_fundsR Granger-causes hous_st if hous_st can be predicted more accurately from the lagged values of both hous_st and fed_fundsR than from the lagged values of hous_st alone. Thus, the Granger causality test examines whether lagged values of one variable enhance the forecasts of another.
# Granger causality
causality(var1, cause="hous_st")
## $Granger
##
## Granger causality H0: hous_st do not Granger-cause fed_fundsR
##
## data: VAR object var1
## F-Test = 2.7361, df1 = 3, df2 = 992, p-value = 0.04246
##
##
## $Instant
##
## H0: No instantaneous causality between: hous_st and fed_fundsR
##
## data: VAR object var1
## Chi-squared = 0.11538, df = 1, p-value = 0.7341
causality(var1, cause="fed_fundsR")
## $Granger
##
## Granger causality H0: fed_fundsR do not Granger-cause hous_st
##
## data: VAR object var1
## F-Test = 4.0938, df1 = 3, df2 = 992, p-value = 0.006691
##
##
## $Instant
##
## H0: No instantaneous causality between: fed_fundsR and hous_st
##
## data: VAR object var1
## Chi-squared = 0.11538, df = 1, p-value = 0.7341
Both Granger tests have p-values below 0.05, so we reject the null hypothesis in each direction: housing starts Granger-cause the federal funds rate (p = 0.042), and the federal funds rate Granger-causes housing starts (p = 0.0067). Each variable's lags carry information for predicting the other.
By contrast, we fail to reject the null of no instantaneous causality between hous_st and fed_fundsR (p = 0.734), so there is no evidence of a contemporaneous relationship between the two series.
In an IRF, we shock one variable, say the federal funds rate, and propagate the shock through the fitted VAR model for a number of periods. We can trace it through the model and see whether it impacts the other variable in a statistically significant way.
IRF_hous_st = irf(var1, impulse = 'hous_st', response = 'fed_fundsR', n.ahead = 5, ci = 0.95) # variables names in impulse and response arise from the set of endogenous variables.
IRF_fed_fundsR = irf(var1, impulse = 'fed_fundsR', response = 'hous_st', n.ahead = 5, ci = 0.95)
plot(IRF_hous_st)
plot(IRF_fed_fundsR)
An impulse (shock) to housing starts at time zero has large effects in the next period, and the effects grow over time. The dotted lines show the 95 percent interval estimates of these effects. The irf objects also store the numeric values corresponding to the impulse response graphs.
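A quick way to inspect those stored values (a sketch):
# sketch: point estimates and 95% bands for the response of hous_st
# to a fed_fundsR shock
IRF_fed_fundsR$irf$fed_fundsR
IRF_fed_fundsR$Lower$fed_fundsR
IRF_fed_fundsR$Upper$fed_fundsR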
Using the VAR model, a forecast error variance decomposition (FEVD) examines the impact of the variables on one another. We use the forecast errors of each equation in the fitted VAR model to compute the FEVD; the fitted model then determines how much of each error realization comes from unexpected changes (forecast errors) in the other variable.
Variance decomposition helps to interpret the VAR model. We can determine how much of the variation in each dependent variable is explained by each of the other variables. The FEVD shows how a shock to one time series changes future uncertainty in the other series in the system. This process evolves over time, so a shock may be unimportant in the short run but very significant in the long run.
# We can calculate FEVD with fevd() from the vars package.
FEVD = fevd(var1, n.ahead = 10)
plot(FEVD)
In the first plot, we see the FEVD for housing starts. Although the federal funds rate Granger-causes housing starts (p = 0.0067), the FEVD reveals that the magnitude of this influence is small: most of the forecast error variance of housing starts is attributable to its own shocks.
In the second plot, we see the FEVD for the federal funds rate. Here the Granger evidence was borderline (hous_st Granger-causing fed_fundsR, p = 0.042), and the decomposition shows what share of the rate's forecast error variance is attributable to shocks to housing starts.
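The numeric shares behind these plots can be read off the fevd object directly (a sketch; fevd returns one matrix per endogenous variable):
# sketch: variance shares at the 10-step horizon
round(FEVD$hous_st[10, ], 3)
round(FEVD$fed_fundsR[10, ], 3)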
The ARCH-LM test with q lags checks for the presence of ARCH effects at lags 1 to q. It tests whether the coefficients α_1, …, α_q in the equation below are jointly zero:
x^2_t = α_0 + α_1 * x^2_(t-1) +….+ α_q * x^2_(t-q) + ϵ_t
# compute univariate and multivariate ARCH-LM tests for a VAR(p).
arch.test(var1, lags.multi = 2) # uses 2 lags while computing multivariate test statistic
##
## ARCH (multivariate)
##
## data: Residuals of VAR object var1
## Chi-squared = 276.65, df = 18, p-value < 2.2e-16
When q = 2, we test for ARCH effects jointly at lags 1 and 2: H0: α_1 = α_2 = 0. As the p-value is very small, we reject the null hypothesis and conclude that ARCH effects are present at lags 1 and 2 jointly. ARCH effects are also present at higher lag orders, implying that the data are conditionally heteroskedastic.
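The same vars function can also report the equation-by-equation ARCH-LM tests (a sketch):
# sketch: univariate ARCH-LM tests alongside the multivariate statistic
arch.test(var1, lags.single = 2, lags.multi = 2, multivariate.only = FALSE)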
VAR captures the short-run relationships between the variables employed (for example, the response to a shock), while VECM captures the long-run relationship. Through the VECM's concept of error correction, we understand how deviations from the long run are "corrected."
The coefficient on the ECT in an equation of the VECM quantifies the impact of the error correction term on that dependent variable, just as any regression coefficient in a linear model. The sign shows whether there is "error correction" (the variable adjusts toward equilibrium) or the opposite (the variable deviates further from the equilibrium).
If some of the variables have ECT coefficients that are indistinguishable from zero, those variables are "dominant" in the sense that they do not adjust toward the equilibrium; rather, they "drive" the system. But there must be some other variables in the system that do adjust, and those are "dominated." (If none of the variables adjusted toward the equilibrium, there would be no cointegration.)
On the other hand, the coefficients on the variables inside the error correction term describe the long-run equilibrium relationship.
# series may be non-stationary but they are cointegrated, which means that there
# exists a linear combination of them that is stationary. In this case a VAR
# specification that includes an error correction mechanism (usually referred to
# as a vector error correction model) should be included
VECmodel = VECM(time_ser_diff[,c(1,8:10)],
lag = 3, # Number of lags
r = 1, # Number of cointegrating relationships
include = "const", # Type of deterministic regressors to include
estim = "ML") # Type of estimator: 2OLS for the two-step approach or ML for Johansen MLE
summary(VECmodel)
## #############
## ###Model VECM
## #############
## Full sample size: 511 End sample size: 507
## Number of variables: 4 Number of estimated slope parameters 56
## AIC 10388.74 BIC 10638.23 SSR 8090898
## Cointegrating vector (estimated by ML):
## hous_st pvt_house_comp mortgR real_estL
## r1 1 -1.035796 2.454442 0.006344342
##
##
## ECT Intercept
## Equation hous_st 0.0764(0.0343)* 1.8726(5.4561)
## Equation pvt_house_comp 0.3050(0.0269)*** -3.4904(4.2839)
## Equation mortgR 7.9e-05(8.7e-05) -0.0049(0.0138)
## Equation real_estL 0.0050(0.0047) 3.3416(0.7465)***
## hous_st -1 pvt_house_comp -1
## Equation hous_st -0.5633(0.0565)*** 0.1023(0.0608).
## Equation pvt_house_comp -0.2244(0.0444)*** -0.4668(0.0477)***
## Equation mortgR 0.0002(0.0001) 0.0001(0.0002)
## Equation real_estL -0.0064(0.0077) 0.0113(0.0083)
## mortgR -1 real_estL -1
## Equation hous_st -59.3497(17.8874)*** -0.4454(0.3292)
## Equation pvt_house_comp 0.1921(14.0443) -0.0020(0.2584)
## Equation mortgR 0.5564(0.0452)*** 0.0002(0.0008)
## Equation real_estL -2.7886(2.4473) 0.3922(0.0450)***
## hous_st -2 pvt_house_comp -2
## Equation hous_st -0.2942(0.0566)*** 0.0256(0.0682)
## Equation pvt_house_comp -0.1930(0.0445)*** -0.2954(0.0535)***
## Equation mortgR 1.6e-05(0.0001) 0.0001(0.0002)
## Equation real_estL -0.0080(0.0077) -0.0031(0.0093)
## mortgR -2 real_estL -2
## Equation hous_st -33.0312(19.3822). 0.1260(0.3523)
## Equation pvt_house_comp 3.1885(15.2180) 0.3928(0.2766)
## Equation mortgR -0.3511(0.0489)*** -0.0010(0.0009)
## Equation real_estL -2.4286(2.6518) 0.0921(0.0482).
## hous_st -3 pvt_house_comp -3
## Equation hous_st -0.0964(0.0493). -0.0100(0.0617)
## Equation pvt_house_comp -0.1132(0.0387)** -0.1995(0.0484)***
## Equation mortgR -0.0002(0.0001) 8.0e-05(0.0002)
## Equation real_estL -0.0093(0.0067) -0.0002(0.0084)
## mortgR -3 real_estL -3
## Equation hous_st -57.9881(18.3216)** -0.3413(0.3275)
## Equation pvt_house_comp -1.3819(14.3853) -0.4115(0.2572)
## Equation mortgR 0.0936(0.0463)* 0.0006(0.0008)
## Equation real_estL 0.3087(2.5067) 0.1059(0.0448)*
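To connect this output to the ECT discussion above, the loading and cointegrating vectors can be extracted directly; a sketch, assuming the coefA/coefB helpers available in recent tsDyn versions:
# sketch: alpha = speed of adjustment (ECT loadings) per equation,
# beta = the cointegrating (long-run) vector
coefA(VECmodel)
coefB(VECmodel)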
Generalized Autoregressive Conditional Heteroskedastic (GARCH) models are useful for analysing and forecasting volatility in time series data. A univariate GARCH(1,1) helps model volatility and its clustering.
Financial time series exhibit volatility clustering, wherein the volatility of the variable changes over time; technically, this behavior is called conditional heteroskedasticity. ARMA models do not account for volatility clustering, i.e. they are not conditionally heteroskedastic, so we need ARCH and GARCH models for such predictions.
Different forms of volatility, such as sell-offs during a financial crisis, can cause serially correlated heteroskedasticity. Thus, the time_ser data are conditionally heteroskedastic.
Most GARCH models are estimated by maximum likelihood, for example on series measuring the relative loss or profit from trading stocks in a day. If x_t is the value of housing starts at time t, then r_t = [x_t − x_(t−1)]/x_(t−1) is called the return. We observe large volatility around the 2008 financial crisis, and returns that are mostly noise with short periods of large variability.
hous_st_return = diff(time_ser_diff[,1])/stats::lag(time_ser_diff[,1], 1) # returns of housing starts
autoplot(hous_st_return) + ylab("Returns of housing starts") + xlab("Years") + ggtitle("Housing starts returns over time")
# calculate the autocorrelations and partial autocorrelations of the returns
hous_st_return.acf = autocorrelations(hous_st_return)
hous_st_return.pacf = partialAutocorrelations(hous_st_return)
Below I have plotted the ACF of the returns of housing starts. There are two bounds plotted on the graph. The straight red line represents the standard bounds under the strong white noise assumption. The second line is under the hypothesis that the process is GARCH.
hous_iid = whiteNoiseTest(hous_st_return.acf, h0 = "iid", nlags = c(5,10,20), x = hous_st_return, method = "LjungBox")
hous_iid$test
## ChiSq DF pvalue
## [1,] 55.53981 5 1.010765e-10
## [2,] 60.73615 10 2.629113e-09
## [3,] 86.60523 20 2.889828e-10
## attr(,"method")
## [1] "LjungBox"
From the Ljung-Box test, we reject the hypothesis that the series is independently and identically distributed. The series is not white noise; it is autocorrelated.
plot(hous_st_return.acf, data = hous_st_return , main="Autocorrelation test of the returns of housing starts")
From the plot above, several autocorrelations appear significant under both the iid and the GARCH hypotheses.
Next, we test the returns against a GARCH-type null hypothesis, i.e. that they form a GARCH white noise process.
wntg = whiteNoiseTest(hous_st_return.acf, h0 = "garch", nlags = c(5,10,15), x = hous_st_return)
wntg$test
## h Q pval
## [1,] 5 40.57540 1.143075e-07
## [2,] 10 42.76489 5.478140e-06
## [3,] 15 52.29641 5.045273e-06
The low p-values give reason to reject the hypothesis that the returns are a GARCH white noise process. So, we should do ARMA modelling.
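As a starting point for that ARMA modelling, one could let auto.arima from the forecast package propose an order (a sketch; the model actually fitted below uses ARMA(1,0)):
# sketch: automatic ARMA order selection for the returns series
auto.arima(hous_st_return, stationary = TRUE, seasonal = FALSE)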
We now fit GARCH models, starting with a GARCH(1,1) model with Gaussian innovations. GARCH(1,1) considers a single autoregressive and a single moving-average volatility lag. The model is:
ϵ_t = σ_t * w_t
σ^2_t = α_0 + α_1 * ϵ^2_(t−1) + β_1 * σ^2_(t−1)
The persistence of a GARCH model signifies the rate at which large volatilities decay after a shock; the key statistic in GARCH(1,1) is the sum of the two parameters α_1 and β_1. We need α_1 + β_1 < 1, in which case volatility forecasts decay toward the long-run variance; otherwise the series becomes unstable. If α_1 + β_1 > 1, the volatility predictions are explosive; if α_1 + β_1 = 1, shocks to volatility are permanent and do not decay.
In the output from garchFit, the normalized log-likelihood is the log-likelihood divided by n. The AIC and BIC values have also been normalized by dividing by n.
# fit ARMA(1,0) + GARCH(1,1) with Gaussian innovations
# (this call is reconstructed from the Call field of the output below)
fit_garch_return = garchFit(formula = ~arma(1, 0) + garch(1, 1), data = hous_st_return, cond.dist = "norm")
summary(fit_garch_return)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~arma(1, 0) + garch(1, 1), data = hous_st_return,
## cond.dist = "norm")
##
## Mean and Variance Equation:
## data ~ arma(1, 0) + garch(1, 1)
## <environment: 0x5575b883e218>
## [data = hous_st_return]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu ar1 omega alpha1 beta1
## 4.1271e-06 -3.6658e-01 6.5196e-05 6.8848e-02 9.2174e-01
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 4.127e-06 2.994e-03 0.001 0.99890
## ar1 -3.666e-01 4.253e-02 -8.619 < 2e-16 ***
## omega 6.520e-05 5.193e-05 1.256 0.20929
## alpha1 6.885e-02 2.182e-02 3.155 0.00161 **
## beta1 9.217e-01 2.286e-02 40.319 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## 599.6187 normalized: 1.178033
##
## Description:
## Sun Mar 17 15:46:30 2019 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 18.01514 0.0001224791
## Shapiro-Wilk Test R W 0.9891823 0.0008269949
## Ljung-Box Test R Q(10) 16.03317 0.09868677
## Ljung-Box Test R Q(15) 27.71855 0.02339867
## Ljung-Box Test R Q(20) 34.76176 0.02141044
## Ljung-Box Test R^2 Q(10) 26.98303 0.002620484
## Ljung-Box Test R^2 Q(15) 43.25259 0.0001438445
## Ljung-Box Test R^2 Q(20) 46.94882 0.0005962632
## LM Arch Test R TR^2 26.92906 0.007910933
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## -2.336419 -2.294843 -2.336610 -2.320117
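As a quick check on the persistence discussed above, we can sum the fitted alpha1 and beta1 (a sketch using the fitted object):
# sketch: persistence alpha1 + beta1; about 0.069 + 0.922 = 0.991 here,
# high persistence but still below 1
sum(coef(fit_garch_return)[c("alpha1", "beta1")])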
H_0 for the normality tests: the white noise innovation process is Gaussian.
The Jarque-Bera and Shapiro-Wilk tests reject this null at very small p-values, so the standardised residuals are not Gaussian. The Ljung-Box test on the residuals does not reject at lag 10 (p = 0.099) but does at lags 15 and 20, and the Ljung-Box tests on the squared residuals and the LM ARCH test all reject, suggesting that this GARCH(1,1) specification does not fully capture the ARCH effects in the returns.
The QQ-plot of the standardised residuals shows how well the fitted conditional normal distribution matches the data; given the normality rejections above, a heavier-tailed conditional distribution such as a skew-t might fit better.
plot(fit_garch_return, which = 13)
Since ARIMA models the data linearly, the forecast interval width is constant: the model does not incorporate new information or recent changes in volatility. To model non-linearity or clusters of volatility, we use ARCH/GARCH methods, which reflect more recent fluctuations in the series. The ACF and PACF of the residuals indicate whether the residuals are predictable, i.e. not white noise. Residuals of a Gaussian strict white noise series are i.i.d. with zero mean, and the ACF and PACF of the squared residuals show no significant lags. A strict white noise series cannot be predicted, either linearly or non-linearly. Below, the residuals and squared residuals of an ARIMA(4,2,4) model show a cluster of volatility, as seen in the ACF plots.
fit_xreg = arima(time_ser_diff[,1],order = c(4,2,4))
resid = resid(fit_xreg)
# ACF of the residuals
ggAcf(resid, lag = 80) + ggtitle("ACF of residuals of ARIMA(4,2,4)")
Box.test(resid, lag = 10, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: resid
## X-squared = 6.1032, df = 10, p-value = 0.8065
We fail to reject the null hypothesis that the residuals of the ARIMA(4,2,4) model are not serially correlated, i.e. the residual series behaves like white noise.
# ACF of the square of residuals
ggAcf(resid^2, lag = 80) + ggtitle("ACF of residuals squared of ARIMA(4,2,4)")
Box.test(resid^2, lag = 10, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: resid^2
## X-squared = 63.898, df = 10, p-value = 6.583e-10
The Ljung-Box test on the squared residuals, by contrast, rejects strongly (p-value 6.583e-10), confirming conditional heteroskedasticity in the ARIMA residuals. Now, I fit an ARMA(4,4) + GARCH(1,1) model to housing starts.
# fit ARMA(4,4) + GARCH(1,1) to housing starts
# (this call is reconstructed from the Call field of the output below)
fit_garch_hous_st = garchFit(formula = ~arma(4, 4) + garch(1, 1), data = time_ser_diff[, 1])
summary(fit_garch_hous_st)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~arma(4, 4) + garch(1, 1), data = time_ser_diff[,
## 1])
##
## Mean and Variance Equation:
## data ~ arma(4, 4) + garch(1, 1)
## <environment: 0x5575c0218558>
## [data = time_ser_diff[, 1]]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu ar1 ar2 ar3 ar4
## 16.053017 0.471082 0.579245 0.662428 -0.724449
## ma1 ma2 ma3 ma4 omega
## 0.083284 -0.252425 -0.649781 0.336848 2640.659451
## alpha1 beta1
## 0.424977 0.367700
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 16.05302 11.47482 1.399 0.161820
## ar1 0.47108 0.15926 2.958 0.003096 **
## ar2 0.57925 0.05429 10.670 < 2e-16 ***
## ar3 0.66243 0.05118 12.942 < 2e-16 ***
## ar4 -0.72445 0.14914 -4.858 1.19e-06 ***
## ma1 0.08328 0.15740 0.529 0.596715
## ma2 -0.25242 0.14761 -1.710 0.087245 .
## ma3 -0.64978 0.10502 -6.187 6.12e-10 ***
## ma4 0.33685 0.05913 5.697 1.22e-08 ***
## omega 2640.65945 773.37713 3.414 0.000639 ***
## alpha1 0.42498 0.09702 4.380 1.19e-05 ***
## beta1 0.36770 0.10534 3.491 0.000482 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## -3050.358 normalized: -5.96939
##
## Description:
## Sun Mar 17 15:46:31 2019 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 10.53292 0.005161848
## Shapiro-Wilk Test R W 0.9928411 0.01531213
## Ljung-Box Test R Q(10) 4.61827 0.9151778
## Ljung-Box Test R Q(15) 15.15126 0.4405772
## Ljung-Box Test R Q(20) 19.6149 0.4822394
## Ljung-Box Test R^2 Q(10) 10.52377 0.3958027
## Ljung-Box Test R^2 Q(15) 39.47513 0.0005439962
## Ljung-Box Test R^2 Q(20) 40.64349 0.004138153
## LM Arch Test R TR^2 20.30752 0.06148775
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## 11.98575 12.08523 11.98468 12.02475
plot(fit_garch_hous_st, which = 13)
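A natural next step (a sketch, not part of the original analysis) is to forecast the conditional mean and volatility from the fitted model:
# sketch: 12-step-ahead forecasts of the mean and the conditional
# standard deviation from the ARMA(4,4) + GARCH(1,1) fit
predict(fit_garch_hous_st, n.ahead = 12)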