Clean The Environment

rm(list = ls())
packages <- c("psych", 
              "stargazer", 
              "tidyverse", 
              "corrplot", 
              "ggplot2", 
              "data.table", 
              "car",
              "MASS",
              "Metrics",
              "vars",
              "tseries",
              "forecast",
              "urca",
              "gptstudio"
              
              )
 for (i in 1:length(packages)) {
    if (!packages[i] %in% rownames(installed.packages())) {
      install.packages(packages[i]
                       , repos = "http://cran.rstudio.com/"
                       , dependencies = TRUE
                       )
    }
    library(packages[i], character.only = TRUE)
  }
## Warning: 程辑包'psych'是用R版本4.3.1 来建造的
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## corrplot 0.92 loaded
## 
## 
## 载入程辑包:'data.table'
## 
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
## 
## 
## 载入需要的程辑包:carData
## 
## 
## 载入程辑包:'car'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## 
## The following object is masked from 'package:psych':
## 
##     logit
## 
## 
## 
## 载入程辑包:'MASS'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## 载入需要的程辑包:strucchange
## 
## 载入需要的程辑包:zoo
## 
## 
## 载入程辑包:'zoo'
## 
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## 载入需要的程辑包:sandwich
## 
## 
## 载入程辑包:'strucchange'
## 
## 
## The following object is masked from 'package:stringr':
## 
##     boundary
## 
## 
## 载入需要的程辑包:urca
## 
## 载入需要的程辑包:lmtest
## Warning: 程辑包'tseries'是用R版本4.3.1 来建造的
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Warning: 程辑包'forecast'是用R版本4.3.1 来建造的
## 
## 载入程辑包:'forecast'
## 
## The following object is masked from 'package:Metrics':
## 
##     accuracy

Import the Data

setwd(dir = "D:/Study/Econometric")

Test <- read.csv("D:/Study/Econometric/Assignment/Group Project/Dataset/df_train_moneysupply.csv")

split the data

Test$DATE <- as.Date(Test$DATE, format = "%Y/%m/%d")

n <- nrow(Test)
n_train <- round(0.9 * n) 

train <- Test[1:n_train, ]  # New training data set
Test <- Test[-(1:n_train), ] 
             # New predicting data set

CPIAT <- cbind(train$CPI)

Doing ADF test for stationary

#adf test for CPI
adf1<- adf.test(train$CPI, alternative = "stationary")
print(adf1)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train$CPI
## Dickey-Fuller = -3.3689, Lag order = 5, p-value = 0.06151
## alternative hypothesis: stationary
#adf test for GDP
adf2<- adf.test(train$GDP, alternative = "stationary")
print(adf2)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train$GDP
## Dickey-Fuller = -1.8356, Lag order = 5, p-value = 0.6449
## alternative hypothesis: stationary
#adf test for M2S
adf3<- adf.test(train$M2S, alternative = "stationary")
## Warning in adf.test(train$M2S, alternative = "stationary"): p-value greater
## than printed p-value
print(adf3)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train$M2S
## Dickey-Fuller = 0.84789, Lag order = 5, p-value = 0.99
## alternative hypothesis: stationary
#adf test for IR
adf4<- adf.test(train$IR, alternative = "stationary")
print(adf4)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train$IR
## Dickey-Fuller = -3.1321, Lag order = 5, p-value = 0.1025
## alternative hypothesis: stationary
#adf test for M2V
adf5<- adf.test(train$M2V, alternative = "stationary")
print(adf5)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train$M2V
## Dickey-Fuller = -0.60286, Lag order = 5, p-value = 0.9763
## alternative hypothesis: stationary

After doing the ADF test, I found CPI, GDP, M2s and M2V are non-stationary, So we need to do a difference to optimize the data.

Prepare Data for Difference

#log transform GDP
train$GDP <- log(train$GDP)

#log transform CPI
train$CPI <- log(train$CPI)

#log transform M2S
train$M2S <- log(train$M2S)

#log transform IR
train$IR <- log(train$IR)

#log transform M2V
train$M2V <- log(train$M2V)

Difference GDP

#convert the date column to date format
train$DATE <- as.Date(train$DATE, format = "%Y/%m/%d")

#convert the GDP column to time series format
train$GDP_TS <- ts(data = train$GDP, start = c(1968, 1))

#take the first difference of the GDP time series
diff1_GDP_TS <- diff(train$GDP_TS)

#add the first difference to the train data frame
train$diff1_GDP_TS <- c(NA,diff1_GDP_TS)

#plot the first difference of the GDP time series
plot(train$diff1_GDP_TS)

#perform the Augmented Dickey-Fuller test on the first difference of the GDP time series
adf_diff1_GDP_TS<- adf.test(train$diff1_GDP_TS[-1], alternative = "stationary")
## Warning in adf.test(train$diff1_GDP_TS[-1], alternative = "stationary"):
## p-value smaller than printed p-value
print(adf_diff1_GDP_TS)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train$diff1_GDP_TS[-1]
## Dickey-Fuller = -5.3597, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

Difference CPI

train$CPI_TS <- ts(data = train$CPI, start = c(1968, 1))

diff1_CPI_TS <- diff(train$CPI_TS)

train$diff1_CPI_TS <- c(NA,diff1_CPI_TS)

plot(train$diff1_CPI_TS)

adf_diff1_CPI_TS<- adf.test(train$diff1_CPI_TS[-1], alternative = "stationary")
## Warning in adf.test(train$diff1_CPI_TS[-1], alternative = "stationary"):
## p-value smaller than printed p-value
print(adf_diff1_CPI_TS)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train$diff1_CPI_TS[-1]
## Dickey-Fuller = -5.9593, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

Difference M2S

train$M2S_TS <- ts(data = train$M2S, start = c(1968, 1))

diff1_M2S_TS <- diff(train$M2S_TS)

train$diff1_M2S_TS <- c(NA,diff1_M2S_TS)

plot(train$diff1_M2S_TS)

adf_diff1_M2S_TS<- adf.test(train$diff1_M2S_TS[-1], alternative = "stationary")
## Warning in adf.test(train$diff1_M2S_TS[-1], alternative = "stationary"):
## p-value smaller than printed p-value
print(adf_diff1_M2S_TS)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train$diff1_M2S_TS[-1]
## Dickey-Fuller = -4.4111, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

Difference M2V

train$M2V_TS <- ts(data = train$M2V, start = c(1968, 1))

diff1_M2V_TS <- diff(train$M2V_TS)

train$diff1_M2V_TS <- c(NA,diff1_M2V_TS)

plot(train$diff1_M2V_TS)

adf_diff1_M2V_TS<- adf.test(train$diff1_M2V_TS[-1], alternative = "stationary")
## Warning in adf.test(train$diff1_M2V_TS[-1], alternative = "stationary"):
## p-value smaller than printed p-value
print(adf_diff1_M2V_TS)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train$diff1_M2V_TS[-1]
## Dickey-Fuller = -5.2512, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

Check Data

train[train == ""] <- NA

colSums(is.na(train))
##         DATE          CPI          GDP          M2S           IR          M2V 
##            0            0            0            0            0            0 
##       GDP_TS diff1_GDP_TS       CPI_TS diff1_CPI_TS       M2S_TS diff1_M2S_TS 
##            0            1            0            1            0            1 
##       M2V_TS diff1_M2V_TS 
##            0            1
str(train)
## 'data.frame':    191 obs. of  14 variables:
##  $ DATE        : Date, format: "1968-01-01" "1968-04-01" ...
##  $ CPI         : num  1.3 1.43 1.56 1.65 1.75 ...
##  $ GDP         : num  8.46 8.47 8.48 8.49 8.5 ...
##  $ M2S         : num  7.34 7.35 7.35 7.36 7.37 ...
##  $ IR          : num  1.5 1.7 1.81 1.79 1.39 ...
##  $ M2V         : num  0.539 0.55 0.548 0.543 0.552 ...
##  $ GDP_TS      : Time-Series  from 1968 to 2158: 8.46 8.47 8.48 8.49 8.5 ...
##  $ diff1_GDP_TS: num  NA 0.01657 0.00772 0.00393 0.01553 ...
##  $ CPI_TS      : Time-Series  from 1968 to 2158: 1.3 1.43 1.56 1.65 1.75 ...
##  $ diff1_CPI_TS: num  NA 0.1294 0.1348 0.0868 0.1036 ...
##  $ M2S_TS      : Time-Series  from 1968 to 2158: 7.34 7.35 7.35 7.36 7.37 ...
##  $ diff1_M2S_TS: num  NA 0.00689 0.00385 0.01037 0.00951 ...
##  $ M2V_TS      : Time-Series  from 1968 to 2158: 0.539 0.55 0.548 0.543 0.552 ...
##  $ diff1_M2V_TS: num  NA 0.01044 -0.00173 -0.00464 0.00867 ...

Data Preperation 2

#create a time series object for the interest rate
train$IR_TS <- ts(data = train$IR, start = c(1968, 1))

#create a model matrix for the time series objects
Model <- cbind(diff1_GDP_TS, diff1_CPI_TS, diff1_M2S_TS, train$IR_TS, diff1_M2V_TS)

I transformed my dataset into a big multivariate time series object so that its easier to deal with in the model. Mean while I think taking log could help me better with data.

VAR Model Building

# select the lag order of the VAR model
lag_order <- VARselect(Model[-1, ], lag.max = 12, type = "both")

# select the lag order based on AIC
selected_lag <- lag_order$selection["AIC(n)"]
summary(selected_lag)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       4       4       4       4       4       4
# fit the VAR model with the selected lag order
var_model <- VAR(Model[-1, ], season = 4, p = selected_lag, type = "const")

# summary of the fitted VAR model
summary(var_model)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: diff1_GDP_TS, diff1_CPI_TS, diff1_M2S_TS, train.IR_TS, diff1_M2V_TS 
## Deterministic variables: const 
## Sample size: 186 
## Log Likelihood: 2125.681 
## Roots of the characteristic polynomial:
## 0.9819 0.7887 0.7887 0.7875 0.7875 0.7212 0.6988 0.6988 0.666 0.666 0.6354 0.6354 0.634 0.634 0.6151 0.5642 0.5642 0.4443 0.4443 0.3565
## Call:
## VAR(y = Model[-1, ], p = selected_lag, type = "const", season = 4L)
## 
## 
## Estimation results for equation diff1_GDP_TS: 
## ============================================= 
## diff1_GDP_TS = diff1_GDP_TS.l1 + diff1_CPI_TS.l1 + diff1_M2S_TS.l1 + train.IR_TS.l1 + diff1_M2V_TS.l1 + diff1_GDP_TS.l2 + diff1_CPI_TS.l2 + diff1_M2S_TS.l2 + train.IR_TS.l2 + diff1_M2V_TS.l2 + diff1_GDP_TS.l3 + diff1_CPI_TS.l3 + diff1_M2S_TS.l3 + train.IR_TS.l3 + diff1_M2V_TS.l3 + diff1_GDP_TS.l4 + diff1_CPI_TS.l4 + diff1_M2S_TS.l4 + train.IR_TS.l4 + diff1_M2V_TS.l4 + const + sd1 + sd2 + sd3 
## 
##                   Estimate Std. Error t value Pr(>|t|)  
## diff1_GDP_TS.l1  0.0224132  0.1568750   0.143   0.8866  
## diff1_CPI_TS.l1 -0.0106182  0.0062115  -1.709   0.0893 .
## diff1_M2S_TS.l1  0.1557385  0.1122660   1.387   0.1673  
## train.IR_TS.l1  -0.0008413  0.0017940  -0.469   0.6398  
## diff1_M2V_TS.l1  0.1024061  0.1206089   0.849   0.3971  
## diff1_GDP_TS.l2  0.1762070  0.1646720   1.070   0.2862  
## diff1_CPI_TS.l2 -0.0092801  0.0061637  -1.506   0.1341  
## diff1_M2S_TS.l2 -0.0061019  0.1160351  -0.053   0.9581  
## train.IR_TS.l2  -0.0006328  0.0022857  -0.277   0.7822  
## diff1_M2V_TS.l2 -0.0305718  0.1294350  -0.236   0.8136  
## diff1_GDP_TS.l3  0.1358140  0.1605891   0.846   0.3990  
## diff1_CPI_TS.l3 -0.0001981  0.0061253  -0.032   0.9742  
## diff1_M2S_TS.l3 -0.0799972  0.1122340  -0.713   0.4770  
## train.IR_TS.l3   0.0037629  0.0023103   1.629   0.1053  
## diff1_M2V_TS.l3 -0.1149825  0.1271302  -0.904   0.3671  
## diff1_GDP_TS.l4  0.0873474  0.1493685   0.585   0.5595  
## diff1_CPI_TS.l4 -0.0023485  0.0058217  -0.403   0.6872  
## diff1_M2S_TS.l4 -0.0449451  0.1077070  -0.417   0.6770  
## train.IR_TS.l4  -0.0020688  0.0017985  -1.150   0.2517  
## diff1_M2V_TS.l4 -0.0341112  0.1164843  -0.293   0.7700  
## const            0.0034817  0.0013427   2.593   0.0104 *
## sd1              0.0009951  0.0015641   0.636   0.5255  
## sd2              0.0000408  0.0015782   0.026   0.9794  
## sd3             -0.0001254  0.0015728  -0.080   0.9366  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.00736 on 162 degrees of freedom
## Multiple R-Squared: 0.235,   Adjusted R-squared: 0.1264 
## F-statistic: 2.163 on 23 and 162 DF,  p-value: 0.002902 
## 
## 
## Estimation results for equation diff1_CPI_TS: 
## ============================================= 
## diff1_CPI_TS = diff1_GDP_TS.l1 + diff1_CPI_TS.l1 + diff1_M2S_TS.l1 + train.IR_TS.l1 + diff1_M2V_TS.l1 + diff1_GDP_TS.l2 + diff1_CPI_TS.l2 + diff1_M2S_TS.l2 + train.IR_TS.l2 + diff1_M2V_TS.l2 + diff1_GDP_TS.l3 + diff1_CPI_TS.l3 + diff1_M2S_TS.l3 + train.IR_TS.l3 + diff1_M2V_TS.l3 + diff1_GDP_TS.l4 + diff1_CPI_TS.l4 + diff1_M2S_TS.l4 + train.IR_TS.l4 + diff1_M2V_TS.l4 + const + sd1 + sd2 + sd3 
## 
##                  Estimate Std. Error t value Pr(>|t|)    
## diff1_GDP_TS.l1 -2.818729   2.019090  -1.396  0.16461    
## diff1_CPI_TS.l1  0.275965   0.079946   3.452  0.00071 ***
## diff1_M2S_TS.l1  0.437037   1.444940   0.302  0.76269    
## train.IR_TS.l1  -0.009100   0.023091  -0.394  0.69401    
## diff1_M2V_TS.l1  1.714421   1.552319   1.104  0.27105    
## diff1_GDP_TS.l2  0.888613   2.119443   0.419  0.67558    
## diff1_CPI_TS.l2  0.061206   0.079331   0.772  0.44152    
## diff1_M2S_TS.l2 -0.282879   1.493452  -0.189  0.85001    
## train.IR_TS.l2  -0.010885   0.029418  -0.370  0.71185    
## diff1_M2V_TS.l2  1.403462   1.665917   0.842  0.40077    
## diff1_GDP_TS.l3  2.969094   2.066893   1.437  0.15279    
## diff1_CPI_TS.l3  0.222311   0.078838   2.820  0.00540 ** 
## diff1_M2S_TS.l3  0.472835   1.444529   0.327  0.74384    
## train.IR_TS.l3   0.078382   0.029735   2.636  0.00920 ** 
## diff1_M2V_TS.l3 -0.929641   1.636253  -0.568  0.57072    
## diff1_GDP_TS.l4 -1.827094   1.922475  -0.950  0.34333    
## diff1_CPI_TS.l4 -0.181600   0.074929  -2.424  0.01647 *  
## diff1_M2S_TS.l4  2.441549   1.386264   1.761  0.08008 .  
## train.IR_TS.l4  -0.067119   0.023148  -2.900  0.00426 ** 
## diff1_M2V_TS.l4  3.166858   1.499233   2.112  0.03619 *  
## const           -0.003101   0.017281  -0.179  0.85783    
## sd1              0.025861   0.020131   1.285  0.20077    
## sd2              0.023267   0.020312   1.145  0.25370    
## sd3              0.040213   0.020243   1.987  0.04866 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.09473 on 162 degrees of freedom
## Multiple R-Squared: 0.4337,  Adjusted R-squared: 0.3533 
## F-statistic: 5.393 on 23 and 162 DF,  p-value: 3.47e-11 
## 
## 
## Estimation results for equation diff1_M2S_TS: 
## ============================================= 
## diff1_M2S_TS = diff1_GDP_TS.l1 + diff1_CPI_TS.l1 + diff1_M2S_TS.l1 + train.IR_TS.l1 + diff1_M2V_TS.l1 + diff1_GDP_TS.l2 + diff1_CPI_TS.l2 + diff1_M2S_TS.l2 + train.IR_TS.l2 + diff1_M2V_TS.l2 + diff1_GDP_TS.l3 + diff1_CPI_TS.l3 + diff1_M2S_TS.l3 + train.IR_TS.l3 + diff1_M2V_TS.l3 + diff1_GDP_TS.l4 + diff1_CPI_TS.l4 + diff1_M2S_TS.l4 + train.IR_TS.l4 + diff1_M2V_TS.l4 + const + sd1 + sd2 + sd3 
## 
##                   Estimate Std. Error t value Pr(>|t|)    
## diff1_GDP_TS.l1  0.8128010  0.2871023   2.831 0.005228 ** 
## diff1_CPI_TS.l1  0.0054400  0.0113678   0.479 0.632910    
## diff1_M2S_TS.l1 -0.1479995  0.2054617  -0.720 0.472362    
## train.IR_TS.l1   0.0111204  0.0032833   3.387 0.000887 ***
## diff1_M2V_TS.l1 -0.7174662  0.2207304  -3.250 0.001402 ** 
## diff1_GDP_TS.l2 -0.2360847  0.3013719  -0.783 0.434556    
## diff1_CPI_TS.l2  0.0124567  0.0112804   1.104 0.271111    
## diff1_M2S_TS.l2  0.0019208  0.2123599   0.009 0.992794    
## train.IR_TS.l2  -0.0078636  0.0041830  -1.880 0.061920 .  
## diff1_M2V_TS.l2 -0.0522091  0.2368833  -0.220 0.825837    
## diff1_GDP_TS.l3 -0.4840364  0.2938996  -1.647 0.101508    
## diff1_CPI_TS.l3  0.0029663  0.0112102   0.265 0.791652    
## diff1_M2S_TS.l3  0.4626881  0.2054033   2.253 0.025628 *  
## train.IR_TS.l3  -0.0003771  0.0042281  -0.089 0.929038    
## diff1_M2V_TS.l3  0.3322723  0.2326652   1.428 0.155184    
## diff1_GDP_TS.l4 -0.3295528  0.2733643  -1.206 0.229751    
## diff1_CPI_TS.l4 -0.0166306  0.0106544  -1.561 0.120496    
## diff1_M2S_TS.l4  0.1570776  0.1971183   0.797 0.426693    
## train.IR_TS.l4  -0.0025590  0.0032915  -0.777 0.438025    
## diff1_M2V_TS.l4  0.0902901  0.2131818   0.424 0.672466    
## const            0.0047708  0.0024573   1.941 0.053936 .  
## sd1             -0.0029753  0.0028626  -1.039 0.300181    
## sd2             -0.0046894  0.0028883  -1.624 0.106405    
## sd3             -0.0039747  0.0028785  -1.381 0.169227    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.01347 on 162 degrees of freedom
## Multiple R-Squared: 0.2718,  Adjusted R-squared: 0.1684 
## F-statistic: 2.629 on 23 and 162 DF,  p-value: 0.0002227 
## 
## 
## Estimation results for equation train.IR_TS: 
## ============================================ 
## train.IR_TS = diff1_GDP_TS.l1 + diff1_CPI_TS.l1 + diff1_M2S_TS.l1 + train.IR_TS.l1 + diff1_M2V_TS.l1 + diff1_GDP_TS.l2 + diff1_CPI_TS.l2 + diff1_M2S_TS.l2 + train.IR_TS.l2 + diff1_M2V_TS.l2 + diff1_GDP_TS.l3 + diff1_CPI_TS.l3 + diff1_M2S_TS.l3 + train.IR_TS.l3 + diff1_M2V_TS.l3 + diff1_GDP_TS.l4 + diff1_CPI_TS.l4 + diff1_M2S_TS.l4 + train.IR_TS.l4 + diff1_M2V_TS.l4 + const + sd1 + sd2 + sd3 
## 
##                  Estimate Std. Error t value Pr(>|t|)    
## diff1_GDP_TS.l1   0.59390    8.21310   0.072   0.9424    
## diff1_CPI_TS.l1  -0.27147    0.32520  -0.835   0.4051    
## diff1_M2S_TS.l1   2.70421    5.87762   0.460   0.6461    
## train.IR_TS.l1    0.67935    0.09393   7.233 1.78e-11 ***
## diff1_M2V_TS.l1   7.52920    6.31440   1.192   0.2349    
## diff1_GDP_TS.l2   8.72845    8.62130   1.012   0.3128    
## diff1_CPI_TS.l2  -0.23162    0.32270  -0.718   0.4739    
## diff1_M2S_TS.l2  -3.23138    6.07495  -0.532   0.5955    
## train.IR_TS.l2    0.21672    0.11966   1.811   0.0720 .  
## diff1_M2V_TS.l2   0.11155    6.77649   0.016   0.9869    
## diff1_GDP_TS.l3   8.02393    8.40754   0.954   0.3413    
## diff1_CPI_TS.l3   0.25708    0.32069   0.802   0.4239    
## diff1_M2S_TS.l3  -8.67920    5.87594  -1.477   0.1416    
## train.IR_TS.l3    0.02085    0.12095   0.172   0.8633    
## diff1_M2V_TS.l3  -2.25476    6.65582  -0.339   0.7352    
## diff1_GDP_TS.l4  14.09961    7.82009   1.803   0.0732 .  
## diff1_CPI_TS.l4   0.13879    0.30479   0.455   0.6495    
## diff1_M2S_TS.l4  -6.70295    5.63894  -1.189   0.2363    
## train.IR_TS.l4    0.02683    0.09416   0.285   0.7760    
## diff1_M2V_TS.l4 -11.85559    6.09846  -1.944   0.0536 .  
## const            -0.07337    0.07030  -1.044   0.2981    
## sd1               0.17027    0.08189   2.079   0.0392 *  
## sd2               0.14138    0.08262   1.711   0.0890 .  
## sd3               0.10023    0.08234   1.217   0.2253    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.3853 on 162 degrees of freedom
## Multiple R-Squared: 0.9481,  Adjusted R-squared: 0.9407 
## F-statistic: 128.7 on 23 and 162 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation diff1_M2V_TS: 
## ============================================= 
## diff1_M2V_TS = diff1_GDP_TS.l1 + diff1_CPI_TS.l1 + diff1_M2S_TS.l1 + train.IR_TS.l1 + diff1_M2V_TS.l1 + diff1_GDP_TS.l2 + diff1_CPI_TS.l2 + diff1_M2S_TS.l2 + train.IR_TS.l2 + diff1_M2V_TS.l2 + diff1_GDP_TS.l3 + diff1_CPI_TS.l3 + diff1_M2S_TS.l3 + train.IR_TS.l3 + diff1_M2V_TS.l3 + diff1_GDP_TS.l4 + diff1_CPI_TS.l4 + diff1_M2S_TS.l4 + train.IR_TS.l4 + diff1_M2V_TS.l4 + const + sd1 + sd2 + sd3 
## 
##                   Estimate Std. Error t value Pr(>|t|)  
## diff1_GDP_TS.l1 -0.3211076  0.3240550  -0.991   0.3232  
## diff1_CPI_TS.l1 -0.0200535  0.0128309  -1.563   0.1200  
## diff1_M2S_TS.l1  0.0578155  0.2319066   0.249   0.8034  
## train.IR_TS.l1  -0.0086285  0.0037059  -2.328   0.0211 *
## diff1_M2V_TS.l1  0.4203386  0.2491405   1.687   0.0935 .
## diff1_GDP_TS.l2  0.3996381  0.3401613   1.175   0.2418  
## diff1_CPI_TS.l2 -0.0181621  0.0127323  -1.426   0.1557  
## diff1_M2S_TS.l2 -0.0642430  0.2396926  -0.268   0.7890  
## train.IR_TS.l2   0.0050743  0.0047214   1.075   0.2841  
## diff1_M2V_TS.l2 -0.0321517  0.2673724  -0.120   0.9044  
## diff1_GDP_TS.l3  0.6164153  0.3317272   1.858   0.0650 .
## diff1_CPI_TS.l3  0.0012766  0.0126531   0.101   0.9198  
## diff1_M2S_TS.l3 -0.5153169  0.2318406  -2.223   0.0276 *
## train.IR_TS.l3   0.0028730  0.0047723   0.602   0.5480  
## diff1_M2V_TS.l3 -0.4491797  0.2626114  -1.710   0.0891 .
## diff1_GDP_TS.l4  0.5039694  0.3085488   1.633   0.1043  
## diff1_CPI_TS.l4  0.0046200  0.0120257   0.384   0.7014  
## diff1_M2S_TS.l4 -0.3509613  0.2224893  -1.577   0.1166  
## train.IR_TS.l4   0.0002646  0.0037152   0.071   0.9433  
## diff1_M2V_TS.l4 -0.2922288  0.2406203  -1.214   0.2263  
## const           -0.0038143  0.0027736  -1.375   0.1710  
## sd1              0.0039406  0.0032310   1.220   0.2244  
## sd2              0.0037599  0.0032600   1.153   0.2505  
## sd3              0.0031746  0.0032489   0.977   0.3300  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.0152 on 162 degrees of freedom
## Multiple R-Squared: 0.1651,  Adjusted R-squared: 0.04652 
## F-statistic: 1.392 on 23 and 162 DF,  p-value: 0.1211 
## 
## 
## 
## Covariance matrix of residuals:
##              diff1_GDP_TS diff1_CPI_TS diff1_M2S_TS train.IR_TS diff1_M2V_TS
## diff1_GDP_TS    5.417e-05    4.084e-05   -6.490e-07   0.0005692    6.251e-05
## diff1_CPI_TS    4.084e-05    8.973e-03   -5.447e-04   0.0069623    4.305e-04
## diff1_M2S_TS   -6.490e-07   -5.447e-04    1.814e-04  -0.0026292   -1.571e-04
## train.IR_TS     5.692e-04    6.962e-03   -2.629e-03   0.1484768    3.329e-03
## diff1_M2V_TS    6.251e-05    4.305e-04   -1.571e-04   0.0033288    2.311e-04
## 
## Correlation matrix of residuals:
##              diff1_GDP_TS diff1_CPI_TS diff1_M2S_TS train.IR_TS diff1_M2V_TS
## diff1_GDP_TS     1.000000      0.05857    -0.006547      0.2007       0.5586
## diff1_CPI_TS     0.058571      1.00000    -0.426858      0.1907       0.2989
## diff1_M2S_TS    -0.006547     -0.42686     1.000000     -0.5066      -0.7674
## train.IR_TS      0.200703      0.19074    -0.506561      1.0000       0.5682
## diff1_M2V_TS     0.558614      0.29895    -0.767381      0.5682       1.0000

some variables have p-values less than 0.05, indicating that they are significant predictors for the respective equations. For example, in the equation for diff1_GDP_TS, the constant term is significant. In the equation for diff1_CPI_TS, the lagged diff1_CPI_TS.l1, diff1_CPI_TS.l3, train.IR_TS.l3, diff1_CPI_TS.l4, train.IR_TS.l4, diff1_M2V_TS.l4, and sd3 are significant predictors. Similarly, in the equation for diff1_M2S_TS, the lagged diff1_GDP_TS.l1, train.IR_TS.l1, diff1_M2V_TS.l1, and diff1_M2S_TS.l3 are significant predictors.

In terms of model fit, the model explains around 23.5% of the variation in diff1_GDP_TS, 43.37% of the variation in diff1_CPI_TS, and 37.97% of the variation in diff1_M2S_TS, as indicated by the respective R-squared values.

diff1_GDP_TS equation: The lag 1 and lag 2 of diff1_GDP_TS seem to be insignificant, as their p-values are much greater than 0.05, meaning these variables aren’t strong predictors for diff1_GDP_TS. However, the constant term is statistically significant, implying that there’s a non-zero baseline value for diff1_GDP_TS even when all predictors are zero. The R-squared value is around 0.235, meaning the model explains about 23.5% of the variation in diff1_GDP_TS, which isn’t particularly high. This may suggest that there are other variables not included in the model that could be influential, or it could be the nature of the variable itself.

diff1_CPI_TS equation: Here, the lagged terms diff1_CPI_TS.l1, diff1_CPI_TS.l3, train.IR_TS.l3, diff1_CPI_TS.l4, train.IR_TS.l4, diff1_M2V_TS.l4, and sd3 are all statistically significant, as their p-values are less than 0.05. This means these variables are strong predictors for diff1_CPI_TS. The R-squared value here is 0.4337, meaning about 43.37% of the variation in diff1_CPI_TS can be explained by the model. While this is better than the model for diff1_GDP_TS, there’s still a significant portion of the variation left unexplained.

diff1_M2S_TS equation: The lagged terms diff1_GDP_TS.l1, train.IR_TS.l1, diff1_M2V_TS.l1, and diff1_M2S_TS.l3 are statistically significant, with p-values less than 0.05. These variables are strong predictors for diff1_M2S_TS. The R-squared value for this equation is 0.3797, meaning the model explains about 37.97% of the variation in diff1_M2S_TS. Again, this leaves a substantial portion of the variation unexplained.

Anlyze the result

# Extract residuals from your VAR model
residuals <- resid(var_model)

# Loop through each variable
for (i in 1:ncol(residuals)) {
  cat("\nVariable", colnames(residuals)[i], "\n")
  
  # Durbin Watson Test for first order autocorrelation
  print(durbinWatsonTest(residuals[,i]))
  
  # Ljung-Box test for higher order autocorrelation
  print(Box.test(residuals[,i], lag = selected_lag, type = "Ljung-Box"))
  
  # Shapiro-Wilk normality test
  print(shapiro.test(residuals[,i]))
}
## 
## Variable diff1_GDP_TS 
## [1] 1.972103
## 
##  Box-Ljung test
## 
## data:  residuals[, i]
## X-squared = 0.15491, df.AIC(n) = 4, p-value = 0.9972
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals[, i]
## W = 0.95976, p-value = 3.687e-05
## 
## 
## Variable diff1_CPI_TS 
## [1] 2.091993
## 
##  Box-Ljung test
## 
## data:  residuals[, i]
## X-squared = 1.7996, df.AIC(n) = 4, p-value = 0.7726
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals[, i]
## W = 0.94763, p-value = 2.463e-06
## 
## 
## Variable diff1_M2S_TS 
## [1] 2.02439
## 
##  Box-Ljung test
## 
## data:  residuals[, i]
## X-squared = 0.090305, df.AIC(n) = 4, p-value = 0.999
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals[, i]
## W = 0.7606, p-value = 4.071e-16
## 
## 
## Variable train.IR_TS 
## [1] 1.973166
## 
##  Box-Ljung test
## 
## data:  residuals[, i]
## X-squared = 3.7467, df.AIC(n) = 4, p-value = 0.4414
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals[, i]
## W = 0.74736, p-value < 2.2e-16
## 
## 
## Variable diff1_M2V_TS 
## [1] 1.97876
## 
##  Box-Ljung test
## 
## data:  residuals[, i]
## X-squared = 0.1241, df.AIC(n) = 4, p-value = 0.9982
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals[, i]
## W = 0.67227, p-value < 2.2e-16

In summary, the residuals of my model show no autocorrelation (which is good), but they are not normally distributed. This lack of normality might be a concern.

Forcasting

# predict the next 21 period
predictions <- predict(var_model, n.ahead = 21)

# plot the predicted values
plot(predictions)

# extract the predicted values

point_forecasts <- lapply(predictions$fcst, function(x) x[, "fcst"])
# combine the predicted values

predicted_values <- do.call(cbind, point_forecasts)

predicted_values <- as.data.frame(predicted_values)


PV <- 0.78518793 + cumsum(predicted_values$diff1_CPI_TS)*10

# Accuracy_df <- data.frame(var1 = Test$CPI, var2 = PV)

# ggplot(data = NULL, mapping = aes(), ..., environment = parent.frame())

Plot_df <- read.csv("D:/Study/Econometric/Assignment/Group Project/Dataset/df_train_moneysupply.csv")

ActualCPI <- as.matrix(Plot_df$CPI)

df1 <- data.frame(
  x = 1:length(ActualCPI),
  y = ActualCPI,
  category = "ActualCPI"
)

df2 <- data.frame(
  x = 1:length(PV),
  y = PV,
  category = "PredictCPI"
)


# Combine all dataframes
df <- rbind(df1, df2)

# Plot the line graph
PP <- ggplot(df, aes(x = x, y = y, color = category)) +
  geom_line() +
  labs(title = "Line Graph", x = "Quater", y = "CPIV")
PP

prediction_error <- abs(PV - Test$CPI)
accuracy205 <- mean(prediction_error / Test$CPI <= 0.05) * 100
accuracy21 <- mean(prediction_error / Test$CPI <= 0.1) * 100
accuracy22 <- mean(prediction_error / Test$CPI <= 0.2) * 100
accuracy23 <- mean(prediction_error / Test$CPI <= 0.3) * 100
accuracy24 <- mean(prediction_error / Test$CPI <= 0.4) * 100
accuracy25 <- mean(prediction_error / Test$CPI <= 0.5) * 100

cat("Predicting accuracy rateï¼", accuracy205, "%\n, when tolerance is +-5%")
## Predicting accuracy rateï¼ 14.28571 %
## , when tolerance is +-5%
predictions1 <- predict(var_model, newdata = Test)         # NO ERROR
predictions1
## $diff1_GDP_TS
##              fcst        lower      upper         CI
##  [1,] 0.006596405 -0.007828879 0.02102169 0.01442528
##  [2,] 0.006429573 -0.008557621 0.02141677 0.01498719
##  [3,] 0.007449880 -0.008202769 0.02310253 0.01565265
##  [4,] 0.006161897 -0.009783978 0.02210777 0.01594588
##  [5,] 0.005446987 -0.010680295 0.02157427 0.01612728
##  [6,] 0.005878494 -0.010362803 0.02211979 0.01624130
##  [7,] 0.006866974 -0.009400791 0.02313474 0.01626777
##  [8,] 0.006306062 -0.010003529 0.02261565 0.01630959
##  [9,] 0.005936195 -0.010386521 0.02225891 0.01632272
## [10,] 0.006120304 -0.010221127 0.02246173 0.01634143
## 
## $diff1_CPI_TS
##               fcst      lower     upper        CI
##  [1,]  0.003344210 -0.1823191 0.1890076 0.1856634
##  [2,]  0.045923853 -0.1510426 0.2428903 0.1969664
##  [3,]  0.045994131 -0.1638073 0.2557956 0.2098015
##  [4,]  0.008902849 -0.2138551 0.2316608 0.2227579
##  [5,]  0.034060342 -0.1982998 0.2664205 0.2323601
##  [6,]  0.006743408 -0.2292640 0.2427509 0.2360075
##  [7,] -0.002444287 -0.2399370 0.2350484 0.2374927
##  [8,] -0.001753510 -0.2425496 0.2390426 0.2407961
##  [9,]  0.015130858 -0.2272799 0.2575416 0.2424108
## [10,] -0.012028106 -0.2549397 0.2308834 0.2429116
## 
## $diff1_M2S_TS
##              fcst       lower      upper         CI
##  [1,] 0.005332765 -0.02106744 0.03173297 0.02640020
##  [2,] 0.015201973 -0.01368544 0.04408938 0.02888741
##  [3,] 0.009058840 -0.02024597 0.03836365 0.02930481
##  [4,] 0.006340319 -0.02344339 0.03612403 0.02978371
##  [5,] 0.007888453 -0.02227962 0.03805653 0.03016808
##  [6,] 0.010479049 -0.01981558 0.04077368 0.03029463
##  [7,] 0.008082089 -0.02228605 0.03845023 0.03036814
##  [8,] 0.007348004 -0.02307225 0.03776826 0.03042026
##  [9,] 0.006814159 -0.02364128 0.03726960 0.03045544
## [10,] 0.010812236 -0.01967040 0.04129487 0.03048264
## 
## $train.IR_TS
##              fcst      lower     upper        CI
##  [1,]  0.04773355 -0.7074934 0.8029605 0.7552269
##  [2,] -0.06420175 -1.0394276 0.9110241 0.9752258
##  [3,]  0.04412126 -1.1387304 1.2269729 1.1828516
##  [4,]  0.03746142 -1.3571336 1.4320564 1.3945950
##  [5,]  0.01088449 -1.5061324 1.5279014 1.5170169
##  [6,] -0.04620906 -1.6752569 1.5828388 1.6290479
##  [7,]  0.05088834 -1.6750033 1.7767800 1.7258916
##  [8,]  0.07573957 -1.7377993 1.8892784 1.8135388
##  [9,]  0.06192023 -1.8300926 1.9539331 1.8920129
## [10,] -0.01383264 -1.9789300 1.9512648 1.9650974
## 
## $diff1_M2V_TS
##               fcst       lower      upper         CI
##  [1,] -0.001230218 -0.03102837 0.02856794 0.02979815
##  [2,] -0.008111462 -0.03890984 0.02268691 0.03079838
##  [3,] -0.002765513 -0.03417279 0.02864177 0.03140728
##  [4,] -0.001751668 -0.03350027 0.02999694 0.03174861
##  [5,] -0.002848360 -0.03483210 0.02913538 0.03198374
##  [6,] -0.005838095 -0.03789261 0.02621642 0.03205451
##  [7,] -0.001822202 -0.03389608 0.03025168 0.03207388
##  [8,] -0.002070286 -0.03414971 0.03000914 0.03207943
##  [9,] -0.002169913 -0.03426303 0.02992320 0.03209312
## [10,] -0.005224825 -0.03734637 0.02689672 0.03212155
plot(predictions1)