panel vector autoregression model with R

1 Dynamic panel estimation with panelvar
- 1.1 abdata数据集
2 Panel vector autoregression with panelvar
3 自己的数据集

1 Dynamic panel estimation with panelvar

library(pacman)
p_load(tidyverse,panelvar)

1.1 abdata数据集

data("abdata")
abdata %>% head(10)

##      c1 ind year    emp    wage     cap indoutpt        n        w          k
## 1   1-1   7 1977  5.041 13.1516  0.5894  95.7072 1.617604 2.576543 -0.5286502
## 2   2-1   7 1978  5.600 12.3018  0.6318  97.3569 1.722767 2.509746 -0.4591824
## 3   3-1   7 1979  5.015 12.8395  0.6771  99.6083 1.612433 2.552526 -0.3899363
## 4   4-1   7 1980  4.715 13.8039  0.6171 100.5501 1.550749 2.624951 -0.4827242
## 5   5-1   7 1981  4.093 14.2897  0.5076  99.5581 1.409278 2.659539 -0.6780615
## 6   6-1   7 1982  3.166 14.8681  0.4229  98.6151 1.152469 2.699218 -0.8606195
## 7   7-1   7 1983  2.936 13.7784  0.3920 100.0301 1.077048 2.623102 -0.9364935
## 8   8-1   7 1977 71.319 14.7909 16.9363  95.7072 4.267163 2.694012  2.8294592
## 9   9-1   7 1978 70.643 14.1036 17.2422  97.3569 4.257639 2.646430  2.8473599
## 10 10-1   7 1979 70.918 14.9534 17.5413  99.6083 4.261524 2.704939  2.8645580
##          ys rec yearm1 id      nL1      nL2      wL1        kL1        kL2
## 1  4.561294   1   1977  1       NA       NA       NA         NA         NA
## 2  4.578383   2   1977  1 1.617604       NA 2.576543 -0.5286502         NA
## 3  4.601245   3   1978  1 1.722767 1.617604 2.509746 -0.4591824 -0.5286502
## 4  4.610656   4   1979  1 1.612433 1.722767 2.552526 -0.3899363 -0.4591824
## 5  4.600741   5   1980  1 1.550749 1.612433 2.624951 -0.4827242 -0.3899363
## 6  4.591224   6   1981  1 1.409278 1.550749 2.659539 -0.6780615 -0.4827242
## 7  4.605471   7   1982  1 1.152469 1.409278 2.699218 -0.8606195 -0.6780615
## 8  4.561294   8   1983  2       NA       NA       NA         NA         NA
## 9  4.578383   9   1977  2 4.267163       NA 2.694012  2.8294592         NA
## 10 4.601245  10   1978  2 4.257639 4.267163 2.646430  2.8473599  2.8294592
##        ysL1     ysL2 yr1976 yr1977 yr1978 yr1979 yr1980 yr1981 yr1982 yr1983
## 1        NA       NA      0      1      0      0      0      0      0      0
## 2  4.561294       NA      0      0      1      0      0      0      0      0
## 3  4.578383 4.561294      0      0      0      1      0      0      0      0
## 4  4.601245 4.578383      0      0      0      0      1      0      0      0
## 5  4.610656 4.601245      0      0      0      0      0      1      0      0
## 6  4.600741 4.610656      0      0      0      0      0      0      1      0
## 7  4.591224 4.600741      0      0      0      0      0      0      0      1
## 8        NA       NA      0      1      0      0      0      0      0      0
## 9  4.561294       NA      0      0      1      0      0      0      0      0
## 10 4.578383 4.561294      0      0      0      1      0      0      0      0
##    yr1984
## 1       0
## 2       0
## 3       0
## 4       0
## 5       0
## 6       0
## 7       0
## 8       0
## 9       0
## 10      0

abdata %>% glimpse()

## Observations: 1,031
## Variables: 30
## $ c1       <chr> "1-1", "2-1", "3-1", "4-1", "5-1", "6-1", "7-1", "8-1", "9...
## $ ind      <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7...
## $ year     <fct> 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1977, 1978, 1979...
## $ emp      <dbl> 5.041, 5.600, 5.015, 4.715, 4.093, 3.166, 2.936, 71.319, 7...
## $ wage     <dbl> 13.1516, 12.3018, 12.8395, 13.8039, 14.2897, 14.8681, 13.7...
## $ cap      <dbl> 0.5894, 0.6318, 0.6771, 0.6171, 0.5076, 0.4229, 0.3920, 16...
## $ indoutpt <dbl> 95.7072, 97.3569, 99.6083, 100.5501, 99.5581, 98.6151, 100...
## $ n        <dbl> 1.617604, 1.722767, 1.612433, 1.550749, 1.409278, 1.152469...
## $ w        <dbl> 2.576543, 2.509746, 2.552526, 2.624951, 2.659539, 2.699218...
## $ k        <dbl> -0.5286502, -0.4591824, -0.3899363, -0.4827242, -0.6780615...
## $ ys       <dbl> 4.561294, 4.578383, 4.601245, 4.610656, 4.600741, 4.591224...
## $ rec      <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,...
## $ yearm1   <dbl> 1977, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1977, 1978...
## $ id       <dbl> 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3...
## $ nL1      <dbl> NA, 1.617604, 1.722767, 1.612433, 1.550749, 1.409278, 1.15...
## $ nL2      <dbl> NA, NA, 1.617604, 1.722767, 1.612433, 1.550749, 1.409278, ...
## $ wL1      <dbl> NA, 2.576543, 2.509746, 2.552526, 2.624951, 2.659539, 2.69...
## $ kL1      <dbl> NA, -0.5286502, -0.4591824, -0.3899363, -0.4827242, -0.678...
## $ kL2      <dbl> NA, NA, -0.5286502, -0.4591824, -0.3899363, -0.4827242, -0...
## $ ysL1     <dbl> NA, 4.561294, 4.578383, 4.601245, 4.610656, 4.600741, 4.59...
## $ ysL2     <dbl> NA, NA, 4.561294, 4.578383, 4.601245, 4.610656, 4.600741, ...
## $ yr1976   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ yr1977   <int> 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0...
## $ yr1978   <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0...
## $ yr1979   <int> 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0...
## $ yr1980   <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0...
## $ yr1981   <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0...
## $ yr1982   <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1...
## $ yr1983   <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0...
## $ yr1984   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...

Arellano_Bond_1991_table4b <- pvargmm(
  dependent_vars = c("n"),
  lags = 2, # Number of lags of dependent variables
  exog_vars = c(
    "w",
    "wL1",
    "k",
    "ys",
    "ysL1",
    "yr1979",
    "yr1980",
    "yr1981",
    "yr1982",
    "yr1983",
    "yr1984"
  ),
  transformation = "fd",  #     First-difference "fd" or forward orthogonal deviations "fod"
  data = abdata,
  panel_identifier = c("id", "year"),  # Vector of panel identifiers
  steps = c("twostep"),   # "onestep", "twostep" or "mstep" estimation
  system_instruments = FALSE,  #    System GMM estimator
  max_instr_dependent_vars = 99, #  Maximum number of instruments for dependent variables
  min_instr_dependent_vars = 2L, # Minimum number of instruments for dependent variables
  collapse = FALSE
)
summary(Arellano_Bond_1991_table4b)

Dynamic Panel VAR estimation, two-step GMM

Transformation: First-differences
Group variable: id
Time variable: year
Number of observations = 611
Number of groups = 140
Obs per group: min = 4
Obs per group: avg = 4.36428571428571
Obs per group: max = 6
Number of instruments = 38

<!DOCTYPE HTML PUBLIC “-//W3C//DTD HTML 4.01 Transitional//EN” “http://www.w3.org/TR/html4/loose.dtd”>

	n
lag1_n	0.4742^*
	(0.1854)
lag2_n	-0.0530
	(0.0517)
w	-0.5132^***
	(0.1456)
wL1	0.2246
	(0.1419)
k	0.2927^***
	(0.0626)
ys	0.6098^***
	(0.1563)
ysL1	-0.4464^*
	(0.2173)
yr1979	0.0105
	(0.0099)
yr1980	0.0247
	(0.0158)
yr1981	-0.0158
	(0.0267)
yr1982	-0.0374
	(0.0300)
yr1983	-0.0393
	(0.0347)
yr1984	-0.0495
	(0.0349)
p < 0.001, p < 0.01, p < 0.05

Instruments for equation

Standard
w, wL1, k, ys, ysL1, yr1979, yr1980, yr1981, yr1982, yr1983, yr1984

GMM-type
Dependent vars: L(2,6))
Collapse = FALSE

Hansen test of overid. restrictions: chi2(25) = 30.11 Prob > chi2 = 0.22
(Robust, but weakened by many instruments.)

Next, we look at two important options for any GMM estimation: (1) collapse = TRUE, and (2) transformation = ”fod”, the forward orthogonal transformation. We begin with the code for the estimation in R.

Arellano_Bond_1991_table4b_coll_fod <-
  pvargmm(
    dependent_vars = c("n"),
    lags = 2,
    exog_vars = c(
      "w",
      "wL1",
      "k",
      "ys",
      "ysL1",
      "yr1979",
      "yr1980",
      "yr1981",
      "yr1982",
      "yr1983",
      "yr1984"
    ),
    transformation = "fod",
    data = abdata,
    panel_identifier = c("id", "year"),
    steps = c("twostep"),
    system_instruments = FALSE,
    max_instr_dependent_vars = 99,
    max_instr_predet_vars = 99,
    min_instr_dependent_vars = 2L,
    min_instr_predet_vars = 1L,
    collapse = TRUE
  )
summary(Arellano_Bond_1991_table4b_coll_fod)

Dynamic Panel VAR estimation, two-step GMM

Transformation: Forward orthogonal deviations
Group variable: id
Time variable: year
Number of observations = 611
Number of groups = 140
Obs per group: min = 4
Obs per group: avg = 4.36428571428571
Obs per group: max = 6
Number of instruments = 18

<!DOCTYPE HTML PUBLIC “-//W3C//DTD HTML 4.01 Transitional//EN” “http://www.w3.org/TR/html4/loose.dtd”>

	n
lag1_n	1.3783^**
	(0.4523)
lag2_n	-0.2526^**
	(0.0955)
w	-0.5626^**
	(0.2036)
wL1	0.5399
	(0.4064)
k	0.0966
	(0.1482)
ys	0.5777^*
	(0.2454)
ysL1	-0.8983^*
	(0.4463)
yr1979	0.0134
	(0.0133)
yr1980	0.0130
	(0.0202)
yr1981	-0.0403
	(0.0262)
yr1982	-0.0358
	(0.0238)
yr1983	-0.0149
	(0.0304)
yr1984	-0.0249
	(0.0260)
p < 0.001, p < 0.01, p < 0.05

Instruments for equation

Standard
w, wL1, k, ys, ysL1, yr1979, yr1980, yr1981, yr1982, yr1983, yr1984

GMM-type
Dependent vars: L(2,6))
Collapse = TRUE

Hansen test of overid. restrictions: chi2(5) = 7.79 Prob > chi2 = 0.168
(Robust, but weakened by many instruments.)

\[ \begin{aligned} \log _{-} e m p_{i, t}=& \log _{-} e m p_{i, t-1}+\log _{-} w a g e_{i, t}+\log _{-} w a g e_{i, t-1} + \log _{-}c a p i t a l_{i, t}+\log _{-}c a p i t a l_{i, t-1}+\sum_{t=2}^{T} y e a r_{t} \end{aligned} \]

Blundell_Bond_1998_table4 <- pvargmm(
  dependent_vars = c("n"),
  lags = 1,
  predet_vars = c("w", "wL1", "k", "kL1"),  # Predetermined variables
  exog_vars = c(
    "yr1978",
    "yr1979",
    "yr1980",
    "yr1981",
    "yr1982",
    "yr1983",
    "yr1984"
  ),
  transformation = "fd",
  data = abdata,
  panel_identifier = c("id", "year"),
  steps = c("onestep"),
  system_instruments = TRUE,
  max_instr_dependent_vars = 99,
  max_instr_predet_vars = 99,
  min_instr_dependent_vars = 2L,
  min_instr_predet_vars = 1L,
  collapse = FALSE
)

## Warning in pvargmm(dependent_vars = c("n"), lags = 1, predet_vars = c("w", : The
## matrix Lambda is singular, therefore the general inverse is used

summary(Blundell_Bond_1998_table4)

## Warning in hansen_j_test.pvargmm(object): Hansen J test for the first step makes
## no sense. But still is mathematically possible

Dynamic Panel VAR estimation, one-step GMM

Transformation: First-differences
Group variable: id
Time variable: year
Number of observations = 751
Number of groups = 140
Obs per group: min = 5
Obs per group: avg = 5.36428571428571
Obs per group: max = 7
Number of instruments = 215

<!DOCTYPE HTML PUBLIC “-//W3C//DTD HTML 4.01 Transitional//EN” “http://www.w3.org/TR/html4/loose.dtd”>

	n
lag1_n	0.9288^***
	(0.0257)
w	-0.5293^**
	(0.1655)
wL1	0.2883^*
	(0.1370)
k	0.3811^***
	(0.0625)
kL1	-0.3220^***
	(0.0631)
yr1978	-0.0052
	(0.0180)
yr1979	0.0022
	(0.0207)
yr1980	-0.0180
	(0.0217)
yr1981	-0.0542
	(0.0284)
yr1982	-0.0219
	(0.0288)
yr1983	-0.0054
	(0.0252)
yr1984	-0.0137
	(0.0293)
const	0.8458^***
	(0.2252)
p < 0.001, p < 0.01, p < 0.05

Instruments for equation

Standard
yr1978, yr1979, yr1980, yr1981, yr1982, yr1983, yr1984

GMM-type
Dependent vars: L(2,7)
Predet vars: L(1, 7)
Collapse = FALSE

Hansen test of overid. restrictions: chi2(202) = 2.67 Prob > chi2 = 1
(Robust, but weakened by many instruments.)

2 Panel vector autoregression with panelvar

2.1 Dahlberg数据集

Dahlberg %>% head()

##    id year expenditures  revenues    grants
## 1 114 1979    0.0229736 0.0181770 0.0054429
## 2 114 1980    0.0266307 0.0209142 0.0057304
## 3 114 1981    0.0273253 0.0210836 0.0056647
## 4 114 1982    0.0288704 0.0234310 0.0058859
## 5 114 1983    0.0226474 0.0179979 0.0055908
## 6 114 1984    0.0215601 0.0179949 0.0047536

Dahlberg %>% class()

## [1] "data.frame"

Dahlberg %>% glimpse()

## Observations: 2,385
## Variables: 5
## $ id           <fct> 114, 114, 114, 114, 114, 114, 114, 114, 114, 115, 115,...
## $ year         <fct> 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, ...
## $ expenditures <dbl> 0.0229736, 0.0266307, 0.0273253, 0.0288704, 0.0226474,...
## $ revenues     <dbl> 0.0181770, 0.0209142, 0.0210836, 0.0234310, 0.0179979,...
## $ grants       <dbl> 0.0054429, 0.0057304, 0.0056647, 0.0058859, 0.0055908,...

ex1_dahlberg_data <-
  pvargmm(
    dependent_vars = c("expenditures", "revenues", "grants"),
    lags = 1,
    transformation = "fod",
    data = Dahlberg,
    panel_identifier = c("id", "year"),
    steps = c("twostep"),
    system_instruments = FALSE,  # System GMM estimator
    max_instr_dependent_vars = 99,
    max_instr_predet_vars = 99,
    min_instr_dependent_vars = 2L,
    min_instr_predet_vars = 1L,
    collapse = FALSE
  )

## Warning in pvargmm(dependent_vars = c("expenditures", "revenues", "grants"), :
## The matrix D_e is singular, therefore the general inverse is used

summary(ex1_dahlberg_data)

Dynamic Panel VAR estimation, two-step GMM

Transformation: Forward orthogonal deviations
Group variable: id
Time variable: year
Number of observations = 1855
Number of groups = 265
Obs per group: min = 7
Obs per group: avg = 7
Obs per group: max = 7
Number of instruments = 252

<!DOCTYPE HTML PUBLIC “-//W3C//DTD HTML 4.01 Transitional//EN” “http://www.w3.org/TR/html4/loose.dtd”>

	expenditures	revenues	grants
lag1_expenditures	0.2846^***	0.2583^**	0.0167
	(0.0664)	(0.0795)	(0.0172)
lag1_revenues	-0.0470	0.0588	-0.0405^**
	(0.0637)	(0.0726)	(0.0151)
lag1_grants	-1.6746^***	-2.2367^***	0.3204^***
	(0.2818)	(0.2846)	(0.0521)
p < 0.001, p < 0.01, p < 0.05

Instruments for equation

Standard

GMM-type
Dependent vars: L(2,7))
Collapse = FALSE

Hansen test of overid. restrictions: chi2(243) = 263.01 Prob > chi2 = 0.18
(Robust, but weakened by many instruments.)

2.2 select the optimal lag length

With the following sequence of commands, we show how to use the model selection procedure of Andrews and Lu (2001) to select the optimal lag length for our example.

Andrews_Lu_MMSC(ex1_dahlberg_data)

## $MMSC_BIC
## [1] -1610.877
## 
## $MMSC_AIC
## [1] -234.9924
## 
## $MMSC_HQIC
## [1] -792.3698

改变滞后阶数为2

ex2_dahlberg_data <-
  pvargmm(
    dependent_vars = c("expenditures", "revenues", "grants"),
    lags = 2,
    transformation = "fod",
    data = Dahlberg,
    panel_identifier = c("id", "year"),
    steps = c("twostep"),
    system_instruments = FALSE,
    max_instr_dependent_vars = 99,
    max_instr_predet_vars = 99,
    min_instr_dependent_vars = 2L,
    min_instr_predet_vars = 1L,
    collapse = FALSE
  )

## Warning in pvargmm(dependent_vars = c("expenditures", "revenues", "grants"), :
## The matrix D_e is singular, therefore the general inverse is used

summary(ex2_dahlberg_data)

Dynamic Panel VAR estimation, two-step GMM

Transformation: Forward orthogonal deviations
Group variable: id
Time variable: year
Number of observations = 1590
Number of groups = 265
Obs per group: min = 6
Obs per group: avg = 6
Obs per group: max = 6
Number of instruments = 243

<!DOCTYPE HTML PUBLIC “-//W3C//DTD HTML 4.01 Transitional//EN” “http://www.w3.org/TR/html4/loose.dtd”>

	expenditures	revenues	grants
lag1_expenditures	0.2572^**	0.2486^*	0.0135
	(0.0893)	(0.1018)	(0.0178)
lag1_revenues	-0.1219	-0.0634	-0.0293
	(0.0879)	(0.0997)	(0.0171)
lag1_grants	-3.0718^***	-3.5849^***	0.1581^*
	(0.5941)	(0.6168)	(0.0636)
lag2_expenditures	-0.0247	0.0252	0.0178
	(0.0791)	(0.0834)	(0.0157)
lag2_revenues	-0.2584^**	-0.2446^**	-0.0237
	(0.0785)	(0.0800)	(0.0157)
lag2_grants	-1.5265^***	-1.6687^***	0.0702
	(0.1873)	(0.2113)	(0.0586)
p < 0.001, p < 0.01, p < 0.05

Instruments for equation

Standard

GMM-type
Dependent vars: L(2,6))
Collapse = FALSE

Hansen test of overid. restrictions: chi2(225) = 260.11 Prob > chi2 = 0.054
(Robust, but weakened by many instruments.)

Andrews_Lu_MMSC(ex2_dahlberg_data)

## $MMSC_BIC
## [1] -1486.935
## 
## $MMSC_AIC
## [1] -213.8917
## 
## $MMSC_HQIC
## [1] -734.1071

note: Lag = 3: BIC = -1301.451, AIC = -180.5911, HQIC = -643.3513, Lag = 4: BIC = -1098.728, AIC = -175.047, HQIC = -561.2191.

2.3 test the stability of the autoregressive process:

stab_ex1_dahlberg_data <- stability(ex1_dahlberg_data)
print(stab_ex1_dahlberg_data)

## Eigenvalue stability condition:
## 
##              Eigenvalue    Modulus
## 1 0.5370657+0.00000000i 0.53706574
## 2 0.0633651+0.06442426i 0.09036383
## 3 0.0633651-0.06442426i 0.09036383
## 
## All the eigenvalues lie inside the unit circle.
## PVAR satisfies stability condition.

2.4 generate a plot of the roots

plot(stab_ex1_dahlberg_data)

2.5 generate impulse response functions.

ex1_dahlberg_data_oirf <- oirf(ex1_dahlberg_data, n.ahead = 8)
ex1_dahlberg_data_oirf

## $expenditures
##      expenditures     revenues        grants
## [1,] 1.629811e-03 1.382710e-03  3.210533e-05
## [2,] 3.450912e-04 4.305316e-04 -1.855299e-05
## [3,] 1.090437e-04 1.559591e-04 -1.762940e-05
## [4,] 5.322461e-05 7.677025e-05 -1.014727e-05
## [5,] 2.853148e-05 4.095941e-05 -5.473222e-06
## [6,] 1.536008e-05 2.202059e-05 -2.936910e-06
## [7,] 8.254473e-06 1.183160e-05 -1.576796e-06
## [8,] 4.433541e-06 6.354806e-06 -8.467986e-07
## 
## $revenues
##      expenditures     revenues        grants
## [1,] 0.000000e+00 9.197397e-04 -1.127152e-04
## [2,] 1.455067e-04 3.061727e-04 -7.335906e-05
## [3,] 1.498647e-04 2.196692e-04 -3.347828e-05
## [4,] 8.838788e-05 1.265101e-04 -1.712503e-05
## [5,] 4.788573e-05 6.857463e-05 -9.137258e-06
## [6,] 2.570595e-05 3.683930e-05 -4.906684e-06
## [7,] 1.380089e-05 1.978127e-05 -2.635623e-06
## [8,] 7.411461e-06 1.062327e-05 -1.415560e-06
## 
## $grants
##       expenditures      revenues       grants
## [1,]  0.000000e+00  0.000000e+00 3.364089e-04
## [2,] -5.633525e-04 -7.524337e-04 1.077837e-04
## [3,] -3.054546e-04 -4.308484e-04 5.561764e-05
## [4,] -1.598162e-04 -2.286388e-04 3.017794e-05
## [5,] -8.527172e-05 -1.222266e-04 1.626505e-05
## [6,] -4.576010e-05 -6.559428e-05 8.740177e-06
## [7,] -2.457614e-05 -3.522681e-05 4.694185e-06
## [8,] -1.319930e-05 -1.891931e-05 2.521064e-06
## 
## attr(,"class")
## [1] "pvaroirf"

ex1_dahlberg_data_girf <- girf(ex1_dahlberg_data, n.ahead = 8, ma_approx_steps= 8)
ex1_dahlberg_data_girf

## $expenditures
##      expenditures     revenues        grants
## [1,] 1.629811e-03 1.382710e-03  3.210533e-05
## [2,] 3.450912e-04 4.305316e-04 -1.855299e-05
## [3,] 1.090437e-04 1.559591e-04 -1.762940e-05
## [4,] 5.322461e-05 7.677025e-05 -1.014727e-05
## [5,] 2.853148e-05 4.095941e-05 -5.473222e-06
## [6,] 1.536008e-05 2.202059e-05 -2.936910e-06
## [7,] 8.254473e-06 1.183160e-05 -1.576796e-06
## [8,] 4.433541e-06 6.354806e-06 -8.467986e-07
## 
## $revenues
##      expenditures     revenues        grants
## [1,] 1.357020e-03 1.660665e-03 -3.569430e-05
## [2,] 3.679185e-04 5.280413e-04 -5.607673e-05
## [3,] 1.737932e-04 2.515165e-04 -3.322021e-05
## [4,] 9.326868e-05 1.339869e-04 -1.793336e-05
## [5,] 5.027695e-05 7.208305e-05 -9.617700e-06
## [6,] 2.702611e-05 3.873789e-05 -5.162852e-06
## [7,] 1.451633e-05 2.080690e-05 -2.772587e-06
## [8,] 7.796224e-06 1.117474e-05 -1.489056e-06
## 
## $grants
##       expenditures      revenues       grants
## [1,]  1.468834e-04 -1.663945e-04 3.562393e-04
## [2,] -5.469311e-04 -7.686221e-04 1.233228e-04
## [3,] -3.260415e-04 -4.623133e-04 6.152545e-05
## [4,] -1.740894e-04 -2.490209e-04 3.300197e-05
## [5,] -9.310485e-05 -1.334286e-04 1.775743e-05
## [6,] -4.996197e-05 -7.161442e-05 9.541455e-06
## [7,] -2.683082e-05 -3.845843e-05 5.124693e-06
## [8,] -1.440999e-05 -2.065467e-05 2.752299e-06
## 
## attr(,"class")
## [1] "pvargirf"

# 任何稳定的AR()模型都有一个无限的MA表示。因此，任何冲击都可以无限地模拟到未来。对于每一个预测步骤，你都需要一个附加的MA项。

2.6 bootstrap function to calculate confidence intervals

ex1_dahlberg_data_bs <-
  bootstrap_irf(
    ex1_dahlberg_data,
    typeof_irf = c("GIRF"),
    n.ahead = 8,
    nof_Nstar_draws = 10,
    confidence.band = 0.95
  )

2.7 plot the impulse response functions including the bootstrapped confidence intervals.

plot(ex1_dahlberg_data_girf, ex1_dahlberg_data_bs)

3 自己的数据集

3.1 宏观数据集

lunwen <- readxl::read_excel("综合数据.xls")
lunwen <- lunwen %>% 
  mutate(id = factor(id),
         year = factor(year)) %>% 
  as.data.frame()

lunwen %>% glimpse()

## Observations: 119
## Variables: 9
## $ id                <fct> 广州, 广州, 广州, 广州, 广州, 广州, 广州, 广州, 广州, 广州, 广州, 广州, 广...
## $ year              <fct> 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2...
## $ 地区生产总值      <dbl> 2841.651, 3203.962, 3758.617, 4450.550, 5154.228, 6081....
## $ 人均地区生产总值  <dbl> 28537.08, 32338.59, 38398.49, 45905.86, 53809.27, 62495.3...
## $ `R&D活动人员`     <dbl> 17461, 20312, 16345, 16628, 19391, 23587, 36888, 3653...
## $ `R&D经费内部支出` <dbl> 185199.0, 231294.0, 191207.0, 271693.0, 341674.0, 52145...
## $ 新产品销售收入    <dbl> 2253913, 2291238, 5514371, 7053357, 6878954, 9236206, 11...
## $ 专利申请数量      <dbl> 4998, 6288, 8206, 8230, 11012, 12296, 12495, 13990, 165...
## $ FDI               <dbl> 300119, 228386, 258075, 240065, 260851, 292273, 3...

lunwen %>% head()

##     id year 地区生产总值 人均地区生产总值 R&D活动人员 R&D经费内部支出
## 1 广州 2001     2841.651         28537.08       17461        185199.0
## 2 广州 2002     3203.962         32338.59       20312        231294.0
## 3 广州 2003     3758.617         38398.49       16345        191207.0
## 4 广州 2004     4450.550         45905.86       16628        271693.0
## 5 广州 2005     5154.228         53809.27       19391        341674.0
## 6 广州 2006     6081.861         62495.36       23587        521452.5
##   新产品销售收入 专利申请数量    FDI
## 1        2253913         4998 300119
## 2        2291238         6288 228386
## 3        5514371         8206 258075
## 4        7053357         8230 240065
## 5        6878954        11012 260851
## 6        9236206        12296 292273

ex1_lunwen_data <-
  pvargmm(
    dependent_vars = c("地区生产总值", "R&D经费内部支出", "FDI","新产品销售收入"),
    lags = 1,
    transformation = "fod",
    data = lunwen,
    panel_identifier = c("id", "year"),
    steps = c("twostep"),
    system_instruments = FALSE,  # System GMM estimator
    max_instr_dependent_vars = 99,
    max_instr_predet_vars = 99,
    min_instr_dependent_vars = 2L,
    min_instr_predet_vars = 1L,
    collapse = FALSE
  )

## Warning in pvargmm(dependent_vars = c("地区生产总值", "R&D经费内部支出", : The
## matrix Lambda is singular, therefore the general inverse is used

## Warning in pvargmm(dependent_vars = c("地区生产总值", "R&D经费内部支出", : The
## matrix D_e is singular, therefore the general inverse is used

summary(ex1_dahlberg_data)

Dynamic Panel VAR estimation, two-step GMM

<!DOCTYPE HTML PUBLIC “-//W3C//DTD HTML 4.01 Transitional//EN” “http://www.w3.org/TR/html4/loose.dtd”>

	expenditures	revenues	grants
lag1_expenditures	0.2846^***	0.2583^**	0.0167
	(0.0664)	(0.0795)	(0.0172)
lag1_revenues	-0.0470	0.0588	-0.0405^**
	(0.0637)	(0.0726)	(0.0151)
lag1_grants	-1.6746^***	-2.2367^***	0.3204^***
	(0.2818)	(0.2846)	(0.0521)
p < 0.001, p < 0.01, p < 0.05

Instruments for equation

Standard

GMM-type
Dependent vars: L(2,7))
Collapse = FALSE

Hansen test of overid. restrictions: chi2(243) = 263.01 Prob > chi2 = 0.18
(Robust, but weakened by many instruments.)

3.2 select the optimal lag length

With the following sequence of commands, we show how to use the model selection procedure of Andrews and Lu (2001) to select the optimal lag length for our example.

Andrews_Lu_MMSC(ex1_lunwen_data)

## $MMSC_BIC
## [1] -8912.059
## 
## $MMSC_AIC
## [1] -3827.071
## 
## $MMSC_HQIC
## [1] -6182.236

改变滞后阶数为2

ex2_lunwen_data <-
  pvargmm(
    dependent_vars = c("地区生产总值", "R&D经费内部支出", "FDI","新产品销售收入","专利申请数量"),
    lags = 2,
    transformation = "fod",
    data = lunwen,
    panel_identifier = c("id", "year"),
    steps = c("twostep"),
    system_instruments = FALSE,
    max_instr_dependent_vars = 99,
    max_instr_predet_vars = 99,
    min_instr_dependent_vars = 2L,
    min_instr_predet_vars = 1L,
    collapse = FALSE
  )
summary(ex2_lunwen_data)
Andrews_Lu_MMSC(ex2_lunwen_data)

note: Lag = 3: BIC = -1301.451, AIC = -180.5911, HQIC = -643.3513, Lag = 4: BIC = -1098.728, AIC = -175.047, HQIC = -561.2191.

3.3 test the stability of the autoregressive process:

stab_ex1_lunwen_data <- stability(ex1_lunwen_data)
print(stab_ex1_lunwen_data)

## Eigenvalue stability condition:
## 
##                 Eigenvalue      Modulus
## 1  0.4530173995+0.4010795i 0.6050533002
## 2  0.4530173995-0.4010795i 0.6050533002
## 3  0.0011187579+0.0000000i 0.0011187579
## 4 -0.0007473146+0.0000000i 0.0007473146
## 
## All the eigenvalues lie inside the unit circle.
## PVAR satisfies stability condition.

3.4 generate a plot of the roots

plot(stab_ex1_lunwen_data)

3.5 generate impulse response functions.

ex1_lunwen_data_oirf <- oirf(ex1_lunwen_data, n.ahead = 8)
ex1_lunwen_data_oirf

## $地区生产总值
##      地区生产总值 R&D经费内部支出        FDI 新产品销售收入
## [1,]    3656894.9     -1679306.44 -4437868.7      -877608.0
## [2,]    -251957.1      -112356.83   420206.4     -8984935.2
## [3,]   -2242338.2       451764.17  2649934.6     -7828662.8
## [4,]   -1942069.1       450428.29  2247363.9     -3803756.1
## [5,]    -938687.3       242717.53  1066077.3      -580344.2
## [6,]    -139512.3        55013.46   143166.8       866703.1
## [7,]     217240.6       -39012.23  -260565.6       997721.1
## [8,]     247901.5       -55486.29  -288493.3       586679.1
## 
## $`R&D经费内部支出`
##      地区生产总值 R&D经费内部支出        FDI 新产品销售收入
## [1,]        0.000      270422.554   33324.19     -618699.27
## [2,]  -149702.661       52349.303  163290.07      546366.13
## [3,]   137323.513      -22768.891 -166234.69      725841.59
## [4,]   180467.721      -39786.223 -210507.31      457621.34
## [5,]   113238.030      -27712.224 -129870.47      148897.88
## [6,]    36530.260      -10542.921  -40602.65      -32623.71
## [7,]    -8357.567         592.901   10756.80      -84068.16
## [8,]   -20945.591        4396.841   24610.24      -64225.48
## 
## $FDI
##      地区生产总值 R&D经费内部支出        FDI 新产品销售收入
## [1,]       0.0000         0.00000 60583.8207    -61690.0719
## [2,]  -15319.0003      3551.29557 17531.5452    -28069.3811
## [3,]   -6922.0984      1814.58267  7841.5313     -3034.7325
## [4,]    -717.9253       344.03782   687.5700      7526.2775
## [5,]    1883.6278      -352.59057 -2247.7277      7930.0528
## [6,]    1969.4572      -445.40796 -2288.2317      4429.6127
## [7,]    1094.8204      -274.47541 -1250.3480      1110.2742
## [8,]     270.9478       -85.62509  -295.1612      -615.6876
## 
## $新产品销售收入
##      地区生产总值 R&D经费内部支出         FDI 新产品销售收入
## [1,]         0.00            0.00        0.00     3623649.05
## [2,]    903757.65      -184533.87 -1067379.66     3046323.70
## [3,]    755520.43      -176175.59  -873526.08     1432174.61
## [4,]    353290.55       -92067.25  -400657.88      182372.22
## [5,]     43505.24       -18920.11   -43221.17     -359068.51
## [6,]    -89918.70        16562.58   107516.76     -392093.11
## [7,]    -97396.28        21932.72   113236.74     -223798.80
## [8,]    -55326.13        13808.43    63235.67      -59228.33
## 
## attr(,"class")
## [1] "pvaroirf"

ex1_lunwen_data_girf <- girf(ex1_lunwen_data, n.ahead = 8, ma_approx_steps= 8)
ex1_lunwen_data_girf %>% plot()

# 任何稳定的AR()模型都有一个无限的MA表示。因此，任何冲击都可以无限地模拟到未来。对于每一个预测步骤，你都需要一个附加的MA项。

3.6 bootstrap function to calculate confidence intervals

ex1_lunwen_data_bs <-
  bootstrap_irf(
    ex1_lunwen_data,
    # typeof_irf = c("GIRF"),
    n.ahead = 8,
    nof_Nstar_draws = 5,
    confidence.band = 0.95
  )

3.7 plot the impulse response functions including the bootstrapped confidence intervals.

plot(ex1_lunwen_data_girf, ex1_lunwen_data_bs)

panel vector autoregression model with R

LJJ

2020/2/15

1 Dynamic panel estimation with panelvar

1.1 abdata数据集

2 Panel vector autoregression with panelvar

2.1 Dahlberg数据集

2.2 select the optimal lag length

2.3 test the stability of the autoregressive process:

2.4 generate a plot of the roots

2.5 generate impulse response functions.

2.6 bootstrap function to calculate confidence intervals

2.7 plot the impulse response functions including the bootstrapped confidence intervals.

3 自己的数据集

3.1 宏观数据集

3.2 select the optimal lag length

3.3 test the stability of the autoregressive process:

3.4 generate a plot of the roots

3.5 generate impulse response functions.

3.6 bootstrap function to calculate confidence intervals

3.7 plot the impulse response functions including the bootstrapped confidence intervals.