例3.1 考虑一个标准的回归模型。
set.seed(101)
n <- 500
spureg01 <- function(n=500){
  x <- seq(n)
  eps <- rnorm(n)*50
  y <- 2 * x + eps
  lmr <- lm(y ~ x)
  summary(lmr) |> print()
  plot(x, y)
  abline(lmr, col="red")
  
  invisible(list(x=x, y=y))
}
spureg01(n)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -153.870  -33.591   -0.772   32.220  134.560 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.74231    4.32775   0.172    0.864    
## x            1.98451    0.01497 132.572   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48.31 on 498 degrees of freedom
## Multiple R-squared:  0.9724, Adjusted R-squared:  0.9724 
## F-statistic: 1.758e+04 on 1 and 498 DF,  p-value: < 2.2e-16

##### 例3.2 考虑回归误差项为AR(1)序列的情形。

set.seed(101)
spureg02 <- function(n=500){
  x <- seq(n)
  eps <- arima.sim(
    model=list(ar=c(0.7)), n=n)*36
  y <- 2 * x + eps
  lmr <- lm(y ~ x)
  summary(lmr) |> print()
  plot(x, y)
  abline(lmr, col="red")
  
  invisible(list(x=x, y=y))
}
sim2 <- spureg02(n)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -137.413  -34.282    0.096   35.279  130.342 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.64699    4.36336  -0.148    0.882    
## x            1.96993    0.01509 130.524   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48.71 on 498 degrees of freedom
## Multiple R-squared:  0.9716, Adjusted R-squared:  0.9715 
## F-statistic: 1.704e+04 on 1 and 498 DF,  p-value: < 2.2e-16

- 应该用同时估计回归与ARMA误差项的方法:

arima(sim2$y, order=c(1,0,0), xreg = sim2$x)
## 
## Call:
## arima(x = sim2$y, order = c(1, 0, 0), xreg = sim2$x)
## 
## Coefficients:
##          ar1  intercept  sim2$x
##       0.7018    -1.4932  1.9723
## s.e.  0.0317    10.2735  0.0355
## 
## sigma^2 estimated as 1197:  log likelihood = -2481.72,  aic = 4971.44
例3.3 考虑回归误差项为I(1)序列的情形。
set.seed(101)
spureg03 <- function(n=500){
  x <- seq(n)
  eps <- cumsum(rnorm(n))*8
  y <- 2 * x + eps
  lmr <- lm(y ~ x)
  summary(lmr) |> print()
  plot(x, y)
  abline(lmr, col="red")
  
  invisible(list(x=x, y=y))
}
spureg03(n)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -101.312  -21.701    0.489   21.568   92.563 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.78482    3.31790   0.538    0.591    
## x            1.69680    0.01148 147.853   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 37.04 on 498 degrees of freedom
## Multiple R-squared:  0.9777, Adjusted R-squared:  0.9777 
## F-statistic: 2.186e+04 on 1 and 498 DF,  p-value: < 2.2e-16

- 如果\(x_t\)\(y_t\)都是I(1)序列但是独立, 回归结果也可能很显著。

spureg04 <- function(){
  set.seed(110)
  n <- 500
  x <- cumsum(rnorm(n))
  y <- cumsum(rnorm(n))*2
  lmr <- lm(y ~ x)
  summary(lmr) |> print()
  plot(x, y)
  abline(lmr, col="red")
  
  invisible(list(x=x, y=y, lmr=lmr))
}
sim4 <- spureg04()
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.1795  -8.6775   0.0375   8.8982  25.6600 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.49701    0.81992   3.045  0.00245 ** 
## x            0.46871    0.02554  18.350  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.51 on 498 degrees of freedom
## Multiple R-squared:  0.4034, Adjusted R-squared:  0.4022 
## F-statistic: 336.7 on 1 and 498 DF,  p-value: < 2.2e-16

- 检验回归残差的序列相关性和单位根:

forecast::Acf(residuals(sim4$lmr))
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

fUnitRoots::adfTest(residuals(sim4$lmr), lags=1, type="c")
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -1.3595
##   P VALUE:
##     0.5525 
## 
## Description:
##  Thu Jan  4 01:16:43 2024 by user: qaj
3.5
set.seed(101)
spureg05 <- function(n=500){
  z <- cumsum(rnorm(n))
  x <- z + rnorm(n)*2
  y <- 2*z + rnorm(n)*2
  lmr <- lm(y ~ x)
  summary(lmr) |> print()
  plot(x, y)
  abline(lmr, col="red")
  
  invisible(list(x=x, y=y, lmr=lmr))
}
sim5 <- spureg05(n)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.2598  -2.6583   0.1401   2.6900  11.8576 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.0736     0.3024   -3.55 0.000421 ***
## x             1.8743     0.0256   73.23  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.181 on 498 degrees of freedom
## Multiple R-squared:  0.915,  Adjusted R-squared:  0.9149 
## F-statistic:  5362 on 1 and 498 DF,  p-value: < 2.2e-16

- 这个回归估计的标准误差仍不可信, 但是回归残差已经基本平稳:

forecast::Acf(residuals(sim5$lmr))

- 单位根检验

fUnitRoots::adfTest(residuals(sim5$lmr), lags=1, type="c")
## Warning in fUnitRoots::adfTest(residuals(sim5$lmr), lags = 1, type = "c"):
## p-value smaller than printed p-value
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -15.6707
##   P VALUE:
##     0.01 
## 
## Description:
##  Thu Jan  4 01:16:44 2024 by user: qaj
例4.1
library(urca, quietly = TRUE)
data(Raotbl3)
ts.Raotbl3 <- ts(Raotbl3[,1:3], start=c(1966,4), frequency=4)
library(quantmod, quietly = TRUE)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
plot(as.xts(ts.Raotbl3), type="l", 
     multi.panel=FALSE, theme="white",
     main="英国消费、收入、财富季度数据",
     major.ticks="years",
     grid.ticks.on = "years")

fUnitRoots::adfTest(Raotbl3$lc, lags=1, type="ct")
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -1.378
##   P VALUE:
##     0.8341 
## 
## Description:
##  Thu Jan  4 01:16:44 2024 by user: qaj
fUnitRoots::adfTest(Raotbl3$li, lags=1, type="ct")
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -2.0159
##   P VALUE:
##     0.57 
## 
## Description:
##  Thu Jan  4 01:16:44 2024 by user: qaj
fUnitRoots::adfTest(Raotbl3$lw, lags=1, type="ct")
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -1.0153
##   P VALUE:
##     0.9318 
## 
## Description:
##  Thu Jan  4 01:16:44 2024 by user: qaj
fUnitRoots::adfTest(diff(Raotbl3$lc), lags=1, type="c")
## Warning in fUnitRoots::adfTest(diff(Raotbl3$lc), lags = 1, type = "c"): p-value
## smaller than printed p-value
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -6.311
##   P VALUE:
##     0.01 
## 
## Description:
##  Thu Jan  4 01:16:44 2024 by user: qaj
fUnitRoots::adfTest(diff(Raotbl3$li), lags=1, type="c")
## Warning in fUnitRoots::adfTest(diff(Raotbl3$li), lags = 1, type = "c"): p-value
## smaller than printed p-value
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -7.0434
##   P VALUE:
##     0.01 
## 
## Description:
##  Thu Jan  4 01:16:44 2024 by user: qaj
fUnitRoots::adfTest(diff(Raotbl3$lw), lags=1, type="c")
## Warning in fUnitRoots::adfTest(diff(Raotbl3$lw), lags = 1, type = "c"): p-value
## smaller than printed p-value
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -5.9831
##   P VALUE:
##     0.01 
## 
## Description:
##  Thu Jan  4 01:16:44 2024 by user: qaj
eglm1 <- lm(lc ~ li, data=Raotbl3); summary(eglm1)
## 
## Call:
## lm(formula = lc ~ li, data = Raotbl3)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.065949 -0.016067  0.001839  0.017844  0.057295 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.18007    0.14399  -1.251    0.214    
## li           1.00731    0.01322  76.203   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02379 on 97 degrees of freedom
## Multiple R-squared:  0.9836, Adjusted R-squared:  0.9834 
## F-statistic:  5807 on 1 and 97 DF,  p-value: < 2.2e-16
fUnitRoots::adfTest(residuals(eglm1), lags=1, type="c")
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -2.6351
##   P VALUE:
##     0.09127 
## 
## Description:
##  Thu Jan  4 01:16:44 2024 by user: qaj
library(urca, quietly = TRUE)
summary(ca.po(Raotbl3[-(1:2),c("lc", "li")], type="Pz", demean="constant"))
## 
## ######################################## 
## # Phillips and Ouliaris Unit Root Test # 
## ######################################## 
## 
## Test of type Pz 
## detrending of series with constant only 
## 
## Response lc :
## 
## Call:
## lm(formula = lc ~ zr)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.047772 -0.007401  0.001410  0.008169  0.048541 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.01183    0.08952   0.132    0.895    
## zrlc         1.01799    0.05960  17.080   <2e-16 ***
## zrli        -0.01831    0.06074  -0.302    0.764    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01393 on 93 degrees of freedom
## Multiple R-squared:  0.9941, Adjusted R-squared:  0.994 
## F-statistic:  7844 on 2 and 93 DF,  p-value: < 2.2e-16
## 
## 
## Response li :
## 
## Call:
## lm(formula = li ~ zr)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.036705 -0.011532 -0.000549  0.008860  0.053870 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.09644    0.10642   0.906    0.367    
## zrlc         0.33183    0.07085   4.683  9.6e-06 ***
## zrli         0.66298    0.07220   9.182  1.1e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01656 on 93 degrees of freedom
## Multiple R-squared:  0.9914, Adjusted R-squared:  0.9912 
## F-statistic:  5334 on 2 and 93 DF,  p-value: < 2.2e-16
## 
## 
## 
## Value of test-statistic is: 39.7042 
## 
## Critical values of Pz are:
##                   10pct    5pct    1pct
## critical values 47.5877 55.2202 71.9273
VARMA模拟
library(dse, quietly = TRUE)
## 
## Attaching package: 'dse'
## The following objects are masked from 'package:stats':
## 
##     acf, simulate
#自回归部分系数矩阵
Apoly <- array(c(
  1,-0.5,0,
  0,-0.4,-0.25,
  0,-0.1,0,
  1,-0.5,0),
  c(3,2,2))
Apoly[2,,]
##      [,1] [,2]
## [1,] -0.5 -0.1
## [2,] -0.4 -0.5
#滑动平均部分系数矩阵,此处有变换
B <- matrix(c(3,0,0,2),ncol=2)*0.01
#生成模型
var2 <- ARMA(A=Apoly, B=B)
print(var2)
## 
## A(L) =
## 1-0.5L1    0-0.1L1    
## 0-0.4L1-0.25L2    1-0.5L1    
## 
## B(L) =
## 0.03    0    
## 0    0.02
#模拟数据
varsim <- simulate(
  var2, sampleT=250, 
  noise=list(w=matrix(rnorm(500),nrow=250,ncol=2)),
  rng=list(seed=c(123456)))
vardat <- matrix(varsim$output, nrow=250, ncol=2)
例4.2
library(urca, quietly = TRUE)
data(UKpppuip)
ts.UKpppuip <- ts(UKpppuip, start=c(1971,1), frequency=4)
library(quantmod, quietly = TRUE)
plot(as.xts(ts.UKpppuip), type="l", 
     multi.panel=TRUE, theme="white",
     main="英国PPP-UIP数据",
     major.ticks="years",
     grid.ticks.on = "years")

dat1 <- UKpppuip[,c("p1", "p2", "e12", "i1", "i2")]
dat2 <- UKpppuip[,c("doilp0", "doilp1")]
summary(ca.jo(
  dat1, type="trace", K=2, season=4, dumvar=dat2))
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , with linear trend 
## 
## Eigenvalues (lambda):
## [1] 0.40672818 0.28538240 0.25415335 0.10230406 0.08287097
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 4 |  5.19  6.50  8.18 11.65
## r <= 3 | 11.67 15.66 17.95 23.52
## r <= 2 | 29.26 28.71 31.52 37.22
## r <= 1 | 49.42 45.23 48.28 55.43
## r = 0  | 80.75 66.49 70.60 78.87
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##             p1.l2      p2.l2    e12.l2      i1.l2      i2.l2
## p1.l2   1.0000000   1.000000  1.000000  1.0000000  1.0000000
## p2.l2  -0.9086265  -1.143047 -1.272628 -2.4001444 -1.4528820
## e12.l2 -0.9321133  -3.363042  1.113631  1.1221619 -0.4805235
## i1.l2  -3.3746393  35.243576  2.746828 -0.4088865  2.2775510
## i2.l2  -1.8906210 -32.917370 -2.835714  2.9863624  0.7628011
## 
## Weights W:
## (This is the loading matrix)
## 
##             p1.l2         p2.l2       e12.l2        i1.l2       i2.l2
## p1.d  -0.06816507  0.0011795779 -0.002790218  0.001373599 -0.01333013
## p2.d  -0.01773477  0.0001220008 -0.014159241  0.013178503  0.00755575
## e12.d  0.10065321 -0.0001432122 -0.055628059 -0.035400025 -0.04707585
## i1.d   0.03434737 -0.0041631581 -0.010363374  0.012309982 -0.02394672
## i2.d   0.05766426  0.0082830953  0.004821036  0.026984801 -0.01006765
summary(ca.jo(
  dat1, type="eigen", K=2, season=4, dumvar=dat2))
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: maximal eigenvalue statistic (lambda max) , with linear trend 
## 
## Eigenvalues (lambda):
## [1] 0.40672818 0.28538240 0.25415335 0.10230406 0.08287097
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 4 |  5.19  6.50  8.18 11.65
## r <= 3 |  6.48 12.91 14.90 19.19
## r <= 2 | 17.59 18.90 21.07 25.75
## r <= 1 | 20.16 24.78 27.14 32.14
## r = 0  | 31.33 30.84 33.32 38.78
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##             p1.l2      p2.l2    e12.l2      i1.l2      i2.l2
## p1.l2   1.0000000   1.000000  1.000000  1.0000000  1.0000000
## p2.l2  -0.9086265  -1.143047 -1.272628 -2.4001444 -1.4528820
## e12.l2 -0.9321133  -3.363042  1.113631  1.1221619 -0.4805235
## i1.l2  -3.3746393  35.243576  2.746828 -0.4088865  2.2775510
## i2.l2  -1.8906210 -32.917370 -2.835714  2.9863624  0.7628011
## 
## Weights W:
## (This is the loading matrix)
## 
##             p1.l2         p2.l2       e12.l2        i1.l2       i2.l2
## p1.d  -0.06816507  0.0011795779 -0.002790218  0.001373599 -0.01333013
## p2.d  -0.01773477  0.0001220008 -0.014159241  0.013178503  0.00755575
## e12.d  0.10065321 -0.0001432122 -0.055628059 -0.035400025 -0.04707585
## i1.d   0.03434737 -0.0041631581 -0.010363374  0.012309982 -0.02394672
## i2.d   0.05766426  0.0082830953  0.004821036  0.026984801 -0.01006765
dat1 <- UKpppuip[,c("p1", "p2", "e12", "i1", "i2")]
f <- function(){
  x1 <- as.matrix(dat1) %*% c(1, -0.91, -0.93, -3.37, -1.89)
  x2 <- as.matrix(dat1) %*% c(1, -1.14, -3.36, -35.24, -32.91)
  print(fUnitRoots::adfTest(x1, lags=1, type="c"))
  print(fUnitRoots::adfTest(x2, lags=1, type="c"))
  invisible()
}
f()
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -2.5995
##   P VALUE:
##     0.0994 
## 
## Description:
##  Thu Jan  4 01:16:45 2024 by user: qaj
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -3.0532
##   P VALUE:
##     0.03853 
## 
## Description:
##  Thu Jan  4 01:16:45 2024 by user: qaj