案例:CBOE的波动率指数(VIX)
library(readr)
## Warning: package 'readr' was built under R version 4.3.2
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(xts)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.3.2
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
da <- read_table("D:/齐安静 教学/时间序列分析/北大/ftsdata/d-vix0411.txt", col_types=cols(.default = col_double()))
xts.vix <- xts(da[,-(1:3)], make_date(da$year, da$mon, da$day))
str(xts.vix)
## An xts object on 2004-01-02 / 2011-11-21 containing: 
##   Data:    double [1988, 4]
##   Columns: Open, High, Low, Close
##   Index:   Date [1988] (TZ: "UTC")
vix <- log(xts.vix[,"Close"])
delta.vix <- diff(vix)[-1]
plot(vix, type="l", main="VIX Daily Log Close",
     major.ticks="years", minor.ticks=NULL, 
     grid.ticks.on="auto")

plot(delta.vix, type="l", main="VIX Daily Log Return",
     major.ticks="years", minor.ticks=NULL, 
     grid.ticks.on="auto")

forecast::Acf(c(coredata(delta.vix)), main="")
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

resm <- arima(c(coredata(vix)), order=c(0,1,1)); resm
## 
## Call:
## arima(x = c(coredata(vix)), order = c(0, 1, 1))
## 
## Coefficients:
##           ma1
##       -0.1500
## s.e.   0.0244
## 
## sigma^2 estimated as 0.004561:  log likelihood = 2535.71,  aic = -5067.42
Box.test(resm$residuals, lag=10, fitdf=1)
## 
##  Box-Pierce test
## 
## data:  resm$residuals
## X-squared = 47, df = 9, p-value = 3.925e-07
resm2 <- arima(c(coredata(vix)), order=c(0,1,10)); resm2
## 
## Call:
## arima(x = c(coredata(vix)), order = c(0, 1, 10))
## 
## Coefficients:
##           ma1      ma2      ma3      ma4      ma5      ma6      ma7      ma8
##       -0.1493  -0.0716  -0.0484  -0.0258  -0.0303  -0.0401  -0.0131  -0.0181
## s.e.   0.0223   0.0226   0.0228   0.0225   0.0226   0.0236   0.0231   0.0219
##          ma9    ma10
##       0.0092  0.0966
## s.e.  0.0245  0.0230
## 
## sigma^2 estimated as 0.004453:  log likelihood = 2559.57,  aic = -5097.13
Box.test(resm2$residuals, lag=20, fitdf=10)
## 
##  Box-Pierce test
## 
## data:  resm2$residuals
## X-squared = 9.5782, df = 10, p-value = 0.4782

季节模型

案例:可口可乐

da <- read_table(
  "d:/齐安静 教学/时间序列分析/北大/ftsdata/q-ko-earns8309.txt",
  col_types=cols(pends=col_date("%Y%m%d"),
                 anntime=col_date("%Y%m%d"),
                 value = col_double()))
xts.koqtr <- xts(da[["value"]], ymd(da[["pends"]]))
ko <- ts(log(da[["value"]]), start=c(1983,1), frequency=4)
plot(ko, type="l", main="Coca Kola Quarterly Log Earnings")

  • 季节差分
d.ko <- diff(ko)
plot(d.ko, type="l", main="Coca Kola Quarterly Log Earnings diff()")

  • ACF
d.ko <- diff(ko)
forecast::Acf(d.ko, main="")

  • 季节差分的原理
tmp <- window(ko, start=c(1983,1), end=c(1985,4)); tmp
##           Qtr1      Qtr2      Qtr3      Qtr4
## 1983 -3.283414 -3.011862 -3.072613 -3.272804
## 1984 -3.158251 -2.842153 -2.885981 -3.177254
## 1985 -3.101093 -2.772589 -2.785471 -3.072613
diff(tmp, lag=4)
##            Qtr1       Qtr2       Qtr3       Qtr4
## 1984 0.12516314 0.16970847 0.18663191 0.09555002
## 1985 0.05715841 0.06956446 0.10051006 0.10464083
  • 例题继续,可口可乐差分之后,季节差分。
dd.ko <- diff(diff(ko), lag=4)
plot(dd.ko, main="Coca Kola Quarterly Log Earnings diff()diff(lag=4)")

  • 差分后的ACF:
forecast::Acf(dd.ko, main="")

  • PACF图:
pacf(dd.ko, main="")

  • 多重季节模型
resm <- arima(
  ko, order=c(0,1,1), seasonal=list(order=c(0,1,1), period=4)
); resm
## 
## Call:
## arima(x = ko, order = c(0, 1, 1), seasonal = list(order = c(0, 1, 1), period = 4))
## 
## Coefficients:
##           ma1     sma1
##       -0.4096  -0.8203
## s.e.   0.0866   0.0743
## 
## sigma^2 estimated as 0.00724:  log likelihood = 104.25,  aic = -202.5
  • 残差平稳性检验
Box.test(resm$residuals, lag=12, fitdf=2)
## 
##  Box-Pierce test
## 
## data:  resm$residuals
## X-squared = 12.233, df = 10, p-value = 0.2698
  • 多步预报
tmp.y <- window(ko, start=start(ko), end=c(2007,4))
resm2 <- arima(tmp.y, order=c(0,1,1), seasonal=list(order=c(0,1,1), period=4))
resm2
## 
## Call:
## arima(x = tmp.y, order = c(0, 1, 1), seasonal = list(order = c(0, 1, 1), period = 4))
## 
## Coefficients:
##           ma1     sma1
##       -0.4209  -0.8099
## s.e.   0.0874   0.0767
## 
## sigma^2 estimated as 0.007432:  log likelihood = 95.78,  aic = -185.57
pred1 <- predict(resm2, n.ahead=7)
cbind(Observed=ko[101:107], Predicted=pred1$pred, SE=pred1$se)
##             Observed  Predicted         SE
## 2008 Q1 -0.400477567 -0.5060620 0.08621248
## 2008 Q2  0.009950331 -0.1237792 0.09962409
## 2008 Q3 -0.186329578 -0.2669296 0.11143307
## 2008 Q4 -0.446287103 -0.4501580 0.12210527
## 2009 Q1 -0.430782916 -0.4219704 0.13894879
## 2009 Q2 -0.083381609 -0.0396876 0.15111786
## 2009 Q3 -0.198450939 -0.1828380 0.16237749
  • R函数
lognorm_adjust <- function(pred_list){
  pred <- pred_list$pred
  se <- pred_list$se
  xnew <- exp(pred + 0.5*se^2)
  senew <- sqrt(exp(2*pred + se^2)*(exp(se^2)-1))
  list(pred = xnew, se=senew)
}
  • 每股盈利预测
pred1adj <- lognorm_adjust(pred1)
pred2 <- pred1adj$pred
se2 <- pred1adj$se
cbind(Observed=c(coredata(xts.koqtr))[101:107], 
      Predicted=round(pred2, 2), SE=round(se2, 2))
##         Observed Predicted   SE
## 2008 Q1     0.67      0.61 0.05
## 2008 Q2     1.01      0.89 0.09
## 2008 Q3     0.83      0.77 0.09
## 2008 Q4     0.64      0.64 0.08
## 2009 Q1     0.65      0.66 0.09
## 2009 Q2     0.92      0.97 0.15
## 2009 Q3     0.82      0.84 0.14
  • 作图
pred2 <- exp(pred1$pred + 0.5*pred1$se^2)
se2 <- sqrt(exp(2*pred1$pred + pred1$se^2)*(exp(pred1$se^2)-1))
cbind(Observed=c(coredata(xts.koqtr))[101:107], Predicted=round(pred2, 2), SE=round(se2, 2))
##         Observed Predicted   SE
## 2008 Q1     0.67      0.61 0.05
## 2008 Q2     1.01      0.89 0.09
## 2008 Q3     0.83      0.77 0.09
## 2008 Q4     0.64      0.64 0.08
## 2009 Q1     0.65      0.66 0.09
## 2009 Q2     0.92      0.97 0.15
## 2009 Q3     0.82      0.84 0.14
tmp.x <- ts(c(coredata(xts.koqtr["2003/"])), start=c(2003,1), frequency = 4)
tmp.p <- ts(c(pred2), start=c(2008,1), frequency = 4)
tmp.lb <- ts(c(pred2) - 2*c(se2), start=c(2008,1), frequency = 4)
tmp.ub <- ts(c(pred2) + 2*c(se2), start=c(2008,1), frequency = 4)
plot(tmp.x, ylim=c(0.3, 1.2), type="l", lwd=2,
     xlab="Year", ylab="")
lines(tmp.p, type="b", pch=2, lty=2, col="red")
lines(tmp.lb, lty=3, col="blue")
lines(tmp.ub, lty=3, col="blue")
legend("topleft", lty=c(1,2,3), lwd=c(2,1,1), pch=c(NA,2,NA),
       col=c("black", "red", "blue"),
       legend=c("Observed", "Predicted", "95% Prediction Limits"))

### 2.10.3 季节哑变量 ##### 案例:CRSP最高10分位资产组合的月简单收益率

da <- read_table(
  "D:/齐安静 教学/时间序列分析/北大/ftsdata/m-deciles08.txt",
  col_types=cols(.default = col_double(),
                 date=col_date("%Y%m%d")))
xts.dec10 <- xts(da[["CAP1RET"]], ymd(da[["date"]]))
dec10 <- ts(da[["CAP1RET"]], start=c(1970, 1), frequency=12)
  • 收益率时序图
plot(dec10, main="CRSP 10 Percentile Index Return Rate",
     xlab="Year", ylab="")

  • 近3年数据
plot(window(dec10, start=c(2006,1), end=c(2008,12)), 
            main="CRSP 10 Percentile Index Return Rate",
     xlab="Year", ylab="")

  • ACF
forecast::Acf(dec10, main="", lag.max=36)

  • 周期12的季节ARIMA
resm1 <- arima(dec10, order=c(1,0,1), 
               seasonal=list(order=c(1,0,1), period=12))
resm1
## 
## Call:
## arima(x = dec10, order = c(1, 0, 1), seasonal = list(order = c(1, 0, 1), period = 12))
## 
## Coefficients:
##           ar1     ma1    sar1     sma1  intercept
##       -0.0639  0.2508  0.9882  -0.9142     0.0117
## s.e.   0.2205  0.2130  0.0092   0.0332     0.0125
## 
## sigma^2 estimated as 0.004704:  log likelihood = 584.69,  aic = -1157.39
jan <- as.numeric(c(cycle(dec10))==1)
lm1 <- lm(c(dec10) ~ jan); summary(lm1)
## 
## Call:
## lm(formula = c(dec10) ~ jan)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30861 -0.03475 -0.00176  0.03254  0.40671 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.002864   0.003333   0.859    0.391    
## jan         0.125251   0.011546  10.848   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06904 on 466 degrees of freedom
## Multiple R-squared:  0.2016, Adjusted R-squared:  0.1999 
## F-statistic: 117.7 on 1 and 466 DF,  p-value: < 2.2e-16
  • 随机误差项相关
forecast::Acf(residuals(lm1), main="")

2.11 带时间序列误差的回归模型

2.11.1 方法示例

案例:国债利率期限结构
library(readr)
library(zoo)
library(xts)
library(lubridate)
da <- read_table(
  "D:/齐安静 教学/时间序列分析/北大/ftsdata/w-gs1yr.txt",
  col_types=cols(.default=col_double()))
x1 <- da[["rate"]]
y1 <- xts(x1, make_date(da[["year"]], da[["mon"]], da[["day"]]))
da <- read_table2(
  "D:/齐安静 教学/时间序列分析/北大/ftsdata/w-gs3yr.txt",
  col_types=cols(.default=col_double()))
## Warning: `read_table2()` was deprecated in readr 2.0.0.
## ℹ Please use `read_table()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
  • 把数据转换成时间序列
x3 <- da[["rate"]]
y3 <- xts(x3, make_date(da[["year"]], da[["mon"]], da[["day"]]))
rm(da)
xts.gs <- cbind(gs1yr=y1, gs3yr=y3)
rm(y1, y3)
  • 画图
plot(xts.gs, type="l", 
     main="US Federal Bonds 1 Year and 3 Years",
     major.ticks="years", minor.ticks=NULL, 
     grid.ticks.on="auto",
     col=c("blue", "red"))

  • 最后三年的图形
plot(last(xts.gs, "3 years"), type="l", 
     main="US Federal Bonds 1 Year and 3 Years",
     major.ticks="years", minor.ticks=NULL, 
     grid.ticks.on="auto",
     col=c("blue", "red"))

  • 这些数据作为时间序列呈现出不平稳的表现。 对一年期利率作ACF估计:
forecast::Acf(x1, main="")

  • 单位根检验
  • 定阶
ar(diff(x1), method="mle")
## 
## Call:
## ar(x = diff(x1), method = "mle")
## 
## Coefficients:
##       1        2        3        4        5        6        7        8  
##  0.3213   0.0372   0.0333   0.0577  -0.0093  -0.0008  -0.0813   0.0312  
##       9  
## -0.0469  
## 
## Order selected 9  sigma^2 estimated as  0.03103
  • 检验
fUnitRoots::adfTest(x1, lags=9, type="ct")
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 9
##   STATISTIC:
##     Dickey-Fuller: -2.3813
##   P VALUE:
##     0.4169 
## 
## Description:
##  Fri Nov 17 11:12:20 2023 by user: qaj
  • 散点图
plot(x1, x3, type="p", pch=16, cex=0.5,
     xlab="1 Year", ylab="3 Years",
     main="US Federal Bonds 3 Years Rate Vs. 1 Year Rate")

  • 做线性回归
lm1 <- lm(x3 ~ x1); summary(lm1)
## 
## Call:
## lm(formula = x3 ~ x1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.82319 -0.37691 -0.01462  0.38661  1.35679 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.83214    0.02417   34.43   <2e-16 ***
## x1           0.92955    0.00357  260.40   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5228 on 2465 degrees of freedom
## Multiple R-squared:  0.9649, Adjusted R-squared:  0.9649 
## F-statistic: 6.781e+04 on 1 and 2465 DF,  p-value: < 2.2e-16
  • 残差ACF图
forecast::Acf(lm1$residuals, main="")

  • 残差白噪声检验
Box.test(lm1$residuals, lag=9)
## 
##  Box-Pierce test
## 
## data:  lm1$residuals
## X-squared = 19303, df = 9, p-value < 2.2e-16
  • 单位根检验
  • 首先定阶
ar(diff(lm1$residuals), method="mle")
## 
## Call:
## ar(x = diff(lm1$residuals), method = "mle")
## 
## Coefficients:
##       1        2        3        4        5        6        7        8  
##  0.2072  -0.0030  -0.0100   0.0911  -0.0641  -0.0642  -0.0475   0.0193  
##       9       10       11       12  
##  0.0163  -0.0509  -0.0174   0.0706  
## 
## Order selected 12  sigma^2 estimated as  0.005036
  • 检验结果
fUnitRoots::adfTest(lm1$residuals, lags=12, type="nc")
## Warning in fUnitRoots::adfTest(lm1$residuals, lags = 12, type = "nc"): p-value
## smaller than printed p-value
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 12
##   STATISTIC:
##     Dickey-Fuller: -4.1201
##   P VALUE:
##     0.01 
## 
## Description:
##  Fri Nov 17 11:12:20 2023 by user: qaj
  • 画残差图
plot(lm1$residuals,type="l")

  • 对利率数据差分
c1 <- diff(x1)
c3 <- diff(x3)
  • 差分后的ACF:
forecast::Acf(c1, main="")

- 差分后的散点图

plot(c1, c3, type="p", pch=16, cex=0.5,
     xlab="1 Year", ylab="3 Years",
     main="US Federal Bonds 3 Years Rate Diff1 Vs. 1 Year Rate Diff1")

  • 线性回归
lm2 <- lm(c3 ~ c1); summary(lm2)
## 
## Call:
## lm(formula = c3 ~ c1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.42459 -0.03578 -0.00117  0.03467  0.48921 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0001051  0.0013890  -0.076     0.94    
## c1           0.7919323  0.0073391 107.906   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06897 on 2464 degrees of freedom
## Multiple R-squared:  0.8253, Adjusted R-squared:  0.8253 
## F-statistic: 1.164e+04 on 1 and 2464 DF,  p-value: < 2.2e-16
  • 继续对残差分析
  • 残差的ACF:
forecast::Acf(lm2$residuals, main="")

  • 残差白噪声检验
Box.test(lm2$residuals, lag=21)
## 
##  Box-Pierce test
## 
## data:  lm2$residuals
## X-squared = 199.57, df = 21, p-value < 2.2e-16
  • ARIMA模型的回归
mres <- arima(c3, xreg=c1, order=c(0,0,1), include.mean=FALSE)
mres
## 
## Call:
## arima(x = c3, order = c(0, 0, 1), xreg = c1, include.mean = FALSE)
## 
## Coefficients:
##          ma1      c1
##       0.1823  0.7936
## s.e.  0.0196  0.0075
## 
## sigma^2 estimated as 0.0046:  log likelihood = 3136.62,  aic = -6267.23

ARIMA函数说明

  • 模拟过程
sima <- function(){
  # 样本量
  n <- 500
  # 自变量
  x <- runif(n, 10, 20)
  # 作为误差项的ARMA
  y <- arima.sim(model=list(ar=0.3, ma=0.7), n=n)
  # 回归参数
  a <- 100; b <- 2
  # 输出的因变量
  z <- a + b*x + y
  cat("Z的样本均值 =", mean(z), "\n")
  
  # 估计:
  res <- arima(z, order=c(1,0,1), xreg=x)
  print(res)
}
set.seed(101)
sima()
## Z的样本均值 = 129.9459 
## 
## Call:
## arima(x = z, order = c(1, 0, 1), xreg = x)
## 
## Coefficients:
##          ar1     ma1  intercept       x
##       0.2927  0.7127    99.8338  1.9994
## s.e.  0.0511  0.0375     0.1643  0.0086
## 
## sigma^2 estimated as 0.8742:  log likelihood = -676.45,  aic = 1362.89
simb <- function(){
  # 样本量
  n <- 500
  # 自变量
  dx <- runif(n, 10, 20) 
  x <- cumsum(dx)
  # 作为误差项的ARMA
  dy <- arima.sim(model=list(
    ar=0.3, ma=0.7), n=n)
  # 回归参数
  a <- 100; b <- 2
  # 输出的因变量
  dz <- a + b*dx + dy
  z <- cumsum(dz)
  
  # 估计,不预先差分:
  res <- arima(z, order=c(1,1,1), xreg=x)
  cat("不预先差分的错误结果:\n")
  print(res)
  
  #估计,预先差分:
  res2 <- arima(dz, order=c(1,0,1), xreg=dx)
  cat("\n\n预先差分的正确结果:\n")
  print(res2)
}
set.seed(101)
simb()
## 不预先差分的错误结果:
## 
## Call:
## arima(x = z, order = c(1, 1, 1), xreg = x)
## 
## Coefficients:
##          ar1     ma1       x
##       0.9999  0.4827  1.9977
## s.e.  0.0002  0.0605  0.0094
## 
## sigma^2 estimated as 1.29:  log likelihood = -776.24,  aic = 1560.49
## 
## 
## 预先差分的正确结果:
## 
## Call:
## arima(x = dz, order = c(1, 0, 1), xreg = dx)
## 
## Coefficients:
##          ar1     ma1  intercept      dx
##       0.2927  0.7127    99.8338  1.9994
## s.e.  0.0511  0.0375     0.1643  0.0086
## 
## sigma^2 estimated as 0.8742:  log likelihood = -676.45,  aic = 1362.89
simc <- function(){
  # 样本量
  n <- 500
  # 自变量
  dx <- runif(n, 10, 20) 
  x <- cumsum(dx)
  # 作为误差项的ARMA
  dy <- arima.sim(model=list(
    ar=0.3, ma=0.7), n=n)
  # 回归参数
  a <- 100; b <- 2
  # 输出的因变量
  z <- a + b*x + dy

  # 估计,不预先差分:
  res <- arima(z, order=c(1,0,1), xreg=x)
  cat("协整情况下的估计:\n")
  print(res)
}
set.seed(101)
simc()
## 协整情况下的估计:
## 
## Call:
## arima(x = z, order = c(1, 0, 1), xreg = x)
## 
## Coefficients:
##          ar1     ma1  intercept      x
##       0.2926  0.7128    99.7804  2e+00
## s.e.  0.0511  0.0374     0.2026  1e-04
## 
## sigma^2 estimated as 0.8741:  log likelihood = -676.42,  aic = 1362.83