date: “Last compiled on 18 October, 2023”

2.5.1 ACF的图形

tmp.x <- 0:15
tmp.y <- 0.8^tmp.x
plot(tmp.x, tmp.y, type="h", xlab="lag", ylab="ACF",
     ylim=c(-1, 1))
abline(h=0)

- \(\phi_1=-0.8\)的情形

tmp.x <- 0:15
tmp.y <- (-0.8)^tmp.x
plot(tmp.x, tmp.y, type="h", xlab="lag", ylab="ACF",
     ylim=c(-1, 1))
abline(h=0)

2.5.4 AR(2)的性质

\[\begin{aligned} X_t =& 0.1 X_{t-1} + 0.5 X_{t-2} + \varepsilon_t \\ X_t =& -0.1 X_{t-1} + 0.5 X_{t-2} + \varepsilon_t \\ X_t =& 0.8 X_{t-1} + \varepsilon_t \\ X_t =& X_{t-1} - 0.1 X_{t-2} + \varepsilon_t \\ X_t =& -X_{t-1} - 0.1 X_{t-2} + \varepsilon_t \\ X_t =& \left(\frac{2}{1.1}\cos\frac{\pi}{6}\right) X_{t-1} - \frac{1}{1.1^2} X_{t-2} + \varepsilon_t \\ X_t =& \left(\frac{2}{1.1}\cos\frac{\pi}{2}\right) X_{t-1} - \frac{1}{1.1^2} X_{t-2} + \varepsilon_t \\ X_t =& \left(\frac{2}{1.1}\cos\frac{2\pi}{3}\right) X_{t-1} - \frac{1}{1.1^2} X_{t-2} + \varepsilon_t \\ \end{aligned}\]

library(ggplot2)
AR2.sim <- function(n, ar1, ar2, sd=1) { 
  Xt = c(0,0) 
  # 初始化一个长度为n+2的向量 
  for (i in 2:n) { # 从第二个观测值开始循环 
    Xt[i+1] = ar1*Xt[i] + ar2*Xt[i-1] + rnorm(1,mean=0,sd=sd) 
    # 按照AR(2)的公式生成下一个观测值 
    } 
Xt # 返回生成的向量 
}

char.root <- function(ar1,ar2){
  char.coef <- c(-ar2,-ar1,1)
  roots <- polyroot(char.coef)
  cat("roots: ",roots,"\n")
}

Xt1 <- AR2.sim(1000, 0.1, 0.5)
xt1 <- expression(X[t] == 0.1*X[t-1]+0.5*X[t-2]+epsilon[t])
char.root(0.1,0.5)
## roots:  0.7588723-0i -0.6588723+0i
acf(Xt1, main = xt1)

Xt2 <- AR2.sim(1000, -0.1,0.5)
char.root(-0.1,0.5)
## roots:  0.6588723+0i -0.7588723+0i
xt2 <- expression(X[t] == -0.1*X[t-1]+0.5*X[t-2]+epsilon[t])
acf(Xt2, main = xt2)

Xt3 = AR2.sim(1000, 1,-0.1)
char.root(1,-0.1)
## roots:  0.1127017-0i 0.8872983+0i
xt3 = expression(X[t] == X[t-1]-0.1*X[t-2]+epsilon[t])
acf(Xt3, main = xt3)

Xt4 = AR2.sim(1000, -1,-0.1)
char.root(-1,-0.1)
## roots:  -0.1127017-0i -0.8872983+0i
xt4 = expression(X[t] == -X[t-1]-0.1*X[t-2]+epsilon[t])
acf(Xt4, main = xt4)

Xt5 = AR2.sim(1000, (2/1.1)*cos(pi/6),-1/1.1^2)
char.root((2/1.1)*cos(pi/6),-1/1.1^2)
## roots:  0.7872958+0.4545455i 0.7872958-0.4545455i
xt5 = expression(X[t] == frac(2,1.1)*cos(frac(pi,6))*X[t-1]-frac(1,1.1^2)*X[t-2]+epsilon[t])
acf(Xt5, main = xt5)

Xt6 = AR2.sim(1000, (2/1.1)*cos(pi/2),-1/1.1^2)
char.root((2/1.1)*cos(pi/2),-1/1.1^2)
## roots:  0+0.9090909i 0-0.9090909i
xt6 = expression(X[t] == frac(2,1.1)*cos(frac(pi,2))*X[t-1]-frac(1,1.1^2)*X[t-2]+epsilon[t])
acf(Xt6, main = xt6)

Xt7 = AR2.sim(1000, (2/1.1)*cos(2*pi/3),-1/1.1^2)
char.root((2/1.1)*cos(2*pi/3),-1/1.1^2)
## roots:  -0.4545455+0.7872958i -0.4545455-0.7872958i
xt7 = expression(X[t] == frac(2,1.1)*cos(frac(2*pi,3))*X[t-1]-frac(1,1.1^2)*X[t-2]+epsilon[t])
acf(Xt7, main = xt7)

Xt8 = AR2.sim(1000, 0.6, -0.4)
char.root(0.6,-0.4)
## roots:  0.3+0.5567764i 0.3-0.5567764i
xt8 = expression(X[t] == 0.6*X[t-1]-0.4*X[t-2]+epsilon[t])
acf(Xt8, main = xt8)

- 周期的计算

cycling <- function(ar1, ar2){
  omega = acos(ar1/(2*sqrt(-ar2)));
  p <- 2*pi/omega;
  p
}
cycling((2/1.1)*cos(pi/6),-1/1.1^2)
## [1] 12
cycling((2/1.1)*cos(pi/2),-1/1.1^2)
## [1] 4
cycling((2/1.1)*cos(2*pi/3),-1/1.1^2)
## [1] 3
# 模拟数据的长度
n <- 1000

# 模型的参数
ar1 <- 0.1
ar2 <- 0.5
sd <- 1

# 初始化数据向量
Xt <- numeric(n)

# 循环生成数据
for (i in 3:n) {
  Xt[i] <- ar1 * Xt[i-1] + ar2 * Xt[i-2] + rnorm(1, mean = 0, sd = sd)
}

xt1 = expression(X[t] == 0.1* X[t-1] + 0.5* X[t-2]+epsilon[t])

# 绘制ACF函数图
acf(Xt, main = xt1)

library(readr)
da <- read_table("D:/齐安静 教学/时间序列分析/北大/ftsdata/q-gnp4710.txt", col_types=cols(.default = col_double()))
head(da)
## # A tibble: 6 × 4
##    Year   Mon   Dat VALUE
##   <dbl> <dbl> <dbl> <dbl>
## 1  1947     1     1  238.
## 2  1947     4     1  242.
## 3  1947     7     1  246.
## 4  1947    10     1  256.
## 5  1948     1     1  262.
## 6  1948     4     1  269.
dim(da)
## [1] 253   4
gnp <- ts(da[["VALUE"]], start=c(1947, 1), frequency=4)
plot(log(gnp), xlab="year", ylab="log(GNP)")

rate <- diff(log(gnp))
plot(rate, xlab="Year", ylab="Growth")
abline(h=0, col="gray")

forecast::Acf(rate, xlab="Years",main="")
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

pacf(rate, xlab="Years", main="")

- AR模型估计

acf(rate,lag=12)

pacf(rate,lag=12)

ar.model <- arima(rate,order=c(3,0,0))
ar.model
## 
## Call:
## arima(x = rate, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1     ar2      ar3  intercept
##       0.4386  0.2063  -0.1559     0.0163
## s.e.  0.0620  0.0666   0.0626     0.0012
## 
## sigma^2 estimated as 9.549e-05:  log likelihood = 808.56,  aic = -1607.12
# tsdiag(ar.model)
pl <- c(1,-ar.model$coef[1:3])
root1 <- polyroot(pl) #求多项式的0点
root1
## [1]  1.616116+0.864212i -1.909216-0.000000i  1.616116-0.864212i
Mod(root1) # 复数的模
## [1] 1.832674 1.909216 1.832674
abs(root1)
## [1] 1.832674 1.909216 1.832674
k <- 2*pi/acos(Re(root1[1])/Mod(root1[1]))
k
## [1] 12.79523

2.5.6 偏自相关函数

library(readr)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(xts)
d <- read_table(
  "D:/齐安静 教学/时间序列分析/北大/ftsdata/m-ibm3dx2608.txt",
  col_types=cols(.default=col_double(),
                 date=col_date(format="%Y%m%d")))
ibmind <- xts(as.matrix(d[,-1]), d$date) #转换为xts格式
tclass(ibmind) <- "yearmon" #将ibmind中的时间类别设定时间为年 月
vw <- ts(coredata(ibmind)[,"vwrtn"], start=c(1926,1), frequency=12)
head(ibmind)
##             ibmrtn     vwrtn     ewrtn     sprtn
## Jan 1926 -0.010381  0.000724  0.023174  0.022472
## Feb 1926 -0.024476 -0.033374 -0.053510 -0.043956
## Mar 1926 -0.115591 -0.064341 -0.096824 -0.059113
## Apr 1926  0.089783  0.038358  0.032946  0.022688
## May 1926  0.036932  0.012172  0.001035  0.007679
## Jun 1926  0.068493  0.056888  0.050487  0.043184
plot(vw, main="CRSP Value Weighted Index Monthly Return")
abline(h=0, col="gray")

forecast::Acf(vw, main="ACF")

pacf(vw, main="PACF")

2/sqrt(length(vw))
## [1] 0.06337243
tmp <- pacf(vw, main="", plot=FALSE, lag.max=12)
cbind(k=round(tmp$lag*12), pacf=round(tmp$acf, 4))
##        k    pacf
##  [1,]  1  0.1154
##  [2,]  2 -0.0304
##  [3,]  3 -0.1025
##  [4,]  4  0.0326
##  [5,]  5  0.0618
##  [6,]  6 -0.0502
##  [7,]  7  0.0312
##  [8,]  8  0.0517
##  [9,]  9  0.0635
## [10,] 10  0.0053
## [11,] 11 -0.0052
## [12,] 12  0.0109

信息准则

gnprate <- diff(log(gnp))
resm <- ar(gnprate, method="mle"); resm
## 
## Call:
## ar(x = gnprate, method = "mle")
## 
## Coefficients:
##       1        2        3        4        5        6        7        8  
##  0.4318   0.1985  -0.1180   0.0189  -0.1607   0.0900   0.0615  -0.0814  
##       9  
##  0.1940  
## 
## Order selected 9  sigma^2 estimated as  8.918e-05
plot(as.numeric(names(resm$aic)), resm$aic, type="h",
     xlab="k", ylab="AIC")

  • BIC
tmp.T <- length(gnprate)
tmp.bic <- resm$aic + as.numeric(names(resm$aic)) * (log(tmp.T) - 2)/tmp.T
plot(as.numeric(names(resm$aic)), tmp.bic, type="h",
     xlab="k", ylab="BIC")

# `as.numeric(names(resm$aic))`是一个R语言的表达式,用于将`names(resm$aic)`的结果转换为数值类型。`names(resm$aic)`是一个函数,用于提取`resm$aic`的名称,即该模型中的参数个数。例如,如果`resm$aic`的名称是"ar1",表示该模型只有一个自回归参数,那么`names(resm$aic)`的结果就是"ar1",而`as.numeric(names(resm$aic))`的结果就是1。这样,我们就可以用这个数值来计算BIC公式中的k项。

2.5.8 参数估计

  • 案例:CRSP价值加权指数月度收益率

  • 步骤一:定阶

resm <- ar(vw, method="mle"); resm
## 
## Call:
## ar(x = vw, method = "mle")
## 
## Coefficients:
##       1        2        3        4        5        6        7        8  
##  0.1167  -0.0112  -0.1126   0.0217   0.0735  -0.0452   0.0254   0.0462  
##       9  
##  0.0660  
## 
## Order selected 9  sigma^2 estimated as  0.002831
  • AIC数值
round(resm$aic, 2)
##     0     1     2     3     4     5     6     7     8     9    10    11    12 
## 22.33 10.99 12.07  3.35  4.37  2.46  1.96  3.04  2.24  0.00  1.97  3.94  5.81
  • 均值
mean.vw <- mean(vw); mean.vw
## [1] 0.00890439
  • 估计
resm2 <- arima(vw, order=c(3,0,0))
resm2
## 
## Call:
## arima(x = vw, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1      ar2      ar3  intercept
##       0.1158  -0.0187  -0.1042     0.0089
## s.e.  0.0315   0.0317   0.0317     0.0017
## 
## sigma^2 estimated as 0.002875:  log likelihood = 1500.86,  aic = -2991.73
  • 年简单收益率
(1 + mean.vw)^12 - 1
## [1] 0.1122442
  • 预测总收益率
((1 + mean.vw)^12)^83 - 1
## [1] 6832.002
  • 真实回报率
prod(1+vw)-1
## [1] 1591.953
  • 据此计算的平均年收益
prod(1 + vw)^(1/83) - 1
## [1] 0.09290084
  • R的forecast包提供了一个auto.arima()函数, 可以自动进行模型选择。

  • tseries包的arma()函数可以用条件极小二乘方法估计ARMA模型参数, 使用了optim()函数来求极值。结果可以用summary()显示。 tseries::arma()的结果还支持print(), plot(), coef(),vcov(), residuals(), fitted()等信息提取函数。

  • 模型检验

resm2 <- arima(vw, order=c(3,0,0))
Box.test(resm2$residuals, lag=12, type="Ljung", fitdf=3)
## 
##  Box-Ljung test
## 
## data:  resm2$residuals
## X-squared = 16.352, df = 9, p-value = 0.05988
  • 改进模型
    • 把2阶自回归的系数设定为0
resm3 <- arima(vw, order=c(3,0,0), fixed=c(NA, 0, NA, NA)); resm3
## Warning in arima(vw, order = c(3, 0, 0), fixed = c(NA, 0, NA, NA)): some AR
## parameters were fixed: setting transform.pars = FALSE
## 
## Call:
## arima(x = vw, order = c(3, 0, 0), fixed = c(NA, 0, NA, NA))
## 
## Coefficients:
##          ar1  ar2      ar3  intercept
##       0.1136    0  -0.1063     0.0089
## s.e.  0.0313    0   0.0315     0.0017
## 
## sigma^2 estimated as 0.002876:  log likelihood = 1500.69,  aic = -2993.38
  • 新模型需要查看是否存在潜在的稳定性问题
abs(polyroot(c(1, -coef(resm3)[1:3])))
## [1] 2.031585 2.279433 2.031585
  • 新模型的白噪声检验
Box.test(resm3$residuals, lag=12, type="Ljung", fitdf=2)
## 
##  Box-Ljung test
## 
## data:  resm3$residuals
## X-squared = 16.828, df = 10, p-value = 0.07827
  • 检验命令checkresiduals
forecast::checkresiduals(resm3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,0,0) with non-zero mean
## Q* = 46.896, df = 21, p-value = 0.0009699
## 
## Model df: 3.   Total lags used: 24
  • 第二个检验命令tsdiag()
tsdiag(resm3)

- 拟合优度

r2_1 <- 1 - resm2$sigma2/var(vw) ;r2_1
## [1] 0.02579431
r2_2 <- 1 - resm3$sigma2/var(vw) ;r2_2
## [1] 0.02545196
  • 预测
resm4 <- arima(vw[1:984], order=c(3,0,0))
pred4 <- predict(resm4, n.ahead=12, se.fit=TRUE)
cbind(Observed=round(c(vw[985:996]), 4), 
      Predict=round(c(pred4$pred), 4), 
      SE=round(c(pred4$se), 3))
##       Observed Predict    SE
##  [1,]  -0.0623  0.0075 0.053
##  [2,]  -0.0220  0.0160 0.054
##  [3,]  -0.0105  0.0117 0.054
##  [4,]   0.0511  0.0098 0.054
##  [5,]   0.0238  0.0088 0.054
##  [6,]  -0.0786  0.0092 0.054
##  [7,]  -0.0132  0.0094 0.054
##  [8,]   0.0110  0.0096 0.054
##  [9,]  -0.0981  0.0095 0.054
## [10,]  -0.1847  0.0095 0.054
## [11,]  -0.0852  0.0095 0.054
## [12,]   0.0215  0.0095 0.054
  • 对预测结果作图
plot(window(vw, start=c(2007, 1), end=c(2008,12)),
     ylab="", type="b", ylim=c(-0.2, 0.2), lwd=2)
lines(ts(c(pred4$pred), start=c(2008,1), frequency = 12), 
      col="red", lwd=1, lty=2, type="b", pch=2)
lines(ts(c(pred4$pred) - 2*c(pred4$se), start=c(2008,1), frequency = 12), 
      col="blue", lwd=2, lty=3, type="l")
lines(ts(c(pred4$pred) + 2*c(pred4$se), start=c(2008,1), frequency = 12), 
      col="blue", lwd=2, lty=3, type="l")

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.