date: “Last compiled on 18 October, 2023”
tmp.x <- 0:15
tmp.y <- 0.8^tmp.x
plot(tmp.x, tmp.y, type="h", xlab="lag", ylab="ACF",
ylim=c(-1, 1))
abline(h=0)
- \(\phi_1=-0.8\)的情形
tmp.x <- 0:15
tmp.y <- (-0.8)^tmp.x
plot(tmp.x, tmp.y, type="h", xlab="lag", ylab="ACF",
ylim=c(-1, 1))
abline(h=0)
\[\begin{aligned} X_t =& 0.1 X_{t-1} + 0.5 X_{t-2} + \varepsilon_t \\ X_t =& -0.1 X_{t-1} + 0.5 X_{t-2} + \varepsilon_t \\ X_t =& 0.8 X_{t-1} + \varepsilon_t \\ X_t =& X_{t-1} - 0.1 X_{t-2} + \varepsilon_t \\ X_t =& -X_{t-1} - 0.1 X_{t-2} + \varepsilon_t \\ X_t =& \left(\frac{2}{1.1}\cos\frac{\pi}{6}\right) X_{t-1} - \frac{1}{1.1^2} X_{t-2} + \varepsilon_t \\ X_t =& \left(\frac{2}{1.1}\cos\frac{\pi}{2}\right) X_{t-1} - \frac{1}{1.1^2} X_{t-2} + \varepsilon_t \\ X_t =& \left(\frac{2}{1.1}\cos\frac{2\pi}{3}\right) X_{t-1} - \frac{1}{1.1^2} X_{t-2} + \varepsilon_t \\ \end{aligned}\]
library(ggplot2)
AR2.sim <- function(n, ar1, ar2, sd=1) {
Xt = c(0,0)
# 初始化一个长度为n+2的向量
for (i in 2:n) { # 从第二个观测值开始循环
Xt[i+1] = ar1*Xt[i] + ar2*Xt[i-1] + rnorm(1,mean=0,sd=sd)
# 按照AR(2)的公式生成下一个观测值
}
Xt # 返回生成的向量
}
char.root <- function(ar1,ar2){
char.coef <- c(-ar2,-ar1,1)
roots <- polyroot(char.coef)
cat("roots: ",roots,"\n")
}
Xt1 <- AR2.sim(1000, 0.1, 0.5)
xt1 <- expression(X[t] == 0.1*X[t-1]+0.5*X[t-2]+epsilon[t])
char.root(0.1,0.5)
## roots: 0.7588723-0i -0.6588723+0i
acf(Xt1, main = xt1)
Xt2 <- AR2.sim(1000, -0.1,0.5)
char.root(-0.1,0.5)
## roots: 0.6588723+0i -0.7588723+0i
xt2 <- expression(X[t] == -0.1*X[t-1]+0.5*X[t-2]+epsilon[t])
acf(Xt2, main = xt2)
Xt3 = AR2.sim(1000, 1,-0.1)
char.root(1,-0.1)
## roots: 0.1127017-0i 0.8872983+0i
xt3 = expression(X[t] == X[t-1]-0.1*X[t-2]+epsilon[t])
acf(Xt3, main = xt3)
Xt4 = AR2.sim(1000, -1,-0.1)
char.root(-1,-0.1)
## roots: -0.1127017-0i -0.8872983+0i
xt4 = expression(X[t] == -X[t-1]-0.1*X[t-2]+epsilon[t])
acf(Xt4, main = xt4)
Xt5 = AR2.sim(1000, (2/1.1)*cos(pi/6),-1/1.1^2)
char.root((2/1.1)*cos(pi/6),-1/1.1^2)
## roots: 0.7872958+0.4545455i 0.7872958-0.4545455i
xt5 = expression(X[t] == frac(2,1.1)*cos(frac(pi,6))*X[t-1]-frac(1,1.1^2)*X[t-2]+epsilon[t])
acf(Xt5, main = xt5)
Xt6 = AR2.sim(1000, (2/1.1)*cos(pi/2),-1/1.1^2)
char.root((2/1.1)*cos(pi/2),-1/1.1^2)
## roots: 0+0.9090909i 0-0.9090909i
xt6 = expression(X[t] == frac(2,1.1)*cos(frac(pi,2))*X[t-1]-frac(1,1.1^2)*X[t-2]+epsilon[t])
acf(Xt6, main = xt6)
Xt7 = AR2.sim(1000, (2/1.1)*cos(2*pi/3),-1/1.1^2)
char.root((2/1.1)*cos(2*pi/3),-1/1.1^2)
## roots: -0.4545455+0.7872958i -0.4545455-0.7872958i
xt7 = expression(X[t] == frac(2,1.1)*cos(frac(2*pi,3))*X[t-1]-frac(1,1.1^2)*X[t-2]+epsilon[t])
acf(Xt7, main = xt7)
Xt8 = AR2.sim(1000, 0.6, -0.4)
char.root(0.6,-0.4)
## roots: 0.3+0.5567764i 0.3-0.5567764i
xt8 = expression(X[t] == 0.6*X[t-1]-0.4*X[t-2]+epsilon[t])
acf(Xt8, main = xt8)
- 周期的计算
cycling <- function(ar1, ar2){
omega = acos(ar1/(2*sqrt(-ar2)));
p <- 2*pi/omega;
p
}
cycling((2/1.1)*cos(pi/6),-1/1.1^2)
## [1] 12
cycling((2/1.1)*cos(pi/2),-1/1.1^2)
## [1] 4
cycling((2/1.1)*cos(2*pi/3),-1/1.1^2)
## [1] 3
# 模拟数据的长度
n <- 1000
# 模型的参数
ar1 <- 0.1
ar2 <- 0.5
sd <- 1
# 初始化数据向量
Xt <- numeric(n)
# 循环生成数据
for (i in 3:n) {
Xt[i] <- ar1 * Xt[i-1] + ar2 * Xt[i-2] + rnorm(1, mean = 0, sd = sd)
}
xt1 = expression(X[t] == 0.1* X[t-1] + 0.5* X[t-2]+epsilon[t])
# 绘制ACF函数图
acf(Xt, main = xt1)
library(readr)
da <- read_table("D:/齐安静 教学/时间序列分析/北大/ftsdata/q-gnp4710.txt", col_types=cols(.default = col_double()))
head(da)
## # A tibble: 6 × 4
## Year Mon Dat VALUE
## <dbl> <dbl> <dbl> <dbl>
## 1 1947 1 1 238.
## 2 1947 4 1 242.
## 3 1947 7 1 246.
## 4 1947 10 1 256.
## 5 1948 1 1 262.
## 6 1948 4 1 269.
dim(da)
## [1] 253 4
gnp <- ts(da[["VALUE"]], start=c(1947, 1), frequency=4)
plot(log(gnp), xlab="year", ylab="log(GNP)")
rate <- diff(log(gnp))
plot(rate, xlab="Year", ylab="Growth")
abline(h=0, col="gray")
forecast::Acf(rate, xlab="Years",main="")
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
pacf(rate, xlab="Years", main="")
- AR模型估计
acf(rate,lag=12)
pacf(rate,lag=12)
ar.model <- arima(rate,order=c(3,0,0))
ar.model
##
## Call:
## arima(x = rate, order = c(3, 0, 0))
##
## Coefficients:
## ar1 ar2 ar3 intercept
## 0.4386 0.2063 -0.1559 0.0163
## s.e. 0.0620 0.0666 0.0626 0.0012
##
## sigma^2 estimated as 9.549e-05: log likelihood = 808.56, aic = -1607.12
# tsdiag(ar.model)
pl <- c(1,-ar.model$coef[1:3])
root1 <- polyroot(pl) #求多项式的0点
root1
## [1] 1.616116+0.864212i -1.909216-0.000000i 1.616116-0.864212i
Mod(root1) # 复数的模
## [1] 1.832674 1.909216 1.832674
abs(root1)
## [1] 1.832674 1.909216 1.832674
k <- 2*pi/acos(Re(root1[1])/Mod(root1[1]))
k
## [1] 12.79523
library(readr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(xts)
d <- read_table(
"D:/齐安静 教学/时间序列分析/北大/ftsdata/m-ibm3dx2608.txt",
col_types=cols(.default=col_double(),
date=col_date(format="%Y%m%d")))
ibmind <- xts(as.matrix(d[,-1]), d$date) #转换为xts格式
tclass(ibmind) <- "yearmon" #将ibmind中的时间类别设定时间为年 月
vw <- ts(coredata(ibmind)[,"vwrtn"], start=c(1926,1), frequency=12)
head(ibmind)
## ibmrtn vwrtn ewrtn sprtn
## Jan 1926 -0.010381 0.000724 0.023174 0.022472
## Feb 1926 -0.024476 -0.033374 -0.053510 -0.043956
## Mar 1926 -0.115591 -0.064341 -0.096824 -0.059113
## Apr 1926 0.089783 0.038358 0.032946 0.022688
## May 1926 0.036932 0.012172 0.001035 0.007679
## Jun 1926 0.068493 0.056888 0.050487 0.043184
plot(vw, main="CRSP Value Weighted Index Monthly Return")
abline(h=0, col="gray")
forecast::Acf(vw, main="ACF")
pacf(vw, main="PACF")
2/sqrt(length(vw))
## [1] 0.06337243
tmp <- pacf(vw, main="", plot=FALSE, lag.max=12)
cbind(k=round(tmp$lag*12), pacf=round(tmp$acf, 4))
## k pacf
## [1,] 1 0.1154
## [2,] 2 -0.0304
## [3,] 3 -0.1025
## [4,] 4 0.0326
## [5,] 5 0.0618
## [6,] 6 -0.0502
## [7,] 7 0.0312
## [8,] 8 0.0517
## [9,] 9 0.0635
## [10,] 10 0.0053
## [11,] 11 -0.0052
## [12,] 12 0.0109
gnprate <- diff(log(gnp))
resm <- ar(gnprate, method="mle"); resm
##
## Call:
## ar(x = gnprate, method = "mle")
##
## Coefficients:
## 1 2 3 4 5 6 7 8
## 0.4318 0.1985 -0.1180 0.0189 -0.1607 0.0900 0.0615 -0.0814
## 9
## 0.1940
##
## Order selected 9 sigma^2 estimated as 8.918e-05
plot(as.numeric(names(resm$aic)), resm$aic, type="h",
xlab="k", ylab="AIC")
tmp.T <- length(gnprate)
tmp.bic <- resm$aic + as.numeric(names(resm$aic)) * (log(tmp.T) - 2)/tmp.T
plot(as.numeric(names(resm$aic)), tmp.bic, type="h",
xlab="k", ylab="BIC")
# `as.numeric(names(resm$aic))`是一个R语言的表达式,用于将`names(resm$aic)`的结果转换为数值类型。`names(resm$aic)`是一个函数,用于提取`resm$aic`的名称,即该模型中的参数个数。例如,如果`resm$aic`的名称是"ar1",表示该模型只有一个自回归参数,那么`names(resm$aic)`的结果就是"ar1",而`as.numeric(names(resm$aic))`的结果就是1。这样,我们就可以用这个数值来计算BIC公式中的k项。
案例:CRSP价值加权指数月度收益率
步骤一:定阶
resm <- ar(vw, method="mle"); resm
##
## Call:
## ar(x = vw, method = "mle")
##
## Coefficients:
## 1 2 3 4 5 6 7 8
## 0.1167 -0.0112 -0.1126 0.0217 0.0735 -0.0452 0.0254 0.0462
## 9
## 0.0660
##
## Order selected 9 sigma^2 estimated as 0.002831
round(resm$aic, 2)
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 22.33 10.99 12.07 3.35 4.37 2.46 1.96 3.04 2.24 0.00 1.97 3.94 5.81
mean.vw <- mean(vw); mean.vw
## [1] 0.00890439
resm2 <- arima(vw, order=c(3,0,0))
resm2
##
## Call:
## arima(x = vw, order = c(3, 0, 0))
##
## Coefficients:
## ar1 ar2 ar3 intercept
## 0.1158 -0.0187 -0.1042 0.0089
## s.e. 0.0315 0.0317 0.0317 0.0017
##
## sigma^2 estimated as 0.002875: log likelihood = 1500.86, aic = -2991.73
(1 + mean.vw)^12 - 1
## [1] 0.1122442
((1 + mean.vw)^12)^83 - 1
## [1] 6832.002
prod(1+vw)-1
## [1] 1591.953
prod(1 + vw)^(1/83) - 1
## [1] 0.09290084
R的forecast包提供了一个auto.arima()函数,
可以自动进行模型选择。
tseries包的arma()函数可以用条件极小二乘方法估计ARMA模型参数,
使用了optim()函数来求极值。结果可以用summary()显示。
tseries::arma()的结果还支持print(),
plot(), coef(),vcov(),
residuals(), fitted()等信息提取函数。
模型检验
resm2 <- arima(vw, order=c(3,0,0))
Box.test(resm2$residuals, lag=12, type="Ljung", fitdf=3)
##
## Box-Ljung test
##
## data: resm2$residuals
## X-squared = 16.352, df = 9, p-value = 0.05988
resm3 <- arima(vw, order=c(3,0,0), fixed=c(NA, 0, NA, NA)); resm3
## Warning in arima(vw, order = c(3, 0, 0), fixed = c(NA, 0, NA, NA)): some AR
## parameters were fixed: setting transform.pars = FALSE
##
## Call:
## arima(x = vw, order = c(3, 0, 0), fixed = c(NA, 0, NA, NA))
##
## Coefficients:
## ar1 ar2 ar3 intercept
## 0.1136 0 -0.1063 0.0089
## s.e. 0.0313 0 0.0315 0.0017
##
## sigma^2 estimated as 0.002876: log likelihood = 1500.69, aic = -2993.38
abs(polyroot(c(1, -coef(resm3)[1:3])))
## [1] 2.031585 2.279433 2.031585
Box.test(resm3$residuals, lag=12, type="Ljung", fitdf=2)
##
## Box-Ljung test
##
## data: resm3$residuals
## X-squared = 16.828, df = 10, p-value = 0.07827
checkresidualsforecast::checkresiduals(resm3)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,0,0) with non-zero mean
## Q* = 46.896, df = 21, p-value = 0.0009699
##
## Model df: 3. Total lags used: 24
tsdiag()tsdiag(resm3)
- 拟合优度
r2_1 <- 1 - resm2$sigma2/var(vw) ;r2_1
## [1] 0.02579431
r2_2 <- 1 - resm3$sigma2/var(vw) ;r2_2
## [1] 0.02545196
resm4 <- arima(vw[1:984], order=c(3,0,0))
pred4 <- predict(resm4, n.ahead=12, se.fit=TRUE)
cbind(Observed=round(c(vw[985:996]), 4),
Predict=round(c(pred4$pred), 4),
SE=round(c(pred4$se), 3))
## Observed Predict SE
## [1,] -0.0623 0.0075 0.053
## [2,] -0.0220 0.0160 0.054
## [3,] -0.0105 0.0117 0.054
## [4,] 0.0511 0.0098 0.054
## [5,] 0.0238 0.0088 0.054
## [6,] -0.0786 0.0092 0.054
## [7,] -0.0132 0.0094 0.054
## [8,] 0.0110 0.0096 0.054
## [9,] -0.0981 0.0095 0.054
## [10,] -0.1847 0.0095 0.054
## [11,] -0.0852 0.0095 0.054
## [12,] 0.0215 0.0095 0.054
plot(window(vw, start=c(2007, 1), end=c(2008,12)),
ylab="", type="b", ylim=c(-0.2, 0.2), lwd=2)
lines(ts(c(pred4$pred), start=c(2008,1), frequency = 12),
col="red", lwd=1, lty=2, type="b", pch=2)
lines(ts(c(pred4$pred) - 2*c(pred4$se), start=c(2008,1), frequency = 12),
col="blue", lwd=2, lty=3, type="l")
lines(ts(c(pred4$pred) + 2*c(pred4$se), start=c(2008,1), frequency = 12),
col="blue", lwd=2, lty=3, type="l")
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.