长记忆模型

分数差分序列的模拟

  1. d=0.3,选择ma
# 加载fracdiff包
library(fracdiff)

# 定义参数
d <- 0.3 # 分数差分的阶数
n <- 1000 # 时间序列的长度
sigma <- 1 # 白噪声的标准差
set.seed(123) # 设置随机数种子

# 生成分数差分序列
fds <- fracdiff.sim(n=n,ar=NULL, d=d, sigma)

fds_series <- fds$series

plot(fds_series,  type="l", ylab = "FDS")

forecast::Acf(fds_series, main="", lag.max=300)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

2. d=0.3,选择ar

# 加载fracdiff包
library(fracdiff)

# 定义参数
d <- 0.3 # 分数差分的阶数
n <- 1000 # 时间序列的长度
sigma <- 1 # 白噪声的标准差
set.seed(123) # 设置随机数种子

# 生成分数差分序列
fds <- fracdiff.sim(n=n,ma=NULL, d=d, sigma)
## Warning in fracdiff.sim(n = n, ma = NULL, d = d, sigma): 'ar' part of fracdiff
## model is not stationary!!
fds_series <- fds$series

plot(fds_series,  type="l", ylab = "FDS")

  1. d=-0.2,选择ma
# 加载fracdiff包
library(fracdiff)

# 定义参数
d <- -0.3 # 分数差分的阶数
n <- 1000 # 时间序列的长度
sigma <- 1 # 白噪声的标准差
set.seed(123) # 设置随机数种子

# 生成分数差分序列
fds <- fracdiff.sim(n=n,ar=NULL, d=d, sigma)

fds_series <- fds$series

plot(fds_series, type="l", ylab = "FDS")

forecast::Acf(fds_series, main="", lag.max=300)

  1. d=-0.2,选择ar
# 加载fracdiff包
library(fracdiff)

# 定义参数
d <- -0.2 # 分数差分的阶数
n <- 1000 # 时间序列的长度
sigma <- 1 # 白噪声的标准差
set.seed(123)

# 生成分数差分序列
fds <- fracdiff.sim(n=n,ma=NULL, d=d, sigma)
## Warning in fracdiff.sim(n = n, ma = NULL, d = d, sigma): 'ar' part of fracdiff
## model is not stationary!!
fds_series <- fds$series

plot(fds_series,  type="l", ylab = "FDS")

forecast::Acf(fds_series, main="", lag.max=300)

5. d=0.6,选择ma

# 加载fracdiff包
library(fracdiff)

# 定义参数
# d <- 0.6 # 分数差分的阶数
# n <- 1000 # 时间序列的长度
# sigma <- 1 # 白噪声的标准差
# set.seed(123) # 设置随机数种子
# 
# # 生成分数差分序列
# fds <- fracdiff.sim(n=n,ar=NULL, d=d, sigma)
# 
# fds_series <- fds$series
# 
# plot(fds_series, type="l", ylab = "FDS")
CRSP价值加权指数和等权指数的日简单收益率(1970-2008)
  • 读入数据,计算日收益率的绝对值:
library(readr)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(xts)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
da <- read_table(
  "D:/齐安静 教学/时间序列分析/北大/ftsdata/d-ibm3dx7008.txt",
  col_types=cols(.default=col_double(),
                 Date=col_date("%Y%m%d"))
)
xts.crspw <- xts(da[,-1], da$Date)
vw <- abs(da$vwretd)
ew <- abs(da$ewretd)
  • 价值加权日收益率绝对值序列的ACF:
forecast::Acf(vw, main="", lag.max=300)

  • 等加权日收益率绝对值序列的ACF:
forecast::Acf(ew, main="", lag.max=300)

  • 函数fracdiff::fdGPH()计算差分阶的Geweke-Porter-Hudak估计值:
fracdiff::fdGPH(vw)
## $d
## [1] 0.372226
## 
## $sd.as
## [1] 0.0698385
## 
## $sd.reg
## [1] 0.06868857
  • 用fracdiff::fracdiff()函数进行ARFIMA模型估计:
mres <- fracdiff::fracdiff(vw, nar=1, nma=1)
summary(mres)
## 
## Call:
##   fracdiff::fracdiff(x = vw, nar = 1, nma = 1) 
## 
## Coefficients:
##    Estimate Std. Error z value Pr(>|z|)    
## d  0.490938   0.007997   61.39   <2e-16 ***
## ar 0.113389   0.005988   18.94   <2e-16 ***
## ma 0.575895   0.005946   96.85   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## sigma[eps] = 0.0065619 
## [d.tol = 0.0001221, M = 100, h = 0.0003742]
## Log likelihood: 3.551e+04 ==> AIC = -71021.02 [4 deg.freedom]
  • 查看预测值
library(fracdiff)
library(readr)
library(zoo)
library(xts)

# read the data file
da <- read_table(
  "D:/齐安静 教学/时间序列分析/北大/ftsdata/d-ibm3dx7008.txt",
  col_types=cols(.default=col_double(),
                 Date=col_date(format="%Y%m%d")))

# calculate the absolute value of the daily return
vw <- abs(da$vwretd)

# split the data into estimation and prediction parts
n <- length(vw) # total number of observations
m <- 5 # number of observations for prediction
vw.est <- vw[1:(n-m)] # estimation data
vw.pre <- vw[(n-m+1):n] # prediction data

# estimate the fractional order by fdGPH()
res <- fdGPH(vw.est)
d <- res$d # fractional order
d
## [1] 0.3613705
sd <- res$sd.as # standard error

# 使用fracdiff函数拟合分数差分模型,并得到模型的系数和残差
fdmodel <- fracdiff(vw.est, nar = 0, nma = 0, drange = res$d)
# 使用forecast函数对模型进行预测,并得到预测值和置信区间
library(forecast)
fcst <- forecast(fdmodel, h = 5) # h是预测的期数,可以自己设定
print(fcst)
##      Point Forecast       Lo 80      Hi 80       Lo 95      Hi 95
## 9841     0.01560617 0.006969813 0.02424254 0.002397999 0.02881435
## 9842     0.01642301 0.007573875 0.02527214 0.002889427 0.02995659
## 9843     0.01666490 0.007737461 0.02559233 0.003011561 0.03031823
## 9844     0.01672195 0.007751796 0.02569210 0.003003284 0.03044061
## 9845     0.01669646 0.007698680 0.02569425 0.002935541 0.03045739
print(vw.pre)
## [1] 0.004463 0.007170 0.004481 0.024951 0.017513
plot(window(vw, start=c(9840), end=c(9845)),
     ylab="", type="b", ylim=c(0.0, 0.035), xlab="Time",lwd=2, xaxt = "n") # 隐藏默认横坐标
lines(ts(c(fcst$mean),start = c(2)), 
      col="red", lwd=1, lty=2, type="b", pch=2)
lines(ts(c(fcst$lower[,2]), start=c(2)), 
      col="blue", lwd=2, lty=3, type="l")
lines(ts(c(fcst$uppe[,2]), start=c(2)), 
      col="blue", lwd=2, lty=3, type="l")

  • fracdiff函数的样本外预测效果和参数设定有关
  • 设定ar和ma的阶数为1
# 使用fracdiff函数拟合分数差分模型,并得到模型的系数和残差
fdmodel <- fracdiff(vw.est, nar=1, nma=1)
# 使用forecast函数对模型进行预测,并得到预测值和置信区间
library(forecast)
fcst <- forecast(fdmodel, h = 5) # h是预测的期数,可以自己设定

summary(fcst)
## 
## Forecast method: ARFIMA(1,0.49,1)
## 
## Model Information:
## 
## Call:
##   fracdiff(x = vw.est, nar = 1, nma = 1) 
## 
## Coefficients:
##         d        ar        ma 
## 0.4919193 0.1149709 0.5797157 
## sigma[eps] = 0.006558008 
## a list with components:
##  [1] "log.likelihood"  "n"               "msg"             "d"              
##  [5] "ar"              "ma"              "covariance.dpq"  "fnormMin"       
##  [9] "sigma"           "stderror.dpq"    "correlation.dpq" "h"              
## [13] "d.tol"           "M"               "hessian.dpq"     "length.w"       
## [17] "residuals"       "fitted"          "call"            "x"              
## 
## Error measures:
##                        ME        RMSE         MAE       MPE     MAPE      MASE
## Training set 5.323205e-05 0.006553912 0.004443025 -494.8609 525.7161 0.7337654
##                     ACF1
## Training set -0.00250303
## 
## Forecasts:
##      Point Forecast      Lo 80      Hi 80       Lo 95      Hi 95
## 9841     0.02150384 0.01310466 0.02990301 0.008658405 0.03434927
## 9842     0.02052644 0.01212416 0.02892872 0.007676268 0.03337662
## 9843     0.02032866 0.01189617 0.02876114 0.007432285 0.03322503
## 9844     0.02041473 0.01193897 0.02889048 0.007452179 0.03337728
## 9845     0.02054435 0.01202616 0.02906253 0.007516910 0.03357179
plot(window(vw, start=c(9840), end=c(9845)),
     ylab="", type="b", ylim=c(0.0, 0.035), xlab="Time",lwd=2, xaxt = "n") # 隐藏默认横坐标
lines(ts(c(fcst$mean),start = c(2)), 
      col="red", lwd=1, lty=2, type="b", pch=2)
lines(ts(c(fcst$lower[,2]), start=c(2)), 
      col="blue", lwd=2, lty=3, type="l")
lines(ts(c(fcst$uppe[,2]), start=c(2)), 
      col="blue", lwd=2, lty=3, type="l")

  • 更换参数
# 使用fracdiff函数拟合分数差分模型,并得到模型的系数和残差
fdmodel <- fracdiff(vw.est, nar=0, nma=0)
# 使用forecast函数对模型进行预测,并得到预测值和置信区间
library(forecast)
fcst <- forecast(fdmodel, h = 5) # h是预测的期数,可以自己设定

summary(fcst)
## 
## Forecast method: ARFIMA(0,0.2,0)
## 
## Model Information:
## 
## Call:
##   fracdiff(x = vw.est, nar = 0, nma = 0) 
## 
## Coefficients:
##         d 
## 0.2012896 
## sigma[eps] = 0.006743346 
## a list with components:
##  [1] "log.likelihood"  "n"               "msg"             "d"              
##  [5] "ar"              "ma"              "covariance.dpq"  "fnormMin"       
##  [9] "sigma"           "stderror.dpq"    "correlation.dpq" "h"              
## [13] "d.tol"           "M"               "hessian.dpq"     "length.w"       
## [17] "residuals"       "fitted"          "call"            "x"              
## 
## Error measures:
##                       ME        RMSE         MAE       MPE     MAPE      MASE
## Training set 7.35239e-05 0.006732825 0.004548547 -526.2058 556.6262 0.7511924
##                    ACF1
## Training set -0.1458025
## 
## Forecasts:
##      Point Forecast       Lo 80      Hi 80       Lo 95      Hi 95
## 9841     0.01537600 0.006747537 0.02400446 0.002179905 0.02857209
## 9842     0.01603366 0.007232129 0.02483519 0.002572881 0.02949444
## 9843     0.01619033 0.007327192 0.02505347 0.002635330 0.02974533
## 9844     0.01619301 0.007296876 0.02508914 0.002587549 0.02979847
## 9845     0.01612987 0.007212666 0.02504707 0.002492186 0.02976755
plot(window(vw, start=c(9840), end=c(9845)),
     ylab="", type="b", ylim=c(0.0, 0.035), xlab="Time",lwd=2, xaxt = "n") # 隐藏默认横坐标
lines(ts(c(fcst$mean),start = c(2)), 
      col="red", lwd=1, lty=2, type="b", pch=2)
lines(ts(c(fcst$lower[,2]), start=c(2)), 
      col="blue", lwd=2, lty=3, type="l")
lines(ts(c(fcst$uppe[,2]), start=c(2)), 
      col="blue", lwd=2, lty=3, type="l")

模型比较和平均

library(xts)
library(lubridate)
library(readr)
da <- read_table(
  "D:/齐安静 教学/时间序列分析/北大/ftsdata/m-deciles08.txt",
  col_types=cols(.default = col_double(),
                 date=col_date("%Y%m%d")))
xts.dec10 <- xts(da[["CAP1RET"]], ymd(da[["date"]]))
dec10 <- ts(da[["CAP1RET"]], start=c(1970, 1), frequency=12)
resm1 <- arima(dec10, order=c(1,0,1), 
               seasonal=list(order=c(1,0,1), period=12))
resm1
## 
## Call:
## arima(x = dec10, order = c(1, 0, 1), seasonal = list(order = c(1, 0, 1), period = 12))
## 
## Coefficients:
##           ar1     ma1    sar1     sma1  intercept
##       -0.0639  0.2508  0.9882  -0.9142     0.0117
## s.e.   0.2205  0.2130  0.0092   0.0332     0.0125
## 
## sigma^2 estimated as 0.004704:  log likelihood = 584.69,  aic = -1157.39
jan <- as.numeric(c(cycle(dec10))==1)
lm1 <- lm(c(dec10) ~ jan); summary(lm1)
## 
## Call:
## lm(formula = c(dec10) ~ jan)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30861 -0.03475 -0.00176  0.03254  0.40671 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.002864   0.003333   0.859    0.391    
## jan         0.125251   0.011546  10.848   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06904 on 466 degrees of freedom
## Multiple R-squared:  0.2016, Adjusted R-squared:  0.1999 
## F-statistic: 117.7 on 1 and 466 DF,  p-value: < 2.2e-16
jan <- as.numeric(c(cycle(dec10))==1)
resm2 <- arima(
  dec10, xreg=jan, seasonal=list(order=c(1,0,1), period=12))
resm2
## 
## Call:
## arima(x = dec10, seasonal = list(order = c(1, 0, 1), period = 12), xreg = jan)
## 
## Coefficients:
##          sar1    sma1  intercept     jan
##       -0.0920  0.2192     0.0027  0.1248
## s.e.   0.3585  0.3502     0.0037  0.0127
## 
## sigma^2 estimated as 0.004671:  log likelihood = 591.56,  aic = -1173.12
cat("模型1的标准误:",sqrt(0.004704),"\n")
## 模型1的标准误: 0.06858571
cat("模型2的标准误:",0.06904,"\n")
## 模型2的标准误: 0.06904
cat("模型3的标准误:",sqrt(0.004671))
## 模型3的标准误: 0.06834471
案例:美国GDP季度数据(1947-2010)
  • 读入数据
da <- read_table("D:/齐安静 教学/时间序列分析/北大/ftsdata/q-gdpc96.txt")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Year = col_double(),
##   Mon = col_character(),
##   Day = col_character(),
##   gdp = col_double()
## )
gdp <- ts(log(da[["gdp"]]), start=c(1947,1), frequency=4)
dgdp <- diff(gdp)
  • 时序图
plot(gdp, xlab="Year", ylab="ln(GDP)")

  • 差分时序图
plot(dgdp, xlab="Year", ylab="ln(GDP)")
abline(h=0, col="gray", lty=3)

  • ACF
forecast::Acf(dgdp, main="")

  • PACF
pacf(dgdp, main="")

- AIC对ar模型定阶

ar(dgdp, method="mle")
## 
## Call:
## ar(x = dgdp, method = "mle")
## 
## Coefficients:
##       1        2        3  
##  0.3461   0.1299  -0.1224  
## 
## Order selected 3  sigma^2 estimated as  8.324e-05
  • 建立AR(3)模型
resm1 <- arima(dgdp, order=c(3,0,0)); resm1
## 
## Call:
## arima(x = dgdp, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1     ar2      ar3  intercept
##       0.3462  0.1299  -0.1225     0.0079
## s.e.  0.0623  0.0655   0.0624     0.0009
## 
## sigma^2 estimated as 8.323e-05:  log likelihood = 829.23,  aic = -1648.45
  • 考虑是季度数据,有周期,模型可以考虑增加周期
resm2 <- arima(dgdp, order=c(3,0,0), 
               seasonal=list(order=c(1,0,1), period=4))
resm2
## 
## Call:
## arima(x = dgdp, order = c(3, 0, 0), seasonal = list(order = c(1, 0, 1), period = 4))
## 
## Coefficients:
##          ar1     ar2      ar3    sar1     sma1  intercept
##       0.3305  0.1521  -0.1103  0.4966  -0.5865     0.0079
## s.e.  0.0633  0.0668   0.0635  0.2578   0.2357     0.0008
## 
## sigma^2 estimated as 8.24e-05:  log likelihood = 830.47,  aic = -1646.93
  • 比较预测误差
  • 模型1
cat("==== Model 1\n")
## ==== Model 1
path = "D:/My documents/rmarkdown"
setwd(path)
source('backtest.R')


res1 <- backtest(resm1, dgdp, n_min_sample=215, h=1)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr   1.1.4     ✔ stringr 1.5.1
## ✔ forcats 1.0.0     ✔ tibble  3.2.1
## ✔ ggplot2 3.4.4     ✔ tidyr   1.3.0
## ✔ purrr   1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first()  masks xts::first()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::last()   masks xts::last()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
print(res1$stats)
## # A tibble: 1 × 3
##   n_ahead    rmse     mae
##     <dbl>   <dbl>   <dbl>
## 1       1 0.00615 0.00443
  • 模型2
cat("==== Model 2\n")
## ==== Model 2
res2 <- backtest(resm2, dgdp, n_min_sample=215, h=1)
print(res2$stats)
## # A tibble: 1 × 3
##   n_ahead    rmse     mae
##     <dbl>   <dbl>   <dbl>
## 1       1 0.00632 0.00455