多元时间序列的一些计算使用蔡瑞胸(R.S. Tsay)教授的MTS扩展包。
考虑IBM股票与标普500指数从1926年1月到2008年12月的月对数收益率, 共996个观测。单位为百分之一。
library(readr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(xts)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
da1 <- read_table(
"D:/齐安静 教学/时间序列分析/北大/ftsdata/m-ibm3dx2608.txt",
col_types=cols(.default=col_double(),
date=col_date(format="%Y%m%d")))
ts.mibm <- ts(100*log(1 + da1[["ibmrtn"]]),
start=c(1926,1), frequency=12)
ts.msp5 <- ts(100*log(1 + da1[["sprtn"]]),
start=c(1926,1), frequency=12)
plot(ts.mibm, xlab="Year", ylab="log(IBM Return)")
plot(ts.msp5, xlab="Year", ylab="log(SP500 Return)")
- 两个序列同时刻的散点图:
plot(as.vector(ts.msp5), as.vector(ts.mibm),
xlab="标普500[t]", ylab="IBM[t]")
cor.test(as.vector(ts.msp5), as.vector(ts.mibm))
##
## Pearson's product-moment correlation
##
## data: as.vector(ts.msp5) and as.vector(ts.mibm)
## t = 26.625, df = 994, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6074247 0.6800598
## sample estimates:
## cor
## 0.6451977
plot(as.vector(ts.msp5)[-length(ts.mibm)],
as.vector(ts.mibm)[-1],
xlab="标普500[t-1]", ylab="IBM[t]")
- IBM对领先一个月的标普500的相关系数
cor.test(as.vector(ts.msp5)[-length(ts.mibm)],
as.vector(ts.mibm)[-1])
##
## Pearson's product-moment correlation
##
## data: as.vector(ts.msp5)[-length(ts.mibm)] and as.vector(ts.mibm)[-1]
## t = 3.093, df = 993, p-value = 0.002037
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.03575303 0.15886892
## sample estimates:
## cor
## 0.09768469
plot(as.vector(ts.mibm)[-length(ts.mibm)],
as.vector(ts.msp5)[-1],
xlab="IBM[t-1]", ylab="标普500[t]")
cor.test(as.vector(ts.mibm)[-length(ts.mibm)],
as.vector(ts.msp5)[-1])
##
## Pearson's product-moment correlation
##
## data: as.vector(ts.mibm)[-length(ts.mibm)] and as.vector(ts.msp5)[-1]
## t = 1.1803, df = 993, p-value = 0.2382
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.02477745 0.09934653
## sample estimates:
## cor
## 0.0374289
library(MTS)
MTS::MTSplot(cbind(ts.mibm, ts.msp5))
- 计算两个序列的单样本简单样本统计量:
fBasics::basicStats(cbind(ts.mibm, ts.msp5))
## ts.mibm ts.msp5
## nobs 996.000000 996.000000
## NAs 0.000000 0.000000
## Minimum -30.368274 -35.585100
## Maximum 38.566232 35.222044
## 1. Quartile -2.800141 -2.032465
## 3. Quartile 5.039744 3.546559
## Mean 1.089135 0.430068
## Median 1.095871 0.897907
## Sum 1084.778282 428.348045
## SE Mean 0.222860 0.175458
## LCL Mean 0.651806 0.085759
## UCL Mean 1.526464 0.774378
## Variance 49.467724 30.662294
## Stdev 7.033329 5.537354
## Skewness -0.067766 -0.521287
## Kurtosis 2.621657 7.926907
ccm()函数显示简约的互相关阵序列和图形。ccm(cbind(ts.mibm, ts.msp5), level=TRUE)
## [1] "Covariance matrix:"
## ts.mibm ts.msp5
## ts.mibm 49.5 25.1
## ts.msp5 25.1 30.7
## CCM at lag: 0
## [,1] [,2]
## [1,] 1.000 0.645
## [2,] 0.645 1.000
## Simplified matrix:
## CCM at lag: 1
## . +
## . +
## Correlations:
## [,1] [,2]
## [1,] 0.0443 0.0976
## [2,] 0.0374 0.0836
## CCM at lag: 2
## . -
## . .
## Correlations:
## [,1] [,2]
## [1,] 0.00071 -0.0779
## [2,] 0.01505 -0.0178
## CCM at lag: 3
## . .
## . -
## Correlations:
## [,1] [,2]
## [1,] -0.0139 -0.0581
## [2,] -0.0524 -0.0951
## CCM at lag: 4
## . .
## . .
## Correlations:
## [,1] [,2]
## [1,] -0.0282 -0.0312
## [2,] 0.0384 0.0269
## CCM at lag: 5
## . +
## . +
## Correlations:
## [,1] [,2]
## [1,] 0.02370 0.0812
## [2,] 0.00452 0.0890
## CCM at lag: 6
## . .
## . .
## Correlations:
## [,1] [,2]
## [1,] -0.0397 -0.0166
## [2,] -0.0390 -0.0154
## CCM at lag: 7
## . .
## . .
## Correlations:
## [,1] [,2]
## [1,] 0.00822 -0.0109
## [2,] 0.01813 0.0266
## CCM at lag: 8
## . .
## + .
## Correlations:
## [,1] [,2]
## [1,] 0.0634 0.0301
## [2,] 0.0717 0.0480
## CCM at lag: 9
## . .
## . +
## Correlations:
## [,1] [,2]
## [1,] 0.0515 0.0331
## [2,] 0.0482 0.0749
## CCM at lag: 10
## . .
## . .
## Correlations:
## [,1] [,2]
## [1,] 0.0383 0.0256
## [2,] 0.0166 0.0133
## CCM at lag: 11
## . .
## . .
## Correlations:
## [,1] [,2]
## [1,] 0.0207 0.00321
## [2,] 0.0237 -0.00659
## CCM at lag: 12
## . .
## . .
## Correlations:
## [,1] [,2]
## [1,] 0.0122 -0.0118
## [2,] 0.0225 0.0203
## Hit Enter for p-value plot of individual ccm:
##### 案例:美国国债
考虑各个期限的美国债券指数的月简单收益率, 包括30年、20年、10年、5年和1年期限, 数据来自CRSP数据库, 从1942年1月值1999年12月,共696个观测值。 读入数据后转换为对数收益率, 单位为百分之一。
da2 <- read_table(
"D:/齐安静 教学/时间序列分析/北大/ftsdata/m-bnd.txt",
col_types=cols(.default=col_double()))
ts.mbnd <- ts(log(1 + da2)*100, start=c(1942,1), frequency=12)
序列的基本统计:
summary(ts.mbnd)
## 30yrs 20yrs 10yrs 5yrs
## Min. :-8.0440 Min. :-8.7804 Min. :-6.9050 Min. :-5.9771
## 1st Qu.:-0.8146 1st Qu.:-0.7186 1st Qu.:-0.5882 1st Qu.:-0.1133
## Median : 0.2228 Median : 0.2337 Median : 0.2482 Median : 0.2297
## Mean : 0.4024 Mean : 0.4229 Mean : 0.4284 Mean : 0.4489
## 3rd Qu.: 1.5366 3rd Qu.: 1.4728 3rd Qu.: 1.2825 3rd Qu.: 0.9425
## Max. :12.4993 Max. :14.1803 Max. : 9.5301 Max. :10.0858
## 1yr
## Min. :-1.7360
## 1st Qu.: 0.1049
## Median : 0.3404
## Mean : 0.4408
## 3rd Qu.: 0.6372
## Max. : 5.4545
plot(as.xts(ts.mbnd), type="l", multi.panel=TRUE, theme="white",
main="美国国债月对数收益率(%)",
major.ticks="years",
grid.ticks.on = "auto")
colMeans(ts.mbnd)
## 30yrs 20yrs 10yrs 5yrs 1yr
## 0.4024412 0.4228871 0.4283688 0.4488915 0.4407712
样本标准差:
apply(ts.mbnd, 2, sd)
## 30yrs 20yrs 10yrs 5yrs 1yr
## 2.5014721 2.3970918 1.9521371 1.3783452 0.5282478
同步相关阵:
round(cor(ts.mbnd), 2)
## 30yrs 20yrs 10yrs 5yrs 1yr
## 30yrs 1.00 0.98 0.92 0.85 0.63
## 20yrs 0.98 1.00 0.91 0.86 0.64
## 10yrs 0.92 0.91 1.00 0.90 0.67
## 5yrs 0.85 0.86 0.90 1.00 0.81
## 1yr 0.63 0.64 0.67 0.81 1.00
library("ggcorrplot")
## Loading required package: ggplot2
ggcorrplot::ggcorrplot(cor(ts.mbnd),
hc.order=TRUE, lab=TRUE)
可以看出相关性都很强, 而且长期国债之间的相关比短期国债之间的相关要高.
作CCM统计表和图形:
ccm函数的源代码
"ccm" <- function(x, lags=12, level=FALSE, output=TRUE){
# Compute and plot the cross-correlation matrices.
# lags: number of lags used.
# level: logical unit for printing.
#
if(!is.matrix(x)) x = as.matrix(x)
nT=dim(x)[1]; k=dim(x)[2]
if(output){
opar <- par(mfcol=c(k,k))
on.exit(par(opar))
}
if(lags < 1)lags=1
# remove the sample means
y = scale(x,center=TRUE,scale=FALSE)
V1 = cov(y)
if(output){
print("Covariance matrix:")
print(V1,digits=3)
}
se = sqrt(diag(V1))
SD = diag(1/se)
S0 = SD %*% V1 %*% SD
## S0 used later
ksq = k*k
wk = matrix(0, ksq, lags+1)
wk[,1] = c(S0)
j=0
if(output){
cat("CCM at lag: ",j,"\n")
print(S0, digits=3)
cat("Simplified matrix:","\n")
}
y = y %*% SD
crit = 2.0/sqrt(nT)
for (j in 1:lags){
y1 = y[1:(nT-j),]
y2 = y[(j+1):nT,]
Sj = t(y2) %*% y1 / nT
Smtx = matrix(".", k, k)
for (ii in 1:k){
for (jj in 1:k){
if(Sj[ii,jj] > crit) Smtx[ii,jj] = "+"
if(Sj[ii,jj] < -crit) Smtx[ii,jj] = "-"
}
}
if(output){
cat("CCM at lag: ", j, "\n")
for (ii in 1:k){
cat(Smtx[ii,],"\n")
}
if(level){
cat("Correlations:","\n")
print(Sj,digits=3)
}
} ## end of if-(output) statement
wk[, j+1] = c(Sj)
}
if(output){
iik <- rep(1:k, k)
jjk <- rep(1:k, each=k)
tdx=c(0,1:lags)
jcnt=0
if(k > 10){
print("Skip the plots due to high dimension!")
} else {
for (j in 1:ksq){
plot(tdx, wk[j,], type='h',
xlab='lag',
ylab=paste('ccf(', iik[j], ",", jjk[j], ")"),
ylim=c(-1,1))
abline(h=c(0))
crit=2/sqrt(nT)
abline(h=c(crit),lty=2)
abline(h=c(-crit),lty=2)
jcnt=jcnt+1
}
}
## end of if-(output) statement
}
## The following p-value plot was added on May 16, 2012 by Ruey Tsay.
### Obtain a p-value plot of ccm matrix
r0i=solve(S0)
R0=kronecker(r0i,r0i)
pv=rep(0,lags)
for (i in 1:lags){
tmp=matrix(wk[,(i+1)],ksq,1)
tmp1=R0%*%tmp
ci=crossprod(tmp,tmp1)*nT*nT/(nT-i)
pv[i]=1-pchisq(ci,ksq)
}
if(output){
par(opar)
plot(pv,xlab='lag',ylab='p-value',ylim=c(0,1))
abline(h=c(0))
abline(h=c(0.05),col="blue")
title(main="Significance plot of CCM")
}
ccm <- list(ccm=wk,pvalue=pv)
}
# ccm(ts.mbnd)
MTS::mq()计算多元混成检验:MTS::mq(cbind(ts.mibm, ts.msp5), lag=12)
## Ljung-Box Statistics:
## m Q(m) df p-value
## [1,] 1.0 10.9 4.0 0.03
## [2,] 2.0 23.3 8.0 0.00
## [3,] 3.0 33.5 12.0 0.00
## [4,] 4.0 40.4 16.0 0.00
## [5,] 5.0 54.2 20.0 0.00
## [6,] 6.0 56.3 24.0 0.00
## [7,] 7.0 59.0 28.0 0.00
## [8,] 8.0 65.1 32.0 0.00
## [9,] 9.0 73.8 36.0 0.00
## [10,] 10.0 75.5 40.0 0.00
## [11,] 11.0 77.0 44.0 0.00
## [12,] 12.0 79.2 48.0 0.00
MTS::mq(ts.mbnd, lag=12)
## Ljung-Box Statistics:
## m Q(m) df p-value
## [1,] 1 279 25 0
## [2,] 2 454 50 0
## [3,] 3 712 75 0
## [4,] 4 861 100 0
## [5,] 5 1076 125 0
## [6,] 6 1268 150 0
## [7,] 7 1412 175 0
## [8,] 8 1623 200 0
## [9,] 9 1835 225 0
## [10,] 10 2009 250 0
## [11,] 11 2213 275 0
## [12,] 12 2366 300 0