library(xts)
library(zoo)
library(tseries)
library(timeSeries)
library(forecast)
library(quantmod)
library(fGarch)
library(ggplot2)
setwd("C:/Users/User/Google 雲端硬碟/政治大學ECO/時間序列/學號對應資料序列")
hw<-read.csv("106258017.csv")
hwx<-as.ts(hw$x)
hwy<-as.ts(hw$y)
y值
(a).
下圖為時間序列圖,呈現下降的趨勢。
簡單統基分析中,Mean為6.116,Q1為3.285,Q2為6.355,Q3為8.601,最小值為-1.500,最大值為14.275。偏態係數為0.09970972,呈現右偏態。峰態係數為-0.6760822,呈現低闊峰。
plot(hwy)

summary(hwy)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.500 3.285 6.355 6.116 8.601 14.275
skewness(hwy)
## [1] 0.09970972
## attr(,"method")
## [1] "moment"
kurtosis(hwy)
## [1] -0.6760822
## attr(,"method")
## [1] "excess"
(b).
下圖為ACF和PACF圖,從PACF圖中,t期與t-8期有很大的cov,因此初步估計為ARMA(8,0)模型。


(c).
從ACF中,發現其cov大約每八期為一個周期,因此我認為有季節性因素,從時間序列圖中,發現大約在170期左右下降得特別快,因此猜測可能有結構性改變。
因此在mean equation中,我採取AR(8),試圖捕捉所有季節性因素。同時透過Chow test,我們拒絕了沒有結構改變的假設。因此我決定以170期分兩段來估計。
model1<-arima(x = hwy,order = c(8,0,0))
summary(model1)
##
## Call:
## arima(x = hwy, order = c(8, 0, 0))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.0885 0.0375 0.1533 -0.0453 0.0865 0.0467 -0.0521 0.6622
## s.e. 0.0434 0.0441 0.0440 0.0439 0.0445 0.0441 0.0444 0.0442
## intercept
## 5.4281
## s.e. 2.4025
##
## sigma^2 estimated as 2.463: log likelihood = -564.51, aic = 1149.02
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1171659 1.569402 1.226071 -23.78693 48.25452 0.4559486
## ACF1
## Training set 0.06204426
plot(as.ts(hwy[150:250]))

ssr<-sum(model1$residuals^2)
model1.1<-arima(x = hwy[1:170],order = c(8,0,0))
ssr1<-sum(model1.1$residuals^2)
model1.2<-arima(x = hwy[171:300],order = c(8,0,0))
ssr2<-sum(model1.2$residuals^2)
chow.test<-((ssr-ssr1-ssr2)/9)/((ssr1+ssr2)/(300-18))
p.v<-pf(chow.test,df1 = 9,df2 = (300-18),lower.tail = F)
p.v
## [1] 0.003520817
(d).
由於上一題發現結構性改變,因此我們以170期,為分隔點,分前後兩期。
透過分析ACF和PACF以及參考auto.arima,前後兩期分別估計ARMA(8,2)和ARMA(4,0),雖然係數不全然為統計顯著,但殘差不具序列相關。
auto.arima(y =hwy[1:170],seasonal = T,max.d = 0 )
auto.arima(y =hwy[171:300],seasonal = T,max.d = 0 )
tsdisplay(hwy[1:170])

tsdisplay(hwy[171:300])

model2.1<-arima(x = hwy[1:170],order = c(8,0,2))
model2.2<-arima(x = hwy[171:300],order = c(4,0,8))
summary(model2.1)
##
## Call:
## arima(x = hwy[1:170], order = c(8, 0, 2))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## -0.0566 -0.0631 0.0669 -0.0900 -0.0078 -0.0089 -0.1062 0.6944
## s.e. 0.1072 0.0892 0.0594 0.0579 0.0626 0.0613 0.0609 0.0672
## ma1 ma2 intercept
## 0.2517 0.0277 7.8357
## s.e. 0.1484 0.1256 0.2413
##
## sigma^2 estimated as 2.116: log likelihood = -308.64, aic = 641.28
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.008657756 1.454617 1.150441 -4.998891 17.37411 0.3370215
## ACF1
## Training set -0.009129863
r2.1<-model2.1$residuals
Box.test(x = r2.1,lag = 4,type = "Ljung-Box")
##
## Box-Ljung test
##
## data: r2.1
## X-squared = 1.1828, df = 4, p-value = 0.8809
Box.test(x = r2.1,lag = 8,type = "Ljung-Box")
##
## Box-Ljung test
##
## data: r2.1
## X-squared = 3.3488, df = 8, p-value = 0.9106
Box.test(x = r2.1,lag = 12,type = "Ljung-Box")
##
## Box-Ljung test
##
## data: r2.1
## X-squared = 5.0545, df = 12, p-value = 0.9561
summary(model2.2)
##
## Call:
## arima(x = hwy[171:300], order = c(4, 0, 8))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## 0.1056 0.3386 0.6006 -0.1045 0.0713 -0.1102 -0.7013 0.1647
## s.e. 0.1866 0.1116 0.1417 0.2117 0.1838 0.1494 0.2267 0.2179
## ma5 ma6 ma7 ma8 intercept
## 0.0212 0.0326 -0.0096 0.4284 3.8837
## s.e. 0.1509 0.1372 0.1714 0.1434 1.5117
##
## sigma^2 estimated as 2.079: log likelihood = -236.29, aic = 500.57
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1020997 1.441739 1.106511 -38.56665 78.61229 0.6341516
## ACF1
## Training set -0.0006511916
r2.2<-model2.2$residuals
Box.test(x = r2.2,lag = 4,type = "Ljung-Box")
##
## Box-Ljung test
##
## data: r2.2
## X-squared = 0.42472, df = 4, p-value = 0.9804
Box.test(x = r2.2,lag = 8,type = "Ljung-Box")
##
## Box-Ljung test
##
## data: r2.2
## X-squared = 2.44, df = 8, p-value = 0.9645
Box.test(x = r2.2,lag = 12,type = "Ljung-Box")
##
## Box-Ljung test
##
## data: r2.2
## X-squared = 4.7985, df = 12, p-value = 0.9644
(e).
透過LM test我們發現前段模型殘差平方和其落後項不具有統計上的顯著相關(以0.05為顯著水準)。
但後段模型殘差平方和其落後第3項雖具有統計上的顯著相關(以0.05為顯著水準),但整體F test卻是無法拒絕沒有相關,因此暫時判定沒有ARCH effect。
r2.1_sq<-r2.1^2
lm.test2.1<-lm(r2.1_sq[6:170]~r2.1_sq[5:169]+
r2.1_sq[4:168]+r2.1_sq[3:167]+
r2.1_sq[2:166]+r2.1_sq[1:165])
summary(lm.test2.1)
##
## Call:
## lm(formula = r2.1_sq[6:170] ~ r2.1_sq[5:169] + r2.1_sq[4:168] +
## r2.1_sq[3:167] + r2.1_sq[2:166] + r2.1_sq[1:165])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6051 -1.8339 -0.9616 0.8227 14.3205
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.28453 0.44527 5.131 8.32e-07 ***
## r2.1_sq[5:169] -0.06060 0.08067 -0.751 0.454
## r2.1_sq[4:168] 0.01079 0.08070 0.134 0.894
## r2.1_sq[3:167] 0.06684 0.08054 0.830 0.408
## r2.1_sq[2:166] -0.02411 0.08034 -0.300 0.765
## r2.1_sq[1:165] -0.07437 0.08307 -0.895 0.372
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.865 on 159 degrees of freedom
## Multiple R-squared: 0.0127, Adjusted R-squared: -0.01835
## F-statistic: 0.4091 on 5 and 159 DF, p-value: 0.842
r2.2_sq<-r2.2^2
lm.test2.2<-lm(r2.2_sq[6:130]~r2.2_sq[5:129]+
r2.2_sq[4:128]+r2.2_sq[3:127]+
r2.2_sq[2:126]+r2.2_sq[1:125])
summary(lm.test2.2)
##
## Call:
## lm(formula = r2.2_sq[6:130] ~ r2.2_sq[5:129] + r2.2_sq[4:128] +
## r2.2_sq[3:127] + r2.2_sq[2:126] + r2.2_sq[1:125])
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.8432 -1.6284 -1.1811 0.1846 23.3882
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.70118 0.53521 3.179 0.00189 **
## r2.2_sq[5:129] -0.01496 0.09117 -0.164 0.86997
## r2.2_sq[4:128] -0.06110 0.09024 -0.677 0.49966
## r2.2_sq[3:127] 0.18168 0.08887 2.044 0.04313 *
## r2.2_sq[2:126] -0.04908 0.09016 -0.544 0.58722
## r2.2_sq[1:125] 0.10178 0.09024 1.128 0.26161
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.901 on 119 degrees of freedom
## Multiple R-squared: 0.04828, Adjusted R-squared: 0.00829
## F-statistic: 1.207 on 5 and 119 DF, p-value: 0.3099
(f).
因無ARCH effect 所以不用做。
(g).
下圖為實際值、one-step-ahead forecast 預測值與預測誤差的比較。
f.v<-data.frame(time=201:300)
f.v$predict<-NA
i<-1
for(i in 1:100){
model.forecast<-arima(x = hwy[171:(199+i)],order = c(4,0,8))
pre.v<-predict(object = model.forecast,n.ahead=1)
f.v[i,2]<-pre.v$pred[1]
}
f.v$real<-hwy[201:300]
f.v$error<-f.v$real-f.v$predict
ggplot(data = f.v)+
geom_line(aes(x=f.v$time,y=f.v$predict),color="red")+
geom_line(aes(x=f.v$time,y=f.v$real))+
geom_line(aes(x=f.v$time,y=f.v$error),color="blue")+
labs(title="實際值(黑線)、預測值(紅線)、預測誤差(藍線",x="期數",y="數值")

(k).
下圖為實際值、no-change forecast 預測值與預測誤差的比較。
f.v2<-data.frame(time=201:300)
f.v2$predict<-hwy[200:299]
f.v2$real<-hwy[201:300]
f.v2$error<-f.v2$real-f.v2$predict
ggplot(data = f.v2)+
geom_line(aes(x=f.v2$time,y=f.v2$predict),color="red")+
geom_line(aes(x=f.v2$time,y=f.v2$real))+
geom_line(aes(x=f.v2$time,y=f.v2$error),color="blue")+
labs(title="實際值(黑線)、預測值(紅線)、預測誤差(藍線",x="期數",y="數值")

(l).
根據Diebold-Mariano Test,看起來透過one-step-ahead forecast的MSPE比較小,但統計上我們仍然無法拒絕“H0: 兩者預測一樣好”,前者預測方式在統計上並無顯著優於後者。
dm.test(f.v$error,f.v2$error,alternative = "greater")
##
## Diebold-Mariano Test
##
## data: f.v$errorf.v2$error
## DM = -0.92539, Forecast horizon = 1, Loss function power = 2,
## p-value = 0.8215
## alternative hypothesis: greater