Time Series Analysis

Airpassengers 데이터를 이용한 시계열 분석/예측

시계열 분석

아래 플로팅을 통해 알 수 있는 사실

apts <- ts(AirPassengers, frequency = 12)

plot(apts)

plot of chunk analysis


acf(apts)

plot of chunk analysis


pacf(apts)

plot of chunk analysis


spectrum(apts)

plot of chunk analysis


acf(diff(log(AirPassengers)))

plot of chunk analysis


pacf(diff(log(AirPassengers)))

plot of chunk analysis

장기 추세 제거

library(zoo)
m <- lm(coredata(apts) ~ index(apts))

apts.eltr <- ts(resid(m), index(apts))

plot(apts.eltr)

plot of chunk trend


plot(diff(log(apts)))

plot of chunk trend


apts.diff <- diff(log(apts))

추세(차분), 계절요인(이동평균), 순환변동, 불규칙 변동 추출

f <- decompose(apts)

attributes(f)
## $names
## [1] "x"        "seasonal" "trend"    "random"   "figure"   "type"    
## 
## $class
## [1] "decomposed.ts"


plot(f$figure, type = "b", xaxt = "n", xlab = "")

monthNames <- months(ISOdate(2011, 1:12, 1))

axis(1, at = 1:12, labels = monthNames, las = 2)

plot of chunk decompose


plot(f)

plot of chunk decompose

시계열을 이용한 예측

library(forecast)
apts.arima <- auto.arima(apts)

summary(apts.arima)
## Series: apts 
## ARIMA(0,1,1)(0,1,0)[12]                    
## 
## Coefficients:
##          ma1
##       -0.318
## s.e.   0.088
## 
## sigma^2 estimated as 137:  log likelihood=-508.3
## AIC=1021   AICc=1021   BIC=1026
## 
## Training set error measures:
##                  ME  RMSE  MAE     MPE  MAPE   MASE      ACF1
## Training set 0.2675 11.17 8.28 0.03319 2.958 0.2585 -0.005541

fore <- predict(apts.arima, n.ahead = 24)

U <- fore$pred + 2 * fore$se
L <- fore$pred - 2 * fore$se


apts.smoothing <- HoltWinters(apts, seasonal = "mul")
fore2 <- predict(apts.smoothing, n.ahead = 24)


ts.plot(apts, fore$pred, U, L, fore2, col = c(1, 2, 4, 4, 6), lty = c(1, 1, 
    2, 2, 3))
legend("topleft", c("Actual", "ARIMA", "ARIMA Error Bounds (95% Confidence)", 
    "exponential smoothing"), col = c(1, 2, 4, 6), lty = c(1, 1, 2, 3))

plot of chunk predict

분산을 일정하는 하는 변수 변환을 한 경우

apts.log <- log(apts)
apts.log.arima <- auto.arima(apts.log)

summary(apts.log.arima)
## Series: apts.log 
## ARIMA(0,1,1)(2,1,2)[12]                    
## 
## Coefficients:
##          ma1    sar1    sar2   sma1    sma2
##       -0.429  -1.000  -0.019  0.431  -0.490
## s.e.   0.090   0.261   0.178  0.293   0.173
## 
## sigma^2 estimated as 0.00131:  log likelihood=245.5
## AIC=-478.9   AICc=-478.2   BIC=-461.7
## 
## Training set error measures:
##                     ME   RMSE     MAE     MPE   MAPE   MASE    ACF1
## Training set 0.0006109 0.0346 0.02597 0.01164 0.4701 0.2146 0.01461

fore <- predict(apts.log.arima, n.ahead = 24)

U <- fore$pred + 2 * fore$se
L <- fore$pred - 2 * fore$se


apts.smoothing <- HoltWinters(apts.log, seasonal = "mul")
fore2 <- predict(apts.smoothing, n.ahead = 24)


ts.plot(exp(apts.log), exp(fore$pred), exp(U), exp(L), exp(fore2), col = c(1, 
    2, 4, 4, 6), lty = c(1, 1, 2, 2, 3))
legend("topleft", c("Actual", "ARIMA", "ARIMA Error Bounds (95% Confidence)", 
    "exponential smoothing"), col = c(1, 2, 4, 6), lty = c(1, 1, 2, 3))

plot of chunk predict2

지표를 좀더 살펴 보면 아래와 같다.

sma1, sar2의 경우 추정 모수에 비해 s.e가 너무 크며 모수가 0을 포함하고 있는 것을 알 수 있다. 따라서 이를 제거한 결과를 모델링해 볼 필요가 있다.

apts.log.arima <- arima(apts.log, order = c(0, 1, 1), seasonal = list(order = c(0, 
    1, 1), period = 12))


summary(apts.log.arima)
## Series: apts.log 
## ARIMA(0,1,1)(0,1,1)[12]                    
## 
## Coefficients:
##          ma1    sma1
##       -0.402  -0.557
## s.e.   0.090   0.073
## 
## sigma^2 estimated as 0.00135:  log likelihood=244.7
## AIC=-483.4   AICc=-483.2   BIC=-474.8
## 
## Training set error measures:
##                     ME    RMSE     MAE     MPE   MAPE  MASE    ACF1
## Training set 0.0005731 0.03505 0.02626 0.01099 0.4753 0.217 0.01444

fore <- predict(apts.log.arima, n.ahead = 24)

U <- fore$pred + 2 * fore$se
L <- fore$pred - 2 * fore$se


apts.smoothing <- HoltWinters(apts.log, seasonal = "mul")
fore2 <- predict(apts.smoothing, n.ahead = 24)


ts.plot(exp(apts.log), exp(fore$pred), exp(U), exp(L), exp(fore2), col = c(1, 
    2, 4, 4, 6), lty = c(1, 1, 2, 2, 3))
legend("topleft", c("Actual", "ARIMA", "ARIMA Error Bounds (95% Confidence)", 
    "exponential smoothing"), col = c(1, 2, 4, 6), lty = c(1, 1, 2, 3))

plot of chunk predict3

ARIMA 모형 진단하기

아래 그래프의 결과 아래와 같은 사실을 알 수 있다.

결과적으로 우리는 적절한 모델을 만들어 낸 것을 알 수 있다.


tsdiag(apts.log.arima)

plot of chunk diag