Time Series Analysis

Airpassengers 데이터를 이용한 시계열 분석/예측

시계열 분석

아래 플로팅을 통해 알 수 있는 사실

apts <- ts(AirPassengers, frequency = 12)

plot(apts)

plot of chunk analysis


acf(apts)

plot of chunk analysis


pacf(apts)

plot of chunk analysis


spectrum(apts)

plot of chunk analysis


acf(diff(log(AirPassengers)))

plot of chunk analysis


pacf(diff(log(AirPassengers)))

plot of chunk analysis

장기 추세 제거

library(zoo)
## Attaching package: 'zoo'
## The following object(s) are masked from 'package:base':
## 
## as.Date, as.Date.numeric
m <- lm(coredata(apts) ~ index(apts))

apts.eltr <- ts(resid(m), index(apts))

plot(apts.eltr)

plot of chunk trend


plot(diff(log(apts)))

plot of chunk trend


apts.diff <- diff(log(apts))

추세(차분), 계절요인(이동평균), 순환변동, 불규칙 변동 추출

f <- decompose(apts)

attributes(f)
## $names
## [1] "x"        "seasonal" "trend"    "random"   "figure"   "type"    
## 
## $class
## [1] "decomposed.ts"


plot(f$figure, type = "b", xaxt = "n", xlab = "")

monthNames <- months(ISOdate(2011, 1:12, 1))

axis(1, at = 1:12, labels = monthNames, las = 2)

plot of chunk decompose


plot(f)

plot of chunk decompose

시계열을 이용한 예측

library(forecast)
apts.arima <- auto.arima(apts)

summary(apts.arima)
## Series: apts 
## ARIMA(0,1,1)(0,1,0)[12]                    
## 
## Coefficients:
##          ma1
##       -0.318
## s.e.   0.088
## 
## sigma^2 estimated as 137:  log likelihood=-508.3
## AIC=1021   AICc=1021   BIC=1026
## 
## In-sample error measures:
##       ME     RMSE      MAE      MPE     MAPE     MASE 
##  0.26749 11.17471  8.27956  0.03319  2.95828  0.32017

fore <- predict(apts.arima, n.ahead = 24)

U <- fore$pred + 2 * fore$se
L <- fore$pred - 2 * fore$se


apts.smoothing <- HoltWinters(apts, seasonal = "mul")
fore2 <- predict(apts.smoothing, n.ahead = 24)


ts.plot(apts, fore$pred, U, L, fore2, col = c(1, 2, 4, 4, 6), lty = c(1, 1, 
    2, 2, 3))
legend("topleft", c("Actual", "ARIMA", "ARIMA Error Bounds (95% Confidence)", 
    "exponential smoothing"), col = c(1, 2, 4, 6), lty = c(1, 1, 2, 3))

plot of chunk predict

분산을 일정하는 하는 변수 변환을 한 경우

apts.log <- log(apts)
apts.log.arima <- auto.arima(apts.log)

summary(apts.log.arima)
## Series: apts.log 
## ARIMA(0,1,1)(2,1,2)[12]                    
## 
## Coefficients:
##          ma1    sar1    sar2   sma1    sma2
##       -0.429  -1.000  -0.019  0.431  -0.490
## s.e.   0.090   0.261   0.178  0.293   0.173
## 
## sigma^2 estimated as 0.00131:  log likelihood=245.5
## AIC=-478.9   AICc=-478.2   BIC=-461.7
## 
## In-sample error measures:
##        ME      RMSE       MAE       MPE      MAPE      MASE 
## 0.0006109 0.0345961 0.0259718 0.0116426 0.4701051 0.2867096

fore <- predict(apts.log.arima, n.ahead = 24)

U <- fore$pred + 2 * fore$se
L <- fore$pred - 2 * fore$se


apts.smoothing <- HoltWinters(apts.log, seasonal = "mul")
fore2 <- predict(apts.smoothing, n.ahead = 24)


ts.plot(exp(apts.log), exp(fore$pred), exp(U), exp(L), exp(fore2), col = c(1, 
    2, 4, 4, 6), lty = c(1, 1, 2, 2, 3))
legend("topleft", c("Actual", "ARIMA", "ARIMA Error Bounds (95% Confidence)", 
    "exponential smoothing"), col = c(1, 2, 4, 6), lty = c(1, 1, 2, 3))

plot of chunk predict2

지표를 좀더 살펴 보면 아래와 같다.

sma1, sar2의 경우 추정 모수에 비해 s.e가 너무 크며 모수가 0을 포함하고 있는 것을 알 수 있다. 따라서 이를 제거한 결과를 모델링해 볼 필요가 있다.

apts.log.arima <- arima(apts.log, order = c(0, 1, 1), seasonal = list(order = c(0, 
    1, 1), period = 12))


summary(apts.log.arima)
## Series: apts.log 
## ARIMA(0,1,1)(0,1,1)[12]                    
## 
## Coefficients:
##          ma1    sma1
##       -0.402  -0.557
## s.e.   0.090   0.073
## 
## sigma^2 estimated as 0.00135:  log likelihood=244.7
## AIC=-483.4   AICc=-483.2   BIC=-474.8
## 
## In-sample error measures:
##        ME      RMSE       MAE       MPE      MAPE      MASE 
## 0.0005731 0.0350488 0.0262603 0.0109890 0.4752815 0.2898953

fore <- predict(apts.log.arima, n.ahead = 24)

U <- fore$pred + 2 * fore$se
L <- fore$pred - 2 * fore$se


apts.smoothing <- HoltWinters(apts.log, seasonal = "mul")
fore2 <- predict(apts.smoothing, n.ahead = 24)


ts.plot(exp(apts.log), exp(fore$pred), exp(U), exp(L), exp(fore2), col = c(1, 
    2, 4, 4, 6), lty = c(1, 1, 2, 2, 3))
legend("topleft", c("Actual", "ARIMA", "ARIMA Error Bounds (95% Confidence)", 
    "exponential smoothing"), col = c(1, 2, 4, 6), lty = c(1, 1, 2, 3))

plot of chunk predict3

ARIMA 모형 진단하기

아래 그래프의 결과 아래와 같은 사실을 알 수 있다.

결과적으로 우리는 적절한 모델을 만들어 낸 것을 알 수 있다.


tsdiag(apts.log.arima)

plot of chunk diag