Load quarterly time series data for Real Personal Consumption Expenditures (RPCE) from FRED via Quandl.
# Note: this Quandl address is the dataset's landing page, so read.csv()
# may need the CSV export endpoint instead to parse actual data.
rpce <- read.csv("https://www.quandl.com/data/FRED/PCECC96-Real-Personal-Consumption-Expenditures", header=TRUE)
# With frequency=4, end=c(2016, 10) rolls past quarter 4, so the series
# runs through 2018 Q2, consistent with the 286 observations str() reports below
rpce <- ts(rpce, start=c(1947, 1), end=c(2016, 10), frequency=4)
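If read.csv() on the page address fails, the series can also be pulled with the Quandl package; a minimal alternative sketch, assuming the package is installed (and, for more than a handful of calls, an API key).
library(Quandl)
# Quandl.api_key("YOUR_KEY")   # hypothetical placeholder key
# type="ts" returns the series directly as a quarterly ts object
rpce <- Quandl("FRED/PCECC96", type="ts")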
str(rpce)
## Time-Series [1:286] from 1947 to 2018: 109 107 108 106 135 110 134 78 42 51 ...
summary(rpce)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##     1.0   131.8   226.0   211.8   287.0   340.0
head(rpce,5)
## [1] 109 107 108 106 135
tail(rpce,5)
## [1] 112 126 128 130 124
plot(rpce, type="l", xlab="Years 1947-2016", ylab="Real Personal Consumption Expenditures", main="Real Personal Consumption Expenditures, Quarterly")
Take the first difference of the log series to make the data stationary.
drpce <- diff(log(rpce))
plot(drpce, type="p", xlab="Years 1947-2016", ylab="Log-change in Real Personal Consumption Expenditures", main="Log-change in Real Personal Consumption Expenditures, Quarterly")
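The plot suggests the differenced series is stationary; a formal unit-root test can back up the visual impression. A minimal sketch using the tseries package, an addition beyond the original analysis:
library(tseries)   # provides adf.test()
# Augmented Dickey-Fuller test: the null is a unit root, so a small
# p-value supports treating drpce as stationary
adf.test(drpce, alternative="stationary")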
The autocorrelation function (ACF) and the partial autocorrelation function (PACF) help us understand how observations are related over time; their decay patterns also guide the choice of AR and MA orders.
acf(drpce, type="correlation", lag.max=285, main="Sample ACF")
acf(drpce, type="partial", lag.max=285, main="PACF")
ar1 <- arima(drpce, order=c(1,0,0))
ar1
##
## Call:
## arima(x = drpce, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## -0.4178 0.0005
## s.e. 0.0537 0.0215
##
## sigma^2 estimated as 0.2649: log likelihood = -215.17, aic = 436.34
par(mar = rep(2, 4))
tsdiag(ar1, gof.lag=10)
ar2 <- arima(drpce, order=c(2,0,0))
ar2
##
## Call:
## arima(x = drpce, order = c(2, 0, 0))
##
## Coefficients:
## ar1 ar2 intercept
## -0.5258 -0.2567 0.0005
## s.e. 0.0572 0.0570 0.0166
##
## sigma^2 estimated as 0.2472: log likelihood = -205.39, aic = 418.77
par(mar = rep(2, 4))
tsdiag(ar2, gof.lag=10)
ar3 <- arima(drpce, order=c(3,0,0))
ar3
##
## Call:
## arima(x = drpce, order = c(3, 0, 0))
##
## Coefficients:
## ar1 ar2 ar3 intercept
## -0.5569 -0.3199 -0.1192 0.0005
## s.e. 0.0588 0.0645 0.0585 0.0147
##
## sigma^2 estimated as 0.2436: log likelihood = -203.32, aic = 416.65
par(mar = rep(2, 4))
tsdiag(ar3, gof.lag=10)
ar4 <- arima(drpce, order=c(4,0,0))
ar4
##
## Call:
## arima(x = drpce, order = c(4, 0, 0))
##
## Coefficients:
## ar1 ar2 ar3 ar4 intercept
## -0.5688 -0.3516 -0.1741 -0.0978 0.0005
## s.e. 0.0589 0.0670 0.0668 0.0586 0.0133
##
## sigma^2 estimated as 0.2412: log likelihood = -201.94, aic = 415.87
par(mar = rep(2, 4))
tsdiag(ar4, gof.lag=10)
ar5 <- arima(drpce, order=c(5,0,0))
ar5
##
## Call:
## arima(x = drpce, order = c(5, 0, 0))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 intercept
## -0.5761 -0.3645 -0.2000 -0.1395 -0.0729 0.0005
## s.e. 0.0590 0.0676 0.0698 0.0673 0.0586 0.0124
##
## sigma^2 estimated as 0.2399: log likelihood = -201.16, aic = 416.33
par(mar = rep(2, 4))
tsdiag(ar5, gof.lag=10)
BIC(ar1)
## [1] 447.3011
BIC(ar2)
## [1] 433.3805
BIC(ar3)
## [1] 434.9081
BIC(ar4)
## [1] 437.7859
BIC(ar5)
## [1] 441.8947
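As a sanity check, the BIC can be computed by hand from BIC = -2*logLik + k*log(n); for ar2 there are k = 4 estimated parameters (two AR coefficients, the intercept, and sigma^2) and n = 285 usable observations after differencing.
# should reproduce BIC(ar2) = 433.3805 up to rounding
-2 * as.numeric(logLik(ar2)) + 4 * log(285)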
Box.test(residuals(ar2),lag=100,type="Ljung")
##
## Box-Ljung test
##
## data: residuals(ar2)
## X-squared = 86.942, df = 100, p-value = 0.821
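Strictly, the chi-squared degrees of freedom should be reduced by the number of estimated coefficients; Box.test() supports this through its fitdf argument. A refinement of the test above, output not shown:
# fitdf=2 accounts for the two estimated AR coefficients, so df = 98
Box.test(residuals(ar2), lag=100, type="Ljung", fitdf=2)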
par(mar = rep(2, 4))
tsdiag(ar2, gof.lag=10)
Among all five AR models, AR(2) has the lowest BIC (433.3805), so AR(2) is the preferred AR specification. Moreover, the Ljung-Box test on its residuals gives a high p-value (0.821), so there is no evidence of remaining autocorrelation. Thus, AR(2) is an adequate model.
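Fitting each order by hand works, but the comparison can also be compacted into a short loop; a minimal sketch that refits AR(1) through AR(5) and tabulates both criteria:
# refit AR(p) for p = 1..5 and collect AIC and BIC in one table
ar_fits <- lapply(1:5, function(p) arima(drpce, order=c(p, 0, 0)))
data.frame(p = 1:5, AIC = sapply(ar_fits, AIC), BIC = sapply(ar_fits, BIC))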
ma1 <- arima(drpce, order=c(0,0,1))
ma1
##
## Call:
## arima(x = drpce, order = c(0, 0, 1))
##
## Coefficients:
## ma1 intercept
## -0.6632 0.0006
## s.e. 0.0569 0.0098
##
## sigma^2 estimated as 0.2363: log likelihood = -199.12, aic = 404.24
par(mar = rep(2, 4))
tsdiag(ma1, gof.lag=10)
ma2 <- arima(drpce, order=c(0,0,2))
ma2
##
## Call:
## arima(x = drpce, order = c(0, 0, 2))
##
## Coefficients:
## ma1 ma2 intercept
## -0.6151 -0.1049 0.0007
## s.e. 0.0574 0.0609 0.0081
##
## sigma^2 estimated as 0.2339: log likelihood = -197.68, aic = 403.37
par(mar = rep(2, 4))
tsdiag(ma2, gof.lag=10)
ma3 <- arima(drpce, order=c(0,0,3))
ma3
##
## Call:
## arima(x = drpce, order = c(0, 0, 3))
##
## Coefficients:
## ma1 ma2 ma3 intercept
## -0.6193 -0.0641 -0.0582 0.0007
## s.e. 0.0596 0.0747 0.0574 0.0075
##
## sigma^2 estimated as 0.233: log likelihood = -197.17, aic = 404.34
par(mar = rep(2, 4))
tsdiag(ma3, gof.lag=10)
ma4 <- arima(drpce, order=c(0,0,4))
ma4
##
## Call:
## arima(x = drpce, order = c(0, 0, 4))
##
## Coefficients:
## ma1 ma2 ma3 ma4 intercept
## -0.6230 -0.0564 0.0092 -0.0990 0.0008
## s.e. 0.0592 0.0683 0.0705 0.0616 0.0067
##
## sigma^2 estimated as 0.2309: log likelihood = -195.88, aic = 403.76
par(mar = rep(2, 4))
tsdiag(ma4, gof.lag=10)
ma5 <- arima(drpce, order=c(0,0,5))
ma5
##
## Call:
## arima(x = drpce, order = c(0, 0, 5))
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 intercept
## -0.6214 -0.0534 0.0093 -0.0722 -0.0480 0.0006
## s.e. 0.0590 0.0696 0.0721 0.0701 0.0592 0.0062
##
## sigma^2 estimated as 0.2303: log likelihood = -195.55, aic = 405.1
par(mar = rep(2, 4))
tsdiag(ma5, gof.lag=10)
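The MA fits can be ranked the same way the AR fits were; a quick tabulation of both criteria across the five MA models, output omitted:
ma_fits <- list(ma1, ma2, ma3, ma4, ma5)
data.frame(q = 1:5, AIC = sapply(ma_fits, AIC), BIC = sapply(ma_fits, BIC))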
Box.test(residuals(ma2),lag=100,type="Ljung")
##
## Box-Ljung test
##
## data: residuals(ma2)
## X-squared = 71.186, df = 100, p-value = 0.987
par(mar = rep(2, 4))
tsdiag(ma2, gof.lag=10)
Among all five MA models, MA(2) has the lowest AIC (403.37), so MA(2) is the preferred MA specification. Moreover, the Ljung-Box test on its residuals gives a high p-value (0.987). Thus, MA(2) is an adequate model.
Among all models studied, AR(2) and MA(2) are the best in their respective classes; of the two, MA(2) has the lower AIC (403.37 versus 418.77) and the higher Ljung-Box p-value.
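With an adequate model selected, the natural next step is forecasting; a minimal sketch using predict() on the MA(2) fit, an extension beyond the original analysis:
# forecast the next 8 quarters of the log-change series, with
# approximate 95% intervals built from the prediction standard errors
fc <- predict(ma2, n.ahead=8)
cbind(forecast=fc$pred, lower=fc$pred - 1.96*fc$se, upper=fc$pred + 1.96*fc$se)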