Import data:

Time series data for Real Personal Consumption Expenditures (FRED series PCECC96)

library(Quandl)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Download the FRED series PCECC96 through Quandl and keep it as an xts object.
rpce <- Quandl(code = "FRED/PCECC96", type = "xts")

Summary

# Summary statistics of the time index and of the series values.
summary(rpce)
##      Index           rpce      
##  Min.   :1947   Min.   : 1199  
##  1st Qu.:1964   1st Qu.: 2253  
##  Median :1982   Median : 4064  
##  Mean   :1982   Mean   : 5097  
##  3rd Qu.:1999   3rd Qu.: 7753  
##  Max.   :2017   Max.   :11640
# First five observations of the series.
head(rpce, n = 5)
##           [,1]
## 1947 Q1 1199.4
## 1947 Q2 1219.3
## 1947 Q3 1223.3
## 1947 Q4 1223.6
## 1948 Q1 1229.8
# Last five observations of the series.
tail(rpce, n = 5)
##            [,1]
## 2015 Q4 11319.3
## 2016 Q1 11365.2
## 2016 Q2 11484.9
## 2016 Q3 11569.0
## 2016 Q4 11640.4

Plots

# Line plot of the level series; major.format passes a year-quarter format
# string for the time-axis labels.
plot(
  rpce,
  type = "l",
  xlab = "Years",
  ylab = "Real Personal Consumption Expenditures",
  main = "Real Personal Consumption Expenditures",
  major.format = "%Y Q%q"
)

Stationary Time Series Data

Take the first difference of the logged series (the quarter-to-quarter log-change) to make our data stationary

# Log-change of the series: first difference of log(rpce).
drpce <- diff(log(rpce))

# Line plot of the log-change series. The title previously misspelled
# "Quarterly" as "Quaterly".
plot(
  drpce,
  type = "l",
  xlab = "Years 1947-2016",
  ylab = "log-change in Real Personal Consumption Expenditures",
  main = "Log-change in Real Personal Consumption Expenditures, Quarterly",
  major.format = "%Y Q%q"
)

The Relation Between Data Over Period of Time

The autocorrelation function (ACF) and the partial autocorrelation function (PACF) help us understand the relation between observations of the series over time.

library(forecast)
## Loading required package: timeDate
## This is forecast 7.3
# Two plot rows in one column, default text size, and custom margins
# (bottom, left, top, right).
par(
  mfrow = c(2, 1),
  cex = 1,
  mar = c(3, 4, 3, 3)
)

ACF

# Sample autocorrelation function of the log-change series, up to lag 280.
# The lag.max argument is spelled out instead of relying on partial matching.
Acf(drpce, type = "correlation", lag.max = 280, main = "ACF")

PACF

# Sample partial autocorrelation function of the log-change series, up to
# lag 280. type = "partial" is required here: with type = "correlation" the
# plot titled "PACF" was just a second copy of the ACF.
Acf(drpce, type = "partial", lag.max = 280, main = "PACF")

AR(p)

# Fit an AR(1) model (ARIMA order 1,0,0) to the log-change series and print it.
ar1 <- arima(x = drpce, order = c(1, 0, 0))
print(ar1)
## 
## Call:
## arima(x = drpce, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.0914     0.0081
## s.e.  0.0596     0.0005
## 
## sigma^2 estimated as 6.562e-05:  log likelihood = 947.72,  aic = -1889.45
# Narrow all four margins, then draw the diagnostic plots for the AR(1) fit
# with portmanteau p-values up to lag 280.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ar1, gof.lag = 280)

# Fit an AR(2) model (ARIMA order 2,0,0) to the log-change series and print it.
ar2 <- arima(x = drpce, order = c(2, 0, 0))
print(ar2)
## 
## Call:
## arima(x = drpce, order = c(2, 0, 0))
## 
## Coefficients:
##          ar1     ar2  intercept
##       0.0614  0.3185     0.0081
## s.e.  0.0566  0.0566     0.0007
## 
## sigma^2 estimated as 5.891e-05:  log likelihood = 962.67,  aic = -1917.34
# Narrow all four margins, then draw the diagnostic plots for the AR(2) fit
# with portmanteau p-values up to lag 280.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ar2, gof.lag = 280)

# Fit an AR(3) model (ARIMA order 3,0,0) to the log-change series and print it.
ar3 <- arima(x = drpce, order = c(3, 0, 0))
print(ar3)
## 
## Call:
## arima(x = drpce, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1     ar2     ar3  intercept
##       0.0560  0.3175  0.0162     0.0081
## s.e.  0.0599  0.0567  0.0599     0.0008
## 
## sigma^2 estimated as 5.889e-05:  log likelihood = 962.71,  aic = -1915.42
# Narrow all four margins, then draw the diagnostic plots for the AR(3) fit
# with portmanteau p-values up to lag 280.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ar3, gof.lag = 280)

# Fit an AR(4) model (ARIMA order 4,0,0) to the log-change series and print it.
ar4 <- arima(x = drpce, order = c(4, 0, 0))
print(ar4)
## 
## Call:
## arima(x = drpce, order = c(4, 0, 0))
## 
## Coefficients:
##          ar1     ar2     ar3      ar4  intercept
##       0.0584  0.3643  0.0244  -0.1441     0.0082
## s.e.  0.0593  0.0594  0.0594   0.0592     0.0007
## 
## sigma^2 estimated as 5.765e-05:  log likelihood = 965.64,  aic = -1919.28
# Narrow all four margins, then draw the diagnostic plots for the AR(4) fit
# with portmanteau p-values up to lag 280.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ar4, gof.lag = 280)

# Fit an AR(5) model (ARIMA order 5,0,0) to the log-change series and print it.
ar5 <- arima(x = drpce, order = c(5, 0, 0))
print(ar5)
## 
## Call:
## arima(x = drpce, order = c(5, 0, 0))
## 
## Coefficients:
##          ar1     ar2     ar3      ar4      ar5  intercept
##       0.0582  0.3643  0.0249  -0.1440  -0.0013     0.0082
## s.e.  0.0600  0.0594  0.0635   0.0593   0.0599     0.0007
## 
## sigma^2 estimated as 5.765e-05:  log likelihood = 965.64,  aic = -1917.28
# Narrow all four margins, then draw the diagnostic plots for the AR(5) fit
# with portmanteau p-values up to lag 280.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ar5, gof.lag = 280)

BIC

# Bayesian information criterion of each AR fit, printed one model at a time.
print(BIC(ar1))
## [1] -1878.553
print(BIC(ar2))
## [1] -1902.818
print(BIC(ar3))
## [1] -1897.26
print(BIC(ar4))
## [1] -1897.495
print(BIC(ar5))
## [1] -1891.865

Box-Ljung test

# Ljung-Box test on the AR(1) residuals using 200 lags.
# NOTE(review): fitdf stays at its default of 0 although these are residuals
# of a fitted AR(1); consider fitdf = 1 and a smaller lag.
Box.test(residuals(ar1), lag = 200, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(ar1)
## X-squared = 191.8, df = 200, p-value = 0.6489
# Reset the margins and redraw the diagnostic plots for the AR(1) fit.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ar1, gof.lag = 280)

# Ljung-Box test on the AR(2) residuals using 200 lags.
# NOTE(review): fitdf stays at its default of 0 although these are residuals
# of a fitted AR(2); consider fitdf = 2 and a smaller lag.
Box.test(residuals(ar2), lag = 200, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(ar2)
## X-squared = 163.52, df = 200, p-value = 0.9722
# Reset the margins and redraw the diagnostic plots for the AR(2) fit.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ar2, gof.lag = 280)

# Ljung-Box test on the AR(3) residuals using 200 lags.
# NOTE(review): fitdf stays at its default of 0 although these are residuals
# of a fitted AR(3); consider fitdf = 3 and a smaller lag.
Box.test(residuals(ar3), lag = 200, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(ar3)
## X-squared = 163.78, df = 200, p-value = 0.9712
# Reset the margins and redraw the diagnostic plots for the AR(3) fit.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ar3, gof.lag = 280)

# Ljung-Box test on the AR(4) residuals using 200 lags.
# NOTE(review): fitdf stays at its default of 0 although these are residuals
# of a fitted AR(4); consider fitdf = 4 and a smaller lag.
Box.test(residuals(ar4), lag = 200, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(ar4)
## X-squared = 160.73, df = 200, p-value = 0.981
# Reset the margins and redraw the diagnostic plots for the AR(4) fit.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ar4, gof.lag = 280)

# Ljung-Box test on the AR(5) residuals using 200 lags.
# NOTE(review): fitdf stays at its default of 0 although these are residuals
# of a fitted AR(5); consider fitdf = 5 and a smaller lag.
Box.test(residuals(ar5), lag = 200, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(ar5)
## X-squared = 160.76, df = 200, p-value = 0.9809
# Reset the margins and redraw the diagnostic plots for the AR(5) fit.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ar5, gof.lag = 280)

Among all 5 AR models, AR(4) has the lowest AIC (-1919.28), but AR(2) has the lowest BIC (-1902.818), so AR(2) is a good model. Moreover, in the Ljung-Box test all 5 models have p-values higher than 0.6, so none of them shows evidence of remaining autocorrelation in the residuals. For AR(2), the ACF of the residuals is approximately zero and the Ljung-Box p-value is 0.9722. Thus AR(2) is an adequate model.

MA(q)

# Fit an MA(1) model (ARIMA order 0,0,1) to the log-change series and print it.
ma1 <- arima(x = drpce, order = c(0, 0, 1))
print(ma1)
## 
## Call:
## arima(x = drpce, order = c(0, 0, 1))
## 
## Coefficients:
##          ma1  intercept
##       0.0560     0.0081
## s.e.  0.0468     0.0005
## 
## sigma^2 estimated as 6.584e-05:  log likelihood = 947.27,  aic = -1888.53
# Narrow all four margins, then draw the diagnostic plots for the MA(1) fit
# with portmanteau p-values up to lag 280.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ma1, gof.lag = 280)

# Fit an MA(2) model (ARIMA order 0,0,2) to the log-change series and print it.
ma2 <- arima(x = drpce, order = c(0, 0, 2))
print(ma2)
## 
## Call:
## arima(x = drpce, order = c(0, 0, 2))
## 
## Coefficients:
##          ma1     ma2  intercept
##       0.0276  0.3652     0.0082
## s.e.  0.0564  0.0581     0.0006
## 
## sigma^2 estimated as 5.814e-05:  log likelihood = 964.46,  aic = -1920.92
# Narrow all four margins, then draw the diagnostic plots for the MA(2) fit
# with portmanteau p-values up to lag 280.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ma2, gof.lag = 280)

# Fit an MA(3) model (ARIMA order 0,0,3) to the log-change series and print it.
ma3 <- arima(x = drpce, order = c(0, 0, 3))
print(ma3)
## 
## Call:
## arima(x = drpce, order = c(0, 0, 3))
## 
## Coefficients:
##          ma1     ma2     ma3  intercept
##       0.0556  0.3682  0.0713     0.0082
## s.e.  0.0600  0.0575  0.0575     0.0007
## 
## sigma^2 estimated as 5.782e-05:  log likelihood = 965.23,  aic = -1920.46
# Narrow all four margins, then draw the diagnostic plots for the MA(3) fit
# with portmanteau p-values up to lag 280.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ma3, gof.lag = 280)

# Fit an MA(4) model (ARIMA order 0,0,4) to the log-change series and print it.
ma4 <- arima(x = drpce, order = c(0, 0, 4))
print(ma4)
## 
## Call:
## arima(x = drpce, order = c(0, 0, 4))
## 
## Coefficients:
##          ma1     ma2     ma3      ma4  intercept
##       0.0554  0.3671  0.0715  -0.0044     0.0082
## s.e.  0.0600  0.0605  0.0575   0.0742     0.0007
## 
## sigma^2 estimated as 5.782e-05:  log likelihood = 965.23,  aic = -1918.46
# Narrow all four margins, then draw the diagnostic plots for the MA(4) fit
# with portmanteau p-values up to lag 280.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ma4, gof.lag = 280)

# Fit an MA(5) model (ARIMA order 0,0,5) to the log-change series and print it.
ma5 <- arima(x = drpce, order = c(0, 0, 5))
print(ma5)
## 
## Call:
## arima(x = drpce, order = c(0, 0, 5))
## 
## Coefficients:
##          ma1     ma2     ma3      ma4     ma5  intercept
##       0.0557  0.3666  0.0795  -0.0059  0.0121     0.0082
## s.e.  0.0600  0.0606  0.0743   0.0749  0.0707     0.0007
## 
## sigma^2 estimated as 5.781e-05:  log likelihood = 965.25,  aic = -1916.49
# Narrow all four margins, then draw the diagnostic plots for the MA(5) fit
# with portmanteau p-values up to lag 280.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ma5, gof.lag = 280)

BIC

# Bayesian information criterion of each MA fit, printed one model at a time.
print(BIC(ma1))
## [1] -1877.64
print(BIC(ma2))
## [1] -1906.397
print(BIC(ma3))
## [1] -1902.303
print(BIC(ma4))
## [1] -1896.675
print(BIC(ma5))
## [1] -1891.074

Box-Ljung test

# Ljung-Box test on the MA(1) residuals using 200 lags.
# NOTE(review): fitdf stays at its default of 0 although these are residuals
# of a fitted MA(1); consider fitdf = 1 and a smaller lag.
Box.test(residuals(ma1), lag = 200, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(ma1)
## X-squared = 195.84, df = 200, p-value = 0.5699
# Reset the margins and redraw the diagnostic plots for the MA(1) fit.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ma1, gof.lag = 280)

# Ljung-Box test on the MA(2) residuals using 200 lags.
# NOTE(review): fitdf stays at its default of 0 although these are residuals
# of a fitted MA(2); consider fitdf = 2 and a smaller lag.
Box.test(residuals(ma2), lag = 200, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(ma2)
## X-squared = 156.61, df = 200, p-value = 0.9897
# Reset the margins and redraw the diagnostic plots for the MA(2) fit.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ma2, gof.lag = 280)

# Ljung-Box test on the MA(3) residuals using 200 lags.
# NOTE(review): fitdf stays at its default of 0 although these are residuals
# of a fitted MA(3); consider fitdf = 3 and a smaller lag.
Box.test(residuals(ma3), lag = 200, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(ma3)
## X-squared = 152.76, df = 200, p-value = 0.9945
# Reset the margins and redraw the diagnostic plots for the MA(3) fit.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ma3, gof.lag = 280)

# Ljung-Box test on the MA(4) residuals using 200 lags.
# NOTE(review): fitdf stays at its default of 0 although these are residuals
# of a fitted MA(4); consider fitdf = 4 and a smaller lag.
Box.test(residuals(ma4), lag = 200, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(ma4)
## X-squared = 152.2, df = 200, p-value = 0.995
# Reset the margins and redraw the diagnostic plots for the MA(4) fit.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ma4, gof.lag = 280)

# Ljung-Box test on the MA(5) residuals using 200 lags.
# NOTE(review): fitdf stays at its default of 0 although these are residuals
# of a fitted MA(5); consider fitdf = 5 and a smaller lag.
Box.test(residuals(ma5), lag = 200, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(ma5)
## X-squared = 152.18, df = 200, p-value = 0.9951
# Reset the margins and redraw the diagnostic plots for the MA(5) fit.
par(mar = c(2, 2, 2, 2))
tsdiag(object = ma5, gof.lag = 280)

Among all 5 MA models, MA(2) has both the lowest AIC (-1920.92) and the lowest BIC (-1906.397), so MA(2) is a good model. Moreover, in the Ljung-Box test all 5 models have p-values higher than 0.5, so none of them shows evidence of remaining autocorrelation in the residuals. For MA(2), the ACF of the residuals is approximately zero and the Ljung-Box p-value is 0.9897. Thus MA(2) is an adequate model.

Conclusion:

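AR(2) and MA(2) are the best among all the models studied. Of the two, MA(2) has the lower AIC (-1920.92 vs. -1917.34) and the lower BIC (-1906.397 vs. -1902.818).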