Import data
y <- read.csv(file.choose(), header=TRUE)
plot (y, ylab= " Periods(1947-2015) ", main="Consumption Expenditure")
image:
Data is not stationary. Clearly, we can tell by looking at the figure we see that there is not constant expected value. So we take the difference. Note, taking the difference makes our data stationary
dly <-diff(log(y[,2]))
plot (y[2:276,1],dly, xlab="Periods(1947-2015) ", main="log(Consumption Expenditure)")
image:
after making the data stationary we’ll take both ACF & PACF to find the AR(p)
acf(dly, type="correlation", lag=24, main="Sample ACF")
image:
acf(dly, type="partial", lag=24, main="PACF")
image:
We try several AR(q) in order to figure which one is the best
ar1 <- arima(dly, order=c(1,0,0))
ar1
Coefficients:
ar1 intercept
0.0893 -0.0082
s.e. 0.0601 0.0005
sigma^2 estimated as 6.649e-05: log likelihood = 932.32, aic = -1858.64
ar2 <- arima(dly, order=c(2,0,0))
ar2
Coefficients:
ar1 ar2 intercept
0.0599 0.3188 -0.0082
s.e. 0.0571 0.0570 0.0007
sigma^2 estimated as 5.968e-05: log likelihood = 947.08, aic = -1886.16
ar3 <- arima(dly, order=c(3,0,0))
ar3
Coefficients:
ar1 ar2 ar3 intercept
0.0545 0.3177 0.0165 -0.0082
s.e. 0.0604 0.0571 0.0603 0.0008
sigma^2 estimated as 5.966e-05: log likelihood = 947.12, aic = -1884.23
ar4 <- arima(dly, order=c(4,0,0))
ar4
Coefficients:
ar1 ar2 ar3 ar4 intercept
0.0570 0.3647 0.0244 -0.1445 -0.0082
s.e. 0.0597 0.0598 0.0598 0.0596 0.0007
sigma^2 estimated as 5.84e-05: log likelihood = 950.02, aic = -1888.03
ar5 <- arima(dly, order=c(5,0,0)) ar5
Coefficients:
ar1 ar2 ar3 ar4 ar5 intercept
0.0569 0.3647 0.0245 -0.1444 -0.0003 -0.0082
s.e. 0.0604 0.0598 0.0639 0.0597 0.0603 0.0007
sigma^2 estimated as 5.84e-05: log likelihood = 950.02, aic = -1886.03
It’s clear that AIC is the lowest at AR(4). Thus we will be using it
We test residuals using Box-Ljung
Box.test(residuals(ar4),lag=24,type="Ljung")
Box-Ljung test
data: residuals(ar4)
X-squared = 22.66, df = 24, p-value = 0.5399
tsdiag(ar4, gof.lag=24)
We can see that our p-value is 0.5399 which is relatively large, so we can conclude that we don’t have correlation in the residuals.
image:
Now we try to find out MA models
ma1 <- arima(dly, order=c(0,0,1))
ma1
Coefficients:
ma1 intercept
0.0545 -0.0082
s.e. 0.0472 0.0005
sigma^2 estimated as 6.67e-05: log likelihood = 931.89, aic = -1857.78
ma2 <- arima(dly, order=c(0,0,2)) ma2
Coefficients:
ma1 ma2 intercept
0.0267 0.3660 -0.0082
s.e. 0.0567 0.0586 0.0006
sigma^2 estimated as 5.889e-05: log likelihood = 948.88, aic = -1889.77
ma3 <- arima(dly, order=c(0,0,3))
ma3
Coefficients:
ma1 ma2 ma3 intercept
0.0543 0.3688 0.0694 -0.0082
s.e. 0.0604 0.0580 0.0578 0.0007
sigma^2 estimated as 5.858e-05: log likelihood = 949.6, aic = -1889.21
ma4 <- arima(dly, order=c(0,0,4))
ma4
Coefficients:
ma1 ma2 ma3 ma4 intercept
0.0542 0.3674 0.0696 -0.0056 -0.0082
s.e. 0.0604 0.0609 0.0579 0.0748 0.0007
sigma^2 estimated as 5.857e-05: log likelihood = 949.61, aic = -1887.21
We can conclude that MA(2) has the minimum AIC value
Test for correlations of residuals
Box.test(residuals(ma2),lag=8,type="Ljung")
Box-Ljung test
data: residuals(ma2)
X-squared = 9.391, df = 8, p-value = 0.3104
tsdiag(ma2, gof.lag=8)
image:
We conclude that MA(2) is the best model
Import the data
x <- read.csv(file.choose(), header=TRUE)
plot(x, xlab="Periods ", ylab="Price", main="Crude Oil")
image:
As we did in the first question, we make the data stationary by differencing
dlx <-diff(log(x[,2]))
plot(x[2:1501,1],dlx, xlab="Periods ", main="Price of Crude Oil")
image:
We now get the ACF and the PACF functions
acf(dlx, type="correlation", lag=24, main="Sample ACF")
image:
acf(dlx, type="partial", lag=24, main="Sample PACF")
image:
ar1 <- arima(dlx, order=c(1,0,0))
ar1
Coefficients:
ar1 intercept
0.1851 -0.0004
s.e. 0.0254 0.0013
sigma^2 estimated as 0.001728: log likelihood = 2642.29, aic = -5278.57
ar2 <- arima(dlx, order=c(2,0,0))
ar2
Coefficients:
ar1 ar2 intercept
0.1864 -0.0073 -0.0003
s.e. 0.0258 0.0258 0.0013
sigma^2 estimated as 0.001728: log likelihood = 2642.33, aic = -5276.65
ar3 <- arima(dlx, order=c(3,0,0))
ar3
Coefficients:
ar1 ar2 ar3 intercept
0.1868 -0.0244 0.0922 -0.0003
s.e. 0.0257 0.0262 0.0258 0.0014
sigma^2 estimated as 0.001713: log likelihood = 2648.68, aic = -5287.37
ar4 <- arima(dlx, order=c(4,0,0))
ar4
Coefficients:
ar1 ar2 ar3 ar4 intercept
0.1898 -0.0251 0.0985 -0.0332 -0.0003
s.e. 0.0258 0.0261 0.0262 0.0259 0.0014
sigma^2 estimated as 0.001711: log likelihood = 2649.51, aic = -5287.01
ma1 <- arima(dlx, order=c(0,0,1))
ma1
Coefficients:
ma1 intercept
0.1914 -0.0003
s.e. 0.0261 0.0013
sigma^2 estimated as 0.001727: log likelihood = 2642.7, aic = -5279.4
ma2 <- arima(dlx, order=c(0,0,2))
ma2
Coefficients:
ma1 ma2 intercept
0.1915 -0.0084 -0.0003
s.e. 0.0258 0.0259 0.0013
sigma^2 estimated as 0.001727: log likelihood = 2642.75, aic = -5277.5
ma3 <-arima(dlx, order=c(0,0,3))
ma3
Coefficients:
ma1 ma2 ma3 intercept
0.1905 0.0086 0.1010 -0.0003
s.e. 0.0257 0.0261 0.0262 0.0014
sigma^2 estimated as 0.00171: log likelihood = 2650.06, aic = -5290.12
ma4 <-arima(dlx, order=c(0,0,4))
ma4
Coefficients:
ma1 ma2 ma3 ma4 intercept
0.1906 0.0086 0.1010 0.0002 -0.0003
s.e. 0.0258 0.0261 0.0268 0.0254 0.0014
sigma^2 estimated as 0.00171: log likelihood = 2650.06, aic = -5288.12
ARIMA1 <- arima(dlx, order=c(1,0,1))
ARIMA1
Coefficients:
ar1 ma1 intercept
-0.2774 0.4616 -0.0003
s.e. 0.1981 0.1857 0.0012
sigma^2 estimated as 0.001726: log likelihood = 2643.22, aic = -5278.44
ARIMA2 <- arima(dlx, order=c(2,0,2))
ARIMA2
Coefficients:
ar1 ar2 ma1 ma2 intercept
0.0609 0.3732 0.1351 -0.3799 -0.0003
s.e. 0.2092 0.0904 0.2110 0.1109 0.0014
sigma^2 estimated as 0.001712: log likelihood = 2648.99, aic = -5285.99
ARIMA3 <- arima(dlx, order=c(3,0,3))
ARIMA3
Coefficients:
ar1 ar2 ar3 ma1 ma2 ma3 intercept
0.2996 -0.0444 -0.1597 -0.1095 -0.004 0.2656 -0.0003
s.e. 0.3628 0.2763 0.1853 0.3616 0.250 0.2051 0.0014
sigma^2 estimated as 0.001709: log likelihood = 2650.37, aic = -5284.73
ARIMA4 <- arima(dlx, order=c(3,0,3))
ARIMA3
Coefficients:
ar1 ar2 ar3 ma1 ma2 ma3 intercept
0.2996 -0.0444 -0.1597 -0.1095 -0.004 0.2656 -0.0003
s.e. 0.3628 0.2763 0.1853 0.3616 0.250 0.2051 0.0014
sigma^2 estimated as 0.001709: log likelihood = 2650.37, aic = -5284.73
Test the correlation of residuals
Box.test(residuals(ma3), lag=24,type="Ljung")
Box-Ljung test
data: residuals(ma3)
X-squared = 21.843, df = 24, p-value = 0.5886
tsdiag(ma3, gof.lag=24)
image:
It is clear from the figures that and from the p-value that residuals are not serially correlated, thus MA(3) is the best model
hp <- Quandl("FRED/HOUSTNSA.csv", type="ts")
help(quandl)
help(Quandl)
library("Quandl")
hp <- read.csv(file.choose(), header=TRUE)
hp<- Quandl("FRED/HOUSTNSA", type="ts")
str(hp)
zooreg(1:685, start=c(1959,1), frequency=12)
plot(hp)
image:
## library(tseries)
adf.test(hp)
Augmented Dickey-Fuller Test
data: hp
Dickey-Fuller = -2.6039, Lag order = 8, p-value = 0.3227
alternative hypothesis: stationary
By looking at the Augmented Dickey-Fuller test, we can conclude that using the difference is optimal for our data.
We also divide the data into two parts
hpall<-hp
hp1 <- window(hpall, end=c(2013,12))
hp2 <- window(hpall,start=c(2014,1))
hp <- hp1
dhp <- diff(hp)
sdhp<-diff(diff(hp),12)
plot(hp)
image:
plot(dhp)
image:
plot(sdhp)
image:
par(mfrow=c(2,2))
acf((dhp), type='correlation',lag =24, main="sample ACF")
image:
acf((dhp),type = 'partial',lag=24, main="Simple PACF")
image:
acf((sdhp),type = 'correlation',lag = 24,main="Sample ACF")
image:
acf((sdhp),type='partial',lag=24,main="simple PACF")
image:
am <- arima(dhp,order=c(2,0,0),seasonal=list(order=c(2,0,0),period=12))
am
ar1 ar2 sar1 sar2 intercept
-0.3315 -0.0933 0.4975 0.3408 -0.0432
s.e. 0.0407 0.0396 0.0372 0.0369 1.7747
sigma^2 estimated as 139.4: log likelihood = -2568.47, aic = 5148.93
tsdiag(am,gof.lag=24)
image:
am.LB <- Box.test(am$rersiduals, lag=8, type="Ljung")
am.LB<- Box.test(am$residuals, lag=24, type="Ljung")
am.LB
Box-Ljung test
data: am$residuals
X-squared = 77.403, df = 24, p-value = 1.569e-07
To check frequency
T4 <- arima(sdhp,order=c(0,0,1),seasonal=list(order=c(0,0,2),period=12))
T4
ma1 sma1 sma2 intercept
-0.3198 -0.7488 -0.1208 0.0004
s.e. 0.0364 0.0390 0.0377 0.0442
sigma^2 estimated as 114.9: log likelihood = -2460.84, aic = 4931.68
tsdiag(T4,gof.lag=24)
image:
T4.LB <- Box.test(T4$residuals, lag=24, type="Ljung")
T4.LB
Box-Ljung test
data: T4$residuals
X-squared = 44.076, df = 24, p-value = 0.007477
par(mfrow=c(1,2), cex=0.75)
library("forecast")
T4.forecast <- forecast(T4, h=24)
plot(T4.forecast, xlim=c(2011,2017))
image: