Question 1:

Import data

y <- read.csv(file.choose(), header=TRUE)

plot(y, xlab="Periods (1947-2015)", main="Consumption Expenditure")

[Figure: Consumption Expenditure]

The data are clearly not stationary: the plot shows that the expected value is not constant over time. We therefore take differences of the log series, which makes the data stationary.

dly <- diff(log(y[,2]))

plot(y[2:276,1], dly, xlab="Periods (1947-2015)", main="Differenced log(Consumption Expenditure)")

[Figure: Differenced log(Consumption Expenditure)]
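Stationarity of dly can also be checked formally with an augmented Dickey-Fuller test, as we do in Question 3. A minimal sketch, assuming the tseries package is installed (output not shown here):

library(tseries)

adf.test(dly)  # a small p-value rejects the unit root, supporting stationarity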

After making the data stationary, we examine both the ACF and the PACF to identify candidate AR and MA orders.

acf(dly, type="correlation", lag=24, main="Sample ACF")

[Figure: Sample ACF]

acf(dly, type="partial", lag=24, main="PACF")

[Figure: Sample PACF]

We fit several AR(p) models to determine which one fits best.

ar1 <- arima(dly, order=c(1,0,0))

ar1

Coefficients:

         ar1  intercept
      0.0893    -0.0082
s.e.  0.0601     0.0005

sigma^2 estimated as 6.649e-05:  log likelihood = 932.32,  aic = -1858.64
  

ar2 <- arima(dly, order=c(2,0,0))

ar2

Coefficients:

         ar1     ar2  intercept
      0.0599  0.3188    -0.0082
s.e.  0.0571  0.0570     0.0007

sigma^2 estimated as 5.968e-05:  log likelihood = 947.08,  aic = -1886.16

ar3 <- arima(dly, order=c(3,0,0))

ar3

Coefficients:

         ar1     ar2     ar3  intercept
      0.0545  0.3177  0.0165    -0.0082
s.e.  0.0604  0.0571  0.0603     0.0008

sigma^2 estimated as 5.966e-05:  log likelihood = 947.12,  aic = -1884.23
  

ar4 <- arima(dly, order=c(4,0,0))

ar4

Coefficients:

         ar1     ar2     ar3      ar4  intercept
      0.0570  0.3647  0.0244  -0.1445    -0.0082
s.e.  0.0597  0.0598  0.0598   0.0596     0.0007

sigma^2 estimated as 5.84e-05:  log likelihood = 950.02,  aic = -1888.03
  

ar5 <- arima(dly, order=c(5,0,0))

ar5

Coefficients:

         ar1     ar2     ar3      ar4      ar5  intercept
      0.0569  0.3647  0.0245  -0.1444  -0.0003    -0.0082
s.e.  0.0604  0.0598  0.0639   0.0597   0.0603     0.0007

sigma^2 estimated as 5.84e-05:  log likelihood = 950.02,  aic = -1886.03
  

The AIC is lowest for AR(4) (aic = -1888.03), so we select it among the AR models.
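This comparison can also be scripted; the sketch below refits AR(1) through AR(5) and collects their AICs (it assumes dly from above, and aic.ar is introduced purely for illustration):

aic.ar <- sapply(1:5, function(p) AIC(arima(dly, order=c(p,0,0))))

which.min(aic.ar)  # 4, matching the fits above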

We test the residuals for autocorrelation using the Ljung-Box test:

Box.test(residuals(ar4),lag=24,type="Ljung")

Box-Ljung test

data:  residuals(ar4)
X-squared = 22.66, df = 24, p-value = 0.5399

tsdiag(ar4, gof.lag=24)

The p-value of 0.5399 is large, so we fail to reject the null hypothesis of no autocorrelation: the AR(4) residuals behave like white noise.

[Figure: tsdiag diagnostics for AR(4)]

Next we fit MA models.

ma1 <- arima(dly, order=c(0,0,1))

ma1

Coefficients:

         ma1  intercept
      0.0545    -0.0082
s.e.  0.0472     0.0005

sigma^2 estimated as 6.67e-05:  log likelihood = 931.89,  aic = -1857.78
  

ma2 <- arima(dly, order=c(0,0,2))

ma2

Coefficients:

         ma1     ma2  intercept
      0.0267  0.3660    -0.0082
s.e.  0.0567  0.0586     0.0006

sigma^2 estimated as 5.889e-05:  log likelihood = 948.88,  aic = -1889.77
  

ma3 <- arima(dly, order=c(0,0,3))

ma3

Coefficients:

         ma1     ma2     ma3  intercept
      0.0543  0.3688  0.0694    -0.0082
s.e.  0.0604  0.0580  0.0578     0.0007

sigma^2 estimated as 5.858e-05:  log likelihood = 949.6,  aic = -1889.21
  

ma4 <- arima(dly, order=c(0,0,4))

ma4

Coefficients:

         ma1     ma2     ma3      ma4  intercept
      0.0542  0.3674  0.0696  -0.0056    -0.0082
s.e.  0.0604  0.0609  0.0579   0.0748     0.0007

sigma^2 estimated as 5.857e-05:  log likelihood = 949.61,  aic = -1887.21

Among the MA models, MA(2) has the minimum AIC (aic = -1889.77).

We test its residuals for autocorrelation:

Box.test(residuals(ma2),lag=8,type="Ljung")

Box-Ljung test

data:  residuals(ma2)
X-squared = 9.391, df = 8, p-value = 0.3104

tsdiag(ma2, gof.lag=8)

[Figure: tsdiag diagnostics for MA(2)]

Its AIC is also lower than that of AR(4) (-1889.77 vs. -1888.03), and its residuals pass the Ljung-Box test, so we conclude that MA(2) is the best model.
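As a quick head-to-head check, the AICs of the two finalists can be read straight off the fitted objects (values as reported above):

AIC(ar4)  # -1888.03

AIC(ma2)  # -1889.77, the smaller of the two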

Question 2:

Import the data

x <- read.csv(file.choose(), header=TRUE)

plot(x, xlab="Periods ", ylab="Price", main="Crude Oil")

[Figure: Crude Oil]

As in the first question, we make the data stationary by differencing the log series.

dlx <- diff(log(x[,2]))

plot(x[2:1501,1], dlx, xlab="Periods", main="Differenced log(Price of Crude Oil)")

[Figure: Differenced log(Price of Crude Oil)]

We now examine the sample ACF and PACF:

acf(dlx, type="correlation", lag=24, main="Sample ACF")

[Figure: Sample ACF]

acf(dlx, type="partial", lag=24, main="Sample PACF")

[Figure: Sample PACF]

ar1 <- arima(dlx, order=c(1,0,0))

ar1

Coefficients:

         ar1  intercept
      0.1851    -0.0004
s.e.  0.0254     0.0013

sigma^2 estimated as 0.001728:  log likelihood = 2642.29,  aic = -5278.57

ar2 <- arima(dlx, order=c(2,0,0))

ar2

Coefficients:

         ar1      ar2  intercept
      0.1864  -0.0073    -0.0003
s.e.  0.0258   0.0258     0.0013

sigma^2 estimated as 0.001728:  log likelihood = 2642.33,  aic = -5276.65

ar3 <- arima(dlx, order=c(3,0,0))

ar3

Coefficients:

         ar1      ar2     ar3  intercept
      0.1868  -0.0244  0.0922    -0.0003
s.e.  0.0257   0.0262  0.0258     0.0014

sigma^2 estimated as 0.001713:  log likelihood = 2648.68,  aic = -5287.37

ar4 <- arima(dlx, order=c(4,0,0))

ar4

Coefficients:

         ar1      ar2     ar3      ar4  intercept
      0.1898  -0.0251  0.0985  -0.0332    -0.0003
s.e.  0.0258   0.0261  0.0262   0.0259     0.0014

sigma^2 estimated as 0.001711:  log likelihood = 2649.51,  aic = -5287.01
  

ma1 <- arima(dlx, order=c(0,0,1))

ma1

Coefficients:

         ma1  intercept
      0.1914    -0.0003
s.e.  0.0261     0.0013

sigma^2 estimated as 0.001727:  log likelihood = 2642.7,  aic = -5279.4
  

ma2 <- arima(dlx, order=c(0,0,2))

ma2

Coefficients:

         ma1      ma2  intercept
      0.1915  -0.0084    -0.0003
s.e.  0.0258   0.0259     0.0013

sigma^2 estimated as 0.001727:  log likelihood = 2642.75,  aic = -5277.5
  

ma3 <-arima(dlx, order=c(0,0,3))

ma3

Coefficients:

         ma1     ma2     ma3  intercept
      0.1905  0.0086  0.1010    -0.0003
s.e.  0.0257  0.0261  0.0262     0.0014

sigma^2 estimated as 0.00171:  log likelihood = 2650.06,  aic = -5290.12
  

ma4 <-arima(dlx, order=c(0,0,4))

ma4

Coefficients:

         ma1     ma2     ma3     ma4  intercept
      0.1906  0.0086  0.1010  0.0002    -0.0003
s.e.  0.0258  0.0261  0.0268  0.0254     0.0014

sigma^2 estimated as 0.00171:  log likelihood = 2650.06,  aic = -5288.12
  

ARIMA1 <- arima(dlx, order=c(1,0,1))

ARIMA1

Coefficients:

          ar1     ma1  intercept
      -0.2774  0.4616    -0.0003
s.e.   0.1981  0.1857     0.0012

sigma^2 estimated as 0.001726:  log likelihood = 2643.22,  aic = -5278.44
  

ARIMA2 <- arima(dlx, order=c(2,0,2))

ARIMA2

Coefficients:

         ar1     ar2     ma1      ma2  intercept
      0.0609  0.3732  0.1351  -0.3799    -0.0003
s.e.  0.2092  0.0904  0.2110   0.1109     0.0014

sigma^2 estimated as 0.001712:  log likelihood = 2648.99,  aic = -5285.99

ARIMA3 <- arima(dlx, order=c(3,0,3))

ARIMA3

Coefficients:

         ar1      ar2      ar3      ma1     ma2     ma3  intercept
      0.2996  -0.0444  -0.1597  -0.1095  -0.004  0.2656    -0.0003
s.e.  0.3628   0.2763   0.1853   0.3616   0.250  0.2051     0.0014

sigma^2 estimated as 0.001709:  log likelihood = 2650.37,  aic = -5284.73


MA(3) has the lowest AIC (-5290.12) of all the candidates, so we test its residuals for autocorrelation:

Box.test(residuals(ma3), lag=24,type="Ljung")

  Box-Ljung test

data:  residuals(ma3)
X-squared = 21.843, df = 24, p-value = 0.5886

tsdiag(ma3, gof.lag=24)

[Figure: tsdiag diagnostics for MA(3)]

It is clear from the figures and from the p-value (0.5886) that the residuals are not serially correlated; thus MA(3) is the best model.
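As a cross-check, the AICs of all the candidates fitted above can be collected in one step. A sketch, assuming the fitted objects are still in the workspace:

fits <- list(ar1=ar1, ar2=ar2, ar3=ar3, ar4=ar4, ma1=ma1, ma2=ma2, ma3=ma3, ma4=ma4, ARIMA1=ARIMA1, ARIMA2=ARIMA2, ARIMA3=ARIMA3)

sort(sapply(fits, AIC))[1]  # ma3 is the minimum at -5290.12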

Question 3:

hp <- Quandl("FRED/HOUSTNSA.csv", type="ts")

help(quandl)

help(Quandl)

library("Quandl")

hp <- read.csv(file.choose(), header=TRUE)

hp<- Quandl("FRED/HOUSTNSA", type="ts")

str(hp)

zooreg(1:685, start=c(1959,1), frequency=12)

plot(hp)

[Figure: hp (housing starts)]

library(tseries)

adf.test(hp)

Augmented Dickey-Fuller Test

data:  hp
Dickey-Fuller = -2.6039, Lag order = 8, p-value = 0.3227
alternative hypothesis: stationary

The augmented Dickey-Fuller test fails to reject the null hypothesis of a unit root (p-value = 0.3227), so the series is not stationary and we difference it.

We also split the data into an estimation sample (through 2013) and a hold-out sample (2014 onward):

hpall<-hp

hp1 <- window(hpall, end=c(2013,12))

hp2 <- window(hpall, start=c(2014,1))

hp <- hp1

dhp <- diff(hp)

sdhp <- diff(diff(hp), 12)

plot(hp)

[Figure: hp]

plot(dhp)

[Figure: dhp]

plot(sdhp)

[Figure: sdhp]

par(mfrow=c(2,2))

acf(dhp, type="correlation", lag.max=24, main="Sample ACF of dhp")

acf(dhp, type="partial", lag.max=24, main="Sample PACF of dhp")

acf(sdhp, type="correlation", lag.max=24, main="Sample ACF of sdhp")

acf(sdhp, type="partial", lag.max=24, main="Sample PACF of sdhp")

[Figure: 2x2 panel of ACF and PACF for dhp (top row) and sdhp (bottom row)]

The ACF and the PACF clearly show that the data are seasonal, so we fit seasonal ARIMA models.
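The forecast package (loaded below for forecasting) can also suggest the number of ordinary and seasonal differences automatically; a sketch whose output is not part of the original results:

library(forecast)

ndiffs(hp)   # suggested number of ordinary differences

nsdiffs(hp)  # suggested number of seasonal differences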

am <- arima(dhp,order=c(2,0,0),seasonal=list(order=c(2,0,0),period=12))

am

Coefficients:

          ar1      ar2    sar1    sar2  intercept
      -0.3315  -0.0933  0.4975  0.3408    -0.0432
s.e.   0.0407   0.0396  0.0372  0.0369     1.7747

sigma^2 estimated as 139.4:  log likelihood = -2568.47,  aic = 5148.93
  

tsdiag(am,gof.lag=24)

[Figure: tsdiag diagnostics for am]

am.LB <- Box.test(am$residuals, lag=24, type="Ljung")

am.LB

 Box-Ljung test

data:  am$residuals
X-squared = 77.403, df = 24, p-value = 1.569e-07

The p-value is essentially zero, so the residuals of this model are still autocorrelated and it is not adequate. We therefore fit a seasonal MA model to the seasonally differenced series sdhp:

T4 <- arima(sdhp,order=c(0,0,1),seasonal=list(order=c(0,0,2),period=12))

T4

Coefficients:

          ma1     sma1     sma2  intercept
      -0.3198  -0.7488  -0.1208     0.0004
s.e.   0.0364   0.0390   0.0377     0.0442

sigma^2 estimated as 114.9:  log likelihood = -2460.84,  aic = 4931.68
  

tsdiag(T4,gof.lag=24)

[Figure: tsdiag diagnostics for T4]

T4.LB <- Box.test(T4$residuals, lag=24, type="Ljung")

T4.LB

Box-Ljung test

data:  T4$residuals
X-squared = 44.076, df = 24, p-value = 0.007477

The p-value (0.0075) still indicates some residual autocorrelation at the 5% level, but this model is a substantial improvement over the previous one (aic = 4931.68 vs. 5148.93), so we use it for forecasting.

par(mfrow=c(1,2), cex=0.75)

library("forecast")

T4.forecast <- forecast(T4, h=24)

plot(T4.forecast, xlim=c(2011,2017))

[Figure: 24-month forecast from T4 (2011-2017 window)]
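Since the 2014+ observations (hp2) were held out at the start, the forecasts can be checked against them. T4 was fitted to the doubly differenced series, so the comparison has to be made on that scale; a minimal sketch (sdhpall and sdhp2 are introduced here for illustration):

sdhpall <- diff(diff(hpall), 12)  # the same transformation, applied to the full series

sdhp2 <- window(sdhpall, start=c(2014,1))  # hold-out window on the differenced scale

accuracy(T4.forecast, sdhp2)  # forecast::accuracy; RMSE, MAE, etc. on training and test sets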