Homework

(20%) Problem 6.4 (i.e., Chapter 6, Problem 4) from Textbook.

Consider the following MA(2) process: $y_t = 0.7 - 2\varepsilon_{t-1} + 1.35\varepsilon_{t-2} + \varepsilon_t$, where $\varepsilon_t$ is a white noise process, normally distributed with zero mean and unit variance.

(a) Obtain the theoretical autocorrelation function up to lag 10.

# Theoretical ACF of the MA(2) process with coefficients -2 and 1.35, for
# lags 0 through 10. The constant 0.7 only shifts the mean, so it does not
# enter the autocorrelations; ar = 0 means there is no autoregressive part.
theoacf <- ARMAacf(ar = 0, ma = c(-2, 1.35), lag.max = 10)
print(theoacf)

##          0          1          2          3          4          5 
##  1.0000000 -0.6888970  0.1978747  0.0000000  0.0000000  0.0000000 
##          6          7          8          9         10 
##  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000

(b) Now, simulate the process for t = 1, 2, …, 100 and compute the sample autocorrelation function up to lag 10. Comment and compare with your results in part (a).

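Compared with the theoretical ACF from part (a), the sample ACF is fairly similar: the lag-1 autocorrelation is -0.614 versus -0.689 in theory, and the lag-2 autocorrelation is 0.093 versus 0.198. However, the simulation shows the “imperfections” typical of real data: the sample autocorrelations at lags 3-10 are not exactly zero, as theory predicts, but they are close to zero.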

# Simulate 100 observations of the MA(2) process and estimate its ACF.
# The RNG seed is fixed so that the simulated path, and therefore the sample
# ACF, is reproduced exactly on every run.
set.seed(123)

# arima.sim() generates the zero-mean moving-average part; adding the constant
# 0.7 gives the process as stated. The shift leaves the autocorrelations
# unchanged.
ma2.sim <- 0.7 + arima.sim(model = list(ma = c(-2, 1.35)), n = 100)

# Sample autocorrelations up to lag 10; acf() also draws the correlogram.
acfsim <- acf(ma2.sim, type = "correlation", lag.max = 10)

acfsim

## 
## Autocorrelations of series 'ma2.sim', by lag
## 
##      1      2      3      4      5      6      7      8      9     10 
## -0.614  0.093  0.051 -0.053  0.077 -0.041  0.007 -0.034  0.078 -0.050

(20%) Problem 6.10 (i.e., Chapter 6, Problem 10) from Textbook “Download financial prices of your favorite stocks. Obtain the autocorrelation functions. Which process(es) will fit the financial returns to these stocks? Propose a model, estimate it, and forecast at several horizons.”

Two spikes in both the ACF and the PACF indicate an ARMA(2,2) process, so ARMA(2,2) is my proposed model.

# Amazon historical prices from 1/2/15 - 12/29/17
amazon_data <- read.csv("AMZN(1).csv")

# Daily series with 251 observations per year. `frequency` is spelled out
# instead of relying on partial matching of `freq`.
# NOTE(review): with both `start` and `end` given, ts() derives the length
# from those two values and truncates or recycles the data to fit. This object
# does not appear to be used by the code that follows -- confirm it is needed.
amazon_data_ts <- ts(amazon_data, start = 2015, end = 2018, frequency = 251)

# ACFs of prices: autocovariance, autocorrelation and partial autocorrelation
# of the adjusted close, stacked in one window.
# dev.new() opens the platform's default graphics device; quartz() is only
# available on macOS.
dev.new()
par(mfrow = c(3, 1))
acf(amazon_data$Adj.Close, type = "covariance", main = "Covariance")
acf(amazon_data$Adj.Close, type = "correlation", main = "ACF")
acf(amazon_data$Adj.Close, type = "partial", main = "PACF")

# Financial returns

n <- nrow(amazon_data)
date <- seq_len(n)

# Tick positions: every 251st observation (about one trading year). The upper
# bound comes from the data instead of a hard-coded row count.
date_i <- seq(from = 1, to = n, by = 251)
# One calendar-year label per tick, starting at 2015.
lab_x <- 2015 + seq_along(date_i) - 1

# Simple daily returns on the adjusted close: P_t / P_(t-1) - 1.
returns <- tail(amazon_data$Adj.Close, -1) /
  head(amazon_data$Adj.Close, -1) - 1
n0 <- n - 1
date0 <- seq_len(n0)

# Returns against observation index, with the index axis relabelled by year.
plot(
  date0, returns,
  type = "l", col = "blue", xaxt = "n", xlab = "",
  ylab = "AMZN Returns", xlim = c(1, n0)
)
axis(1, at = date_i, labels = lab_x)
title(main = "Daily Returns of AMZN (01/02/2015 -- 12/29/2017)")

# ACFs of financial returns: autocovariance, autocorrelation and partial
# autocorrelation stacked in one window.

# dev.new() opens the platform's default graphics device; quartz() is only
# available on macOS.
dev.new()
par(mfrow = c(3, 1))
acf(returns, type = "covariance", main = "Covariance")
acf(returns, type = "correlation", main = "ACF")
acf(returns, type = "partial", main = "PACF")

# Model for financial returns
# The returns look mean-reverting, but their variance does not look constant
# over time.

# Two spikes in both the ACF and the PACF suggest an ARMA(2,2) process.

# The three-panel ACF figure of the returns is drawn once, in the
# "ACFS of Financial Returns" section; only the returns themselves are
# plotted here, in a fresh window.
# dev.new() opens the platform's default graphics device; quartz() is only
# available on macOS.
dev.new()
plot(
  date0, returns,
  type = "l", col = "black", xaxt = "n", xlab = "",
  ylab = "AMZN Returns", xlim = c(1, n0)
)
# The x-axis is the observation index, so ticks go at index positions and are
# labelled with the corresponding years.
axis(1, at = date_i, labels = lab_x)

returns_frame <- data.frame(returns)

# Daily returns as a univariate time series starting in 2015 with 251
# observations per year. Only `start` is fixed: giving `end` as well would
# make ts() silently truncate or recycle the data whenever the number of
# returns differs from the implied length.
returns_ts <- ts(returns, start = 2015, frequency = 251)

# ARMA(2,2) for the returns: two AR terms, no differencing, two MA terms.
arma_2_2 <- arima(returns_ts, order = c(2, 0, 2))

arma_2_2

## 
## Call:
## arima(x = returns_ts, order = c(2, 0, 2))
## 
## Coefficients:
##           ar1      ar2     ma1     ma2  intercept
##       -0.2922  -0.9076  0.3242  0.9354     0.0019
## s.e.   0.0744   0.0607  0.0644  0.0507     0.0007
## 
## sigma^2 estimated as 0.0003216:  log likelihood = 1961.99,  aic = -3913.97

#auto<- auto.arima(returns)

# Observed returns with the in-sample ARMA(2,2) fit overlaid.
# dev.new() opens the platform's default graphics device; quartz() is only
# available on macOS.
dev.new()
plot(
  returns_ts,
  type = "l", col = "black", xaxt = "n", xlab = "",
  ylab = "AMZN Returns"
)
# A ts plot has time (in years) on the x-axis, so the ticks must be placed at
# calendar years; observation indices such as 1, 252, ... fall outside the
# plotted range and would never be drawn.
axis(1, at = 2015:2018)
title(main = "Daily Returns of AMZN (01/02/2015 -- 12/29/2017)")
# The object returned by arima() has no `fitted.values` component; the fitted
# values are recovered as the data minus the model residuals.
lines(returns_ts - residuals(arma_2_2), col = "red", lwd = 2, lty = 2)

legend(
  "topright",
  legend = c("Data", "ARMA(2,2)"),
  text.col = c("black", "red"),
  bty = "n"
)

# Forecast financial returns at several horizons

# One forecast plot per horizon (2, 3 and 4 steps ahead), each produced by
# applying the already-estimated ARMA(2,2) to the returns series.
# NOTE(review): forecast() is not a base R function; presumably the forecast
# package is attached elsewhere -- confirm.
for (horizon in 2:4) {
  plot(forecast(returns_ts, model = arma_2_2, h = horizon))
}

(20%) Problem 7.2 (i.e., Chapter 7, Problem 2) from Textbook “Update the time series of Figure 7.3 , Unemployed Persons. Calculate the autocorrelation functions and reason the values of the autocorrelation coefficients for different displacements of time. Do you think that an autoregressive process could be a good model to explain the dependence of the series?”

# Updated time series of unemployed persons: 1989-2012
unemployed_data <- read.csv("7.2data.csv")
names(unemployed_data) <- c("date", "Unemployed")

# Monthly series from January 1989 through July 2012 (2012 + 6/12).
# `frequency` is spelled out instead of relying on partial matching of `freq`.
# NOTE(review): with both `start` and `end` given, ts() truncates or recycles
# the data to the implied length -- confirm the CSV covers exactly this span.
unemployed_ts <- ts(
  unemployed_data$Unemployed,
  start = 1989, end = 2012 + (6 / 12), frequency = 12
)

# Evenly spaced time index with one value per observation.
t <- seq(1989, 2012 + (6 / 12), length.out = length(unemployed_ts))

# Plot
ts.plot(unemployed_ts, gpars = list(col = rainbow(10)))

# ACFs: autocovariance, autocorrelation and partial autocorrelation stacked in
# one window.
# dev.new() opens the platform's default graphics device; quartz() is only
# available on macOS.
dev.new()
par(mfrow = c(3, 1))
acf(unemployed_ts, type = "covariance", main = "Covariance")
acf(unemployed_ts, type = "correlation", main = "ACF")
# Keep the partial autocorrelations so their values can be printed below.
pacf1 <- acf(unemployed_ts, type = "partial", main = "PACF")

pacf1

## 
## Partial autocorrelations of series 'unemployed_ts', by lag
## 
## 0.0833 0.1667 0.2500 0.3333 0.4167 0.5000 0.5833 0.6667 0.7500 0.8333 
##  0.991 -0.104 -0.126 -0.066 -0.092 -0.095 -0.070 -0.108 -0.049 -0.094 
## 0.9167 1.0000 1.0833 1.1667 1.2500 1.3333 1.4167 1.5000 1.5833 1.6667 
## -0.021 -0.034  0.009 -0.043  0.021  0.015  0.012 -0.027 -0.034 -0.059 
## 1.7500 1.8333 1.9167 2.0000 
## -0.099  0.064  0.028 -0.036

Based on the autocorrelation functions, I would say that an autoregressive process could be a good model to explain the dependence of the series. This is indicated by the single sharp spike at the first lag of the PACF (0.991), together with an ACF that decays toward zero as the lag increases.

(20%) Problem 7.6 (i.e., Chapter 7, Problem 6) from Textbook “Download some of the components of the Consumer Price Index (e.g., housing, food, transportation) from the BLS website. Analyze whether the inflation rate for these components may be modeled as an AR(2) process.”

Yes: the housing inflation rate can be modeled as an AR(2) process, because the PACF has spikes at lags 1 and 2 while the ACF correlogram decays toward zero over time.

No, not for the transportation inflation rate: its PACF has a statistically significant spike only at lag 1, which indicates an AR(1) process instead.

CPI_data <- read.csv("7.6data.csv")

names(CPI_data) <- c(
  "date", "housing", "housing_inflation",
  "transportation", "transportation_inflation"
)

# Drop every row that has a missing value in any column.
CPI_data.adj <- na.exclude(CPI_data)

# Plots

# Annual inflation series for 1968-2011. `frequency` is spelled out instead of
# relying on partial matching of `freq`.
# NOTE(review): with both `start` and `end` given, ts() fixes the length at 44
# observations; this assumes the NA-free rows are exactly the consecutive
# years 1968-2011 -- confirm against the CSV.
housing_inflation_ts <- ts(
  CPI_data.adj$housing_inflation,
  start = 1968, end = 2011, frequency = 1
)

transportation_inflation_ts <- ts(
  CPI_data.adj$transportation_inflation,
  start = 1968, end = 2011, frequency = 1
)

# Evenly spaced year index with one value per observation.
t3 <- seq(1968, 2011, length.out = length(housing_inflation_ts))

# Plot
ts.plot(housing_inflation_ts, gpars = list(col = rainbow(10)))

ts.plot(transportation_inflation_ts, gpars = list(col = rainbow(10)))

# ACFs
# For each series: autocovariance, autocorrelation and partial autocorrelation
# stacked in one window. dev.new() opens the platform's default graphics
# device; quartz() is only available on macOS.

# Housing inflation
dev.new()
par(mfrow = c(3, 1))
acf(CPI_data.adj$housing_inflation, type = "covariance", main = "Covariance")
acf(CPI_data.adj$housing_inflation, type = "correlation", main = "ACF")
acf(CPI_data.adj$housing_inflation, type = "partial", main = "PACF")

# Transportation inflation
dev.new()
par(mfrow = c(3, 1))
acf(
  CPI_data.adj$transportation_inflation,
  type = "covariance", main = "Covariance"
)
acf(CPI_data.adj$transportation_inflation, type = "correlation", main = "ACF")
acf(CPI_data.adj$transportation_inflation, type = "partial", main = "PACF")

# AR models and fits

# Housing inflation: AR(2).
# NOTE(review): arma() is not a base R function; presumably tseries::arma,
# where order = c(2, 0) requests two AR terms and no MA terms -- confirm.
ar2 <- arma(housing_inflation_ts, order = c(2, 0))

# Observed series with the fitted AR(2) values overlaid.
# dev.new() opens the platform's default graphics device; quartz() is only
# available on macOS.
dev.new()
plot(
  x = t3, y = housing_inflation_ts,
  type = "l", col = "black", xaxt = "n", xlab = "",
  ylab = "Housing Inflation Rate (%)"
)
# The x-axis runs over the years in t3, so the ticks are taken from that
# range. The tick positions and labels built for the AMZN plots (observation
# indices labelled 2015-2018) do not apply to this axis.
axis(1, at = pretty(t3))
title(main = "Housing Inflation (1968 -- 2011)")
lines(ar2$fitted.values, col = "red", lwd = 2, lty = 2)

legend(
  "topright",
  legend = c("Data", "AR(2)"),
  text.col = c("black", "red"),
  bty = "n"
)

# Transportation inflation: AR(1).
# NOTE(review): arma() is not a base R function; presumably tseries::arma,
# where order = c(1, 0) requests one AR term and no MA terms -- confirm.
ar1 <- arma(transportation_inflation_ts, order = c(1, 0))

# Observed series with the fitted AR(1) values overlaid.
# dev.new() opens the platform's default graphics device; quartz() is only
# available on macOS.
dev.new()
plot(
  transportation_inflation_ts,
  type = "l", col = "black", xaxt = "n", xlab = "",
  ylab = "Transportation Inflation Rate (%)"
)
# A ts plot has time (in years) on the x-axis, so the ticks are taken from the
# series' own time range. The tick positions and labels built for the AMZN
# plots (observation indices labelled 2015-2018) do not apply to this axis.
axis(1, at = pretty(time(transportation_inflation_ts)))
# The title names the series actually plotted.
title(main = "Transportation Inflation (1968 -- 2011)")
lines(ar1$fitted.values, col = "red", lwd = 2, lty = 2)

legend(
  "topright",
  legend = c("Data", "AR(1)"),
  text.col = c("black", "red"),
  bty = "n"
)

(20%) Problem 7.7 (i.e., Chapter 7, Problem 7) from Textbook “From Exercise 6, select the time series corresponding to food and gas inflation. Compute the autocorrelation and partial autocorrelation functions. Which time series model(s) would you entertain? Estimate the model and construct the 1-step-, 2-step-, and 3-step-ahead forecasts, their forecast errors, and the uncertainty associated with the forecasts. Which series is more difficult to predict?”

inflations <- read.csv("7.7data.csv")
# Drop every row that has a missing value in any column.
inflations.adj <- na.exclude(inflations)

names(inflations.adj) <- c(
  "date", "food", "food_inflation", "gas", "gas_inflation"
)

# ACFs
# For each series: autocovariance, autocorrelation and partial autocorrelation
# stacked in one window. dev.new() opens the platform's default graphics
# device; quartz() is only available on macOS.

# Food inflation
dev.new()
par(mfrow = c(3, 1))
acf(inflations.adj$food_inflation, type = "covariance", main = "Covariance")
acf(inflations.adj$food_inflation, type = "correlation", main = "ACF")
acf(inflations.adj$food_inflation, type = "partial", main = "PACF")

# Gas inflation
dev.new()
par(mfrow = c(3, 1))
acf(inflations.adj$gas_inflation, type = "covariance", main = "Covariance")
acf(inflations.adj$gas_inflation, type = "correlation", main = "ACF")
acf(inflations.adj$gas_inflation, type = "partial", main = "PACF")

According to the autocorrelation functions, I would entertain an AR(1) model for the food inflation rate, as there is a large spike at lag 1 of the PACF and an ACF that decays over time. The gas inflation rate seems trickier to model because of the absence of any significant values in the ACF or PACF correlograms. Nevertheless, I will attempt to fit an AR(1) model for the sake of comparison. Comparing the forecast error statistics, I can see that the errors for the gas inflation forecast are much higher, indicating that gas inflation is much harder to predict.

# Plots

# Annual inflation series for 1958-2011. `frequency` is spelled out instead of
# relying on partial matching of `freq`.
# NOTE(review): with both `start` and `end` given, ts() fixes the length at 54
# observations; this assumes the NA-free rows are exactly the consecutive
# years 1958-2011 -- confirm against the CSV.
food_inflation_ts <- ts(
  inflations.adj$food_inflation,
  start = 1958, end = 2011, frequency = 1
)

gas_inflation_ts <- ts(
  inflations.adj$gas_inflation,
  start = 1958, end = 2011, frequency = 1
)

# Evenly spaced year index with one value per observation.
t <- seq(1958, 2011, length.out = length(gas_inflation_ts))

# Plot: food above gas on the current graphics device.
par(mfrow = c(2, 1))

ts.plot(food_inflation_ts, gpars = list(col = rainbow(10)))

ts.plot(gas_inflation_ts, gpars = list(col = rainbow(10)))

# Forecasts

# Model for food inflation rate: AR(1)

ar_1 <- arima(food_inflation_ts, order = c(1, 0, 0))

# Forecast plots for 1, 2 and 3 steps ahead, each produced by applying the
# already-estimated AR(1) to the series.
# NOTE(review): forecast() and accuracy() are not base R functions; presumably
# the forecast package is attached elsewhere -- confirm.
for (horizon in 1:3) {
  plot(forecast(food_inflation_ts, model = ar_1, h = horizon), type = "l")
}

# Training-set accuracy. As the identical outputs below show, these statistics
# do not change with h, so they do not describe the error at each horizon.
accuracy(forecast(food_inflation_ts, model = ar_1, h = 1))

##                        ME     RMSE     MAE       MPE    MAPE      MASE
## Training set -0.008823183 2.369196 1.62239 -22.69352 61.4825 0.8878287
##                   ACF1
## Training set 0.1558132

accuracy(forecast(food_inflation_ts, model = ar_1, h = 2))

##                        ME     RMSE     MAE       MPE    MAPE      MASE
## Training set -0.008823183 2.369196 1.62239 -22.69352 61.4825 0.8878287
##                   ACF1
## Training set 0.1558132

accuracy(forecast(food_inflation_ts, model = ar_1, h = 3))

##                        ME     RMSE     MAE       MPE    MAPE      MASE
## Training set -0.008823183 2.369196 1.62239 -22.69352 61.4825 0.8878287
##                   ACF1
## Training set 0.1558132

# Horizon-specific results: point forecasts for 1, 2 and 3 steps ahead with
# 80% and 95% prediction intervals, which carry the forecast uncertainty that
# the training-set statistics above cannot show.
print(forecast(food_inflation_ts, model = ar_1, h = 3, level = c(80, 95)))

# Model for gas inflation rate: AR(1)

ar_gas_1 <- arima(gas_inflation_ts, order = c(1, 0, 0))

# Forecast plots for 1, 2 and 3 steps ahead, each produced by applying the
# already-estimated AR(1) to the series.
# NOTE(review): forecast() and accuracy() are not base R functions; presumably
# the forecast package is attached elsewhere -- confirm.
for (horizon in 1:3) {
  plot(forecast(gas_inflation_ts, model = ar_gas_1, h = horizon), type = "l")
}

# Training-set accuracy. As the identical outputs below show, these statistics
# do not change with h, so they do not describe the error at each horizon.
accuracy(forecast(gas_inflation_ts, model = ar_gas_1, h = 1))

##                      ME     RMSE      MAE     MPE     MAPE     MASE
## Training set 0.03325194 11.95614 8.497864 198.303 369.8408 0.874864
##                    ACF1
## Training set 0.04576479

accuracy(forecast(gas_inflation_ts, model = ar_gas_1, h = 2))

##                      ME     RMSE      MAE     MPE     MAPE     MASE
## Training set 0.03325194 11.95614 8.497864 198.303 369.8408 0.874864
##                    ACF1
## Training set 0.04576479

accuracy(forecast(gas_inflation_ts, model = ar_gas_1, h = 3))

##                      ME     RMSE      MAE     MPE     MAPE     MASE
## Training set 0.03325194 11.95614 8.497864 198.303 369.8408 0.874864
##                    ACF1
## Training set 0.04576479

# Horizon-specific results: point forecasts for 1, 2 and 3 steps ahead with
# 80% and 95% prediction intervals, which carry the forecast uncertainty that
# the training-set statistics above cannot show.
print(forecast(gas_inflation_ts, model = ar_gas_1, h = 3, level = c(80, 95)))

Homework_3

Justin Yee

5/10/2018