#1 Check your working directory

getwd()
## [1] "/Users/zosiajiang/Desktop/Harrisburg Application - Zhuoxin Jiang/ANLY 565"

#2 Set your working directory to “ANLY 580/RScript”.

setwd("~/Desktop/Harrisburg Application - Zhuoxin Jiang/ANLY 565")

#3 Download “Inflation.xls” data file and set the “observation_date” variable to the date format and the “CPI” variable to the numeric format. The “CPI” variable represents the consumer price index, which idicates the relative prices of a consumer basket.

library(readxl)
inflation <- read_xls("Inflation.xls")
inflation$observation_date <- as.Date(inflation$observation_date)
inflation$CPI <- as.numeric(inflation$CPI)
str(inflation)
## tibble [1,273 × 2] (S3: tbl_df/tbl/data.frame)
##  $ observation_date: Date[1:1273], format: "1913-01-01" "1913-02-01" ...
##  $ CPI             : num [1:1273] 9.8 9.8 9.8 9.8 9.7 9.8 9.9 9.9 10 10 ...

#4 Create two stand alone varibales: “date” and “cpi”. “date” variable should represent the values of the “observation_date” variable from the “Inflation” data set, while, “cpi” variable should represent values of the “cpi” variable from the “Inflation” data set.

date <- inflation$observation_date
cpi <- inflation$CPI

#5 Transform “cpi” variable from numeric format to the time series format by using ts() function. Lable the new varibale as “cpits”.

range(date)
## [1] "1913-01-01" "2019-01-01"
cpits <- ts(cpi, start = c(1913,1), end = c(2019,1), frequency = 12 )
class(cpits)
## [1] "ts"

#6 Please construct the following three graphs: 1)time series plot, 2) autocorrelation and 3) partial autocorrelation functions for the “cpits” variable. Based on the signature of these graphs, does the variable appear stationary? Explain

Answer: The CPI variable seems to be non-stationary because the mean value of the time series is not constant and the variance changes over time. Based on the ACF plot, it demonstrates a decaying trend.

plot(cpits, main = "CPI from Year 1913 to 2019", xlab = "Year", ylab = "CPI")

acf(cpits)

pacf(cpits)

#7 Use “cpits” variable and window() function to create 2 new variables called “cpi.pre”, “cpi.post”. The “cpi.pre” should include all observations for the period starting from January of 1990 and up until October 2018. The “cpi.post” should include all observations starting from November 2018 and up until the last month in the dataset.

cpi.pre <- window(cpits, start = c(1990,1), end = c(2018, 10), frequency = 12)
cpi.post <- window(cpits, start = c(2018,11), end = c(2019,1), frequency = 12)

#8 Use time() function and “cpi.pre” variable to create a variable called “Time”. Moreover, use “cpi.pre” variable and cycle() function to create a factor variable titled “Seas”.

Time <- time(cpi.pre)
Seas <- factor(cycle(cpi.pre))

#9 Use lm() function to estimate parameter values of a linear regression model by regressing “Time”, and “Seas” on “cpi.pre”. Save these estimates as “cpi.lm”. Set the value of the intercept to 0, in order to interpret the coeficients of the seasonal dummy variables as seasonal intercepts. (Setting intercept to 0 ensures that for each season there is a unique intercept) Save these estimates as cpi.lm

cpi.lm <- lm(cpi.pre ~ 0 + Time + Seas)
summary(cpi.lm)
## 
## Call:
## lm(formula = cpi.pre ~ 0 + Time + Seas)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5517 -2.1523  0.0583  1.5749 10.5564 
## 
## Coefficients:
##          Estimate Std. Error t value Pr(>|t|)    
## Time    4.361e+00  1.656e-02   263.4   <2e-16 ***
## Seas1  -8.550e+03  3.318e+01  -257.7   <2e-16 ***
## Seas2  -8.550e+03  3.319e+01  -257.6   <2e-16 ***
## Seas3  -8.549e+03  3.319e+01  -257.6   <2e-16 ***
## Seas4  -8.549e+03  3.319e+01  -257.6   <2e-16 ***
## Seas5  -8.549e+03  3.319e+01  -257.6   <2e-16 ***
## Seas6  -8.549e+03  3.319e+01  -257.6   <2e-16 ***
## Seas7  -8.549e+03  3.319e+01  -257.6   <2e-16 ***
## Seas8  -8.549e+03  3.319e+01  -257.6   <2e-16 ***
## Seas9  -8.549e+03  3.320e+01  -257.5   <2e-16 ***
## Seas10 -8.549e+03  3.320e+01  -257.5   <2e-16 ***
## Seas11 -8.550e+03  3.319e+01  -257.6   <2e-16 ***
## Seas12 -8.551e+03  3.319e+01  -257.6   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.563 on 333 degrees of freedom
## Multiple R-squared:  0.9998, Adjusted R-squared:  0.9998 
## F-statistic: 1.533e+05 on 13 and 333 DF,  p-value: < 2.2e-16

#10 Create the following new items: # “new.Time”- sequence of 12 values starting from 2018.75+1/12 and each number going up by 1/12 # “new.Seas”- a vector with the following values c(11,12,1,2,3,4,5,6,7,8,9,10) # “new.data”- a data frame that combines the “new.Time” and “new.Seas” variables.

new.Time <- seq(2018.75, len = 12, by = 1/12)
new.Seas <- factor(c(11,12,1,2,3,4,5,6,7,8,9,10))
new.data <- data.frame(Time=new.Time, Seas=new.Seas)
new.data
##        Time Seas
## 1  2018.750   11
## 2  2018.833   12
## 3  2018.917    1
## 4  2019.000    2
## 5  2019.083    3
## 6  2019.167    4
## 7  2019.250    5
## 8  2019.333    6
## 9  2019.417    7
## 10 2019.500    8
## 11 2019.583    9
## 12 2019.667   10

#11 Use predict() function and cpi.lm model to create a 12 month ahead forecast of the consumer price index. Save this forecast as “predict.lm”

predict.lm<-predict(cpi.lm, new.data)
predict.lm
##        1        2        3        4        5        6        7        8 
## 253.2700 252.9572 253.6425 254.3816 255.3153 255.9543 256.4341 256.8678 
##        9       10       11       12 
## 257.0126 257.3388 257.8063 257.8698

#12 Collect residuals from the “cpi.lm” model and save them as “cpi.lm.resid”. Moreover, constuct acf and pacf for the “cpi.lm.resid” series. Is the series stationary? Is there autocorrelation in the residual series?

Answer: Series is not stationary. Residuals still have autocorrelation.

cpi.lm.resid <- cpi.lm$residuals
acf(cpi.lm.resid)

pacf(cpi.lm.resid)

#13 Based on the AIC, identify the best order of ARMA model (without the seasonal component) for the cpi.lm.resid time series and estimate the value of the parameter coefficients. Please, consider any ARMA model with up to 3 AR and/or MA terms. Save these estimates as resid.best.arma. What is the order of resid.best.arma?

Answer: order of resid.best.arma is (1,0,2).

best.order <- c(0, 0, 0)
best.aic <- Inf
for (i in 0:3) for (j in 0:3) {
  fit.aic <- AIC(arima(cpi.lm.resid, order = c(i, 0,
                                                 j)))
  if (fit.aic < best.aic) {
    best.order <- c(i, 0, j)
    best.arma <- arima(cpi.lm.resid, order = best.order)
    best.aic <- fit.aic
  }}
best.order
## [1] 1 0 2
resid.best.arma <- best.arma
coef(best.arma)
##         ar1         ma1         ma2   intercept 
##  0.94446255  0.57488540  0.15219349 -0.02787618

#14 Use predict() function and resid.best.arma to create a 12 period ahead forecast of cpi.lm.resid series. Save the forecasted values as resid.best.arma.pred

resid.best.arma.pred <- predict(resid.best.arma, n.ahead = 12)
resid.best.arma.pred$pred
## Time Series:
## Start = 347 
## End = 358 
## Frequency = 1 
##  [1] -0.7179941 -0.6170488 -0.5843277 -0.5534238 -0.5242362 -0.4966696
##  [7] -0.4706340 -0.4460444 -0.4228204 -0.4008862 -0.3801702 -0.3606047

#15 Use ts() function to combine the cpi values forecaseted by cpi.lm model and the residual values forecasted by resid.best.arma. Lable this time series as cpi.pred

cpi.pred <- ts((predict.lm + resid.best.arma.pred$pred),start = c(2018,11), frequency = 12)

#16 Use ts.plot() function to plot cpi.pre and cpi.pred together on one graph. What do you expect will happen to the CPI during the next 12 month?

Answer: based on the plot, I expect the CPI to increase during the next 12 months.

ts.plot(cbind(cpi.pre, cpi.pred), lty = 1:2)

#17 Please calculate mean absolute percentage error for the cpi.pred forecast for the first three month (Novermber 2018, December 2018, January 2019). How accurate is the model?

Answer: The model is pretty accurate (0.4% error rate)

mean(abs(cpi.post[1:3] - cpi.pred[1:3])/cpi.post[1:3])*100
## [1] 0.3931447

#18 What is the forecasted rate of inflation between December 2018 and January 2019? # Hint: Inflation = % change in CPI

Answer: the forecasted rate of inflation is around 0.28%.

diff(log(cpi.pred[2:3])) * 100
##         3 
## 0.2841387

#19 Policy makers often care more about inflation rather than cpi. # Create a new stand alone varible that would represent the first log difference of the the cpits variable. Lable this variable “pi”, which represents monthly inflation rate in the US. # If percentage change is positive there is inflation (prices go up), and if the percentage change is negative there is deflation (prices fall). What was the lowest monthly rate of inflation(deflation) recorded in US during the time sample? What about was the highest?

Answer: The lowest monthly rate of inflation is -3.2%. The highest monthly rate of inflation is 5.71%.

pi <- (diff(log(cpits)))* 100
summary(pi) 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -3.2088  0.0000  0.2397  0.2552  0.5451  5.7158

#20 Please construct the time series plot, the autocorrelation and partial autocorrelation functions for the “pi” variable. Based on the signature of these graphs, does the variable appear stationary? Explain

Answer: the variance seems to change over time. Also, the acf and pacf indicate seasonality and trends. The time series is non-stationary.

plot(pi, main = "Inflation rate in the US", xlab = "Time", ylab = "Inflation rate")

acf(pi)

pacf(pi)

#21 Use “pi” variable and window() function to create 2 new variables called “pi.pre”, “pi.post”. The “pi.pre” should include all observations for the period starting from January of 1990 and up until October 2018. The “pi.post” should include all observations starting from November 2018 and up until the last month in the dataset.

pi.pre <- window(pi, start = c(1990,1), end = c(2018,10), frequency = 12)
pi.post <- window(pi, start = c(2018,11), end = c(2019,1), frequency = 12)

#22 Please create a function that takes a time series as input, and then uses AIC to identify the best SARIMA model. The function should return the following: # - the order of the best SARIMA, # - its AIC # - and the estimates of its coefficient values # Lable this formula get.best.sarima

get.best.sarima <- function(x.ts, maxord = c(2,2,2,2,2,2))
{
  best.aic <- Inf
  n <- length(x.ts)
  for (p in 0:maxord[2]) for(d in 0:maxord[2]) for(q in 0:maxord[2])
    for (P in 0:maxord[2]) for(D in 0:maxord[2]) for(Q in 0:maxord[2])
    {
      fit <- arima(x.ts, order = c(p,d,q),
                   seas = list(order = c(P,D,Q),
                               frequency(x.ts)), method = "CSS")
      fit.aic <- -2 * fit$loglik + (log(n) + 1) * length(fit$coef)
      if (fit.aic < best.aic)
      {
        best.aic <- fit.aic
        best.fit <- fit
        best.model <- c(p,d,q,P,D,Q)
      }
    }
  list(best.aic, best.fit, best.model)
}

#23 By using get.best.sarima() function please identify the best SARIMA model for pi.pre time series. Please cosider SARIMA(2,2,2,2,2,2) as the maximum order of the model. Save the results of the get.best.sarima() function as “pi.best.sarima”. What is the order of the best SARIMA model?

Answer: The order of the best SARIMA model is (0,0,1,2,1,1).

pi.best.sarima <- get.best.sarima(pi.pre, maxord = c(2,2,2,2,2,2))
## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1

## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1

## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1

## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1

## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1

## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1

## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1
pi.best.sarima
## [[1]]
## [1] 52.45196
## 
## [[2]]
## 
## Call:
## arima(x = x.ts, order = c(p, d, q), seasonal = list(order = c(P, D, Q), frequency(x.ts)), 
##     method = "CSS")
## 
## Coefficients:
##          ma1     sar1     sar2     sma1
##       0.4615  -0.1775  -0.0910  -0.8422
## s.e.  0.0444   0.0600   0.0577   0.0379
## 
## sigma^2 estimated as 0.06311:  part log likelihood = -12.53
## 
## [[3]]
## [1] 0 0 1 2 1 1

#24 Please use predict() function and the best.sarima.pi model to forecast monthly rate of inflation in the US during November 2018, December 2018 and January 2019. Save these predictions as pi.sarima.pred

pi.sarima.pred <- predict(pi.best.sarima[[2]], n.ahead=3)
pi.sarima.pred$pred
##             Jan Feb Mar Apr May Jun Jul Aug Sep Oct        Nov        Dec
## 2018                                                -0.1475132 -0.2388366
## 2019  0.2646618

#25 Please calculate mean absolute percentage error of the best.sarima model. How accurate is the model?

Answer: The actual inflation rate is 14.14% lower than the predicted rate.

mean(abs(pi.post[1:3] - pi.sarima.pred$pred)/pi.post[1:3])*100
## [1] -14.14246

#26 Extract the residual series from the pi.best.sarima model, and save them as sarima.resid.

sarima.resid <- pi.best.sarima[[2]]$residuals

#27 Plot the acf of the sarima.resid series and acf of the residsarima.resid^2 series. What can you conclude based on these graphs?

Answer: from two graphs, we can see there is evidence of serial correlation in both residual and squared values, so there is evidence of volatility and conditional heteroskedastic since the squared residuals are correlated at most lags. Hence, a Garch model should be fitted to the residual series.

acf(sarima.resid)

acf(sarima.resid^2)

#28 Download fGarch package and upload it to the library

library(fGarch)
## Loading required package: timeDate
## Loading required package: timeSeries
## Loading required package: fBasics

#29 Use garchFit() function from the fGarch package to estimate garch(1,1) model of the resid time series. By doing so you will be able to analyze the volatility of the inflation, or, in other words, how stable it is. # Make sure to set “include.mean=F” by doing so you suppress mean parameter from the default ARMA(0,0) model.Save the estimated coefficients as resid.garch

resid.garch <- garchFit(~ garch(1,1), data=sarima.resid, include.mean = F, trace = F)
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.
summary(resid.garch)
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(1, 1), data = sarima.resid, include.mean = F, 
##     trace = F) 
## 
## Mean and Variance Equation:
##  data ~ garch(1, 1)
## <environment: 0x7fd0a983e2f8>
##  [data = sarima.resid]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##      omega      alpha1       beta1  
## 0.00065718  0.33466089  0.72819666  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##         Estimate  Std. Error  t value Pr(>|t|)    
## omega  0.0006572   0.0002065    3.183  0.00146 ** 
## alpha1 0.3346609   0.0737837    4.536 5.74e-06 ***
## beta1  0.7281967   0.0433202   16.810  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  67.64266    normalized:  0.195499 
## 
## Description:
##  Sun Jan 31 13:40:08 2021 by user:  
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value    
##  Jarque-Bera Test   R    Chi^2  104.6071  0          
##  Shapiro-Wilk Test  R    W      0.9648451 2.11459e-07
##  Ljung-Box Test     R    Q(10)  3.589067  0.9639873  
##  Ljung-Box Test     R    Q(15)  17.51112  0.2892383  
##  Ljung-Box Test     R    Q(20)  18.74425  0.5385016  
##  Ljung-Box Test     R^2  Q(10)  5.769825  0.8342165  
##  Ljung-Box Test     R^2  Q(15)  6.809474  0.9627225  
##  Ljung-Box Test     R^2  Q(20)  10.02728  0.9676745  
##  LM Arch Test       R    TR^2   6.544629  0.8861832  
## 
## Information Criterion Statistics:
##        AIC        BIC        SIC       HQIC 
## -0.3736570 -0.3403064 -0.3738056 -0.3603767

#30 The main priority of the monetary authority (Federal Reserve) in the United States is to ensure stable value of currency. Simply put, Fed wants to keep inflation stable (no volatility). To maintain stability the Fed depends on a number of tools, and its effectiveness is judged based on the forecasting model of volatility. # Please use pi variable and predict function to forecast two period ahead inflation volatily, which is measured by a square of the forecasted standard deviation. # How stable will be the currency in February and March of 2019?

Answer: the inflation volatility will be around 0.1%.

pi_resid_garch <- garchFit(~ garch(1,1), data=pi, include.mean = F, trace = F)
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.
pred_pi <- predict(pi_resid_garch, n.ahead = 2)
inf_vol <- (pred_pi$standardDeviation)^2
inf_vol
## [1] 0.09645904 0.10229067