getwd()
## [1] "/Volumes/Macintosh HD - Data/Users/Morgane/Documents/Harrisburg University/Analytics Master Classes/ANLY 565 Summer Time Series/Assignment #3/R Script"
setwd("/Volumes/Macintosh HD - Data/Users/Morgane/Documents/Harrisburg University/Analytics Master Classes/ANLY 565 Summer Time Series/Assignment #3/R Script")
getwd()
## [1] "/Volumes/Macintosh HD - Data/Users/Morgane/Documents/Harrisburg University/Analytics Master Classes/ANLY 565 Summer Time Series/Assignment #3/R Script"
library(readxl)
Inf_OR<-read_excel("/Volumes/Macintosh HD - Data/Users/Morgane/Documents/Harrisburg University/Analytics Master Classes/ANLY 565 Summer Time Series/Assignment #3/Inflation.xls", col_types = c("date", "numeric"))
str(Inf_OR)
## tibble [1,273 × 2] (S3: tbl_df/tbl/data.frame)
## $ observation_date: POSIXct[1:1273], format: "1913-01-01" "1913-02-01" ...
## $ CPI : num [1:1273] 9.8 9.8 9.8 9.8 9.7 9.8 9.9 9.9 10 10 ...
date<-Inf_OR$observation_date
range(date)
## [1] "1913-01-01 UTC" "2019-01-01 UTC"
cpi<-Inf_OR$CPI
cpits<-ts(cpi,start=c(1913,1), end=c(2019,1), freq = 12)
head(cpits)
## [1] 9.8 9.8 9.8 9.8 9.7 9.8
plot(cpits,main="CPI from Jan-1913 to Jan-2019", xlab = "Time (monthly data)",
ylab = "CPI")
acf(cpits)
pacf(cpits)
The CPI data doesn’t seems to be stationary, as showed by the plot, showing the upward trend, and confirmed by the ACF, showing us a slowing decaying trend.
cpi.pre <- window(cpits, start = c(1990,1), end=c(2018,10), freq = 12)
cpi.post <- window(cpits, start = c(2018,11), end=c(2019,1), freq = 12)
cpi.post
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2018 252.038 251.233
## 2019 251.712
Time<-time(cpi.pre)
str(Time)
## Time-Series [1:346] from 1990 to 2019: 1990 1990 1990 1990 1990 ...
tail(Time)
## [1] 2018.333 2018.417 2018.500 2018.583 2018.667 2018.750
Seas <- factor(cycle(cpi.pre))
str(Seas)
## Factor w/ 12 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
cpi.lm <- lm(cpi.pre ~ 0 + Time + Seas)
coef(cpi.lm)
## Time Seas1 Seas2 Seas3 Seas4 Seas5
## 4.360764 -8550.376913 -8550.001241 -8549.430879 -8549.155276 -8549.038949
## Seas6 Seas7 Seas8 Seas9 Seas10 Seas11
## -8548.968622 -8549.187226 -8549.224381 -8549.120330 -8549.420244 -8550.022663
## Seas12
## -8550.698810
new.Time<-seq(2018.75+1/12, length=12, by=1/12)
new.Time
## [1] 2018.833 2018.917 2019.000 2019.083 2019.167 2019.250 2019.333 2019.417
## [9] 2019.500 2019.583 2019.667 2019.750
new.Seas<-factor(c(11,12,1,2,3,4,5,6,7,8,9,10))
new.data<-data.frame(Time=new.Time,Seas=new.Seas)
new.data
predict.lm<-predict(cpi.lm, new.data)
predict.lm
## 1 2 3 4 5 6 7 8
## 253.6334 253.3206 254.0059 254.7450 255.6787 256.3177 256.7975 257.2312
## 9 10 11 12
## 257.3760 257.7022 258.1697 258.2332
cpi.lm.resid<-cpi.lm$residuals
acf(cpi.lm.resid)
pacf(cpi.lm.resid)
Even when extracting the trend with the linear model, there is still some autocorrelation in the residuals: the model that we used did not capture all the trends/seasonality of the data.
best.order <- c(0, 0, 0)
best.aic <- Inf
for (i in 0:3) for (j in 0:3) {
fit.aic <- AIC(arima(cpi.lm.resid, order = c(i, 0,
j)))
if (fit.aic < best.aic) {
best.order <- c(i, 0, j)
resid.best.arma <- arima(cpi.lm.resid, order = best.order)
best.aic <- fit.aic
}}
resid.best.arma
##
## Call:
## arima(x = cpi.lm.resid, order = best.order)
##
## Coefficients:
## ar1 ma1 ma2 intercept
## 0.9445 0.5749 0.1522 -0.0279
## s.e. 0.0179 0.0568 0.0543 0.7572
##
## sigma^2 estimated as 0.2259: log likelihood = -235.35, aic = 480.71
best.order
## [1] 1 0 2
It seems that the best order is (1,0,2).
Save the forecasted values as resid.best.arma.pred
resid.best.arma.pred <- predict(resid.best.arma, n.ahead = 12)
resid.best.arma.pred$pred
## Time Series:
## Start = 347
## End = 358
## Frequency = 1
## [1] -0.7179941 -0.6170488 -0.5843277 -0.5534238 -0.5242362 -0.4966696
## [7] -0.4706340 -0.4460444 -0.4228204 -0.4008862 -0.3801702 -0.3606047
cpi.pred <- ts((predict.lm + resid.best.arma.pred$pred), start = 2018.833, freq = 12)
cpi.pred
## Time Series:
## Start = 2018.833
## End = 2019.74966666667
## Frequency = 12
## 1 2 3 4 5 6 7 8
## 252.9154 252.7036 253.4216 254.1916 255.1545 255.8211 256.3268 256.7851
## 9 10 11 12
## 256.9532 257.3013 257.7895 257.8725
ts.plot(cbind(cpi.pre, cpi.pred), lty = 1:2)
In the next 12 month, the CPI will continue increasing.
Mpred<-cpi.pred[1:3]
Mpred
## 1 2 3
## 252.9154 252.7036 253.4216
cpi.post[1:3]
## [1] 252.038 251.233 251.712
mean(abs((cpi.post[1:3] - Mpred)/cpi.post[1:3]))*100
## [1] 0.5375443
It seems that the model is pretty accurate as there is a 0.54% error, on average.
(diff(log(cpi.pred[1:3])) * 100)
## 2 3
## -0.0837804 0.2837307
(diff(log(cpi.post[1:3])) * 100)
## [1] -0.3199074 0.1904781
According to the model, in December, the forecasted inflation would be -0.08% in December and 0.28% in January, which is not accurate as the actual values are -0.32% in December 2018 and 0.19% in January 2019 (from cpi.post).
Create a new stand alone variable that would represent the first log difference of the cpits variable. Lable this variable “pi”, which represents monthly inflation rate in the US. If percentage change is positive there is inflation (prices go up), and if the percentage change is negative there is deflation (prices fall). What was the lowest monthly rate of inflation(deflation) recorded in US during the time sample? What about was the highest?
str(cpits)
## Time-Series [1:1273] from 1913 to 2019: 9.8 9.8 9.8 9.8 9.7 9.8 9.9 9.9 10 10 ...
pi<-(diff(log(cpits)))*100
summary(pi)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.2088 0.0000 0.2397 0.2552 0.5451 5.7158
The lowest deflation was recorded at -3.2% and the highest inflation rate was recorded at 5.71%.
plot(pi,main="Inflation from Jan-1913 to Jan-2019", xlab = "Time (monthly data)",
ylab = "Inflation (%)")
acf(pi)
pacf(pi)
The mean seems stable but the volatility seems to change over time.
pi.pre <- window(pi, start = c(1990,1), end=c(2018,10), freq = 12)
pi.post <- window(pi, start = c(2018,11), end=c(2019,1), freq = 12)
get.best.sarima <- function(x.ts, maxord = c(2,2,2,2,2,2))
{
best.aic <- Inf
n <- length(x.ts)
for (p in 0:maxord[2]) for(d in 0:maxord[2]) for(q in 0:maxord[2])
for (P in 0:maxord[2]) for(D in 0:maxord[2]) for(Q in 0:maxord[2])
{
fit <- arima(x.ts, order = c(p,d,q),
seas = list(order = c(P,D,Q),
frequency(x.ts)), method = "CSS")
fit.aic <- -2 * fit$loglik + (log(n) + 1) * length(fit$coef)
if (fit.aic < best.aic)
{
best.aic <- fit.aic
best.fit <- fit
best.order <- c(p,d,q,P,D,Q)
}
}
list(best.order,best.aic, best.fit)
}
AIC(arima((pi.pre),order = c(0,0,1), seas=list(order=c(2,1,1))))
## [1] 47.65343
AIC(arima((pi.pre),order = c(2,0,0), seas=list(order=c(2,0,1))))
## [1] 38.91891
pi.best.sarima <- get.best.sarima((pi.pre), maxord = c(2,2,2,2,2,2))
## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1
## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1
## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1
## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1
## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1
## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1
## Warning in arima(x.ts, order = c(p, d, q), seas = list(order = c(P, D, Q), :
## possible convergence problem: optim gave code = 1
pi.best.sarima
## [[1]]
## [1] 0 0 1 2 1 1
##
## [[2]]
## [1] 52.45196
##
## [[3]]
##
## Call:
## arima(x = x.ts, order = c(p, d, q), seasonal = list(order = c(P, D, Q), frequency(x.ts)),
## method = "CSS")
##
## Coefficients:
## ma1 sar1 sar2 sma1
## 0.4615 -0.1775 -0.0910 -0.8422
## s.e. 0.0444 0.0600 0.0577 0.0379
##
## sigma^2 estimated as 0.06311: part log likelihood = -12.53
AIC(arima(pi.pre,order = c(0,0,1), seas=list(order=c(2,1,1))))
## [1] 47.65343
AIC(arima(pi.pre,order = c(2,0,0), seas=list(order=c(2,0,1))))
## [1] 38.91891
IF we multiplied by x100 to obtain our original pi data, the best model has the order (0,0,1,2,1,1). However, if we did not multiply by 100 to obtain the pi data (i.e. our timeseries are not in percentages), the best model would have the order (2 0 0 2 0 1).
We use the exponentional function, as we previously used the log.
pi.sarima.pred<-(predict(pi.best.sarima[[3]],n.ahead=3)$pred)
pi.sarima.pred
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2018 -0.1475132 -0.2388366
## 2019 0.2646618
To entertain our curiosity, see below the actual values of the inflation.
pi.post
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2018 -0.3354970 -0.3199074
## 2019 0.1904781
mean(abs((pi.post[1:3]) - pi.sarima.pred)/((pi.post[1:3])))*100
## [1] -14.14246
On average, it seems that actual values are 14.1% below the predicted values: not a great fit.
sarima.resid<-pi.best.sarima[[3]]$residuals
plot(sarima.resid)
acf(sarima.resid)
acf(sarima.resid^2)
When looking at the plot, we see that the volatility changes over time, which is a first hint that there might be some heteroskedasticity. We then look at the ACF plot: there is no autocorrelation. However, with the square, we see that that there is autocorrelation, indicating that there is a conditional or unconditional heteroskedasticity. The variance of the residuals varies over time. The Garch model could work well.
library(fGarch)
## Loading required package: timeDate
## Loading required package: timeSeries
## Loading required package: fBasics
resid.garch<-garchFit(~garch(1,1), data=sarima.resid,include.mean = F, trace = F)
summary(resid.garch)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~garch(1, 1), data = sarima.resid, include.mean = F,
## trace = F)
##
## Mean and Variance Equation:
## data ~ garch(1, 1)
## <environment: 0x7ff49bbf1808>
## [data = sarima.resid]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## omega alpha1 beta1
## 0.00065718 0.33466089 0.72819666
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## omega 0.0006572 0.0002065 3.183 0.00146 **
## alpha1 0.3346609 0.0737837 4.536 5.74e-06 ***
## beta1 0.7281967 0.0433202 16.810 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## 67.64266 normalized: 0.195499
##
## Description:
## Wed Jul 22 20:25:28 2020 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 104.6071 0
## Shapiro-Wilk Test R W 0.9648451 2.11459e-07
## Ljung-Box Test R Q(10) 3.589067 0.9639873
## Ljung-Box Test R Q(15) 17.51112 0.2892383
## Ljung-Box Test R Q(20) 18.74425 0.5385016
## Ljung-Box Test R^2 Q(10) 5.769825 0.8342165
## Ljung-Box Test R^2 Q(15) 6.809474 0.9627225
## Ljung-Box Test R^2 Q(20) 10.02728 0.9676745
## LM Arch Test R TR^2 6.544629 0.8861832
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## -0.3736570 -0.3403064 -0.3738056 -0.3603767
coef(resid.garch)
## omega alpha1 beta1
## 0.0006571834 0.3346608900 0.7281966646