\(~\)
data <- read.table("./Week 6 - Test.txt", header = TRUE)
data$YYYY.MM <- ym(data$YYYY.MM)
data_sub <- data[which(data$YYYY.MM=="2000-01-01"):which(data$YYYY.MM=="2010-12-01"),]
lab <- c("Consumer price index","", "Logarithm of consumer price index", "","Monthly inflation rate")
for (i in c(3,5,7)) {
plot <- ggplot(data = data, aes(x = data[,1], y = data[,i]))+
geom_line(aes(color = "red"))+
geom_line(y = data[,i+1], aes(color = "blue"))+
labs(x = "Date", y = lab[i-2])+
ggtitle(paste0("Time series - ", lab[i-2]))+
scale_color_identity(name = "Legend", breaks = c("red", "blue"), labels =
c(names(data)[i], names(data)[i+1]), guide = "legend")
print(plot)}
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 row(s) containing missing values (geom_path).
\(~\)
This data suggests a correlation between US and EU values for all three observed variables. There’s an evident increase of “consumer price index” values over time, which suggests a non-linear trend. Meanwhile, monthly inflation rates with logarithmic transformations are rather stationary.
\(~\)
\(~\)
adf.test(data_sub$LOGPEUR, k = 3)
##
## Augmented Dickey-Fuller Test
##
## data: data_sub$LOGPEUR
## Dickey-Fuller = -2.4547, Lag order = 3, p-value = 0.3874
## alternative hypothesis: stationary
adf.test(data_sub$LOGPUSA, k = 3)
##
## Augmented Dickey-Fuller Test
##
## data: data_sub$LOGPUSA
## Dickey-Fuller = -2.4031, Lag order = 3, p-value = 0.4089
## alternative hypothesis: stationary
\(~\)
With p-values greater than 0.05 and ADF statistics greater than −3.5, we fail to reject the null hypothesis and the time series is non-stationary.
\(~\)
\(~\)
acf <- acf(data_sub$DPEUR, na.action = na.pass, plot = FALSE)
pacf <- pacf(data_sub$DPEUR, na.action = na.pass, plot = FALSE)
result <- data.frame("ACF" = acf$acf[-1], "PACF" = pacf$acf, row.names= c(1:length(pacf$acf)))
result[order(result$ACF, decreasing = TRUE),,drop=FALSE]
## ACF PACF
## 12 0.55447799 0.39835961
## 6 0.40291546 0.37371612
## 18 0.30892987 -0.07521133
## 1 0.08325085 0.08325085
## 11 0.01457959 0.04182718
## 13 0.01089604 -0.13963865
## 7 -0.03504572 -0.19514792
## 17 -0.05330612 0.04054127
## 19 -0.05432761 -0.14970672
## 5 -0.08844252 -0.11953564
## 2 -0.10915884 -0.11689974
## 10 -0.11141270 -0.07577645
## 21 -0.14886144 -0.14195695
## 4 -0.15896006 -0.14828024
## 20 -0.15915481 -0.01420708
## 9 -0.16205955 -0.06804760
## 14 -0.16214537 -0.07462563
## 8 -0.17325690 -0.16633956
## 15 -0.18319360 -0.03740304
## 3 -0.19908838 -0.18295739
## 16 -0.23129416 -0.20783622
\(~\)
Values 12 and 6 offer the highest similarity between the time series and the lagged version of itself, which justifies the use of the presented model. Its estimated parameters are as follows:
\(~\)
data_dpeur <- ts(data = data_sub$DPEUR, start = 2)
reg <- dynlm(formula = data_dpeur ~ L(data_dpeur, 6) + L(data_dpeur, 12))
summary(reg)
##
## Time series regression with "ts" data:
## Start = 15, End = 133
##
## Call:
## dynlm(formula = data_dpeur ~ L(data_dpeur, 6) + L(data_dpeur,
## 12))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0103343 -0.0017369 -0.0000475 0.0015322 0.0080903
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0003838 0.0002811 1.365 0.1749
## L(data_dpeur, 6) 0.1887459 0.0772888 2.442 0.0161 *
## L(data_dpeur, 12) 0.5979841 0.0835544 7.157 8.05e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002569 on 116 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4232, Adjusted R-squared: 0.4132
## F-statistic: 42.55 on 2 and 116 DF, p-value: 1.381e-14
\(~\)
\(~\)
data_dpusa <- ts(data = data_sub$DPUSA, start = 2)
reg2 <- dynlm(formula = data_dpeur ~ L(data_dpeur, 6) + L(data_dpeur, 12) + L(data_dpusa, 1) + L(data_dpusa, 6) + L(data_dpusa, 12))
summary(reg2)
##
## Time series regression with "ts" data:
## Start = 15, End = 133
##
## Call:
## dynlm(formula = data_dpeur ~ L(data_dpeur, 6) + L(data_dpeur,
## 12) + L(data_dpusa, 1) + L(data_dpusa, 6) + L(data_dpusa,
## 12))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0065866 -0.0016535 -0.0000118 0.0012630 0.0082682
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0004407 0.0002853 1.545 0.125
## L(data_dpeur, 6) 0.2029891 0.0785520 2.584 0.011 *
## L(data_dpeur, 12) 0.6367464 0.0874766 7.279 4.78e-11 ***
## L(data_dpusa, 1) 0.2264287 0.0511286 4.429 2.20e-05 ***
## L(data_dpusa, 6) -0.0560565 0.0547645 -1.024 0.308
## L(data_dpusa, 12) -0.2300418 0.0541695 -4.247 4.47e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002272 on 113 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5602, Adjusted R-squared: 0.5408
## F-statistic: 28.79 on 5 and 113 DF, p-value: < 2.2e-16
\(~\)
We can prove that the coefficient at lag 6 is not significant by observing that its p-value is greater than 0.05 and its absolute t-statistic smaller than 2. The corrected model is as follows:
\(~\)
reg3 <- dynlm(formula = data_dpeur ~ L(data_dpeur, 6) + L(data_dpeur, 12) + L(data_dpusa, 1) + L(data_dpusa, 12))
summary(reg3)
##
## Time series regression with "ts" data:
## Start = 15, End = 133
##
## Call:
## dynlm(formula = data_dpeur ~ L(data_dpeur, 6) + L(data_dpeur,
## 12) + L(data_dpusa, 1) + L(data_dpusa, 12))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0067809 -0.0016356 0.0000532 0.0013660 0.0082448
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0003391 0.0002676 1.267 0.2076
## L(data_dpeur, 6) 0.1687310 0.0710801 2.374 0.0193 *
## L(data_dpeur, 12) 0.6551529 0.0856263 7.651 6.93e-12 ***
## L(data_dpusa, 1) 0.2326460 0.0507772 4.582 1.19e-05 ***
## L(data_dpusa, 12) -0.2264880 0.0540694 -4.189 5.55e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002273 on 114 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5561, Adjusted R-squared: 0.5406
## F-statistic: 35.71 on 4 and 114 DF, p-value: < 2.2e-16
\(~\)
\(~\)
new <- data.frame("Date" = seq(ymd('2011-01-01'),ymd('2011-12-01'),by='months'))
fore_c <- predict.lm(reg, newdata = new)
## Warning: 'newdata' had 12 rows but variables found have 132 rows
fore_d <- predict.lm(reg3, newdata = new)
## Warning: 'newdata' had 12 rows but variables found have 132 rows
fore <- data.frame("AR Model" = fore_c[121:132], "ADL Model" = fore_d[121:132], "Actual Values" = data$DPEUR[133:144])
print(fore)
## AR.Model ADL.Model Actual.Values
## 121 -0.0063270419 -0.0066707975 -0.006826
## 122 0.0028304956 0.0029058058 0.003798
## 123 0.0088891035 0.0092727545 0.013554
## 124 0.0040011499 0.0041361543 0.005966
## 125 0.0009848270 0.0009729432 0.000000
## 126 0.0003837652 0.0003346406 0.000000
## 127 -0.0026270506 -0.0028139357 -0.005966
## 128 0.0015898223 0.0016109377 0.001495
## 129 0.0027895856 0.0028629269 0.007441
## 130 0.0027825051 0.0028555120 0.003700
## 131 0.0009824668 0.0009704593 0.000738
## 132 0.0051552828 0.0053447490 0.003683
ggplot(data = fore, aes(x = new$Date, y = AR.Model))+
geom_line(aes(color = "red"))+
geom_line(y = fore$ADL.Model, aes(color = "blue"))+
geom_line(y = fore$Actual.Values, aes(color = "green"))+
labs(x = "Date", y = "Values")+
ggtitle("Forecast")+
scale_color_identity(name = "Legend", breaks = c("red", "blue", "green"), labels =
c("AR Model", "ADL Model", "Actual Values"), guide = "legend")
rmse1 <- rmse(fore$AR.Model, fore$Actual.Values)
rmse2 <- rmse(fore$ADL.Model, fore$Actual.Values)
mae1 <- mae(fore$AR.Model, fore$Actual.Values)
mae2 <- mae(fore$ADL.Model, fore$Actual.Values)
sum1 <- sum(fore$Actual.Values-fore$AR.Model)
sum2 <- sum(fore$Actual.Values-fore$ADL.Model)
data.frame("RMSE" = rbind(rmse1, rmse2), "MAE" = rbind(mae1, mae2), "SUM" = rbind(sum1,sum2), row.names = c("AR Model", "ADL Model"))
## RMSE MAE SUM
## AR Model 0.002305546 0.001682019 0.006148089
## ADL Model 0.002198340 0.001587570 0.005800850
\(~\)
With lower error scores, we can conclude that the ADL model performs better than the AR model.
\(~\) \(~\) \(~\)