Yumeng Zou
September 15, 2018
library(ggplot2)
library(forecast)
cpi <- read.csv("CPI Sept 2018.csv", header=T)
head(cpi)
DATE CPIAUCSL
1 1947-01-01 21.48
2 1947-02-01 21.62
3 1947-03-01 22.00
4 1947-04-01 22.00
5 1947-05-01 21.95
6 1947-06-01 22.08
tail(cpi)
DATE CPIAUCSL
855 2018-03-01 249.462
856 2018-04-01 250.013
857 2018-05-01 250.535
858 2018-06-01 250.857
859 2018-07-01 251.286
860 2018-08-01 251.846
Monthly data from Jan 1947 to Aug 2018
cpi_ts <- ts(cpi$CPIAUCSL, start = c(1947,1), end=c(2018,8), frequency=12)
head(cpi_ts)
Jan Feb Mar Apr May Jun
1947 21.48 21.62 22.00 22.00 21.95 22.08
autoplot(cpi_ts)+
ggtitle("CPI for all urban consumers")+
xlab("Year")+
ylab("Index 1982-1984=100")
cpi$DATE <- as.Date(cpi$DATE, format="%Y-%m-%d")
cpi$YEAR <- as.numeric(format(cpi$DATE, "%Y"))
cpi$MONTH <- as.numeric(format(cpi$DATE, "%m"))
head(cpi)
DATE CPIAUCSL YEAR MONTH
1 1947-01-01 21.48 1947 1
2 1947-02-01 21.62 1947 2
3 1947-03-01 22.00 1947 3
4 1947-04-01 22.00 1947 4
5 1947-05-01 21.95 1947 5
6 1947-06-01 22.08 1947 6
ls01 <- lm(CPIAUCSL~YEAR+MONTH, cpi)
summary(ls01)
Call:
lm(formula = CPIAUCSL ~ YEAR + MONTH, data = cpi)
Residuals:
Min 1Q Median 3Q Max
-30.085 -11.002 -0.132 12.921 41.426
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -6.962e+03 5.749e+01 -121.110 <2e-16 ***
YEAR 3.565e+00 2.899e-02 123.003 <2e-16 ***
MONTH 2.453e-01 1.739e-01 1.411 0.159
---
Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
Residual standard error: 17.59 on 857 degrees of freedom
Multiple R-squared: 0.9464, Adjusted R-squared: 0.9463
F-statistic: 7565 on 2 and 857 DF, p-value: < 2.2e-16
cpi_latest <- cpi[cpi$DATE > "1982-01-01",]
ls02 <- lm(CPIAUCSL~YEAR+MONTH, cpi_latest)
summary(ls02)
Call:
lm(formula = CPIAUCSL ~ YEAR + MONTH, data = cpi_latest)
Residuals:
Min 1Q Median 3Q Max
-4.8241 -2.2068 0.3494 1.7608 9.9976
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -8.630e+03 2.171e+01 -397.55 <2e-16 ***
YEAR 4.400e+00 1.085e-02 405.53 <2e-16 ***
MONTH 3.731e-01 3.333e-02 11.19 <2e-16 ***
---
Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
Residual standard error: 2.401 on 436 degrees of freedom
Multiple R-squared: 0.9974, Adjusted R-squared: 0.9973
F-statistic: 8.223e+04 on 2 and 436 DF, p-value: < 2.2e-16
cpi_latest$PRED <- predict(ls02)
ggplot(cpi_latest, aes(x=DATE))+
geom_line(aes(y=CPIAUCSL), color="orange")+
geom_line(aes(y=PRED), color="black")
plot(ls02)
ggplot(cpi_latest, aes(x=DATE))+
geom_point(aes(y=residuals(ls02)))+
geom_line(aes(y=0), color='red')
Residuals are cyclic, but the length of one cycle changes. Recently the size of a cycle is approximately 5 years.
window(cpi_ts,1982) %>% decompose(type="multiplicative") %>%
autoplot()
ets01 <- ets(window(cpi_ts,1982))
ets02 <- ets(window(cpi_ts, 2000))
summary(ets01)
ETS(M,Ad,N)
Call:
ets(y = window(cpi_ts, 1982))
Smoothing parameters:
alpha = 0.9999
beta = 0.7196
phi = 0.8
Initial states:
l = 94.2953
b = -0.0197
sigma: 0.0025
AIC AICc BIC
1919.793 1919.987 1944.313
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set 0.09351585 0.4694888 0.3115312 0.05861811 0.1760985 0.07124087
ACF1
Training set 0.01956088
summary(ets02)
ETS(A,Ad,N)
Call:
ets(y = window(cpi_ts, 2000))
Smoothing parameters:
alpha = 0.9999
beta = 0.6924
phi = 0.8
Initial states:
l = 168.9403
b = 0.3644
sigma: 0.6189
AIC AICc BIC
1004.168 1004.555 1024.638
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.09874172 0.6119142 0.437537 0.0475882 0.208288 0.09625048 0.05403826
autoplot(ets02)
ets01 %>% forecast(h=8) %>%
autoplot()
arima01 <- auto.arima(window(cpi_ts,1982))
arima02 <- auto.arima(window(cpi_ts, 2000))
summary(arima01)
Series: window(cpi_ts, 1982)
ARIMA(0,1,3)(0,0,1)[12] with drift
Coefficients:
ma1 ma2 ma3 sma1 drift
0.5095 0.0561 -0.0748 -0.1542 0.3585
s.e. 0.0476 0.0531 0.0493 0.0506 0.0253
sigma^2 estimated as 0.1779: log likelihood=-241.64
AIC=495.28 AICc=495.48 BIC=519.79
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set -4.055605e-05 0.4188511 0.2716888 -0.0009501852 0.1541964 0.06212972
ACF1
Training set 0.0004557807
summary(arima02)
Series: window(cpi_ts, 2000)
ARIMA(0,1,3)(0,0,1)[12] with drift
Coefficients:
ma1 ma2 ma3 sma1 drift
0.5320 0.0637 -0.1216 -0.1670 0.3675
s.e. 0.0664 0.0758 0.0693 0.0717 0.0453
sigma^2 estimated as 0.3038: log likelihood=-181.34
AIC=374.68 AICc=375.07 BIC=395.12
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set 0.001673625 0.5437227 0.372797 0.002033106 0.1763454 0.08200881
ACF1
Training set 0.003039392
ggplot(cpi[cpi$DATE >= "2000-01-01",], aes(x=DATE))+
geom_line(aes(y=CPIAUCSL), color="orange")+
geom_line(aes(y=fitted(arima02)), color="black")
arima02 %>% forecast(h=24) %>%
autoplot()
forecast(arima02, h=8)
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
Sep 2018 252.2076 251.5013 252.9139 251.1274 253.2879
Oct 2018 252.5952 251.3030 253.8875 250.6189 254.5715
Nov 2018 252.8605 251.1458 254.5752 250.2380 255.4829
Dec 2018 253.2003 251.1942 255.2064 250.1323 256.2684
Jan 2019 253.3841 251.1239 255.6443 249.9274 256.8408
Feb 2019 253.7625 251.2739 256.2510 249.9566 257.5683
Mar 2019 254.2377 251.5401 256.9353 250.1121 258.3633
Apr 2019 254.5725 251.6809 257.4640 250.1502 258.9947
tail(cpi_ts)
Mar Apr May Jun Jul Aug
2018 249.462 250.013 250.535 250.857 251.286 251.846
inflation <- diff(cpi_ts)/cpi_ts[-length(cpi_ts)]
autoplot(inflation)
ggPacf(cpi_ts, main="Partial Autocorrelation")
wage <- read.csv("Wage Sept 2018.csv", header=T)
head(wage)
DATE CES0500000003
1 2006-03-01 20.04
2 2006-04-01 20.15
3 2006-05-01 20.13
4 2006-06-01 20.23
5 2006-07-01 20.30
6 2006-08-01 20.32
tail(wage)
DATE CES0500000003
145 2018-03-01 26.80
146 2018-04-01 26.86
147 2018-05-01 26.94
148 2018-06-01 26.99
149 2018-07-01 27.06
150 2018-08-01 27.16
names(wage)
[1] "DATE" "CES0500000003"
names(wage) <- c("DATE", "WAGE")
wage$DATE <- as.Date(wage$DATE, format="%Y-%m-%d")
wage_ts <- ts(wage$WAGE, start=c(2006,3), end=c(2018,8), frequency=12)
cpi2006 <- cpi[cpi$DATE >= "2006-04-01",]
cpi2006$WAGElag1 <- wage$WAGE[-length(wage$WAGE)]
ls03 <- lm(CPIAUCSL~WAGElag1, cpi2006)
summary(ls03)
Call:
lm(formula = CPIAUCSL ~ WAGElag1, data = cpi2006)
Residuals:
Min 1Q Median 3Q Max
-4.1533 -1.9993 -0.7458 2.3845 6.0125
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 59.2234 2.4660 24.02 <2e-16 ***
WAGElag1 7.1459 0.1046 68.31 <2e-16 ***
---
Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
Residual standard error: 2.421 on 147 degrees of freedom
Multiple R-squared: 0.9695, Adjusted R-squared: 0.9692
F-statistic: 4666 on 1 and 147 DF, p-value: < 2.2e-16
cpi_ts2 <- window(cpi_ts, start = c(2006,3))
tslm1 <- tslm(cpi_ts2 ~ wage_ts)
summary(tslm1)
Call:
tslm(formula = cpi_ts2 ~ wage_ts)
Residuals:
Min 1Q Median 3Q Max
-4.2392 -2.0496 -0.8994 2.4336 5.8106
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 58.7150 2.4525 23.94 <2e-16 ***
wage_ts 7.1523 0.1039 68.82 <2e-16 ***
---
Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
Residual standard error: 2.435 on 148 degrees of freedom
Multiple R-squared: 0.9697, Adjusted R-squared: 0.9695
F-statistic: 4736 on 1 and 148 DF, p-value: < 2.2e-16
ppi <- read.csv("PPI Sept 2018.csv", header=T)
ppi$DATE <- as.Date(ppi$DATE, format="%Y-%m-%d")
ppi2006 <- ppi[ppi$DATE >= "2006-03-01",]
head(ppi2006)
DATE PPIACO
1119 2006-03-01 162.2
1120 2006-04-01 164.3
1121 2006-05-01 165.8
1122 2006-06-01 166.1
1123 2006-07-01 166.8
1124 2006-08-01 167.9
cpi2006$PPIlag1 <- ppi2006$PPIACO[-length(ppi2006$PPIACO)]
ls05 <- lm(CPIAUCSL~WAGElag1+PPIlag1, cpi2006)
summary(ls05)
Call:
lm(formula = CPIAUCSL ~ WAGElag1 + PPIlag1, data = cpi2006)
Residuals:
Min 1Q Median 3Q Max
-2.8003 -0.6217 0.1715 0.6734 2.2022
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 39.709237 1.327329 29.92 <2e-16 ***
WAGElag1 6.327598 0.056086 112.82 <2e-16 ***
PPIlag1 0.204163 0.008148 25.06 <2e-16 ***
---
Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
Residual standard error: 1.055 on 146 degrees of freedom
Multiple R-squared: 0.9942, Adjusted R-squared: 0.9942
F-statistic: 1.259e+04 on 2 and 146 DF, p-value: < 2.2e-16
ggplot(cpi2006, aes(x=DATE))+
geom_line(aes(y=CPIAUCSL), color="orange")+
geom_line(aes(y=predict(ls05)), color="black")
plot(ls05)
len <- length(wage$DATE)
predict(ls05, data.frame(WAGElag1=wage$WAGE[len], PPIlag1=ppi2006$PPIACO[len]))
1
253.0119
AIC(ls05)
[1] 443.856
BIC(ls05)
[1] 455.8717