remove(list = ls())
pacman::p_load(ggplot2, dplyr, forecast, forecastHybrid, ggpubr, pdfetch)
options(digits = 3, scipen = 99999)
######################
#Download the Series by FRED code
#Use pdfetch to draw, assemble and prepare data
permits_CT = pdfetch_FRED(c("CTBPPRIVSA")) # Building Permits in CT
AWH_NH = pdfetch_FRED(c("SMU09757000500000002SA")) # Average Weekly Hours in New Haven
AWE_NH = pdfetch_FRED(c("SMU09757000500000011SA")) # Average Weekly Earnings New
EdsMeds_NH = pdfetch_FRED(c("NEWH709EDUH")) # Eds & Meds Employment New Haven
UR_NH = pdfetch_FRED(c("NEWH709UR")) # Unemployment Rate in New Haven
#adjust unemployment so increases are positive
UR_NH = max(UR_NH) - UR_NH
#take a look
plot(UR_NH)

plot(EdsMeds_NH)

plot(AWH_NH)

plot(AWE_NH)

plot(permits_CT)

#Shorten them cause UR lags 1 month extra
tail(AWH_NH); head(UR_NH)
## SMU09757000500000002SA
## 2021-10-31 34.0
## 2021-11-30 34.0
## 2021-12-31 33.9
## 2022-01-31 34.1
## 2022-02-28 34.0
## 2022-03-31 34.0
## NEWH709UR
## 1990-01-31 6.1
## 1990-02-28 6.1
## 1990-03-31 6.1
## 1990-04-30 6.0
## 1990-05-31 5.9
## 1990-06-30 5.8
permits_CT <- permits_CT["2010-01-31/2022-02-28"] #reduce the length of all series
AWH_NH <- AWH_NH["2010-01-31/2022-02-28"] #reduce the length of all series
AWE_NH <- AWE_NH["2010-01-31/2022-02-28"] #reduce the length of all series
EdsMeds_NH <- EdsMeds_NH["2010-01-31/2022-02-28"] #reduce the length of all series
UR_NH <- UR_NH["2010-01-31/2022-02-28"] #reduce the length of all series
#Assemble into a data frame
ct_data = data.frame(permits_CT, AWH_NH, AWE_NH, EdsMeds_NH, UR_NH)
ct_data_m <- ts(ct_data,start=c(2010,1),frequency=12)
plot(ct_data_m)

lo = nrow(ct_data)
######################
###Method: Indexing
#Rebase each series to 2010:1 = 100
index_rebased <- dplyr::mutate(ct_data,
permits_CT=100*permits_CT/as.numeric(permits_CT[1]),
AWH_NH=100*AWH_NH/as.numeric(AWH_NH[1]),
AWE_NH=100*AWE_NH/as.numeric(AWE_NH[1]),
EdsMeds_NH=100*EdsMeds_NH/as.numeric(EdsMeds_NH[1]),
UR_NH=100*UR_NH/as.numeric(UR_NH[1]))
head(index_rebased)
## CTBPPRIVSA SMU09757000500000002SA SMU09757000500000011SA NEWH709EDUH
## 2010-01-31 281 33.4 837 73.5
## 2010-02-28 317 32.1 817 73.4
## 2010-03-31 292 32.5 841 73.5
## 2010-04-30 335 32.7 848 74.1
## 2010-05-31 254 32.7 861 74.2
## 2010-06-30 181 32.8 856 74.1
## NEWH709UR CTBPPRIVSA SMU09757000500000002SA SMU09757000500000011SA
## 2010-01-31 1.0 100.0 100.0 100.0
## 2010-02-28 0.8 112.7 96.1 97.7
## 2010-03-31 0.7 104.0 97.5 100.5
## 2010-04-30 0.7 119.2 98.1 101.3
## 2010-05-31 0.7 90.6 98.0 102.9
## 2010-06-30 0.7 64.5 98.3 102.3
## NEWH709EDUH NEWH709UR
## 2010-01-31 100.0 100
## 2010-02-28 99.9 80
## 2010-03-31 100.0 70
## 2010-04-30 100.9 70
## 2010-05-31 100.9 70
## 2010-06-30 100.9 70
tail(index_rebased)
## CTBPPRIVSA SMU09757000500000002SA SMU09757000500000011SA NEWH709EDUH
## 2021-09-30 365 33.8 1135 83.5
## 2021-10-31 308 34.0 1133 83.8
## 2021-11-30 393 34.0 1123 84.0
## 2021-12-31 443 33.9 1123 83.5
## 2022-01-31 680 34.1 1117 83.4
## 2022-02-28 434 34.0 1115 83.6
## NEWH709UR CTBPPRIVSA SMU09757000500000002SA SMU09757000500000011SA
## 2021-09-30 5.1 130 101 136
## 2021-10-31 5.4 110 102 135
## 2021-11-30 5.4 140 102 134
## 2021-12-31 5.4 157 102 134
## 2022-01-31 5.7 242 102 134
## 2022-02-28 5.9 154 102 133
## NEWH709EDUH NEWH709UR
## 2021-09-30 114 510
## 2021-10-31 114 540
## 2021-11-30 114 540
## 2021-12-31 114 540
## 2022-01-31 114 570
## 2022-02-28 114 590
names(index_rebased)
## [1] "CTBPPRIVSA" "SMU09757000500000002SA" "SMU09757000500000011SA"
## [4] "NEWH709EDUH" "NEWH709UR" "permits_CT"
## [7] "AWH_NH" "AWE_NH" "EdsMeds_NH"
## [10] "UR_NH"
#sum across columns
index = rowSums(index_rebased[6:10], dims = 1)
tail(index)
## 2021-09-30 2021-10-31 2021-11-30 2021-12-31 2022-01-31 2022-02-28
## 990 1001 1030 1047 1161 1093
plot.ts(index)

#rebase the index to 100
index=100*index/index[1]
tail(index)
## 2021-09-30 2021-10-31 2021-11-30 2021-12-31 2022-01-31 2022-02-28
## 198 200 206 209 232 219
plot.ts(index)

#convert index to a time series
Index <- ts(index, start=c(2010, 1), frequency=12) # UPDATED- October 2021!!!!!
autoplot(Index) +
theme_bw(base_size = 8) +
ylab("NHREPI = January 2010 = 100") +
labs(title="New Haven Region", subtitle = "Economic Performance") +
geom_hline(yintercept= 100) +
geom_forecast(h=6, fan = "True", level = c(90)) +
geom_line(size = 0.8, col = "darkred")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

#Obtain a 6-month centered moving average
NH_MA = tsutils::cmav(Index) # this function interpolates the
# 6 NAs created
##############################################################################
#HOLTS WINTER FORECAST
hw(Index, h=6)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2022 223 201 244 190 255
## Apr 2022 220 193 247 179 262
## May 2022 214 182 246 166 263
## Jun 2022 221 185 257 166 276
## Jul 2022 215 176 255 155 276
## Aug 2022 222 179 265 156 287
autoplot(hw(Index, h=6),
main="New Haven Region Economic performance",
ylab = "NHEPI=100=January 2010", xlab = "Time") +
autolayer(NH_MA) +
theme(legend.position = "none")

accuracy(hw(Index))
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.485 15.7 10.7 -0.784 6.59 0.373 0.00163
###############################################################################
##################################################################
### AUTO.ARIMA FORECAST
aa.model=auto.arima(Index)
autoplot(forecast(aa.model, h=6)) +
autolayer(NH_MA) +
theme(legend.position = "none")

forecast(aa.model,h=6)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2022 221 200 241 189 252
## Apr 2022 221 194 247 180 261
## May 2022 221 190 251 173 268
## Jun 2022 221 186 255 167 274
## Jul 2022 221 182 259 162 279
## Aug 2022 221 179 262 157 284
accuracy(forecast(aa.model,h=6)) #
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.07 16.1 10.5 0.119 6.31 0.364 0.00502
##################################################################
autoplot(Index) +
autolayer(forecast(aa.model, h=6))+
autolayer(NH_MA) +
ylab("NHREPI = January 2010 = 100") +
labs(title="New Haven Region", subtitle = "Economic Performance") +
geom_hline(yintercept= 100) +
geom_line(size = 0.8, col = "blue")+
theme_bw(base_size = 12) +
theme(legend.position = "none")

##################################################################
#Ensemble Forecast
fit1 <- hybridModel(Index, weights="equal")
## Fitting the auto.arima model
## Fitting the ets model
## Fitting the thetam model
## Fitting the nnetar model
## Fitting the stlm model
## Fitting the tbats model
fc1 <- forecast(fit1, h=12) # forecast here is 12 months
autoplot(fc1) + autolayer(NH_MA)+
ggtitle("NH Region Economic Performance") +
xlab("Year") +
ylab("New Haven Economic Performance Index") +
geom_hline(yintercept= 100) +
geom_line(size = 0.8, col = "blue")+
theme_bw(base_size = 12) +
theme(legend.position = "none")

accuracy(fc1) #
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.09 15.5 10.4 0.13 6.1 0.361 -0.00897
#Different Version of Hybrid Graph
autoplot(fc1) +
autolayer(Index)+
autolayer(NH_MA)+
ylab("NHREPI = January 2010 = 100") +
xlab("Year")

labs(title="New Haven Region", subtitle = "Economic Performance") +
geom_hline(yintercept= 100) +
geom_line(size = 0.8, col = "darkred")+
theme_bw(base_size = 12) +
theme(legend.position = "none")
## NULL
#Shorter Window - to highlight forecast
autoplot(fc1) +
autolayer(Index)+
autolayer(NH_MA)+
ylab("NHREPI = January 2010 = 100") +
xlab("Year")+
labs(title="New Haven Region",
subtitle = "Economic Performance and 12-M Forecast",
caption = "Created by: J.Watters and J. Hickey, New Haven BANL") +
geom_hline(yintercept= 100) +
geom_line(size = 1.2, col = "darkred")+
#theme_bw(base_size = 12) +
xlim(x = c(2019,2023)) +
theme(legend.position = "none")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Warning: Removed 108 rows containing missing values (`geom_line()`).
## Warning: Removed 108 rows containing missing values (`geom_line()`).
## Removed 108 rows containing missing values (`geom_line()`).

#and here is a dygraph for an html version - to upload to Rpubs
library(dygraphs) # you need this package
NH_Dat = cbind(NH_MA, Index, fc1$mean) #
dygraph(NH_Dat,
main = "New Haven Economic Performance Index",
ylab = "NHREPI = January 2010 = 100" ) %>%
dySeries("fc1$mean", label = "12-MO Forecast, (%)") %>%
dySeries("NH_MA", label = "6-Mo Moving Average") %>%
dyShading(from = "2020-03-1", to = "2021-04-01",
color = "#FFE6E6") %>%
dyRangeSelector() %>%
dyLimit(min(Index[1], na.rm = TRUE), "2010:1 = 100 ",
strokePattern = "solid", color = "darkred")
##################################################################
#===============================================================================#