remove(list = ls())
pacman::p_load(ggplot2, dplyr, forecast, forecastHybrid, ggpubr, pdfetch)
options(digits = 3, scipen = 99999)

######################
#Download the Series by FRED code

        #Use pdfetch to draw, assemble and prepare data
        permits_CT = pdfetch_FRED(c("CTBPPRIVSA"))  # Building Permits in CT
        AWH_NH = pdfetch_FRED(c("SMU09757000500000002SA"))  # Average Weekly Hours in New Haven
        AWE_NH = pdfetch_FRED(c("SMU09757000500000011SA"))  # Average Weekly Earnings New 
        EdsMeds_NH = pdfetch_FRED(c("NEWH709EDUH"))  # Eds & Meds Employment New Haven
        UR_NH = pdfetch_FRED(c("NEWH709UR"))  # Unemployment Rate in New Haven
        
        #adjust unemployment so increases are positive
        UR_NH = max(UR_NH) - UR_NH
              
              #take a look
        
              plot(UR_NH)          

              plot(EdsMeds_NH)

              plot(AWH_NH)

              plot(AWE_NH)

              plot(permits_CT)

#Shorten them cause UR lags 1 month extra
tail(AWH_NH); head(UR_NH)
##            SMU09757000500000002SA
## 2021-10-31                   34.0
## 2021-11-30                   34.0
## 2021-12-31                   33.9
## 2022-01-31                   34.1
## 2022-02-28                   34.0
## 2022-03-31                   34.0
##            NEWH709UR
## 1990-01-31       6.1
## 1990-02-28       6.1
## 1990-03-31       6.1
## 1990-04-30       6.0
## 1990-05-31       5.9
## 1990-06-30       5.8
permits_CT <- permits_CT["2010-01-31/2022-02-28"] #reduce the length of all series
AWH_NH <- AWH_NH["2010-01-31/2022-02-28"] #reduce the length of all series
AWE_NH <- AWE_NH["2010-01-31/2022-02-28"] #reduce the length of all series
EdsMeds_NH <- EdsMeds_NH["2010-01-31/2022-02-28"] #reduce the length of all series
UR_NH <- UR_NH["2010-01-31/2022-02-28"] #reduce the length of all series

            #Assemble into a data frame
            ct_data = data.frame(permits_CT, AWH_NH, AWE_NH, EdsMeds_NH, UR_NH)
            ct_data_m <- ts(ct_data,start=c(2010,1),frequency=12)
            plot(ct_data_m)

            lo = nrow(ct_data)


######################
###Method: Indexing
#Rebase each series to 2010:1 = 100

index_rebased <- dplyr::mutate(ct_data, 
                               permits_CT=100*permits_CT/as.numeric(permits_CT[1]),
                               AWH_NH=100*AWH_NH/as.numeric(AWH_NH[1]),
                               AWE_NH=100*AWE_NH/as.numeric(AWE_NH[1]),
                               EdsMeds_NH=100*EdsMeds_NH/as.numeric(EdsMeds_NH[1]), 
                               UR_NH=100*UR_NH/as.numeric(UR_NH[1]))

        head(index_rebased)
##            CTBPPRIVSA SMU09757000500000002SA SMU09757000500000011SA NEWH709EDUH
## 2010-01-31        281                   33.4                    837        73.5
## 2010-02-28        317                   32.1                    817        73.4
## 2010-03-31        292                   32.5                    841        73.5
## 2010-04-30        335                   32.7                    848        74.1
## 2010-05-31        254                   32.7                    861        74.2
## 2010-06-30        181                   32.8                    856        74.1
##            NEWH709UR CTBPPRIVSA SMU09757000500000002SA SMU09757000500000011SA
## 2010-01-31       1.0      100.0                  100.0                  100.0
## 2010-02-28       0.8      112.7                   96.1                   97.7
## 2010-03-31       0.7      104.0                   97.5                  100.5
## 2010-04-30       0.7      119.2                   98.1                  101.3
## 2010-05-31       0.7       90.6                   98.0                  102.9
## 2010-06-30       0.7       64.5                   98.3                  102.3
##            NEWH709EDUH NEWH709UR
## 2010-01-31       100.0       100
## 2010-02-28        99.9        80
## 2010-03-31       100.0        70
## 2010-04-30       100.9        70
## 2010-05-31       100.9        70
## 2010-06-30       100.9        70
        tail(index_rebased)  
##            CTBPPRIVSA SMU09757000500000002SA SMU09757000500000011SA NEWH709EDUH
## 2021-09-30        365                   33.8                   1135        83.5
## 2021-10-31        308                   34.0                   1133        83.8
## 2021-11-30        393                   34.0                   1123        84.0
## 2021-12-31        443                   33.9                   1123        83.5
## 2022-01-31        680                   34.1                   1117        83.4
## 2022-02-28        434                   34.0                   1115        83.6
##            NEWH709UR CTBPPRIVSA SMU09757000500000002SA SMU09757000500000011SA
## 2021-09-30       5.1        130                    101                    136
## 2021-10-31       5.4        110                    102                    135
## 2021-11-30       5.4        140                    102                    134
## 2021-12-31       5.4        157                    102                    134
## 2022-01-31       5.7        242                    102                    134
## 2022-02-28       5.9        154                    102                    133
##            NEWH709EDUH NEWH709UR
## 2021-09-30         114       510
## 2021-10-31         114       540
## 2021-11-30         114       540
## 2021-12-31         114       540
## 2022-01-31         114       570
## 2022-02-28         114       590
        names(index_rebased)
##  [1] "CTBPPRIVSA"             "SMU09757000500000002SA" "SMU09757000500000011SA"
##  [4] "NEWH709EDUH"            "NEWH709UR"              "permits_CT"            
##  [7] "AWH_NH"                 "AWE_NH"                 "EdsMeds_NH"            
## [10] "UR_NH"
            #sum across columns
            index = rowSums(index_rebased[6:10], dims = 1)
            tail(index)
## 2021-09-30 2021-10-31 2021-11-30 2021-12-31 2022-01-31 2022-02-28 
##        990       1001       1030       1047       1161       1093
            plot.ts(index)

            #rebase the index to 100
            index=100*index/index[1]
            tail(index)
## 2021-09-30 2021-10-31 2021-11-30 2021-12-31 2022-01-31 2022-02-28 
##        198        200        206        209        232        219
            plot.ts(index)

        #convert index to a time series
        Index <- ts(index, start=c(2010, 1), frequency=12) # UPDATED- October 2021!!!!!

                autoplot(Index) +
                theme_bw(base_size = 8) +     
                ylab("NHREPI = January 2010 = 100") +
                labs(title="New Haven Region", subtitle = "Economic Performance") +
                geom_hline(yintercept= 100) +
                geom_forecast(h=6, fan = "True", level = c(90)) +
                geom_line(size = 0.8, col = "darkred")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

                #Obtain a 6-month centered moving average
                NH_MA = tsutils::cmav(Index) # this function interpolates the 
                                             # 6 NAs created
                
                
##############################################################################
#HOLTS WINTER FORECAST 
hw(Index, h=6)
##          Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2022            223   201   244   190   255
## Apr 2022            220   193   247   179   262
## May 2022            214   182   246   166   263
## Jun 2022            221   185   257   166   276
## Jul 2022            215   176   255   155   276
## Aug 2022            222   179   265   156   287
autoplot(hw(Index, h=6), 
         main="New Haven Region Economic performance",
         ylab = "NHEPI=100=January 2010", xlab = "Time") +
        autolayer(NH_MA) +
        theme(legend.position = "none")

accuracy(hw(Index)) 
##                  ME RMSE  MAE    MPE MAPE  MASE    ACF1
## Training set -0.485 15.7 10.7 -0.784 6.59 0.373 0.00163
###############################################################################

              ##################################################################
              ###   AUTO.ARIMA FORECAST
              aa.model=auto.arima(Index)
              autoplot(forecast(aa.model, h=6)) + 
                      autolayer(NH_MA) +
                      theme(legend.position = "none")

              forecast(aa.model,h=6)
##          Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2022            221   200   241   189   252
## Apr 2022            221   194   247   180   261
## May 2022            221   190   251   173   268
## Jun 2022            221   186   255   167   274
## Jul 2022            221   182   259   162   279
## Aug 2022            221   179   262   157   284
              accuracy(forecast(aa.model,h=6)) #
##                ME RMSE  MAE   MPE MAPE  MASE    ACF1
## Training set 1.07 16.1 10.5 0.119 6.31 0.364 0.00502
              ##################################################################

        autoplot(Index) +
        autolayer(forecast(aa.model, h=6))+
                      autolayer(NH_MA) +
        ylab("NHREPI = January 2010 = 100") +
        labs(title="New Haven Region", subtitle = "Economic Performance") +
        geom_hline(yintercept= 100) +
        geom_line(size = 0.8, col = "blue")+ 
        theme_bw(base_size = 12) +
                      theme(legend.position = "none")

##################################################################
              #Ensemble Forecast
              fit1 <- hybridModel(Index, weights="equal")
## Fitting the auto.arima model
## Fitting the ets model
## Fitting the thetam model
## Fitting the nnetar model
## Fitting the stlm model
## Fitting the tbats model
                      fc1 <- forecast(fit1, h=12)  # forecast here is 12 months
                      
              autoplot(fc1) + autolayer(NH_MA)+
                      ggtitle("NH Region Economic Performance") + 
                xlab("Year") +
                ylab("New Haven Economic Performance Index") +
                      geom_hline(yintercept= 100) +
                      geom_line(size = 0.8, col = "blue")+ 
                      theme_bw(base_size = 12) +
              theme(legend.position = "none")

              accuracy(fc1) #
##                ME RMSE  MAE  MPE MAPE  MASE     ACF1
## Training set 1.09 15.5 10.4 0.13  6.1 0.361 -0.00897
              #Different Version of Hybrid Graph
              autoplot(fc1) +
                      autolayer(Index)+
                      autolayer(NH_MA)+
                      ylab("NHREPI = January 2010 = 100") +
                      xlab("Year")

                      labs(title="New Haven Region", subtitle = "Economic Performance") +
                      geom_hline(yintercept= 100) +
                      geom_line(size = 0.8, col = "darkred")+ 
                      theme_bw(base_size = 12) +
                      theme(legend.position = "none")
## NULL
              #Shorter Window - to highlight forecast
              autoplot(fc1) +
                      autolayer(Index)+
                      autolayer(NH_MA)+
                      ylab("NHREPI = January 2010 = 100") +
                      xlab("Year")+
                      labs(title="New Haven Region", 
                           subtitle = "Economic Performance and 12-M Forecast",
                           caption = "Created by: J.Watters and J. Hickey, New Haven BANL") +
                      geom_hline(yintercept= 100) +
                      geom_line(size = 1.2, col = "darkred")+ 
                      #theme_bw(base_size = 12) +
                      xlim(x = c(2019,2023)) +
                      theme(legend.position = "none")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Warning: Removed 108 rows containing missing values (`geom_line()`).
## Warning: Removed 108 rows containing missing values (`geom_line()`).
## Removed 108 rows containing missing values (`geom_line()`).

              #and here is a dygraph for an html version - to upload to Rpubs
              library(dygraphs) # you need this package
              
              
              
                NH_Dat = cbind(NH_MA, Index, fc1$mean) #
              
              dygraph(NH_Dat, 
                      main = "New Haven Economic Performance Index",
                      ylab = "NHREPI = January 2010 = 100" )  %>%
                      dySeries("fc1$mean", label = "12-MO Forecast, (%)") %>%
                      dySeries("NH_MA", label = "6-Mo Moving Average") %>%
                      dyShading(from = "2020-03-1", to = "2021-04-01",
                                color = "#FFE6E6") %>%
                      dyRangeSelector()  %>% 
                      dyLimit(min(Index[1], na.rm = TRUE), "2010:1 = 100 ",
              strokePattern = "solid", color = "darkred")
              ##################################################################
              
#===============================================================================#