Packages

library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble      3.1.8     ✔ tsibble     1.1.2
## ✔ dplyr       1.0.9     ✔ tsibbledata 0.4.0
## ✔ tidyr       1.2.0     ✔ feasts      0.2.2
## ✔ lubridate   1.8.0     ✔ fable       0.3.1
## ✔ ggplot2     3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(readxl)
library(ggplot2)
library(tsibble)
library(lubridate)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ readr   2.1.2     ✔ stringr 1.4.1
## ✔ purrr   0.3.4     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date()        masks base::date()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ tsibble::intersect()     masks lubridate::intersect(), base::intersect()
## ✖ tsibble::interval()      masks lubridate::interval()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ tsibble::setdiff()       masks lubridate::setdiff(), base::setdiff()
## ✖ tsibble::union()         masks lubridate::union(), base::union()
library(dplyr)
library(seasonal)
## 
## Attaching package: 'seasonal'
## 
## The following object is masked from 'package:tibble':
## 
##     view
library(zoo)
## 
## Attaching package: 'zoo'
## 
## The following object is masked from 'package:tsibble':
## 
##     index
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(tidyr)
library(tibble)
library(ggfortify)
library(moments)
library(fable)
library(brew)
library(sos)
## 
## Attaching package: 'sos'
## 
## The following object is masked from 'package:tidyr':
## 
##     matches
## 
## The following object is masked from 'package:dplyr':
## 
##     matches
## 
## The following object is masked from 'package:utils':
## 
##     ?
library(slider)
library(x13binary)
library(USgas)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

Question 1

1.1

#import
WA_data <- read_excel("C:/Users/natha/Desktop/Forecasting in R/WA_data.xlsx")
#I changed the excel sheet so that it would be easier in the coding
#organizing into tsibble
WA_1 <- WA_data %>%
  select(-GeoFips, -GeoName, -LineCode)%>%
  pivot_longer(!Description, names_to = "Year", values_to = "count")%>%
  pivot_wider(names_from = Description, values_from = count)
head(WA_1)
## # A tibble: 6 × 7
##   Year  Dollar   GDP Income Consumption Employment Total_Employment
##   <chr>  <dbl> <dbl>  <dbl>       <dbl>      <dbl>            <dbl>
## 1 1999      NA   9.6    6.2         7.5         NA              1.9
## 2 2000      NA   3.3    6.9         7.8         NA              2.3
## 3 2001      NA   0.4    2.4         3.1         NA             -0.3
## 4 2002      NA   3.2    1.5         3.7         NA             -0.8
## 5 2003      NA   3.8    4           5.7         NA              0.8
## 6 2004      NA   5.1    7.5         6.7         NA              2
WA_2 <- WA_1%>%
  select(-Employment, -Dollar)
WA_2$Year = as.numeric(as.character(WA_2$Year))
head(WA_2)
## # A tibble: 6 × 5
##    Year   GDP Income Consumption Total_Employment
##   <dbl> <dbl>  <dbl>       <dbl>            <dbl>
## 1  1999   9.6    6.2         7.5              1.9
## 2  2000   3.3    6.9         7.8              2.3
## 3  2001   0.4    2.4         3.1             -0.3
## 4  2002   3.2    1.5         3.7             -0.8
## 5  2003   3.8    4           5.7              0.8
## 6  2004   5.1    7.5         6.7              2
WA_stib <- WA_2 %>%
  as_tsibble(index = Year)
head(WA_stib)
## # A tsibble: 6 x 5 [1Y]
##    Year   GDP Income Consumption Total_Employment
##   <dbl> <dbl>  <dbl>       <dbl>            <dbl>
## 1  1999   9.6    6.2         7.5              1.9
## 2  2000   3.3    6.9         7.8              2.3
## 3  2001   0.4    2.4         3.1             -0.3
## 4  2002   3.2    1.5         3.7             -0.8
## 5  2003   3.8    4           5.7              0.8
## 6  2004   5.1    7.5         6.7              2

1.2

#Plotting all variables
WA_stib %>%
  ggplot(aes(x = Year)) +
  geom_line(aes(y = GDP, color = "GDP")) +
  geom_line(aes(y = Income, color = "Income")) +
  geom_line(aes(y = Consumption, color = "Consumption")) +
  geom_line(aes(y = Total_Employment, color = "Employment")) +
  labs(y = "% Change", title = "Consumption, GPD, Income, and Employment") +
  scale_colour_manual(values = c(GDP = "orange", Income = "dark green", Consumption = "black", Employment = "blue")) +
  guides(colour = guide_legend(title = NULL))

#plotting ggpair with WA_2, it's the tibble version of WA_stib
ggpairs(WA_2)

ggpairs(WA_2 %>%
          select(-Year))

1.3: The interpretation here is based off the ggpair graph that excludes the Year variable. GDP is positively correlated with every variable. GDP is correlated with consumption and employment at the 99.9% significance level,and is correlated at the 99% significants level for income. This is not surprising because income, employment, and consumption all contribute to GDP. Income is positively correlated with consumption at the 95% level and employment at the 99% level. This makes reasonable because as people make more, the will spend more. Most people earn income through a job, so these two variables being correlated makes since as well. Consumption is positively correlated to employment at the 99.9% level. This makes sense because as employment increases, income increases, which means people have money to increase their consumption.

1.4

#model for estimating the relationship between variables
WA_reg <- WA_stib %>%
  model(TSLM(Consumption ~ Income + Total_Employment + GDP))
report(WA_reg)
## Series: Consumption 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0978 -1.1434 -0.3197  0.7035  4.3069 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       2.23047    0.92958   2.399   0.0268 *
## Income           -0.05339    0.16009  -0.334   0.7424  
## Total_Employment  0.78957    0.28523   2.768   0.0122 *
## GDP               0.38749    0.19365   2.001   0.0599 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.7 on 19 degrees of freedom
## Multiple R-squared: 0.6848,  Adjusted R-squared: 0.635
## F-statistic: 13.76 on 3 and 19 DF, p-value: 5.264e-05

1.5: Of the explanatory variables, only employment is significant at the 95% level. As income increases by 1%, consumption decreases by 0.053%. As GDP increases by 1%, consumption increases by .387%. These values are not statistically significant. As employment increases by 1%, consumption increases by .79%. The intercept is statistically significant at the 95% level and means that consumption has a 2.23% change rate if income, GDP, and employment remain constant.

1.6

#Plot the fitted and actual values for consumption.
augment(WA_reg) %>%
  ggplot(aes(x = Year)) +
  geom_line(aes(y = Consumption, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "% Change", title = "Consumption in Washington") +
  scale_colour_manual(values = c(Data = "black", Fitted = "red")) +
  guides(colour = guide_legend(title = NULL))

1.7

#Scenario 1, constant growth
WA_fit <- WA_stib %>%
  model(lm = TSLM(Consumption ~ GDP + Income + Total_Employment))

WA_scenario1 <- scenarios(
  Increase = new_data(WA_stib, 3) %>%
    mutate(GDP = 1, Income = 1, Total_Employment = 1),
  names_to = "Scenario")

WA_Forescene1 <- forecast(WA_fit, new_data = WA_scenario1)

WA_stib %>%
  autoplot(Consumption) +
  autolayer(WA_Forescene1) +
  labs(title = "Consumption Forecast Assuming a 1% Constant Growth", y = "% Change")

#Scenario 2, Income changes
WA_scenari02 <- scenarios(
  Increase_1 = new_data(WA_stib, 3) %>%
    mutate(Income = 1, GDP = 0, Total_Employment = 0),
  Decrease_0.5 = new_data(WA_stib, 3)%>%
    mutate(Income = -0.5, GDP = 0, Total_Employment = 0),
  Increase_0.8 = new_data(WA_stib, 3)%>%
    mutate(Income = 0.8, GDP = 0, Total_Employment = 0),
  names_to = "scenario")

forecast2 <- forecast(WA_fit, new_data = WA_scenari02)

WA_stib %>%
  autoplot(Consumption) + autolayer(forecast2)+labs("Washington Consumption", y = "% Change")

WA_Forescene1
## # A fable: 3 x 8 [1Y]
## # Key:     Scenario, .model [1]
##   Scenario .model  Year Consumption .mean   GDP Income Total_Employment
##   <chr>    <chr>  <dbl>      <dist> <dbl> <dbl>  <dbl>            <dbl>
## 1 Increase lm      2022 N(3.4, 3.7)  3.35     1      1                1
## 2 Increase lm      2023 N(3.4, 3.7)  3.35     1      1                1
## 3 Increase lm      2024 N(3.4, 3.7)  3.35     1      1                1
forecast2
## # A fable: 9 x 8 [1Y]
## # Key:     scenario, .model [3]
##   scenario     .model  Year Consumption .mean Income   GDP Total_Employment
##   <chr>        <chr>  <dbl>      <dist> <dbl>  <dbl> <dbl>            <dbl>
## 1 Increase_1   lm      2022 N(2.2, 3.6)  2.18    1       0                0
## 2 Increase_1   lm      2023 N(2.2, 3.6)  2.18    1       0                0
## 3 Increase_1   lm      2024 N(2.2, 3.6)  2.18    1       0                0
## 4 Decrease_0.5 lm      2022 N(2.3, 3.8)  2.26   -0.5     0                0
## 5 Decrease_0.5 lm      2023 N(2.3, 3.8)  2.26   -0.5     0                0
## 6 Decrease_0.5 lm      2024 N(2.3, 3.8)  2.26   -0.5     0                0
## 7 Increase_0.8 lm      2022 N(2.2, 3.7)  2.19    0.8     0                0
## 8 Increase_0.8 lm      2023 N(2.2, 3.7)  2.19    0.8     0                0
## 9 Increase_0.8 lm      2024 N(2.2, 3.7)  2.19    0.8     0                0

1.8: The point forecast for scenario 1 is 3.3541% with a prediction interval of 3.4% to 3.7%. The point forecast when income increases by 1% is 2.18% with a PI of 2.2% to 3.6%. When income is increased by 0.08%, the point forecast is 2.19% with a PI of 2.2% to 3.7%. The point forecast when income is decreasing by 0.5% is 2.26% with a PI of 2.3% to 3.8%. The point forecast for the decreasing income being the highest of the three makes sense because the model produced a negative beta for income.

Question 2

#The majority of cleaning was preformed in excel
M3_ns <- read_excel("C:/Users/natha/Desktop/Forecasting in R/M3_exam1_transformed.xlsx")
head(M3_ns)
## # A tibble: 6 × 2
##   Year     Data
##   <chr>   <dbl>
## 1 1983 Q4 3681.
## 2 1984 Q1 3686.
## 3 1984 Q2 3404.
## 4 1984 Q3 2700.
## 5 1984 Q4 3165.
## 6 1985 Q1 3233.
M3_ns <- M3_ns%>%
  mutate(Period = yearquarter(Year))%>%
  select(-Year)
head(M3_ns)
## # A tibble: 6 × 2
##    Data  Period
##   <dbl>   <qtr>
## 1 3681. 1983 Q4
## 2 3686. 1984 Q1
## 3 3404. 1984 Q2
## 4 2700. 1984 Q3
## 5 3165. 1984 Q4
## 6 3233. 1985 Q1
M3_tsib <- M3_ns%>%
  as_tsibble(index = Period)
head(M3_tsib)
## # A tsibble: 6 x 2 [1Q]
##    Data  Period
##   <dbl>   <qtr>
## 1 3681. 1983 Q4
## 2 3686. 1984 Q1
## 3 3404. 1984 Q2
## 4 2700. 1984 Q3
## 5 3165. 1984 Q4
## 6 3233. 1985 Q1
M3_tsib %>%
  ggplot(aes(x = Period)) +
  geom_line(aes(y = Data)) +
  labs(title = "Nathan's Data", y = "Data Index")

ns_add <- M3_tsib %>%
  model(classical_decomposition(Data, type = "additive")) %>%
  components() %>%
  autoplot() +
  labs(title = "Classical Additive Decomposition")
ns_add
## Warning: Removed 2 row(s) containing missing values (geom_path).

ns_multi <- M3_tsib %>%
  model(classical_decomposition(Data, type = "additive")) %>%
  components() %>%
  autoplot() +
  labs(title = "Classical Additive Decomposition")
ns_multi
## Warning: Removed 2 row(s) containing missing values (geom_path).

ns_stl <- M3_tsib %>%
  model(STL(Data ~ season(window = 4), robust = TRUE)) %>%
  components() %>% 
  autoplot() +
  labs(title = "STL Decomposition")
ns_stl

M3_tsib %>%
  features(Data, features = guerrero)
## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1           0.415
lamda <- 0.415
M3_tsib %>%
  autoplot(box_cox(Data, lamda)) +
  labs(title = "Box-Cox Transformation", x = "Date")

Interpretation of transformations: Every transformation I performed above did not show any significant features in the data. There is a relatively flat trend in all the increases in the later part of the data. There is a seasonal component to the data.

ns_models <- M3_tsib %>%
  model(MEAN(Data),
        NAIVE(Data),
        RW(Data ~ drift()))

accuracy(ns_models)
## # A tibble: 3 × 10
##   .model             .type         ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>              <chr>      <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 MEAN(Data)         Train… -4.04e-14  642.  478. -3.61  14.7  1.04  1.12  0.546
## 2 NAIVE(Data)        Train…  7.26e- 1  617.  516. -1.86  15.9  1.13  1.07 -0.118
## 3 RW(Data ~ drift()) Train…  1.45e-13  617.  516. -1.88  15.9  1.13  1.07 -0.118
ns_models
## # A mable: 1 x 3
##   `MEAN(Data)` `NAIVE(Data)` `RW(Data ~ drift())`
##        <model>       <model>              <model>
## 1       <MEAN>       <NAIVE>        <RW w/ drift>

The drift method has a slightly lower RMSE value which means if is the best to forecast to use.

driftfc_ns <- M3_tsib %>% 
  model(RW(Data ~ drift())) %>%
  forecast(h = 4) %>%
  autoplot(M3_tsib) +
  labs(title = "Drift Forecast")
driftfc_ns