library(readxl)
library(tidyquant)
library(fpp3)
library(moments)
library(tsibble)
library(tsibbledata)
library(ggplot2)
library(dplyr)
library(seasonal)
library(seasonalview)
library(fabletools)
library(fable)
library(ggfortify)
library(quantmod)
library(GGally)

#Question 1

data <- read_excel("C:/Users/harle/OneDrive/Desktop/Exam I EC Clean.xlsx")
data
## # A tibble: 23 × 5
##     Year   GDP Income Consumption Employment
##    <dbl> <dbl>  <dbl>       <dbl>      <dbl>
##  1  1998   6.7    6           7.1        2.7
##  2  1999   5.8    7.6         7.5        2.6
##  3  2000   4.4    3.7         4.1        0.1
##  4  2001   1.4   -0.2         3.7       -0.9
##  5  2002   2.5    1.5         5.4        0.3
##  6  2003   6      4.6         6.1        1.3
##  7  2004   5.1    3.7         6.5        1.3
##  8  2005   5.8    7.8         5.5        1.5
##  9  2006   4.1    7.6         5.2        2.5
## 10  2007  -0.2    2           3.6        1  
## # … with 13 more rows
#Cleaned data in excel and then imported. 

datatsibble <- data %>%
  as_tsibble(index = Year)
datatsibble
## # A tsibble: 23 x 5 [1Y]
##     Year   GDP Income Consumption Employment
##    <dbl> <dbl>  <dbl>       <dbl>      <dbl>
##  1  1998   6.7    6           7.1        2.7
##  2  1999   5.8    7.6         7.5        2.6
##  3  2000   4.4    3.7         4.1        0.1
##  4  2001   1.4   -0.2         3.7       -0.9
##  5  2002   2.5    1.5         5.4        0.3
##  6  2003   6      4.6         6.1        1.3
##  7  2004   5.1    3.7         6.5        1.3
##  8  2005   5.8    7.8         5.5        1.5
##  9  2006   4.1    7.6         5.2        2.5
## 10  2007  -0.2    2           3.6        1  
## # … with 13 more rows
#Converted to tsibble with 'Year' as the index.

ggpairs(data %>% select(-Year))

#Personal consumption expenditures and total employment are highly and statistically significantly correlated at the 99% level. Total employment and personal consumption expenditures are also highly and statistically significantly correlated at the 99% level. This means that the two variables tend to move together very strongly and in a positive fashion. The others are not statistically significant and can therefore not be said to move together in a fashion differentiable from 0. 

#Modeling the data.
ny_model <- datatsibble %>%
  model(lm = TSLM(Consumption ~ Income + GDP + Employment))
report(ny_model)
## Series: Consumption 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.20680 -0.77373 -0.08306  0.82512  2.34479 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.92624    0.75624   2.547 0.019678 *  
## Income       0.06632    0.12835   0.517 0.611330    
## GDP          0.30922    0.16067   1.925 0.069394 .  
## Employment   0.85483    0.19281   4.434 0.000285 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.392 on 19 degrees of freedom
## Multiple R-squared: 0.7742,  Adjusted R-squared: 0.7386
## F-statistic: 21.72 on 3 and 19 DF, p-value: 2.3348e-06
#Income and GDP do not have a statistically significant impact on Consumption. Employment has a statistically significant impact on Consumption at the 99% level of confidence. For every 1% increase in employment there is an increase of 0.85% in consumption. 
#The intercept is significant at a 90% level and indicates a base percentage change rate of 1.92% in consumption. 

augment(ny_model) %>%
  ggplot(aes(x = Year)) +
  geom_line(aes(y = Consumption, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  scale_color_manual(values = c(Data = "black", Fitted = "red")) +
  labs(y = "Consumption",
       title = "New York Consumption") +
  guides(color = guide_legend(title = "Series")) 

#Plotted the predictive values (fitted) from the model for the values of consumption that I currently have. No new data. 

future_scenarios <- scenarios(
  Increase = new_data(datatsibble, 3) %>%
    mutate(Income=1, GDP=1, Employment=1),
  names_to = "Scenario")

fc <- forecast(ny_model, new_data = future_scenarios)

datatsibble %>%
  autoplot(Consumption) +
  autolayer(fc) +
  labs(title = "NY consumption", y = "% change")

fc
## # A fable: 3 x 8 [1Y]
## # Key:     Scenario, .model [1]
##   Scenario .model  Year Consumption .mean Income   GDP Employment
##   <chr>    <chr>  <dbl>      <dist> <dbl>  <dbl> <dbl>      <dbl>
## 1 Increase lm      2021 N(3.2, 2.4)  3.16      1     1          1
## 2 Increase lm      2022 N(3.2, 2.4)  3.16      1     1          1
## 3 Increase lm      2023 N(3.2, 2.4)  3.16      1     1          1
future_scenarios2 <- scenarios(
  Increase = new_data(datatsibble, 3) %>%
    mutate(Income= 1, GDP = 0, Employment = 0),
  Decrease = new_data(datatsibble, 3) %>%
    mutate(Income = -0.5, GDP = 0, Employment = 0),
  Increase8 = new_data(datatsibble, 3) %>%
    mutate(Income = 0.8, GDP = 0, Employment = 0),
  names_to = "Scenario")

fc2 <- forecast(ny_model, new_data = future_scenarios2)

datatsibble %>%
  autoplot(Consumption) +
  autolayer(fc2) +
  labs(title = "NY consumption", y = "% change")

fc2
## # A fable: 9 x 8 [1Y]
## # Key:     Scenario, .model [3]
##   Scenario  .model  Year Consumption .mean Income   GDP Employment
##   <chr>     <chr>  <dbl>      <dist> <dbl>  <dbl> <dbl>      <dbl>
## 1 Increase  lm      2021   N(2, 2.4)  1.99    1       0          0
## 2 Increase  lm      2022   N(2, 2.4)  1.99    1       0          0
## 3 Increase  lm      2023   N(2, 2.4)  1.99    1       0          0
## 4 Decrease  lm      2021 N(1.9, 2.6)  1.89   -0.5     0          0
## 5 Decrease  lm      2022 N(1.9, 2.6)  1.89   -0.5     0          0
## 6 Decrease  lm      2023 N(1.9, 2.6)  1.89   -0.5     0          0
## 7 Increase8 lm      2021   N(2, 2.4)  1.98    0.8     0          0
## 8 Increase8 lm      2022   N(2, 2.4)  1.98    0.8     0          0
## 9 Increase8 lm      2023   N(2, 2.4)  1.98    0.8     0          0
#The prediction intervals for the 1% and 0.8% increase in income are identical with nearly the same point forecasts. 
#The point forecast for the 1% increase in every variable is 3.16 and the point forecasts for the 1%, -0.5%, and 0.8% changes income are 1.99, 1.89, and 1.98, respectively. This makes sense as I would expect a higher point forecast when GDP and Consumption change as I previously saw they were significantly and positively correlated. I would also expect Employment to have a strong effect since it had a significant coefficient. When Employment changed by 1%, Consumption should increase by .85% as seen in the model's coefficients. The -0.5% decrease in income clearly lowers the point forecast as expected.

#Question 2

ECQ2Data <- read_excel("C:/Users/harle/OneDrive/Desktop/ECQ2Data.xlsx", 
                       col_types = c("text", "numeric"))

q2ts <- ECQ2Data %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter)

autoplot(q2ts)
## Plot variable not specified, automatically selected `.vars = Data`

q2ts %>%
  features(Data, features = guerrero)
## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1           0.810
q2ts %>% autoplot(box_cox(Data, 0.810)) +
  labs(title = "Box-Cox transformed Data with Guerrero")

#I do not believe any transformations are necessary as there is there is a very steady trend without a lot of variance. I tried a Box-Cox transformation with a guerrero lambda and it had no meaningful impact on the data, proving my point.
exam_dcmp <- q2ts %>%
  model(stl = STL(Data))
components(exam_dcmp) %>%
  autoplot()

#There is a clear upward trend with no seasonality. Comparing the season_year component and the remainder, it looks like the STL decomp was trying to force seasonality since there are spikes in the remainder when the seasonal component speaks. Based off this, I believe the drift method is best to capture the trend. I will do all methods and compare the accuracies.

q2mean <- q2ts %>% 
  model(MEAN(Data)) %>%
  forecast(h = 4) %>%
  autoplot(q2ts) +
  labs(title = "Forecast of Data using MEAN Method")

q2snaive <- q2ts %>% 
  model(SNAIVE(Data)) %>%
  forecast(h = 4) %>%
  autoplot(q2ts) +
  labs(title = "Forecast of Data using SNAIVE Method")

q2naive <- q2ts %>% 
  model(NAIVE(Data)) %>%
  forecast(h = 4) %>%
  autoplot(q2ts) +
  labs(title = "Forecast of Data using NAIVE Method")

q2drift <- q2ts %>% 
  model(RW(Data ~ drift())) %>%
  forecast(h = 4) %>%
  autoplot(q2ts) +
  labs(title = "Forecast of Data using Drift Method")
#Plotting the forecast for each method individually, it appears that drift will be the most reliable. 
q2ts %>% ACF(Data, lag_max = 10) %>% 
  autoplot() + 
  labs(title = "ACF Plot")

#The plot of my residuals implies that my model is missing a trend component and that my residuals do not resemble white noise. It is therefore not reasonable to assume that my residuals follow a normal distribution. 
q2_models <- q2ts %>%
  model(MEAN(Data),
        NAIVE(Data),
        RW(Data ~ drift()))

accuracy(q2_models)
## # A tibble: 3 × 10
##   .model             .type        ME  RMSE   MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>              <chr>     <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Data)         Train… 2.00e-14 1563. 1378. -24.0   49.1  3.02  2.75  0.953
## 2 NAIVE(Data)        Train… 9.11e+ 1  208.  151.   2.81   4.33 0.331 0.365 0.134
## 3 RW(Data ~ drift()) Train… 3.54e-14  187.  140.  -0.165  4.17 0.307 0.329 0.134
#Both RMSE and MAPE are lowest for the drift, showing that drift is the best method. 
fitted <- q2ts%>%
  model(RW(Data ~ drift()))
simulate <- fitted %>% generate(h=4,times=5, bootstrap = TRUE)
Bootstrapped <- fitted %>% forecast(h = 4, bootstrap = TRUE, times=5000)
autoplot(Bootstrapped, q2ts) + 
  labs(title = "Drift Method Forecast with Bootstrapped Values")

#I bootstrapped the drift method since I saw that the residuals are likely not normally distributed when I did the ACF.

Bootstrapped %>%
  hilo(80)
## # A tsibble: 4 x 5 [1Q]
## # Key:       .model [1]
##   .model             Quarter         Data .mean                  `80%`
##   <chr>                <qtr>       <dist> <dbl>                 <hilo>
## 1 RW(Data ~ drift()) 1995 Q1 sample[5000] 5542. [5336.582, 5753.722]80
## 2 RW(Data ~ drift()) 1995 Q2 sample[5000] 5584. [5284.287, 5937.983]80
## 3 RW(Data ~ drift()) 1995 Q3 sample[5000] 5634. [5248.431, 6066.939]80
## 4 RW(Data ~ drift()) 1995 Q4 sample[5000] 5673. [5222.351, 6151.907]80
#Point forecasts with 80% prediction intervals.