library(readxl)
library(tidyquant)
library(fpp3)
library(moments)
library(tsibble)
library(tsibbledata)
library(ggplot2)
library(dplyr)
library(seasonal)
library(seasonalview)
library(fabletools)
library(fable)
library(ggfortify)
library(quantmod)
library(GGally)
#Question 1
data <- read_excel("C:/Users/harle/OneDrive/Desktop/Exam I EC Clean.xlsx")
data
## # A tibble: 23 × 5
## Year GDP Income Consumption Employment
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1998 6.7 6 7.1 2.7
## 2 1999 5.8 7.6 7.5 2.6
## 3 2000 4.4 3.7 4.1 0.1
## 4 2001 1.4 -0.2 3.7 -0.9
## 5 2002 2.5 1.5 5.4 0.3
## 6 2003 6 4.6 6.1 1.3
## 7 2004 5.1 3.7 6.5 1.3
## 8 2005 5.8 7.8 5.5 1.5
## 9 2006 4.1 7.6 5.2 2.5
## 10 2007 -0.2 2 3.6 1
## # … with 13 more rows
#Cleaned data in excel and then imported.
datatsibble <- data %>%
as_tsibble(index = Year)
datatsibble
## # A tsibble: 23 x 5 [1Y]
## Year GDP Income Consumption Employment
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1998 6.7 6 7.1 2.7
## 2 1999 5.8 7.6 7.5 2.6
## 3 2000 4.4 3.7 4.1 0.1
## 4 2001 1.4 -0.2 3.7 -0.9
## 5 2002 2.5 1.5 5.4 0.3
## 6 2003 6 4.6 6.1 1.3
## 7 2004 5.1 3.7 6.5 1.3
## 8 2005 5.8 7.8 5.5 1.5
## 9 2006 4.1 7.6 5.2 2.5
## 10 2007 -0.2 2 3.6 1
## # … with 13 more rows
#Converted to tsibble with 'Year' as the index.
ggpairs(data %>% select(-Year))
#Personal consumption expenditures and total employment are highly and statistically significantly correlated at the 99% level. Total employment and personal consumption expenditures are also highly and statistically significantly correlated at the 99% level. This means that the two variables tend to move together very strongly and in a positive fashion. The others are not statistically significant and can therefore not be said to move together in a fashion differentiable from 0.
#Modeling the data.
ny_model <- datatsibble %>%
model(lm = TSLM(Consumption ~ Income + GDP + Employment))
report(ny_model)
## Series: Consumption
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.20680 -0.77373 -0.08306 0.82512 2.34479
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.92624 0.75624 2.547 0.019678 *
## Income 0.06632 0.12835 0.517 0.611330
## GDP 0.30922 0.16067 1.925 0.069394 .
## Employment 0.85483 0.19281 4.434 0.000285 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.392 on 19 degrees of freedom
## Multiple R-squared: 0.7742, Adjusted R-squared: 0.7386
## F-statistic: 21.72 on 3 and 19 DF, p-value: 2.3348e-06
#Income and GDP do not have a statistically significant impact on Consumption. Employment has a statistically significant impact on Consumption at the 99% level of confidence. For every 1% increase in employment there is an increase of 0.85% in consumption.
#The intercept is significant at a 90% level and indicates a base percentage change rate of 1.92% in consumption.
augment(ny_model) %>%
ggplot(aes(x = Year)) +
geom_line(aes(y = Consumption, color = "Data")) +
geom_line(aes(y = .fitted, color = "Fitted")) +
scale_color_manual(values = c(Data = "black", Fitted = "red")) +
labs(y = "Consumption",
title = "New York Consumption") +
guides(color = guide_legend(title = "Series"))
#Plotted the predictive values (fitted) from the model for the values of consumption that I currently have. No new data.
future_scenarios <- scenarios(
Increase = new_data(datatsibble, 3) %>%
mutate(Income=1, GDP=1, Employment=1),
names_to = "Scenario")
fc <- forecast(ny_model, new_data = future_scenarios)
datatsibble %>%
autoplot(Consumption) +
autolayer(fc) +
labs(title = "NY consumption", y = "% change")
fc
## # A fable: 3 x 8 [1Y]
## # Key: Scenario, .model [1]
## Scenario .model Year Consumption .mean Income GDP Employment
## <chr> <chr> <dbl> <dist> <dbl> <dbl> <dbl> <dbl>
## 1 Increase lm 2021 N(3.2, 2.4) 3.16 1 1 1
## 2 Increase lm 2022 N(3.2, 2.4) 3.16 1 1 1
## 3 Increase lm 2023 N(3.2, 2.4) 3.16 1 1 1
future_scenarios2 <- scenarios(
Increase = new_data(datatsibble, 3) %>%
mutate(Income= 1, GDP = 0, Employment = 0),
Decrease = new_data(datatsibble, 3) %>%
mutate(Income = -0.5, GDP = 0, Employment = 0),
Increase8 = new_data(datatsibble, 3) %>%
mutate(Income = 0.8, GDP = 0, Employment = 0),
names_to = "Scenario")
fc2 <- forecast(ny_model, new_data = future_scenarios2)
datatsibble %>%
autoplot(Consumption) +
autolayer(fc2) +
labs(title = "NY consumption", y = "% change")
fc2
## # A fable: 9 x 8 [1Y]
## # Key: Scenario, .model [3]
## Scenario .model Year Consumption .mean Income GDP Employment
## <chr> <chr> <dbl> <dist> <dbl> <dbl> <dbl> <dbl>
## 1 Increase lm 2021 N(2, 2.4) 1.99 1 0 0
## 2 Increase lm 2022 N(2, 2.4) 1.99 1 0 0
## 3 Increase lm 2023 N(2, 2.4) 1.99 1 0 0
## 4 Decrease lm 2021 N(1.9, 2.6) 1.89 -0.5 0 0
## 5 Decrease lm 2022 N(1.9, 2.6) 1.89 -0.5 0 0
## 6 Decrease lm 2023 N(1.9, 2.6) 1.89 -0.5 0 0
## 7 Increase8 lm 2021 N(2, 2.4) 1.98 0.8 0 0
## 8 Increase8 lm 2022 N(2, 2.4) 1.98 0.8 0 0
## 9 Increase8 lm 2023 N(2, 2.4) 1.98 0.8 0 0
#The prediction intervals for the 1% and 0.8% increase in income are identical with nearly the same point forecasts.
#The point forecast for the 1% increase in every variable is 3.16 and the point forecasts for the 1%, -0.5%, and 0.8% changes income are 1.99, 1.89, and 1.98, respectively. This makes sense as I would expect a higher point forecast when GDP and Consumption change as I previously saw they were significantly and positively correlated. I would also expect Employment to have a strong effect since it had a significant coefficient. When Employment changed by 1%, Consumption should increase by .85% as seen in the model's coefficients. The -0.5% decrease in income clearly lowers the point forecast as expected.
#Question 2
ECQ2Data <- read_excel("C:/Users/harle/OneDrive/Desktop/ECQ2Data.xlsx",
col_types = c("text", "numeric"))
q2ts <- ECQ2Data %>%
mutate(Quarter = yearquarter(Quarter)) %>%
as_tsibble(index = Quarter)
autoplot(q2ts)
## Plot variable not specified, automatically selected `.vars = Data`
q2ts %>%
features(Data, features = guerrero)
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 0.810
q2ts %>% autoplot(box_cox(Data, 0.810)) +
labs(title = "Box-Cox transformed Data with Guerrero")
#I do not believe any transformations are necessary as there is there is a very steady trend without a lot of variance. I tried a Box-Cox transformation with a guerrero lambda and it had no meaningful impact on the data, proving my point.
exam_dcmp <- q2ts %>%
model(stl = STL(Data))
components(exam_dcmp) %>%
autoplot()
#There is a clear upward trend with no seasonality. Comparing the season_year component and the remainder, it looks like the STL decomp was trying to force seasonality since there are spikes in the remainder when the seasonal component speaks. Based off this, I believe the drift method is best to capture the trend. I will do all methods and compare the accuracies.
q2mean <- q2ts %>%
model(MEAN(Data)) %>%
forecast(h = 4) %>%
autoplot(q2ts) +
labs(title = "Forecast of Data using MEAN Method")
q2snaive <- q2ts %>%
model(SNAIVE(Data)) %>%
forecast(h = 4) %>%
autoplot(q2ts) +
labs(title = "Forecast of Data using SNAIVE Method")
q2naive <- q2ts %>%
model(NAIVE(Data)) %>%
forecast(h = 4) %>%
autoplot(q2ts) +
labs(title = "Forecast of Data using NAIVE Method")
q2drift <- q2ts %>%
model(RW(Data ~ drift())) %>%
forecast(h = 4) %>%
autoplot(q2ts) +
labs(title = "Forecast of Data using Drift Method")
#Plotting the forecast for each method individually, it appears that drift will be the most reliable.
q2ts %>% ACF(Data, lag_max = 10) %>%
autoplot() +
labs(title = "ACF Plot")
#The plot of my residuals implies that my model is missing a trend component and that my residuals do not resemble white noise. It is therefore not reasonable to assume that my residuals follow a normal distribution.
q2_models <- q2ts %>%
model(MEAN(Data),
NAIVE(Data),
RW(Data ~ drift()))
accuracy(q2_models)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Data) Train… 2.00e-14 1563. 1378. -24.0 49.1 3.02 2.75 0.953
## 2 NAIVE(Data) Train… 9.11e+ 1 208. 151. 2.81 4.33 0.331 0.365 0.134
## 3 RW(Data ~ drift()) Train… 3.54e-14 187. 140. -0.165 4.17 0.307 0.329 0.134
#Both RMSE and MAPE are lowest for the drift, showing that drift is the best method.
fitted <- q2ts%>%
model(RW(Data ~ drift()))
simulate <- fitted %>% generate(h=4,times=5, bootstrap = TRUE)
Bootstrapped <- fitted %>% forecast(h = 4, bootstrap = TRUE, times=5000)
autoplot(Bootstrapped, q2ts) +
labs(title = "Drift Method Forecast with Bootstrapped Values")
#I bootstrapped the drift method since I saw that the residuals are likely not normally distributed when I did the ACF.
Bootstrapped %>%
hilo(80)
## # A tsibble: 4 x 5 [1Q]
## # Key: .model [1]
## .model Quarter Data .mean `80%`
## <chr> <qtr> <dist> <dbl> <hilo>
## 1 RW(Data ~ drift()) 1995 Q1 sample[5000] 5542. [5336.582, 5753.722]80
## 2 RW(Data ~ drift()) 1995 Q2 sample[5000] 5584. [5284.287, 5937.983]80
## 3 RW(Data ~ drift()) 1995 Q3 sample[5000] 5634. [5248.431, 6066.939]80
## 4 RW(Data ~ drift()) 1995 Q4 sample[5000] 5673. [5222.351, 6151.907]80
#Point forecasts with 80% prediction intervals.