library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble 3.1.8 ✔ tsibble 1.1.2
## ✔ dplyr 1.0.9 ✔ tsibbledata 0.4.0
## ✔ tidyr 1.2.0 ✔ feasts 0.2.2
## ✔ lubridate 1.8.0 ✔ fable 0.3.1
## ✔ ggplot2 3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(readxl)
library(ggplot2)
library(tsibble)
library(lubridate)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ readr 2.1.2 ✔ stringr 1.4.1
## ✔ purrr 0.3.4 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks lubridate::intersect(), base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks lubridate::setdiff(), base::setdiff()
## ✖ tsibble::union() masks lubridate::union(), base::union()
library(dplyr)
library(seasonal)
##
## Attaching package: 'seasonal'
##
## The following object is masked from 'package:tibble':
##
## view
library(zoo)
##
## Attaching package: 'zoo'
##
## The following object is masked from 'package:tsibble':
##
## index
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(tidyr)
library(tibble)
library(ggfortify)
library(moments)
library(fable)
library(brew)
library(sos)
##
## Attaching package: 'sos'
##
## The following object is masked from 'package:tidyr':
##
## matches
##
## The following object is masked from 'package:dplyr':
##
## matches
##
## The following object is masked from 'package:utils':
##
## ?
library(slider)
library(x13binary)
library(USgas)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
1.1
#import
WA_data <- read_excel("C:/Users/natha/Desktop/Forecasting in R/WA_data.xlsx")
#I changed the excel sheet so that it would be easier in the coding
#organizing into tsibble
WA_1 <- WA_data %>%
select(-GeoFips, -GeoName, -LineCode)%>%
pivot_longer(!Description, names_to = "Year", values_to = "count")%>%
pivot_wider(names_from = Description, values_from = count)
head(WA_1)
## # A tibble: 6 × 7
## Year Dollar GDP Income Consumption Employment Total_Employment
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1999 NA 9.6 6.2 7.5 NA 1.9
## 2 2000 NA 3.3 6.9 7.8 NA 2.3
## 3 2001 NA 0.4 2.4 3.1 NA -0.3
## 4 2002 NA 3.2 1.5 3.7 NA -0.8
## 5 2003 NA 3.8 4 5.7 NA 0.8
## 6 2004 NA 5.1 7.5 6.7 NA 2
WA_2 <- WA_1%>%
select(-Employment, -Dollar)
WA_2$Year = as.numeric(as.character(WA_2$Year))
head(WA_2)
## # A tibble: 6 × 5
## Year GDP Income Consumption Total_Employment
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1999 9.6 6.2 7.5 1.9
## 2 2000 3.3 6.9 7.8 2.3
## 3 2001 0.4 2.4 3.1 -0.3
## 4 2002 3.2 1.5 3.7 -0.8
## 5 2003 3.8 4 5.7 0.8
## 6 2004 5.1 7.5 6.7 2
WA_stib <- WA_2 %>%
as_tsibble(index = Year)
head(WA_stib)
## # A tsibble: 6 x 5 [1Y]
## Year GDP Income Consumption Total_Employment
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1999 9.6 6.2 7.5 1.9
## 2 2000 3.3 6.9 7.8 2.3
## 3 2001 0.4 2.4 3.1 -0.3
## 4 2002 3.2 1.5 3.7 -0.8
## 5 2003 3.8 4 5.7 0.8
## 6 2004 5.1 7.5 6.7 2
1.2
#Plotting all variables
WA_stib %>%
ggplot(aes(x = Year)) +
geom_line(aes(y = GDP, color = "GDP")) +
geom_line(aes(y = Income, color = "Income")) +
geom_line(aes(y = Consumption, color = "Consumption")) +
geom_line(aes(y = Total_Employment, color = "Employment")) +
labs(y = "% Change", title = "Consumption, GPD, Income, and Employment") +
scale_colour_manual(values = c(GDP = "orange", Income = "dark green", Consumption = "black", Employment = "blue")) +
guides(colour = guide_legend(title = NULL))
#plotting ggpair with WA_2, it's the tibble version of WA_stib
ggpairs(WA_2)
ggpairs(WA_2 %>%
select(-Year))
1.3: The interpretation here is based off the ggpair graph that excludes the Year variable. GDP is positively correlated with every variable. GDP is correlated with consumption and employment at the 99.9% significance level,and is correlated at the 99% significants level for income. This is not surprising because income, employment, and consumption all contribute to GDP. Income is positively correlated with consumption at the 95% level and employment at the 99% level. This makes reasonable because as people make more, the will spend more. Most people earn income through a job, so these two variables being correlated makes since as well. Consumption is positively correlated to employment at the 99.9% level. This makes sense because as employment increases, income increases, which means people have money to increase their consumption.
1.4
#model for estimating the relationship between variables
WA_reg <- WA_stib %>%
model(TSLM(Consumption ~ Income + Total_Employment + GDP))
report(WA_reg)
## Series: Consumption
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0978 -1.1434 -0.3197 0.7035 4.3069
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.23047 0.92958 2.399 0.0268 *
## Income -0.05339 0.16009 -0.334 0.7424
## Total_Employment 0.78957 0.28523 2.768 0.0122 *
## GDP 0.38749 0.19365 2.001 0.0599 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.7 on 19 degrees of freedom
## Multiple R-squared: 0.6848, Adjusted R-squared: 0.635
## F-statistic: 13.76 on 3 and 19 DF, p-value: 5.264e-05
1.5: Of the explanatory variables, only employment is significant at the 95% level. As income increases by 1%, consumption decreases by 0.053%. As GDP increases by 1%, consumption increases by .387%. These values are not statistically significant. As employment increases by 1%, consumption increases by .79%. The intercept is statistically significant at the 95% level and means that consumption has a 2.23% change rate if income, GDP, and employment remain constant.
1.6
#Plot the fitted and actual values for consumption.
augment(WA_reg) %>%
ggplot(aes(x = Year)) +
geom_line(aes(y = Consumption, color = "Data")) +
geom_line(aes(y = .fitted, color = "Fitted")) +
labs(y = "% Change", title = "Consumption in Washington") +
scale_colour_manual(values = c(Data = "black", Fitted = "red")) +
guides(colour = guide_legend(title = NULL))
1.7
#Scenario 1, constant growth
WA_fit <- WA_stib %>%
model(lm = TSLM(Consumption ~ GDP + Income + Total_Employment))
WA_scenario1 <- scenarios(
Increase = new_data(WA_stib, 3) %>%
mutate(GDP = 1, Income = 1, Total_Employment = 1),
names_to = "Scenario")
WA_Forescene1 <- forecast(WA_fit, new_data = WA_scenario1)
WA_stib %>%
autoplot(Consumption) +
autolayer(WA_Forescene1) +
labs(title = "Consumption Forecast Assuming a 1% Constant Growth", y = "% Change")
#Scenario 2, Income changes
WA_scenari02 <- scenarios(
Increase_1 = new_data(WA_stib, 3) %>%
mutate(Income = 1, GDP = 0, Total_Employment = 0),
Decrease_0.5 = new_data(WA_stib, 3)%>%
mutate(Income = -0.5, GDP = 0, Total_Employment = 0),
Increase_0.8 = new_data(WA_stib, 3)%>%
mutate(Income = 0.8, GDP = 0, Total_Employment = 0),
names_to = "scenario")
forecast2 <- forecast(WA_fit, new_data = WA_scenari02)
WA_stib %>%
autoplot(Consumption) + autolayer(forecast2)+labs("Washington Consumption", y = "% Change")
WA_Forescene1
## # A fable: 3 x 8 [1Y]
## # Key: Scenario, .model [1]
## Scenario .model Year Consumption .mean GDP Income Total_Employment
## <chr> <chr> <dbl> <dist> <dbl> <dbl> <dbl> <dbl>
## 1 Increase lm 2022 N(3.4, 3.7) 3.35 1 1 1
## 2 Increase lm 2023 N(3.4, 3.7) 3.35 1 1 1
## 3 Increase lm 2024 N(3.4, 3.7) 3.35 1 1 1
forecast2
## # A fable: 9 x 8 [1Y]
## # Key: scenario, .model [3]
## scenario .model Year Consumption .mean Income GDP Total_Employment
## <chr> <chr> <dbl> <dist> <dbl> <dbl> <dbl> <dbl>
## 1 Increase_1 lm 2022 N(2.2, 3.6) 2.18 1 0 0
## 2 Increase_1 lm 2023 N(2.2, 3.6) 2.18 1 0 0
## 3 Increase_1 lm 2024 N(2.2, 3.6) 2.18 1 0 0
## 4 Decrease_0.5 lm 2022 N(2.3, 3.8) 2.26 -0.5 0 0
## 5 Decrease_0.5 lm 2023 N(2.3, 3.8) 2.26 -0.5 0 0
## 6 Decrease_0.5 lm 2024 N(2.3, 3.8) 2.26 -0.5 0 0
## 7 Increase_0.8 lm 2022 N(2.2, 3.7) 2.19 0.8 0 0
## 8 Increase_0.8 lm 2023 N(2.2, 3.7) 2.19 0.8 0 0
## 9 Increase_0.8 lm 2024 N(2.2, 3.7) 2.19 0.8 0 0
1.8: The point forecast for scenario 1 is 3.3541% with a prediction interval of 3.4% to 3.7%. The point forecast when income increases by 1% is 2.18% with a PI of 2.2% to 3.6%. When income is increased by 0.08%, the point forecast is 2.19% with a PI of 2.2% to 3.7%. The point forecast when income is decreasing by 0.5% is 2.26% with a PI of 2.3% to 3.8%. The point forecast for the decreasing income being the highest of the three makes sense because the model produced a negative beta for income.
#The majority of cleaning was preformed in excel
M3_ns <- read_excel("C:/Users/natha/Desktop/Forecasting in R/M3_exam1_transformed.xlsx")
head(M3_ns)
## # A tibble: 6 × 2
## Year Data
## <chr> <dbl>
## 1 1983 Q4 3681.
## 2 1984 Q1 3686.
## 3 1984 Q2 3404.
## 4 1984 Q3 2700.
## 5 1984 Q4 3165.
## 6 1985 Q1 3233.
M3_ns <- M3_ns%>%
mutate(Period = yearquarter(Year))%>%
select(-Year)
head(M3_ns)
## # A tibble: 6 × 2
## Data Period
## <dbl> <qtr>
## 1 3681. 1983 Q4
## 2 3686. 1984 Q1
## 3 3404. 1984 Q2
## 4 2700. 1984 Q3
## 5 3165. 1984 Q4
## 6 3233. 1985 Q1
M3_tsib <- M3_ns%>%
as_tsibble(index = Period)
head(M3_tsib)
## # A tsibble: 6 x 2 [1Q]
## Data Period
## <dbl> <qtr>
## 1 3681. 1983 Q4
## 2 3686. 1984 Q1
## 3 3404. 1984 Q2
## 4 2700. 1984 Q3
## 5 3165. 1984 Q4
## 6 3233. 1985 Q1
M3_tsib %>%
ggplot(aes(x = Period)) +
geom_line(aes(y = Data)) +
labs(title = "Nathan's Data", y = "Data Index")
ns_add <- M3_tsib %>%
model(classical_decomposition(Data, type = "additive")) %>%
components() %>%
autoplot() +
labs(title = "Classical Additive Decomposition")
ns_add
## Warning: Removed 2 row(s) containing missing values (geom_path).
ns_multi <- M3_tsib %>%
model(classical_decomposition(Data, type = "additive")) %>%
components() %>%
autoplot() +
labs(title = "Classical Additive Decomposition")
ns_multi
## Warning: Removed 2 row(s) containing missing values (geom_path).
ns_stl <- M3_tsib %>%
model(STL(Data ~ season(window = 4), robust = TRUE)) %>%
components() %>%
autoplot() +
labs(title = "STL Decomposition")
ns_stl
M3_tsib %>%
features(Data, features = guerrero)
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 0.415
lamda <- 0.415
M3_tsib %>%
autoplot(box_cox(Data, lamda)) +
labs(title = "Box-Cox Transformation", x = "Date")
Interpretation of transformations: Every transformation I performed
above did not show any significant features in the data. There is a
relatively flat trend in all the increases in the later part of the
data. There is a seasonal component to the data.
ns_models <- M3_tsib %>%
model(MEAN(Data),
NAIVE(Data),
RW(Data ~ drift()))
accuracy(ns_models)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Data) Train… -4.04e-14 642. 478. -3.61 14.7 1.04 1.12 0.546
## 2 NAIVE(Data) Train… 7.26e- 1 617. 516. -1.86 15.9 1.13 1.07 -0.118
## 3 RW(Data ~ drift()) Train… 1.45e-13 617. 516. -1.88 15.9 1.13 1.07 -0.118
ns_models
## # A mable: 1 x 3
## `MEAN(Data)` `NAIVE(Data)` `RW(Data ~ drift())`
## <model> <model> <model>
## 1 <MEAN> <NAIVE> <RW w/ drift>
The drift method has a slightly lower RMSE value which means if is the best to forecast to use.
driftfc_ns <- M3_tsib %>%
model(RW(Data ~ drift())) %>%
forecast(h = 4) %>%
autoplot(M3_tsib) +
labs(title = "Drift Forecast")
driftfc_ns