library(tidyquant)
library(tidyverse)
library(fpp3)
library(moments)
library(tsibble)
library(tsibbledata)
library(ggfortify)
library(ggplot2)
library(readxl)
library(dplyr)
library(fabletools)
library(fable)
library(slider)
library(zoo)
library(latex2exp)
library(forecast)
library(seasonal)
#Problem 1
global_economy %>%
filter(Country == "United States") %>%
autoplot(GDP)
#This graph shows the annual GDP of the United States from 1960 to present day.
global_economy %>%
filter(Country == "United States") %>%
autoplot(GDP/Population)
#This graph shows GDP per capita of the United States from 1960 to present day.
BBS <- aus_livestock %>%
filter(State == "Victoria", Animal == "Bulls, bullocks and steers")
BBS %>% autoplot(log(Count)) +
labs(y = "Count", title = "Bulls, Bullocks, and Steers slaughted in Victoria")
vic_elec %>%
autoplot(Demand)
#This graph shows the demand for electricity in Australia by date.
lambda <- vic_elec %>%
features(Demand, features = guerrero) %>%
pull(lambda_guerrero)
vic_elec %>%
autoplot(box_cox(Demand, lambda))
#This graph shows uses a Box-Cox transformation that reduces the distance between the peaks and troughs of the data.
aus_production %>%
autoplot(Gas)
#This graph shows the quarterly gas production in Australia.
lambda <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Gas, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed gas production with $\\lambda$ = ",
round(lambda,2))))
#This graph shows us the quarterly gas production in Austrailia, transformed by lamda 0.12. This transformation makes potential forecasts simplier by roughly equalizing the seasonal variation.
#Problem 2
canadian_gas %>%
autoplot(Volume)
#This graph shows the monthly volume of gas production in Canada.
lambda <- canadian_gas %>%
features(Volume, features = guerrero) %>%
pull(lambda_guerrero)
canadian_gas %>%
autoplot(box_cox(Volume, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed gas production with $\\lambda$ = ",
round(lambda,2))))
#Quarterly Canadian gas production with a Box-Cox transformation.
#The Box-Cox transformation is not useful for the volume of Canadian gas because the variance isn’t constantly going up, instead it has increases and decreases in it variance, making the Box-Cox not an ideal transformation.
#Problem 3
aus_production %>%
autoplot(Tobacco)
## Warning: Removed 24 row(s) containing missing values (geom_path).
#Quarterly tobacco production in Australia
lambda <- aus_production %>%
features(Tobacco, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Tobacco, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed Tobacco production with $\\lambda$ = ",
round(lambda,2))))
## Warning: Removed 24 row(s) containing missing values (geom_path).
#Box-Cox transformed tobacco production (quarterly) in Australia
mel_syd <- ansett %>%
filter(Class == "Economy", Airports == "MEL-SYD")
lambda <- mel_syd %>%
features(Passengers, features = guerrero) %>%
pull(lambda_guerrero)
mel_syd %>%
autoplot(box_cox(Passengers/1e2, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed Economy Passengers with $\\lambda$ = ",
round(lambda,2))))
#Box-Cox transformed number of economy class passengers on airlines going from Melbourne to Sydeney airports in Australia.
southern_cross <- pedestrian %>%
filter(Sensor == "Southern Cross Station")
southern_cross %>%
autoplot(log(Count))
#Logged pedestrian counts at Southern Cross Station
#Problem 4
# 1/3(
# 1/5(y[t-3] + y[t-2] + y[t-1] + y[t] + y[t+1]) +
# 1/5(y[t-2] + y[t-1] + y[t] + y[t+1] + y[t+2]) +
# 1/5(y[t-1] + y[t] + y[t+1] + y[t+2] + y[t+3]) +
# )
# =
# 1/3((1/5)*y[t-3] + (2/5)*y[t-2] + (3/5)*y[t-1] + (3/5)*y[t] + (3/5)*y[t+1] + (3/5)*y[t+2] + (1/5)*y[t+3])
# =
# 1/3(0.2*y[t-3] + 0.4*y[t-2] + 0.6*y[t-1] + 0.6*y[t] + 0.6*y[t+1] + 0.6*y[t+2] + 0.2*y[t+3])
# 0.067y[t-3], 0.133*y[t-2], 0.2*y[t-1], 0.2*y[t], 0.2*y[t+1], 0.2*y[t+2], 0.067*y[t+3]
# weights = 0.067, 0.133, 0.2, 0.2, 0.2, 0.133, 0.067
# count = 7 == 7-term weighted moving average
#Problem 5
gas <- tail(aus_production, 5*4) %>% select(Gas)
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`
#Australian gas production #Gas production is trending upwards and shows seasonality. Production is at its lowest in January and peaks in the summer months.
gas %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>% autoplot() +
labs(title = "Classical Decomp of Australiain Gas Production")
## Warning: Removed 2 row(s) containing missing values (geom_path).
#The classical decomposition supports the conclusions made from the original plot. The data trends upwards and there is definetly seasonality.
gas %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components %>%
select("Quarter", season_adjust) %>%
autoplot(season_adjust) + xlab("Quarter") +
ggtitle("Seasonally Adjusted Gas Production")
#Shows us the seasonally adjusted data for the last five years of gas production in Australia
outlier_gas <- gas
outlier_gas$Gas[10] <- outlier_gas$Gas[10]+300
outlier_gas %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
autoplot() +
ggtitle("Last Five Years of The Gas Data with 300 added to the 10th observation")
## Warning: Removed 2 row(s) containing missing values (geom_path).
#Adding the outlier to the tenth observation caused the trough in Q3 of 2008 to become a peak. Seasonal variation was left relatively uneffected.
outlier_gas2 <- gas
outlier_gas2$Gas[20] <- outlier_gas2$Gas[10]+300
outlier_gas2 %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
autoplot() +
ggtitle("Last Five Years of The Gas Data with 300 added to the last observation")
## Warning: Removed 2 row(s) containing missing values (geom_path).
#Adding 300 to the last entry causes the overall trend look like its about to continue soaring up like an economic boom.
#Problem 6
set.seed(987654321)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
x11_dcmp <- myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11_dcmp) +
labs(title =
"Decomposition of total AUS retail Turnover using X-11.")
#There is a really weird portion where the overall trend is increasing and the seasonality effect is decreasing during the early-mid 2000s. Perhaps retailers struggled to get new employees during those seasons, meaning a higher proportiion of retail workers were people who had kept their jobs for a long time, decreasing the seasonal impact?
#Problem 7
#The overall trend of the civlian labor force in Australia is increasing. However, the variation in the seasonality is getting more extreme over time, meaning the peaks are getting higher, but the troughs are getting lower. The seasonality hits two peaks: the highest being in December and another in March. There are troughs in January and August.
#The recession of 1991-92 is very visible. The graph of the remainder shows a huge downwards spike at that time.
#Problem 8
autoplot(canadian_gas)
## Plot variable not specified, automatically selected `.vars = Volume`
#The seasonality seems to be all over the place.
gg_subseries(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`
#Even though the seasonality is changing, the mean is remarkably consistent.
gg_season(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`
#This plot shows us that there used to be much more consistentancy, but over time there have arisen an increasing number of seasonal patterns.
canadian_gas_stl <- canadian_gas %>%
model(STL(Volume ~ season(window=9), robust=TRUE)) %>%
components()
autoplot(canadian_gas_stl) +
labs(title = "STL decomposition: Canadian Gas Production")
#Seasonality changed by increasing during the early-mid 80s and decreasing from the late 80s until the mid 90s, where it has now remained relatively stable since then.
canadian_gas_stl %>%
select(Month, season_adjust) %>%
autoplot(season_adjust) +
xlab("Month") + ylab("Seasonaly Adj. Production Volume")
canadian_gas_x11 <- canadian_gas %>%
model(x11 = feasts:::X11(Volume)) %>%
components()
## Warning: `X11()` was deprecated in feasts 0.2.0.
## Please use `X_13ARIMA_SEATS()` instead.
## You can specify the X-11 decomposition method by including x11() in the model formula of `X_13ARIMA_SEATS()`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
canadian_gas_x11 %>%
autoplot()
#x11 decomposition of canadian gas production #Seasonality appears to have more variation with an x11 decompositon than in the stl decomposition.
canadian_gas_seats <- canadian_gas %>%
model(x11 = feasts:::SEATS(Volume)) %>%
components()
## Warning: `SEATS()` was deprecated in feasts 0.2.0.
## Please use `X_13ARIMA_SEATS()` instead.
## You can specify the SEATS decomposition method by including seats() in the model formula of `X_13ARIMA_SEATS()`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
canadian_gas_seats %>%
autoplot()
#Decomposition of Canadian gas production using SEATS #Seasonality is much greater in the 1960s and 70s.
#Problem 9
liquor<-aus_retail%>%
filter(Industry == "Liquor retailing" & year(Month)>= 2000)%>%
summarise(Turnover = sum(Turnover))
ggplot(data= liquor)+
geom_line(aes(x=Month, y=Turnover))
#The best moving average to use for this data would be a 12 period moving average. That would show us the moving average across years.
liquor_ma <-liquor%>%
mutate(`12-MA` = slider::slide_dbl(Turnover, mean,
.before = 5, .after = 6, .complete = TRUE))%>%
mutate(`2x12-MA` = slider::slide_dbl(`12-MA`, mean,
.before = 1, .after = 0, .complete = TRUE))
liquor_ma%>%
autoplot(Turnover) +
geom_line(aes(y =`2x12-MA`), colour = "#0072B2") +
labs(y = "Liquor retailing Turnover",
title = "Total AUS Liquor retailing Turnover")+
guides(colour = guide_legend(title = "series"))
## Warning: Removed 12 row(s) containing missing values (geom_path).
#Problem 10
#There are two forms of classical decomposition: additive and multiplicative, and we assume that the seasonal is constant from year to year, meaning we don’t measure a different amount of time periods across years.
#ADDITIVE: If seasonal period (m) is even we compute the trend cycle (Tt) using the formula 2*m - Moving Average (MA). If the seasonal period is odd, compute the trend cycle with m - MA. We then calculate a detrended series using the formula observation (yt) - Tt. Then we take the average of the detrended values, which is equal to 0. Then we replicate that sequence for each year of data, giving us St. We then calculate the remainder (Rt) subtracting Tt and St from yt.
#MULTIPLICATIVE: If seasonal period (m) is even we compute the trend cycle (Tt) using the formula 2m - Moving Average (MA). If the seasonal period is odd, compute the trend cycle with m - MA. We can then calculate the detrended series using the formula yt/Tt. We then take the average of the detrended series to find the seasonal component. The average of the seasonal components should be equal to 1 and is obtained by replicating the sequence for each year of data. Finally, the remainder (Rt) is calculating with the formula yt/(TtSt)