library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble 3.1.8 ✔ tsibble 1.1.2
## ✔ dplyr 1.0.9 ✔ tsibbledata 0.4.0
## ✔ tidyr 1.2.0 ✔ feasts 0.2.2
## ✔ lubridate 1.8.0 ✔ fable 0.3.1
## ✔ ggplot2 3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(readxl)
library(ggplot2)
library(tsibble)
library(lubridate)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ readr 2.1.2 ✔ stringr 1.4.1
## ✔ purrr 0.3.4 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks lubridate::intersect(), base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks lubridate::setdiff(), base::setdiff()
## ✖ tsibble::union() masks lubridate::union(), base::union()
library(dplyr)
library(seasonal)
##
## Attaching package: 'seasonal'
##
## The following object is masked from 'package:tibble':
##
## view
#Q1: 1.
global_economy %>%
filter(Country == "United States") %>%
autoplot(GDP / 10 ^ 12) +
labs(title= "US GPD", y = "$USD in Trillion")
The graph has an obvious upward trend with no seasonality. since the data does not follow a seasonal pattern, I found unnecessary to transform the data using a Box-Cox transformation.
#———————————————————————————————————————————-
Q1: 2.
aus_livestock %>%
filter(Animal == "Bulls, bullocks and steers",
State == "Queensland") %>%
autoplot(Count) +
labs(title= "Slaughter of Victoria Bulls, Bullocks, and Steers")
This is the untransformed version of the data. I narrowed the state to just Queensland to make the interpretation easier. There is an overall positive trend but is very seasonal. Since the seasonal component does not seem to fluctuate with the trend, I decided to use a additive classical decomposition seen below.
livestockdecomp <- aus_livestock %>%
filter(Animal == "Bulls, bullocks and steers",
State == "Queensland") %>%
model(classical_decomposition(Count, type = "additive"))
components(livestockdecomp) %>%
autoplot() +
labs(title = "Classical Multiplicative Decomposition of Austrialian Livestock")
## Warning: Removed 6 row(s) containing missing values (geom_path).
The decomposition shows the a more obvious trend in the data with seasonal component showing the month to month variation in the data.
#———————————————————————————————————————————-
Q1: 3.
elect <- vic_elec %>%
group_by(Date) %>%
mutate(Demand = sum(Demand)) %>%
distinct(Date, Demand)
#This changes the data from from every 30 minutes to daily
elect %>%
as_tsibble(index = Date) %>%
autoplot(Demand) +
labs(title= "Daily Victorian Electricity Demand", y = "$US (in trillions)")
elect %>%
mutate(Date = yearmonth(Date)) %>%
group_by(Date) %>%
summarise(Demand = sum(Demand)) %>%
as_tsibble(index = Date) %>%
autoplot(Demand) +
labs(title= "Monthly Victorian Electricity Demand", y = "$US (in trillions)")
#This put the data into yearly
Q1: 4.
aus_production %>%
autoplot(Gas) +
labs(title = "Non-Transformed Gas Production")
This graph’s seasonal variation changes in magnitute across time. The Box-Cox transformation, shown below, fixes this so that the seasonal variation is relatively constant across time.
lambda <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Gas, lambda)) +
labs(y = "",
title = "Transformed gas production",
round(lambda,2))
#———————————————————————————————————————
canadian_gas %>%
autoplot(Volume) +
labs(title = "Pre Box Cox")
lambda <- canadian_gas %>%
features(Volume, features = guerrero) %>%
pull(lambda_guerrero)
canadian_gas %>%
autoplot(box_cox(Volume, lambda)) +
labs(y = "", title = "Box Cox",
round(lambda,2))
The two graphs look almost identical to one another dispite the Box-Cox transformation’s attempt at standardizing the seasonality of the data. This is likely due to the large variation in the data between 1978 and 1988.
#———————————————————————————————————————
autoplot(aus_production, Tobacco)+
labs(title = "Tobacco and Cigarette Production (Metric Tons)")
## Warning: Removed 24 row(s) containing missing values (geom_path).
lambda <- aus_production %>%
features(Tobacco, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Tobacco, lambda)) +
labs(y = "", title = "Transformed Tobacco Production (Metric Tons)",
round(lambda,2))
## Warning: Removed 24 row(s) containing missing values (geom_path).
southern_cross <- pedestrian %>%
filter(Sensor == "Southern Cross Station")
autoplot(southern_cross, Count) +
labs(title = "Hourly Pedestrian Counts at Southern Cross Station")
lambda <- southern_cross %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
southern_cross %>%
autoplot(box_cox(Count, lambda)) +
labs(y = "", title = "Transformed Hourly Pedestrian Counts",
round(lambda,2))
southern_cross <- southern_cross %>%
index_by(Date) %>%
summarise(Count = sum(Count))
autoplot(southern_cross, Count)+
labs(title = "Daily Pedestrian Counts at Southern Cross Station")
lambda <- southern_cross %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
southern_cross %>%
autoplot(box_cox(Count, lambda)) +
labs(y = "", title = "Transformed Daily Pedestrian Counts with $\\lambda$ = ",
round(lambda,2))
southern_cross <- southern_cross %>%
mutate(Week = yearweek(Date)) %>%
index_by(Week) %>%
summarise(Count = sum(Count))
autoplot(southern_cross, Count)+
labs(title = "Weekly Pedestrian Counts at Southern Cross Station")
lambda <- southern_cross %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
southern_cross %>%
autoplot(box_cox(Count, lambda)) +
labs(y = "", title = "Transformed Weekly Pedestrian Counts",
round(lambda,2))
#———————————————————————————————————————
3x5 MA = (((X1+X2+X3+X4+X5)/5)+((X2+X3+X4+X5+X6)/5)+((X3+X4+X5+X6+X7)/5)) Combine like terms 3x5 MA = (1/15)(X1+2X2+3X3+3X4+3X5+2X6+X7) Distribute (1/15) to coefficients 1/15 = 0.067 2/15 = 0.133 3/15 = 0.2 3/15 = 0.2 3/15 = 0.2 2/15 = 0.133 1/15 = 0.067
gas <- tail(aus_production, 5*4) %>% select(Gas)
gas%>%
autoplot(Gas)
There is a positive trend with a yearly cycle. The seasonality is
quarterly with an increase in Q1 and Q2 and a decrease in Q3 and Q4.
gas_dcmp <- gas %>%
model(classical_decomposition(Gas, type = "multiplicative"))
components(gas_dcmp) %>%
autoplot() +
labs(title = "Classical Multiplicative Decomposition of Gas Production")
## Warning: Removed 2 row(s) containing missing values (geom_path).
3. The results of the classical decomposition match the previous
interpretation of the orginal graph.
components(gas_dcmp) %>%
as_tsibble() %>%
autoplot(Gas, colour = "gray") +
geom_line(aes(y=season_adjust), colour = "#0072B2") +
labs(title = "Seasonally Adjusted Gas Production")
The seasonal adjustment shows the general trend and the cyclic component
of the data. This is why the blue line does not match the trend line
from the decomposition.
gas %>%
mutate(Gas = ifelse(Gas == 229, Gas + 300, Gas)) %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(Gas, colour = "gray") +
geom_line(aes(y=season_adjust), colour = "#0072B2") +
labs(title = "Seasonally Adjusted Gas Production with an Outlier")
The outlier caused a spike in graph in Q2 of 2008. The seasonally adjusted graph did not increase as significantly because it is a residual.
gas %>%
mutate(Gas = ifelse(Gas == 236, Gas + 300, Gas)) %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(Gas, colour = "gray") +
geom_line(aes(y=season_adjust), colour = "#0072B2") +
labs(title = "Seasonally Adjusted Gas Production with an Outlier at the End")
There is no significant difference between the outlier being in the
middle or at the end. The data graph and seasonally adjusted graphs
increased greatly with the seasonally adjusted graph increasing slightly
less.
set.seed(11082000)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries%>%
autoplot(Turnover) + labs(title = "Retail Turnover")
x11_dcmp <- myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11_dcmp) +
labs(title = "Decomposition of Retail Turnover using X-11.")
The irregular graph has a few spikes like the one just after January 2000. This irregularity wasn’t previously seen.
autoplot(canadian_gas)
## Plot variable not specified, automatically selected `.vars = Volume`
gg_season(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`
gg_subseries(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`
The autoplot graph’s seasonality starts with little variation in the
data but increases in magnitude in 1978 though 1988. The seasonality
from 1988 decreases in magnitude but continues to vary until 2005.
canadian_gas %>%
model( STL(Volume ~ trend(window = 7) +
season(window = "periodic"),
robust = TRUE)) %>%
components() %>%
autoplot()
gg_season(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`
The seasonal trend increases in every month through time.
dcmp <- canadian_gas %>%
model(STL(Volume))
components(dcmp)
## # A dable: 542 x 7 [1M]
## # Key: .model [1]
## # : Volume = trend + season_year + remainder
## .model Month Volume trend season_year remainder season_adjust
## <chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 STL(Volume) 1960 Jan 1.43 1.08 0.520 -0.172 0.911
## 2 STL(Volume) 1960 Feb 1.31 1.11 0.215 -0.0178 1.09
## 3 STL(Volume) 1960 Mar 1.40 1.13 0.307 -0.0395 1.09
## 4 STL(Volume) 1960 Apr 1.17 1.16 0.0161 -0.00627 1.15
## 5 STL(Volume) 1960 May 1.12 1.18 -0.116 0.0476 1.23
## 6 STL(Volume) 1960 Jun 1.01 1.21 -0.356 0.159 1.37
## 7 STL(Volume) 1960 Jul 0.966 1.23 -0.403 0.136 1.37
## 8 STL(Volume) 1960 Aug 0.977 1.26 -0.349 0.0677 1.33
## 9 STL(Volume) 1960 Sep 1.03 1.28 -0.340 0.0870 1.37
## 10 STL(Volume) 1960 Oct 1.25 1.31 -0.0899 0.0329 1.34
## # … with 532 more rows
canadian_gas %>%
autoplot(Volume, color='gray') +
autolayer(components(dcmp),
season_adjust, color='blue') +
xlab("Month") + ylab("Volume (meters)") + ggtitle("Total Usage")
x11_dcmp <- canadian_gas %>%
model(x11 = feasts:::X11(Volume, type = "additive")) %>%
components()
## Warning: `X11()` was deprecated in feasts 0.2.0.
## Please use `X_13ARIMA_SEATS()` instead.
## You can specify the X-11 decomposition method by including x11() in the model formula of `X_13ARIMA_SEATS()`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
autoplot(x11_dcmp) + xlab("Year") + ggtitle("Additive X11 decomposition of Canadian Usage")
seats_dcmp <- canadian_gas %>%
model(seats = feasts:::SEATS(Volume)) %>%
components()
## Warning: `SEATS()` was deprecated in feasts 0.2.0.
## Please use `X_13ARIMA_SEATS()` instead.
## You can specify the SEATS decomposition method by including seats() in the model formula of `X_13ARIMA_SEATS()`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
autoplot(seats_dcmp) + xlab("Year") + ggtitle("SEATS decomposition of total Canadian Usage")
The overal trend of the seasonally adjust graph and the additive x 11
graphs are very similar, however, the SEATS trend has more variation in
its trend.
#1.
liquor<-aus_retail%>%
filter(Industry == "Liquor retailing" & year(Month)>= 2000)%>%
summarise(Turnover = sum(Turnover))
ggplot(data= liquor)+
geom_line(aes(x=Month, y=Turnover))
2.Since this is monthly data, m is even(12) which requires a moving average of a moving average. This mean a 2X12 MA would be the best option.
liquor <-liquor%>%
mutate(`12-MA` = slider::slide_dbl(Turnover, mean,
.before = 5, .after = 6, .complete = TRUE))%>%
mutate(`2x12-MA` = slider::slide_dbl(`12-MA`, mean,
.before = 1, .after = 0, .complete = TRUE))
liquor%>%
autoplot(Turnover) +
geom_line(aes(y =`2x12-MA`), colour = "#0072B2") +
labs(y = "Liquor retailing Turnover",
title = "Total AUS Liquor retailing Turnover")+
guides(colour = guide_legend(title = "series"))
## Warning: Removed 12 row(s) containing missing values (geom_path).
Step 1: Compute the trend-cycle component (using moving average if m = odd or moving average of a moving average if m = even) Step 2: Calculate the detrended series (knownobs - trend_component) Step 3: Estimate seasonal component (average detrended values for each season) Step 4: Calculate remainder (remainder = knownobs - trend_Component- Seasonal_Component)