library(tsibble)
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(tsibbledata)
library(ggfortify)
## Loading required package: ggplot2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(USgas)
library(readr)
library(tidyr)
library(readxl)
library(httr)
library(feasts)
## Loading required package: fabletools
library(stats)
library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.6 v fable 0.3.1
## v lubridate 1.8.0
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x lubridate::interval() masks tsibble::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
ge = global_economy
ge %>% select(Country,GDP,Population,Year) %>% autoplot(GDP/Population,show.legend=FALSE)
## Warning: Removed 3242 row(s) containing missing values (geom_path).
max_data = ge %>% select(Country,GDP,Population,Year) %>% mutate(GDP_Capita=GDP/Population)
new_max_df = data.frame(max_data$Country,max_data$GDP_Capita)
max_country = new_max_df[complete.cases(new_max_df),] %>% group_by(max_data.Country) %>%summarise(max_gdp=max(max_data.GDP_Capita)) %>% filter(max_gdp==max(max_gdp))
print(max_country)
## # A tibble: 1 x 2
## max_data.Country max_gdp
## <fct> <dbl>
## 1 Monaco 185153.
ge %>% select(Country,GDP,Population,Year) %>% filter(Country==max_country$max_data.Country[[1]]) %>% autoplot(GDP/Population,show.legend=FALSE)
## Warning: Removed 11 row(s) containing missing values (geom_path).
us_gdp = global_economy %>% filter(Country == 'United States') %>% select(GDP)
bull_bullocks_steers = aus_livestock %>% filter(Animal=='Bulls, bullocks and steers') %>% filter(State=='Victoria')
victorian_elec = vic_elec %>% select(Demand)
gas_prod = aus_production %>% select(Gas)
3.2 - Pre Transformation: global_economy
us_gdp %>% autoplot()
## Plot variable not specified, automatically selected `.vars = GDP`
gdplambda = us_gdp %>% features(GDP,features=guerrero) %>% pull(lambda_guerrero)
us_gdp %>%
autoplot(box_cox(GDP, gdplambda))
Due to the constant variance and lack of seasonality in the datset we can apply a transformation to smooth the timeseries.
bull_bullocks_steers %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Count`
lambda = bull_bullocks_steers %>% features(Count,features=guerrero) %>% pull(lambda_guerrero)
bull_bullocks_steers %>%
autoplot(box_cox(Count, lambda))
The box cox transformation on the aus_livestock data set is not applicable due to the nonconstant variance and irregularity of the trends in the dataset.
victorian_elec %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Demand`
vlambda = victorian_elec %>% features(Demand,features=guerrero) %>% pull(lambda_guerrero)
victorian_elec %>%
autoplot(box_cox(Demand, vlambda))
The box-cox transformation on the vic_elec data set is not applicable due to the nonconstant variance and irregularity of the trends in the dataset.
gas_prod %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Gas`
glambda = gas_prod %>% features(Gas,features=guerrero) %>% pull(lambda_guerrero)
gas_prod %>%
autoplot(box_cox(Gas, glambda))
The box cox transformation shows that gas trends spiked earlier than initially represented. Also variability at the end of the dataset was minimal as compared to the non transformed dataset. Applicable as the seasonality and trends appear to be constant.
cg = canadian_gas
non_box_cox = autoplot(cg)
## Plot variable not specified, automatically selected `.vars = Volume`
cglambda = cg %>% features(Volume,features=guerrero) %>% pull(lambda_guerrero)
box_cox = cg %>%
autoplot(box_cox(Volume, cglambda))
non_box_cox
box_cox
Box Cox transformation over fits the transformation. Due to the non constant variance present in this data set you can’t apply the Box Cox transformation to make the data stationary. To clarify further the seasonality trends change overtime and would require a nonlinear dynamic transformation to accurately account for this.
## 3.4
set.seed(12345678)
myretailseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myretailseries %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Turnover`
arlambda = myretailseries %>% features(Turnover,features=guerrero) %>% pull(lambda_guerrero)
box_cox_retail = myretailseries %>%
autoplot(box_cox(Turnover, arlambda))
box_cox_retail
I would utilize a classic Box-Cox approach, not modified for the possibility of negative values, that utilizes the guerrero feature to auto select the lambda value.
ape = aus_production %>% select(Tobacco)
ape_lambda = ape %>% features(Tobacco,features=guerrero) %>% pull(lambda_guerrero)
ans_ems = ansett %>% filter(Class=='Economy',Airports=='MEL-SYD') %>% select(Passengers)
ans_ems_lambda = ans_ems %>% features(Passengers,features=guerrero) %>% pull(lambda_guerrero)
peds_scs = pedestrian %>% filter(Sensor=='Southern Cross Station') %>% select(Count)
peds_scs_lambda = peds_scs %>% features(Count,features=guerrero) %>% pull(lambda_guerrero)
## Pre Transformation: aus_production
ape %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Tobacco`
## Warning: Removed 24 row(s) containing missing values (geom_path).
## Post Transformation: aus_production
ape %>% autoplot(box_cox(Tobacco, ape_lambda))
## Warning: Removed 24 row(s) containing missing values (geom_path).
## Pre Transformation: ansett
ans_ems %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Passengers`
## Post Transformation: ansett
ans_ems %>% autoplot(box_cox(Passengers, ans_ems_lambda))
## Pre Transformation: pedestrian
peds_scs %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Count`
## Post Transformation: pedestrian
peds_scs %>% autoplot(box_cox(Count, peds_scs_lambda))
gas <- tail(aus_production, 5*4) %>% select(Gas)
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`
There is an semi-annual cycle where sales are at their lowest point at the beginning of the year, peak at Q2, and decline until to the start of the next year. The chart reflects relatively constant variance through these cycles.
gas %>%
model(
classical_decomposition(Gas, type = "multiplicative")
) %>%
components() %>%
autoplot() +
labs(title = "Classical additive decomposition of total
Gas")
## Warning: Removed 2 row(s) containing missing values (geom_path).
Seasonal fluctuations align with the interpretation from part a. Trend does not and just reflects and upward movement over time.
gas %>%
model(
classical_decomposition(Gas, type = "multiplicative")
) %>%
components() %>% select(season_adjust) %>%
autoplot() +
labs(title = "Seasonally Adjusted Gas")
## Plot variable not specified, automatically selected `.vars = season_adjust`
gas[15,1] = gas[15,1]+300
gas %>%
model(
classical_decomposition(Gas, type = "multiplicative")
) %>%
components() %>% select(season_adjust) %>%
autoplot() +
labs(title = "Seasonally Adjusted Gas")
## Plot variable not specified, automatically selected `.vars = season_adjust`
The outlier creates a one time spike in the trend and then reverts back to originally reflected seasonality.
begout_gas <- tail(aus_production, 5*4) %>% select(Gas)
begout_gas[2,1] = begout_gas[2,1] +300
middleout_gas <- tail(aus_production, 5*4) %>% select(Gas)
middleout_gas[10,1] = middleout_gas[10,1] +300
endout_gas <- tail(aus_production, 5*4) %>% select(Gas)
endout_gas[18,1] = endout_gas[18,1] +300
begout_gas%>%
model(
classical_decomposition(Gas, type = "multiplicative")
) %>%
components() %>% select(season_adjust) %>%
autoplot() +
labs(title = "Seasonally Adjusted:Beginning Outlier")
## Plot variable not specified, automatically selected `.vars = season_adjust`
middleout_gas %>%
model(
classical_decomposition(Gas, type = "multiplicative")
) %>%
components() %>% select(season_adjust) %>%
autoplot() +
labs(title = "Seasonally Adjusted:Middle Outlier")
## Plot variable not specified, automatically selected `.vars = season_adjust`
endout_gas %>%
model(
classical_decomposition(Gas, type = "multiplicative")
) %>%
components() %>% select(season_adjust) %>%
autoplot() +
labs(title = "Seasonally Adjusted:Middle Outlier")
## Plot variable not specified, automatically selected `.vars = season_adjust`
The location of the outlier determines where the spike caused by the outlier is. In addition it also determines where slight increases and decreases in variation are reflected in the trend line.
x11_dcmp <- myretailseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11_dcmp) +
labs(title =
"Decomposition of total AUS retail employment using X-11.")
Using the X-11 method I noticed that seasonality is more constant than I initially assumed. In addition the the variability in the irregularity component has less variability at the tail of the dataset which is very interesting.
The decomposition of the Australian Labour Force indicates that the trend component is fairly smooth and is moving in a consistently upward fashion. The seasonal component is fairly constant with variability oscillating between 100 and -100. The remainder component has outliers towards the tail of the data indicating the occurrence of the recession. The seasonality and remainder components however, based on scale, don’t create vast deviations in the trends but rather small oscillations that would create headaches with predictions if you were to use the original non cleaned dataset.
The recession is visible in the remainder component.