library(dplyr)
library(ggplot2)
library(tidyr)
library(tibble)
library(tsibble)
library(ggfortify)
library(tidyverse)
library(fpp3)
library(moments)
library(zoo)
library(fable)
library(sos)
library(slider)
library(seasonal)
library(x13binary)
library(readxl)
head(global_economy)
## # A tsibble: 6 x 9 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1960 537777811. NA NA 7.02 4.13 8996351
## 2 Afghanistan AFG 1961 548888896. NA NA 8.10 4.45 9166764
## 3 Afghanistan AFG 1962 546666678. NA NA 9.35 4.88 9345868
## 4 Afghanistan AFG 1963 751111191. NA NA 16.9 9.17 9533954
## 5 Afghanistan AFG 1964 800000044. NA NA 18.1 8.89 9731361
## 6 Afghanistan AFG 1965 1006666638. NA NA 21.4 11.3 9938414
global_economy %>%
filter(Country == "United States") %>%
autoplot(GDP/Population)
#A plot of GDP per capita for the United States. Adjusting for population gives a more accurate depiction of how an economy is actually performing.
Aus1 <- aus_livestock %>%
filter(Animal == "Bulls, bullocks and steers")%>%
index_by(Year = year(Month))
head(Aus1)
## # A tsibble: 6 x 5 [1M]
## # Key: Animal, State [1]
## # Groups: @ Year [1]
## Month Animal State Count Year
## <mth> <fct> <fct> <dbl> <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory 2300 1976
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory 2100 1976
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory 2100 1976
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory 1900 1976
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory 2100 1976
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory 1800 1976
Aus1 %>%
filter(State == "Victoria")%>%
autoplot(Count)
## `mutate_if()` ignored the following grouping variables:
## * Column `Year`
#There is a very clear decreasing trend of Bulls, bullocks and steers in Victoria as the plot shows. No transformation needed.
Vic.Elec <- vic_elec %>%
filter(grepl("2013", Time))
Vic.Elec %>%
features(Demand, features = guerrero)
## # A tibble: 1 x 1
## lambda_guerrero
## <dbl>
## 1 0.109
#.109
Vic.Elec %>%
autoplot(box_cox(Demand, 0.0109))+ labs(y = "Box-Cox transformation")
# The variability is a little better than it was before and the data is much easier on the eyes when only a single year is being looked at and a box-cox transformation is used.
aus_production %>%
autoplot(Gas)
BCaus <- aus_production %>%
features(Gas, features = guerrero)
#0.121
aus_production %>%
autoplot(box_cox(Gas, 0.121)) + labs(y = "Box-Cox transformation")
#The variance is much more constant now that a box-cox transformation has been used.
autoplot(canadian_gas)
## Plot variable not specified, automatically selected `.vars = Volume`
CangasBC <- canadian_gas %>%
features(Volume, features = guerrero)
CangasBC
## # A tibble: 1 x 1
## lambda_guerrero
## <dbl>
## 1 0.392
#0.392
canadian_gas %>%
autoplot(box_cox(Volume, .392)) + labs(y= "Box-Cox transformation")
# It looks very similar as it did prior to the box-cox transformation. The variance becomes a bit more constant, but there wasn't a lot of heteroskedasticity to begin with.
autoplot(aus_production, Tobacco)
## Warning: Removed 24 row(s) containing missing values (geom_path).
TobaccoBC <- aus_production %>%
features(Tobacco, features= guerrero)
TobaccoBC
## # A tibble: 1 x 1
## lambda_guerrero
## <dbl>
## 1 0.929
aus_production %>%
autoplot(box_cox(Tobacco, .929)) + labs(y = "Box-Cox transformation")
## Warning: Removed 24 row(s) containing missing values (geom_path).
The lambda value the Guerrero feature gives us is really close to 1, therefore it is not going to change the plot dramatically. The variance is relatively constant to begin with.
autoplot(ansett, Passengers)
#That is a mess
Melb_Syd <- ansett %>%
filter(Airports == "MEL-SYD")
Melb_Syd2 <- Melb_Syd %>%
filter(Class == "Economy")
autoplot(Melb_Syd2)
## Plot variable not specified, automatically selected `.vars = Passengers`
Melb_Syd2 %>%
features(Passengers, features= guerrero)
## # A tibble: 1 x 3
## Airports Class lambda_guerrero
## <chr> <chr> <dbl>
## 1 MEL-SYD Economy 2.00
#Lambda = 2
Melb_Syd2 %>%
autoplot(box_cox(Passengers, 2.00)) + labs(y= "Box-Cox transformation")
autoplot(pedestrian, Count)
Ped2 <- pedestrian %>%
filter(Sensor == "Southern Cross Station")
autoplot(Ped2, Count)
Ped2 %>%
features(Count, features = guerrero)
## # A tibble: 1 x 2
## Sensor lambda_guerrero
## <chr> <dbl>
## 1 Southern Cross Station -0.226
#Lambda = -.226
Ped2 %>%
autoplot(box_cox(Count, -.226)) + labs(y= "Box-Cox transformation")
movinga <- readxl::read_excel("C:\\Users\\Zacha\\OneDrive\\Documents\\Movingav.xlsx")
y_ma5 <- movinga %>%
rollmean(num, k = 5, fill = NA)
y_ma3 <- y_ma5 %>%
rollmean(num, k = 3, fill = NA)
y_ma3
## num
## [1,] NA
## [2,] NA
## [3,] NA
## [4,] NA
## [5,] NA
## [6,] NA
z <- c(0.067, 0.133, 0.2, 0.2, 0.2, 0.133, 0.067)
z[1] = 0.067*(1/15)
z[2] = 0.133*(2/15)
z[3] = 0.200*(3/15)
z[4] = 0.200*(3/15)
z[5] = 0.200*(3/15)
z[6] = 0.133*(2/15)
z[7] = 0.067*(1/15)
z_sum <- sum(z)
z_sum
## [1] 0.1644
gas <- tail(aus_production, 5*4) %>% select(Gas)
head(gas)
## # A tsibble: 6 x 2 [1Q]
## Gas Quarter
## <dbl> <qtr>
## 1 221 2005 Q3
## 2 180 2005 Q4
## 3 171 2006 Q1
## 4 224 2006 Q2
## 5 233 2006 Q3
## 6 192 2006 Q4
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`
It definitely has a positive trend there is a consistent increase from year to year. There appears to be a spike halfway through each year which implies a quarterly seasonality.
gas2 <- gas %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
autoplot() +
labs(title = "Classical Decomposition")
gas2
## Warning: Removed 2 row(s) containing missing values (geom_path).
There is clearly an upward trend and the seasonality spikes in one spot each time showing our quarterly seasonality. I would say the Classical Decomposition certainly supports the interpretation from a. autoplot(gas)
seasonal <- as_tsibble(gas) %>%
model(classical_decomposition(Gas ~ season(4), type = "mult")) %>%
components()
seasonal
## # A dable: 20 x 7 [1Q]
## # Key: .model [1]
## # : Gas = trend * seasonal * random
## .model Quarter Gas trend seaso~1 random seaso~2
## <chr> <qtr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "classical_decomposition(Gas ~ se~ 2005 Q3 221 NA 1.13 NA 196.
## 2 "classical_decomposition(Gas ~ se~ 2005 Q4 180 NA 0.925 NA 195.
## 3 "classical_decomposition(Gas ~ se~ 2006 Q1 171 200. 0.875 0.974 195.
## 4 "classical_decomposition(Gas ~ se~ 2006 Q2 224 204. 1.07 1.02 209.
## 5 "classical_decomposition(Gas ~ se~ 2006 Q3 233 207 1.13 1.00 207.
## 6 "classical_decomposition(Gas ~ se~ 2006 Q4 192 210. 0.925 0.987 208.
## 7 "classical_decomposition(Gas ~ se~ 2007 Q1 187 213 0.875 1.00 214.
## 8 "classical_decomposition(Gas ~ se~ 2007 Q2 234 216. 1.07 1.01 218.
## 9 "classical_decomposition(Gas ~ se~ 2007 Q3 245 219. 1.13 0.996 218.
## 10 "classical_decomposition(Gas ~ se~ 2007 Q4 205 219. 0.925 1.01 222.
## 11 "classical_decomposition(Gas ~ se~ 2008 Q1 194 219. 0.875 1.01 222.
## 12 "classical_decomposition(Gas ~ se~ 2008 Q2 229 219 1.07 0.974 213.
## 13 "classical_decomposition(Gas ~ se~ 2008 Q3 249 219 1.13 1.01 221.
## 14 "classical_decomposition(Gas ~ se~ 2008 Q4 203 220. 0.925 0.996 219.
## 15 "classical_decomposition(Gas ~ se~ 2009 Q1 196 222. 0.875 1.01 224.
## 16 "classical_decomposition(Gas ~ se~ 2009 Q2 238 223. 1.07 0.993 222.
## 17 "classical_decomposition(Gas ~ se~ 2009 Q3 252 225. 1.13 0.994 224.
## 18 "classical_decomposition(Gas ~ se~ 2009 Q4 210 226 0.925 1.00 227.
## 19 "classical_decomposition(Gas ~ se~ 2010 Q1 205 NA 0.875 NA 234.
## 20 "classical_decomposition(Gas ~ se~ 2010 Q2 236 NA 1.07 NA 220.
## # ... with abbreviated variable names 1: seasonal, 2: season_adjust
seasonal %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, color = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, color = "Trend")) +
labs(y = "Gas",
title = "Gas Production") +
scale_color_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
## Warning: Removed 4 row(s) containing missing values (geom_path).
outlier <- gas
outlier[1,1]<- gas[1,1] + 300
head(outlier)
## # A tsibble: 6 x 2 [1Q]
## Gas Quarter
## <dbl> <qtr>
## 1 521 2005 Q3
## 2 180 2005 Q4
## 3 171 2006 Q1
## 4 224 2006 Q2
## 5 233 2006 Q3
## 6 192 2006 Q4
outlierCD <- outlier %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
autoplot() +
labs(title = "Classical Decomposition with outlier")
outlierCD
## Warning: Removed 2 row(s) containing missing values (geom_path).
Soutlier<-as_tsibble(outlier) %>%
model(classical_decomposition(Gas ~ season(4), type = "mult")) %>%
components()
Soutlier
## # A dable: 20 x 7 [1Q]
## # Key: .model [1]
## # : Gas = trend * seasonal * random
## .model Quarter Gas trend seaso~1 random seaso~2
## <chr> <qtr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "classical_decomposition(Gas ~ se~ 2005 Q3 521 NA 1.14 NA 459.
## 2 "classical_decomposition(Gas ~ se~ 2005 Q4 180 NA 0.933 NA 193.
## 3 "classical_decomposition(Gas ~ se~ 2006 Q1 171 238 0.849 0.846 201.
## 4 "classical_decomposition(Gas ~ se~ 2006 Q2 224 204. 1.08 1.02 207.
## 5 "classical_decomposition(Gas ~ se~ 2006 Q3 233 207 1.14 0.992 205.
## 6 "classical_decomposition(Gas ~ se~ 2006 Q4 192 210. 0.933 0.979 206.
## 7 "classical_decomposition(Gas ~ se~ 2007 Q1 187 213 0.849 1.03 220.
## 8 "classical_decomposition(Gas ~ se~ 2007 Q2 234 216. 1.08 1.00 216.
## 9 "classical_decomposition(Gas ~ se~ 2007 Q3 245 219. 1.14 0.987 216.
## 10 "classical_decomposition(Gas ~ se~ 2007 Q4 205 219. 0.933 1.00 220.
## 11 "classical_decomposition(Gas ~ se~ 2008 Q1 194 219. 0.849 1.04 229.
## 12 "classical_decomposition(Gas ~ se~ 2008 Q2 229 219 1.08 0.965 211.
## 13 "classical_decomposition(Gas ~ se~ 2008 Q3 249 219 1.14 1.00 219.
## 14 "classical_decomposition(Gas ~ se~ 2008 Q4 203 220. 0.933 0.987 218.
## 15 "classical_decomposition(Gas ~ se~ 2009 Q1 196 222. 0.849 1.04 231.
## 16 "classical_decomposition(Gas ~ se~ 2009 Q2 238 223. 1.08 0.985 220.
## 17 "classical_decomposition(Gas ~ se~ 2009 Q3 252 225. 1.14 0.986 222.
## 18 "classical_decomposition(Gas ~ se~ 2009 Q4 210 226 0.933 0.996 225.
## 19 "classical_decomposition(Gas ~ se~ 2010 Q1 205 NA 0.849 NA 242.
## 20 "classical_decomposition(Gas ~ se~ 2010 Q2 236 NA 1.08 NA 218.
## # ... with abbreviated variable names 1: seasonal, 2: season_adjust
Soutlier %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, color = "Data")) +
geom_line(aes(y = season_adjust,
color = "Seasonally Adjusted")) +
geom_line(aes(y = trend, color = "Trend")) +
labs(y = "Gas",
title = "Gas Production with an outlier at the beginning") +
scale_color_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
## Warning: Removed 4 row(s) containing missing values (geom_path).
The outlier has made a very obvious impact on the seasonally adjusted
data. A giant spike in the beginning of the data is hard to miss. The
trend is thrown off and takes about half a year to begin the upward
trend with the outlier.
outliermid <- gas
outliermid[12,1] <- gas[12,1] +300
outliermid
## # A tsibble: 20 x 2 [1Q]
## Gas Quarter
## <dbl> <qtr>
## 1 221 2005 Q3
## 2 180 2005 Q4
## 3 171 2006 Q1
## 4 224 2006 Q2
## 5 233 2006 Q3
## 6 192 2006 Q4
## 7 187 2007 Q1
## 8 234 2007 Q2
## 9 245 2007 Q3
## 10 205 2007 Q4
## 11 194 2008 Q1
## 12 529 2008 Q2
## 13 249 2008 Q3
## 14 203 2008 Q4
## 15 196 2009 Q1
## 16 238 2009 Q2
## 17 252 2009 Q3
## 18 210 2009 Q4
## 19 205 2010 Q1
## 20 236 2010 Q2
outliermid2 <- outliermid %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
autoplot() +
labs(title = "Classical Decomposition with Middle Outlier")
outliermid2
## Warning: Removed 2 row(s) containing missing values (geom_path).
outliermid3 <-as_tsibble(outliermid) %>%
model(classical_decomposition(Gas ~ season(4), type = "mult")) %>%
components()
outliermid3 %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, color = "Data")) +
geom_line(aes(y = season_adjust,
color = "Seasonally Adjusted")) +
geom_line(aes(y = trend, color = "Trend")) +
labs(y = "Gas",
title = "Gas Production with a middle outlier") +
scale_color_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
## Warning: Removed 4 row(s) containing missing values (geom_path).
outlierend <- gas
outlierend[20,1] <- gas[20,1] +300
outlierend
## # A tsibble: 20 x 2 [1Q]
## Gas Quarter
## <dbl> <qtr>
## 1 221 2005 Q3
## 2 180 2005 Q4
## 3 171 2006 Q1
## 4 224 2006 Q2
## 5 233 2006 Q3
## 6 192 2006 Q4
## 7 187 2007 Q1
## 8 234 2007 Q2
## 9 245 2007 Q3
## 10 205 2007 Q4
## 11 194 2008 Q1
## 12 229 2008 Q2
## 13 249 2008 Q3
## 14 203 2008 Q4
## 15 196 2009 Q1
## 16 238 2009 Q2
## 17 252 2009 Q3
## 18 210 2009 Q4
## 19 205 2010 Q1
## 20 536 2010 Q2
outlierend2 <- outlierend %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
autoplot() +
labs(title = "Classical Decomposition with End Outlier")
outlierend2
## Warning: Removed 2 row(s) containing missing values (geom_path).
outlierend3 <-as_tsibble(outlierend) %>%
model(classical_decomposition(Gas ~ season(4), type = "mult")) %>%
components()
outlierend3 %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, color = "Data")) +
geom_line(aes(y = season_adjust,
color = "Seasonally Adjusted")) +
geom_line(aes(y = trend, color = "Trend")) +
labs(y = "Gas",
title = "Gas Production with an outlier at the end") +
scale_color_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
## Warning: Removed 4 row(s) containing missing values (geom_path).
It is different when the outlier is in the middle rather than the end. The trend remains positive all the way through with an outlier at the end, it just has a spike at the end. With an outlier in the middle the trend has to settle back in which leads to it decreasing for a portion of time.
set.seed(1738133)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries)
## Plot variable not specified, automatically selected `.vars = Turnover`
x11 <- myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11) + labs(title = "Decompostion using x11")
###Exercise 7
There is a clear steady positive trend throughout the years that begins to plateau slightly in the early 90s, but always appears to remain positive. A few months have higher variations ranges than the others mainly: March, July, and August. The mean for each month vary greatly over the years with December having the highest by far.
The recession in 1991 and 1992 is visible through the positive trend plateauing at approximately that time. The seasonality appears to offer no explanation for it meaning it’s clearly a recession.
###Exercise 8
canadian_gas
## # A tsibble: 542 x 2 [1M]
## Month Volume
## <mth> <dbl>
## 1 1960 Jan 1.43
## 2 1960 Feb 1.31
## 3 1960 Mar 1.40
## 4 1960 Apr 1.17
## 5 1960 May 1.12
## 6 1960 Jun 1.01
## 7 1960 Jul 0.966
## 8 1960 Aug 0.977
## 9 1960 Sep 1.03
## 10 1960 Oct 1.25
## # ... with 532 more rows
## # i Use `print(n = ...)` to see more rows
autoplot(canadian_gas)
## Plot variable not specified, automatically selected `.vars = Volume`
gg_subseries(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`
gg_season(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`
canadian_gas %>%
model(STL(Volume ~ season(window = 12), robust=TRUE)) %>%
components() %>% autoplot() +
labs(title = "STL Decompostion: Canadian Gas")
#The variance gets a lot larger in the middle
Cangascomp <- canadian_gas%>%
model(stl=STL(Volume))
sdcomp <- canadian_gas %>%
autoplot(Volume, color = "Orange")+
autolayer(components(Cangascomp), season_adjust, color = "#0072B2")
sdcomp
###Exercise 9
liquor<-aus_retail%>%
filter(Industry == "Liquor retailing" & year(Month)>= 2000)%>%
summarise(Turnover = sum(Turnover))
ggplot(data= liquor)+
geom_line(aes(x=Month, y=Turnover))
liquor %>%
model(STL(Turnover ~ season(window = 12), robust=TRUE)) %>%
components() %>%
autoplot() + labs(title = "Liquor Turnover")
liquor <-liquor%>%
mutate(`12-MA` = slider::slide_dbl(Turnover, mean,
.before = 5, .after = 6, .complete = TRUE))%>%
mutate(`2x12-MA` = slider::slide_dbl(`12-MA`, mean,
.before = 1, .after = 0, .complete = TRUE))
liquor%>%
autoplot(Turnover) +
geom_line(aes(y =`2x12-MA`), colour = "#0072B2") +
labs(y = "Liquor retailing Turnover",
title = "Total Liquor Turnover")+
guides(colour = guide_legend(title = "series"))
## Warning: Removed 12 row(s) containing missing values (geom_path).
###Exercise 10
Additive decomposition: 1) If m is even then calculate the trend-cycle using 2*m -MA. If m is odd then use m-MA. 2) Calculate the detrended series Yt - Tt. 3) For the seasonal component average the detrended values for that season. 4) The effects should be in a normal distribution and the average of m seasonal components should be 0. Next calculate the total sum of season components and then calculate the average of the seasonal components. Then adjust each seasonal component. 5) You can calculate the remainder by subtracting the estimated seasonal and trend-cycle components: Rt = yt - Tt - St.
Multiplicative decomposition 1) If m is even calculate the trend-cyle using 2m -MA. If m is odd use m-MA. 2) Calculate the detrended series using yt/Tt. 3) Estimate the seasonal component by averaging the detrended values of that season. 4) Make the seasonal effects normalized. Average of m seasonal components is 1, which is gathered by recreating the sequence for each years data (St). Calculate the total sum of the seasonal components. Next calculate the average of the seasonal components then adjust each seasonal component. 5) Calculate the remainder component by subtracting the estimated seasonal and trend-cycle components : Rt = yt/(Tt St).