## Warning: package 'tidyverse' was built under R version 3.6.2
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'readr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## Warning: package 'fpp2' was built under R version 3.6.3
## Warning: package 'forecast' was built under R version 3.6.3
## Warning: package 'fma' was built under R version 3.6.3
## Warning: package 'expsmooth' was built under R version 3.6.3
## Warning: package 'readxl' was built under R version 3.6.3
## Warning: package 'seasonal' was built under R version 3.6.3
The plastics
data set consists of the monthly sales (in thousands) of product A for a plastics manufacturer for five years.
autoplot(plastics)
The plastics
data has has an increasing trend and a seasonal component where sales are higher in the summer and lower in the winter.
decompose_plastics <- decompose(plastics, type="multiplicative")
decompose_plastics %>%
autoplot() +
ggtitle("Multiplicative Decomposition of Plastic Product A Sales")
Yes. The trend is increasing and there’s a seasonal component.
seasonally_adjusted_plastics <- plastics / decompose_plastics$seasonal
autoplot(plastics, series = "original data") +
autolayer(seasonally_adjusted_plastics, series = "seasonally adjusted") +
ylab("Sales (thousands)") +
ggtitle("Plastic Product A Sales") +
scale_color_brewer(palette = "Set1")
plastics_with_outlier <- plastics
plastics_with_outlier[20] <- plastics_with_outlier[20] + 500
decompose_plastics_with_outlier <- decompose(plastics_with_outlier, type="multiplicative")
seasonally_adjusted_plastics_with_outlier <- plastics_with_outlier / decompose_plastics_with_outlier$seasonal
autoplot(plastics, series = 'original data') +
autolayer(seasonally_adjusted_plastics, series = 'without outlier') +
autolayer(seasonally_adjusted_plastics_with_outlier, series = 'with outlier') +
ylab("Sales (thousands)") +
ggtitle("Seasonally Adjusted Plastic Product A Sales") +
scale_color_brewer(palette = "Set1")
The outlier causes the series to be slighly higher than the seasonally adjusted series without an outlier. There are also some troughs in the series with an outlier that don’t exist in the series without an outlier. So the addition of the outlier causes the model to find troughs in the data it wouldn’t otherwise, and slighly overstate the level of the seasonal component.
# Near the End
plastics_with_outlier_near_the_end <- plastics
plastics_with_outlier_near_the_end[50] <- plastics_with_outlier_near_the_end[50] + 500
decompose_plastics_with_outlier_near_the_end <- decompose(plastics_with_outlier_near_the_end, type="multiplicative")
seasonally_adjusted_plastics_with_outlier_near_the_end <- plastics_with_outlier_near_the_end / decompose_plastics_with_outlier_near_the_end$seasonal
# In the Middle
plastics_with_outlier_in_the_middle <- plastics
plastics_with_outlier_in_the_middle[30] <- plastics_with_outlier_in_the_middle[30] + 500
decompose_plastics_with_outlier_in_the_middle <- decompose(plastics_with_outlier_in_the_middle, type="multiplicative")
seasonally_adjusted_plastics_with_outlier_in_the_middle <- plastics_with_outlier_in_the_middle / decompose_plastics_with_outlier_in_the_middle$seasonal
# Plot both series
autoplot(plastics, series = 'original data') +
autolayer(seasonally_adjusted_plastics_with_outlier_near_the_end, series = 'with outlier near the end') +
autolayer(seasonally_adjusted_plastics_with_outlier_in_the_middle, series = 'with outlier in the middle') +
ylab("Sales (thousands)") +
ggtitle("Seasonally Adjusted Plastic Product A Sales") +
scale_color_brewer(palette = "Set1")
The outlier has less of an impact if it’s in the middle of the series as opposed to near the end. It’s interesting that when the outlier is near the end the seasonal adjustment doesn’t adjust out the troughs of the orignal series. When it’s in the middle it seems to just introduce some noise.
Recall your retail time series data (from Exercise 3 in Section 2.10). Decompose the series using X11. Does it reveal any outliers, or unusual features that you had not noticed previously?
temp = tempfile(fileext = ".xlsx")
dataURL <- "https://otexts.com/fpp2/extrafiles/retail.xlsx"
download.file(dataURL, destfile=temp, mode='wb')
retaildata <- readxl::read_excel(temp, skip=1)
retail <- ts(retaildata[, "A3349337W"], frequency = 12, start = c(1982, 4))
x11_retail <- seas(retail, x11="")
autoplot(x11_retail) +
ggtitle("X11 Decomposition of Retail Sales Data")
There are some spikes in the remainder early on (circa 1983) and around 2000. That indicates the presense of some outliers.