library(tidyr)
library(dplyr)
library(knitr)
library(utils)
library(ggplot2)
library(forecast)
library(readxl)
library(fpp2)
library(seasonal)
autoplot(plastics)+ggtitle("Monthly Sales (in thousands) of Product A for 5 years")+ylab('Sales')
The data shows that there is an obvious seasonal effect and an increase in sales over time.
decompose_plastics <- decompose(plastics, type="multiplicative")
decompose_plastics %>%
autoplot() +
ggtitle("Multiplicative Decomposition of Plastic Product A Sales")
##c.
The seasonal component shows a constant change over time while the trend is increasing over time. The remainder component shows unusual patter at the end which explains the dip in the trend. Overall, the multiplicative decomposition supports the graphical interpretation.
seasadj_plastics <- plastics/decompose_plastics$seasonal
autoplot(seasadj_plastics, series='Seasonally Adjusted')+autolayer(plastics, series = 'Data')+ggtitle('Sale of Product A')
## Warning in is.na(main): is.na() applied to non-(list or vector) of type
## 'NULL'
outlier_plastics <- plastics
#add 500 to 1 observation
outlier_plastics[13] <- outlier_plastics[13] +500
seasadj_outlier_plastics <- outlier_plastics/decompose_plastics$seasonal
autoplot(seasadj_plastics, series='Seasonally Adjusted')+autolayer(plastics, series = 'Data')+autolayer(seasadj_outlier_plastics,series = "outlier seasonally adjusted")+ggtitle('Sale of Product A')
## Warning in is.na(main): is.na() applied to non-(list or vector) of type
## 'NULL'
The outlier did not have any impact on the remaining seasonally adjusted data.
outlier_plastics <- plastics
#add 500 to 1 observation
outlier_plastics[55] <- outlier_plastics[55] +500
seasadj_outlier_plastics <- outlier_plastics/decompose_plastics$seasonal
autoplot(seasadj_plastics, series='Seasonally Adjusted')+autolayer(plastics, series = 'Data')+autolayer(seasadj_outlier_plastics,series = "outlier seasonally adjusted")+ggtitle('Sale of Product A')
## Warning in is.na(main): is.na() applied to non-(list or vector) of type
## 'NULL'
There is no difference if the outlier is near the end rather than the middle of the series.
Read Retail Data
#read retail data
retaildata <- readxl::read_excel("retail2.xlsx", skip=1)
head(retaildata)
## # A tibble: 6 x 190
## `Series ID` A3349335T A3349627V A3349338X A3349398A A3349468W
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1982-04-01 00:00:00 303. 41.7 63.9 409. 65.8
## 2 1982-05-01 00:00:00 298. 43.1 64 405. 65.8
## 3 1982-06-01 00:00:00 298 40.3 62.7 401 62.3
## 4 1982-07-01 00:00:00 308. 40.9 65.6 414. 68.2
## 5 1982-08-01 00:00:00 299. 42.1 62.6 404. 66
## 6 1982-09-01 00:00:00 305. 42 64.4 412. 62.3
## # ... with 184 more variables: A3349336V <dbl>, A3349337W <dbl>,
## # A3349397X <dbl>, A3349399C <dbl>, A3349874C <dbl>, A3349871W <dbl>,
## # A3349790V <dbl>, A3349556W <dbl>, A3349791W <dbl>, A3349401C <dbl>,
## # A3349873A <dbl>, A3349872X <dbl>, A3349709X <dbl>, A3349792X <dbl>,
## # A3349789K <dbl>, A3349555V <dbl>, A3349565X <dbl>, A3349414R <dbl>,
## # A3349799R <dbl>, A3349642T <dbl>, A3349413L <dbl>, A3349564W <dbl>,
## # A3349416V <dbl>, A3349643V <dbl>, A3349483V <dbl>, A3349722T <dbl>,
## # A3349727C <dbl>, A3349641R <dbl>, A3349639C <dbl>, A3349415T <dbl>,
## # A3349349F <dbl>, A3349563V <dbl>, A3349350R <dbl>, A3349640L <dbl>,
## # A3349566A <dbl>, A3349417W <dbl>, A3349352V <dbl>, A3349882C <dbl>,
## # A3349561R <dbl>, A3349883F <dbl>, A3349721R <dbl>, A3349478A <dbl>,
## # A3349637X <dbl>, A3349479C <dbl>, A3349797K <dbl>, A3349477X <dbl>,
## # A3349719C <dbl>, A3349884J <dbl>, A3349562T <dbl>, A3349348C <dbl>,
## # A3349480L <dbl>, A3349476W <dbl>, A3349881A <dbl>, A3349410F <dbl>,
## # A3349481R <dbl>, A3349718A <dbl>, A3349411J <dbl>, A3349638A <dbl>,
## # A3349654A <dbl>, A3349499L <dbl>, A3349902A <dbl>, A3349432V <dbl>,
## # A3349656F <dbl>, A3349361W <dbl>, A3349501L <dbl>, A3349503T <dbl>,
## # A3349360V <dbl>, A3349903C <dbl>, A3349905J <dbl>, A3349658K <dbl>,
## # A3349575C <dbl>, A3349428C <dbl>, A3349500K <dbl>, A3349577J <dbl>,
## # A3349433W <dbl>, A3349576F <dbl>, A3349574A <dbl>, A3349816F <dbl>,
## # A3349815C <dbl>, A3349744F <dbl>, A3349823C <dbl>, A3349508C <dbl>,
## # A3349742A <dbl>, A3349661X <dbl>, A3349660W <dbl>, A3349909T <dbl>,
## # A3349824F <dbl>, A3349507A <dbl>, A3349580W <dbl>, A3349825J <dbl>,
## # A3349434X <dbl>, A3349822A <dbl>, A3349821X <dbl>, A3349581X <dbl>,
## # A3349908R <dbl>, A3349743C <dbl>, A3349910A <dbl>, A3349435A <dbl>,
## # A3349365F <dbl>, A3349746K <dbl>, ...
#convert into time series
myts <- ts(retaildata[,"A3349338X"],
frequency=12, start=c(1982,4))
#x11 decomposition of retail sales data
x11_retail <- seas(myts, x11="")
autoplot(x11_retail) +
ggtitle("X11 Decomposition of Retail Sales Data")
There is a increase in sales over time but the x11 decomposition shows the increase in seasonality of over time. Previously, we noted the impact in 2008 but we do notice some additional outliers in 1994, 1996 and 1998 that weren’t noticeable before.