library(fpp2)
## Loading required package: ggplot2
## Loading required package: forecast
## Loading required package: fma
## Loading required package: expsmooth
require(seasonal)
## Loading required package: seasonal
#Read the data into R and decompose the series using X11. Does it reveal any outliers, or unusual features?
library(readxl)
retail<- readxl::read_excel("/Users/luyunliang/Downloads/retail.xlsx",skip = 1)
ts_retail<- ts(retail[,"A3349337W"], frequency = 12, start = c(1982,4))
#plot the data to see if there is a trend or seasonality
autoplot(ts_retail)
#decompose the series using X11
x11_retail<- seas(ts_retail,x11="")
# autoplot x11_retail
autoplot(x11_retail)
# there were some outliers between 2000 and 2001 when decomposing the series using x11
# the seasonality effect decreases as trend increases
#Plot the data using autoplot(), ggsubseriesplot() and ggseasonplot() to look at the effect of the changing seasonality over time. What do you think is causing it to change so much?
autoplot(cangas,ylab = "billion cubic meters",xlab = "Year")
ggsubseriesplot(cangas)
ggseasonplot(cangas)
# overall, there is an increasing trend of monthly Canadian gas production from 1960 to 2005. The gas production increased in winter much more than it increased in summer because of increasing amount of heating in winter.
#Do an STL decomposition of the data. You will need to choose s.window to allow for the changing shape of the seasonal component.
stl_cangas<- stl(cangas,s.window = 13,robust = TRUE)
#how each STL decomposed component
autoplot(stl_cangas)+ ggtitle("Monthly Canadian Gas Production", subtitle = "STL decomposition")
# there is an overall increasing trend of gas production; the size of seasonality reaches to the peak in 1985.
# (STL decomposition) See trend-cycle component and seasonally adjusted data along with original data.
autoplot(cangas, series = "Data") +
autolayer(seasadj(stl_cangas), series = "Seasonally Adjusted") +
autolayer(trendcycle(stl_cangas), series = "Trend-cycle") +
ggtitle("Monthly Canadian gas production(STL decomposition)") +
scale_color_manual(values = c("gray", "blue", "red"),
breaks = c("Data", "Seasonally Adjusted", "Trend-cycle"))
#Compare the results with those obtained using SEATS and X11. How are they different?
x11_cangas<- seas(cangas,x11="")
seats_cangas<- seas(cangas)
#show x11 decomposed component
autoplot(x11_cangas) +
ggtitle("Monthly Canadian Gas Production",
subtitle = "X11 decomposition")
# (X11 decomposition) See trend-cycle component and seasonally adjusted data along with original data.
autoplot(cangas, series = "Data") +
autolayer(seasadj(x11_cangas), series = "Seasonally Adjusted") +
autolayer(trendcycle(x11_cangas), series = "Trend-cycle") +
ggtitle("Monthly Canadian gas production(X11 decomposition)") +
scale_color_manual(values = c("gray", "blue", "red"),
breaks = c("Data", "Seasonally Adjusted", "Trend-cycle"))
#show SEATS decomposed component
autoplot(seats_cangas) +
ggtitle("Monthly Canadian Gas Production",
subtitle = "SEATS decomposition")
# (SEATS decomposition) See trend-cycle component and seasonally adjusted data along with original data.
autoplot(cangas, series = "Data") +
autolayer(seasadj(seats_cangas), series = "Seasonally Adjusted") +
autolayer(trendcycle(seats_cangas), series = "Trend-cycle") +
ggtitle("Monthly Canadian gas production(SEATS decomposition)") +
scale_color_manual(values = c("gray", "blue", "red"),
breaks = c("Data", "Seasonally Adjusted", "Trend-cycle"))
# SEATS works better at quarterly and monthly data. The seasonality trend decreased, then increased and decreased. The result is quite similar to the X11 decomposition.
#Decompose the series using different approaches described in the lecture
# method 1: Classical multiplicative decomposition
ts_oil<- ts(oil, frequency = 12, start = c(1965,3))
decompose_ts_oil <- decompose(ts_oil,
type = "multiplicative")
autoplot(decompose_ts_oil)
#Compute and plot the seasonally adjusted data.
autoplot(ts_oil, series="Data") +
autolayer(trendcycle(decompose_ts_oil), series="Trend") +
autolayer(seasadj(decompose_ts_oil), series="Seasonally Adjusted") +
xlab("Year") + ylab("Oil Production amount") +
ggtitle("Annual Oil Production of Saudi Arabia") +
scale_colour_manual(values=c("gray","blue","red"),
breaks=c("Data","Seasonally Adjusted","Trend"))
## Warning: Removed 12 rows containing missing values (geom_path).
# The trend-cycle estimate has over-smoothed the drop in the data, and the corresponding remainder values have been affected by the poor-trend estimate.
# the seasonality trend is not very clear and indicative as other methods.
#method 2: x11 Decomposition
x11_oil <- seas(ts_oil, x11 = "")
autoplot(x11_oil)+
ggtitle("Annual Oil Production of Saudi Arabia")
#Compute and plot the seasonally adjusted data.
autoplot(ts_oil, series="Data") +
autolayer(trendcycle(x11_oil), series="Trend") +
autolayer(seasadj(x11_oil), series="Seasonally Adjusted") +
xlab("Year") + ylab("Oil Production amount") +
ggtitle("Annual Oil Production of Saudi Arabia") +
scale_colour_manual(values=c("gray","blue","red"),
breaks=c("Data","Seasonally Adjusted","Trend"))
# The seasonality is very consistent over years.
# After decompostion, the trend is smoothier.
# X11 captured the sudden fall at the end of 1966.
#method 3:STL Decomposition
stl_oil <- stl(ts_oil, s.window = 13, robust = TRUE)
autoplot(stl_oil) +
ggtitle("Annual Oil Production of Saudi Arabia")
#Compute and plot the seasonally adjusted data.
autoplot(ts_oil, series="Data") +
autolayer(trendcycle(stl_oil), series="Trend") +
autolayer(seasadj(stl_oil), series="Seasonally Adjusted") +
xlab("Year") + ylab("Oil Production amount") +
ggtitle("Annual Oil Production of Saudi Arabia") +
scale_colour_manual(values=c("gray","blue","red"),
breaks=c("Data","Seasonally Adjusted","Trend"))
# The trend-cycle is over smooth.
# the seasonality is not very intuitive and detailed.The corresponding remainder values have been affected by the poor-trend estimate.
#SEATS decomposition
seats_oil<- seas(ts_oil)
autoplot(seats_oil) +
ggtitle("Annual Oil Production of Saudi Arabia")
#Compute and plot the seasonally adjusted data.
autoplot(ts_oil, series="Data") +
autolayer(trendcycle(seats_oil), series="Trend") +
autolayer(seasadj(seats_oil), series="Seasonally Adjusted") +
xlab("Year") + ylab("Oil Production amount") +
ggtitle("Annual Oil Production of Saudi Arabia") +
scale_colour_manual(values=c("gray","blue","red"),
breaks=c("Data","Seasonally Adjusted","Trend"))
# The trend-cycle estimate captures falls at the end of 1965; it is not over smooth
# the seasonality trend is very clear and indicative compared to other methods.
#Plot the time series of sales of product A. Can you identify seasonal fluctuations and/or a trend-cycle?
autoplot(plastics)
#The plot indicates thre is an overall inreasing trend and seasonal fluctuations.
#Use a classical multiplicative decomposition to calculate the trend-cycle and seasonal indices.
multiplicative_decomposition <- decompose(plastics, type = "multiplicative")
autoplot(multiplicative_decomposition)
#Do the results support the graphical interpretation from part a?
#Yes
#Compute and plot the seasonally adjusted data.
autoplot(plastics, series="Data") +
autolayer(trendcycle(multiplicative_decomposition), series="Trend") +
autolayer(seasadj(multiplicative_decomposition), series="Seasonally Adjusted") +
xlab("Year") + ylab("Monthly Sales in Thousands") +
ggtitle("Plastic Product Sales") +
scale_colour_manual(values=c("gray","blue","red"), breaks=c("Data","Seasonally Adjusted","Trend"))
## Warning: Removed 12 rows containing missing values (geom_path).
#Change one observation to be an outlier (e.g., add 500 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?
plastics_outlier <- plastics
plastics_outlier[20] <- plastics_outlier[20] + 500
decompose_plastics_outlier <- decompose(plastics_outlier,type = "multiplicative")
autoplot(plastics_outlier, series = "Data") +
autolayer(trendcycle(decompose_plastics_outlier), series = "Trend") +
autolayer(seasadj(decompose_plastics_outlier), series = "Seasonally Adjusted") +
xlab("Year") + ylab("Monthly Sales in Thousands") +
ggtitle("Plastic Product Sales") +
scale_color_manual(values=c("gray", "blue", "red"), breaks=c("Data", "Seasonally Adjusted", "Trend"))
## Warning: Removed 12 rows containing missing values (geom_path).
#the outlier affects more on the seasonally adjusted data than trend data
#Does it make any difference if the outlier is near the end rather than in the middle of the time series?
plastics_outlier <- plastics
plastics_outlier[50] <- plastics_outlier[50] + 500
decompose_plastics_outlier <- decompose(plastics_outlier,type = "multiplicative")
autoplot(plastics_outlier, series = "Data") +
autolayer(trendcycle(decompose_plastics_outlier), series = "Trend") +
autolayer(seasadj(decompose_plastics_outlier), series = "Seasonally Adjusted") +
xlab("Year") + ylab("Monthly Sales in Thousands") +
ggtitle("Plastic Product Sales") +
scale_color_manual(values=c("gray", "blue", "red"), breaks=c("Data", "Seasonally Adjusted", "Trend"))
## Warning: Removed 12 rows containing missing values (geom_path).
#when outlier is near the end, trend decreases near end outlier
#Use stlf() to produce forecasts of the fancy series with either method="naive" or method="rwdrift", whichever is most appropriate. Use the lambda argument if you think a Box-Cox transformation is required.
str(fancy)
## Time-Series [1:84] from 1987 to 1994: 1665 2398 2841 3547 3753 ...
autoplot(fancy)
stlf_fancy <- stlf(fancy,
s.window = 13,
robust = TRUE,
lambda = BoxCox.lambda(fancy),
method = "rwdrift")
autoplot(stlf_fancy)
# The Box-Cox transformation is necessary because it make the variation more consistent across the time series.
#Use stlf() to produce forecasts of the writing series with either method="naive" or method="rwdrift", whichever is most appropriate. Use the lambda argument if you think a Box-Cox transformation is required.
str(writing)
## Time-Series [1:120] from 1968 to 1978: 563 599 669 598 580 ...
autoplot(writing)
stlf_writing <- stlf(writing,
s.window = 13,
robust = TRUE,
lambda = BoxCox.lambda(writing),
method = "rwdrift")
autoplot(stlf_writing)
# The Box-Cox transformation is necessary because it make the the size of the seasonality more consistent across the time series.
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.