library(fpp2)

## Warning: package 'fpp2' was built under R version 3.5.2

## Loading required package: ggplot2

## Loading required package: forecast

## Warning: package 'forecast' was built under R version 3.5.2

## Loading required package: fma

## Warning: package 'fma' was built under R version 3.5.2

## Loading required package: expsmooth

## Warning: package 'expsmooth' was built under R version 3.5.2

library(seasonal)

## Warning: package 'seasonal' was built under R version 3.5.2

Question 1

Download some monthly Australian retail data from the book website (Links to an external site.)Links to an external site.. These represent retail sales in various categories for different Australian states, and are stored in a MS-Excel file.

Read the data into R and decompose the series using X11. Does it reveal any outliers, or unusual features?

library(readxl)

## Warning: package 'readxl' was built under R version 3.5.1

raw_data <- read_excel("C:\\Users\\nidhi\\Downloads\\retail.xlsx")
my_data <- raw_data[,2]
my_data <-my_data[-1,]
Turnover <- ts(my_data, start = c(1982, 4), frequency=12)
autoplot(Turnover) +
  ggtitle("New SouthWales SuperMarket & Grocery Stores Turnover")+
  ylab("Sale in Thousand $") +
  xlab("Year")

Turnover %>% seas(x11="") -> fit.retail
autoplot(fit.retail) +
  ggtitle("X11 decomposition of Turnover New Southwales SuperMarket & Grocery")

fit.retail %>% seasonal() %>% ggsubseriesplot() + ylab("Seasonal")

#It doesn't show any outliers. Here trend is increasing over time and magnitude of seasonality is reducing over a period of time.

Question 2

This exercise uses the cangas data (monthly Canadian gas production in billions of cubic metres, January 1960 - February 2005).

#- Plot the data using autoplot(), ggsubseriesplot() and ggseasonplot() to look at the effect of the changing seasonality over time. What do you think is causing it to change so much?
#?cangas
autoplot(cangas)

ggsubseriesplot(cangas)

ggseasonplot(cangas)

#There is a clear positive increasing trend in gas production over the period of time by seeing the autoplot
#During winter, gas production is attained high values compared to those at summer

#- Do an STL decomposition of the data. You will need to choose s.window to allow for the changing shape of the seasonal component.
stl.cangas <- stl(cangas, s.window = 13, robust = TRUE)
autoplot(stl.cangas) +
  ggtitle("Monthly Canadian Gas Production",
          subtitle = "STL decomposition")

autoplot(cangas, series = "Data") +
  autolayer(seasadj(stl.cangas), series = "Seasonally Adjusted") +
  autolayer(trendcycle(stl.cangas), series = "Trend-cycle") +
  ggtitle("Monthly Canadian gas production(STL decomposition)") +
  ylab(expression(paste("Gas production (x", 10^{9}, m^{3}, ")"))) +
  scale_color_manual(values = c("gray", "blue", "red"),
                     breaks = c("Data", "Seasonally Adjusted", "Trend-cycle"))

#There is a clear positive upward trend 
#The magnitude of the seasonality increases and attained a peak value during 1984 and then it is getting reduced

#- Compare the results with those obtained using SEATS and X11. How are they different?
x11.cangas <- seas(cangas, x11 = "")

autoplot(x11.cangas) +
  ggtitle("Monthly Canadian Gas Production",
          subtitle = "X11 decomposition")

autoplot(cangas, series = "Data") +
  autolayer(seasadj(x11.cangas), series = "Seasonally Adjusted") +
  autolayer(trendcycle(x11.cangas), series = "Trend-cycle") +
  ggtitle("Monthly Canadian gas production(X11 decomposition)") +
  ylab(expression(paste("Gas production (x", 10^{9}, m^{3}, ")"))) +
  scale_color_manual(values = c("gray", "blue", "red"),
                     breaks = c("Data", "Seasonally Adjusted", "Trend-cycle"))

seats.cangas <- seas(cangas)
autoplot(seats.cangas) +
  ggtitle("Monthly Canadian Gas Production",
          subtitle = "SEATS decomposition")

autoplot(cangas, series = "Data") +
  autolayer(seasadj(seats.cangas), series = "Seasonally Adjusted") +
  autolayer(trendcycle(seats.cangas), series = "Trend-cycle") +
  ggtitle("Monthly Canadian gas production(SEATS decomposition)") +
  ylab(expression(paste("Gas production (x", 10^{9}, m^{3}, ")"))) +
  scale_color_manual(values = c("gray", "blue", "red"),
                     breaks = c("Data", "Seasonally Adjusted", "Trend-cycle"))

x11.cangas %>% seasonal() %>% ggsubseriesplot() + ylab("Seasonal")

seats.cangas %>% seasonal() %>% ggsubseriesplot() + ylab("Seasonal")

#X11 and SEATS perform almost similar.
#We could see a seasonal pattern in remainder component which tells the ineffectiveness of finding the fluctuating seasonality in the data for both the model.

Question3

This exercise uses the oil data (Annual Oil Production of Saudi Arabia 1965-2013).

Decompose the series using different approaches described in the lecture.
Write a paragraph describing the results of each decomposition.

autoplot(oil)

ts.oil<- ts(oil, frequency = 12, start = c(1965,3))
#Classical  Method
decompose.ts.oil <- decompose(ts.oil,
                                type = "multiplicative") 
autoplot(decompose.ts.oil)

autoplot(ts.oil, series="Data") +
  autolayer(trendcycle(decompose.ts.oil), series="Trend") +
  autolayer(seasadj(decompose.ts.oil), series="Seasonally Adjusted") +
  xlab("Year") + ylab("Oil Production amount") +
  ggtitle("Annual Oil Production of Saudi Arabia") +
  scale_colour_manual(values=c("gray","blue","red"),
                     breaks=c("Data","Seasonally Adjusted","Trend"))

## Warning: Removed 12 rows containing missing values (geom_path).

#X11 METHOD
X11.ts.oil <- seas(ts.oil, x11 = "")
autoplot(X11.ts.oil)

autoplot(ts.oil, series="Data") +
  autolayer(trendcycle(X11.ts.oil), series="Trend") +
  autolayer(seasadj(X11.ts.oil), series="Seasonally Adjusted") +
  xlab("Year") + ylab("Oil Production amount") +
  ggtitle("Annual Oil Production of Saudi Arabia") +
  scale_colour_manual(values=c("gray","blue","red"),
                     breaks=c("Data","Seasonally Adjusted","Trend"))

#SEATS METHOD

seats.ts.oil <- seas(ts.oil)
autoplot(seats.ts.oil)

autoplot(ts.oil, series="Data") +
  autolayer(trendcycle(seats.ts.oil), series="Trend") +
  autolayer(seasadj(seats.ts.oil), series="Seasonally Adjusted") +
  xlab("Year") + ylab("Oil Production amount") +
  ggtitle("Annual Oil Production of Saudi Arabia") +
  scale_colour_manual(values=c("gray","blue","red"),
                     breaks=c("Data","Seasonally Adjusted","Trend"))

#STL METHOD
stl.ts.oil <- stl(ts.oil, s.window = 13, robust = TRUE)
autoplot(stl.ts.oil)

autoplot(ts.oil, series="Data") +
  autolayer(trendcycle(stl.ts.oil), series="Trend") +
  autolayer(seasadj(stl.ts.oil), series="Seasonally Adjusted") +
  xlab("Year") + ylab("Oil Production amount") +
  ggtitle("Annual Oil Production of Saudi Arabia") +
  scale_colour_manual(values=c("gray","blue","red"),
                     breaks=c("Data","Seasonally Adjusted","Trend"))

#By seeing the classical multliplicative decomposition method, trend line looks to be over smoothed with the data. There seems to be any seasonality trend or is unclear

#By seeing the X11 method,trend is smoother and seasonality component is consistent over time.
#By seeing the seats method,trend cycle is smooth with the data.Seasonality trend is precise
#By seeing the STL method,trend line looks to be over smoothed.
#Comparing all the models, Seats method seems to be good over models.

Question4

The plastics data set consists of the monthly sales (in thousands) of product A for a plastics manufacturer for five years.

#- Plot the time series of sales of product A. Can you identify seasonal fluctuations and/or a trend-cycle?
?plastics

## starting httpd help server ... done

autoplot(plastics) +
  ggtitle("Monthly sales of product A for a plastics manufacturer") +
  ylab("Sale") +
  xlab("Year")

ggsubseriesplot(plastics)

ggseasonplot(plastics)

#Autoplot shows that there is seasonality in the data along with a upward trend

#- Use a classical multiplicative decomposition to calculate the trend-cycle and seasonal indices.
decompose.plastics <- decompose(plastics,
                                type = "multiplicative")
autoplot(decompose.plastics)

# Do the results support the graphical interpretation from part (a)?
#Yeah

#- Compute and plot the seasonally adjusted data.
autoplot(plastics, series="Data") +
  autolayer(trendcycle(decompose.plastics), series="Trend") +
  autolayer(seasadj(decompose.plastics), series="Seasonally Adjusted") +
  xlab("Year") + ylab("Sales") +
  ggtitle("Monthly sales of product A for a plastics manufacturer") +
  scale_colour_manual(values=c("gray","blue","red"),
                     breaks=c("Data","Seasonally Adjusted","Trend"))

## Warning: Removed 12 rows containing missing values (geom_path).

#- Change one observation to be an outlier (e.g., add 500 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?
plastics_test <- plastics
plastics_test[20] <- plastics_test[20] + 500
decompose.plastics_test<- decompose(
  plastics_test,
  type = "multiplicative"
  )
autoplot(plastics_test, series = "Data") +
  autolayer(trendcycle(decompose.plastics_test),
            series = "Trend") +
  autolayer(seasadj(decompose.plastics_test),
            series = "Seasonally Adjusted") +
  xlab("Year") + ylab("Sales") +
  ggtitle("Monthly sales of product A for a plastics manufacturer") +
  scale_colour_manual(values=c("gray","blue","red"),
                     breaks=c("Data","Seasonally Adjusted","Trend"))

## Warning: Removed 12 rows containing missing values (geom_path).

#- Does it make any difference if the outlier is near the end rather than in the middle of the time series?
length(plastics)

## [1] 60

plastics_test_far <- plastics
plastics_test_far[58] <- plastics_test_far[58] + 500
decompose.plastics_test_far<- decompose(
  plastics_test_far,
  type = "multiplicative"
  )
autoplot(plastics_test_far, series = "Data") +
  autolayer(trendcycle(decompose.plastics_test_far),
            series = "Trend") +
  autolayer(seasadj(decompose.plastics_test_far),
            series = "Seasonally Adjusted") +
  xlab("Year") + ylab("Sales") +
  ggtitle("Monthly sales of product A for a plastics manufacturer") +
  scale_colour_manual(values=c("gray","blue","red"),
                     breaks=c("Data","Seasonally Adjusted","Trend"))

## Warning: Removed 12 rows containing missing values (geom_path).

#In both the series where adding an outlier at middle and end , has overall same impact.However middle seems to be having more negative trend at the end compared to end series.
#

Question5

This exercise uses the stfl() function. (A short-cut approach to decompose a time series using STL, forecast the seasonally adjusted series, and return the reseasonalised forecasts. The stlf() function uses mstl() to carry out the decomposition, so there are default values for s.window and t.window. Note: Forecasts using seasonally adjusted data need to be “reseasonalised” by adding in the forecasts of the seasonal component. The stfl() function will return “reseasonalised” data.)

#- Use stlf() to produce forecasts of the fancy series with either method="naive" or method="rwdrift", whichever is most appropriate. Use the lambda argument if you think a Box-Cox transformation is required.
?fancy
length(fancy)

## [1] 84

fancy.train <- head(fancy,72)
fancy.test <- tail(fancy,12)
autoplot(fancy)

naive.fst <- stlf(fancy.train,t.window=13,lambda = BoxCox.lambda(fancy.train),s.window="periodic",robust=TRUE,method="naive",h=12)
accuracy(naive.fst,fancy.test)

##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  283.8252 1953.122 1338.634  1.289209 13.06636 0.3574648
## Test set     3254.8402 8615.015 6358.954 -0.219011 21.51689 1.6980754
##                    ACF1 Theil's U
## Training set -0.3804074        NA
## Test set      0.4765812 0.6127006

autoplot(naive.fst) + ylab("Sale") + ggtitle("Forecat Sales for a souvenir shop with naive")

drift.fst <- stlf(fancy.train,t.window=13,lambda = BoxCox.lambda(fancy.train),s.window="periodic",robust=TRUE,method="rwdrift",h=12)
accuracy(drift.fst,fancy.test)

##                       ME     RMSE      MAE       MPE     MAPE      MASE
## Training set   -48.41344 1956.709 1317.925  -1.58398 13.19986 0.3519345
## Test set     -3653.55795 4856.813 4172.609 -18.44589 20.20618 1.1142406
##                    ACF1 Theil's U
## Training set -0.3839125        NA
## Test set      0.4928380 0.6209322

autoplot(drift.fst) + ylab("Sales") + ggtitle("Forecat Sales for a souvenir shop with Drift")

#Here inorder to compare two methods, i splitted the dataset into training and test dataset. We have got data for 84 months and i choose last 12 month as testing and remaining as training set.
#By comparing the rmse of both the models, found out that drift method is performing well for the test dataset even though performance is similar for training set.
#Also the prediction interval is high for naive than drift.
#Used Box-Cox transformation to make the variations normal as variations are high if we use without boxcox

#- Use stlf() to produce forecasts of the writing series with either method="naive" or method="rwdrift", whichever is most appropriate. Use the lambda argument if you think a Box-Cox transformation is required.

?writing
length(writing)

## [1] 120

autoplot(writing)

writing.train <- head(writing,96)
writing.test <- tail(writing,24)


naive.fst <- stlf(writing.train,t.window=13,lambda = BoxCox.lambda(writing.train),s.window="periodic",robust=TRUE,method="naive",h=24)
accuracy(naive.fst,writing.test)

##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  2.165153 44.66528 31.56315 0.2157835 4.955446 0.6932027
## Test set     53.865532 77.72801 62.76899 6.1595370 7.262321 1.3785582
##                    ACF1 Theil's U
## Training set -0.5586706        NA
## Test set      0.1051887 0.2051572

autoplot(naive.fst) + ylab("Sale") + ggtitle("Forecat Sales for printing and writing paper with naive")

drift.fst <- stlf(writing.train,t.window=13,lambda = BoxCox.lambda(writing.train),s.window="periodic",robust=TRUE,method="rwdrift",h=24)
accuracy(drift.fst,writing.test)

##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.7628597 44.74414 31.75145 -0.2198297 4.993025 0.6973382
## Test set     11.9435454 46.29615 36.83495  1.1558683 4.321979 0.8089841
##                    ACF1 Theil's U
## Training set -0.5573935        NA
## Test set     -0.2244357  0.104788

autoplot(drift.fst) + ylab("Sales") + ggtitle("Forecat Sales for printing and writing paper with Drift")

#Here inorder to compare two methods, i splitted the dataset into training and test dataset. We have got data for 120 months and i choose last 24 month as testing and remaining as training set.
#By comparing the rmse of both the models, found out that drift method is performing well for the test dataset even though performance is similar for training set.
#Also the prediction interval is high for drift than naive
#Here there is an increasing trend in data and hence Used Box-Cox transformation to make the size of the seasonal variations normal as variations are high if we use without boxcox

DSCI-FXP-SP19- Assignment #4 Time Series Decomposition Nidhin

Nidhin

28 Feb 2019

Question 1

Question 2

Question3

Question4

Question5