library(fpp2)
## Warning: package 'fpp2' was built under R version 3.5.2
## Loading required package: ggplot2
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.5.2
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.5.2
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 3.5.2
library(seasonal)
## Warning: package 'seasonal' was built under R version 3.5.2
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.1
raw_data <- read_excel("C:\\Users\\nidhi\\Downloads\\retail.xlsx")
my_data <- raw_data[,2]
my_data <-my_data[-1,]
Turnover <- ts(my_data, start = c(1982, 4), frequency=12)
autoplot(Turnover) +
ggtitle("New SouthWales SuperMarket & Grocery Stores Turnover")+
ylab("Sale in Thousand $") +
xlab("Year")
Turnover %>% seas(x11="") -> fit.retail
autoplot(fit.retail) +
ggtitle("X11 decomposition of Turnover New Southwales SuperMarket & Grocery")
fit.retail %>% seasonal() %>% ggsubseriesplot() + ylab("Seasonal")
#It doesn't show any outliers. Here trend is increasing over time and magnitude of seasonality is reducing over a period of time.
#- Plot the data using autoplot(), ggsubseriesplot() and ggseasonplot() to look at the effect of the changing seasonality over time. What do you think is causing it to change so much?
#?cangas
autoplot(cangas)
ggsubseriesplot(cangas)
ggseasonplot(cangas)
#There is a clear positive increasing trend in gas production over the period of time by seeing the autoplot
#During winter, gas production is attained high values compared to those at summer
#- Do an STL decomposition of the data. You will need to choose s.window to allow for the changing shape of the seasonal component.
stl.cangas <- stl(cangas, s.window = 13, robust = TRUE)
autoplot(stl.cangas) +
ggtitle("Monthly Canadian Gas Production",
subtitle = "STL decomposition")
autoplot(cangas, series = "Data") +
autolayer(seasadj(stl.cangas), series = "Seasonally Adjusted") +
autolayer(trendcycle(stl.cangas), series = "Trend-cycle") +
ggtitle("Monthly Canadian gas production(STL decomposition)") +
ylab(expression(paste("Gas production (x", 10^{9}, m^{3}, ")"))) +
scale_color_manual(values = c("gray", "blue", "red"),
breaks = c("Data", "Seasonally Adjusted", "Trend-cycle"))
#There is a clear positive upward trend
#The magnitude of the seasonality increases and attained a peak value during 1984 and then it is getting reduced
#- Compare the results with those obtained using SEATS and X11. How are they different?
x11.cangas <- seas(cangas, x11 = "")
autoplot(x11.cangas) +
ggtitle("Monthly Canadian Gas Production",
subtitle = "X11 decomposition")
autoplot(cangas, series = "Data") +
autolayer(seasadj(x11.cangas), series = "Seasonally Adjusted") +
autolayer(trendcycle(x11.cangas), series = "Trend-cycle") +
ggtitle("Monthly Canadian gas production(X11 decomposition)") +
ylab(expression(paste("Gas production (x", 10^{9}, m^{3}, ")"))) +
scale_color_manual(values = c("gray", "blue", "red"),
breaks = c("Data", "Seasonally Adjusted", "Trend-cycle"))
seats.cangas <- seas(cangas)
autoplot(seats.cangas) +
ggtitle("Monthly Canadian Gas Production",
subtitle = "SEATS decomposition")
autoplot(cangas, series = "Data") +
autolayer(seasadj(seats.cangas), series = "Seasonally Adjusted") +
autolayer(trendcycle(seats.cangas), series = "Trend-cycle") +
ggtitle("Monthly Canadian gas production(SEATS decomposition)") +
ylab(expression(paste("Gas production (x", 10^{9}, m^{3}, ")"))) +
scale_color_manual(values = c("gray", "blue", "red"),
breaks = c("Data", "Seasonally Adjusted", "Trend-cycle"))
x11.cangas %>% seasonal() %>% ggsubseriesplot() + ylab("Seasonal")
seats.cangas %>% seasonal() %>% ggsubseriesplot() + ylab("Seasonal")
#X11 and SEATS perform almost similar.
#We could see a seasonal pattern in remainder component which tells the ineffectiveness of finding the fluctuating seasonality in the data for both the model.
This exercise uses the oil data (Annual Oil Production of Saudi Arabia 1965-2013).
autoplot(oil)
ts.oil<- ts(oil, frequency = 12, start = c(1965,3))
#Classical Method
decompose.ts.oil <- decompose(ts.oil,
type = "multiplicative")
autoplot(decompose.ts.oil)
autoplot(ts.oil, series="Data") +
autolayer(trendcycle(decompose.ts.oil), series="Trend") +
autolayer(seasadj(decompose.ts.oil), series="Seasonally Adjusted") +
xlab("Year") + ylab("Oil Production amount") +
ggtitle("Annual Oil Production of Saudi Arabia") +
scale_colour_manual(values=c("gray","blue","red"),
breaks=c("Data","Seasonally Adjusted","Trend"))
## Warning: Removed 12 rows containing missing values (geom_path).
#X11 METHOD
X11.ts.oil <- seas(ts.oil, x11 = "")
autoplot(X11.ts.oil)
autoplot(ts.oil, series="Data") +
autolayer(trendcycle(X11.ts.oil), series="Trend") +
autolayer(seasadj(X11.ts.oil), series="Seasonally Adjusted") +
xlab("Year") + ylab("Oil Production amount") +
ggtitle("Annual Oil Production of Saudi Arabia") +
scale_colour_manual(values=c("gray","blue","red"),
breaks=c("Data","Seasonally Adjusted","Trend"))
#SEATS METHOD
seats.ts.oil <- seas(ts.oil)
autoplot(seats.ts.oil)
autoplot(ts.oil, series="Data") +
autolayer(trendcycle(seats.ts.oil), series="Trend") +
autolayer(seasadj(seats.ts.oil), series="Seasonally Adjusted") +
xlab("Year") + ylab("Oil Production amount") +
ggtitle("Annual Oil Production of Saudi Arabia") +
scale_colour_manual(values=c("gray","blue","red"),
breaks=c("Data","Seasonally Adjusted","Trend"))
#STL METHOD
stl.ts.oil <- stl(ts.oil, s.window = 13, robust = TRUE)
autoplot(stl.ts.oil)
autoplot(ts.oil, series="Data") +
autolayer(trendcycle(stl.ts.oil), series="Trend") +
autolayer(seasadj(stl.ts.oil), series="Seasonally Adjusted") +
xlab("Year") + ylab("Oil Production amount") +
ggtitle("Annual Oil Production of Saudi Arabia") +
scale_colour_manual(values=c("gray","blue","red"),
breaks=c("Data","Seasonally Adjusted","Trend"))
#By seeing the classical multliplicative decomposition method, trend line looks to be over smoothed with the data. There seems to be any seasonality trend or is unclear
#By seeing the X11 method,trend is smoother and seasonality component is consistent over time.
#By seeing the seats method,trend cycle is smooth with the data.Seasonality trend is precise
#By seeing the STL method,trend line looks to be over smoothed.
#Comparing all the models, Seats method seems to be good over models.
#- Plot the time series of sales of product A. Can you identify seasonal fluctuations and/or a trend-cycle?
?plastics
## starting httpd help server ... done
autoplot(plastics) +
ggtitle("Monthly sales of product A for a plastics manufacturer") +
ylab("Sale") +
xlab("Year")
ggsubseriesplot(plastics)
ggseasonplot(plastics)
#Autoplot shows that there is seasonality in the data along with a upward trend
#- Use a classical multiplicative decomposition to calculate the trend-cycle and seasonal indices.
decompose.plastics <- decompose(plastics,
type = "multiplicative")
autoplot(decompose.plastics)
# Do the results support the graphical interpretation from part (a)?
#Yeah
#- Compute and plot the seasonally adjusted data.
autoplot(plastics, series="Data") +
autolayer(trendcycle(decompose.plastics), series="Trend") +
autolayer(seasadj(decompose.plastics), series="Seasonally Adjusted") +
xlab("Year") + ylab("Sales") +
ggtitle("Monthly sales of product A for a plastics manufacturer") +
scale_colour_manual(values=c("gray","blue","red"),
breaks=c("Data","Seasonally Adjusted","Trend"))
## Warning: Removed 12 rows containing missing values (geom_path).
#- Change one observation to be an outlier (e.g., add 500 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?
plastics_test <- plastics
plastics_test[20] <- plastics_test[20] + 500
decompose.plastics_test<- decompose(
plastics_test,
type = "multiplicative"
)
autoplot(plastics_test, series = "Data") +
autolayer(trendcycle(decompose.plastics_test),
series = "Trend") +
autolayer(seasadj(decompose.plastics_test),
series = "Seasonally Adjusted") +
xlab("Year") + ylab("Sales") +
ggtitle("Monthly sales of product A for a plastics manufacturer") +
scale_colour_manual(values=c("gray","blue","red"),
breaks=c("Data","Seasonally Adjusted","Trend"))
## Warning: Removed 12 rows containing missing values (geom_path).
#- Does it make any difference if the outlier is near the end rather than in the middle of the time series?
length(plastics)
## [1] 60
plastics_test_far <- plastics
plastics_test_far[58] <- plastics_test_far[58] + 500
decompose.plastics_test_far<- decompose(
plastics_test_far,
type = "multiplicative"
)
autoplot(plastics_test_far, series = "Data") +
autolayer(trendcycle(decompose.plastics_test_far),
series = "Trend") +
autolayer(seasadj(decompose.plastics_test_far),
series = "Seasonally Adjusted") +
xlab("Year") + ylab("Sales") +
ggtitle("Monthly sales of product A for a plastics manufacturer") +
scale_colour_manual(values=c("gray","blue","red"),
breaks=c("Data","Seasonally Adjusted","Trend"))
## Warning: Removed 12 rows containing missing values (geom_path).
#In both the series where adding an outlier at middle and end , has overall same impact.However middle seems to be having more negative trend at the end compared to end series.
#
#- Use stlf() to produce forecasts of the fancy series with either method="naive" or method="rwdrift", whichever is most appropriate. Use the lambda argument if you think a Box-Cox transformation is required.
?fancy
length(fancy)
## [1] 84
fancy.train <- head(fancy,72)
fancy.test <- tail(fancy,12)
autoplot(fancy)
naive.fst <- stlf(fancy.train,t.window=13,lambda = BoxCox.lambda(fancy.train),s.window="periodic",robust=TRUE,method="naive",h=12)
accuracy(naive.fst,fancy.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 283.8252 1953.122 1338.634 1.289209 13.06636 0.3574648
## Test set 3254.8402 8615.015 6358.954 -0.219011 21.51689 1.6980754
## ACF1 Theil's U
## Training set -0.3804074 NA
## Test set 0.4765812 0.6127006
autoplot(naive.fst) + ylab("Sale") + ggtitle("Forecat Sales for a souvenir shop with naive")
drift.fst <- stlf(fancy.train,t.window=13,lambda = BoxCox.lambda(fancy.train),s.window="periodic",robust=TRUE,method="rwdrift",h=12)
accuracy(drift.fst,fancy.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -48.41344 1956.709 1317.925 -1.58398 13.19986 0.3519345
## Test set -3653.55795 4856.813 4172.609 -18.44589 20.20618 1.1142406
## ACF1 Theil's U
## Training set -0.3839125 NA
## Test set 0.4928380 0.6209322
autoplot(drift.fst) + ylab("Sales") + ggtitle("Forecat Sales for a souvenir shop with Drift")
#Here inorder to compare two methods, i splitted the dataset into training and test dataset. We have got data for 84 months and i choose last 12 month as testing and remaining as training set.
#By comparing the rmse of both the models, found out that drift method is performing well for the test dataset even though performance is similar for training set.
#Also the prediction interval is high for naive than drift.
#Used Box-Cox transformation to make the variations normal as variations are high if we use without boxcox
#- Use stlf() to produce forecasts of the writing series with either method="naive" or method="rwdrift", whichever is most appropriate. Use the lambda argument if you think a Box-Cox transformation is required.
?writing
length(writing)
## [1] 120
autoplot(writing)
writing.train <- head(writing,96)
writing.test <- tail(writing,24)
naive.fst <- stlf(writing.train,t.window=13,lambda = BoxCox.lambda(writing.train),s.window="periodic",robust=TRUE,method="naive",h=24)
accuracy(naive.fst,writing.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 2.165153 44.66528 31.56315 0.2157835 4.955446 0.6932027
## Test set 53.865532 77.72801 62.76899 6.1595370 7.262321 1.3785582
## ACF1 Theil's U
## Training set -0.5586706 NA
## Test set 0.1051887 0.2051572
autoplot(naive.fst) + ylab("Sale") + ggtitle("Forecat Sales for printing and writing paper with naive")
drift.fst <- stlf(writing.train,t.window=13,lambda = BoxCox.lambda(writing.train),s.window="periodic",robust=TRUE,method="rwdrift",h=24)
accuracy(drift.fst,writing.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.7628597 44.74414 31.75145 -0.2198297 4.993025 0.6973382
## Test set 11.9435454 46.29615 36.83495 1.1558683 4.321979 0.8089841
## ACF1 Theil's U
## Training set -0.5573935 NA
## Test set -0.2244357 0.104788
autoplot(drift.fst) + ylab("Sales") + ggtitle("Forecat Sales for printing and writing paper with Drift")
#Here inorder to compare two methods, i splitted the dataset into training and test dataset. We have got data for 120 months and i choose last 24 month as testing and remaining as training set.
#By comparing the rmse of both the models, found out that drift method is performing well for the test dataset even though performance is similar for training set.
#Also the prediction interval is high for drift than naive
#Here there is an increasing trend in data and hence Used Box-Cox transformation to make the size of the seasonal variations normal as variations are high if we use without boxcox