Textbook: Hyndman, R.J., & Athanasopoulos, G. (2018) Forecasting: principles and practice, 2nd edition, OTexts: Melbourne, Australia.
# load packages
library(fpp2)
library(ggplot2)
help(gold)
Description Daily morning gold prices in US dollars. 1 January 1985 – 31 March 1989.
help(woolyrnq)
Description Quarterly production of woollen yarn in Australia: tonnes. Mar 1965 – Sep 1994.
help(gas)
Description Australian monthly gas production: 1956–1995.
Gold
autoplot(gold) +
labs(title = "Daily Morning Gold Prices", subtitle = "1 Jan 1985 – 31 Mar 1989")+
xlab("No of days") +
ylab("USD")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5))+
scale_y_continuous()
Woolyrnq
autoplot(woolyrnq) +
labs(title = "Quarterly Production woollen yarn Australia", subtitle = "Mar 1965 – Sep 1994")+
xlab("Year") +
ylab("tonnes")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5))
Gas
autoplot(gas) +
labs(title = "Australian monthly gas production", subtitle = "1956–1995")+
xlab("Year") +
ylab("Amount")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5))
sprintf('The frequency for the Gold time series data is %d (annual).', frequency(gold))
## [1] "The frequency for the Gold time series data is 1 (annual)."
sprintf("The frequency for the Woollen Yarn time series data is %d (quarterly).", frequency(woolyrnq))
## [1] "The frequency for the Woollen Yarn time series data is 4 (quarterly)."
sprintf("The frequency for the Gas time series data is %d (monthly).", frequency(gas))
## [1] "The frequency for the Gas time series data is 12 (monthly)."
c) Use which.max() to spot the outlier in the gold series. Which observation was it?
sprintf("A possible outlier in the gold series occurs on day %d, with a price of US$ %.2f.", which.max(gold), gold[which.max(gold)])
## [1] "A possible outlier in the gold series occurs on day 770, with a price of US$ 593.70."
tute1 <- read.csv("tute1.csv", header=TRUE)
head(tute1)
## X Sales AdBudget GDP
## 1 Mar-81 1020.2 659.2 251.8
## 2 Jun-81 889.2 589.0 290.9
## 3 Sep-81 795.0 512.5 290.8
## 4 Dec-81 1003.9 614.1 292.4
## 5 Mar-82 1057.7 647.2 279.1
## 6 Jun-82 944.4 602.0 254.0
mytimeseries <- ts(tute1[,-1], start=1981, frequency=4)
(The [,-1] removes the first column which contains the quarters as we don’t need them now.)
autoplot(mytimeseries, facets=TRUE)
Check what happens when you don’t include facets=TRUE.
autoplot(mytimeseries)
All three are plotted on the same y axis. The seasonality in GDP is much harder to grasp in this case. The plots ate separated by colors as shown in the legend. I believe the plot where facets=TRUE shows the fluctuations and outlines the scales more clearly.
retaildata <- readxl::read_excel("retail.xlsx", skip=1)
head(retaildata)
## # A tibble: 6 x 190
## `Series ID` A3349335T A3349627V A3349338X A3349398A A3349468W
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1982-04-01 00:00:00 303. 41.7 63.9 409. 65.8
## 2 1982-05-01 00:00:00 298. 43.1 64 405. 65.8
## 3 1982-06-01 00:00:00 298 40.3 62.7 401 62.3
## 4 1982-07-01 00:00:00 308. 40.9 65.6 414. 68.2
## 5 1982-08-01 00:00:00 299. 42.1 62.6 404. 66
## 6 1982-09-01 00:00:00 305. 42 64.4 412. 62.3
## # ... with 184 more variables: A3349336V <dbl>, A3349337W <dbl>,
## # A3349397X <dbl>, A3349399C <dbl>, A3349874C <dbl>, A3349871W <dbl>,
## # A3349790V <dbl>, A3349556W <dbl>, A3349791W <dbl>, A3349401C <dbl>,
## # A3349873A <dbl>, A3349872X <dbl>, A3349709X <dbl>, A3349792X <dbl>,
## # A3349789K <dbl>, A3349555V <dbl>, A3349565X <dbl>, A3349414R <dbl>,
## # A3349799R <dbl>, A3349642T <dbl>, A3349413L <dbl>, A3349564W <dbl>,
## # A3349416V <dbl>, A3349643V <dbl>, A3349483V <dbl>, A3349722T <dbl>,
## # A3349727C <dbl>, A3349641R <dbl>, A3349639C <dbl>, A3349415T <dbl>,
## # A3349349F <dbl>, A3349563V <dbl>, A3349350R <dbl>, A3349640L <dbl>,
## # A3349566A <dbl>, A3349417W <dbl>, A3349352V <dbl>, A3349882C <dbl>,
## # A3349561R <dbl>, A3349883F <dbl>, A3349721R <dbl>, A3349478A <dbl>,
## # A3349637X <dbl>, A3349479C <dbl>, A3349797K <dbl>, A3349477X <dbl>,
## # A3349719C <dbl>, A3349884J <dbl>, A3349562T <dbl>, A3349348C <dbl>,
## # A3349480L <dbl>, A3349476W <dbl>, A3349881A <dbl>, A3349410F <dbl>,
## # A3349481R <dbl>, A3349718A <dbl>, A3349411J <dbl>, A3349638A <dbl>,
## # A3349654A <dbl>, A3349499L <dbl>, A3349902A <dbl>, A3349432V <dbl>,
## # A3349656F <dbl>, A3349361W <dbl>, A3349501L <dbl>, A3349503T <dbl>,
## # A3349360V <dbl>, A3349903C <dbl>, A3349905J <dbl>, A3349658K <dbl>,
## # A3349575C <dbl>, A3349428C <dbl>, A3349500K <dbl>, A3349577J <dbl>,
## # A3349433W <dbl>, A3349576F <dbl>, A3349574A <dbl>, A3349816F <dbl>,
## # A3349815C <dbl>, A3349744F <dbl>, A3349823C <dbl>, A3349508C <dbl>,
## # A3349742A <dbl>, A3349661X <dbl>, A3349660W <dbl>, A3349909T <dbl>,
## # A3349824F <dbl>, A3349507A <dbl>, A3349580W <dbl>, A3349825J <dbl>,
## # A3349434X <dbl>, A3349822A <dbl>, A3349821X <dbl>, A3349581X <dbl>,
## # A3349908R <dbl>, A3349743C <dbl>, A3349910A <dbl>, A3349435A <dbl>,
## # A3349365F <dbl>, A3349746K <dbl>, ...
The second argument (skip=1) is required because the Excel sheet has two header rows.
myts <- ts(retaildata[,"A3349873A"],
frequency=12, start=c(1982,4))
head(myts, 20)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1982 62.4 63.1 59.6 61.9 60.7 61.2 62.1 68.3 104.0
## 1983 63.9 64.8 70.0 65.3 68.9 65.7 66.9 70.4 71.6 74.9 83.4
autoplot(), ggseasonplot(), ggsubseriesplot(), gglagplot(), ggAcf()
autoplot(myts)+
labs(title = "Autoplot for Retail Sales ")+
xlab("Year") +
ylab("Sales")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggseasonplot(myts)+
labs(title = "Seasonplot for Retail Sales ")+
xlab("Year") +
ylab("Sales")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggsubseriesplot(myts)+
labs(title = "Subseries plot for Retail Sales ")+
xlab("Year") +
ylab("Sales")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
gglagplot(myts)+
labs(title = "Lagplot for Retail Sales ")+
xlab("Year") +
ylab("Sales")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggAcf(myts)+
labs(title = "ACF Retail Sales ")+
xlab("Year") +
ylab("Sales")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
The autoplot shows an uptrend in sales.
The seasonal plot shows seasonality in the months March, May, and July, and in the quarter Oct-Dec, when sales tend to go up compared to the rest of the months.
The subseries plot shows that there are more sales in November and significantly higher mean sales in DEcmeber.
The lagplot shows that there is a strong upward positive reationship at lag 12.
The acf plot shows a slow decrease in the ACF as the lags increase is due to the trend, while the “scalloped” shape is due the seasonality.R12 and R24 are much higher than the other lags as Decemeber sales are significantly higher.
autoplot(), ggseasonplot(), ggsubseriesplot(), gglagplot(), ggAcf() and explore features from the following time series: hsales, usdeaths, bricksq, sunspotarea, gasoline.hsales-Sales of one-family houses
autoplot(hsales)+
labs(title = "Autoplot ")+
xlab("Year") +
ylab("Sales")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggseasonplot(hsales)+
labs(title = "Seasonplot ")+
xlab("Year") +
ylab("Sales")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggsubseriesplot(hsales)+
labs(title = "Subseries plot ")+
xlab("Year") +
ylab("Sales")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
gglagplot(hsales)+
labs(title = "Lagplot ")+
xlab("Year") +
ylab("Sales")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggAcf(hsales)+
labs(title = "ACF Plot ")+
xlab("Year") +
ylab("Sales")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
The data shows both seasonal and cyclic patterns,but there is no evidence of a trend. The seasonality peaks in March, and tapers off until July. The autoplot shows a couple of down cycles in the early 80s and 90s, and up cycles in late 70s, 80s and 90s. The lag plots show that there is a positive autocorrelation in lag 1 only.The Acf shows this is not a white noise series with a lot of the bars going beyond the autocorrelation bounds.
usdeaths - Accidental deaths in USA
autoplot(usdeaths)+
labs(title = "Autoplot ")+
xlab("Year") +
ylab("No of deaths")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggseasonplot(usdeaths)+
labs(title = "Seasonplot ")+
xlab("Year") +
ylab("No of deaths")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggsubseriesplot(usdeaths)+
labs(title = "Subseries plot ")+
xlab("Year") +
ylab("No of deaths")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
gglagplot(usdeaths)+
labs(title = "Lagplot ")+
xlab("Year") +
ylab("No of deaths")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggAcf(usdeaths)+
labs(title = "ACF Plot ")+
xlab("Year") +
ylab("No of deaths")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
There is strong seasonality, but no trend and cyclicity can be seen. There seems to be a spike in deaths during the middle of the year. The lag plots show that there is a strong, positive autocorrelation in lag 12. The Acf shows 15 out of 24 lags beyond the autocorrelation boundaries so this is not a white noise series.
bricksq - Quarterly clay brick production
autoplot(bricksq)+
labs(title = "Autoplot ")+
xlab("Year") +
ylab("No of units produced")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggseasonplot(bricksq)+
labs(title = "Seasonplot ")+
xlab("Year") +
ylab("No of units produced")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggsubseriesplot(bricksq)+
labs(title = "Subseries plot ")+
xlab("Year") +
ylab("No of units produced")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
gglagplot(bricksq)+
labs(title = "Lagplot ")+
xlab("Year") +
ylab("No of units produced")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggAcf(bricksq)+
labs(title = "ACF Plot ")+
xlab("Year") +
ylab("No of units produced")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
There is an upward trend and we can see dips every 3-5 years suggesting a cyclic pattern. There is seasonality with Q1 having the lowest and Q3 having the highest production levels. The data exhibits strong autocorrelation in lags 1 and 4. The Acf shows only positive autocorrelation for the lags, and the slow decrease in the ACF is due to the trend.The scallops are due to the seasonality.
sunspotarea - Annual average sunspot area
autoplot(sunspotarea)+
labs(title = "Autoplot ")+
xlab("Year") +
ylab("Area")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
#ggseasonplot(sunspotarea)+
# labs(title = "Seasonplot")
#xlab("Year") +
#ylab("Area")+
#theme_minimal()+
#theme(plot.title = element_text(hjust = 0.5))
#ggsubseriesplot(sunspotarea)+
# labs(title = "Subseries plot ")+
# xlab("Year") +
#ylab("Area")+
#theme_minimal()+
#theme(plot.title = element_text(hjust = 0.5))
gglagplot(sunspotarea)+
labs(title = "Lagplot ")+
xlab("Year") +
ylab("Area")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggAcf(sunspotarea)+
labs(title = "ACF Plot ")+
xlab("Year") +
ylab("Area")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
There is no indication of seasonality. Because of how the data was recorded, the seasonal and subseries plots could not be plotted. There is no clear trend but there seems to be upward and downward swings occurring over a period of 11 years.. The ACF shows this is not a white noise series.
gasoline - US finished motor gasoline product supplied
autoplot(gasoline)+
labs(title = "Autoplot ")+
xlab("Week") +
ylab("No of units Supplied")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggseasonplot(gasoline)+
labs(title = "Seasonplot ")+
xlab("Week") +
ylab("No of units Supplied")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
#ggsubseriesplot(gasoline)+
# labs(title = "Subseries plot ")+
# xlab("Week") +
#ylab("No of units Supplied")+
#theme_minimal()+
#theme(plot.title = element_text(hjust = 0.5))
gglagplot(gasoline)+
labs(title = "Lagplot ")+
xlab("Week") +
ylab("No of units Supplied")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggAcf(gasoline)+
labs(title = "ACF Plot ")+
xlab("Week") +
ylab("No of units Supplied")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
The data exhibits a clear upward trend. The seasonal plot shows some seasonality depending on the week of the year, there is peak supply during the summer months with drop offs in the late fall and winter months. There is no noticeable cyclic pattern. The Acf shows this is not a white noise series. There are scallops in the Acf depicting the seasonality.