library(fpp3)
library(magrittr)
library(tidyverse)
library(patchwork)
library(readxl)
library(flextable)
library(USgas)
Use the help function to explore what the series gafa_stock, PBS, vic_elec and pelt represent.
gafa_stock: Historical stock prices from 2014-2018 for Google, Amazon, Facebook and Apple. All prices are in $USD.
PBS: Monthly Medicare Australia prescription data
elec_vic: half-hourly electricity demand for Victoria, Australia
pelt: Hudson Bay Company trading records for Snowshoe Hare and Canadian Lynx furs from 1845 to 1935.
Use autoplot() to plot some of the series in these data sets.
What is the time interval of each series?
# explore series
help(gafa_stock)
help(PBS)
help(vic_elec)
help(pelt)
# 1.a. Use autoplot() to plot some of the series in these data sets
#plot interannual closing price
<-gafa_stock%>% #closing daily stock price:Amazon
amazonfilter(Symbol=='AMZN')
<-autoplot(amazon, Adj_Close, color='darkblue') +
p1labs(title = "Amazon Adjusted Stock Closing Price",
subtitle = "2014-2018",
y = "Adjusted Closing Price ($)")+
theme_classic()
# plot avg daily demand
<- aggregate(Demand~Date,vic_elec, mean)%>% # avg daily demand: vic_elec
daily_avg as_tsibble()
<-autoplot(daily_avg, Demand, color='darkblue')+
p2labs(title = "Average Daily Electricity Demand in Victoria, Australia",
subtitle = " Year = 2012",
y = "Demand (MW))")+
theme_classic()
/p2) (p1
Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.
AMZN: $2,039.51
GOOG: $1,268.33
AAPL: $232.07
FB: $217.50
# create function and map with purrr to find peak closing dates
# note we need to convert stibble to tibble
<-set_names(c(gafa_stock$Symbol%>%unique()))
company
= function(x) {
f %>%
gafa_stockas_tibble()%>%
filter(Symbol== x)%>%
summarise(maxi=max(Close))%>%
reduce(full_join, by = 'Symbol')
}
<-map_df(company, ~f(.x))%>%pivot_longer(cols = 1:4,names_to = "Company", values_to = "Peak_Close")%>%arrange(desc(Peak_Close))%>%flextable()) (max_close
Company | Peak_Close |
AMZN | 2,039.51 |
GOOG | 1,268.33 |
AAPL | 232.07 |
FB | 217.50 |
# The following is a more streamlined means of finding peak closing prices
#gafa_stock%>%
#as.tibble()%>%
#select(Symbol, Close)%>%
#group_by(Symbol)%>%
#summarise(maxi=max(Close))%>%
#arrange(desc(maxi))%>%
#flextable()
Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents.
Without facet_grid the three series are plotted on the same graph with a continuous scale for the y-axis. This compresses the plotted data (i.e., reduced amplitude) and, in turn, reduces an investigators ability to distinguish fine-grained patterns within and across series.
# read data tute1 into R
<- read_csv("tute1.csv")
tute1
# convert tute1 into a time series
<- tute1 %>%
series mutate(Quarter = yearmonth(Quarter)) %>% #tsibble represent year-month
as_tsibble(index = Quarter) # date-times should be declared as index
# Construct time series plots of each of the three series
%>%
series pivot_longer(-Quarter) %>%
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() +
facet_grid(name ~ ., scales = "free_y")
The USgas package contains data on the demand for natural gas in the US.
Note: USgas consists of three data sets:us_monthly, us_total, us_residential.
# create tsibble from us_total with year as the index and state as the key
<- USgas::us_total%>%
total as_tsibble(key=state, index = year)
#Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).
%>%
totalfilter(state %in% c('Maine', 'Vermont', 'New Hampshire', 'Massachusetts', 'Connecticut', 'Rhode Island'))%>%
mutate_at(vars(y),funs(y=y/1000))%>% # data = units of million cubic feet: divide by 1000 for rendering
autoplot()+
labs(title= 'Annual Natural Gas Consumption in New England', y = 'Million cubic feet/1000', x= 'Year')+
theme_classic()+
facet_grid(state ~ ., scales = "free_y")
Download tourism.xlsx from the book website and read it into R using readxl::read_excel().
The combination of Region State with the maximum average number of overnight trips was Melbourne Visiting = 985.
Other combinations are included in Table 5.1 below.
# make tsibble identical to tsibble::tourism
# we need to change Quarter col to Date when importing Excel
<-read_excel('tourism.xlsx')
tourism$Quarter<-as.Date(tourism$Quarter)
tourism
%>%
tourismas_tsibble(key=c(Region, State, Purpose), index = Quarter)%>%
mutate(Quarter = yearquarter(Quarter))
# Combination of Region and Purpose with maximum number of overnight trips on average
aggregate(Trips ~ Region+Purpose, tourism, max)%>%
group_by(Region)%>%
arrange(desc(Trips))%>%
slice(1)%>%
arrange(desc(Trips))%>%
flextable()%>%
set_caption("Table 5.1: Maximum average number of overnight trips")
Region | Purpose | Trips |
Melbourne | Visiting | 985.27840 |
Sydney | Business | 948.12936 |
South Coast | Holiday | 914.77283 |
North Coast NSW | Holiday | 905.84500 |
Brisbane | Visiting | 796.30206 |
Gold Coast | Holiday | 711.03724 |
Sunshine Coast | Holiday | 617.47597 |
Australia's South West | Holiday | 612.08986 |
Great Ocean Road | Holiday | 548.41108 |
Experience Perth | Visiting | 538.26505 |
Peninsula | Holiday | 477.86811 |
Hunter | Holiday | 451.22469 |
Tropical North Queensland | Holiday | 383.39884 |
Snowy Mountains | Holiday | 378.14346 |
High Country | Holiday | 357.75049 |
Phillip Island | Holiday | 315.97665 |
Canberra | Holiday | 299.83128 |
Australia's North West | Business | 296.80234 |
Central NSW | Holiday | 284.65414 |
Central Coast | Holiday | 280.61938 |
Hobart and the South | Holiday | 275.04495 |
Adelaide | Visiting | 269.53562 |
Gippsland | Holiday | 261.86311 |
Geelong and the Bellarine | Holiday | 243.27757 |
Darling Downs | Visiting | 242.53206 |
Central Queensland | Holiday | 214.34825 |
Blue Mountains | Holiday | 206.22652 |
Fleurieu Peninsula | Holiday | 205.15906 |
Lakes | Holiday | 201.34897 |
Australia's Coral Coast | Holiday | 198.17779 |
The Murray | Holiday | 196.55227 |
Bendigo Loddon | Visiting | 190.36415 |
Central Murray | Holiday | 186.54155 |
New England North West | Visiting | 178.95063 |
Australia's Golden Outback | Business | 173.85992 |
Launceston, Tamar and the North | Holiday | 157.96940 |
East Coast | Holiday | 157.58189 |
Flinders Ranges and Outback | Holiday | 156.14313 |
Capital Country | Visiting | 155.99330 |
Northern | Visiting | 150.21740 |
Whitsundays | Holiday | 143.63325 |
Yorke Peninsula | Holiday | 138.03573 |
Riverina | Visiting | 135.58845 |
Darwin | Holiday | 132.99124 |
Mackay | Business | 132.53898 |
Fraser Coast | Holiday | 130.82576 |
Outback | Business | 130.22385 |
Melbourne East | Visiting | 125.18315 |
North West | Holiday | 118.89323 |
Mallee | Holiday | 115.03198 |
Limestone Coast | Holiday | 112.93466 |
Upper Yarra | Holiday | 106.31121 |
Goulburn | Visiting | 105.36416 |
Ballarat | Visiting | 102.81502 |
Central Highlands | Holiday | 99.62681 |
Outback NSW | Holiday | 97.37752 |
Bundaberg | Visiting | 95.28684 |
Eyre Peninsula | Holiday | 94.90047 |
Spa Country | Holiday | 85.73199 |
Katherine Daly | Holiday | 84.97401 |
Lasseter | Holiday | 84.94788 |
Wilderness West | Holiday | 84.88036 |
Kakadu Arnhem | Holiday | 82.49212 |
Murraylands | Holiday | 81.13738 |
Adelaide Hills | Visiting | 81.10211 |
Riverland | Holiday | 78.26580 |
Alice Springs | Holiday | 76.54138 |
Western Grampians | Holiday | 65.64098 |
Macedon | Visiting | 59.78706 |
Barossa | Holiday | 51.00731 |
Wimmera | Visiting | 42.51681 |
Clare Valley | Holiday | 42.23342 |
Murray East | Business | 40.18606 |
Barkly | Holiday | 37.87040 |
Kangaroo Island | Holiday | 36.89924 |
MacDonnell | Holiday | 28.50539 |
# total trips by state
<-aggregate(Trips~Quarter+State, tourism, sum)%>%
trips_statearrange(State)%>%
as_tsibble(key=State, index = Quarter)
#plot total trips for visual assessment
%>%
trips_stateautoplot(.vars=Trips)+
labs(title= 'Total Trips by State: 2000-2015', y = 'Number of Trips', x= 'Year')+
theme_classic()+
facet_grid(State ~ ., scales = "free_y")
Monthly Australian retail data is provided in aus_retail. Select one of the time series as follows (but choose your own seed value):
set.seed(12345678) myseries <- aus_retail %>% filter(Series ID
== sample(aus_retail$Series ID
,1))
Explore your chosen retail time series using the following functions:
Can you spot any seasonality, cyclicity and trend? What do you learn about the series?
See response below following the graphs.
set.seed(124566791)
# load data
<- tsibbledata::aus_retail %>%
series filter(`Series ID` == sample(aus_retail$`Series ID`,1)) #1 specifies number of Series ID groups
# autoplot
<-series%>%
p1autoplot(Turnover)+
labs(title = 'Figure 1. Inter-Annual Turnover in Pharmaceutical, Cosmetic and Toiletry Retail Industry',
subtitle = '1982-2020')+
theme(axis.text.x = element_text(angle = 90))+
theme_classic()
<-series%>%
p2separate(Month, into=c('Year', 'Month'), sep=' ')%>%
mutate(Year = as.numeric(Year))%>%
filter(Year >= 1982)%>%
ggplot(aes(Month, Turnover))+
geom_point(aes( color=Year))+
theme(axis.text.x = element_text(angle = 90))+
labs(title = 'Figure 2. Monthly Turnover in Pharmaceutical, Cosmetic and Toiletry Retail Industry',
subtitle = '1982-2020')+
theme_classic()
# gg_season
<-series%>%
p3gg_season(Turnover)+
labs(title = 'Figure 3. Turnover in Pharmaceutical, Cosmetic and Toiletry Retail Industry',
subtitle='By Month and Year')+
theme_classic()
# gg_subseries
<-series%>%
p4gg_subseries(Turnover)+
labs(title = 'Figure 4. Turnover in Pharmaceutical, Cosmetic and Toiletry Retail Industry',
subtitle='Subplots: Month and Year', x='Year')+
theme_classic()+
theme(axis.text.x = element_text(angle = 90))
# ACF
<-series%>%
p5ACF(Turnover, lag_max=48) %>%
autoplot()+
labs('Figure 5. Autocorrelation: Turnover in Pharmaceutical, Cosmetic and Toiletry Retail Industry')+
theme_classic()
# gg_lag
<-series%>%
p6gg_lag(Turnover, geom = 'point', lags = 1:12)+
labs(title = 'Figure 6. Lag in Turnover in Pharmaceutical, Cosmetic and Toiletry Retail Industry',
x = "lag(Turnover, k)")+
theme_classic()
# print plots
p1
p2
p3
p4
p5
p6
Q8 Summary:
The following patterns are apparent in this series:
Retail turnover ($Million AUD) in the Pharmaceutical, Cosmetic, and Toiletry industry has increased since the early 1980’s, with greater seasonal and inter-annual variation subsequent to ~2008. There is a strong seasonal component with turnover spiking with the end-of-year holiday season, followed by a lull for several months. The possibility of a decadal cycle in the industry (if confirmed) is interesting and warrants further investigation.