# Load required R packages
library(fpp3)

Exercise 2.1

Use the help function to explore what the series gafa_stock, PBS, vic_elec and pelt represent.

gafa_stock: Historical stock prices from 2014-2018 for Google, Amazon, Facebook and Apple. All prices are in $USD. Observations occur only on trading days, so the index is irregular.

PBS: Monthly tsibble of Medicare Australia prescription data with two measured values: Scripts, the total number of scripts, and Cost, the cost of the scripts in $AUD.

vic_elec: Half-hourly tsibble with three values: Demand, the total electricity demand in MW; Temperature, the temperature at Melbourne (BOM site 086071); and Holiday, an indicator for whether the day is a public holiday.

pelt: Hudson Bay Company trading records for Snowshoe Hare and Canadian Lynx furs from 1845 to 1935. The data cover trading across all areas of the company.
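The descriptions above come from the help pages, which can be opened with:

# Open each dataset's help page
?gafa_stock
?PBS
?vic_elec
?pelt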

head(gafa_stock)
## # A tsibble: 6 x 8 [!]
## # Key:       Symbol [1]
##   Symbol Date        Open  High   Low Close Adj_Close    Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>
## 1 AAPL   2014-01-02  79.4  79.6  78.9  79.0      67.0  58671200
## 2 AAPL   2014-01-03  79.0  79.1  77.2  77.3      65.5  98116900
## 3 AAPL   2014-01-06  76.8  78.1  76.2  77.7      65.9 103152700
## 4 AAPL   2014-01-07  77.8  78.0  76.8  77.1      65.4  79302300
## 5 AAPL   2014-01-08  77.0  77.9  77.0  77.6      65.8  64632400
## 6 AAPL   2014-01-09  78.1  78.1  76.5  76.6      65.0  69787200
head(PBS)
## # A tsibble: 6 x 9 [1M]
## # Key:       Concession, Type, ATC1, ATC2 [1]
##      Month Concession   Type   ATC1  ATC1_desc    ATC2  ATC2_desc  Scripts  Cost
##      <mth> <chr>        <chr>  <chr> <chr>        <chr> <chr>        <dbl> <dbl>
## 1 1991 Jul Concessional Co-pa… A     Alimentary … A01   STOMATOLO…   18228 67877
## 2 1991 Aug Concessional Co-pa… A     Alimentary … A01   STOMATOLO…   15327 57011
## 3 1991 Sep Concessional Co-pa… A     Alimentary … A01   STOMATOLO…   14775 55020
## 4 1991 Oct Concessional Co-pa… A     Alimentary … A01   STOMATOLO…   15380 57222
## 5 1991 Nov Concessional Co-pa… A     Alimentary … A01   STOMATOLO…   14371 52120
## 6 1991 Dec Concessional Co-pa… A     Alimentary … A01   STOMATOLO…   15028 54299
head(vic_elec)
## # A tsibble: 6 x 5 [30m] <Australia/Melbourne>
##   Time                Demand Temperature Date       Holiday
##   <dttm>               <dbl>       <dbl> <date>     <lgl>  
## 1 2012-01-01 00:00:00  4383.        21.4 2012-01-01 TRUE   
## 2 2012-01-01 00:30:00  4263.        21.0 2012-01-01 TRUE   
## 3 2012-01-01 01:00:00  4049.        20.7 2012-01-01 TRUE   
## 4 2012-01-01 01:30:00  3878.        20.6 2012-01-01 TRUE   
## 5 2012-01-01 02:00:00  4036.        20.4 2012-01-01 TRUE   
## 6 2012-01-01 02:30:00  3866.        20.2 2012-01-01 TRUE
head(pelt)
## # A tsibble: 6 x 3 [1Y]
##    Year  Hare  Lynx
##   <dbl> <dbl> <dbl>
## 1  1845 19580 30090
## 2  1846 19600 45150
## 3  1847 19610 49150
## 4  1848 11990 39520
## 5  1849 28040 21230
## 6  1850 58000  8420

A.

Use autoplot() to plot some of the series in these data sets.

autoplot(gafa_stock, Close) +
  labs(title = "Closing Stock Price",
       subtitle = "Google, Amazon, Facebook, Apple",
       y = "Price ($USD)")

pbs_a10 <- PBS %>%
  filter(ATC2 == "A10", Concession == "Concessional", Type == "Co-payments") %>%
  mutate(Cost = Cost / 1e6) %>%  # convert cost to millions of $AUD
  select(Month, Cost)            # tsibble retains the key columns automatically

autoplot(pbs_a10, Cost) +
  labs(title = "Total cost of Medicare Australia prescriptions by month",
       subtitle = "ATC2 = A10, Concessional, Co-payments",
       y = "Cost ($AUD millions)")

autoplot(vic_elec, Demand) +
  labs(title = "Electricity Demand",
       subtitle = "Victoria, Australia",
       y = "Megawatts")

autoplot(pelt, Hare) +
  labs(title = "Snowshoe Hare pelts traded, 1845-1935",
       subtitle = "Hudson Bay Company",
       y = "Pelts traded")

B.

What is the time interval of each series?

gafa_stock: One trading day; days when the market is closed are absent, so the interval is irregular (the [!] in the tsibble header indicates irregularity). See the programmatic check after this list.

PBS: One month

vic_elec: 30 minutes

pelt: One year
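These intervals can be read off programmatically with tsibble's interval() accessor (written with an explicit namespace, since lubridate also exports an interval()):

# Confirm each series' interval directly
tsibble::interval(gafa_stock)  # irregular (!)
tsibble::interval(PBS)         # 1M
tsibble::interval(vic_elec)    # 30m
tsibble::interval(pelt)        # 1Y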

Exercise 2.2

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

result <- gafa_stock %>%
  group_by(Symbol) %>%
  filter(Close == max(Close)) %>%
  arrange(desc(Close))
result
## # A tsibble: 4 x 8 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 AMZN   2018-09-04 2026. 2050. 2013  2040.     2040.  5721100
## 2 GOOG   2018-07-26 1251  1270. 1249. 1268.     1268.  2405600
## 3 AAPL   2018-10-03  230.  233.  230.  232.      230. 28654800
## 4 FB     2018-07-25  216.  219.  214.  218.      218. 58954200
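As a cross-check, the same answer can be reached with plain dplyr verbs on an ordinary tibble; slice_max() keeps the top row per group (a sketch equivalent to the filter() approach above):

# Equivalent check: peak closing date and price per stock via slice_max()
gafa_stock %>%
  as_tibble() %>%
  group_by(Symbol) %>%
  slice_max(Close, n = 1) %>%
  select(Symbol, Date, Close)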

Exercise 2.3

Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.

A.

You can read the data into R with the following script:

tute1 <- readr::read_csv("tute1.csv")
View(tute1)

B.

Convert the data to time series

# yearquarter(), not yearmonth(): the data are quarterly, and a monthly index
# would leave two-month gaps (the tsibble would report a [3M] interval)
mytimeseries <- tute1 %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter)

head(mytimeseries)
## # A tsibble: 6 x 4 [1Q]
##   Quarter Sales AdBudget   GDP
##     <qtr> <dbl>    <dbl> <dbl>
## 1 1981 Q1 1020.     659.  252.
## 2 1981 Q2  889.     589   291.
## 3 1981 Q3  795      512.  291.
## 4 1981 Q4 1004.     614.  292.
## 5 1982 Q1 1058.     647.  279.
## 6 1982 Q2  944.     602   254

C.

Construct time series plots of each of the three series

mytimeseries %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

Check what happens when you don’t include facet_grid().

As seen below, without facet_grid() the three time series are drawn in a single panel instead of three separate ones. I would argue this gives a more contextualized picture of the relationship between the three series: with a shared y-axis, their relative magnitudes and co-movement are easier to compare than in the faceted version.

mytimeseries %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line()

Exercise 2.4

The USgas package contains data on the demand for natural gas in the US.

A.

Install the USgas package.

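# install.packages("USgas")  # run once if the package is not already installed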
library(USgas)

B.

Create a tsibble from us_total with year as the index and state as the key.

# Avoid calling the object ts, which would mask the base stats::ts() function
us_ts <- us_total %>%
  as_tsibble(index = year, key = state)

head(us_ts)
## # A tsibble: 6 x 3 [1Y]
## # Key:       state [1]
##    year state        y
##   <int> <chr>    <int>
## 1  1997 Alabama 324158
## 2  1998 Alabama 329134
## 3  1999 Alabama 337270
## 4  2000 Alabama 353614
## 5  2001 Alabama 332693
## 6  2002 Alabama 379343

C.

Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).

ne_states <- c("Maine", "Vermont", "New Hampshire",
               "Massachusetts", "Connecticut", "Rhode Island")

ne_ts <- us_ts %>%
  filter(state %in% ne_states) %>%
  mutate(y = y / 1e3)  # per the USgas docs, y is in million cubic feet; this gives billions


head(ne_ts)
## # A tsibble: 6 x 3 [1Y]
## # Key:       state [1]
##    year state           y
##   <int> <chr>       <dbl>
## 1  1997 Connecticut  145.
## 2  1998 Connecticut  131.
## 3  1999 Connecticut  152.
## 4  2000 Connecticut  160.
## 5  2001 Connecticut  146.
## 6  2002 Connecticut  178.
autoplot(ne_ts, y) +
  labs(title = "Annual natural gas consumption by state",
       subtitle = "New England area",
       y = "Consumption (billions of cubic feet)")

Exercise 2.5

A.

Download tourism.xlsx from the book website and read it into R using readxl::read_excel().

tourism_data <- readxl::read_excel("tourism.xlsx")

head(tourism_data)
## # A tibble: 6 × 5
##   Quarter    Region   State           Purpose  Trips
##   <chr>      <chr>    <chr>           <chr>    <dbl>
## 1 1998-01-01 Adelaide South Australia Business  135.
## 2 1998-04-01 Adelaide South Australia Business  110.
## 3 1998-07-01 Adelaide South Australia Business  166.
## 4 1998-10-01 Adelaide South Australia Business  127.
## 5 1999-01-01 Adelaide South Australia Business  137.
## 6 1999-04-01 Adelaide South Australia Business  200.

B.

Create a tsibble which is identical to the tourism tsibble from the tsibble package.

First, display the built-in tourism tsibble as a reference.

# Output tourism tsibble
head(tourism)
## # A tsibble: 6 x 5 [1Q]
## # Key:       Region, State, Purpose [1]
##   Quarter Region   State           Purpose  Trips
##     <qtr> <chr>    <chr>           <chr>    <dbl>
## 1 1998 Q1 Adelaide South Australia Business  135.
## 2 1998 Q2 Adelaide South Australia Business  110.
## 3 1998 Q3 Adelaide South Australia Business  166.
## 4 1998 Q4 Adelaide South Australia Business  127.
## 5 1999 Q1 Adelaide South Australia Business  137.
## 6 1999 Q2 Adelaide South Australia Business  200.

Initial thoughts: the five columns match. Steps: (1) convert the Quarter column into a quarterly index with yearquarter(); (2) declare Region, State and Purpose as keys.

# Convert tourism_data to tsibble
tourism_ts <- tourism_data %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter, key = c(Region, State, Purpose))

head(tourism_ts)
## # A tsibble: 6 x 5 [1Q]
## # Key:       Region, State, Purpose [1]
##   Quarter Region   State           Purpose  Trips
##     <qtr> <chr>    <chr>           <chr>    <dbl>
## 1 1998 Q1 Adelaide South Australia Business  135.
## 2 1998 Q2 Adelaide South Australia Business  110.
## 3 1998 Q3 Adelaide South Australia Business  166.
## 4 1998 Q4 Adelaide South Australia Business  127.
## 5 1999 Q1 Adelaide South Australia Business  137.
## 6 1999 Q2 Adelaide South Australia Business  200.
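As a quick verification that the constructed tsibble matches the built-in one, all.equal() can be used (it is more forgiving than identical(), which can fail on minor attribute differences):

# Check the constructed tsibble against the built-in tourism tsibble
all.equal(tourism_ts, tourism)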

C.

Find what combination of Region and Purpose had the maximum number of overnight trips on average.

tourism_by_reg_pur_ts <- tourism_data %>%
  group_by(Region, Purpose) %>%
  summarise(Trip_Avg = mean(Trips)) %>%
  filter(Trip_Avg == max(Trip_Avg)) %>%
  arrange(desc(Trip_Avg))

head(tourism_by_reg_pur_ts)
## # A tibble: 6 × 3
## # Groups:   Region [6]
##   Region          Purpose  Trip_Avg
##   <chr>           <chr>       <dbl>
## 1 Sydney          Visiting     747.
## 2 Melbourne       Visiting     619.
## 3 North Coast NSW Holiday      588.
## 4 Gold Coast      Holiday      528.
## 5 South Coast     Holiday      495.
## 6 Brisbane        Visiting     493.

Answer: the combination of region Sydney and purpose Visiting has the maximum average number of overnight trips per quarter, at roughly 747 thousand (Trips is measured in thousands).
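The grouped filter above keeps the best purpose within every region, which is why head() shows six rows. If only the single overall maximum is wanted, dropping the grouping first makes that explicit (a sketch using the same data):

# Single overall maximum combination of Region and Purpose
tourism_data %>%
  group_by(Region, Purpose) %>%
  summarise(Trip_Avg = mean(Trips), .groups = "drop") %>%
  slice_max(Trip_Avg, n = 1)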

D.

Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.

# On a tsibble, group_by() + summarise() keeps the index, so the result is
# already a quarterly tsibble keyed by State; no re-conversion is needed
trips_by_state <- tourism_ts %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips))

head(trips_by_state)
## # A tsibble: 6 x 3 [1Q]
## # Key:       State [1]
##   State Quarter Trips
##   <chr>   <qtr> <dbl>
## 1 ACT   1998 Q1  551.
## 2 ACT   1998 Q2  416.
## 3 ACT   1998 Q3  436.
## 4 ACT   1998 Q4  450.
## 5 ACT   1999 Q1  379.
## 6 ACT   1999 Q2  558.
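A quick plot confirms the new tsibble behaves as expected, with one series per state:

# Plot total overnight trips by state
autoplot(trips_by_state, Trips) +
  labs(title = "Total overnight trips by state",
       y = "Trips ('000)")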

Exercise 2.8

Monthly Australian retail data is provided in aus_retail. Select one of the time series as follows (but choose your own seed value):

# set.seed(12345678)  # seed used in the book; a different seed is chosen below
set.seed(8675309)

myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

head(myseries)
## # A tsibble: 6 x 5 [1M]
## # Key:       State, Industry [1]
##   State      Industry               `Series ID`    Month Turnover
##   <chr>      <chr>                  <chr>          <mth>    <dbl>
## 1 Queensland Takeaway food services A3349718A   1982 Apr     26.7
## 2 Queensland Takeaway food services A3349718A   1982 May     27.3
## 3 Queensland Takeaway food services A3349718A   1982 Jun     26.2
## 4 Queensland Takeaway food services A3349718A   1982 Jul     25.2
## 5 Queensland Takeaway food services A3349718A   1982 Aug     25.6
## 6 Queensland Takeaway food services A3349718A   1982 Sep     26.7
dim(myseries)
## [1] 441   5

Explore your chosen retail time series using the following functions:

autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() %>% autoplot()

autoplot(myseries, Turnover) +
  labs(title = "Turnover in Queensland Takeaway food services",
       subtitle = "Series ID: A3349718A",
       y = "Turnover ($Million AUD)")

autoplot() indicates an increasing trend.

gg_season(myseries, Turnover) +
  labs(title = "Turnover in Queensland Takeaway food services",
       subtitle = "Series ID: A3349718A",
       y = "Turnover ($Million AUD)")

gg_season() indicates a seasonal pattern, particularly in the more recent years.

gg_subseries(myseries, Turnover) +
  labs(title = "Turnover in Queensland Takeaway food services",
       subtitle = "Series ID: A3349718A",
       y = "Turnover ($Million AUD)")

gg_subseries() reiterates the finding from gg_season(). The blue lines mark the mean turnover for each month, underscoring the within-year seasonal pattern.

gg_lag(myseries, Turnover, geom = "point") +
  labs(x = "lag(Turnover, k)")

I’ll admit gg_lag() is not telling me much beyond a strong positive relationship at every lag shown. My impression is that the plot speaks more to the overall growth in turnover than to any lag-specific structure.

myseries %>% ACF(Turnover) %>% autoplot()

Can you spot any seasonality, cyclicity and trend? What do you learn about the series?

I would say an increasing trend is present, and the data follow a seasonal pattern over the course of a given year. The ACF() plot almost shows the scalloped pattern described in the textbook, but not quite. Given the step-down pattern after lags 12 and 24, I classify the series as seasonal with a yearly period. I would not say the data show a cyclic pattern.
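To look at the seasonal lags more closely, the ACF can be computed out to three years using the lag_max argument of feasts::ACF():

# Inspect autocorrelations out to lag 36 (three seasonal periods)
myseries %>%
  ACF(Turnover, lag_max = 36)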

If I wanted to argue for cyclicity, perhaps a case could be made for a rough 7-year stock-market cycle from 1998 through 2014, but I think that’s a stretch. I’m not an expert on the Australian economy, but if it follows the US economy through global shocks, then the dot-com bust of the late 1990s and the 2008 subprime mortgage crash could plausibly be reflected here. Isolated shocks are not a cycle, though, so I wouldn’t say this series captures a true stock-market cycle.
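To sanity-check that impression, here is a sketch that zooms in on the 1998-2014 window mentioned above (year() is lubridate's accessor, which fpp3 attaches):

# Zoom in on 1998-2014 to inspect the possible cycle
myseries %>%
  filter(year(Month) >= 1998, year(Month) <= 2014) %>%
  autoplot(Turnover) +
  labs(title = "Turnover in Queensland Takeaway food services, 1998-2014",
       y = "Turnover ($Million AUD)")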