library(fpp3)

2.1

Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.

Use ? (or help()) to find out about the data in each series.
What is the time interval of each series?
Use autoplot() to produce a time plot of each series.
For the last plot, modify the axis labels and title.

These commands are commented out so they don’t pop up in your browser.

# Help pages for the four data sets. They stay commented out so that running
# the script does not open the documentation viewer.
# help(aus_production)
# help(pelt)
# help(gafa_stock)
# help(vic_elec)

# Print the tsibble; its header reports the time interval of the series.
print(aus_production)
## # A tsibble: 218 x 7 [1Q]
##    Quarter  Beer Tobacco Bricks Cement Electricity   Gas
##      <qtr> <dbl>   <dbl>  <dbl>  <dbl>       <dbl> <dbl>
##  1 1956 Q1   284    5225    189    465        3923     5
##  2 1956 Q2   213    5178    204    532        4436     6
##  3 1956 Q3   227    5297    208    561        4806     7
##  4 1956 Q4   308    5681    197    570        4418     6
##  5 1957 Q1   262    5577    187    529        4339     5
##  6 1957 Q2   228    5651    214    604        4811     7
##  7 1957 Q3   236    5317    227    603        5259     7
##  8 1957 Q4   320    6152    222    582        4735     6
##  9 1958 Q1   272    5758    199    554        4608     5
## 10 1958 Q2   233    5641    229    620        5196     7
## # i 208 more rows
# Time plot of the Bricks series with a descriptive title and y-axis label.
autoplot(aus_production, Bricks) +
  labs(
    title = "Quarterly Brick Production",
    y = "Brick production (millions)"
  )
## Warning: Removed 20 rows containing missing values (`geom_line()`).

# Print the tsibble; its header reports the time interval of the series.
print(pelt)
## # A tsibble: 91 x 3 [1Y]
##     Year  Hare  Lynx
##    <dbl> <dbl> <dbl>
##  1  1845 19580 30090
##  2  1846 19600 45150
##  3  1847 19610 49150
##  4  1848 11990 39520
##  5  1849 28040 21230
##  6  1850 58000  8420
##  7  1851 74600  5560
##  8  1852 75090  5080
##  9  1853 88480 10170
## 10  1854 61280 19600
## # i 81 more rows
# Time plot of the Lynx series with a descriptive title and y-axis label.
autoplot(pelt, Lynx) +
  labs(
    title = "Annual Lynx Pelts Traded",
    y = "Number of pelts traded"
  )

# Print the tsibble; its header reports the key and the time interval.
print(gafa_stock)
## # A tsibble: 5,032 x 8 [!]
## # Key:       Symbol [4]
##    Symbol Date        Open  High   Low Close Adj_Close    Volume
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>
##  1 AAPL   2014-01-02  79.4  79.6  78.9  79.0      67.0  58671200
##  2 AAPL   2014-01-03  79.0  79.1  77.2  77.3      65.5  98116900
##  3 AAPL   2014-01-06  76.8  78.1  76.2  77.7      65.9 103152700
##  4 AAPL   2014-01-07  77.8  78.0  76.8  77.1      65.4  79302300
##  5 AAPL   2014-01-08  77.0  77.9  77.0  77.6      65.8  64632400
##  6 AAPL   2014-01-09  78.1  78.1  76.5  76.6      65.0  69787200
##  7 AAPL   2014-01-10  77.1  77.3  75.9  76.1      64.5  76244000
##  8 AAPL   2014-01-13  75.7  77.5  75.7  76.5      64.9  94623200
##  9 AAPL   2014-01-14  76.9  78.1  76.8  78.1      66.1  83140400
## 10 AAPL   2014-01-15  79.1  80.0  78.8  79.6      67.5  97909700
## # i 5,022 more rows
# Time plot of the closing price, one line per stock symbol (the key).
autoplot(gafa_stock, Close) +
  labs(
    title = "GAFA Stock Closing Prices",
    y = "Closing Price (USD)"
  )

# Print the tsibble; its header reports the time interval and time zone.
print(vic_elec)
## # A tsibble: 52,608 x 5 [30m] <Australia/Melbourne>
##    Time                Demand Temperature Date       Holiday
##    <dttm>               <dbl>       <dbl> <date>     <lgl>  
##  1 2012-01-01 00:00:00  4383.        21.4 2012-01-01 TRUE   
##  2 2012-01-01 00:30:00  4263.        21.0 2012-01-01 TRUE   
##  3 2012-01-01 01:00:00  4049.        20.7 2012-01-01 TRUE   
##  4 2012-01-01 01:30:00  3878.        20.6 2012-01-01 TRUE   
##  5 2012-01-01 02:00:00  4036.        20.4 2012-01-01 TRUE   
##  6 2012-01-01 02:30:00  3866.        20.2 2012-01-01 TRUE   
##  7 2012-01-01 03:00:00  3694.        20.1 2012-01-01 TRUE   
##  8 2012-01-01 03:30:00  3562.        19.6 2012-01-01 TRUE   
##  9 2012-01-01 04:00:00  3433.        19.1 2012-01-01 TRUE   
## 10 2012-01-01 04:30:00  3359.        19.0 2012-01-01 TRUE   
## # i 52,598 more rows
# Time plot of the Demand series with a customised title and y-axis label.
autoplot(vic_elec, Demand) +
  labs(
    title = "Electricity Demand for Victoria, Australia",
    y = "Demand (MWh)"
  )


2.2

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

# Within each stock symbol, keep the row(s) whose closing price equals that
# symbol's maximum closing price.
filter(
  group_by(gafa_stock, Symbol),
  Close == max(Close)
)
## # A tsibble: 4 x 8 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 AAPL   2018-10-03  230.  233.  230.  232.      230. 28654800
## 2 AMZN   2018-09-04 2026. 2050. 2013  2040.     2040.  5721100
## 3 FB     2018-07-25  216.  219.  214.  218.      218. 58954200
## 4 GOOG   2018-07-26 1251  1270. 1249. 1268.     1268.  2405600

2.3

You can read the data in to R with the following script.

# Read the tutorial data from the working directory; column types are guessed
# by readr (see the column specification message it prints).
tute1 <- readr::read_csv(file = "tute1.csv")
## Rows: 100 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl  (3): Sales, AdBudget, GDP
## date (1): Quarter
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
#View(tute1)

Convert the data to time series.

# Convert the date column to a yearquarter index and build a tsibble.
# as_tsibble() is required here: as_tibble() returns an ordinary tibble with
# no time index, so the result would not be a time series object.
mytimeseries <- tute1 %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter)

Construct time series plots of each of the three series.

# Reshape to long format (one row per Quarter and series), then draw each
# series in its own panel with an independent y-axis.
mytimeseries %>%
  pivot_longer(cols = -Quarter) %>%
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")

Check what happens when you don’t include `facet_grid()`.

# Same long-format plot, but without faceting: all series share one panel.
mytimeseries %>%
  pivot_longer(cols = -Quarter) %>%
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line()

Without facet_grid(), all three series are drawn in a single panel and share one y-axis, instead of each series getting its own panel with its own y-axis scale.


2.4

library(USgas)

# States that make up the New England area.
ne_area <- c(
  "Maine", "Vermont", "New Hampshire",
  "Massachusetts", "Connecticut", "Rhode Island"
)

# One annual series per state.
my_ts <- as_tsibble(us_total, key = state, index = year)

# Plot consumption (column `y`) for the New England states only.
my_ts %>%
  filter(state %in% ne_area) %>%
  autoplot(y) +
  labs(
    title = "Gas Consumption: New England",
    y = "Gas Consumption (million cubic ft)"
  )


2.5

Download tourism.xlsx from the book website.

# Read the spreadsheet from the working directory and inspect the raw tibble.
my_tourism <- readxl::read_excel(path = "tourism.xlsx")
print(my_tourism)
## # A tibble: 24,320 x 5
##    Quarter    Region   State           Purpose  Trips
##    <chr>      <chr>    <chr>           <chr>    <dbl>
##  1 1998-01-01 Adelaide South Australia Business  135.
##  2 1998-04-01 Adelaide South Australia Business  110.
##  3 1998-07-01 Adelaide South Australia Business  166.
##  4 1998-10-01 Adelaide South Australia Business  127.
##  5 1999-01-01 Adelaide South Australia Business  137.
##  6 1999-04-01 Adelaide South Australia Business  200.
##  7 1999-07-01 Adelaide South Australia Business  169.
##  8 1999-10-01 Adelaide South Australia Business  134.
##  9 2000-01-01 Adelaide South Australia Business  154.
## 10 2000-04-01 Adelaide South Australia Business  169.
## # i 24,310 more rows

Create a tsibble which is identical to the tourism tsibble from the tsibble package.

# Reference object that the reconstructed tsibble should match.
print(tsibble::tourism)
## # A tsibble: 24,320 x 5 [1Q]
## # Key:       Region, State, Purpose [304]
##    Quarter Region   State           Purpose  Trips
##      <qtr> <chr>    <chr>           <chr>    <dbl>
##  1 1998 Q1 Adelaide South Australia Business  135.
##  2 1998 Q2 Adelaide South Australia Business  110.
##  3 1998 Q3 Adelaide South Australia Business  166.
##  4 1998 Q4 Adelaide South Australia Business  127.
##  5 1999 Q1 Adelaide South Australia Business  137.
##  6 1999 Q2 Adelaide South Australia Business  200.
##  7 1999 Q3 Adelaide South Australia Business  169.
##  8 1999 Q4 Adelaide South Australia Business  134.
##  9 2000 Q1 Adelaide South Australia Business  154.
## 10 2000 Q2 Adelaide South Australia Business  169.
## # i 24,310 more rows
# Rebuild the tourism tsibble: Quarter becomes a yearquarter index and each
# Region/State/Purpose combination identifies one series.
tourism_ts <- as_tsibble(
  mutate(my_tourism, Quarter = yearquarter(Quarter)),
  key = c(Region, State, Purpose),
  index = Quarter
)

print(tourism_ts)
## # A tsibble: 24,320 x 5 [1Q]
## # Key:       Region, State, Purpose [304]
##    Quarter Region   State           Purpose  Trips
##      <qtr> <chr>    <chr>           <chr>    <dbl>
##  1 1998 Q1 Adelaide South Australia Business  135.
##  2 1998 Q2 Adelaide South Australia Business  110.
##  3 1998 Q3 Adelaide South Australia Business  166.
##  4 1998 Q4 Adelaide South Australia Business  127.
##  5 1999 Q1 Adelaide South Australia Business  137.
##  6 1999 Q2 Adelaide South Australia Business  200.
##  7 1999 Q3 Adelaide South Australia Business  169.
##  8 1999 Q4 Adelaide South Australia Business  134.
##  9 2000 Q1 Adelaide South Australia Business  154.
## 10 2000 Q2 Adelaide South Australia Business  169.
## # i 24,310 more rows

Find what combination of Region and Purpose had the maximum number of overnight trips on average.

# Average the quarterly trips within each Region/Purpose combination and rank
# the combinations by that mean; the first row answers the question.
# The tsibble is converted to a tibble first so that summarise() collapses
# over Quarter instead of keeping one row per time point.
# NOTE(review): the printed output below was produced by the earlier
# filter(Trips == max(Trips)) version, which ranked single-quarter peaks rather
# than averages; re-run to refresh it.
tourism_ts %>%
  as_tibble() %>%
  group_by(Region, Purpose) %>%
  summarise(Mean_Trips = mean(Trips), .groups = "drop") %>%
  arrange(desc(Mean_Trips))
## # A tsibble: 304 x 5 [1Q]
## # Key:       Region, State, Purpose [304]
## # Groups:    Region, Purpose [304]
##    Quarter Region          State           Purpose  Trips
##      <qtr> <chr>           <chr>           <chr>    <dbl>
##  1 2017 Q4 Melbourne       Victoria        Visiting  985.
##  2 2001 Q4 Sydney          New South Wales Business  948.
##  3 2016 Q4 Sydney          New South Wales Visiting  921.
##  4 1998 Q1 South Coast     New South Wales Holiday   915.
##  5 2016 Q1 North Coast NSW New South Wales Holiday   906.
##  6 1998 Q1 Sydney          New South Wales Holiday   828.
##  7 2017 Q4 Melbourne       Victoria        Holiday   806.
##  8 2016 Q4 Brisbane        Queensland      Visiting  796.
##  9 2002 Q1 Gold Coast      Queensland      Holiday   711.
## 10 2017 Q3 Melbourne       Victoria        Business  704.
## # i 294 more rows

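Note that the table above ranks each (Region, Purpose) combination by its single largest quarter, not by its average: (Melbourne, Visiting) had the highest single-quarter number of overnight trips (about 985, in 2017 Q4). Answering the question "on average" requires computing mean(Trips) for each (Region, Purpose) combination and taking the largest mean.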

Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.

# Total trips per State and Quarter, summed over every Region and Purpose.
# The rows are summed directly: pivoting Region and Purpose into long format
# first would emit two rows per observation and double every total.
# NOTE(review): the totals printed below came from that double-counting
# version and are twice the true values; re-run to refresh them.
# .groups = "drop" returns an ungrouped result, so the tsibble carries no
# leftover Quarter grouping.
my_tourism %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  group_by(Quarter, State) %>%
  summarise(Total_Trips = sum(Trips), .groups = "drop") %>%
  as_tsibble(index = Quarter, key = State)
## `summarise()` has grouped output by 'Quarter'. You can override using the
## `.groups` argument.
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
## # Groups:    @ Quarter [80]
##    Quarter State Total_Trips
##      <qtr> <chr>       <dbl>
##  1 1998 Q1 ACT         1102.
##  2 1998 Q2 ACT          832.
##  3 1998 Q3 ACT          872.
##  4 1998 Q4 ACT          900.
##  5 1999 Q1 ACT          757.
##  6 1999 Q2 ACT         1116.
##  7 1999 Q3 ACT          898.
##  8 1999 Q4 ACT         1190.
##  9 2000 Q1 ACT         1199.
## 10 2000 Q2 ACT         1114.
## # i 630 more rows

2.8

Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.

Can you spot any seasonality, cyclicity and trend?
What do you learn about the series?
What can you say about the seasonal patterns?
Can you identify any unusual years?

“Total Private” Employed

# Keep only the rows titled "Total Private", then draw a time plot of Employed.
tp <- filter(us_employment, Title == "Total Private")

autoplot(tp, Employed)

* There is a clear increasing trend.

# Seasonal plot of Employed.
gg_season(tp, Employed)

# Subseries plot of Employed.
gg_subseries(tp, y = Employed)

* The gg_season plot confirms the increasing trend seen earlier. The lines for more recent years sit above those for earlier years, indicating that employment is increasing over time.
* Note that the plot only shows one value of Series_ID. It would have been good to further facet the plot by Series_ID, but gg_subseries does not appear to have such an option.

# Seasonal plot restricted to 1981-1989 (both bounds are strict), with the
# year labels placed on the right-hand side.
tp %>%
  filter(year(Month) > 1980, year(Month) < 1990) %>%
  gg_season(Employed, labels = "right")

* I noticed something different about some years in the 80s, so I decided to filter for them.
* Unlike in the other years, the number employed in 1981 and 1982 decreased towards the end of the year.


Brick Production

# Time plot of the Bricks series.
autoplot(aus_production, Bricks)
## Warning: Removed 20 rows containing missing values (`geom_line()`).

* There is no clear overall monotonic increasing or decreasing trend in brick production. But if the plot is broken down, brick production appears to have increased steadily until a sharp drop in the 80s. After that, the trend began to decrease.
* There is no clear cycle. While there may be distinctive troughs, they are not evenly spaced.

# Seasonal plot of the Bricks series.
gg_season(aus_production, Bricks)
## Warning: Removed 20 rows containing missing values (`geom_line()`).

# Subseries plot of the Bricks series.
gg_subseries(aus_production, Bricks)
## Warning: Removed 5 rows containing missing values (`geom_line()`).

* Seasonality: Q1 has the smallest brick production compared to Q2, Q3, and Q4.

# Lag plot of the Bricks series, drawn as points.
gg_lag(aus_production, Bricks, geom = "point")
## Warning: Removed 20 rows containing missing values (gg_lag).

# Autocorrelation of Bricks for lags up to 9.
ACF(aus_production, Bricks, lag_max = 9)
## # A tsibble: 9 x 2 [1Q]
##        lag   acf
##   <cf_lag> <dbl>
## 1       1Q 0.900
## 2       2Q 0.815
## 3       3Q 0.813
## 4       4Q 0.828
## 5       5Q 0.720
## 6       6Q 0.642
## 7       7Q 0.655
## 8       8Q 0.692
## 9       9Q 0.609
# Plot of the autocorrelation function of Bricks (default number of lags).
autoplot(ACF(aus_production, Bricks))