Homework 1

Author

Naomi Buell

library(tidyverse)
library(janitor)
library(fpp3)
library(USgas)
library(readxl)
library(feasts)
library(ggplot2)
library(scales)

2.1

Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.

aus_production |> select(Bricks) |> head()
# A tsibble: 6 x 2 [1Q]
  Bricks Quarter
   <dbl>   <qtr>
1    189 1956 Q1
2    204 1956 Q2
3    208 1956 Q3
4    197 1956 Q4
5    187 1957 Q1
6    214 1957 Q2
pelt |> select(Lynx) |> head()
# A tsibble: 6 x 2 [1Y]
   Lynx  Year
  <dbl> <dbl>
1 30090  1845
2 45150  1846
3 49150  1847
4 39520  1848
5 21230  1849
6  8420  1850
gafa_stock |> select(Close) |> head()
# A tsibble: 6 x 3 [!]
# Key:       Symbol [1]
  Close Date       Symbol
  <dbl> <date>     <chr> 
1  79.0 2014-01-02 AAPL  
2  77.3 2014-01-03 AAPL  
3  77.7 2014-01-06 AAPL  
4  77.1 2014-01-07 AAPL  
5  77.6 2014-01-08 AAPL  
6  76.6 2014-01-09 AAPL  
vic_elec |> select(Demand) |> head()
# A tsibble: 6 x 2 [30m] <Australia/Melbourne>
  Demand Time               
   <dbl> <dttm>             
1  4383. 2012-01-01 00:00:00
2  4263. 2012-01-01 00:30:00
3  4049. 2012-01-01 01:00:00
4  3878. 2012-01-01 01:30:00
5  4036. 2012-01-01 02:00:00
6  3866. 2012-01-01 02:30:00
  • Use ? (or help()) to find out about the data in each series.

    aus_production |> help()
    pelt |> help()
    gafa_stock |> help()
    vic_elec |> help()
  • What is the time interval of each series?
    aus_production are quarterly estimates. pelt is an annual tsibble. gafa_stock is on a daily interval. vic_elec is a half-hourly tsibble.

  • Use autoplot() to produce a time plot of each series.

    aus_production |> select(Bricks) |> autoplot()

    pelt |> select(Lynx) |> autoplot()

    gafa_stock |> select(Close) |> autoplot()

  • For the last plot, modify the axis labels and title.

    vic_elec |> 
      select(Demand) |> 
      autoplot() +
      labs(title = "Half-hourly Electricity Demand for Victoria, Australia",
           y = "Demand (MWh)", 
           x = "Year")

2.2

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

for (symbol in unique(gafa_stock$Symbol)) {
  date <- gafa_stock |> filter(Symbol == symbol) |> 
    filter(Close == max(Close)) |> 
    pull(Date)
  
  print(paste(symbol,"peak closing price on", date))
}
[1] "AAPL peak closing price on 2018-10-03"
[1] "AMZN peak closing price on 2018-09-04"
[1] "FB peak closing price on 2018-07-25"
[1] "GOOG peak closing price on 2018-07-26"

2.3

Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.

  1. You can read the data into R with the following script:

    tute1 <- readr::read_csv("https://bit.ly/fpptute1") 
    Rows: 100 Columns: 4
    ── Column specification ────────────────────────────────────────────────────────
    Delimiter: ","
    dbl  (3): Sales, AdBudget, GDP
    date (1): Quarter
    
    ℹ Use `spec()` to retrieve the full column specification for this data.
    ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
    head(tute1)
    # A tibble: 6 × 4
      Quarter    Sales AdBudget   GDP
      <date>     <dbl>    <dbl> <dbl>
    1 1981-03-01 1020.     659.  252.
    2 1981-06-01  889.     589   291.
    3 1981-09-01  795      512.  291.
    4 1981-12-01 1004.     614.  292.
    5 1982-03-01 1058.     647.  279.
    6 1982-06-01  944.     602   254 
  2. Convert the data to time series

    mytimeseries <- tute1 |>
      mutate(Quarter = yearquarter(Quarter)) |>
      as_tsibble(index = Quarter)
  3. Construct time series plots of each of the three series

    mytimeseries |>   
      pivot_longer(-Quarter) |>   
      ggplot(aes(x = Quarter, y = value, colour = name)) +   
      geom_line() +   
      facet_grid(name ~ ., scales = "free_y")

  4. Check what happens when you don’t include facet_grid().

    mytimeseries |>   
      pivot_longer(-Quarter) |>   
      ggplot(aes(x = Quarter, y = value, colour = name)) +   
      geom_line()

    Without including facet_grid(), the lines are plotted on the same graph along the same vertical axis.

2.4

The USgas package contains data on the demand for natural gas in the US.

  1. Install the USgas package.

    # install.packages("USgas")

    Installed package. See USgas library loaded at top of file.

  2. Create a tsibble from us_total with year as the index and state as the key.

    us_total_tsibble <- us_total |> tsibble(key = state, index = year)
    us_total_tsibble |> head()
    # A tsibble: 6 x 3 [1Y]
    # Key:       state [1]
       year state        y
      <int> <chr>    <int>
    1  1997 Alabama 324158
    2  1998 Alabama 329134
    3  1999 Alabama 337270
    4  2000 Alabama 353614
    5  2001 Alabama 332693
    6  2002 Alabama 379343
  3. Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).

    ne_states <- c("Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut", "Rhode Island")
    
    us_total_tsibble |> filter(state %in% ne_states) |> 
      rename(State = state) |> 
      autoplot() +
      labs(title = "New England Annual Total Natural Gas Consumption by State",
           y = "Natural gas consumption (million cubic feet)",
           x = "Year")

2.5

  1. Download tourism.xlsx from the book website and read it into R using readxl::read_excel().

    download.file("https://bit.ly/fpptourism", destfile = "tourism.xlsx", mode = "wb")
    tourism_data <- read_excel("tourism.xlsx")
    
    head(tourism_data)
    # A tibble: 6 × 5
      Quarter    Region   State           Purpose  Trips
      <chr>      <chr>    <chr>           <chr>    <dbl>
    1 1998-01-01 Adelaide South Australia Business  135.
    2 1998-04-01 Adelaide South Australia Business  110.
    3 1998-07-01 Adelaide South Australia Business  166.
    4 1998-10-01 Adelaide South Australia Business  127.
    5 1999-01-01 Adelaide South Australia Business  137.
    6 1999-04-01 Adelaide South Australia Business  200.
  2. Create a tsibble which is identical to the tourism tsibble from the tsibble package.

    tourism_copy <- tourism
    
    tourism_copy |> head()
    # A tsibble: 6 x 5 [1Q]
    # Key:       Region, State, Purpose [1]
      Quarter Region   State           Purpose  Trips
        <qtr> <chr>    <chr>           <chr>    <dbl>
    1 1998 Q1 Adelaide South Australia Business  135.
    2 1998 Q2 Adelaide South Australia Business  110.
    3 1998 Q3 Adelaide South Australia Business  166.
    4 1998 Q4 Adelaide South Australia Business  127.
    5 1999 Q1 Adelaide South Australia Business  137.
    6 1999 Q2 Adelaide South Australia Business  200.
  3. Find what combination of Region and Purpose had the maximum number of overnight trips on average.

    max_combo <- tourism_copy |> 
      as_tibble() |> 
      group_by(Region, Purpose) |> 
      summarise(avg_trips = mean(Trips, na.rm = T), .groups = "drop") |> 
      filter(avg_trips == max(avg_trips))
    
    print(paste(max_combo$Purpose, max_combo$Region, "had the maximum number of overnight trips, on average."))
    [1] "Visiting Sydney had the maximum number of overnight trips, on average."
  4. Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.

    total_trips <- tourism_copy |> 
      group_by(State) |> 
      summarise(total_trips = sum(Trips, na.rm = T), .groups = "drop") 
    
    total_trips |> head()
    # A tsibble: 6 x 3 [1Q]
    # Key:       State [1]
      State Quarter total_trips
      <chr>   <qtr>       <dbl>
    1 ACT   1998 Q1        551.
    2 ACT   1998 Q2        416.
    3 ACT   1998 Q3        436.
    4 ACT   1998 Q4        450.
    5 ACT   1999 Q1        379.
    6 ACT   1999 Q2        558.

2.8

Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.

  • Can you spot any seasonality, cyclicity, and trend?
  • What do you learn about the series?
  • What can you say about the seasonal patterns?
  • Can you identify any unusual years?

Below, I retrieve and summarize the time series specified above.

total_private <- us_employment |>
  filter(Title == "Total Private") |> 
  select(Employed)

bricks <- aus_production |>
  select(Bricks)

hare <- pelt |>
  select(Hare)

h02 <- PBS |> 
  filter(ATC2 == "H02") |> 
  as_tibble() |> 
  select(-c(starts_with("ATC"), Scripts)) |> 
  as_tsibble(index = Month, key = c(Concession, Type))

barrels <- us_gasoline |>
  select(Barrels)

time_series <- list(total_private, bricks, hare, h02, barrels)

for (series in time_series) {
  series |> head() |> print()
}
# A tsibble: 6 x 2 [1M]
  Employed    Month
     <dbl>    <mth>
1    25338 1939 Jan
2    25447 1939 Feb
3    25833 1939 Mar
4    25801 1939 Apr
5    26113 1939 May
6    26485 1939 Jun
# A tsibble: 6 x 2 [1Q]
  Bricks Quarter
   <dbl>   <qtr>
1    189 1956 Q1
2    204 1956 Q2
3    208 1956 Q3
4    197 1956 Q4
5    187 1957 Q1
6    214 1957 Q2
# A tsibble: 6 x 2 [1Y]
   Hare  Year
  <dbl> <dbl>
1 19580  1845
2 19600  1846
3 19610  1847
4 11990  1848
5 28040  1849
6 58000  1850
# A tsibble: 6 x 4 [1M]
# Key:       Concession, Type [1]
     Month Concession   Type          Cost
     <mth> <chr>        <chr>        <dbl>
1 1991 Jul Concessional Co-payments 317384
2 1991 Aug Concessional Co-payments 269891
3 1991 Sep Concessional Co-payments 269703
4 1991 Oct Concessional Co-payments 280418
5 1991 Nov Concessional Co-payments 268070
6 1991 Dec Concessional Co-payments 277139
# A tsibble: 6 x 2 [1W]
  Barrels     Week
    <dbl>   <week>
1    6.62 1991 W06
2    6.43 1991 W07
3    6.58 1991 W08
4    7.22 1991 W09
5    6.88 1991 W10
6    6.95 1991 W11

US Employment

I use the graphics functions to explore “Total Private” Employed from us_employment.

# Autoplot
total_private |> autoplot() +
  theme_classic() +
  theme(axis.line = element_blank()) +
  labs(subtitle = "US employment historically trends upwards, with a few shocks.", x = "", y = "Monthly private employment") +
  scale_y_continuous(labels = comma)

# Seasonal plot
total_private |> gg_season() +
  theme_classic() +
  theme(axis.line = element_blank()) +
  labs(title = "Employment may rise in the first half of the year, then level off.", x = "", y = "Monthly private employment") + 
  scale_y_continuous(labels = comma)

# Lag plot
total_private |>
  gg_lag(geom = "point") +
  theme_classic() +
  theme(axis.line = element_blank(),
        axis.text.x = element_text(
          angle = 45,
          hjust = 1,
          size = 8
        )) +
  labs(title = "Seasonality undetectable from lags.",x = "lag(Monthly private employment, k)", y = "Monthly private employment") +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(labels = comma)

# Subseries
total_private |> gg_subseries() +
  theme_classic() +
  theme(axis.line = element_blank(),
        axis.text.x = element_text(
          angle = 45,
          hjust = 1,
          size = 8
        )) +
  labs(title = "Monthly averages rise over the first half of the year, then level off.", x = "", y = "Private employment") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_yearmonth(breaks = scales::pretty_breaks(n = 3),
                    labels = scales::label_date("%Y"))

# ACF plot
total_private |> ACF() |> autoplot() + 
  theme_classic() + 
  theme(axis.line = element_blank()) +
  labs(title = "ACF of US monthly private employment shows long-term upward trend.")

According to the time plot, there is a long-term, upward trend. The ACF also confirms the trend, since the autocorrelations for small lags tend to be large and positive and slowly decrease as the lags increase. On seasonality, the seasonal plot and the blue lines for means in the sub-series plot show employment generally rises in the first half of each year, and levels off starting in June. I do not detect any cyclicity.

From these plots, I can see that employment has risen over the past 80 years, with a few shocks, e.g., unusual years of data during the financial crisis of 2007-08. These dips in employment during the Great Recession can be seen in the time plot and in the sub-series plot, where the purple lines overlap with the lower levels of employment from the 90s (blue lines).

Bricks

# Autoplot
bricks |> autoplot() +
  theme_classic() +
  theme(axis.line = element_blank()) +
  labs(
    title = "Brick production trends upwards until the 80's, then drops off.",
    subtitle = "Cyclical booms and crashes in 1975, 1983, 1991, 1996, and 2001.",
    x = "",
    y = "Clay brick production (in millions of bricks)"
  ) +
  scale_x_yearquarter(breaks = seq(yearquarter("1950 Q1"), yearquarter("2010 Q1"), by = 20))

bricks |> mutate(year = year(Quarter)) |> filter(year > 1970) |> arrange(Bricks) |> pull(year) |>  head(10) |> unique() # ID crash years
[1] 1983 2001 1996 1997 1998 1975 2002 2005
# Seasonal plot
bricks |> gg_season() +
  theme_classic() +
  theme(axis.line = element_blank()) +
  labs(title = "Production is seasonal, starting low in Q1 and peaking in Q3 each year.", x = "", y = "Clay brick production (in millions of bricks)")

# Lag plot
bricks |>
  gg_lag(geom = "point") +
  theme_classic() +
  theme(axis.line = element_blank())+
  labs(title = "The relationship is strongly positive at lag 4, indicating strong seasonality.",
    x = "lag(Clay brick production (in millions of bricks), k)",
    y = "Clay brick production (in millions of bricks)")

# Subseries
bricks |> gg_subseries() +
  theme_classic() +
  theme(axis.line = element_blank())

  labs(title = "Bricks are produced seasonally, peaking in Q3.", x = "", y = "Production (millions of bricks)")
$x
[1] ""

$y
[1] "Production (millions of bricks)"

$title
[1] "Bricks are produced seasonally, peaking in Q3."

attr(,"class")
[1] "labels"
# ACF plot
bricks |> ACF() |> autoplot() + 
  theme_classic() + 
  theme(axis.line = element_blank()) +
  labs(title = "The scallopped shape of the ACF plot indicates data are both trended and seasonal.")

The quarterly production of bricks in Australia trends upwards from 1956 until its peak in the early 80’s, then declines, as shown in the time plot. Yes, there is seasonality, as indicated by the seasonal plot: production is low in Q1 and highest in Q3. The lag plot also supports this finding with a strong positive relationship at lag 4 (i.e., patterns tend to repeat on an annual basis). Additionally, the scalloped shape of the ACF plot supports that there is seasonality and trend in the data. From the time plot, it looks like there are some cyclical patterns where production gradually rises, then crashes steeply every 5-10 years (more frequent cycles in recent years). There were unusually steep drops in 1975, 1983, 1991, 1996, and 2001.

Hare

# Autoplot
hare |> autoplot() +
  theme_classic() +
  theme(axis.line = element_blank()) +
  labs(title = "Pelt trading records rise and fall about every 10 years.",
       x = "", 
       y = "Number of Canadian Lynx pelts traded") +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(breaks = seq(min(hare$Year), max(hare$Year), by = 5))  

# Lag plot
hare |>
  gg_lag(geom = "point") +
  theme_classic() +
  theme(axis.line = element_blank(),
        axis.text.x = element_text(
          angle = 45,
          hjust = 1,
          size = 8
        )) +    
  labs(title = "Negative relationship at lag 5 indicates peaks and troughs are 5 years apart.", x = "lag(Number of Canadian Lynx pelts traded), k)", y = "Number of Canadian Lunx pelts traded") +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(labels = comma)

# ACF plot
hare |> ACF() |> autoplot() + 
  theme_classic() + 
  theme(axis.line = element_blank()) +
  labs(title = "The wave pattern, with peaks and troughs 5 years apart, indicates data are seasonal.")

There doesn’t appear to be a long-term trend in the data according to the time plot, but you can see trading rise and fall sharply about every 10 years or so. The negative relationship in lag 5 of the lag plot and the wave pattern in the ACF plot (with mins and maxes every 5 years) suggests the 10-year seasonality of the data, where the peaks and troughs tend to be 5 years apart from each other. The decreasing magnitude indicates that the influence of past amounts of trade weakens over time.

I do not detect any unusual years.

H02

# Autoplot
h02 |>
  mutate(Cost = Cost/1000) |> 
  autoplot() +
  theme_classic() +
  theme(axis.line = element_blank()) +
  labs(title = "Australian Medicare prescription costs of corticosteroids for systemic use (H02).",
       x = "", 
       y = "Cost (thousand $AUD)")

# Seasonal plot
h02 |>
  mutate(Cost = Cost / 1000) |>
  gg_season() +
  theme_classic() +
  theme(axis.line = element_blank()) +
  labs(title = "Production is seasonal for all types except general co-payments.", 
       subtitle = "Safety net costs inverse seasonality of concessional co-paymnents.",
       x = "", 
       y = "Cost (thousand $AUD)")

# Lag plots
h02 |> 
  mutate(Cost = Cost / 1000) |> 
  group_by(Type, Concession) |>
  group_split() |>
  map(~ {
    # Create a lag plot for each group
    lag_plot <- .x |>
      gg_lag(geom = "point") +
      theme_classic() +
      theme(axis.line = element_blank()) +
      labs(
        title = paste(.x$Concession[1], .x$Type[1]),
        x = "lag(Cost (thousand $AUD), k)",
        y = "Cost (thousand $AUD)"
      )
  })
[[1]]


[[2]]


[[3]]


[[4]]

# Subseries
h02 |>
  mutate(Cost = Cost / 1000) |>
  gg_subseries() +
  theme_classic() +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_text(
      angle = 45,
      hjust = 1,
      size = 7
    ),
    axis.text.y = element_text(size = 7)
  )  +
  labs(title = "Concessional co-payments peak in spring. Safety net costs peak in winter.", x = "", y = "Cost (thousand $AUD)") +
  scale_y_continuous(labels = scales::comma)

# ACF plot
h02 |> ACF() |> autoplot() + 
  theme_classic() + 
  theme(axis.line = element_blank()) +
  labs(subtitle = "Wave pattern with peaks and troughs 6 months apart indicates annual seasonal pattern.")

Based on the time plot, Australian Medicare prescription costs of corticosteriods for systemic use (anatomical therapeutic chemical index = H02) trends upwards for concessional scripts (which are given to pensioners, unemployed, dependents, and other card holders). These concessional scripts cost generally much more than the general scripts used by patients without a concession card. Safety net scripts’ costs have a steady strend.

All concessions and types of script costs appear to be seasonal except for general co-payments (green line in time plot does not have a seasonal pattern like the rest). The seasonality of the concessional co-payments appear to be the inverse of that for the safety net scripts. The seasonal plot more clearly shows how safety net costs are lowest in the spring, then start rising in the summer months, and how the inverse is true for concessional co-payments. The blue monthly averages in the sub-series plots show this same seasonality. The lag plots provide further evidence of this: there are patterns in the plots for all types and concessions except for general co-payments, which appear random. The wave pattern in the ACF plots (with mins and maxes every 6 months) suggests the annual seasonality of the data. The decreasing magnitude of the ACF plot for general co-payments indicates that the influence of past costs weaken over time.

I do not detect any unusual years.

Gasoline

# Autoplot
barrels |> autoplot() +
  theme_classic() +
  theme(axis.line = element_blank(),
        axis.text.x = element_text(
          angle = 45,
          hjust = 1,
          size = 8
        )) +
  labs(title = "US gas supplied trends upward, with seasonal rises and falls.", 
       x = "", 
       y = "Million barrels per day") +
  scale_x_yearmonth(breaks = seq(min(barrels$Week), max(barrels$Week), by = 52))  

# Seasonal plot
barrels |> gg_season() +
  theme_classic() +
  theme(axis.line = element_blank()) +
  labs(title = "Supply is seasonal, peaking in the summer months.", 
       x = "", 
       y = "Million barrels per day") 

# lag plot (by month)
barrels |> 
  index_by(YearMonth = yearmonth(Week)) |> 
  summarize(Barrels = sum(Barrels)) |> 
  gg_lag(geom = "point") +
  theme_classic() +
  theme(axis.line = element_blank(),
        axis.text.x = element_text(
          angle = 45,
          hjust = 1,
          size = 8
        )) +    
  labs(
       x = "lag(Million barrels per day), k)", 
       y = "Million barrels per day")

# Subseries
barrels |>
  index_by(YearMonth = yearmonth(Week)) |>
  summarize(Barrels = sum(Barrels)) |>
  gg_subseries() +
  theme_classic() +
  theme(axis.line = element_blank(),
        axis.text.x = element_text(
          angle = 45,
          hjust = 1,
          size = 8
        )) +
  labs(title = "Supply is seasonal, peaking in the summer months.", 
       subtitle = "February is an unusual month, with fewer fluctuations over the years.", 
       x = "",
       y= "Million barrels per day") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_yearmonth(breaks = scales::pretty_breaks(n = 3),
                    labels = scales::label_date("%Y"))

# ACF plot
barrels |> ACF() |> autoplot() + 
  theme_classic() + 
  theme(axis.line = element_blank()) +
  labs(subtitle = "The downward slope of the ACF indicates data are trended, and slight scalloping indicates seasonality.")

Gasoline supply has trended upwards in the US since the 90’s. Data is seasonal, peaking in the summer months each year, as shown by the seasonal and sub-series plots. February is an unusual month, with fewer fluctuations in gas supply over the years. The ACF has a downward slope, indicating data are trended. You might detect a slight scallop pattern in the ACF, indicating seasonality.