# libraries
library(ggplot2)
library(fpp3)
library(dplyr)
library(tidyverse)

Exercise 2.1

checking the help for each:

# Open the documentation page of each data set.
?aus_production
## starting httpd help server ... done
?pelt
?gafa_stock
?vic_elec

Time interval of each series:

Bricks

aus_production
## # A tsibble: 218 x 7 [1Q]
##    Quarter  Beer Tobacco Bricks Cement Electricity   Gas
##      <qtr> <dbl>   <dbl>  <dbl>  <dbl>       <dbl> <dbl>
##  1 1956 Q1   284    5225    189    465        3923     5
##  2 1956 Q2   213    5178    204    532        4436     6
##  3 1956 Q3   227    5297    208    561        4806     7
##  4 1956 Q4   308    5681    197    570        4418     6
##  5 1957 Q1   262    5577    187    529        4339     5
##  6 1957 Q2   228    5651    214    604        4811     7
##  7 1957 Q3   236    5317    227    603        5259     7
##  8 1957 Q4   320    6152    222    582        4735     6
##  9 1958 Q1   272    5758    199    554        4608     5
## 10 1958 Q2   233    5641    229    620        5196     7
## # ℹ 208 more rows
interval(aus_production)
## <interval[1]>
## [1] 1Q

Looking at the entries/rows, I see that there are 4 quarters for each year, and one row per quarter. So the interval for Bricks in aus_production is quarterly.

Lynx

pelt
## # A tsibble: 91 x 3 [1Y]
##     Year  Hare  Lynx
##    <dbl> <dbl> <dbl>
##  1  1845 19580 30090
##  2  1846 19600 45150
##  3  1847 19610 49150
##  4  1848 11990 39520
##  5  1849 28040 21230
##  6  1850 58000  8420
##  7  1851 74600  5560
##  8  1852 75090  5080
##  9  1853 88480 10170
## 10  1854 61280 19600
## # ℹ 81 more rows
interval(pelt)
## <interval[1]>
## [1] 1Y

I can see from pelt that Lynx has yearly/annual intervals.

Close

gafa_stock
## # A tsibble: 5,032 x 8 [!]
## # Key:       Symbol [4]
##    Symbol Date        Open  High   Low Close Adj_Close    Volume
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>
##  1 AAPL   2014-01-02  79.4  79.6  78.9  79.0      67.0  58671200
##  2 AAPL   2014-01-03  79.0  79.1  77.2  77.3      65.5  98116900
##  3 AAPL   2014-01-06  76.8  78.1  76.2  77.7      65.9 103152700
##  4 AAPL   2014-01-07  77.8  78.0  76.8  77.1      65.4  79302300
##  5 AAPL   2014-01-08  77.0  77.9  77.0  77.6      65.8  64632400
##  6 AAPL   2014-01-09  78.1  78.1  76.5  76.6      65.0  69787200
##  7 AAPL   2014-01-10  77.1  77.3  75.9  76.1      64.5  76244000
##  8 AAPL   2014-01-13  75.7  77.5  75.7  76.5      64.9  94623200
##  9 AAPL   2014-01-14  76.9  78.1  76.8  78.1      66.1  83140400
## 10 AAPL   2014-01-15  79.1  80.0  78.8  79.6      67.5  97909700
## # ℹ 5,022 more rows
interval(gafa_stock)
## <interval[1]>
## [1] !

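Close from gafa_stock is recorded once per trading day. Because weekends and market holidays are missing, tsibble reports the interval as irregular (`!`) rather than as a regular daily interval.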

Demand

vic_elec
## # A tsibble: 52,608 x 5 [30m] <Australia/Melbourne>
##    Time                Demand Temperature Date       Holiday
##    <dttm>               <dbl>       <dbl> <date>     <lgl>  
##  1 2012-01-01 00:00:00  4383.        21.4 2012-01-01 TRUE   
##  2 2012-01-01 00:30:00  4263.        21.0 2012-01-01 TRUE   
##  3 2012-01-01 01:00:00  4049.        20.7 2012-01-01 TRUE   
##  4 2012-01-01 01:30:00  3878.        20.6 2012-01-01 TRUE   
##  5 2012-01-01 02:00:00  4036.        20.4 2012-01-01 TRUE   
##  6 2012-01-01 02:30:00  3866.        20.2 2012-01-01 TRUE   
##  7 2012-01-01 03:00:00  3694.        20.1 2012-01-01 TRUE   
##  8 2012-01-01 03:30:00  3562.        19.6 2012-01-01 TRUE   
##  9 2012-01-01 04:00:00  3433.        19.1 2012-01-01 TRUE   
## 10 2012-01-01 04:30:00  3359.        19.0 2012-01-01 TRUE   
## # ℹ 52,598 more rows
interval(vic_elec)
## <interval[1]>
## [1] 30m

Finally, demand from vic_elec seems to have 30-min intervals!

Using autoplot() for each:

Bricks

# Time plot of quarterly brick production.
autoplot(aus_production, Bricks)

Lynx

# Time plot of the annual Lynx series.
autoplot(pelt, Lynx)

As established above, Lynx is an annual series, so the plot shows one observation per year.

Close

# Time plot of closing prices; gafa_stock is keyed by Symbol.
autoplot(gafa_stock, Close)

As noted above, Close is recorded once per trading day, so each stock's line has one observation per trading day.

Demand

# Time plot of half-hourly electricity demand.
autoplot(vic_elec, Demand)

modifying the axis labels and title:

# Same demand plot, now with a title and axis labels.
autoplot(vic_elec, Demand) +
  ggtitle("Electricity demand in Victoria") +
  xlab("time") +
  ylab("demand (megawatts)")

Exercise 2.2

# For each stock symbol, keep the trading day(s) on which Close peaked.
gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close)) |>
  ungroup() |>
  select(Symbol, Date, Close)
## # A tsibble: 4 x 3 [!]
## # Key:       Symbol [4]
##   Symbol Date       Close
##   <chr>  <date>     <dbl>
## 1 AAPL   2018-10-03  232.
## 2 AMZN   2018-09-04 2040.
## 3 FB     2018-07-25  218.
## 4 GOOG   2018-07-26 1268.

Exercise 2.3

# The short link https://bit.ly/fpptute1 resolves to the full URL below
# (checked with a website that expands bit.ly links), so the CSV is read
# directly from its final address.
tute1 <- read.csv(file = "https://otexts.com/fpp3/extrafiles/tute1.csv")

Converting the data to time series

# Parse Quarter as a year-quarter and use it as the tsibble index.
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)

Constructing time series plots of each of the three series

# Long format (one row per Quarter and series), drawn with one panel per
# series so that every series gets its own y-axis scale.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")

# Same long-format data, with all series drawn on one shared set of axes.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line()

Without facet_grid(), the three time series are overlaid on a single plot and share one y-axis. As a result, some variables (GDP) are overshadowed to an extent by the scale of the others.

Exercise 2.4

library(USgas)

# Build a tsibble from us_total with `year` as the index and `state` as the key.
# The object is deliberately not called `tsibble`, which is the name of the
# package and of its constructor function.
us_gas_ts <- us_total |>
  as_tsibble(index = year, key = state)

# The six states are filtered once so both plots below use identical rows.
new_england_states <- c(
  "Maine", "Vermont", "New Hampshire", "Massachusetts",
  "Connecticut", "Rhode Island"
)
new_england_gas <- us_gas_ts |>
  filter(state %in% new_england_states)

# All six states on one shared y-axis. y / 1e3 is labelled as billion cubic
# feet (presumably y is stored in million cubic feet; confirm in ?us_total).
new_england_gas |>
  autoplot(y / 1e3) +
  labs(y = "billion cubic feet")

# Same data with a separate panel, and a free y-axis, for each state.
new_england_gas |>
  ggplot(aes(x = year, y = y / 1e3, color = state)) +
  geom_line() +
  facet_wrap(~ state, scales = "free_y") +
  labs(
    y = "Billion Cubic Feet",
    x = "Year",
    title = "Natural Gas Consumption in Selected States"
  ) +
  theme_minimal()

Exercise 2.5:

# The short link https://bit.ly/fpptourism resolves to the workbook below
# (checked with a website that expands bit.ly links). read_excel() is given a
# local file, so the workbook is first downloaded to a temporary .xlsx file.
library(readxl)
library(httr)
url <- "https://OTexts.com/fpp3/extrafiles/tourism.xlsx"
temp_file <- tempfile(fileext = ".xlsx")
response <- GET(url, write_disk(temp_file, overwrite = TRUE))
# Stop with the HTTP status if the download failed, instead of letting
# read_excel() fail later on a file that is not a workbook.
stop_for_status(response)
tourism <- read_excel(temp_file)

a tsibble which is identical to the tourism tsibble from the tsibble package

library(tsibble)
library(lubridate)

# Replace the Quarter column read from the workbook with a year-quarter value.
tourism <- mutate(tourism, Quarter = yearquarter(Quarter))

# Quarter is the index; every Region/State/Purpose combination is one series.
tourism_tsibble <- as_tsibble(
  tourism,
  key = c(Region, State, Purpose),
  index = Quarter
)

tourism_tsibble
## # A tsibble: 24,320 x 5 [1Q]
## # Key:       Region, State, Purpose [304]
##    Quarter Region   State           Purpose  Trips
##      <qtr> <chr>    <chr>           <chr>    <dbl>
##  1 1998 Q1 Adelaide South Australia Business  135.
##  2 1998 Q2 Adelaide South Australia Business  110.
##  3 1998 Q3 Adelaide South Australia Business  166.
##  4 1998 Q4 Adelaide South Australia Business  127.
##  5 1999 Q1 Adelaide South Australia Business  137.
##  6 1999 Q2 Adelaide South Australia Business  200.
##  7 1999 Q3 Adelaide South Australia Business  169.
##  8 1999 Q4 Adelaide South Australia Business  134.
##  9 2000 Q1 Adelaide South Australia Business  154.
## 10 2000 Q2 Adelaide South Australia Business  169.
## # ℹ 24,310 more rows
tourism
## # A tibble: 24,320 × 5
##    Quarter Region   State           Purpose  Trips
##      <qtr> <chr>    <chr>           <chr>    <dbl>
##  1 1998 Q1 Adelaide South Australia Business  135.
##  2 1998 Q2 Adelaide South Australia Business  110.
##  3 1998 Q3 Adelaide South Australia Business  166.
##  4 1998 Q4 Adelaide South Australia Business  127.
##  5 1999 Q1 Adelaide South Australia Business  137.
##  6 1999 Q2 Adelaide South Australia Business  200.
##  7 1999 Q3 Adelaide South Australia Business  169.
##  8 1999 Q4 Adelaide South Australia Business  134.
##  9 2000 Q1 Adelaide South Australia Business  154.
## 10 2000 Q2 Adelaide South Australia Business  169.
## # ℹ 24,310 more rows

maximum number of overnight trips on average.

# Average Trips over all quarters for each Region/Purpose pair, then keep the
# pair(s) with the highest average.
tourism |>
  as_tibble() |>
  summarise(Trips = mean(Trips), .by = c(Region, Purpose)) |>
  filter(Trips == max(Trips))
## # A tibble: 1 × 3
##   Region Purpose  Trips
##   <chr>  <chr>    <dbl>
## 1 Sydney Visiting  747.

a new tsibble which combines the Purposes and Regions, and just has total trips by State.

# Collapse Region and Purpose: one row per State and Quarter holding the summed
# Trips. A weighted count() computes sum(Trips, na.rm = TRUE) per group and
# returns an ungrouped result, which is then turned into a tsibble keyed by
# State.
state_tsibble <- tourism |>
  count(State, Quarter, wt = Trips, name = "TotalTrips") |>
  as_tsibble(key = State, index = Quarter)

state_tsibble
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
##    State Quarter TotalTrips
##    <chr>   <qtr>      <dbl>
##  1 ACT   1998 Q1       551.
##  2 ACT   1998 Q2       416.
##  3 ACT   1998 Q3       436.
##  4 ACT   1998 Q4       450.
##  5 ACT   1999 Q1       379.
##  6 ACT   1999 Q2       558.
##  7 ACT   1999 Q3       449.
##  8 ACT   1999 Q4       595.
##  9 ACT   2000 Q1       600.
## 10 ACT   2000 Q2       557.
## # ℹ 630 more rows

Exercise 2.8:

# Select the series for Exercise 2.8. Every source data set is already a
# tsibble, and filter(), select() and summarise() keep it one, so no extra
# as_tsibble() call is needed. Each object is built exactly once.

# Monthly employment, restricted to the "Total Private" series.
us_emp_ts <- us_employment |>
  filter(Title == "Total Private")

# Quarterly brick production (the index column is kept automatically).
bricks_ts <- aus_production |>
  select(Bricks)

# Annual Hare series.
hare_ts <- pelt |>
  select(Hare)

# Cost summed over all H02 series for each month.
h02_ts <- PBS |>
  filter(ATC2 == "H02") |>
  summarise(TotalCost = sum(Cost))

# Weekly US gasoline series, used as provided.
barrels_ts <- us_gasoline

Employment in the US

# us_emp_ts holds Title as well as Employed, so the plotted column is named
# explicitly instead of relying on automatic variable selection.
autoplot(us_emp_ts, Employed) +
  ggtitle("Total Private Employment (US)")

gg_season(us_emp_ts, Employed) +
  ggtitle("Seasonal Plot: US Private Employment")

gg_subseries(us_emp_ts, Employed) +
  ggtitle("Subseries Plot: US Private Employment")

gg_lag(us_emp_ts, Employed) +
  ggtitle("Lag Plot: US Private Employment")

# us_emp_ts is already built in the data-preparation step of this exercise, so
# it is not recomputed here.
us_emp_ts |>
  ACF(Employed) |>
  autoplot() +
  ggtitle("ACF: US Private Employment") +
  labs(y = "Autocorrelation")

US employment in these plots shows seasonality and a clear trend over time. I can also see a smaller cyclic pattern that repeats over time. Some of these patterns are clearer when looking at the monthly panels from gg_subseries(). There are some unusual years where I think financial crises took place, such as 2008 (very clear), and earlier in the 1980s and 1940s.

Brick production

# The plotted column is named explicitly in every call rather than left to
# automatic variable selection.
autoplot(bricks_ts, Bricks) +
  ggtitle("Bricks Production (Australia)")

gg_season(bricks_ts, Bricks) +
  ggtitle("Seasonal Plot: Bricks Production")

gg_subseries(bricks_ts, Bricks) +
  ggtitle("Subseries Plot: Bricks Production")

gg_lag(bricks_ts, Bricks) +
  ggtitle("Lag Plot: Bricks Production")

bricks_ts |>
  ACF(Bricks) |>
  autoplot() +
  ggtitle("ACF: Bricks Production")

There is an upward trend from the start of the data until about halfway (around 1980); production then dips abruptly and rebounds, after which the trend is downward for the rest of the series. There also seems to be a cyclic tendency that repeats over time. Looking at the gg_subseries() plot, Q1 has consistently been lower than the other quarters.

Hare from pelt

# The plotted column is named explicitly in every call. The last three titles
# previously read "SHare Pelts"; they now state the plot type, matching the
# titles used for the other series.
autoplot(hare_ts, Hare) +
  ggtitle("Hare Pelts (Pelt Dataset)")

gg_subseries(hare_ts, Hare) +
  ggtitle("Subseries Plot: Hare Pelts")

gg_lag(hare_ts, Hare) +
  ggtitle("Lag Plot: Hare Pelts")

hare_ts |>
  ACF(Hare) |>
  autoplot() +
  ggtitle("ACF: Hare Pelts")

There are huge ups and downs over time, resembling cyclic patterns but not equal in magnitude. I can’t tell much beyond that, other than that peaks appear in certain years, such as the early 1860s and mid-1880s.

H02

# The plotted column is named explicitly in every call rather than left to
# automatic variable selection.
autoplot(h02_ts, TotalCost) +
  ggtitle("H02 Cost (PBS Dataset)")

gg_season(h02_ts, TotalCost) +
  ggtitle("Seasonal Plot: H02 Cost")

gg_subseries(h02_ts, TotalCost) +
  ggtitle("Subseries Plot: H02 Cost")

gg_lag(h02_ts, TotalCost) +
  ggtitle("Lag Plot: H02 Cost")

h02_ts |>
  ACF(TotalCost) |>
  autoplot() +
  ggtitle("ACF: H02 Cost")

A cyclic pattern appears here too, combined with a slight upward trend that is clearer earlier than later in time. The data appears very seasonal, with a consistent dip in February followed by a slow increase throughout the year. The monthly panels from gg_subseries() show a larger magnitude of change in the later months of the year than in the earlier ones (especially December, and progressively less so going back to February). The seasonality appears again in the ACF plot.

US gasoline

# The plotted column is named explicitly in every call rather than left to
# automatic variable selection.
autoplot(barrels_ts, Barrels) +
  ggtitle("US Gasoline Barrels")

gg_season(barrels_ts, Barrels) +
  ggtitle("Seasonal Plot: US Gasoline Barrels")

gg_subseries(barrels_ts, Barrels) +
  ggtitle("Subseries Plot: US Gasoline Barrels")

gg_lag(barrels_ts, Barrels) +
  ggtitle("Lag Plot: US Gasoline Barrels")

barrels_ts |>
  ACF(Barrels) |>
  autoplot() +
  ggtitle("ACF: US Gasoline Barrels")

There is also a slow upward trend until about the mid-2000s, followed by a downtrend (around 2008, so maybe related to the financial crisis then) and then a short upward trend again. It is difficult to read the plot from gg_season() meaningfully. The same goes for the plot from gg_subseries(), except that the average is shown going up and down. There is some seasonality visible in the ACF plot and the gg_lag() plot.