Introduction

This assignment uses time series graphics from the fpp3 framework to explore key features such as trend, seasonality, cyclicity, and unusual observations. The goal is to understand the series in the real-world datasets from economics, health and energy.

library(fpp3)
library(tidyverse)
library(USgas)
library(readxl)

2.1

1. Bricks from aus_production

bricks <- aus_production %>%
  select(Quarter, Bricks) %>%
  drop_na()
#plot bricks
autoplot(bricks, Bricks) +
  labs(
    title = "Bricks Production in Australia",
    x = "Quarter",
    y = "Bricks",
  ) 

2. Lynx from Pelt

lynx <- pelt %>%
  select(Year, Lynx) %>%
  drop_na()
autoplot(lynx, Lynx) +
  labs(
    title = "Canadian Lynx Trappings",
    x = "Year",
    y = "Number of Lynx"
  )

3. Close from gafa_stock

goog_close <- gafa_stock %>%
  filter(Symbol == "GOOG") %>%
  select(Date, Close) %>%
  drop_na()
autoplot(goog_close, Close) +
  labs(
    title = "GOOG Stock Closing Price",
    x = "Date",
    y = "Close Price in USD"
  )

4. Demand from vic_elec

demand <- vic_elec %>%
  select(Time, Demand) %>%
  drop_na()
autoplot(demand, Demand) +
  labs(
    title = "Electricity Demand in Victoria",
    x = "Time",
    y = "Demand"
  )

## 2.2

unique(gafa_stock$Symbol)
## [1] "AAPL" "AMZN" "FB"   "GOOG"
# check the unique symbol

peak_days <- gafa_stock %>%
  group_by(Symbol) %>%
  filter(Close == max(Close, na.rm=TRUE)) %>%
  select(Symbol, Date, Close)

peak_days
## # A tsibble: 4 x 3 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date       Close
##   <chr>  <date>     <dbl>
## 1 AAPL   2018-10-03  232.
## 2 AMZN   2018-09-04 2040.
## 3 FB     2018-07-25  218.
## 4 GOOG   2018-07-26 1268.

2.3

tute1 <- read.csv("https://raw.githubusercontent.com/vincent-usny/week2/refs/heads/main/tute1.csv")
view(tute1)
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() 

Without facet_grid(), all time series will be in one grid. Using facet_grid() is easier to identify each time series.

2.4

?us_total
## starting httpd help server ... done
head(us_total)
##   year   state      y
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343
us_total_ts <- us_total %>%
  as_tsibble(
    index = year,
    key = state
  )
new_england <- c("Maine", "Vermont", "New Hampshire",
                 "Massachusetts", "Connecticut", "Rhode Island")

us_new_england <- us_total_ts %>%
  filter(state %in% new_england)
autoplot(us_new_england, y) +
  labs(
    title = "Annual Gas Consumption in New England",
    x = "Year",
    y = "Consumption"
  )

2.5

#a
download.file(
  url = "https://github.com/vincent-usny/week2/raw/main/tourism.xlsx",
  destfile = "tourism.xlsx",
  mode = "wb"
)

tourism <-read_excel("tourism.xlsx")
head(tourism)
## # A tibble: 6 × 5
##   Quarter    Region   State           Purpose  Trips
##   <chr>      <chr>    <chr>           <chr>    <dbl>
## 1 1998-01-01 Adelaide South Australia Business  135.
## 2 1998-04-01 Adelaide South Australia Business  110.
## 3 1998-07-01 Adelaide South Australia Business  166.
## 4 1998-10-01 Adelaide South Australia Business  127.
## 5 1999-01-01 Adelaide South Australia Business  137.
## 6 1999-04-01 Adelaide South Australia Business  200.
#b
tourism_ts <- tourism %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(
    index = Quarter,
    key = c(State, Region, Purpose)
  )

tourism_ts
## # A tsibble: 24,320 x 5 [1Q]
## # Key:       State, Region, Purpose [304]
##    Quarter Region   State Purpose  Trips
##      <qtr> <chr>    <chr> <chr>    <dbl>
##  1 1998 Q1 Canberra ACT   Business 150. 
##  2 1998 Q2 Canberra ACT   Business  99.9
##  3 1998 Q3 Canberra ACT   Business 130. 
##  4 1998 Q4 Canberra ACT   Business 102. 
##  5 1999 Q1 Canberra ACT   Business  95.5
##  6 1999 Q2 Canberra ACT   Business 229. 
##  7 1999 Q3 Canberra ACT   Business 109. 
##  8 1999 Q4 Canberra ACT   Business 159. 
##  9 2000 Q1 Canberra ACT   Business 105. 
## 10 2000 Q2 Canberra ACT   Business 202. 
## # ℹ 24,310 more rows
#c
max_avg <- tourism_ts %>%
  group_by(Region, Purpose) %>%
  summarise(avg_trips = mean(Trips, na.rm=TRUE)) %>%
  arrange(desc(avg_trips)) %>%
  slice(1)
#d
tourism_state <- tourism_ts %>%
  index_by(Quarter) %>%
  group_by(State) %>%
  summarise(TotalTrips = sum(Trips, na.rm = TRUE)) %>%
  ungroup() %>%
  as_tsibble(index = Quarter, key = State)

tourism_state
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
##    State Quarter TotalTrips
##    <chr>   <qtr>      <dbl>
##  1 ACT   1998 Q1       551.
##  2 ACT   1998 Q2       416.
##  3 ACT   1998 Q3       436.
##  4 ACT   1998 Q4       450.
##  5 ACT   1999 Q1       379.
##  6 ACT   1999 Q2       558.
##  7 ACT   1999 Q3       449.
##  8 ACT   1999 Q4       595.
##  9 ACT   2000 Q1       600.
## 10 ACT   2000 Q2       557.
## # ℹ 630 more rows

2.8

#1. Total Private
us_private <- us_employment %>%
  filter(Title == "Total Private") %>%
  select(Month, Employed)

autoplot(us_private, Employed)

gg_season(us_private, Employed)
## Warning: `gg_season()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_season()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

gg_subseries(us_private, Employed)
## Warning: `gg_subseries()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_subseries()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

gg_lag(us_private, Employed, lag=12)
## Warning: `gg_lag()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_lag()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ACF(us_private, Employed)
## # A tsibble: 29 x 2 [1M]
##         lag   acf
##    <cf_lag> <dbl>
##  1       1M 0.997
##  2       2M 0.993
##  3       3M 0.990
##  4       4M 0.986
##  5       5M 0.983
##  6       6M 0.980
##  7       7M 0.977
##  8       8M 0.974
##  9       9M 0.971
## 10      10M 0.968
## # ℹ 19 more rows
#2. Bricks
bricks <- aus_production %>%
  select(Quarter, Bricks) %>%
  drop_na(Bricks)

autoplot(bricks, Bricks)

gg_season(bricks, Bricks)

gg_subseries(bricks, Bricks)

gg_lag(bricks, Bricks, lag = 4)

ACF(bricks, Bricks)
## # A tsibble: 22 x 2 [1Q]
##         lag   acf
##    <cf_lag> <dbl>
##  1       1Q 0.900
##  2       2Q 0.815
##  3       3Q 0.813
##  4       4Q 0.828
##  5       5Q 0.720
##  6       6Q 0.642
##  7       7Q 0.655
##  8       8Q 0.692
##  9       9Q 0.609
## 10      10Q 0.556
## # ℹ 12 more rows
#3. Hare
hare <- pelt %>%
  select(Year, Hare)

autoplot(hare, Hare)

gg_lag(hare, Hare, lag = 1)

ACF(hare, Hare)
## # A tsibble: 19 x 2 [1Y]
##         lag     acf
##    <cf_lag>   <dbl>
##  1       1Y  0.658 
##  2       2Y  0.214 
##  3       3Y -0.155 
##  4       4Y -0.401 
##  5       5Y -0.493 
##  6       6Y -0.401 
##  7       7Y -0.168 
##  8       8Y  0.113 
##  9       9Y  0.307 
## 10      10Y  0.340 
## 11      11Y  0.296 
## 12      12Y  0.206 
## 13      13Y  0.0372
## 14      14Y -0.153 
## 15      15Y -0.285 
## 16      16Y -0.295 
## 17      17Y -0.202 
## 18      18Y -0.0676
## 19      19Y  0.0956
#4. PBS HO2
pbs_h02 <- PBS %>%
  filter(
      ATC1 == "H",
      ATC2 == "H02", 
      Concession == "General",
      Type == "Co-payments") %>%
  select(Month, Cost)

autoplot(pbs_h02, Cost)

gg_season(pbs_h02, Cost)

gg_subseries(pbs_h02, Cost)

gg_lag(pbs_h02, Cost, lag = 12)

ACF(pbs_h02, Cost)
## # A tsibble: 23 x 2 [1M]
##         lag   acf
##    <cf_lag> <dbl>
##  1       1M 0.525
##  2       2M 0.484
##  3       3M 0.417
##  4       4M 0.386
##  5       5M 0.320
##  6       6M 0.357
##  7       7M 0.346
##  8       8M 0.239
##  9       9M 0.272
## 10      10M 0.197
## # ℹ 13 more rows
#5. Gasoline
gasoline <- us_gasoline %>%
  select(Week, Barrels)

autoplot(gasoline, Barrels)

gg_season(gasoline, Barrels)

gg_subseries(gasoline, Barrels)

gg_lag(gasoline, Barrels, lag = 52)

ACF(gasoline, Barrels)
## # A tsibble: 31 x 2 [1W]
##         lag   acf
##    <cf_lag> <dbl>
##  1       1W 0.893
##  2       2W 0.882
##  3       3W 0.873
##  4       4W 0.866
##  5       5W 0.847
##  6       6W 0.844
##  7       7W 0.832
##  8       8W 0.831
##  9       9W 0.822
## 10      10W 0.808
## # ℹ 21 more rows

Seasonality is strong is total private employment, PBS H02 cost and US gasoline, but none in Hare. Hare is strong in cyclicity. Employment and PBS costs show strong upward trend. Employment and gasoline peak in specific seasons each year. Unusual years vary in each session.

Summary

The analysis shows that many series have strong trends and seasonal patterns, especially employment, pharmaceutical costs, and gasoline consumption. rick production displays seasonal behavior with long-term decline, while hare pelts show clear multi-year cycles without seasonality.