library(fpp3)
## Warning: package 'fpp3' was built under R version 4.5.2
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──
## ✔ tibble      3.3.0     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.2
## ✔ lubridate   1.9.4     ✔ fable       0.5.0
## ✔ ggplot2     4.0.0
## Warning: package 'tsibble' was built under R version 4.5.2
## Warning: package 'tsibbledata' was built under R version 4.5.2
## Warning: package 'feasts' was built under R version 4.5.2
## Warning: package 'fabletools' was built under R version 4.5.2
## Warning: package 'fable' was built under R version 4.5.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(dplyr)
library(USgas)
## Warning: package 'USgas' was built under R version 4.5.2
library(readxl)
## Warning: package 'readxl' was built under R version 4.5.2

2.1

Expplore the following four times series: Bricks from aus_productrion, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.

?aus_production
## starting httpd help server ... done
?pelt
?gafa_stock
?vic_elec
glimpse(aus_production)
## Rows: 218
## Columns: 7
## $ Quarter     <qtr> 1956 Q1, 1956 Q2, 1956 Q3, 1956 Q4, 1957 Q1, 1957 Q2, 1957…
## $ Beer        <dbl> 284, 213, 227, 308, 262, 228, 236, 320, 272, 233, 237, 313…
## $ Tobacco     <dbl> 5225, 5178, 5297, 5681, 5577, 5651, 5317, 6152, 5758, 5641…
## $ Bricks      <dbl> 189, 204, 208, 197, 187, 214, 227, 222, 199, 229, 249, 234…
## $ Cement      <dbl> 465, 532, 561, 570, 529, 604, 603, 582, 554, 620, 646, 637…
## $ Electricity <dbl> 3923, 4436, 4806, 4418, 4339, 4811, 5259, 4735, 4608, 5196…
## $ Gas         <dbl> 5, 6, 7, 6, 5, 7, 7, 6, 5, 7, 8, 6, 5, 7, 8, 6, 6, 8, 8, 7…
glimpse(pelt)
## Rows: 91
## Columns: 3
## $ Year <dbl> 1845, 1846, 1847, 1848, 1849, 1850, 1851, 1852, 1853, 1854, 1855,…
## $ Hare <dbl> 19580, 19600, 19610, 11990, 28040, 58000, 74600, 75090, 88480, 61…
## $ Lynx <dbl> 30090, 45150, 49150, 39520, 21230, 8420, 5560, 5080, 10170, 19600…
glimpse(gafa_stock)
## Rows: 5,032
## Columns: 8
## Key: Symbol [4]
## $ Symbol    <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAP…
## $ Date      <date> 2014-01-02, 2014-01-03, 2014-01-06, 2014-01-07, 2014-01-08,…
## $ Open      <dbl> 79.38286, 78.98000, 76.77857, 77.76000, 76.97285, 78.11429, …
## $ High      <dbl> 79.57571, 79.10000, 78.11429, 77.99429, 77.93714, 78.12286, …
## $ Low       <dbl> 78.86000, 77.20428, 76.22857, 76.84571, 76.95571, 76.47857, …
## $ Close     <dbl> 79.01857, 77.28286, 77.70428, 77.14857, 77.63715, 76.64571, …
## $ Adj_Close <dbl> 66.96433, 65.49342, 65.85053, 65.37959, 65.79363, 64.95345, …
## $ Volume    <dbl> 58671200, 98116900, 103152700, 79302300, 64632400, 69787200,…
glimpse(vic_elec)
## Rows: 52,608
## Columns: 5
## $ Time        <dttm> 2012-01-01 00:00:00, 2012-01-01 00:30:00, 2012-01-01 01:0…
## $ Demand      <dbl> 4382.825, 4263.366, 4048.966, 3877.563, 4036.230, 3865.597…
## $ Temperature <dbl> 21.40, 21.05, 20.70, 20.55, 20.40, 20.25, 20.10, 19.60, 19…
## $ Date        <date> 2012-01-01, 2012-01-01, 2012-01-01, 2012-01-01, 2012-01-0…
## $ Holiday     <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE…
tsibble::interval(aus_production)
## <interval[1]>
## [1] 1Q
tsibble::interval(pelt)
## <interval[1]>
## [1] 1Y
tsibble::interval(gafa_stock)
## <interval[1]>
## [1] !
tsibble::interval(vic_elec)
## <interval[1]>
## [1] 30m
aus_production %>%
  autoplot(Bricks) +
  labs(
    title = "Australian Clay Brick Production",
    x = "Quarter",
    y = "Bricks (millions)"
  )
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

pelt %>%
  autoplot(Lynx) +
  labs(
    title = "Canadian Lynx Pelts Traded",
    x = "Year",
    y = "Pelts (thousands)"
  )

gafa_stock %>%
  autoplot(Close) +
  labs(
    title = "Daily Closing Prices - GAFA Stocks (2014)",
    x = "Date",
    y = "Closing Price (USD)"
  )

vic_elec %>%
  autoplot(Demand) +
  labs(
    title = "Half Hourly Electricity Demand in Victoria",
    x = "Time",
    y = "Demand (MW)"
  )

2.2

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

peak_close_days <- gafa_stock %>%
  group_by(Symbol) %>%
  filter(Close == max(Close)) %>%
  arrange(Symbol, Date)

peak_close_days

2.3

Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Column B throught coloumn D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.

tute1 <-  read.csv("https://raw.githubusercontent.com/JaydeeJan/Data-624-hw1/refs/heads/main/tute1.csv")

glimpse(tute1)
## Rows: 100
## Columns: 4
## $ Quarter  <chr> "1981-03-01", "1981-06-01", "1981-09-01", "1981-12-01", "1982…
## $ Sales    <dbl> 1020.2, 889.2, 795.0, 1003.9, 1057.7, 944.4, 778.5, 932.5, 99…
## $ AdBudget <dbl> 659.2, 589.0, 512.5, 614.1, 647.2, 602.0, 530.7, 608.4, 637.9…
## $ GDP      <dbl> 251.8, 290.9, 290.8, 292.4, 279.1, 254.0, 295.6, 271.7, 259.6…
mytimeseries <- tute1 %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter)

mytimeseries %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ .,scales = "free_y") +
  labs(
    title = "Quarterly Sales, and GDP, Advertising Budget",
    x = "Quarter",
    y = "Value (inflation-adjusted)"
  )

mytimeseries %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  labs(
    title = "All series plotted on the same axis (no faceting)",
    x = "Quarter",
    y = "Value (inflation-adjusted)"
  )

without facet_grid() all three series are drawn in a single panel with one common y-axis.Since GDP is on a much larger scale, so the Sales and AdBudget lines will become flatend and hard to read and understand.

2.4

The USgas package contains data on the demand for natural gas in the US. a. Install the USgas package. b. Create a tsibble from us_total with year as the index and state as the key. c. Plot the annual natural gas consumption by state for the New England area (comprising the states of Main, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).

# load dataset from USgas
data("us_total") 

# convert to tsibble: index = year, key = state
us_total_ts <- us_total %>%
  as_tsibble(index = year, key = state)

us_total_ts
ne_states <- c(
  "Maine",
  "Vermont",
  "New Hampshire",
  "Massachusetts",
  "Connecticut",
  "Rhode Island"
)

us_total_ts %>%
  filter(state %in% ne_states) %>%
  autoplot(y) +
  labs(
    title = "Annual Natural Gas Consumption - New England States",
    x = "Year",
    y = "Natural gas consuption (million cubic feet)"
  )

2.5

  1. Download tourism.xlsx from the book website and read it into R using readxl::read_excel().
  2. Create a tsibble which is identical to the tourism tsibble from the tsibble package.
  3. Find what combination of Region and Purpose had the maximum number of overnight trips on average.
  4. Create a new tsibble which combines the Purposes and Regions, and just ahs total trips by State.
tourism_xl <- read_excel("tourism.xlsx")
glimpse(tourism_xl)
## Rows: 24,320
## Columns: 5
## $ Quarter <chr> "1998-01-01", "1998-04-01", "1998-07-01", "1998-10-01", "1999-…
## $ Region  <chr> "Adelaide", "Adelaide", "Adelaide", "Adelaide", "Adelaide", "A…
## $ State   <chr> "South Australia", "South Australia", "South Australia", "Sout…
## $ Purpose <chr> "Business", "Business", "Business", "Business", "Business", "B…
## $ Trips   <dbl> 135.0777, 109.9873, 166.0347, 127.1605, 137.4485, 199.9126, 16…
tourism_xl_ts <- tourism_xl %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter,
             key = c(Region, State, Purpose))

tourism_xl_ts
avg_trips <- tourism_xl_ts %>%
  group_by(Region, Purpose) %>%
  summarise(
    mean_trips = mean(Trips),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_trips))
## Warning: Current temporal ordering may yield unexpected results.
## ℹ Suggest to sort by `Region`, `Purpose`, `Quarter` first.
# top combination
head(avg_trips, 1)
State_trips <- tourism_xl_ts %>%
  as_tibble() %>%
  group_by(State, Quarter) %>%
  summarise(
    Trips = sum(Trips),
    .groups = "drop"
  ) %>%
  as_tsibble(index = Quarter, key = State)

head(State_trips)

2.8

Use the following graphics functions: autoplot(), gg_subseries(), gg_lag(),ACF()and explore features from the following time series: "Total Private"Employedfromus_employment,Bricksfromaus_production,Harefrompelt, "H02"CostfromPBS, andBarrelsfromus_gasoline`. - Can you spot any seasonality, cyclicity and trend? - What do you learn about the series? - What can you say about the seasonal patterns? - Can you identify any unusual years?

total_private <- us_employment %>%
  filter(Title == "Total Private")

bricks <- aus_production

barrels <- us_gasoline
# Total Private Employment

# overall behaviour
total_private %>%
  autoplot(Employed) +
  labs(
    title = "Total Private Employment (US)",
    x = "Month",
    y = "Number employed")

# seasonal patterns
total_private %>%
  gg_season(Employed) +
  labs(
    title = "Seasonal Plot - Total Private Employment"
  )
## Warning: `gg_season()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_season()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

total_private %>%
  gg_subseries(Employed) +
  labs(
    title = "Subseries Plot - Total Private Employment"
  )
## Warning: `gg_subseries()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_subseries()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# unusual years
total_private %>%
  gg_lag(Employed) +
  labs(
    title = "Lag Plot - Total Private Employment"
  )
## Warning: `gg_lag()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_lag()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

total_private %>%
  ACF(Employed) %>%
  autoplot() +
  labs(
    title = "ACF - Total Private Employment"
  )

From the plots of Total Private employment, I can clearly see an overall upward trend, with some sharp drops around recession periods. The seasonal and subseries plots show a consistent yearly pattern where certain months are always higher or lower, and the ACF has big spikes at lag 12, which backs up the annual seasonality. The lag plot also suggests strong positive autocorrelation, so the series changes gradually rather than jumping randomly. Overall, this looks like a non-stationary series with trend, strong seasonality, and a few unusual years where recessions hit employment hard.

# Bricks (Australian brick production)

bricks %>%
  autoplot(Bricks) +
  labs(
    title = "Clay Brick Production - Australia",
    x = "Quarter",
    y = "Million of bricks"
    )
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

bricks %>%
  gg_season(Bricks) +
  labs(
    title = "Seasonal Plot - Bricks"
    )
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

bricks %>%
  gg_subseries(Bricks) +
  labs(
    title = "Subseries Plot - Bricks"
    )
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).

bricks %>%
  ACF(Bricks) %>%
  autoplot() +
  labs(
    title = "ACF - Bricks"
  )

For the Bricks series, the time plot shows a general downward trend in brick production over time, plus some medium-term ups and downs. The seasonal and subseries plots make the quarterly pattern really obvious: the same quarters tend to be relatively high or low each year, and the ACF spikes at multiples of 4, which matches quarterly seasonality. This tells me the series is strongly seasonal and not stationary, with both trends and cyclic behaviour. Quarters that are much lower than usual compared to the surrounding years stand out as unusual and might reflect structural changes or shocks in the construction sector.

# Hare Pelts

pelt %>%
  autoplot(Hare) +
  labs(
    title = "Hare Pelts - Canada",
    x = "Year",
    y = "Number of pelts"
  )

pelt %>%
  gg_lag(Hare) +
  labs(
    title = "Lag Plot - Hare pelts"
  )

pelt %>%
  ACF(Hare) %>%
  autoplot() +
  labs(
    title = "ACF - Hare pelts"
  )

When I look at the Hare series, there is no strong long-run trend, but there is a big, regular cycles where the numbers rise and fall over several years. Since the data are annual, there isn’t within-year seasonality to worry about, so the main feature is this multi-year cyclic process rather than simple trend or random noise. Very high or very low years line up with the peaks and troughs of these cycles and can be treated as unsual population booms or crashes.

# H02 Cost

h02 <- PBS %>%
  filter(
    ATC2 == "H02",
    Concession == "General",
    Type == "Co-payments"
  )

h02
h02 %>%
  autoplot(Cost) +
  labs(
    title = "PBS - H02 drug cost (General, Co-payment)",
    x = "Month",
    y = "Cost (AUD)"
  )

h02 %>%
  gg_season(Cost) +
  labs(
    title = "Seasonal Plot - H02 cost"
  )

h02 %>%
  gg_subseries(Cost) +
  labs(
    title = "Subseries Plot - H02 cost"
  )

h02 %>%
  gg_lag(Cost) +
  labs(
    title = "Lag Plot - H02 cost"
  )

h02 %>%
  ACF(Cost) %>%
  autoplot() +
  labs(
    title = "ACF - H02 cost"
  )

For the H02 cost series, after filtering PBS to the H02, General, Co-payments group, the time plot shows a clear upward trend in costs over time. The seasonal and subseries plots show a stable yearly pattern where some months consistently have higher spending than others, and the ACF has strong spikes at lag 12, which supports the idea of annual seasonality on top of the trend. The lag plot also indicates strong persistence from one month to the next. Overall, it’s a trending, strongly seasonal series, and any sudden jump in the level or noticeable change in the seasonal shape would stand out as an unusual period, possibly linked to policy or pricing changes.

# Barrels (US gasoline consumption)

barrels %>%
  autoplot(Barrels) +
  labs(
    title = "US Gasoline Consumption - Barrels",
    x = "Week",
    y = "Barrels"
  )

barrels %>%
  gg_season(Barrels) +
  labs(
    title = "Seasonal Plot - Barrels"
  )

barrels %>%
  gg_subseries(Barrels) +
  labs(
    title = "Subseries Plot - Barrels"
  )

barrels %>%
  gg_lag(Barrels) +
  labs(
    title = "Lag Plot - Barrels"
  )

barrels %>%
  ACF(Barrels) %>%
  autoplot() +
  labs(
    title = "ACF - Barrels"
  )

For the Barrels series, the weekly gasoline data show a very clear yearly seasonal pattern, with higher consumption during certain parts of the year and lower levels in others. There might be a mild long-run trend, but the dominant feature is this strong annual seasonality, which shows up nicely in the seasonal and asubseries plots and as big ACF spikes around the yearly lag. The lag plot suggests high positive correlation between neighbouring weeks, so the series moves smoothly rather than randomly. Years where the whole seasonal curve is noticeably higher or lower than usual, or where specific weeks behave very differently, would be reasonable to label as unusual.