DATA 624 Homework 1

library(fpp3)
library(tidyverse)
library(readxl)
data("aus_production")
data("pelt")
data("gafa_stock")
data("vic_elec")

Question 2.1

# Open the help page of each data set to read its description.
help(aus_production)
help(pelt)
help(gafa_stock)
help(vic_elec)
# Report the time interval of each series.
aus_production |> interval()
## <interval[1]>
## [1] 1Q
pelt |> interval()
## <interval[1]>
## [1] 1Y
gafa_stock |> interval()
## <interval[1]>
## [1] !
vic_elec |> interval()
## <interval[1]>
## [1] 30m
# Time plot of quarterly brick production.
aus_production |>
  autoplot(Bricks) +
  labs(title = "Bricks Production in Australia")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Time plot of annual lynx pelts.
pelt |>
  autoplot(Lynx) +
  labs(title = "Annual Lynx Trappings")

# Time plot of closing prices, one line per stock symbol.
gafa_stock |>
  autoplot(Close) +
  labs(title = "GAFA Stocks Closing Prices")

# Time plot of half-hourly electricity demand with custom axis labels.
vic_elec |>
  autoplot(Demand) +
  labs(
    x = "Year",
    y = "Demand (MW)",
    title = "Electricity Demand in Victoria"
  )

Question 2.2

# For each stock, keep the trading day(s) on which the closing price
# reached its maximum. The result stays grouped by Symbol.
peak_closing_days <- gafa_stock |>
  select(Symbol, Date, Close) |>
  group_by(Symbol) |>
  filter(Close == max(Close)) |>
  arrange(Symbol, Date)

peak_closing_days
## # A tsibble: 4 x 3 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date       Close
##   <chr>  <date>     <dbl>
## 1 AAPL   2018-10-03  232.
## 2 AMZN   2018-09-04 2040.
## 3 FB     2018-07-25  218.
## 4 GOOG   2018-07-26 1268.

Question 2.3

# Load the quarterly data from the working directory.
tute1 <- read_csv(file = "tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): Sales, AdBudget, GDP
## date (1): Quarter
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#View(tute1)
# Convert the date column to a quarterly index so the data form a tsibble.
mytimeseries <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)
# Long format: one row per quarter and series, shared by both plots below.
tute1_long <- pivot_longer(mytimeseries, cols = !Quarter)

# One panel per series, each with its own y-axis.
ggplot(tute1_long, aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")

# All series drawn on a single shared y-axis.
ggplot(tute1_long, aes(x = Quarter, y = value, colour = name)) +
  geom_line()

Without `facet_grid()`, all three series are plotted on the same y-axis, so the axis range is set by the series with the largest values (Sales) and the variation in the lower-valued series (AdBudget and GDP) is compressed and harder to see. Using `facet_grid()` with `scales = "free_y"` gives each variable its own panel and its own y-axis scale.

Question 2.4

#install.packages("USgas")
library(USgas)
glimpse(us_total)
## Rows: 1,266
## Columns: 3
## $ year  <int> 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007…
## $ state <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama"…
## $ y     <int> 324158, 329134, 337270, 353614, 332693, 379343, 350345, 382367, …
# One annual series per state: year is the index, state is the key.
us_gas_tsibble <- us_total |>
  as_tsibble(index = year, key = state)
new_england_states <- c("Maine", "Vermont", "New Hampshire",
                        "Massachusetts", "Connecticut", "Rhode Island")
# Plot annual consumption for the six New England states.
# `linewidth` replaces the `size` argument, which is deprecated for lines
# since ggplot2 3.4.0. The y-axis unit follows the USgas documentation for
# `us_total$y` (million cubic feet).
us_gas_tsibble |>
  filter(state %in% new_england_states) |>
  ggplot(aes(x = year, y = y, colour = state)) +
  geom_line(linewidth = 1) +
  labs(title = "Annual Natural Gas Consumption in New England States",
       x = "Year",
       y = "Total Consumption (Million Cubic Feet)",
       colour = "State") +
  theme_minimal()

Question 2.5

# Read the raw spreadsheet; Quarter arrives as a character column.
tourism_raw <- read_excel("tourism.xlsx")
glimpse(tourism_raw)
## Rows: 24,320
## Columns: 5
## $ Quarter <chr> "1998-01-01", "1998-04-01", "1998-07-01", "1998-10-01", "1999-…
## $ Region  <chr> "Adelaide", "Adelaide", "Adelaide", "Adelaide", "Adelaide", "A…
## $ State   <chr> "South Australia", "South Australia", "South Australia", "Sout…
## $ Purpose <chr> "Business", "Business", "Business", "Business", "Business", "B…
## $ Trips   <dbl> 135.0777, 109.9873, 166.0347, 127.1605, 137.4485, 199.9126, 16…
# Convert Quarter to a yearquarter index and declare the key.
# State is part of the key so the result has the same key structure as the
# packaged `tourism` tsibble (Region, State, Purpose).
tourism <- tourism_raw |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(key = c(Region, State, Purpose), index = Quarter)
# Find the Region/Purpose combination with the highest average number of
# trips over the whole sample.
# The tsibble is converted to a tibble first: summarise() on a tsibble keeps
# the Quarter index, so mean(Trips) would be computed per quarter rather than
# across time. slice_max() then picks the single largest average over all
# combinations (the data are ungrouped at that point).
max_avg_trips <- tourism |>
  as_tibble() |>
  group_by(Region, Purpose) |>
  summarise(avg_trips = mean(Trips), .groups = "drop") |>
  slice_max(avg_trips, n = 1)
max_avg_trips
# NOTE(review): rendered output omitted here; re-knit the document to print
# the resulting one-row tibble.
# Total trips per state and quarter, summed over regions and purposes.
# The Quarter index is retained, so the result is a tsibble keyed by State.
tourism_state <- tourism |>
  group_by(State) |>
  summarise(Trips = sum(Trips)) |>
  as_tsibble(key = State, index = Quarter)

tourism_state
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
##    State Quarter Trips
##    <chr>   <qtr> <dbl>
##  1 ACT   1998 Q1  551.
##  2 ACT   1998 Q2  416.
##  3 ACT   1998 Q3  436.
##  4 ACT   1998 Q4  450.
##  5 ACT   1999 Q1  379.
##  6 ACT   1999 Q2  558.
##  7 ACT   1999 Q3  449.
##  8 ACT   1999 Q4  595.
##  9 ACT   2000 Q1  600.
## 10 ACT   2000 Q2  557.
## # ℹ 630 more rows

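A tsibble must keep its time index (Quarter), so summarising it returns one total per state per quarter. To get a single total per state across all quarters, I first convert the data to a tibble with as_tibble().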

# Overall trips per state across all quarters, largest first.
# Converting to a tibble drops the time index so the sum spans every quarter.
state_total_trips <- tourism |>
  as_tibble() |>
  summarise(Total_Trips = sum(Trips), .by = State) |>
  arrange(desc(Total_Trips))

state_total_trips
## # A tibble: 8 × 2
##   State              Total_Trips
##   <chr>                    <dbl>
## 1 New South Wales        557367.
## 2 Victoria               390463.
## 3 Queensland             386643.
## 4 Western Australia      147820.
## 5 South Australia        118151.
## 6 Tasmania                54137.
## 7 ACT                     41007.
## 8 Northern Territory      28614.

Question 2.8

Employed

# Keep only the "Total Private" series; every plot below uses it.
total_private <- filter(us_employment, Title == "Total Private")

# Time plot.
autoplot(total_private, Employed) +
  labs(title = "Total Private Employment in US")

# Seasonal plot.
gg_season(total_private, Employed)
## Warning: `gg_season()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_season()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Subseries plot.
gg_subseries(total_private, Employed)
## Warning: `gg_subseries()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_subseries()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Lag plot.
gg_lag(total_private, Employed)
## Warning: `gg_lag()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_lag()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Autocorrelation function.
autoplot(ACF(total_private, Employed))

Trend: Clear upward trend in employment over time.

Seasonality: Strong seasonal cycle — likely due to hiring trends.

Cyclicality: Some drops during economic issues (2008–2009).

Unusual years: The sharpest dip follows the 2008 financial crisis (2008–2010); smaller dips appear around earlier recessions.

Bricks

# Time plot.
aus_production |> autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Seasonal plot.
aus_production |> gg_season(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Subseries plot.
aus_production |> gg_subseries(Bricks)
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Lag plot.
aus_production |> gg_lag(Bricks)
## Warning: Removed 20 rows containing missing values (gg_lag).

# Autocorrelation function.
aus_production |>
  ACF(Bricks) |>
  autoplot()

Seasonality: Clear seasonal patterns (higher in Q2/Q3).

Trend: Increasing until around 1980, then generally decreasing.

Cyclicality: Linked to construction cycles and possibly economic activity.

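Unusual years: Sharp drops around 1975 and 1982–83. Bricks has 20 missing values (see the warnings above), which fall at the end of the series, so the plots say nothing about the most recent years.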

Hare

# Time plot.
pelt |> autoplot(Hare)

# Subseries plot.
pelt |> gg_subseries(Hare)

# Lag plot.
pelt |> gg_lag(Hare)

# Autocorrelation function.
pelt |>
  ACF(Hare) |>
  autoplot()

Cyclicality: Strong cyclical pattern (population cycles).

Seasonality: None. The data are annual, so seasonal patterns cannot be observed (which is why no gg_season() plot is shown).

Trend: No clear upward/downward trend.

Unusual years: Peaks and troughs are extreme due to population booms/crashes.

Cost

# Keep only the rows for ATC2 group H02; every plot below uses them.
# NOTE(review): the filter presumably leaves several series (PBS is keyed by
# more than ATC2), so each plot may show one line/panel per series; confirm.
h02 <- filter(PBS, ATC2 == "H02")

# Time plot.
autoplot(h02, Cost)

# Seasonal plot.
gg_season(h02, Cost)

# Subseries plot.
gg_subseries(h02, Cost)

# Autocorrelation function.
autoplot(ACF(h02, Cost))

Trend: Gradual increase in costs over time.

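Seasonality: Present. Costs appear to build through the calendar year and fall sharply early in the following year, a pattern tied to the calendar year rather than to winter months (H02 is the ATC code for systemic corticosteroids).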

Cyclicality: Weak.

Unusual years: Look for sudden spikes (price changes, drug introduction, etc.).

Barrels

# Time plot.
us_gasoline |> autoplot(Barrels)

# Seasonal plot.
us_gasoline |> gg_season(Barrels)

# Subseries plot.
us_gasoline |> gg_subseries(Barrels)

# Lag plot.
us_gasoline |> gg_lag(Barrels)

# Autocorrelation function.
us_gasoline |>
  ACF(Barrels) |>
  autoplot()

Trend: Upward until about 2007, followed by a decline after the 2008 financial crisis and renewed growth towards the end of the series.

Seasonality: Clear seasonal patterns (higher usage in summer).

Cyclicality: Relates to travel behavior and economic cycles.

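Unusual years: The series ends in early 2017, so it does not cover the 2020 pandemic. The most notable departure is the drop in supply following the 2008 financial crisis.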