Instructions

Please submit exercises 2.1, 2.2, 2.3, 2.4, 2.5 and 2.8 from the Hyndman online Forecasting book. Please submit both your Rpubs link as well as attach the .pdf file with your code.

# Load libraries
library(fpp3)
library(dplyr)
library(imputeTS)

Exercise 2.1

Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.

  • What is the time interval of each series?
  • Use autoplot() to produce a time plot of each series.
  • For the last plot, modify the axis labels and title.
# Load the time series datasets
bricks <- aus_production |> select(Bricks)
lynx <- pelt |> select(Lynx)
gafa <- gafa_stock |> select(Close)
vic_elec <- vic_elec |> select(Demand)

# Brick Interval
bricks_interval <- interval(bricks)
bricks_interval
## <interval[1]>
## [1] 1Q
# lynx interval
lynx_interval <- interval(lynx)
lynx_interval
## <interval[1]>
## [1] 1Y
# gafa interval
gafa_interval <- interval(gafa)
gafa_interval
## <interval[1]>
## [1] !
# victoria electrical interval
vic_elec_interval <- interval(vic_elec)
vic_elec_interval
## <interval[1]>
## [1] 30m
Regularized Gafa Interval
# Correct the GAFA Interval

# Convert to tsibble 
gafa_tsibble <- gafa |>
  mutate(Date = as.Date(Date)) |>
  as_tsibble(index = Date)

# Fill in missing dates and interpolate missing values for 'Close'
gafa_regular <- gafa_tsibble |>
  fill_gaps() |>
  mutate(Close = na_interpolation(Close))

# Check the interval
gafa_interval <- interval(gafa_regular)

# Print the regularized gafa interval
print(gafa_interval)
## <interval[1]>
## [1] 1D
# Time plot for Bricks production
autoplot(bricks) +
  labs(title = "Bricks Production Over Time",
       y = "Bricks Produced")

# Time plot for Lynx pelts
autoplot(lynx) +
  labs(title = "Lynx Pelts Over Time",
       y = "Number of Pelts")

# Time plot for GAFA stock prices
autoplot(gafa_regular) +
  labs(title = "GAFA Stock Prices",
       y = "Stock Price")

# Time plot for Demand from Victoria Electricity
autoplot(vic_elec) +
  labs(title = "Electricity Demand in Victoria",
       x = "Year",
       y = "Electricity Demand (MW)")

Exercise 2.2

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

# Peak closing price for each stock
peak_closing_days <- gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close, na.rm = TRUE)) |>
  ungroup()

# Results
peak_closing_days
## # A tsibble: 4 x 8 [!]
## # Key:       Symbol [4]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 AAPL   2018-10-03  230.  233.  230.  232.      230. 28654800
## 2 AMZN   2018-09-04 2026. 2050. 2013  2040.     2040.  5721100
## 3 FB     2018-07-25  216.  219.  214.  218.      218. 58954200
## 4 GOOG   2018-07-26 1251  1270. 1249. 1268.     1268.  2405600

Exercise 2.3

  • Download the file tute1.csv
  • You can read the data into R with the following script:
  • Convert the data to time series
  • Construct time series plots of each of the three series
  • Check what happens when you don’t include facet_grid()
# Read CSV from github repo
tute1 <- read.csv("https://raw.githubusercontent.com/johnnydrodriguez/Data624/main/tute1.csv", header = TRUE, sep = ',', na.strings="", fill = TRUE)
head(tute1)
##      Quarter  Sales AdBudget   GDP
## 1 1981-03-01 1020.2    659.2 251.8
## 2 1981-06-01  889.2    589.0 290.9
## 3 1981-09-01  795.0    512.5 290.8
## 4 1981-12-01 1003.9    614.1 292.4
## 5 1982-03-01 1057.7    647.2 279.1
## 6 1982-06-01  944.4    602.0 254.0
# Convert to Time series
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)
mytimeseries
## # A tsibble: 100 x 4 [1Q]
##    Quarter Sales AdBudget   GDP
##      <qtr> <dbl>    <dbl> <dbl>
##  1 1981 Q1 1020.     659.  252.
##  2 1981 Q2  889.     589   291.
##  3 1981 Q3  795      512.  291.
##  4 1981 Q4 1004.     614.  292.
##  5 1982 Q1 1058.     647.  279.
##  6 1982 Q2  944.     602   254 
##  7 1982 Q3  778.     531.  296.
##  8 1982 Q4  932.     608.  272.
##  9 1983 Q1  996.     638.  260.
## 10 1983 Q2  908.     582.  280.
## # ℹ 90 more rows
# Time Series Plot
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

# Time Series Plot without Faceting
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line()

Exercise 2.4

The USgas package contains data on the demand for natural gas in the US.

  • Install the USgas package.
  • Create a tsibble from us_total with year as the index and state as the key.
  • Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).
library(USgas)

# tsibble from us_total
gas_tsibble <- us_total |>
  as_tsibble(index = year, key = state)

# Plot annual natural gas consumption for New England states
new_england_states <- c("Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut", "Rhode Island")

gas_tsibble |>
  filter(state %in% new_england_states) |>
  ggplot(aes(x = year, y = y, color = state)) +
  geom_line() +
  labs(title = "Annual Natural Gas Consumption in New England",
       y = "Consumption (Billion Cubic Feet)",
       x = "Year")

Exercise 2.5

  • Download tourism.xlsx from the book website and read it into R using readxl::read_excel().
  • Create a tsibble which is identical to the tourism tsibble from the tsibble package.
  • Find what combination of Region and Purpose had the maximum number of overnight trips on average.
  • Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.
library(readxl)
library(httr)
library(tsibble)
library(lubridate)

# Read Excel file hosted on github repo
url <- "https://raw.githubusercontent.com/johnnydrodriguez/Data624/main/tourism.xlsx"
GET(url, write_disk(tf <- tempfile(fileext = ".xlsx")))
## Response [https://raw.githubusercontent.com/johnnydrodriguez/Data624/main/tourism.xlsx]
##   Date: 2024-09-08 00:39
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 679 kB
## <ON DISK>  /var/folders/59/y07xv_ln01g9fty91jhgybsc0000gq/T//RtmpIdxErq/file219871d68e41.xlsx
tourism_data <- read_excel(tf)
unlink(tf)
head(tourism_data)
## # A tibble: 6 × 5
##   Quarter    Region   State           Purpose  Trips
##   <chr>      <chr>    <chr>           <chr>    <dbl>
## 1 1998-01-01 Adelaide South Australia Business  135.
## 2 1998-04-01 Adelaide South Australia Business  110.
## 3 1998-07-01 Adelaide South Australia Business  166.
## 4 1998-10-01 Adelaide South Australia Business  127.
## 5 1999-01-01 Adelaide South Australia Business  137.
## 6 1999-04-01 Adelaide South Australia Business  200.
# Create tsibble
tourism_tsibble <- tourism_data |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(key = c(Region, State, Purpose), index = Quarter)

# Region and Purpose with maximum average overnight trips
max_trips <- tourism_tsibble |>
  group_by(Region, Purpose) |>
  summarise(avg_trips = mean(Trips), .groups = "drop") |>
  slice_max(avg_trips, n = 1)

print("Region and Purpose with maximum average overnight trips:")
## [1] "Region and Purpose with maximum average overnight trips:"
print(max_trips)
## # A tsibble: 1 x 4 [1Q]
## # Key:       Region, Purpose [1]
##   Region    Purpose  Quarter avg_trips
##   <chr>     <chr>      <qtr>     <dbl>
## 1 Melbourne Visiting 2017 Q4      985.
# Purposes and Regions tsibble with total trips by State
state_trips <- tourism_tsibble |>
  index_by(Quarter) |>
  group_by(State) |>
  summarise(Total_Trips = sum(Trips), .groups = "drop") |>
  as_tsibble(key = State, index = Quarter)

print("Total State Trips:")
## [1] "Total State Trips:"
print(head(state_trips))
## # A tsibble: 6 x 3 [1Q]
## # Key:       State [1]
##   State Quarter Total_Trips
##   <chr>   <qtr>       <dbl>
## 1 ACT   1998 Q1        551.
## 2 ACT   1998 Q2        416.
## 3 ACT   1998 Q3        436.
## 4 ACT   1998 Q4        450.
## 5 ACT   1999 Q1        379.
## 6 ACT   1999 Q2        558.
# Plot state trips time series
state_trips_plot <- ggplot(state_trips, aes(x = Quarter, y = Total_Trips, color = State)) +
  geom_line() +
  labs(title = "Total Trips by State Over Time",
       x = "Quarter",
       y = "Total Trips") +
  theme(legend.position = "right")

print(state_trips_plot)

# Plot max trips time series combination
max_trips_series <- tourism_tsibble |>
  filter(Region == max_trips$Region, Purpose == max_trips$Purpose) |>
  as_tsibble(key = c(Region, State, Purpose), index = Quarter)

max_trips_plot <- ggplot(max_trips_series, aes(x = Quarter, y = Trips)) +
  geom_line() +
  labs(title = paste("Trips for", max_trips$Region, "-", max_trips$Purpose),
       x = "Quarter",
       y = "Number of Trips")

print(max_trips_plot)

Exercise 2.8

Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.

  • Can you spot any seasonality, cyclicity and trend?
  • What do you learn about the series?
  • What can you say about the seasonal patterns?
  • Can you identify any unusual years?

Observations noticed in the plots:

  • Total Private Employment Trend: There is a upward trend in total private employment over time with a noticeable dip around 2008–2010, likely due to the global financial crisis.
  • Bricks Production Unusual Years: The peak around the late 1970s and early 1980s stands out. It is followed by a period of volatility and general decline.
  • Hare Pelts ACF Plot Cyclicity: Significant spikes at lags 1 and 10 suggest strong cyclic behavior in hare pelts production.
  • Barrels of Gasoline Seasonality: There are periodic fluctuations within each year, indicating seasonality in gasoline production, likely driven by seasonal travel demands.
# Load time series datasets

# 1. Total Private Employed from us_employment
employment_ts <- us_employment |> filter(Title == "Total Private")

# Time plot for Total Private Employment
autoplot(employment_ts) + labs(title = "Time Plot of Total Private Employment")

# Seasonal plot for Total Private Employment
gg_season(employment_ts) + labs(title = "Seasonal Plot of Total Private Employment")

# Subseries plot for Total Private Employment
gg_subseries(employment_ts) + labs(title = "Subseries Plot of Total Private Employment")

# Lag plot for Total Private Employment
gg_lag(employment_ts)  + 
  geom_point() + labs(title = "Lag Plot of Total Private Employment")

# ACF plot for Total Private Employment
ACF(employment_ts, Employed) |>
  autoplot() + labs(title = "ACF Plot of Total Private Employment")

# 2. Bricks from aus_production
bricks_ts <- aus_production |> select(Bricks)

# Time plot for Bricks Production
autoplot(bricks_ts) + labs(title = "Time Plot of Bricks Production")

# Seasonal plot for Bricks Production
gg_season(bricks_ts) + labs(title = "Seasonal Plot of Bricks Production")

# Subseries plot for Bricks Production
gg_subseries(bricks_ts) + labs(title = "Subseries Plot of Bricks Production")

# Lag plot for Bricks Production
gg_lag(bricks_ts)+ 
  geom_point() + labs(title = "Lag Plot of Bricks Production")

# ACF plot for Bricks Production
ACF(bricks_ts) |> autoplot() + labs(title = "ACF Plot of Bricks Production")

# 3. Hare from pelt
hare_ts <- pelt |>
  as_tsibble(index = Year) |>
  select(Hare)

# Time plot for Hare Pelts
autoplot(hare_ts) + labs(title = "Time Plot of Hare Pelts")

# Subseries plot for Hare Pelts
gg_subseries(hare_ts) + labs(title = "Subseries Plot of Hare Pelts")

# Lag plot for Hare Pelts
gg_lag(hare_ts)+ 
  geom_point() +  labs(title = "Lag Plot of Hare Pelts")

# ACF plot for Hare Pelts
ACF(hare_ts, Hare) |>
  autoplot() + labs(title = "ACF Plot of Hare Pelts")

# 4. "H02" Cost from PBS
h02_ts <- PBS |>
  filter(ATC2 == "H02") |>
  select(Month, Type, Cost)

# Time plot for H02 Cost
autoplot(h02_ts, Cost) + labs(title = "Time Plot of H02 Cost")

# Seasonal plot for H02 Cost
gg_season(h02_ts, y = Cost) + labs(title = "Seasonal Plot of H02 Cost")

# Subseries plot for H02 Cost
gg_subseries(h02_ts, y = Cost) + labs(title = "Subseries Plot of H02 Cost")

# Filter the dataset for a single `Type`
h02_single <- h02_ts |>
  filter(Type == "Safety net") |>
  as_tsibble(index = Month)

# Lag plot for H02 Cost (for a single type: Safety net)
#gg_lag(h02_single, Cost) + labs(title = "Lag Plot of H02 Cost (Safety net)")

# ACF plot for H02 Cost (for a single type: Safety net)
ACF(h02_single, Cost) |>
  autoplot() + labs(title = "ACF Plot of H02 Cost (Safety net)")

# 5. Barrels from us_gasoline
barrels_ts <- us_gasoline |> select(Barrels)

# Time plot for Barrels of Gasoline
autoplot(barrels_ts) + labs(title = "Time Plot of Barrels of Gasoline")

# Seasonal plot for Barrels of Gasoline
gg_season(barrels_ts) + labs(title = "Seasonal Plot of Barrels of Gasoline")

# Subseries plot for Barrels of Gasoline
gg_subseries(barrels_ts) + labs(title = "Subseries Plot of Barrels of Gasoline")

# Lag plot for Barrels of Gasoline
gg_lag(barrels_ts) + 
  geom_point() +  labs(title = "Lag Plot of Barrels of Gasoline")

# ACF plot for Barrels of Gasoline
ACF(barrels_ts) |> autoplot() + labs(title = "ACF Plot of Barrels of Gasoline")