R Markdown

The chunk below loads the libraries required for this assignment.

# Install fpp3 only when it is missing. requireNamespace() checks whether the
# package is available without attaching it; the library(fpp3) call further
# down does the actual loading and stops with an error if it is unavailable.
if (!requireNamespace("fpp3", quietly = TRUE)) {
  install.packages("fpp3")
}
## Loading required package: fpp3
## Warning: package 'fpp3' was built under R version 4.4.2
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.3     ✔ fable       0.4.1
## ✔ ggplot2     3.5.1
## Warning: package 'ggplot2' was built under R version 4.4.2
## Warning: package 'tsibble' was built under R version 4.4.2
## Warning: package 'tsibbledata' was built under R version 4.4.2
## Warning: package 'feasts' was built under R version 4.4.2
## Warning: package 'fabletools' was built under R version 4.4.2
## Warning: package 'fable' was built under R version 4.4.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(fpp3)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.5
## ✔ purrr   1.0.2     ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)

#Exercise 2.10 Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.

Use ? (or help()) to find out about the data in each series. What is the time interval of each series? Use autoplot() to produce a time plot of each series. For the last plot, modify the axis labels and title.

# Make the four example data sets available in the workspace.
data("aus_production", "pelt", "gafa_stock", "vic_elec")

# Open the documentation for aus_production, then draw its Bricks series.
help(topic = "aus_production")
## starting httpd help server ... done
aus_production |>
  autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

The time interval for the aus_production series is quarterly.

# Documentation and time plot for the Lynx series in pelt.
help(topic = "pelt")
pelt |>
  autoplot(Lynx)

The time interval for the pelt series is annual (one observation per year).

# Documentation and time plot for the Close series in gafa_stock.
help(topic = "gafa_stock")
gafa_stock |>
  autoplot(Close)

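The time interval for the gafa_stock series is daily but irregular: prices are recorded only on trading days, which is why the tsibble reports its interval as `[!]`.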

# Documentation for vic_elec.
help(topic = "vic_elec")

# Load the data set.
data("vic_elec")

# Time plot of Demand with a custom title and axis labels.
vic_elec |>
  autoplot(Demand) +
  ggtitle("Electricity Demand in Victoria") +
  xlab("30 min intervals") +
  ylab("Demand (MW)")

The time interval for the vic_elec series is half-hourly (30 minutes).

##2.2) Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock

# For every stock symbol, keep the row(s) whose closing price equals that
# symbol's maximum closing price.
peak_closing_days <- gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close))

# Show the peak days.
print(peak_closing_days)
## # A tsibble: 4 x 8 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 AAPL   2018-10-03  230.  233.  230.  232.      230. 28654800
## 2 AMZN   2018-09-04 2026. 2050. 2013  2040.     2040.  5721100
## 3 FB     2018-07-25  216.  219.  214.  218.      218. 58954200
## 4 GOOG   2018-07-26 1251  1270. 1249. 1268.     1268.  2405600

2.3) Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.

# Read the data
# NOTE(review): this is an absolute path on one specific machine; the chunk
# only runs where tute1.csv exists at exactly this location.
tute1 <- readr::read_csv("C:/Users/Dell/Downloads/tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): Sales, AdBudget, GDP
## date (1): Quarter
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the data viewer only in an interactive session: View() is an
# interactive tool and is of no use (and can fail) when the document is
# rendered non-interactively.
if (interactive()) {
  View(tute1)
}

# Turn the imported table into a quarterly tsibble indexed by Quarter.
mytimeseries <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)

# One panel per series, each with its own y-axis scale.
mytimeseries |>
  pivot_longer(cols = !Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y") +
  ggtitle("Time Series Plots of Sales, AdBudget, and GDP") +
  xlab("Quarter") +
  ylab("Value")

# All three series overlaid on one shared set of axes (no faceting).
mytimeseries |>
  pivot_longer(cols = !Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  ggtitle("Time Series Plots of Sales, AdBudget, and GDP") +
  xlab("Quarter") +
  ylab("Value")

2.4) The USgas package contains data on the demand for natural gas in the US.

Install the USgas package. Create a tsibble from us_total with year as the index and state as the key. Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).

# Install USgas only when it is missing. requireNamespace() checks whether the
# package is available without attaching it; the library(USgas) call further
# down does the actual loading and stops with an error if it is unavailable.
if (!requireNamespace("USgas", quietly = TRUE)) {
  install.packages("USgas")
}
## Loading required package: USgas
## Warning: package 'USgas' was built under R version 4.4.2
library(USgas)
library(tsibble)
library(ggplot2)

data("us_total")

# Annual tsibble with one series per state.
us_total_tsibble <- as_tsibble(us_total, index = year, key = state)

# The six states that make up New England.
new_england_states <- c(
  "Maine", "Vermont", "New Hampshire",
  "Massachusetts", "Connecticut", "Rhode Island"
)

# Keep only the New England series.
new_england_data <- filter(us_total_tsibble, state %in% new_england_states)

# One line per state, consumption (column y) against year.
new_england_data |>
  ggplot(mapping = aes(x = year, y = y, colour = state)) +
  geom_line() +
  ggtitle("Annual Natural Gas Consumption by State in New England") +
  xlab("Year") +
  ylab("Natural Gas Consumption") +
  theme_minimal()

##2.5) Download tourism.xlsx from the book website and read it into R using readxl::read_excel(). Create a tsibble which is identical to the tourism tsibble from the tsibble package. Find what combination of Region and Purpose had the maximum number of overnight trips on average. Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.

library(readxl)
library(tsibble)
library(dplyr)

# NOTE(review): this is an absolute path on one specific machine; the chunk
# only runs where tourism.xlsx exists at exactly this location.
tourism_data <- readxl::read_excel("C:/Users/Dell/Downloads/tourism.xlsx")

# Create a tsibble which is identical to the tourism tsibble.
# The packaged tourism tsibble is keyed by Region, State and Purpose, so State
# has to be part of the key as well; left out, it would be treated as a
# measured variable and the two objects would not match.
tourism_tsibble <- tourism_data |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter, key = c(Region, State, Purpose))

# Find the combination of Region and Purpose with the maximum number of
# overnight trips on average.
#
# summarise() on a tsibble always keeps the time index, so it would return one
# value per Region/Purpose/Quarter instead of an average over time. Converting
# to a plain tibble first lets the mean be taken across all quarters. Using
# `.by` leaves no grouping behind, so slice_max() picks the single overall
# maximum (ties, if any, are all kept) rather than one row per Region.
#
# NOTE: the printed result recorded below this chunk predates this fix;
# re-render the document to refresh it.
max_avg_overnight_trips <- tourism_tsibble |>
  as_tibble() |>
  summarise(
    avg_trips = mean(Trips, na.rm = TRUE),
    .by = c(Region, Purpose)
  ) |>
  slice_max(avg_trips, n = 1)

print(max_avg_overnight_trips)
## # A tsibble: 76 x 4 [1Q]
## # Key:       Region, Purpose [76]
## # Groups:    Region [76]
##    Region                     Purpose  Quarter avg_trips
##    <chr>                      <chr>      <qtr>     <dbl>
##  1 Adelaide                   Visiting 2017 Q1     270. 
##  2 Adelaide Hills             Visiting 2002 Q4      81.1
##  3 Alice Springs              Holiday  1998 Q3      76.5
##  4 Australia's Coral Coast    Holiday  2014 Q3     198. 
##  5 Australia's Golden Outback Business 2017 Q3     174. 
##  6 Australia's North West     Business 2016 Q3     297. 
##  7 Australia's South West     Holiday  2016 Q1     612. 
##  8 Ballarat                   Visiting 2004 Q1     103. 
##  9 Barkly                     Holiday  1998 Q3      37.9
## 10 Barossa                    Holiday  2006 Q1      51.0
## # ℹ 66 more rows
# Create a new tsibble which combines the Purposes and Regions, and just has
# total trips by State.
# Grouping a tsibble by State and summarising already aggregates within each
# Quarter (the index) and returns a tsibble keyed by State, so no extra
# index_by() or as_tsibble() step is needed.
total_trips_by_state <- tourism_tsibble |>
  group_by(State) |>
  summarise(total_trips = sum(Trips, na.rm = TRUE))

print(total_trips_by_state)
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
##    State Quarter total_trips
##    <chr>   <qtr>       <dbl>
##  1 ACT   1998 Q1        551.
##  2 ACT   1998 Q2        416.
##  3 ACT   1998 Q3        436.
##  4 ACT   1998 Q4        450.
##  5 ACT   1999 Q1        379.
##  6 ACT   1999 Q2        558.
##  7 ACT   1999 Q3        449.
##  8 ACT   1999 Q4        595.
##  9 ACT   2000 Q1        600.
## 10 ACT   2000 Q2        557.
## # ℹ 630 more rows

##2.8) Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.

Can you spot any seasonality, cyclicity and trend? What do you learn about the series? What can you say about the seasonal patterns? Can you identify any unusual years?

# Load necessary packages
library(fpp3)
library(USgas)
library(ggplot2)

# Make the data sets used in this exercise available in the workspace.
data("us_employment", "aus_production", "pelt", "PBS", "us_gasoline")

# "Total Private" employment series from us_employment.
us_employment_private <- filter(us_employment, Title == "Total Private")

# Time plot.
us_employment_private |>
  autoplot(Employed) +
  ggtitle("Total Private Employment in US") +
  xlab("Year") +
  ylab("Employed")

# Seasonal plot.
us_employment_private |>
  gg_season(Employed) +
  ggtitle("Seasonal Plot: Total Private Employment in US")

# Subseries plot.
us_employment_private |>
  gg_subseries(Employed) +
  ggtitle("Subseries Plot: Total Private Employment in US")

# Lag plot.
us_employment_private |>
  gg_lag(Employed) +
  ggtitle("Lag Plot: Total Private Employment in US")

# Autocorrelation function and its plot.
acf_us_employment_private <- us_employment_private |>
  ACF(Employed)
acf_us_employment_private |>
  autoplot() +
  ggtitle("ACF Plot: Total Private Employment in US")

# Bricks series from aus_production: time plot.
aus_production |>
  autoplot(Bricks) +
  ggtitle("Bricks Production in Australia") +
  xlab("Year") +
  ylab("Bricks")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Seasonal plot.
aus_production |>
  gg_season(Bricks) +
  ggtitle("Seasonal Plot: Bricks Production in Australia")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Subseries plot.
aus_production |>
  gg_subseries(Bricks) +
  ggtitle("Subseries Plot: Bricks Production in Australia")
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Lag plot.
aus_production |>
  gg_lag(Bricks) +
  ggtitle("Lag Plot: Bricks Production in Australia")
## Warning: Removed 20 rows containing missing values (gg_lag).

# Autocorrelation function and its plot.
acf_aus_production <- aus_production |>
  ACF(Bricks)
acf_aus_production |>
  autoplot() +
  ggtitle("ACF Plot: Bricks Production in Australia")

# Hare series from pelt: time plot.
pelt |>
  autoplot(Hare) +
  ggtitle("Hare Pelts") +
  xlab("Year") +
  ylab("Hare Pelts")

# Subseries plot.
pelt |>
  gg_subseries(Hare) +
  ggtitle("Subseries Plot: Hare Pelts")

# Lag plot.
pelt |>
  gg_lag(Hare) +
  ggtitle("Lag Plot: Hare Pelts")

# Autocorrelation function and its plot.
acf_pelt <- pelt |>
  ACF(Hare)
acf_pelt |>
  autoplot() +
  ggtitle("ACF Plot: Hare Pelts")

# Cost of the H02 drug group from PBS.
# NOTE(review): the lag plot below still filters on Concession and Type, so
# those key columns presumably survive this select() - confirm.
ts_cost <- select(filter(PBS, ATC2 == "H02"), Month, Cost)

# Time plot.
ts_cost |>
  autoplot(.vars = Cost) +
  ggtitle("H02 Cost from PBS")

# Seasonal plot.
ts_cost |>
  gg_season(y = Cost) +
  ggtitle("Seasonal Plot: H02 Cost from PBS")

# Subseries plot.
ts_cost |>
  gg_subseries(y = Cost) +
  ggtitle("Subseries Plot: H02 Cost from PBS")

# Lag plot, restricted to the General / Safety net series.
ts_cost |>
  filter(Concession == "General", Type == "Safety net") |>
  gg_lag(y = Cost) +
  ggtitle("Lag Plot: H02 Cost from PBS (General, Safety net)")

# Autocorrelation plot.
autoplot(ACF(ts_cost, y = Cost)) +
  ggtitle("ACF Plot: H02 Cost from PBS")

# Barrels series from us_gasoline, rebuilt as a tsibble indexed by a
# year-week value whose weeks start on day 1.
ts_barrels <- us_gasoline |>
  select(Week, Barrels) |>
  mutate(Week = yearweek(Week, week_start = 1)) |>
  as_tsibble(index = Week)

# Time plot.
ts_barrels |>
  autoplot(.vars = Barrels) +
  ggtitle("Gasoline Consumption in US")

# Seasonal plot.
ts_barrels |>
  gg_season(Barrels) +
  ggtitle("Seasonal Plot: Gasoline Consumption in US")

# Subseries plot.
ts_barrels |>
  gg_subseries(Barrels) +
  ggtitle("Subseries Plot: Gasoline Consumption in US")

# Lag plot for lags 1 to 12, with a display period of 12.
ts_barrels |>
  gg_lag(Barrels, period = 12, lags = 1:12) +
  ggtitle("Lag Plot: Gasoline Consumption in US")

# Autocorrelation plot.
autoplot(ACF(ts_barrels, Barrels)) +
  ggtitle("ACF Plot: Gasoline Consumption in US")

Note: adding the `echo = FALSE` parameter to a code chunk prevents the R code that generated a plot from being printed. It does not appear to be in effect here, since the code for every plot above is shown.