library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.3
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.5
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.3.2
## ✔ lubridate   1.9.3     ✔ fable       0.3.4
## ✔ ggplot2     3.5.1     ✔ fabletools  0.4.2
## Warning: package 'tsibble' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(dplyr)
library(tsibble)
library(ggplot2)

Exercise 2.1

  1. Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.
# Load the four example datasets by name.
data("aus_production", "pelt", "gafa_stock", "vic_elec")

Use ? (or help()) to find out about the data in each series.

# Open the help page for each of the four datasets.
?aus_production
?pelt
?gafa_stock
?vic_elec

# Quarterly brick production: store the plot, then display it.
aus_plot <- aus_production |> autoplot(Bricks)
aus_plot
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Lynx series from pelt.
pelt_plot <- pelt |> autoplot(Lynx)
pelt_plot

# Closing prices from gafa_stock.
gafa_plot <- gafa_stock |> autoplot(Close)
gafa_plot

+ For the last plot, modify the axis labels and title.

# Electricity demand plot with a custom title and axis labels.
# The labels are attached before the assignment so that `vic_plot` itself
# holds the labelled plot (previously only the bare autoplot was stored and
# the labels existed solely on the printed copy).
vic_plot <- autoplot(vic_elec, Demand) +
  labs(
    title = "The Demand of Electricity Over Time",
    x = "Time (30 min intervals)",
    y = "Total electricity demand (MWh)"
  )
vic_plot

Exercise 2.2

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

# Within each stock symbol, keep the row(s) whose closing price equals that
# symbol's maximum close, then render the result as a table.
peak_close <- filter(group_by(gafa_stock, Symbol), Close == max(Close))
knitr::kable(peak_close)
Symbol Date Open High Low Close Adj_Close Volume
AAPL 2018-10-03 230.05 233.470 229.78 232.07 230.2755 28654800
AMZN 2018-09-04 2026.50 2050.500 2013.00 2039.51 2039.5100 5721100
FB 2018-07-25 215.72 218.620 214.27 217.50 217.5000 58954200
GOOG 2018-07-26 1251.00 1269.771 1249.02 1268.33 1268.3300 2405600

Exercise 2.3

Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.

  1. You can read the data into R with the following script:
# Read the quarterly Sales / AdBudget / GDP data from the local CSV copy.
# NOTE(review): this path is machine-specific; point it at wherever tute1.csv
# was downloaded before running on another machine.
tute1 <- read.csv("~/Desktop/Data 624 fall 2024/Data-624-Homework-1/tute1.csv")
# View() opens a data viewer, so only call it in an interactive session;
# this keeps non-interactive runs (e.g. knitting) from trying to open one.
if (interactive()) {
  View(tute1)
}
  1. Convert the data to time series
# Parse the Quarter column into a yearquarter value and use it as the index
# of a tsibble.
timeseries_a <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)
# Preview the first rows of the tsibble.
timeseries_a |> head()
  1. Construct time series plots of each of the three series
# Reshape to long form (one row per quarter and series), then draw each
# series in its own panel with an independent y-axis.
tute1_long <- pivot_longer(timeseries_a, cols = !Quarter)
ggplot(tute1_long, aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")

Check what happens when you don’t include facet_grid().

# Same long-form data, but with all series drawn in a single panel so they
# share one y-axis.
tute1_stacked <- pivot_longer(timeseries_a, cols = !Quarter)
ggplot(tute1_stacked, aes(x = Quarter, y = value, colour = name)) +
  geom_line()

Without using facet_grid, all of the data is plotted on the same grid using the same scale.

Exercise 2.4

The USgas package contains data on the demand for natural gas in the US. a. Install the USgas package.

#install.packages('USgas')
library(USgas)
#help("usgas")
  1. Create a tsibble from us_total with year as the index and state as the key.
# Build a tsibble from us_total: one series per state, indexed by year.
us_total_tsibble <- us_total |>
  as_tsibble(key = state, index = year)
  1. Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).
# States that make up the New England area.
n_e_area <- c(
  "Maine", "Vermont", "New Hampshire",
  "Massachusetts", "Connecticut", "Rhode Island"
)

# Plot annual consumption for those states. The measured column `y` is named
# explicitly so autoplot() does not have to pick a variable automatically,
# and `labels` is spelled out in full instead of relying on partial argument
# matching of `label`.
us_total_tsibble |>
  filter(state %in% n_e_area) |>
  autoplot(y) +
  labs(
    title = "Natural Gas Consumption for New England States (1997-2019)",
    x = "Year",
    y = "Gas Consumption (Million Cubic Feet)"
  ) +
  scale_y_continuous(labels = scales::comma)

## Exercise 2.5 a. Download tourism.xlsx from the book website and read it into R using readxl::read_excel().

# Import the tourism workbook from the local copy and preview the first rows.
tourism_ds <- readxl::read_excel(
  path = "~/Desktop/Data 624 fall 2024/Data-624-Homework-1/tourism.xlsx"
)
tourism_ds |> head()
  1. Create a tsibble which is identical to the tourism tsibble from the tsibble package.
# Load the reference `tourism` tsibble and preview it for comparison.
data("tourism")
head(tourism)

# Columns used as the tsibble key (Quarter is the index).
keys <- c("Region", "State", "Purpose")

# Convert the spreadsheet data: parse Quarter into a yearquarter value, then
# build the tsibble with the key columns selected by name.
tourism_tsibble <- tourism_ds |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter, key = all_of(keys))
  1. Find what combination of Region and Purpose had the maximum number of overnight trips on average.
# Average trips for every Region/Purpose combination, sorted from highest to
# lowest, keeping only the top row. `.groups = "drop"` returns an ungrouped
# result, so the sort and head() operate on a plain table and summarise()
# no longer emits its "grouped output" message.
tourism_ds |>
  group_by(Region, Purpose) |>
  summarize(avg_trips = mean(Trips), .groups = "drop") |>
  arrange(desc(avg_trips)) |>
  head(1) |>
  knitr::kable()
Region Purpose avg_trips
Sydney Visiting 747.27
  1. Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.
# Total trips per State and Quarter, combining all Regions and Purposes.
# Quarter is converted to a yearquarter value *before* grouping so that a
# grouping column is never rewritten while the data is still grouped, and
# `.groups = "drop"` hands an ungrouped table to as_tsibble() (this also
# silences the summarise() grouping message).
trips_by_state <- tourism_ds |>
  mutate(Quarter = yearquarter(Quarter)) |>
  group_by(Quarter, State) |>
  summarize(total_trips = sum(Trips), .groups = "drop") |>
  as_tsibble(index = Quarter, key = State)
head(trips_by_state)

Exercise 2.8

Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.

Results

Employed

# Filter the "Total Private" series once and reuse it for every plot instead
# of repeating the same filter in each pipeline.
total_private <- us_employment |>
  filter(Title == "Total Private")

# Time plot.
total_private |>
  autoplot(Employed) +
  labs(title = "Autoplot")

# Seasonal plot.
total_private |>
  gg_season(Employed) +
  labs(title = "Seasonal Plot")

# Seasonal subseries plot.
total_private |>
  gg_subseries(Employed) +
  labs(title = "Seasonal Subseries Plot")

# Lag plot.
total_private |>
  gg_lag(Employed) +
  labs(title = "Lag Plot")

# Autocorrelation plot. Only ACF() is computed here, so the title no longer
# mentions cross-correlation.
total_private |>
  ACF(Employed) |>
  autoplot() +
  labs(title = "Autocorrelation Function Plot")

Bricks

# Drop the quarters with no Bricks observation once, up front, so the plots
# below do not each warn about removed missing values.
# NOTE(review): this assumes the missing quarters sit at the end of the
# series, so removing them leaves no gaps -- confirm with has_gaps(bricks).
bricks <- aus_production |>
  filter(!is.na(Bricks))

# Time plot.
bricks |>
  autoplot(Bricks) +
  labs(title = "Autoplot")

# Seasonal plot.
bricks |>
  gg_season(Bricks) +
  labs(title = "Seasonal Plot")

# Seasonal subseries plot.
bricks |>
  gg_subseries(Bricks) +
  labs(title = "Seasonal Subseries Plot")

# Lag plot, drawn with points.
bricks |>
  gg_lag(Bricks, geom = "point") +
  labs(title = "Lag Plot")

# Autocorrelation plot. Only ACF() is computed here, so the title no longer
# mentions cross-correlation.
bricks |>
  ACF(Bricks) |>
  autoplot() +
  labs(title = "Autocorrelation Function Plot")

The Bricks series from aus_production seems to be cyclical. Seasonally, it appears to increase from the first to the third quarter and then decrease in the fourth quarter. Trend-wise, there is an upward trend until the 1980s, after which it begins to decline. The years that stand out are in the mid-1970s and early 1980s, where there were drastic drops.

Hare

# Time plot of the Hare series.
pelt |>
  autoplot(Hare) +
  labs(title = "Autoplot")

# Lag plot, drawn with points.
pelt |>
  gg_lag(Hare, geom = "point") +
  labs(title = "Lag Plot")

# Autocorrelation plot. Only ACF() is computed here, so the title no longer
# mentions cross-correlation.
pelt |>
  ACF(Hare) |>
  autoplot() +
  labs(title = "Autocorrelation Function Plot")

This data is very cyclical, as demonstrated in the plots. The population appears to swing between peak and trough about every 5 years, which corresponds to a full cycle of roughly 10 years.

“H02” Cost

# Keep only the H02 rows of PBS, then reuse them for both plots.
h02 <- filter(PBS, ATC2 == "H02")

# Time plot of Cost.
autoplot(h02, Cost) +
  labs(title = "Autoplot")

# Seasonal plot of Cost.
gg_season(h02, Cost) +
  labs(title = "Seasonal Plot")

The Concessional Co-payments and General Co-payments series seem to peak between March and July, which is when the Concessional Safety Net and General Safety Net series are at their lowest.

Barrels

# Time plot of Barrels.
autoplot(us_gasoline, Barrels) +
  labs(title = "Autoplot")

# Seasonal plot of Barrels.
gg_season(us_gasoline, Barrels) +
  labs(title = "Seasonal Plot")

# Seasonal subseries plot of Barrels.
gg_subseries(us_gasoline, Barrels) +
  labs(title = "Seasonal Subseries Plot")

The trend is hard to pin down precisely, but overall it is upward. Seasonally, the barrels supplied appear to peak in June/July (the summer season).