# loading libraries
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.3     ✔ fable       0.4.1
## ✔ ggplot2     3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

2.1

Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.

Use ? (or help()) to find out about the data in each series. What is the time interval of each series? Use autoplot() to produce a time plot of each series. For the last plot, modify the axis labels and title.

# Bricks (aus_production): quarterly estimates of brick manufacturing in
# Australia from 1956 to 2010.
?aus_production
head(aus_production)

# Lynx (pelt): yearly number of Canadian lynx pelts traded between 1845 and
# 1935.
help(pelt)

# Close (gafa_stock): daily closing stock price, recorded on irregular trading
# days between 2014 and 2018, for Google, Amazon, Facebook and Apple.
help(gafa_stock)

# Demand (vic_elec): half-hourly total electricity demand in MWh.
help(vic_elec)

# Time plots of the first three series: Bricks, Lynx and Close.
aus_production |>
  autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

pelt |>
  autoplot(Lynx)

gafa_stock |>
  autoplot(Close)

# Time plot of electricity demand with a custom title and axis labels.
autoplot(vic_elec, Demand) +
  labs(
    title = "Total Electricity Demand for Victoria, Australia",
    x = "Time (30 minute intervals)",
    y = "Demand (MWh)"
  )

2.2

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

# Preview the stock data.
head(gafa_stock)

# Within each stock symbol, keep the row(s) whose closing price equals that
# symbol's maximum, then show only the symbol, the date and the peak close.
gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close)) |>
  select(Symbol, Date, Close)

2.3

Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.

# Import the quarterly Sales, AdBudget and GDP series; the path is relative to
# the current working directory.
tute1 <- readr::read_csv(file = "tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): Sales, AdBudget, GDP
## date (1): Quarter
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Open the imported table in the data viewer for a visual check.
View(tute1)

# Convert the data to a time series: turn Quarter into a year-quarter value
# and use it as the tsibble index.
mytimeseries <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)

# Construct time series plots of each of the three series: reshape to long
# form (one row per Quarter and series), then draw one panel per series with
# its own y-axis.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")

# Check what happens when facet_grid() is left out: the same plot, but with
# all three series drawn in a single panel.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line()

# facet_grid() gives each variable its own y-axis instead of plotting them all
# on the same scale, which makes the trends easier to observe.

2.4

The USgas package contains data on the demand for natural gas in the US.

Install the USgas package. Create a tsibble from us_total with year as the index and state as the key. Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).

library(USgas)

# Preview the raw consumption data.
head(us_total)

# Build a tsibble with year as the index and state as the key.
usgas_series <- us_total |>
  as_tsibble(index = year, key = state)

View(usgas_series)

# States that make up the New England area.
new_england <- c(
  "Maine", "Vermont", "New Hampshire",
  "Massachusetts", "Connecticut", "Rhode Island"
)

# Use `%in%` for set membership. `state == c(...)` would recycle the six names
# element-wise along the rows and keep only the rows that happen to line up
# with their own name's position, silently dropping most observations.
ne_usgas_series <- usgas_series |>
  filter(state %in% new_england)

# Annual natural gas consumption (column y), one line per New England state.
autoplot(ne_usgas_series, y)

2.5

Download tourism.xlsx from the book website and read it into R using readxl::read_excel(). Create a tsibble which is identical to the tourism tsibble from the tsibble package. Find what combination of Region and Purpose had the maximum number of overnight trips on average. Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.

# Read the downloaded workbook; the path is relative to the working directory.
tourism_data <- readxl::read_excel("fpptourism.xlsx")

# Compare the imported data with the packaged `tourism` tsibble.
head(tourism_data)
?tourism
head(tourism)

# Rebuild the tsibble: the year-quarter is the index, and each
# Region/State/Purpose combination is a separate series (the key).
# as_tsibble() is required here; as_tibble() only yields a plain tibble and
# does not set up an index.
trsm_tsibble <- tourism_data |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(key = c(Region, State, Purpose), index = Quarter)

head(trsm_tsibble)

# Drop to a plain tibble before averaging so the mean is taken over all
# quarters; summarising a tsibble directly would keep one row per Quarter.
# The single Region/Purpose row with the highest average is kept.
av_trsm_tsibble <- trsm_tsibble |>
  as_tibble() |>
  group_by(Region, Purpose) |>
  summarise(average = mean(Trips, na.rm = TRUE), .groups = "drop") |>
  arrange(desc(average)) |>
  slice(1)

print(av_trsm_tsibble)
## # A tibble: 1 × 3
##   Region Purpose  average
##   <chr>  <chr>      <dbl>
## 1 Sydney Visiting    747.
#Maximum number of overnight trips on average was in Sydney with a purpose of visiting. 

2.8

Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.

- Can you spot any seasonality, cyclicity and trend?
- What do you learn about the series?
- What can you say about the seasonal patterns?
- Can you identify any unusual years?

Total Private from us_employment

# Keep only the rows of us_employment titled "Total Private".
total_private <- filter(us_employment, Title == "Total Private")

# Time plot.
total_private |>
  autoplot(Employed)

# Seasonal plot.
total_private |>
  gg_season(Employed)

# Seasonal subseries plot.
total_private |>
  gg_subseries(Employed)

# Lag plot.
total_private |>
  gg_lag(Employed)

# Autocorrelation function, drawn as a correlogram.
autoplot(ACF(total_private, Employed))

Bricks from aus_production

# Time plot.
aus_production |>
  autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Seasonal plot.
aus_production |>
  gg_season(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Seasonal subseries plot.
aus_production |>
  gg_subseries(Bricks)
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Lag plot.
aus_production |>
  gg_lag(Bricks)
## Warning: Removed 20 rows containing missing values (gg_lag).

# Autocorrelation function, drawn as a correlogram.
autoplot(ACF(aus_production, Bricks))

Hare from pelt

# Time plot.
pelt |>
  autoplot(Hare)

# The seasonal plot is left disabled.
# NOTE(review): presumably because the series is yearly and so has no
# within-year seasonal pattern to draw; confirm before re-enabling.
#gg_season(pelt, Hare)

# Subseries plot.
pelt |>
  gg_subseries(Hare)

# Lag plot.
pelt |>
  gg_lag(Hare)

# Autocorrelation function, drawn as a correlogram.
autoplot(ACF(pelt, Hare))

### “H02” Cost from PBS

# Look up the dataset documentation.
help(PBS)

#str(PBS)

# Keep only the rows of PBS whose ATC2 code is "H02".
h02 <- filter(PBS, ATC2 == "H02")

head(h02)

# Time plot.
h02 |>
  autoplot(Cost)

# Seasonal plot.
h02 |>
  gg_season(Cost)

# Seasonal subseries plot.
h02 |>
  gg_subseries(Cost)

# The lag plot is left disabled.
# NOTE(review): the filter above may retain more than one keyed series, which
# gg_lag() might not accept; confirm before re-enabling.
#gg_lag(h02, Cost)

# Autocorrelation function, drawn as a correlogram.
autoplot(ACF(h02, Cost))

Barrels from us_gasoline

# Time plot.
us_gasoline |>
  autoplot(Barrels)

# Seasonal plot.
us_gasoline |>
  gg_season(Barrels)

# Seasonal subseries plot.
us_gasoline |>
  gg_subseries(Barrels)

# Lag plot.
us_gasoline |>
  gg_lag(Barrels)

# Autocorrelation function, drawn as a correlogram.
autoplot(ACF(us_gasoline, Barrels))