Exercise 2.1

Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.

  1. Use ? (or help()) to find out about the data in each series.

  2. What is the time interval of each series?

  3. Use autoplot() to produce a time plot of each series.

  4. For the last plot, modify the axis labels and title.

Answer.

#install.packages("fpp3")
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.4
## ✔ dplyr       1.1.3     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.3     ✔ fable       0.3.3
## ✔ ggplot2     3.4.3     ✔ fabletools  0.3.4
## Warning: package 'tsibble' was built under R version 4.3.2
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.2
## Warning: package 'fabletools' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
head(aus_production, 3)
ABCDEFGHIJ0123456789
Quarter
<qtr>
Beer
<dbl>
Tobacco
<dbl>
Bricks
<dbl>
Cement
<dbl>
Electricity
<dbl>
Gas
<dbl>
1956 Q1284522518946539235
1956 Q2213517820453244366
1956 Q3227529720856148067
head(pelt, 3)
ABCDEFGHIJ0123456789
Year
<dbl>
Hare
<dbl>
Lynx
<dbl>
18451958030090
18461960045150
18471961049150
head(gafa_stock)
ABCDEFGHIJ0123456789
Symbol
<chr>
Date
<date>
Open
<dbl>
High
<dbl>
Low
<dbl>
Close
<dbl>
Adj_Close
<dbl>
Volume
<dbl>
AAPL2014-01-0279.3828679.5757178.8600079.0185766.9643358671200
AAPL2014-01-0378.9800079.1000077.2042877.2828665.4934298116900
AAPL2014-01-0676.7785778.1142976.2285777.7042865.85053103152700
AAPL2014-01-0777.7600077.9942976.8457177.1485765.3795979302300
AAPL2014-01-0876.9728577.9371476.9557177.6371565.7936364632400
AAPL2014-01-0978.1142978.1228676.4785776.6457164.9534569787200
head(vic_elec)
ABCDEFGHIJ0123456789
Time
<dttm>
Demand
<dbl>
Temperature
<dbl>
Date
<date>
Holiday
<lgl>
2012-01-01 00:00:004382.82521.402012-01-01TRUE
2012-01-01 00:30:004263.36621.052012-01-01TRUE
2012-01-01 01:00:004048.96620.702012-01-01TRUE
2012-01-01 01:30:003877.56320.552012-01-01TRUE
2012-01-01 02:00:004036.23020.402012-01-01TRUE
2012-01-01 02:30:003865.59720.252012-01-01TRUE
help(aus_production)
## starting httpd help server ... done
  1. Bricks contains the information about the number of clay bricks produced in millions of bricks
?pelt

Lynx contains data on the number of Canadian lynx pelts traded.

help("gafa_stock")

The gafa_stock data contains the historical stock prices for Google, Amazon, Facebook and Apple. All prices are in $USD. The time series ‘Close’ contains the closing price of each of these stocks for the given period.

help("vic_elec")

vic_elec contains half-hourly electricity demand data for Victoria, Australia. The time series Demand holds the total electricity demand in MWh.

  1. Plot of each time series using the function autoplot()
# Time plot of the Bricks series from aus_production.
aus_production |>
  autoplot(Bricks) +
  labs(title = "Plot of the timeseries Bricks")
## Warning: Removed 20 rows containing missing values (`geom_line()`).

# Time plot of the Lynx series from pelt.
pelt |>
  autoplot(Lynx) +
  labs(title = "Plot of the timeseries Lynx")

# Time plot of the closing prices in gafa_stock.
gafa_stock |>
  autoplot(Close) +
  labs(title = "Plot of the timeseries Closing price of each stock")

# Time plot of electricity demand, with a custom title and axis labels.
vic_elec |>
  autoplot(Demand) +
  labs(
    title = "Plot of the timeseries Demand from the data vic_elec",
    x = "Time with interval 30 minutes",
    y = "Demand (MWh)"
  )

Exercise 2.2

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

Answer.

# Peak closing price for Apple only: restrict to AAPL rows first, then keep
# the row(s) whose Close equals the maximum of that subset.
gafa_stock |>
  filter(Symbol == "AAPL") |>
  filter(Close == max(Close))
ABCDEFGHIJ0123456789
Symbol
<chr>
Date
<date>
Open
<dbl>
High
<dbl>
Low
<dbl>
Close
<dbl>
Adj_Close
<dbl>
Volume
<dbl>
AAPL2018-10-03230.05233.47229.78232.07230.275528654800
# Peak closing day for every stock at once: after grouping by Symbol,
# max(Close) is evaluated separately within each stock.
gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close))
ABCDEFGHIJ0123456789
Symbol
<chr>
Date
<date>
Open
<dbl>
High
<dbl>
Low
<dbl>
Close
<dbl>
Adj_Close
<dbl>
Volume
<dbl>
AAPL2018-10-03230.05233.470229.78232.07230.275528654800
AMZN2018-09-042026.502050.5002013.002039.512039.51005721100
FB2018-07-25215.72218.620214.27217.50217.500058954200
GOOG2018-07-261251.001269.7711249.021268.331268.33002405600

Hence, it can be seen that AAPL had its peak on 2018-10-03, AMZN had its peak on 2018-09-04, FB had its peak on 2018-07-25, and GOOG had its peak on 2018-07-26.

Exercise 2.3

Download the file tute1.csv from the book website, open it in Excel or some other spreadsheet application, and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.

a. Read the data into R

tute1<- readr::read_csv('tute1.csv')
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): Sales, AdBudget, GDP
## date (1): Quarter
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(tute1)
ABCDEFGHIJ0123456789
Quarter
<date>
Sales
<dbl>
AdBudget
<dbl>
GDP
<dbl>
1981-03-011020.2659.2251.8
1981-06-01889.2589.0290.9
1981-09-01795.0512.5290.8
1981-12-011003.9614.1292.4
1982-03-011057.7647.2279.1
1982-06-01944.4602.0254.0

b. Convert the data into time series

# Convert the quarterly data frame into a tsibble indexed by Quarter.
# as_tsibble() is required here: as_tibble() only returns a plain tibble,
# which is not a time series object and carries no time index.
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)
head(mytimeseries, 3)
ABCDEFGHIJ0123456789
Quarter
<qtr>
Sales
<dbl>
AdBudget
<dbl>
GDP
<dbl>
1981 Q11020.2659.2251.8
1981 Q2889.2589.0290.9
1981 Q3795.0512.5290.8

c. Construct time series plots of each of the three series.

# Reshape to long form (one row per Quarter and series name), then draw each
# series in its own panel with an independent y-axis.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")

# Same long-form data without faceting: every series shares a single panel.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line()

If we don’t include facet_grid(), then all the series are drawn on the same canvas.

Exercise 2.4

The USgas package contains data on the demand for natural gas in the US.

a. Install the USgas package

#install.packages("USgas")

b. Create tsibble from us_total with year as the index and state as the key.

library(USgas)
## Warning: package 'USgas' was built under R version 4.3.2
head(us_total)
ABCDEFGHIJ0123456789
 
 
year
<int>
state
<chr>
y
<int>
11997Alabama324158
21998Alabama329134
31999Alabama337270
42000Alabama353614
52001Alabama332693
62002Alabama379343
# Build a tsibble from us_total with one series per state.
# `year` is already an integer column, so it is used as the index directly;
# no mutate() step is needed before the conversion.
us_tg <- us_total |>
  as_tsibble(key = state, index = year)
head(us_tg)
ABCDEFGHIJ0123456789
year
<int>
state
<chr>
y
<int>
1997Alabama324158
1998Alabama329134
1999Alabama337270
2000Alabama353614
2001Alabama332693
2002Alabama379343

c. Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).

# Annual natural gas consumption for the six New England states.
# All six states named in the exercise are listed, including Massachusetts.
# %in% keeps the membership test in one place instead of a chain of `|`.
new_england <- c(
  "Maine", "Vermont", "New Hampshire",
  "Massachusetts", "Connecticut", "Rhode Island"
)
us_tg |>
  filter(state %in% new_england) |>
  ggplot(aes(x = year, y = y, colour = state)) +
  geom_line()

Exercise 2.5

a. Download tourism.xlsx from the book website and read into R using the readxl::read_excel()

tourism <- readxl::read_excel("tourism.xlsx")
head(tourism, 3)
ABCDEFGHIJ0123456789
Quarter
<chr>
Region
<chr>
State
<chr>
Purpose
<chr>
Trips
<dbl>
1998-01-01AdelaideSouth AustraliaBusiness135.0777
1998-04-01AdelaideSouth AustraliaBusiness109.9873
1998-07-01AdelaideSouth AustraliaBusiness166.0347

b. Create a tsibble which is identical to the tourism tsibble from the tsibble package.

# Rebuild the tourism tsibble: Quarter is the index and each
# Region/State/Purpose combination identifies one series.
# Trips is the measured variable, so it must not be part of the key;
# keying on it would make every distinct Trips value its own series.
tourism <- tourism |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(key = c(Region, State, Purpose), index = Quarter)
head(tourism, 3)
ABCDEFGHIJ0123456789
Quarter
<qtr>
Region
<chr>
State
<chr>
Purpose
<chr>
Trips
<dbl>
2010 Q1AdelaideSouth AustraliaBusiness68.72539
2005 Q2AdelaideSouth AustraliaBusiness73.25301
2013 Q2AdelaideSouth AustraliaBusiness100.64094

c. Find what combination of Region and Purpose had a maximum number of overnight trips on average.

# Average overnight trips for each Region/Purpose combination over all
# quarters. Converting to a plain tibble first is essential: summarising a
# tsibble keeps the Quarter index, which gives one value per quarter
# instead of one average per combination.
av_trips <- tourism |>
  as_tibble() |>
  group_by(Region, Purpose) |>
  summarise(av_trip = mean(Trips), .groups = "drop")

# The result is ungrouped, so max() is taken across every combination and
# only the overall maximum is returned.
av_trips |>
  filter(av_trip == max(av_trip))
ABCDEFGHIJ0123456789
Region
<chr>
Purpose
<chr>
Quarter
<qtr>
av_trip
<dbl>
AdelaideVisiting2017 Q1269.53562
Adelaide HillsVisiting2002 Q481.10211
Alice SpringsHoliday1998 Q376.54138
Australia's Coral CoastHoliday2014 Q3198.17779
Australia's Golden OutbackBusiness2017 Q3173.85992
Australia's North WestBusiness2016 Q3296.80234
Australia's South WestHoliday2016 Q1612.08986
BallaratVisiting2004 Q1102.81502
BarklyHoliday1998 Q337.87040
BarossaHoliday2006 Q151.00731
library(dplyr)
# Total trips per State and Quarter, combining all Regions and Purposes.
# Grouping by State alone is what performs the aggregation; grouping by
# State, Purpose and Region leaves one row per group and quarter, so the
# "sum" would just reproduce the input values.
new_tourism <- tourism |>
  group_by(State) |>
  summarise(Total_trip = sum(Trips))
new_tourism 
ABCDEFGHIJ0123456789
State
<chr>
Purpose
<chr>
Region
<chr>
Quarter
<qtr>
Total_trip
<dbl>
ACTBusinessCanberra1998 Q1150.1981173
ACTBusinessCanberra1998 Q299.9326775
ACTBusinessCanberra1998 Q3129.5651167
ACTBusinessCanberra1998 Q4101.6989731
ACTBusinessCanberra1999 Q195.5249101
ACTBusinessCanberra1999 Q2229.0576164
ACTBusinessCanberra1999 Q3108.8297679
ACTBusinessCanberra1999 Q4158.9828628
ACTBusinessCanberra2000 Q1105.2419137
ACTBusinessCanberra2000 Q2202.0169523

Exercise 2.8

Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.

Answer.

head(us_employment)
ABCDEFGHIJ0123456789
Month
<mth>
Series_ID
<chr>
Title
<chr>
Employed
<dbl>
1939 JanCEU0500000001Total Private25338
1939 FebCEU0500000001Total Private25447
1939 MarCEU0500000001Total Private25833
1939 AprCEU0500000001Total Private25801
1939 MayCEU0500000001Total Private26113
1939 JunCEU0500000001Total Private26485
# Isolate the "Total Private" employment series, then draw its time plot.
total_priv <- us_employment |>
  filter(Title == "Total Private") |>
  select(Employed)

total_priv |>
  autoplot(Employed) +
  labs(
    title = "plot of total private employed",
    x = "Time (months)",
    y = "Total number of private employed"
  )

In this plot, the seasonality is clearly visible: the pattern repeats with a regular period. There is also a strong upward trend in the data from 1940 to 2020. Although there are some dips in the graph, the series returns to its trend after each dip.

# Seasonal plot of the total private employment series.
total_priv |>
  gg_season(Employed) +
  labs(
    title = "Seasonal plot of total private employed",
    x = "Time (month)",
    y = "Total number of private employed"
  )

This graph shows that the seasonal pattern repeats each and every year. All the lines on the graph are almost parallel to each other, thus depicting strong seasonality in the data.

# Subseries plot of the total private employment series.
total_priv |>
  gg_subseries(Employed) +
  labs(
    title = "Subseries of total private employed",
    x = "Time (month)",
    y = "Total number of private employed"
  )

This graph shows the sub-series for each month across all years. Almost the same pattern can be seen in each panel, which shows the cyclic behaviour of the series.

# Lag plots of Employed against its own lagged values.
# The title and x label describe a lag plot: the x-axis holds the lagged
# series, not time, so "Subseries" / "Time (month)" would be misleading.
total_priv |>
  gg_lag(Employed, geom = "point") +
  labs(
    title = "Lag plots of total private employed",
    x = "Lagged total number of private employed",
    y = "Total number of private employed"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1))

# Autocorrelation function of Employed up to lag 48, plotted as a correlogram.
employed_acf <- ACF(total_priv, Employed, lag_max = 48)
autoplot(employed_acf) +
  labs(title = "Total number of private employed")

head(aus_production, 3)
ABCDEFGHIJ0123456789
Quarter
<qtr>
Beer
<dbl>
Tobacco
<dbl>
Bricks
<dbl>
Cement
<dbl>
Electricity
<dbl>
Gas
<dbl>
1956 Q1284522518946539235
1956 Q2213517820453244366
1956 Q3227529720856148067
# Keep only the Bricks series from aus_production and draw its time plot.
bricks <- select(aus_production, Bricks)

bricks |>
  autoplot(Bricks) +
  labs(title = "plot of clay bricks production in million")
## Warning: Removed 20 rows containing missing values (`geom_line()`).

# Seasonal plot of the Bricks series.
bricks |>
  gg_season(Bricks) +
  labs(title = "plot of clay bricks production in million")
## Warning: Removed 20 rows containing missing values (`geom_line()`).

# Subseries plot of the Bricks series.
bricks |>
  gg_subseries(Bricks) +
  labs(title = "plot of clay bricks production in million")
## Warning: Removed 5 rows containing missing values (`geom_line()`).

# Lag plots of the Bricks series, with x-axis tick labels rotated 90 degrees.
bricks |>
  gg_lag(Bricks) +
  labs(title = "plot of clay bricks production in million") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1))
## Warning: Removed 20 rows containing missing values (gg_lag).

# Autocorrelation function of Bricks up to lag 50, plotted as a correlogram.
bricks_acf <- ACF(bricks, Bricks, lag_max = 50)
autoplot(bricks_acf) +
  labs(title = "Plot of ACF of bricks ")

head(pelt, 3)
ABCDEFGHIJ0123456789
Year
<dbl>
Hare
<dbl>
Lynx
<dbl>
18451958030090
18461960045150
18471961049150
# Keep only the Hare series from pelt and draw its time plot.
# The plot title has its stray ";" removed ("plo;t" -> "plot").
hare <- pelt |>
  select(Hare)

hare |>
  autoplot(Hare) +
  labs(title = "plot of number of snowshoe Hare pelts traded")

# Seasonal plot left disabled.
# NOTE(review): presumably because pelt is indexed by Year, so there is no
# within-year seasonal period for gg_season() to draw -- confirm.
# gg_season(hare, Hare) +
#   labs(title = "Seasonal plot of snowshoe Hare pelts traded")

# Subseries plot of the Hare series.
hare |>
  gg_subseries(Hare) +
  labs(title = "Subseries plot of hare")

# Lag plots of the Hare series, with x-axis tick labels rotated 90 degrees.
hare |>
  gg_lag(Hare) +
  labs(title = "Lag plots of snowshoe hare pelts traded") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1))

# Autocorrelation function of Hare up to lag 50, plotted as a correlogram.
hare_acf <- ACF(hare, Hare, lag_max = 50)
autoplot(hare_acf) +
  labs(title = "Plot of ACF of hare")

head(PBS,3)
ABCDEFGHIJ0123456789
Month
<mth>
Concession
<chr>
Type
<chr>
ATC1
<chr>
ATC1_desc
<chr>
ATC2
<chr>
1991 JulConcessionalCo-paymentsAAlimentary tract and metabolismA01
1991 AugConcessionalCo-paymentsAAlimentary tract and metabolismA01
1991 SepConcessionalCo-paymentsAAlimentary tract and metabolismA01
# Cost series for ATC2 group H02 from PBS.
# The Concession and Type columns remain available on `cost` (they are
# filtered on further below), so `cost` holds more than one series.
cost <- PBS |>
  filter(ATC2 == "H02") |>
  select(Month, Cost)

# Each plot carries a descriptive title, matching the other sections.
cost |>
  autoplot(Cost) +
  labs(title = "Time plot of H02 cost")

cost |>
  gg_season(Cost) +
  labs(title = "Seasonal plot of H02 cost")

cost |>
  gg_subseries(Cost) +
  labs(title = "Subseries plot of H02 cost")

# Lag plots for a single H02 series: the concessional co-payments subset.
# The title names the series actually plotted (H02 cost, not hare pelts).
lag1_series <- cost |>
  filter(
    Concession == "Concessional",
    Type == "Co-payments"
  )

lag1_series |>
  gg_lag(Cost) +
  labs(title = "Lag plots of H02 cost (concessional co-payments)") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1))

# Autocorrelation function of Cost up to lag 50, plotted as a correlogram.
# A descriptive title replaces the empty one, matching the other ACF plots.
cost |>
  ACF(Cost, lag_max = 50) |>
  autoplot() +
  labs(title = "Plot of ACF of H02 cost")

Barrels from us_gasoline

head(us_gasoline,3)
ABCDEFGHIJ0123456789
Week
<week>
Barrels
<dbl>
1991 W066.621
1991 W076.433
1991 W086.582
# Time plot of the weekly Barrels series from us_gasoline.
us_gasoline |>
  autoplot(Barrels) +
  labs(
    title = "plot barrels of gasoline ",
    x = "Week",
    y = "Barrel (million/day)"
  )

# Seasonal plot of the Barrels series.
us_gasoline |>
  gg_season(Barrels) +
  labs(title = "Seasonal plot of barrels per day")

# Subseries plot of the Barrels series.
us_gasoline |>
  gg_subseries(Barrels) +
  labs(title = "Subseries plot of Barrels")

# Lag plots of the Barrels series, with x-axis tick labels rotated 90 degrees.
us_gasoline |>
  gg_lag(Barrels) +
  labs(title = "Lag plots of Barrels") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1))

# Autocorrelation function of Barrels up to lag 50, plotted as a correlogram.
barrels_acf <- ACF(us_gasoline, Barrels, lag_max = 50)
autoplot(barrels_acf) +
  labs(title = "Plot of ACF of Barrels")