library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.3 ✔ fable 0.4.1
## ✔ ggplot2 3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(dplyr)
Exercises 2.1, 2.2, 2.3, 2.4, 2.5 and 2.8
Use ? (or help()) to find out about the data in each series.
What is the time interval of each series?
Use autoplot() to produce a time plot of each series.
For the last plot, modify the axis labels and title.
Bricks
from aus_production
:
aus_production
## # A tsibble: 218 x 7 [1Q]
## Quarter Beer Tobacco Bricks Cement Electricity Gas
## <qtr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1956 Q1 284 5225 189 465 3923 5
## 2 1956 Q2 213 5178 204 532 4436 6
## 3 1956 Q3 227 5297 208 561 4806 7
## 4 1956 Q4 308 5681 197 570 4418 6
## 5 1957 Q1 262 5577 187 529 4339 5
## 6 1957 Q2 228 5651 214 604 4811 7
## 7 1957 Q3 236 5317 227 603 5259 7
## 8 1957 Q4 320 6152 222 582 4735 6
## 9 1958 Q1 272 5758 199 554 4608 5
## 10 1958 Q2 233 5641 229 620 5196 7
## # ℹ 208 more rows
The time interval is quarterly.
# Reduce aus_production to the Bricks measurement, then draw its time plot.
aus_production_bricks <- select(aus_production, Bricks)
aus_production_bricks |>
  autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
Lynx
from pelt
:
pelt
## # A tsibble: 91 x 3 [1Y]
## Year Hare Lynx
## <dbl> <dbl> <dbl>
## 1 1845 19580 30090
## 2 1846 19600 45150
## 3 1847 19610 49150
## 4 1848 11990 39520
## 5 1849 28040 21230
## 6 1850 58000 8420
## 7 1851 74600 5560
## 8 1852 75090 5080
## 9 1853 88480 10170
## 10 1854 61280 19600
## # ℹ 81 more rows
The time interval is yearly.
# Isolate the Lynx series from pelt and plot it over time.
pelt_lynx <- select(pelt, Lynx)
pelt_lynx |>
  autoplot(Lynx)
Close
from gafa_stock
:
gafa_stock
## # A tsibble: 5,032 x 8 [!]
## # Key: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2014-01-02 79.4 79.6 78.9 79.0 67.0 58671200
## 2 AAPL 2014-01-03 79.0 79.1 77.2 77.3 65.5 98116900
## 3 AAPL 2014-01-06 76.8 78.1 76.2 77.7 65.9 103152700
## 4 AAPL 2014-01-07 77.8 78.0 76.8 77.1 65.4 79302300
## 5 AAPL 2014-01-08 77.0 77.9 77.0 77.6 65.8 64632400
## 6 AAPL 2014-01-09 78.1 78.1 76.5 76.6 65.0 69787200
## 7 AAPL 2014-01-10 77.1 77.3 75.9 76.1 64.5 76244000
## 8 AAPL 2014-01-13 75.7 77.5 75.7 76.5 64.9 94623200
## 9 AAPL 2014-01-14 76.9 78.1 76.8 78.1 66.1 83140400
## 10 AAPL 2014-01-15 79.1 80.0 78.8 79.6 67.5 97909700
## # ℹ 5,022 more rows
The time interval is irregular (days).
# Keep only the closing price from gafa_stock and plot it.
gafa_stock_close <- select(gafa_stock, Close)
gafa_stock_close |>
  autoplot(Close)
Demand
from vic_elec
:
?vic_elec
The time interval is 30 minutes.
# Demand series from vic_elec, plotted with a custom title and axis labels.
vic_elec_demand <- select(vic_elec, Demand)
vic_elec_demand |>
  autoplot(Demand) +
  labs(
    x = "Time (30 min)",
    y = "Total electricity demand in MWh",
    title = "Half-hourly electricity demand for Victoria, Australia"
  )
filter()
to find what days corresponded to the peak
closing price for each of the four stocks in
gafa_stock
.
# Closing prices for the AAPL symbol only.
gafa_stock_close_appl <- gafa_stock |>
  select(Close) |>
  filter(Symbol == "AAPL")
# Sort by closing price, highest first, and keep the top row: the peak close.
gafa_stock_close_appl[
  order(gafa_stock_close_appl$Close, decreasing = TRUE),
] |>
  head(n = 1)
## # A tsibble: 1 x 3 [!]
## # Key: Symbol [1]
## Close Date Symbol
## <dbl> <date> <chr>
## 1 232. 2018-10-03 AAPL
# Closing prices for the GOOG symbol only.
gafa_stock_close_goog <- filter(select(gafa_stock, Close), Symbol == "GOOG")
# Sort by closing price, highest first, and keep the top row: the peak close.
gafa_stock_close_goog[
  order(gafa_stock_close_goog$Close, decreasing = TRUE),
] |>
  head(n = 1)
## # A tsibble: 1 x 3 [!]
## # Key: Symbol [1]
## Close Date Symbol
## <dbl> <date> <chr>
## 1 1268. 2018-07-26 GOOG
# Closing prices for the FB symbol only.
gafa_stock_close_fb <- filter(select(gafa_stock, Close), Symbol == "FB")
# Sort by closing price, highest first, and keep the top row: the peak close.
gafa_stock_close_fb[
  order(gafa_stock_close_fb$Close, decreasing = TRUE),
] |>
  head(n = 1)
## # A tsibble: 1 x 3 [!]
## # Key: Symbol [1]
## Close Date Symbol
## <dbl> <date> <chr>
## 1 218. 2018-07-25 FB
# Closing prices for the AMZN symbol only.
gafa_stock_close_amzn <- filter(select(gafa_stock, Close), Symbol == "AMZN")
# Sort by closing price, highest first, and keep the top row: the peak close.
gafa_stock_close_amzn[
  order(gafa_stock_close_amzn$Close, decreasing = TRUE),
] |>
  head(n = 1)
## # A tsibble: 1 x 3 [!]
## # Key: Symbol [1]
## Close Date Symbol
## <dbl> <date> <chr>
## 1 2040. 2018-09-04 AMZN
Peak closing dates:
# Load the tute1 data (Quarter, Sales, AdBudget, GDP) straight from GitHub.
tute1 <- readr::read_csv(
  "https://raw.githubusercontent.com/gillianmcgovern0/cuny-data-624/refs/heads/main/tute1.csv"
)
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Sales, AdBudget, GDP
## date (1): Quarter
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Parse Quarter into a yearquarter index and convert the data to a tsibble.
mytimeseries <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)
# Stack every measurement into name/value pairs and draw one panel per series,
# each with its own y-axis scale.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")
Check what happens when you don’t include facet_grid():
# Same long-format line plot, but with no faceting layer.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line()
When you don’t include facet_grid(), all of the series are drawn on a single graph. This can make it easier to compare the series against each other.
USgas
package contains data on the demand for
natural gas in the US.
USgas
package.
library(USgas)
us_total
with year as the index
and state as the key.
# Convert us_total to a tsibble: `year` is the index and `state` is the key.
us_total_times_series <- us_total |>
  as_tsibble(index = year, key = state)
# The New England states to keep.
ne <- c(
  "Connecticut", "Maine", "Massachusetts",
  "New Hampshire", "Rhode Island", "Vermont"
)
ne_gas <- us_total_times_series |>
  filter(state %in% ne)
# Name the plotted column explicitly instead of relying on auto-selection.
autoplot(ne_gas, y)
tourism.xlsx
from the book website and read it
into R using readxl::read_excel()
.
This command worked fine locally:
# tourism_df <- readxl::read_excel("/Users/gillianmcgovern/Downloads/tourism.xlsx")
But when I tried to use the xlsx file hosted on GitHub, I could not access the raw contents of the file and therefore could not read it into R. This is most likely because git treats it as a binary file. I could probably override this in a .gitattributes file for my repo, but I chose to save the file as a csv instead.
# Load the csv copy of the tourism data from GitHub.
tourism_df <- readr::read_csv(
  "https://raw.githubusercontent.com/gillianmcgovern0/cuny-data-624/refs/heads/main/tourism.csv"
)
## Rows: 24320 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Region, State, Purpose
## dbl (1): Trips
## date (1): Quarter
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
tourism
tsibble from the tsibble
package.
# Parse Quarter into a yearquarter index; each Region/State/Purpose combination
# is one key of the tsibble.
tourism_time_series <- tourism_df |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter, key = c(Region, State, Purpose))
head(tourism_time_series, n = 5)
## # A tsibble: 5 x 5 [1Q]
## # Key: Region, State, Purpose [1]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
Region
and
Purpose
had the maximum number of overnight trips on
average.
# Mean trips per Region/Purpose combination. The tsibble is converted to a
# plain tibble first so that the Quarter index is removed before averaging.
trips <- tourism_time_series |>
  as_tibble() |>
  select(Region, Purpose, Trips) |>
  group_by(Region, Purpose) |>
  summarise(mean_trips = mean(Trips))
## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.
# Sort by the mean, highest first, and keep the top combination.
head(trips[order(trips$mean_trips, decreasing = TRUE), ], 1)
## # A tibble: 1 × 3
## # Groups: Region [1]
## Region Purpose mean_trips
## <chr> <chr> <dbl>
## 1 Sydney Visiting 747.
The winning combination is Sydney and Visiting. I had to use as_tibble to get rid of the time index and get an accurate mean.
Let’s first create a tsibble from tourism_time_series
which combines the Purposes and Regions:
# Add a label column of the form "<Purpose> in <Region>".
tourism_time_series_purpose_and_region <- mutate(
  tourism_time_series,
  Purpose_And_Region = paste(Purpose, Region, sep = " in ")
)
tourism_time_series_purpose_and_region
## # A tsibble: 24,320 x 6 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips Purpose_And_Region
## <qtr> <chr> <chr> <chr> <dbl> <chr>
## 1 1998 Q1 Adelaide South Australia Business 135. Business in Adelaide
## 2 1998 Q2 Adelaide South Australia Business 110. Business in Adelaide
## 3 1998 Q3 Adelaide South Australia Business 166. Business in Adelaide
## 4 1998 Q4 Adelaide South Australia Business 127. Business in Adelaide
## 5 1999 Q1 Adelaide South Australia Business 137. Business in Adelaide
## 6 1999 Q2 Adelaide South Australia Business 200. Business in Adelaide
## 7 1999 Q3 Adelaide South Australia Business 169. Business in Adelaide
## 8 1999 Q4 Adelaide South Australia Business 134. Business in Adelaide
## 9 2000 Q1 Adelaide South Australia Business 154. Business in Adelaide
## 10 2000 Q2 Adelaide South Australia Business 169. Business in Adelaide
## # ℹ 24,310 more rows
We now have a column called Purpose_And_Region
which
says the purpose “in” the region.
Now let’s have the tsibble show the total trips by State:
# Sum Trips within each State.
tourism_time_series_by_state <- summarise(
  group_by(tourism_time_series_purpose_and_region, State),
  Total_Trips = sum(Trips)
)
tourism_time_series_by_state
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter Total_Trips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ℹ 630 more rows
The tsibble now shows the total trips for each state.
autoplot()
,
gg_season()
, gg_subseries()
,
gg_lag()
, ACF()
and explore features from the
following time series: “Total Private” Employed
from
us_employment
, Bricks
from
aus_production
, Hare
from pelt
,
“H02” Cost
from PBS
, and Barrels
from us_gasoline
.
“Total Private” Employed
from
us_employment
:
# Keep the "Total Private" title only, reduced to the Employed measurement.
us_employment_filtered <- select(
  filter(us_employment, Title == "Total Private"),
  Employed
)
us_employment_filtered
## # A tsibble: 969 x 2 [1M]
## Employed Month
## <dbl> <mth>
## 1 25338 1939 Jan
## 2 25447 1939 Feb
## 3 25833 1939 Mar
## 4 25801 1939 Apr
## 5 26113 1939 May
## 6 26485 1939 Jun
## 7 26481 1939 Jul
## 8 26848 1939 Aug
## 9 27468 1939 Sep
## 10 27830 1939 Oct
## # ℹ 959 more rows
# Time plot.
us_employment_filtered |>
  autoplot(Employed) +
  labs(title = "Monthly “Total Private” Employed US Employment Data")
# Seasonal plot.
us_employment_filtered |>
  gg_season(Employed) +
  labs(title = "Monthly “Total Private” Employed US Employment Data")
# Seasonal subseries plot.
us_employment_filtered |>
  gg_subseries(Employed) +
  labs(title = "Monthly “Total Private” Employed US Employment Data")
# Lag plot drawn with points.
us_employment_filtered |>
  gg_lag(Employed, geom = "point") +
  labs(title = "Monthly “Total Private” Employed US Employment Data")
# Autocorrelation function.
autoplot(ACF(us_employment_filtered, Employed)) +
  labs(title = "Monthly “Total Private” Employed US Employment Data")
Can you spot any seasonality, cyclicity and trend?
autoplot:
gg_season:
gg_subseries:
gg_lag:
ACF:
What do you learn about the series?
What can you say about the seasonal patterns?
Can you identify any unusual years?
Bricks
from aus_production
:
# Reduce aus_production to the Bricks measurement and print the result.
aus_production_bricks <- select(aus_production, Bricks)
aus_production_bricks
## # A tsibble: 218 x 2 [1Q]
## Bricks Quarter
## <dbl> <qtr>
## 1 189 1956 Q1
## 2 204 1956 Q2
## 3 208 1956 Q3
## 4 197 1956 Q4
## 5 187 1957 Q1
## 6 214 1957 Q2
## 7 227 1957 Q3
## 8 222 1957 Q4
## 9 199 1958 Q1
## 10 229 1958 Q2
## # ℹ 208 more rows
# Time plot with the individual observations marked.
aus_production_bricks |>
  autoplot(Bricks) +
  geom_point()
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Seasonal plot with the individual observations marked.
aus_production_bricks |>
  gg_season(Bricks) +
  geom_point()
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 20 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Seasonal subseries plot.
aus_production_bricks |>
  gg_subseries(Bricks)
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Lag plot drawn with points.
aus_production_bricks |>
  gg_lag(Bricks, geom = "point")
## Warning: Removed 20 rows containing missing values (gg_lag).
# Autocorrelation function.
autoplot(ACF(aus_production_bricks, Bricks))
Can you spot any seasonality, cyclicity and trend?
autoplot:
gg_season:
gg_subseries:
gg_lag:
ACF:
What do you learn about the series?
What can you say about the seasonal patterns?
More details above, but
Can you identify any unusual years?
Hare
from pelt
:
# Isolate the Hare series from pelt and print the result.
pelt_hare <- select(pelt, Hare)
pelt_hare
## # A tsibble: 91 x 2 [1Y]
## Hare Year
## <dbl> <dbl>
## 1 19580 1845
## 2 19600 1846
## 3 19610 1847
## 4 11990 1848
## 5 28040 1849
## 6 58000 1850
## 7 74600 1851
## 8 75090 1852
## 9 88480 1853
## 10 61280 1854
## # ℹ 81 more rows
# Time plot with the individual observations marked.
pelt_hare |>
  autoplot(Hare) +
  geom_point()
# gg_season(pelt_hare, Hare) # The data must contain at least one observation per seasonal period.
# Subseries plot with the individual observations marked.
pelt_hare |>
  gg_subseries(Hare) +
  geom_point()
# Lag plot drawn with points.
pelt_hare |>
  gg_lag(Hare, geom = "point")
# Autocorrelation function.
autoplot(ACF(pelt_hare, Hare))
autoplot:
gg_season:
gg_subseries:
gg_lag:
ACF:
What do you learn about the series?
What can you say about the seasonal patterns?
Can you identify any unusual years?
“H02” Cost
from PBS
:
# Cost for every H02 row of PBS.
pbs_filtered_cost <- select(filter(PBS, ATC2 == "H02"), Cost)
# One subset per Concession/Type combination within H02.
pbs_filtered_cost_c_c <- select(
  filter(
    PBS,
    ATC2 == "H02",
    Concession == "Concessional",
    Type == "Co-payments"
  ),
  Cost
)
pbs_filtered_cost_c_s <- select(
  filter(
    PBS,
    ATC2 == "H02",
    Concession == "Concessional",
    Type == "Safety net"
  ),
  Cost
)
pbs_filtered_cost_g_c <- select(
  filter(
    PBS,
    ATC2 == "H02",
    Concession == "General",
    Type == "Co-payments"
  ),
  Cost
)
pbs_filtered_cost_g_s <- select(
  filter(
    PBS,
    ATC2 == "H02",
    Concession == "General",
    Type == "Safety net"
  ),
  Cost
)
# Time, seasonal and subseries plots for all H02 rows together.
autoplot(pbs_filtered_cost, Cost)
gg_season(pbs_filtered_cost, Cost)
gg_subseries(pbs_filtered_cost, Cost)
# Lag plots, one per Concession/Type subset. The plotted column is named
# explicitly, as in the other gg_lag() calls in this file, rather than left
# to automatic selection.
gg_lag(pbs_filtered_cost_c_c, Cost, geom = "point")
gg_lag(pbs_filtered_cost_c_s, Cost, geom = "point")
gg_lag(pbs_filtered_cost_g_c, Cost, geom = "point")
gg_lag(pbs_filtered_cost_g_s, Cost, geom = "point")
# Autocorrelation function for all H02 rows together.
pbs_filtered_cost |>
  ACF(Cost) |>
  autoplot()
autoplot:
Concessional/Co-payments/H/H02:
General/Co-payments/H/H02:
Concessional/Safety Net/H/H02:
General/Safety Net/H/H02:
gg_season:
Concessional/Co-payments/H/H02:
General/Co-payments/H/H02:
Concessional/Safety Net/H/H02:
General/Safety Net/H/H02:
gg_subseries:
Concessional/Co-payments/H/H02:
General/Co-payments/H/H02:
Concessional/Safety Net/H/H02:
General/Safety Net/H/H02:
gg_lag:
Concessional/Co-payments/H/H02:
General/Co-payments/H/H02:
Concessional/Safety Net/H/H02:
General/Safety Net/H/H02:
ACF:
Concessional/Co-payments/H/H02:
General/Co-payments/H/H02:
Concessional/Safety Net/H/H02:
General/Safety Net/H/H02:
What do you learn about the series?
What can you say about the seasonal patterns?
Concessional/Co-payments/H/H02:
General/Co-payments/H/H02:
Concessional/Safety Net/H/H02:
General/Safety Net/H/H02:
Can you identify any unusual years?
Barrels
from us_gasoline
:
# Reduce us_gasoline to the Barrels measurement and print the result.
us_gasoline_barrels <- select(us_gasoline, Barrels)
us_gasoline_barrels
## # A tsibble: 1,355 x 2 [1W]
## Barrels Week
## <dbl> <week>
## 1 6.62 1991 W06
## 2 6.43 1991 W07
## 3 6.58 1991 W08
## 4 7.22 1991 W09
## 5 6.88 1991 W10
## 6 6.95 1991 W11
## 7 7.33 1991 W12
## 8 6.78 1991 W13
## 9 7.50 1991 W14
## 10 6.92 1991 W15
## # ℹ 1,345 more rows
# Time plot.
us_gasoline_barrels |>
  autoplot(Barrels)
# Seasonal plot.
us_gasoline_barrels |>
  gg_season(Barrels)
# Seasonal subseries plot.
us_gasoline_barrels |>
  gg_subseries(Barrels)
# Lag plot drawn with points.
us_gasoline_barrels |>
  gg_lag(Barrels, geom = "point")
# Autocorrelation function.
autoplot(ACF(us_gasoline_barrels, Barrels))
autoplot:
gg_season:
gg_subseries:
gg_lag:
ACF:
What do you learn about the series?
What can you say about the seasonal patterns?
Can you identify any unusual years?