library(tsibble)

## Warning: package 'tsibble' was built under R version 4.3.3

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr

## 
## Attaching package: 'tsibble'

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

library(fpp3)

## Warning: package 'fpp3' was built under R version 4.3.3

## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──

## ✔ tibble      3.2.1     ✔ tsibbledata 0.4.1
## ✔ dplyr       1.1.4     ✔ feasts      0.3.2
## ✔ tidyr       1.3.1     ✔ fable       0.3.4
## ✔ lubridate   1.9.3     ✔ fabletools  0.4.2
## ✔ ggplot2     3.5.1

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()     masks base::date()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ tsibble::intersect()  masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ tsibble::setdiff()    masks base::setdiff()
## ✖ tsibble::union()      masks base::union()

Chapter 2, Exercise 1

Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.

Use ? (or help()) to find out about the data in each series.
What is the time interval of each series?
Use autoplot() to produce a time plot of each series.
For the last plot, modify the axis labels and title.

Bricks

# Keep only the Bricks measurement from aus_production.
bricks_data <- select(aus_production, Bricks)

# Preview the first rows; the tsibble header reports the series interval.
bricks_data |>
  head() |>
  print()

## # A tsibble: 6 x 2 [1Q]
##   Bricks Quarter
##    <dbl>   <qtr>
## 1    189 1956 Q1
## 2    204 1956 Q2
## 3    208 1956 Q3
## 4    197 1956 Q4
## 5    187 1957 Q1
## 6    214 1957 Q2

The time interval for the Bricks series is quarters.

# Time plot of the Bricks series.
bricks_data |> autoplot(Bricks)

## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

Lynx

# Keep only the Lynx measurement from pelt.
lynx_data <- select(pelt, Lynx)

# Preview the first rows; the tsibble header reports the series interval.
lynx_data |>
  head() |>
  print()

## # A tsibble: 6 x 2 [1Y]
##    Lynx  Year
##   <dbl> <dbl>
## 1 30090  1845
## 2 45150  1846
## 3 49150  1847
## 4 39520  1848
## 5 21230  1849
## 6  8420  1850

The time interval for the Lynx series is years.

# Time plot of the Lynx series.
lynx_data |> autoplot(Lynx)

Close

# Keep only the closing price from gafa_stock.
close_data <- select(gafa_stock, Close)

# Preview the first rows; the tsibble header reports the series interval.
close_data |>
  head() |>
  print()

## # A tsibble: 6 x 3 [!]
## # Key:       Symbol [1]
##   Close Date       Symbol
##   <dbl> <date>     <chr> 
## 1  79.0 2014-01-02 AAPL  
## 2  77.3 2014-01-03 AAPL  
## 3  77.7 2014-01-06 AAPL  
## 4  77.1 2014-01-07 AAPL  
## 5  77.6 2014-01-08 AAPL  
## 6  76.6 2014-01-09 AAPL

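The time interval for the Close series is irregular (shown as [!] in the tsibble header): prices are recorded daily, but only on trading days, so weekends and holidays are skipped.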

# Time plot of the closing prices.
close_data |> autoplot(Close)

Demand

# Keep only the Demand measurement from vic_elec.
demand_data <- select(vic_elec, Demand)

# Preview the first rows; the tsibble header reports the series interval.
demand_data |>
  head() |>
  print()

## # A tsibble: 6 x 2 [30m] <Australia/Melbourne>
##   Demand Time               
##    <dbl> <dttm>             
## 1  4383. 2012-01-01 00:00:00
## 2  4263. 2012-01-01 00:30:00
## 3  4049. 2012-01-01 01:00:00
## 4  3878. 2012-01-01 01:30:00
## 5  4036. 2012-01-01 02:00:00
## 6  3866. 2012-01-01 02:30:00

The time interval for the Demand series is half-hours.

# Time plot of electricity demand with a custom title and axis labels.
autoplot(demand_data, Demand) +
  labs(
    x = "Date",
    y = "Total Demand (MWh)",
    title = "Victoria, Australia Electricity Demand"
  )

Chapter 2, Exercise 2

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

# Within each stock symbol, keep the row(s) whose Close equals that
# symbol's maximum closing price, then print the result.
close_data |>
  group_by(Symbol) |>
  filter(Close == max(Close)) |>
  print()

## # A tsibble: 4 x 3 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Close Date       Symbol
##   <dbl> <date>     <chr> 
## 1  232. 2018-10-03 AAPL  
## 2 2040. 2018-09-04 AMZN  
## 3  218. 2018-07-25 FB    
## 4 1268. 2018-07-26 GOOG

The peak closing price for each stock is shown in the table above.

Chapter 2, Exercise 3

Part B

Convert the data to time series

# Load readr and import the tute1 data from a CSV file.
library(readr)
tute1 <- read_csv(file = "tute1.csv")

## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): Sales, AdBudget, GDP
## date (1): Quarter
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Textbook-supplied conversion: parse Quarter as a yearquarter and use it
# as the tsibble index.
mytimeseries <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)

Part C

Construct time series plots of each of the three series. Check what happens when you don’t include facet_grid().

# Textbook-supplied plot: reshape to long form (one row per Quarter and
# series name), then draw each series in its own row with a free y-axis.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")

Now, without including facet_grid:

# Same long-form plot, but with every series drawn on one shared panel.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line()

Without facet_grid, all three variables are plotted on the same panel with a single y-axis scale shared by all three. This could be useful for comparing the same quantity in multiple contexts (for example, natural gas consumption in different states across the same time period. Spoilers!). With facet_grid, each numeric variable gets its own panel with its own scale.

Chapter 2, Exercise 4

Part B

Create a tsibble from us_total with year as the index and state as the key.

# Load USgas and convert us_total to a tsibble indexed by year and keyed
# by state.
library(USgas)
gas_tsibble <- as_tsibble(us_total, index = year, key = state)
gas_tsibble |>
  head() |>
  print()

## # A tsibble: 6 x 3 [1Y]
## # Key:       state [1]
##    year state        y
##   <int> <chr>    <int>
## 1  1997 Alabama 324158
## 2  1998 Alabama 329134
## 3  1999 Alabama 337270
## 4  2000 Alabama 353614
## 5  2001 Alabama 332693
## 6  2002 Alabama 379343

Part C

Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).

# The six New England states named in the exercise.
ne_states <- c(
  "Maine", "Vermont", "New Hampshire",
  "Massachusetts", "Connecticut", "Rhode Island"
)
ne_gas <- filter(gas_tsibble, state %in% ne_states)

# One line per state, since state is the tsibble key.
ne_gas |> autoplot(y)

Chapter 2, Exercise 5

Part A

Download tourism.xlsx from the book website and read it into R using readxl::read_excel().

# Load readxl and import the tourism workbook.
library(readxl)

tourism <- read_excel(path = "tourism.xlsx")
tourism |>
  head() |>
  print()

## # A tibble: 6 × 5
##   Quarter    Region   State           Purpose  Trips
##   <chr>      <chr>    <chr>           <chr>    <dbl>
## 1 1998-01-01 Adelaide South Australia Business  135.
## 2 1998-04-01 Adelaide South Australia Business  110.
## 3 1998-07-01 Adelaide South Australia Business  166.
## 4 1998-10-01 Adelaide South Australia Business  127.
## 5 1999-01-01 Adelaide South Australia Business  137.
## 6 1999-04-01 Adelaide South Australia Business  200.

Part B

Create a tsibble which is identical to the tourism tsibble from the tsibble package.

# Parse Quarter as a yearquarter, then build a tsibble keyed by
# Region/State/Purpose with Quarter as the index.
tourism_tsibble <- as_tsibble(
  mutate(tourism, Quarter = yearquarter(Quarter)),
  key = c(Region, State, Purpose),
  index = Quarter
)

tourism_tsibble |> head()

## # A tsibble: 6 x 5 [1Q]
## # Key:       Region, State, Purpose [1]
##   Quarter Region   State           Purpose  Trips
##     <qtr> <chr>    <chr>           <chr>    <dbl>
## 1 1998 Q1 Adelaide South Australia Business  135.
## 2 1998 Q2 Adelaide South Australia Business  110.
## 3 1998 Q3 Adelaide South Australia Business  166.
## 4 1998 Q4 Adelaide South Australia Business  127.
## 5 1999 Q1 Adelaide South Australia Business  137.
## 6 1999 Q2 Adelaide South Australia Business  200.

Part C

Find what combination of Region and Purpose had the maximum number of overnight trips on average.

This question seems ambiguous to me; I can’t tell if it’s asking for: 1. The values of Region and Purpose for the row with the maximum Trips value; or 2. The values of Region and Purpose that have the highest average Trips value across the data set.

So, I did it both ways!

Approach 1

# Row(s) holding the single largest Trips value in the whole data set.
max_combo_1 <- filter(tourism_tsibble, Trips == max(Trips))

max_combo_1 |> print()

## # A tsibble: 1 x 5 [1Q]
## # Key:       Region, State, Purpose [1]
##   Quarter Region    State    Purpose  Trips
##     <qtr> <chr>     <chr>    <chr>    <dbl>
## 1 2017 Q4 Melbourne Victoria Visiting  985.

In interpretation 1, Visiting Melbourne had the maximum number of overnight trips (985.2784 in Q4 of 2017).

Approach 2

# Average Trips for every Region/Purpose pair, largest first.
# `.groups = "drop"` returns an ungrouped tibble and avoids the summarise()
# regrouping message. No select()/distinct() step is needed because
# summarise() already yields one row per group with exactly these columns.
max_combo_2 <- tourism |>
  group_by(Region, Purpose) |>
  summarise(avg_trips = mean(Trips), .groups = "drop") |>
  arrange(desc(avg_trips))

print(head(max_combo_2, 1))

## # A tibble: 1 × 3
##   Region Purpose  avg_trips
##   <chr>  <chr>        <dbl>
## 1 Sydney Visiting      747.

In interpretation 2, the combination with the highest average value in the Trips column across the data set was Visiting Sydney.

Part D

Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.

# Sum Trips over all Regions and Purposes within each State; because the
# input is a tsibble, the Quarter index is kept in the result.
total_by_state <- summarise(
  group_by(tourism_tsibble, State),
  total_trips = sum(Trips)
)

total_by_state |>
  head() |>
  print()

## # A tsibble: 6 x 3 [1Q]
## # Key:       State [1]
##   State Quarter total_trips
##   <chr>   <qtr>       <dbl>
## 1 ACT   1998 Q1        551.
## 2 ACT   1998 Q2        416.
## 3 ACT   1998 Q3        436.
## 4 ACT   1998 Q4        450.
## 5 ACT   1999 Q1        379.
## 6 ACT   1999 Q2        558.

Chapter 2, Exercise 8

Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.

Can you spot any seasonality, cyclicity and trend?
What do you learn about the series?
What can you say about the seasonal patterns?
Can you identify any unusual years?

Total Private Employed

# "Total Private" series from us_employment.
priv_employed_data <- us_employment |>
  filter(Title == "Total Private")

# Name the plotted variable explicitly in every call so the functions do
# not have to guess it (and do not emit "Plot variable not specified").
autoplot(priv_employed_data, Employed)

gg_season(priv_employed_data, Employed)

gg_subseries(priv_employed_data, Employed)

gg_lag(priv_employed_data, Employed)

ACF(priv_employed_data, Employed) |> autoplot()

There is no seasonality or cyclicity evident in the data, but the overall trend is up over time. From the gg_season plot we can see that there is no particular seasonal pattern (as the line for each year is close to flat), but both the original line plot and the subseries plot show the dramatic impact that the financial crisis of 2008 had on payrolls. I’m not sure how useful this data is, since the number of employed persons will almost certainly increase as the population increases over time. I wonder if this analysis would be more meaningful if it used the labor force participation rate, or unemployment rate, or some other rate stat that accounted for the growth in population.

Bricks

# Rebuild the Bricks series so this exercise stands on its own.
bricks_data <- select(aus_production, Bricks)

bricks_data |> autoplot(Bricks)

## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

bricks_data |> gg_season(Bricks)

## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

bricks_data |> gg_subseries(Bricks)

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).

bricks_data |> gg_lag(Bricks)

## Warning: Removed 20 rows containing missing values (gg_lag).

bricks_data |>
  ACF(Bricks) |>
  autoplot()

This data shows a general trend upward, followed by a general trend downward. There are different cyclical patterns evident: there is almost always a Q2 increase, which is sometimes followed by a Q3 increase and sometimes followed by a Q3 decrease. Four specific major outlier drops are visible in the data.

Hare

# Keep only the Hare measurement from pelt.
hare_data <- select(pelt, Hare)

hare_data |> autoplot(Hare)

hare_data |> gg_subseries(Hare)

hare_data |> gg_lag(Hare)

hare_data |>
  ACF(Hare) |>
  autoplot()

The overall trend in this data is roughly flat. We do see a cyclical nature to it, but it is quite a long cycle, appearing to be about a decade. There are a couple of unusually high peaks.

H02 Cost

# Cost series for ATC2 group "H02" from PBS. The variable is named after
# the code itself (H-zero-two), not the letter "o".
h02_data <- PBS |>
  filter(ATC2 == "H02") |>
  select(Cost)

autoplot(h02_data, Cost)

gg_season(h02_data, Cost)

gg_subseries(h02_data, Cost)

ACF(h02_data, Cost) |> autoplot()

This data contains four separate time series. The Concessional/Co-payments series shows a clear overall trend upward, with some evidence of seasonal variation (generally, higher in the summer months). The General/Co-Payments series is largely flat with no clear trend or seasonality/cyclicity. The Concessional/Safety net series has a clear overall trend upward with an even clearer seasonal pattern (starting high in January before dropping very low from February-April and then trending back up again until the following January). The General/Safety net series also shows that seasonal pattern, but the overall trend there is flat (with a few noticeable outliers at the beginning of the data set).

Barrels

# us_gasoline is used as-is; Barrels is the plotted variable throughout.
barrels_data <- us_gasoline

barrels_data |> autoplot(Barrels)

barrels_data |> gg_season(Barrels)

barrels_data |> gg_subseries(Barrels)

barrels_data |> gg_lag(Barrels)

barrels_data |>
  ACF(Barrels) |>
  autoplot()

This series shows a trend upward until around 2006, followed by a trend downward until about 2011, followed by another trend upward. There is something of a seasonal pattern evident, with most years higher in the summer months than the winter months, but it is not nearly as dramatic of a seasonal pattern as there was in the two “safety net” series from the PBS data. There are a couple of visible outlier years, but the data fluctuates a fair amount which makes the outliers look less extreme.

Myrianthopoulos DATA624 HW1

Marley Myrianthopoulos

2024-09-08

Chapter 2, Exercise 1

Bricks

Lynx

Close

Demand

Chapter 2, Exercise 2

Chapter 2, Exercise 3

Part B

Part C

Chapter 2, Exercise 4

Part B

Part C

Chapter 2, Exercise 5

Part A

Part B

Part C

Approach 1

Approach 2

Part D

Chapter 2, Exercise 8

Total Private Employed

Bricks

Hare

H02 Cost

Barrels