Exercise 2.1

Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.

Bricks
help(aus_production)

The aus_production data set contains the “Quarterly production of selected commodities in Australia.” Bricks is the “Clay brick production in millions of bricks.”

head(aus_production)
## # A tsibble: 6 x 7 [1Q]
##   Quarter  Beer Tobacco Bricks Cement Electricity   Gas
##     <qtr> <dbl>   <dbl>  <dbl>  <dbl>       <dbl> <dbl>
## 1 1956 Q1   284    5225    189    465        3923     5
## 2 1956 Q2   213    5178    204    532        4436     6
## 3 1956 Q3   227    5297    208    561        4806     7
## 4 1956 Q4   308    5681    197    570        4418     6
## 5 1957 Q1   262    5577    187    529        4339     5
## 6 1957 Q2   228    5651    214    604        4811     7

The time interval is quarterly.
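
A quick way to confirm the interval (a sketch; assumes the tsibble package, which fpp3 attaches, is loaded):

tsibble::interval(aus_production)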

aus_production |> feasts::autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
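
The warning comes from missing Bricks values in the series, which geom_line() cannot draw. A quick check (a sketch):

# count the NA Bricks observations that autoplot() had to drop
sum(is.na(aus_production$Bricks))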

Lynx
help(pelt)

The pelt data set includes Hudson Bay Company pelt trading records for Snowshoe Hare and Canadian Lynx furs from 1845 to 1935. The Lynx column is specifically the values for Canadian Lynx furs.

head(pelt)
## # A tsibble: 6 x 3 [1Y]
##    Year  Hare  Lynx
##   <dbl> <dbl> <dbl>
## 1  1845 19580 30090
## 2  1846 19600 45150
## 3  1847 19610 49150
## 4  1848 11990 39520
## 5  1849 28040 21230
## 6  1850 58000  8420

The time interval for Lynx is one year (annual data).

pelt |> feasts::autoplot(Lynx)

gafa_stock
help(gafa_stock)

This data set contains historical stock prices for Google, Amazon, Facebook and Apple. ‘Close’ is the closing price for a specific stock on a specific day.

head(gafa_stock)
## # A tsibble: 6 x 8 [!]
## # Key:       Symbol [1]
##   Symbol Date        Open  High   Low Close Adj_Close    Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>
## 1 AAPL   2014-01-02  79.4  79.6  78.9  79.0      67.0  58671200
## 2 AAPL   2014-01-03  79.0  79.1  77.2  77.3      65.5  98116900
## 3 AAPL   2014-01-06  76.8  78.1  76.2  77.7      65.9 103152700
## 4 AAPL   2014-01-07  77.8  78.0  76.8  77.1      65.4  79302300
## 5 AAPL   2014-01-08  77.0  77.9  77.0  77.6      65.8  64632400
## 6 AAPL   2014-01-09  78.1  78.1  76.5  76.6      65.0  69787200

The interval for gafa_stock is irregular ([!]): prices are recorded only on trading days, so weekends and market holidays are missing.

gafa_stock |> feasts::autoplot(Close)
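
Because of the irregular calendar, one option (a sketch following the textbook's approach; aapl and trading_day are just illustrative names) is to re-index a single stock by consecutive trading day so the tsibble becomes regular:

aapl <- gafa_stock |>
  filter(Symbol == "AAPL") |>
  mutate(trading_day = row_number()) |>   # consecutive trading-day counter
  update_tsibble(index = trading_day, regular = TRUE)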

Demand
help(vic_elec)

Demand is the total electricity demand in MWh for Victoria, Australia.

head(vic_elec)
## # A tsibble: 6 x 5 [30m] <Australia/Melbourne>
##   Time                Demand Temperature Date       Holiday
##   <dttm>               <dbl>       <dbl> <date>     <lgl>  
## 1 2012-01-01 00:00:00  4383.        21.4 2012-01-01 TRUE   
## 2 2012-01-01 00:30:00  4263.        21.0 2012-01-01 TRUE   
## 3 2012-01-01 01:00:00  4049.        20.7 2012-01-01 TRUE   
## 4 2012-01-01 01:30:00  3878.        20.6 2012-01-01 TRUE   
## 5 2012-01-01 02:00:00  4036.        20.4 2012-01-01 TRUE   
## 6 2012-01-01 02:30:00  3866.        20.2 2012-01-01 TRUE

The interval is 30 minutes (half-hourly).

vic_elec |>
  feasts::autoplot(Demand) +
  labs(title = "Half-hourly electricity demand for Victoria, Australia",
       x = "Time (30-minute intervals)", y = "Demand (MWh)")
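
To see the longer-term pattern, the half-hourly records can be rolled up to daily totals with index_by() (a sketch; vic_elec already carries a Date column):

vic_elec |>
  index_by(Date) |>                   # group the half-hourly index by calendar day
  summarise(Demand = sum(Demand)) |>  # daily total demand in MWh
  autoplot(Demand)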

Exercise 2.2

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

library(dplyr)
gafa_stock |>  group_by(Symbol) |> filter(Close == max(Close))
## # A tsibble: 4 x 8 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 AAPL   2018-10-03  230.  233.  230.  232.      230. 28654800
## 2 AMZN   2018-09-04 2026. 2050. 2013  2040.     2040.  5721100
## 3 FB     2018-07-25  216.  219.  214.  218.      218. 58954200
## 4 GOOG   2018-07-26 1251  1270. 1249. 1268.     1268.  2405600
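
An equivalent check (a sketch; converting to a tibble first just sidesteps tsibble's index handling) uses dplyr::slice_max():

gafa_stock |>
  as_tibble() |>
  group_by(Symbol) |>
  slice_max(Close, n = 1) |>    # keep the row with the largest Close per stock
  select(Symbol, Date, Close)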

Exercise 2.3

Part a.
tute1 <- readr::read_csv("/Users/williamaiken/DATA624/HW/tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): Sales, AdBudget, GDP
## date (1): Quarter
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(tute1)
Part b.
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)
Part c.
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() #+

  #facet_grid(name ~ ., scales = "free_y")

If you don’t include facet_grid(), all three series are drawn in a single panel on a shared y-axis (sometimes this is what you want).

Exercise 2.4

The USgas package contains data on the demand for natural gas in the US.

library(USgas)
head(us_total)
##   year   state      y
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343
hw_gas <- us_total |>
  as_tsibble(index = year, key = state)
hw_gas |>
  filter(state %in% c("Maine", "Vermont", "New Hampshire", "Massachusetts",
                      "Connecticut", "Rhode Island")) |>
  autoplot()
## Plot variable not specified, automatically selected `.vars = y`
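
The message appears because autoplot() had to guess which variable to plot; naming it explicitly (and adding labels) silences it. A sketch (the title and axis text are just illustrative):

hw_gas |>
  filter(state %in% c("Maine", "Vermont", "New Hampshire", "Massachusetts",
                      "Connecticut", "Rhode Island")) |>
  autoplot(y) +
  labs(title = "Annual natural gas consumption, New England states",
       x = "Year", y = "Consumption")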

Exercise 2.5

hw_tour_raw <- readxl::read_excel("/Users/williamaiken/DATA624/HW/tourism.xlsx")
hw_tour <- hw_tour_raw |>
    mutate(Quarter = yearquarter(Quarter)) |>
    as_tsibble(index = Quarter, key = c(Region, State, Purpose))

head(hw_tour)
## # A tsibble: 6 x 5 [1Q]
## # Key:       Region, State, Purpose [1]
##   Quarter Region   State           Purpose  Trips
##     <qtr> <chr>    <chr>           <chr>    <dbl>
## 1 1998 Q1 Adelaide South Australia Business  135.
## 2 1998 Q2 Adelaide South Australia Business  110.
## 3 1998 Q3 Adelaide South Australia Business  166.
## 4 1998 Q4 Adelaide South Australia Business  127.
## 5 1999 Q1 Adelaide South Australia Business  137.
## 6 1999 Q2 Adelaide South Australia Business  200.

Filtering to the maximum value of Trips, the Region and Purpose with the most trips in a single quarter are ‘Melbourne’ and ‘Visiting’ respectively; the ‘on average’ version of the question is sketched after the output below.

hw_tour |> filter(Trips == max(Trips))
## # A tsibble: 1 x 5 [1Q]
## # Key:       Region, State, Purpose [1]
##   Quarter Region    State    Purpose  Trips
##     <qtr> <chr>     <chr>    <chr>    <dbl>
## 1 2017 Q4 Melbourne Victoria Visiting  985.
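
The exercise wording asks for the combination with the largest number of trips on average, which is a grouped mean rather than the single largest quarter found above. A sketch (output not shown; as_tibble() drops the time-series structure so the mean is taken over all quarters):

hw_tour |>
  as_tibble() |>
  group_by(Region, Purpose) |>
  summarise(mean_trips = mean(Trips), .groups = "drop") |>
  arrange(desc(mean_trips)) |>
  head(1)
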
hw_tour_reduced <- hw_tour_raw |>
  mutate(Quarter = yearquarter(Quarter)) |>
  select(-c('Region', 'Purpose')) |>
  group_by(Quarter, State) |>
  mutate(Trips = sum(Trips)) |>
  ungroup() |>
  distinct() |>
  as_tsibble(index = Quarter, key = State)

head(hw_tour_reduced)
## # A tsibble: 6 x 3 [1Q]
## # Key:       State [1]
##   Quarter State Trips
##     <qtr> <chr> <dbl>
## 1 1998 Q1 ACT    551.
## 2 1998 Q2 ACT    416.
## 3 1998 Q3 ACT    436.
## 4 1998 Q4 ACT    450.
## 5 1999 Q1 ACT    379.
## 6 1999 Q2 ACT    558.

Exercise 2.8

Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.

“Total Private” Employed

This data set required some processing before it could be analysed.

  • There is a positive trend to this data; it has both a yearly seasonality and a cyclicity of roughly every 5-10 years.

  • Interestingly, the largest drop in private employment lines up with the 2008 housing crash.

  • This series is highly auto-correlated.

  • Unsurprisingly, there is a seasonal increase in employment towards the end-of-year holidays.

hw_emply <- us_employment |>
  mutate(Month = yearmonth(Month)) |>
  as_tsibble(index = Month, key = Title)

head(hw_emply)
## # A tsibble: 6 x 4 [1M]
## # Key:       Title [1]
##      Month Title                                  Employed Series_ID
##      <mth> <chr>                                     <dbl> <chr>    
## 1 1939 Jan All Employees, Temporary Help Services       NA TEMPHELPN
## 2 1939 Feb All Employees, Temporary Help Services       NA TEMPHELPN
## 3 1939 Mar All Employees, Temporary Help Services       NA TEMPHELPN
## 4 1939 Apr All Employees, Temporary Help Services       NA TEMPHELPN
## 5 1939 May All Employees, Temporary Help Services       NA TEMPHELPN
## 6 1939 Jun All Employees, Temporary Help Services       NA TEMPHELPN
hw_emply |> filter(Title == "Total Private") |> autoplot(Employed)

hw_emply |> filter(Title == "Total Private") |> gg_season()
## Plot variable not specified, automatically selected `y = Employed`

hw_emply |> filter(Title == "Total Private") |> gg_subseries()
## Plot variable not specified, automatically selected `y = Employed`

hw_emply |> filter(Title == "Total Private") |> gg_lag() + geom_point()
## Plot variable not specified, automatically selected `y = Employed`

hw_emply |> filter(Title == "Total Private") |> ACF()
## Response variable not specified, automatically selected `var = Employed`
## # A tsibble: 29 x 3 [1M]
## # Key:       Title [1]
##    Title              lag   acf
##    <chr>         <cf_lag> <dbl>
##  1 Total Private       1M 0.997
##  2 Total Private       2M 0.993
##  3 Total Private       3M 0.990
##  4 Total Private       4M 0.986
##  5 Total Private       5M 0.983
##  6 Total Private       6M 0.980
##  7 Total Private       7M 0.977
##  8 Total Private       8M 0.974
##  9 Total Private       9M 0.971
## 10 Total Private      10M 0.968
## # ℹ 19 more rows
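
The ACF table is easier to read as a correlogram; a sketch:

hw_emply |>
  filter(Title == "Total Private") |>
  ACF(Employed) |>
  autoplot()
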
Bricks
  • There is a positive trend to this data until the early 1980s; it has both a yearly seasonality and a cyclicity of roughly every 5-10 years.

  • Interestingly, the largest drop in brick production occurs in the 1980s; production recovers, but the trend changes from positive to negative.

  • This series is moderately auto-correlated.

  • Unsurprisingly, there is a seasonal increase in Q4, which is summer and the best time for new construction.

aus_production |> autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

aus_production |> gg_season(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

aus_production |> gg_subseries(Bricks)
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).

aus_production |> gg_lag(Bricks) + geom_point()
## Warning: Removed 20 rows containing missing values (gg_lag).

aus_production |> ACF(Bricks)
## # A tsibble: 22 x 2 [1Q]
##         lag   acf
##    <cf_lag> <dbl>
##  1       1Q 0.900
##  2       2Q 0.815
##  3       3Q 0.813
##  4       4Q 0.828
##  5       5Q 0.720
##  6       6Q 0.642
##  7       7Q 0.655
##  8       8Q 0.692
##  9       9Q 0.609
## 10      10Q 0.556
## # ℹ 12 more rows
Hare
  • There is no apparent trend; there is no seasonality, but there is a pronounced cyclicity of roughly every 10 years, which I imagine is the result of over-harvesting followed by sharp recovery.

  • Interestingly, the largest hare pelt harvest lines up with the American Civil War. Was this material used for making uniforms, or did this boom result from the disruption of American fur harvesting?

  • The autocorrelation alternates between positive and negative as the lag moves through the roughly 10-year boom-and-bust cycle.

pelt |> autoplot(Hare)

There is only one data point per year, so there is no within-year seasonality for gg_season() to plot.

#pelt |> gg_season(Hare)
pelt |> gg_subseries(Hare)

pelt |> gg_lag(Hare) + geom_point()

pelt |> ACF(Hare)
## # A tsibble: 19 x 2 [1Y]
##         lag     acf
##    <cf_lag>   <dbl>
##  1       1Y  0.658 
##  2       2Y  0.214 
##  3       3Y -0.155 
##  4       4Y -0.401 
##  5       5Y -0.493 
##  6       6Y -0.401 
##  7       7Y -0.168 
##  8       8Y  0.113 
##  9       9Y  0.307 
## 10      10Y  0.340 
## 11      11Y  0.296 
## 12      12Y  0.206 
## 13      13Y  0.0372
## 14      14Y -0.153 
## 15      15Y -0.285 
## 16      16Y -0.295 
## 17      17Y -0.202 
## 18      18Y -0.0676
## 19      19Y  0.0956
H02
  • There is a positive trend to this data, and it has a yearly seasonality.

  • The trends for the different Concessions and Types differ: the Concessional and General Safety Net series drop sharply at the start of the calendar year, while the others increase or stay constant.

  • Per the text, the seasonal increase is due to people stocking up on prescriptions at the end of the calendar year.

PBS |> filter(ATC2 == "H02") |> autoplot(Cost)

PBS |> filter(ATC2 == "H02") |> gg_season(Cost)

PBS |> filter(ATC2 == "H02") |> gg_subseries(Cost)

I had to reduce the scope of this time series to make gg_lag() work, since it expects a single series, and it was unclear from the question what additional filtering or aggregation was intended. An alternative that aggregates across the keys is sketched after the plot below.

PBS_reduced <- PBS |> filter(ATC2 == "H02", Concession == "Concessional", Type == "Co-payments") 
PBS_reduced |> gg_lag(Cost) + geom_point()
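
Another option (a sketch; h02_total and TotalCost are just illustrative names) is to aggregate the cost across Concession and Type first, leaving a single series that gg_lag() can plot directly:

h02_total <- PBS |>
  filter(ATC2 == "H02") |>
  summarise(TotalCost = sum(Cost))  # summarising a tsibble collapses the keys per month
h02_total |> gg_lag(TotalCost) + geom_point()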

PBS |> filter(ATC2 == "H02") |> ACF(Cost)
## # A tsibble: 92 x 6 [1M]
## # Key:       Concession, Type, ATC1, ATC2 [4]
##    Concession   Type        ATC1  ATC2       lag   acf
##    <chr>        <chr>       <chr> <chr> <cf_lag> <dbl>
##  1 Concessional Co-payments H     H02         1M 0.834
##  2 Concessional Co-payments H     H02         2M 0.679
##  3 Concessional Co-payments H     H02         3M 0.514
##  4 Concessional Co-payments H     H02         4M 0.352
##  5 Concessional Co-payments H     H02         5M 0.264
##  6 Concessional Co-payments H     H02         6M 0.219
##  7 Concessional Co-payments H     H02         7M 0.253
##  8 Concessional Co-payments H     H02         8M 0.337
##  9 Concessional Co-payments H     H02         9M 0.464
## 10 Concessional Co-payments H     H02        10M 0.574
## # ℹ 82 more rows
us_gasoline
  • There is a positive trend to this data until the 2008 housing crash; it has a yearly seasonality and no apparent cyclicity.

  • The amplitude of the seasonality decreased in the run-up to the 2008 crash and has become more pronounced again since the recovery. Maybe gasoline consumption was less affected by seasonality before the crash because of high consumer confidence (even though prices were at their highest since 1990).

  • This series is moderately auto-correlated.

  • Unsurprisingly, there is a seasonal increase in the summer, when people drive the most.

us_gasoline |> autoplot(Barrels)

us_gasoline |> gg_season(Barrels)

us_gasoline |> gg_subseries(Barrels)

us_gasoline |> gg_lag(Barrels) + geom_point()

us_gasoline |> ACF(Barrels)
## # A tsibble: 31 x 2 [1W]
##         lag   acf
##    <cf_lag> <dbl>
##  1       1W 0.893
##  2       2W 0.882
##  3       3W 0.873
##  4       4W 0.866
##  5       5W 0.847
##  6       6W 0.844
##  7       7W 0.832
##  8       8W 0.831
##  9       9W 0.822
## 10      10W 0.808
## # ℹ 21 more rows