Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.
help(aus_production)
The aus_production data set is the “Quarterly production of selected commodities in Australia. Bricks is the”Clay brick production in millions of bricks."
head(aus_production)
## # A tsibble: 6 x 7 [1Q]
## Quarter Beer Tobacco Bricks Cement Electricity Gas
## <qtr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1956 Q1 284 5225 189 465 3923 5
## 2 1956 Q2 213 5178 204 532 4436 6
## 3 1956 Q3 227 5297 208 561 4806 7
## 4 1956 Q4 308 5681 197 570 4418 6
## 5 1957 Q1 262 5577 187 529 4339 5
## 6 1957 Q2 228 5651 214 604 4811 7
The time interval is fiscal quarters.
aus_production |> feasts::autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
help(pelt)
The pelt data set includes Hudson Bay Company pelt trading records for Snowshoe Hare and Canadian Lynx furs from 1845-1935. The lynx column is specifically the values for Candian Lynx furs.
head(pelt)
## # A tsibble: 6 x 3 [1Y]
## Year Hare Lynx
## <dbl> <dbl> <dbl>
## 1 1845 19580 30090
## 2 1846 19600 45150
## 3 1847 19610 49150
## 4 1848 11990 39520
## 5 1849 28040 21230
## 6 1850 58000 8420
The time interval for Lynx is year.
pelt %>% feasts::autoplot(Lynx)
help(gafa_stock)
This data set is the historical stock prices for Google, Amazon, Facebook and Apple. ‘Close’ is the closing price for a specific stock on a specific day.
head(gafa_stock)
## # A tsibble: 6 x 8 [!]
## # Key: Symbol [1]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2014-01-02 79.4 79.6 78.9 79.0 67.0 58671200
## 2 AAPL 2014-01-03 79.0 79.1 77.2 77.3 65.5 98116900
## 3 AAPL 2014-01-06 76.8 78.1 76.2 77.7 65.9 103152700
## 4 AAPL 2014-01-07 77.8 78.0 76.8 77.1 65.4 79302300
## 5 AAPL 2014-01-08 77.0 77.9 77.0 77.6 65.8 64632400
## 6 AAPL 2014-01-09 78.1 78.1 76.5 76.6 65.0 69787200
The interval for gafa_stock is weekday.
gafa_stock |> feasts::autoplot(Close)
help(vic_elec)
Demand is the total electricity demand in MWh, for Victoria Australia
head(vic_elec)
## # A tsibble: 6 x 5 [30m] <Australia/Melbourne>
## Time Demand Temperature Date Holiday
## <dttm> <dbl> <dbl> <date> <lgl>
## 1 2012-01-01 00:00:00 4383. 21.4 2012-01-01 TRUE
## 2 2012-01-01 00:30:00 4263. 21.0 2012-01-01 TRUE
## 3 2012-01-01 01:00:00 4049. 20.7 2012-01-01 TRUE
## 4 2012-01-01 01:30:00 3878. 20.6 2012-01-01 TRUE
## 5 2012-01-01 02:00:00 4036. 20.4 2012-01-01 TRUE
## 6 2012-01-01 02:30:00 3866. 20.2 2012-01-01 TRUE
The interval is 30 minute intervals
vic_elec |> feasts::autoplot(Demand) + labs(title = "Half-hourly electricty demand for Victoria, Australia", y= "30 minute intervals", x="Demand in MWh")
Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.
library(dplyr)
gafa_stock |> group_by(Symbol) |> filter(Close == max(Close))
## # A tsibble: 4 x 8 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-10-03 230. 233. 230. 232. 230. 28654800
## 2 AMZN 2018-09-04 2026. 2050. 2013 2040. 2040. 5721100
## 3 FB 2018-07-25 216. 219. 214. 218. 218. 58954200
## 4 GOOG 2018-07-26 1251 1270. 1249. 1268. 1268. 2405600
tute1 <- readr::read_csv("/Users/williamaiken/DATA624/HW/tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Sales, AdBudget, GDP
## date (1): Quarter
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(tute1)
mytimeseries <- tute1 |>
mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(index = Quarter)
mytimeseries |>
pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() +
facet_grid(name ~ ., scales = "free_y")
mytimeseries |>
pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() #+
#facet_grid(name ~ ., scales = "free_y")
If you don’t include facet_grid it gives the plots as stacked plots (sometimes this is what you want)
The USgas package contains data on the demand for natural gas in the US.
library(USgas)
head(us_total)
## year state y
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343
hw_gas <- us_total |>
as_tsibble(index = year, key = state)
hw_gas |> filter(state %in% c("Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut", "Rhode Island")) |> autoplot()
## Plot variable not specified, automatically selected `.vars = y`
hw_tour_raw <- readxl::read_excel("/Users/williamaiken/DATA624/HW/tourism.xlsx")
hw_tour <- hw_tour_raw |>
mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(index = Quarter, key = c(Region, State, Purpose))
head(hw_tour)
## # A tsibble: 6 x 5 [1Q]
## # Key: Region, State, Purpose [1]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
The Region and Purpose with the most trips on average is ‘Melbourne’ and ‘Visiting’ respectively
hw_tour |> filter(Trips == max(Trips))
## # A tsibble: 1 x 5 [1Q]
## # Key: Region, State, Purpose [1]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 2017 Q4 Melbourne Victoria Visiting 985.
hw_tour_reduced <- hw_tour_raw |> mutate(Quarter = yearquarter(Quarter)) |> select(-c('Region', 'Purpose')) |> group_by(Quarter, State) |> mutate(Trips = sum(Trips)) |> ungroup() |> distinct() |> as_tsibble(index = Quarter, key = State)
head(hw_tour_reduced)
## # A tsibble: 6 x 3 [1Q]
## # Key: State [1]
## Quarter State Trips
## <qtr> <chr> <dbl>
## 1 1998 Q1 ACT 551.
## 2 1998 Q2 ACT 416.
## 3 1998 Q3 ACT 436.
## 4 1998 Q4 ACT 450.
## 5 1999 Q1 ACT 379.
## 6 1999 Q2 ACT 558.
Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.
This data set required some processing before it could be analysed
-Interesting the largest drop in Private employment lines up with the 2008 housing crash.
-This series is highly auto-correlated
-unsurprisingly, there is a seasonal increase in employment towards the end of year holidays.
hw_emply <- us_employment |> mutate(Month = yearmonth(Month)) |> select(-Series_ID) |> as_tsibble(index = Month, key = Title)
head(hw_emply)
## # A tsibble: 6 x 4 [1M]
## # Key: Title [1]
## Month Title Employed Series_ID
## <mth> <chr> <dbl> <chr>
## 1 1939 Jan All Employees, Temporary Help Services NA TEMPHELPN
## 2 1939 Feb All Employees, Temporary Help Services NA TEMPHELPN
## 3 1939 Mar All Employees, Temporary Help Services NA TEMPHELPN
## 4 1939 Apr All Employees, Temporary Help Services NA TEMPHELPN
## 5 1939 May All Employees, Temporary Help Services NA TEMPHELPN
## 6 1939 Jun All Employees, Temporary Help Services NA TEMPHELPN
hw_emply |> filter(Title == "Total Private") |> autoplot(Employed)
hw_emply |> filter(Title == "Total Private") |> gg_season()
## Plot variable not specified, automatically selected `y = Employed`
hw_emply |> filter(Title == "Total Private") |> gg_subseries()
## Plot variable not specified, automatically selected `y = Employed`
hw_emply |> filter(Title == "Total Private") |> gg_lag() + geom_point()
## Plot variable not specified, automatically selected `y = Employed`
hw_emply |> filter(Title == "Total Private") |> ACF()
## Response variable not specified, automatically selected `var = Employed`
## # A tsibble: 29 x 3 [1M]
## # Key: Title [1]
## Title lag acf
## <chr> <cf_lag> <dbl>
## 1 Total Private 1M 0.997
## 2 Total Private 2M 0.993
## 3 Total Private 3M 0.990
## 4 Total Private 4M 0.986
## 5 Total Private 5M 0.983
## 6 Total Private 6M 0.980
## 7 Total Private 7M 0.977
## 8 Total Private 8M 0.974
## 9 Total Private 9M 0.971
## 10 Total Private 10M 0.968
## # ℹ 19 more rows
-Interesting the largest drop in brick production occurs in the 1980s, production recovers but the trend changes from positive to negative.
-This series is moderately auto-correlated
-unsurprisingly, there is a seasonal increase in Q4 which is summer and the best time for new construction.
aus_production |> autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
aus_production |> gg_season(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
aus_production |> gg_subseries(Bricks)
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
aus_production |> gg_lag(Bricks) + geom_point()
## Warning: Removed 20 rows containing missing values (gg_lag).
aus_production |> ACF(Bricks)
## # A tsibble: 22 x 2 [1Q]
## lag acf
## <cf_lag> <dbl>
## 1 1Q 0.900
## 2 2Q 0.815
## 3 3Q 0.813
## 4 4Q 0.828
## 5 5Q 0.720
## 6 6Q 0.642
## 7 7Q 0.655
## 8 8Q 0.692
## 9 9Q 0.609
## 10 10Q 0.556
## # ℹ 12 more rows
-Interesting the largest amount of hare pelt harvesting lines up with the American Civil War. Was this material used for making uniforms or did this boon result for the disruption of American fur harvesting?
-This series is negatively auto-correlated due to the cyclic boom and busts.
pelt |> autoplot(Hare)
There is only one data point per year. The seasonality can’t be plotted
#pelt |> gg_season(Hare)
pelt |> gg_subseries(Hare)
pelt |> gg_lag(Hare) + geom_point()
pelt |> ACF(Hare)
## # A tsibble: 19 x 2 [1Y]
## lag acf
## <cf_lag> <dbl>
## 1 1Y 0.658
## 2 2Y 0.214
## 3 3Y -0.155
## 4 4Y -0.401
## 5 5Y -0.493
## 6 6Y -0.401
## 7 7Y -0.168
## 8 8Y 0.113
## 9 9Y 0.307
## 10 10Y 0.340
## 11 11Y 0.296
## 12 12Y 0.206
## 13 13Y 0.0372
## 14 14Y -0.153
## 15 15Y -0.285
## 16 16Y -0.295
## 17 17Y -0.202
## 18 18Y -0.0676
## 19 19Y 0.0956
-The trends for the different Concessions and Types are different. Concessional/General Saftey Net drop sharply at the start of the calendar year while the others increase or stay constant.
-Per the text, there is a seasonal increase is due to people stocking up at the end of the calendar year.
PBS |> filter(ATC2 == "H02") |> autoplot(Cost)
PBS |> filter(ATC2 == "H02") |> gg_season(Cost)
PBS |> filter(ATC2 == "H02") |> gg_subseries(Cost)
I had to reduce the scope of this time series to make gg_lag work. It was unclear for the question what additional filtering or aggregating I should do.
PBS_reduced <- PBS |> filter(ATC2 == "H02", Concession == "Concessional", Type == "Co-payments")
PBS_reduced |> gg_lag(Cost) + geom_point()
PBS |> filter(ATC2 == "H02") |> ACF(Cost)
## # A tsibble: 92 x 6 [1M]
## # Key: Concession, Type, ATC1, ATC2 [4]
## Concession Type ATC1 ATC2 lag acf
## <chr> <chr> <chr> <chr> <cf_lag> <dbl>
## 1 Concessional Co-payments H H02 1M 0.834
## 2 Concessional Co-payments H H02 2M 0.679
## 3 Concessional Co-payments H H02 3M 0.514
## 4 Concessional Co-payments H H02 4M 0.352
## 5 Concessional Co-payments H H02 5M 0.264
## 6 Concessional Co-payments H H02 6M 0.219
## 7 Concessional Co-payments H H02 7M 0.253
## 8 Concessional Co-payments H H02 8M 0.337
## 9 Concessional Co-payments H H02 9M 0.464
## 10 Concessional Co-payments H H02 10M 0.574
## # ℹ 82 more rows
-There was a decreasing in the amplitude of the seasonality going up the 2008 crash which has become more pronounced after the recovery. Maybe gasoline consumption was less affected by seasonality heading up to the crash due to high consumer confidence (even though prices were at the highest since 1990)
-This series is moderately auto-correlated
-Unsurprisingly, there is a seasonal increase in the summer when people are driving the most.
us_gasoline |> autoplot(Barrels)
us_gasoline |> gg_season(Barrels)
us_gasoline |> gg_subseries(Barrels)
us_gasoline |> gg_lag(Barrels) + geom_point()
us_gasoline |> ACF(Barrels)
## # A tsibble: 31 x 2 [1W]
## lag acf
## <cf_lag> <dbl>
## 1 1W 0.893
## 2 2W 0.882
## 3 3W 0.873
## 4 4W 0.866
## 5 5W 0.847
## 6 6W 0.844
## 7 7W 0.832
## 8 8W 0.831
## 9 9W 0.822
## 10 10W 0.808
## # ℹ 21 more rows