# libraries
library(ggplot2)
library(fpp3)
library(dplyr)
library(tidyverse)
help(aus_production)
help(pelt)
help(gafa_stock)
help(vic_elec)
aus_production
## # A tsibble: 218 x 7 [1Q]
## Quarter Beer Tobacco Bricks Cement Electricity Gas
## <qtr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1956 Q1 284 5225 189 465 3923 5
## 2 1956 Q2 213 5178 204 532 4436 6
## 3 1956 Q3 227 5297 208 561 4806 7
## 4 1956 Q4 308 5681 197 570 4418 6
## 5 1957 Q1 262 5577 187 529 4339 5
## 6 1957 Q2 228 5651 214 604 4811 7
## 7 1957 Q3 236 5317 227 603 5259 7
## 8 1957 Q4 320 6152 222 582 4735 6
## 9 1958 Q1 272 5758 199 554 4608 5
## 10 1958 Q2 233 5641 229 620 5196 7
## # ℹ 208 more rows
interval(aus_production)
## <interval[1]>
## [1] 1Q
Looking at the rows, there are four quarters per year and one row per quarter, so the interval for Bricks in aus_production is quarterly.
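As a quick check of that reasoning (a sketch, not part of the exercise; the column name obs_per_year is just illustrative), counting the rows per calendar year should give four observations each:
# four rows per calendar year confirms the quarterly interval
aus_production %>%
  index_by(Year = year(Quarter)) %>%
  summarise(obs_per_year = n()) %>%
  head()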
pelt
## # A tsibble: 91 x 3 [1Y]
## Year Hare Lynx
## <dbl> <dbl> <dbl>
## 1 1845 19580 30090
## 2 1846 19600 45150
## 3 1847 19610 49150
## 4 1848 11990 39520
## 5 1849 28040 21230
## 6 1850 58000 8420
## 7 1851 74600 5560
## 8 1852 75090 5080
## 9 1853 88480 10170
## 10 1854 61280 19600
## # ℹ 81 more rows
interval(pelt)
## <interval[1]>
## [1] 1Y
I can see from pelt that Lynx has yearly/annual intervals.
gafa_stock
## # A tsibble: 5,032 x 8 [!]
## # Key: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2014-01-02 79.4 79.6 78.9 79.0 67.0 58671200
## 2 AAPL 2014-01-03 79.0 79.1 77.2 77.3 65.5 98116900
## 3 AAPL 2014-01-06 76.8 78.1 76.2 77.7 65.9 103152700
## 4 AAPL 2014-01-07 77.8 78.0 76.8 77.1 65.4 79302300
## 5 AAPL 2014-01-08 77.0 77.9 77.0 77.6 65.8 64632400
## 6 AAPL 2014-01-09 78.1 78.1 76.5 76.6 65.0 69787200
## 7 AAPL 2014-01-10 77.1 77.3 75.9 76.1 64.5 76244000
## 8 AAPL 2014-01-13 75.7 77.5 75.7 76.5 64.9 94623200
## 9 AAPL 2014-01-14 76.9 78.1 76.8 78.1 66.1 83140400
## 10 AAPL 2014-01-15 79.1 80.0 78.8 79.6 67.5 97909700
## # ℹ 5,022 more rows
interval(gafa_stock)
## <interval[1]>
## [1] !
Close from gafa_stock is observed once per trading day, but the interval prints as irregular ([!]) because weekends and holidays are missing from the index.
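To see why the interval prints as [!] rather than 1D, here is a small sketch (the Gap column name is just illustrative) looking at the spacing between consecutive trading dates for AAPL; most gaps are 1 day, but weekends and holidays create longer ones:
gafa_stock %>%
  filter(Symbol == "AAPL") %>%
  # days elapsed since the previous observation
  mutate(Gap = as.integer(Date - lag(Date))) %>%
  as_tibble() %>%
  count(Gap)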
vic_elec
## # A tsibble: 52,608 x 5 [30m] <Australia/Melbourne>
## Time Demand Temperature Date Holiday
## <dttm> <dbl> <dbl> <date> <lgl>
## 1 2012-01-01 00:00:00 4383. 21.4 2012-01-01 TRUE
## 2 2012-01-01 00:30:00 4263. 21.0 2012-01-01 TRUE
## 3 2012-01-01 01:00:00 4049. 20.7 2012-01-01 TRUE
## 4 2012-01-01 01:30:00 3878. 20.6 2012-01-01 TRUE
## 5 2012-01-01 02:00:00 4036. 20.4 2012-01-01 TRUE
## 6 2012-01-01 02:30:00 3866. 20.2 2012-01-01 TRUE
## 7 2012-01-01 03:00:00 3694. 20.1 2012-01-01 TRUE
## 8 2012-01-01 03:30:00 3562. 19.6 2012-01-01 TRUE
## 9 2012-01-01 04:00:00 3433. 19.1 2012-01-01 TRUE
## 10 2012-01-01 04:30:00 3359. 19.0 2012-01-01 TRUE
## # ℹ 52,598 more rows
interval(vic_elec)
## <interval[1]>
## [1] 30m
Finally, Demand from vic_elec is recorded at half-hourly (30-minute) intervals.
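As a quick sanity check (a sketch), each calendar date should appear 48 times if the data really are half-hourly:
vic_elec %>%
  as_tibble() %>%
  count(Date) %>%   # expect n = 48 for every date
  head()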
aus_production |> autoplot(Bricks)
pelt |> autoplot(Lynx)
gafa_stock |> autoplot(Close)
vic_elec |> autoplot(Demand)
vic_elec |>
autoplot(Demand) +
labs(
title = "Electricity demand in Victoria",
x = "time",
y = "demand (megawatts)")
gafa_stock %>% group_by(Symbol) %>%
filter(Close == max(Close)) %>%
ungroup() %>% select(Symbol, Date, Close)
## # A tsibble: 4 x 3 [!]
## # Key: Symbol [4]
## Symbol Date Close
## <chr> <date> <dbl>
## 1 AAPL 2018-10-03 232.
## 2 AMZN 2018-09-04 2040.
## 3 FB 2018-07-25 218.
## 4 GOOG 2018-07-26 1268.
# The short URL https://bit.ly/fpptute1 (checked with a URL-expander service) redirects to https://otexts.com/fpp3/extrafiles/tute1.csv, so the data can be read directly from the full URL
tute1 <- read.csv("https://otexts.com/fpp3/extrafiles/tute1.csv")
Converting the data to time series
mytimeseries <- tute1 %>%
mutate(Quarter = yearquarter(Quarter)) %>%
as_tsibble(index = Quarter)
Constructing time series plots of each of the three series
mytimeseries %>%
pivot_longer(-Quarter) %>%
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() +
facet_grid(name ~ ., scales = "free_y")
mytimeseries %>%
pivot_longer(-Quarter) %>%
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line()
Without facet_grid(), the three series are overlaid on a single plot with one shared y-axis. Because the variables are on very different scales, the smallest series (GDP) is compressed into a nearly flat line and its variation is hard to see; the free y-scales of the facetted plot avoid this.
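One alternative, sketched here only for illustration (the index100 name is mine), is to rescale each series to a common index (first quarter = 100) so all three can share one axis without GDP being flattened:
mytimeseries %>%
  pivot_longer(-Quarter) %>%
  group_by(name) %>%
  # rescale each series so its first quarter equals 100
  mutate(index100 = 100 * value / first(value)) %>%
  ungroup() %>%
  ggplot(aes(x = Quarter, y = index100, colour = name)) +
  geom_line() +
  labs(y = "Index (first quarter = 100)")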
library(USgas)
us_gas_ts <- us_total %>% as_tsibble(index = year, key = state)
us_gas_ts %>%
filter(state %in% c("Maine", "Vermont", "New Hampshire", "Massachusetts",
"Connecticut", "Rhode Island")) %>%
autoplot(y/1e3) + labs(y = "billion cubic feet")
us_gas_ts %>%
filter(state %in% c("Maine", "Vermont", "New Hampshire", "Massachusetts",
"Connecticut", "Rhode Island")) %>%
ggplot(aes(x = year, y = y / 1e3, color = state)) +
geom_line() +
facet_wrap(~ state, scales = "free_y") + # Adds a separate panel for each state
labs(
y = "Billion Cubic Feet",
x = "Year",
title = "Natural Gas Consumption in Selected States"
) +
theme_minimal()
# The short URL https://bit.ly/fpptourism (checked with a URL-expander service) redirects to http://OTexts.com/fpp3/extrafiles/tourism.xlsx, so the file can be downloaded directly from the full URL
library(readxl)
library(httr)
url <- "http://OTexts.com/fpp3/extrafiles/tourism.xlsx"
temp_file <- tempfile(fileext = ".xlsx")
response <- GET(url, write_disk(temp_file, overwrite = TRUE))
tourism <- read_excel(temp_file)
library(tsibble)
library(lubridate)
tourism <- tourism %>%
mutate(Quarter = yearquarter(Quarter))
tourism_tsibble <- tourism %>%
as_tsibble(
index = Quarter,
key = c(Region, State, Purpose))
tourism_tsibble
## # A tsibble: 24,320 x 5 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ℹ 24,310 more rows
tourism
## # A tibble: 24,320 × 5
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ℹ 24,310 more rows
tourism %>%
as_tibble() %>%
summarise(Trips = mean(Trips), .by=c(Region, Purpose)) %>%
filter(Trips == max(Trips))
## # A tibble: 1 × 3
## Region Purpose Trips
## <chr> <chr> <dbl>
## 1 Sydney Visiting 747.
# Aggregate the data: combine Purposes and Regions, and calculate total Trips by State
state_tsibble <- tourism %>%
group_by(State, Quarter) %>%
summarise(TotalTrips = sum(Trips, na.rm = TRUE), .groups = "drop") %>%
as_tsibble(index = Quarter, key = State)
state_tsibble
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter TotalTrips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ℹ 630 more rows
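As a quick usage check of the new tsibble (not required by the exercise), the state totals can be plotted directly:
state_tsibble %>%
  autoplot(TotalTrips) +
  labs(title = "Total quarterly overnight trips by State", y = "Total trips")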
# preparing each series; these fpp3 datasets are already tsibbles, so the as_tsibble() calls below are redundant but harmless
us_emp_ts <- us_employment %>% filter(Title == "Total Private") %>% as_tsibble(index = Month)
bricks_ts <- aus_production %>% select(Bricks) %>% as_tsibble(index = Quarter)
hare_ts <- pelt %>% select(Hare) %>% as_tsibble(index = Year)
h02_ts <- PBS %>% filter(ATC2 == "H02") %>% summarise(TotalCost = sum(Cost)) %>% as_tsibble(index = Month)
barrels_ts <- us_gasoline %>% as_tsibble(index = Week)
autoplot(us_emp_ts, Employed) + ggtitle("Total Private Employment (US)")
gg_season(us_emp_ts, Employed) + ggtitle("Seasonal Plot: US Private Employment")
gg_subseries(us_emp_ts, Employed) + ggtitle("Subseries Plot: US Private Employment")
gg_lag(us_emp_ts, Employed) + ggtitle("Lag Plot: US Private Employment")
us_emp_ts %>%
ACF(Employed) %>%
autoplot() +
ggtitle("ACF: US Private Employment") +
labs(y = "Autocorrelation")
US employment shows clear seasonality and a strong upward trend over time. I can also see a smaller cyclic pattern that repeats over the years; some of these features are easier to see in the month-by-month panels from gg_subseries(). There are some unusual years where financial crises or recessions took place, such as 2008 (very clear), and earlier in the 1980s and 1940s.
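To check the 2008 dip mentioned above, a small sketch zooming into 2006-2012 with filter_index():
us_emp_ts %>%
  filter_index("2006" ~ "2012") %>%
  autoplot(Employed) +
  ggtitle("US Private Employment, 2006-2012")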
autoplot(bricks_ts) + ggtitle("Bricks Production (Australia)")
gg_season(bricks_ts) + ggtitle("Seasonal Plot: Bricks Production")
gg_subseries(bricks_ts) + ggtitle("Subseries Plot: Bricks Production")
gg_lag(bricks_ts) + ggtitle("Lag Plot: Bricks Production")
ACF(bricks_ts) %>% autoplot() + ggtitle("ACF: Bricks Production")
There is an upward trend from the start of the series until about halfway through (around 1980); the series then dips abruptly, rebounds, and trends downward for the rest of the data. There also seems to be a cyclic tendency that repeats over time. Looking at the gg_subseries() plot, Q1 production has consistently been lower than the other quarters.
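A quick numeric check of the Q1 claim (a sketch; Qtr and MeanBricks are illustrative names): average Bricks production by quarter of the year, ignoring the missing values at the end of the series.
aus_production %>%
  as_tibble() %>%
  mutate(Qtr = quarter(as.Date(Quarter))) %>%   # quarter of the year (1-4)
  group_by(Qtr) %>%
  summarise(MeanBricks = mean(Bricks, na.rm = TRUE))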
autoplot(hare_ts) + ggtitle("Hare Pelts (Pelt Dataset)")
gg_subseries(hare_ts) + ggtitle("Subseries Plot: Hare Pelts")
gg_lag(hare_ts) + ggtitle("Lag Plot: Hare Pelts")
ACF(hare_ts) %>% autoplot() + ggtitle("ACF: Hare Pelts")
There are huge ups and downs over time, resembling cyclic patterns, though the swings are not equal in magnitude. Beyond that it is hard to say much, other than that peaks appear in certain years, such as the early 1860s and mid-1880s.
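To look at the apparent cycle more closely (a sketch), the ACF can be extended to more lags; repeated peaks roughly every k lags would suggest a cycle of about k years.
hare_ts %>%
  ACF(Hare, lag_max = 40) %>%
  autoplot() +
  ggtitle("ACF: Hare Pelts, 40 lags")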
autoplot(h02_ts) + ggtitle("H02 Cost (PBS Dataset)")
gg_season(h02_ts) + ggtitle("Seasonal Plot: H02 Cost")
gg_subseries(h02_ts) + ggtitle("Subseries Plot: H02 Cost")
gg_lag(h02_ts) + ggtitle("Lag Plot: H02 Cost")
ACF(h02_ts) %>% autoplot() + ggtitle("ACF: H02 Cost")
A repeating pattern appears here too, combined with a slight upward trend that is clearer earlier in the series than later. The data is strongly seasonal, with a consistent dip in February followed by a slow increase throughout the year. The month-by-month view from gg_subseries() shows a larger magnitude of change in the later months of the year (largest in December, shrinking as we go back towards February). The seasonality also shows up clearly in the ACF.
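A quick numeric check of the February dip (a sketch; MonthOfYear and MeanCost are illustrative names): mean total cost by calendar month.
h02_ts %>%
  as_tibble() %>%
  mutate(MonthOfYear = month(as.Date(Month), label = TRUE)) %>%
  group_by(MonthOfYear) %>%
  summarise(MeanCost = mean(TotalCost))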
autoplot(barrels_ts) + ggtitle("US Gasoline Barrels")
gg_season(barrels_ts) + ggtitle("Seasonal Plot: US Gasoline Barrels")
gg_subseries(barrels_ts) + ggtitle("Subseries Plot: US Gasoline Barrels")
gg_lag(barrels_ts) + ggtitle("Lag Plot: US Gasoline Barrels")
ACF(barrels_ts) %>% autoplot() + ggtitle("ACF: US Gasoline Barrels")
There is a slow upward trend until about the mid-2000s, followed by a downturn (around 2008, so perhaps related to the financial crisis) and then a short upward trend again. The gg_season() plot is difficult to read meaningfully, and the same goes for the gg_subseries() plot, apart from the weekly averages moving up and down. Some seasonality is visible in the ACF and gg_lag() plots.
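Because the weekly series is noisy, a sketch that aggregates to annual averages makes the trend and the mid-2000s turning point easier to see (the first and last years are partial; MeanBarrels is an illustrative name):
us_gasoline %>%
  index_by(Year = year(as.Date(Week))) %>%   # calendar year of each week's start date
  summarise(MeanBarrels = mean(Barrels)) %>%
  autoplot(MeanBarrels) +
  ggtitle("US gasoline: annual average of weekly barrels")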