library(tsibbledata)
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ ggplot2 3.5.1
## ✔ dplyr 1.1.4 ✔ tsibble 1.1.6
## ✔ tidyr 1.3.1 ✔ feasts 0.4.2
## ✔ lubridate 1.9.4 ✔ fable 0.4.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
# Open the help page for each of the four datasets plotted below.
help("aus_production")
help("pelt")
help("gafa_stock")
help("vic_elec")
# aus_production: time plot of the Bricks series
aus_production |> autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# pelt: time plot of the Lynx series
pelt |> autoplot(Lynx)
# gafa_stock: time plot of the Close series
gafa_stock |> autoplot(Close)
# vic_elec: time plot of the Demand series
vic_elec |> autoplot(Demand)
- Bricks shows an overall upward trend (with some sharp drops) until
  just after 1980, when it starts declining. The ups and downs seem to
  repeat yearly, suggesting a seasonal pattern.
- Lynx rises and falls repeatedly, approximately every 10 years,
  suggesting a cyclical pattern.
- The stock closing prices also show an upward trend (with a bit of a
  downward trend towards the end of 2018). I don’t think that there is
  any clear seasonality or cycle visible.
- Demand shows very strong seasonality, with clear spikes around the
  beginning and middle of each year (the summer and winter months in
  Victoria).
# Demand time plot again, now with a title and axis labels.
vic_elec |>
  autoplot(Demand) +
  ggtitle("Electricity Demand in Victoria, Australia") +
  xlab("Time (30 mins)") +
  ylab("Demand (MWh)")
# For each stock symbol, keep the row(s) where Close equals that symbol's
# maximum Close, then list them by symbol and date.
gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close)) |>
  arrange(Symbol, Date)
# Read tute1.csv from the current user's Downloads folder.
# The path is built from the home directory instead of being hard-coded to
# one user's account, so the script runs unchanged for other users.
tute1_path <- file.path(path.expand("~"), "Downloads", "tute1.csv")
tute1 <- readr::read_csv(tute1_path, show_col_types = FALSE)
# Preview the first rows of the imported data.
head(tute1)
# Convert the Quarter column with yearquarter() and use it as the tsibble
# index.
mytimeseries <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)
# Reshape to long form (one row per Quarter and series) and draw each
# series in its own panel with an independent y-axis.
mytimeseries |>
  pivot_longer(cols = !Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")
# Same plot without facet_grid(): all series are drawn in a single panel
# on a shared y-axis.
mytimeseries |>
  pivot_longer(cols = !Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line()
With facet_grid(), each variable is plotted in its own panel, so the
rises and falls and the overall pattern of each series are much easier
to see individually. However, we cannot directly compare the variables
with one another, since they are plotted separately and on different scales.
If the focus is on the patterns (trend, seasonality, etc.) of each series, facet_grid() should be used. If the goal is instead to compare the levels/magnitudes of the variables, facet_grid() should be removed — at the risk of losing the detail in the smaller series when they are plotted against much larger ones.
# Install USgas only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("USgas", quietly = TRUE)) {
  install.packages("USgas")
}
library(USgas)
# Open the help page for the us_total dataset.
?us_total
# Turn us_total into a tsibble indexed by year, with one series per state.
us_gas <- as_tsibble(us_total, key = state, index = year)
# Restrict the data to the six New England states.
new_england_gas <- filter(
  us_gas,
  state %in% c(
    "Maine", "Vermont", "New Hampshire",
    "Massachusetts", "Connecticut", "Rhode Island"
  )
)
# All New England states on one set of axes.
new_england_gas |>
  autoplot(y) +
  ggtitle("New England Annual Natural Gas Consumption") +
  xlab("Year") +
  ylab("Million Cubic Feet") +
  theme_minimal()
# One panel per state, each with its own y-axis scale.
new_england_gas |>
  autoplot(y) +
  ggtitle("New England Annual Natural Gas Consumption") +
  xlab("Year") +
  ylab("Million Cubic Feet") +
  facet_wrap(vars(state), scales = "free_y") +
  theme_minimal()
Examining the states both together and individually shows that:
- Massachusetts consistently consumes the most gas and has an overall
  upward trend.
- Connecticut is also trending upward and comes close to Massachusetts,
  especially after a sharp spike around 2010.
- New Hampshire, Rhode Island, and Vermont consume much less gas than MA
  and CT: NH shows very slight growth with fluctuations, RI shows a
  steady increase after big dips in the early 2000s, and Vermont shows
  very clear growth in the 2010s but still consumes significantly less
  than the other New England states.
- Maine is also at the lower end of consumption and is the only state
  showing a steady decline in recent years.
# Read tourism.xlsx from the current user's Downloads folder.
# The path is built from the home directory instead of being hard-coded to
# one user's account, so the script runs unchanged for other users.
tourism_xl <- readxl::read_excel(
  file.path(path.expand("~"), "Downloads", "tourism.xlsx")
)
# Convert Quarter with yearquarter() and build a tsibble indexed by Quarter,
# keyed by Region, State and Purpose.
tourism <- as_tsibble(
  mutate(tourism_xl, Quarter = yearquarter(Quarter)),
  key = c(Region, State, Purpose),
  index = Quarter
)
# Preview the first rows.
head(tourism)
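The Region/Purpose combination with the highest average number of overnight trips is Sydney/Visiting (approximately 747 trips per quarter when averaged over all quarters). The ~985.28 figure for Melbourne/Visiting is a single-quarter maximum, not an average: a grouped summarise() on a tsibble retains the Quarter index, so the data must be converted to a tibble before averaging over time.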
# Average overnight trips for every Region/Purpose combination, largest first.
# The tsibble is converted to a plain tibble before summarising: summarise()
# on a tsibble always keeps the Quarter index, which would return one row per
# Region/Purpose/Quarter rather than a single average over the whole period.
tourism |>
  as_tibble() |>
  summarise(
    avg_trips = mean(Trips, na.rm = TRUE),
    .by = c(Region, Purpose)
  ) |>
  arrange(desc(avg_trips))
# Total trips by State for each quarter (Regions and Purposes combined).
# The result is computed once and left in temporal order so that it remains a
# correctly ordered tsibble.
tourism_state <- tourism |>
  group_by(State) |>
  summarise(Trips = sum(Trips, na.rm = TRUE))
tourism_state
# Rank the states by their total trips over the whole period. This works on a
# tibble copy: sorting the tsibble itself by Trips disturbs its temporal
# ordering and only ranks individual State/Quarter rows, not state totals.
tourism_state |>
  as_tibble() |>
  summarise(Trips = sum(Trips), .by = State) |>
  arrange(desc(Trips)) |>
  head(10)
New South Wales is the state with the most total trips.
# Preview us_employment and open its help page.
head(us_employment)
help("us_employment")
# Keep only the rows whose Title is "Total Private".
us_emp <- filter(us_employment, Title == "Total Private")
# Total Private employment: time plot, seasonal plot, subseries plot,
# lag plot and ACF plot of the Employed series.
us_emp |> autoplot(Employed)
us_emp |> gg_season(Employed)
## Warning: `gg_season()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_season()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
us_emp |> gg_subseries(Employed)
## Warning: `gg_subseries()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_subseries()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
us_emp |> gg_lag(Employed)
## Warning: `gg_lag()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_lag()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
autoplot(ACF(us_emp, Employed))
# Bricks (aus_production): time plot, seasonal plot, subseries plot,
# lag plot and ACF plot.
aus_production |> autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
aus_production |> gg_season(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
aus_production |> gg_subseries(Bricks)
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
aus_production |> gg_lag(Bricks)
## Warning: Removed 20 rows containing missing values (gg_lag).
autoplot(ACF(aus_production, Bricks))
# Hare (pelt): time plot, lag plot and ACF plot.
pelt |> autoplot(Hare)
pelt |> gg_lag(Hare)
autoplot(ACF(pelt, Hare))
# gg_season() is skipped: it requires data with a known seasonal period, and
# the pelt dataset has no seasonal structure.
# gg_season(pelt, Hare)
# gg_subseries() is skipped as well: the data is annual, so there are no
# repeating sub-periods within a year.
# gg_subseries(pelt, Hare)
# H02 Cost (PBS)
help("PBS")
# Rows of PBS whose ATC2 code is "H02"; preview the first few.
pbs_h02_filt <- filter(PBS, ATC2 == "H02")
head(pbs_h02_filt)
# Sum Cost over the H02 rows to obtain the series that is plotted below.
pbs_h02 <- summarise(pbs_h02_filt, Cost = sum(Cost))
# Time plot, seasonal plot, subseries plot, lag plot and ACF plot.
pbs_h02 |> autoplot(Cost)
pbs_h02 |> gg_season(Cost)
pbs_h02 |> gg_subseries(Cost)
pbs_h02 |> gg_lag(Cost)
autoplot(ACF(pbs_h02, Cost))
# Barrels (us_gasoline): help page, then time plot, seasonal plot,
# subseries plot, lag plot and ACF plot.
help("us_gasoline")
us_gasoline |> autoplot(Barrels)
us_gasoline |> gg_season(Barrels)
us_gasoline |> gg_subseries(Barrels)
us_gasoline |> gg_lag(Barrels)
autoplot(ACF(us_gasoline, Barrels))