library("fpp3")
## Exploring datasets
# explore aus_production dataset
?aus_production
# explore pelt dataset
?pelt
# explore gafa_stock dataset
?gafa_stock
# explore vic_elec dataset
?vic_elec
From the above R documentation pages:
The interval of aus_production is quarterly.
The interval of pelt is yearly between 1845 and 1935.
The interval of gafa_stock is irregular trading days between 2014-2018.
The interval of vic_elec is half-hourly.
library(scales)
## Time plot for each series.
# aus_production
aus_production %>%
autoplot(Bricks) +
labs(
title = "Australian Brick Production", # add plot title
x = "Quarter", # add x-axis title
y = "Millions of Bricks") + # add y-axis title
theme(
plot.title = element_text(hjust=0.5, face = "bold") # center and bold plot title
)
# pelt
pelt %>%
autoplot(Lynx) +
labs(
title = "Canadian Lynx Pelt Trading", # add plot title
x = "Year", # add x-axis title
y = "Number of Canadian\n Lynx Pelts Traded") + # add y-axis title
theme(
plot.title = element_text(hjust=0.5, face = "bold"), # center and bold plot title
axis.title.y = element_text(hjust=0.5) # center y-axis label
)
# gafa_stock
gafa_stock %>%
autoplot(Close) + # plot
labs(
title = "Closing Price of GAFA Stock",
x = "Day",
y = "USD"
) + # add title and x/y axis labels
scale_x_date(
date_breaks = "6 months",
date_labels = "%Y-%m"
) + # add date breaks
scale_y_continuous(
labels = comma # add commas to y axis
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold title
axis.title.x = element_text(hjust = 0.5), # center x-axis label
axis.title.y = element_text(hjust = 0.5), # center y-axis label
axis.text.x = element_text(angle = 45, hjust=1) # rotate x-axis ticks
)
# vic_elec
vic_elec %>%
autoplot(Demand) +
labs(
title = "Electricity Demand in Victoria, Australia", # add plot title
x = "Time", # add x-axis title
y = "Electricity Demand (MWh)") + # add y-axis title
scale_x_datetime(date_breaks = "60 day", date_labels = "%b %d") + # add more tick marks on x-axis
theme(
plot.title = element_text(hjust=0.5, face = "bold"), # center and bold plot title
axis.title.y = element_text(hjust=0.5), # center y-axis label
axis.text.x = element_text(angle = 45, hjust=1) # rotate x-axis ticks
)
gafa_stock %>%
group_by(Symbol) %>% # group by company stock symbol
filter(Close == max(Close)) %>% # keep only the rows with the peak close
select(Symbol, Date, Close) # select the Company stock name, date, and closing stock price.
## # A tsibble: 4 x 3 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date Close
## <chr> <date> <dbl>
## 1 AAPL 2018-10-03 232.
## 2 AMZN 2018-09-04 2040.
## 3 FB 2018-07-25 218.
## 4 GOOG 2018-07-26 1268.
# read csv file and store in tute1
tute1 <- read.csv("/Users/kristinlussi/Documents/MSDS/DATA 624/tute1.csv")
# Convert data into time series --> using the code from textbook
mytimeseries <- tute1 |>
mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(index = Quarter)
# Construct time series plots of each of the three series --> using the code from the textbook
mytimeseries |>
pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() +
facet_grid(name ~ ., scales = "free_y")
Checking to see what happens when you don’t use facet_grid in the plot code…
# Construct time series plots of each of the three series --> removing facet_grid
mytimeseries |>
pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line()
It puts all three time series plots on the same graph.
library(USgas)
library(ggplot2)
## Create tsibble from us_total
us_total_tsibble <- us_total %>%
as_tsibble(index = year, key = state)
## Plot the annual natural gas consumption by state for the New England area
## (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).
us_total_tsibble %>%
filter(state %in% c("Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut", "Rhode Island")) %>% # filter for the New England states
ggplot(aes(x=year, y=y, color = state)) + # create ggplot
geom_line() + # specify line plot
labs(
title = "Annual Natural Gas\nConsumption in New England", # add plot title
x = "Year", # add x-axis title
y = "Gas Consumption\n(Million Cubic Feet)", # add y-axis title
color = "State" # adjsut legend title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title
x.axis.title = element_text(hjust = 0.5), # center x axis title
legend.title = element_text(hjust = 0.5, face = "bold") # center and bold legend title
)
library(readxl)
tourism <- read_excel("/Users/kristinlussi/Documents/MSDS/DATA 624/tourism.xlsx")
tourism_tsibble <- tourism %>%
mutate(Quarter = yearquarter(Quarter)) %>% # convert to yearquarter object
as_tsibble( # convert dataframe to tsibble
index = Quarter, # Quarter is the time index
key = c(Region, State, Purpose)) # use Region, State, and Purpose as keys
head(tourism_tsibble)
## # A tsibble: 6 x 5 [1Q]
## # Key: Region, State, Purpose [1]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
tourism_tsibble %>%
group_by(Region, Purpose) %>% # group the tsibble by region and purpose
summarise(avg_trips = Trips) %>% # calculate the average trips %>%
ungroup() %>%
filter(avg_trips == max(avg_trips)) # keep only the max avg_trips
## # A tsibble: 1 x 4 [1Q]
## # Key: Region, Purpose [1]
## Region Purpose Quarter avg_trips
## <chr> <chr> <qtr> <dbl>
## 1 Melbourne Visiting 2017 Q4 985.
The combination with the maximum number of overnight trips on average is Melbourne and Visiting friends and relatives.
new_tsibble <- tourism %>%
mutate(Quarter = yearquarter(Quarter)) %>% # convert to yearquarter object
group_by(Quarter, State) %>% # group by Quarter and State
summarise(Total_Trips = sum(Trips, na.rm = TRUE), .groups = "drop") %>% # sum Trips across Region & Purpose
as_tsibble( # convert dataframe to tsibble
index = Quarter, # Quarter is the time index
key = State # select only State as the key
)
head(new_tsibble)
## # A tsibble: 6 x 3 [1Q]
## # Key: State [1]
## Quarter State Total_Trips
## <qtr> <chr> <dbl>
## 1 1998 Q1 ACT 551.
## 2 1998 Q2 ACT 416.
## 3 1998 Q3 ACT 436.
## 4 1998 Q4 ACT 450.
## 5 1999 Q1 ACT 379.
## 6 1999 Q2 ACT 558.
# Time Plot
time_plot_employment <- us_employment %>%
filter(Title == "Total Private") %>% # filter for "Total Private"
autoplot(Employed) +
labs(
title = "Time Plot" # add plot title
) +
theme(
plot.title= element_text(hjust = 0.5, face = "bold"), # ceter and bold plot title
y.axis.title = element_text(hjust = 0.5), # center y axis title
x.axis.title = element_text(hjust = 0.5) # center x axis title
)
# Seasonal Plot
seasonal_plot_employment <- us_employment %>%
filter(Title == "Total Private") %>% # filter for "Total Private"
gg_season(Employed) + # seasonal plot
labs(
title = "Seasonal Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title
)
# Subseries Plot
subseries_plot_employment <-us_employment %>%
filter(Title == "Total Private") %>% # filter for "Total Private"
gg_subseries(Employed) + # subseries plot
labs(
title = "Subseries Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title
)
# Lag Plot
lag_plot_employment <- us_employment %>%
filter(Title == "Total Private") %>% # filter for "Total Private"
gg_lag(Employed) + # lag plot
labs(
title = "Lag Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title
axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible
)
# Autocorrelation Plot
acf_plot_employment <- us_employment %>%
filter(Title == "Total Private") %>% # filter for "Total Private"
ACF(Employed, lag_max = 2000) %>% # ACF plot
autoplot() +
labs(
title = "ACF Plot"
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title
axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible
)
# combine plots into one grid
library(gridExtra)
grid.arrange(
time_plot_employment,
seasonal_plot_employment,
subseries_plot_employment,
lag_plot_employment,
acf_plot_employment,
nrow = 2,
ncol = 3
)
Can you spot any seasonality, cyclicity and trend? The time plot shows a clear upward long-term trend in employment.
What do you learn about the series? The series is dominated by the upward trend in employment. There are no strong seasonal trends, but the series does show cyclical dips associated with recessions.
What can you say about the seasonal patterns? There are no strong seasonal patterns by looking at the plots. The upward trend is very strong and overpowers any other patterns that may be present.
Can you identify any unusual years? There are a few unusual years.
In the mid 1940s there was a dip on the time plot. There was a recession in 1945, so this dip makes sense.
Around 1983 there was a large dip on the time plot. There was a recession in the early 1980s, so this dip makes sense.
Around 2008-2010 there was a large dip on the time plot. There was a recession in 2008, so this dip makes sense.
# Time Plot
time_plot_bricks <- aus_production %>%
autoplot(Bricks) +
labs(
title = "Time Plot" # add plot title
) +
theme(
plot.title= element_text(hjust = 0.5, face = "bold"), # ceter and bold plot title
y.axis.title = element_text(hjust = 0.5), # center y axis title
x.axis.title = element_text(hjust = 0.5) # center x axis title
)
# Seasonal Plot
seasonal_plot_bricks <- aus_production %>%
gg_season(Bricks) + # seasonal plot
labs(
title = "Seasonal Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title
)
# Subseries Plot
subseries_plot_bricks <- aus_production %>%
gg_subseries(Bricks) + # subseries plot
labs(
title = "Subseries Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title
)
# Lag Plot
lag_plot_bricks <- aus_production %>%
gg_lag(Bricks) + # lag plot
labs(
title = "Lag Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title
axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible
)
# Autocorrelation Plot
acf_plot_bricks <- aus_production %>%
ACF(Bricks, lag_max = 200) %>% # ACF plot
autoplot()+
labs(
title = "ACF Plot"
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title
axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible
)
# combine plots into one grid
library(gridExtra)
grid.arrange(
time_plot_bricks,
seasonal_plot_bricks,
subseries_plot_bricks,
lag_plot_bricks,
acf_plot_bricks,
nrow = 2,
ncol = 3
)
Can you spot any seasonality, cyclicity and trend? The brick production series exhibits a clear upward trend until the late 1970s, followed by a downward trend with more volatility. Strong quarterly seasonality is shown with the seasonal plot with higher production in Q2 and Q3. Additionally, there are some longer term cycles that may be tied to construction and economic activity.
What do you learn about the series? There is seasonality with this time series with higher production levels during Q2 and Q3. The series also has cyclical patterns tied to economic activity.
What can you say about the seasonal patterns? Production levels are consistently higher during Q2 and Q3 throughout the time series. This aligns with construction cycles, which are more active in the spring and summer.
Can you identify any unusual years? There are large dips in the following years:
Mid 1970s: there was a sharp decline in production, corresponding to a recession and possibly the energy crisis of the 1970s
Early 1980s: there was a sharp decline in production, corresponding to the recession
# Time Plot
time_plot_hare <- pelt %>%
autoplot(Hare) +
labs(
title = "Time Plot" # add plot title
) +
theme(
plot.title= element_text(hjust = 0.5, face = "bold"), # ceter and bold plot title
y.axis.title = element_text(hjust = 0.5), # center y axis title
x.axis.title = element_text(hjust = 0.5) # center x axis title
)
# Seasonal Plot
# No seasonal plot because the time interval is annual!
# Subseries Plot
subseries_plot_hare <- pelt %>%
gg_subseries(Hare) + # subseries plot
labs(
title = "Subseries Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title
)
# Lag Plot
lag_plot_hare <- pelt %>%
gg_lag(Hare) + # lag plot
labs(
title = "Lag Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title
axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible
)
# Autocorrelation Plot
acf_plot_hare <- pelt %>%
ACF(Hare) %>% # ACF plot
autoplot() +
labs(
title = "ACF Plot"
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title
axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible
)
# combine plots into one grid
library(gridExtra)
grid.arrange(
time_plot_hare,
subseries_plot_hare,
lag_plot_hare,
acf_plot_hare,
nrow = 2,
ncol = 2
)
Can you spot any seasonality, cyclicity and trend? Since this is annual data, there is no seasonality. The time plot shows no long-term upward or downward trends. The time plot, lag plot, and the ACF plot all indicate a cyclical trend.
Time Plot: Clearly shows a repeated up and down cycle of about 10 years.
Lag Plot: The looping patterns indicate nonlinear cyclical dependence.
ACF Plot: Alternating positive and negative spikes indicate a quasi-periodic cycle.
What do you learn about the series? This time series exhibits no long-term trend or seasonality, but shows a strong 10-year cyclical pattern.
What can you say about the seasonal patterns? There are no seasonal patterns since the time series interval is annual.
Can you identify any unusual years? There are two points on the time plot where the population reaches nearly zero.
Early 1860s
Early 1900s
# Time Plot
time_plot_cost <- PBS %>%
filter(ATC2 == "H02") %>% # filter for "Total H02
summarise(total_cost = sum(Cost)) %>% # calculate the total cost of H02 as there are 4 different subgroups
autoplot(total_cost) +
labs(
title = "Time Plot"# add plot title
) +
theme(
plot.title= element_text(hjust = 0.5, face = "bold"), # ceter and bold plot title
y.axis.title = element_text(hjust = 0.5), # center y axis title
x.axis.title = element_text(hjust = 0.5) # center x axis title
)
# Seasonal Plot
seasonal_plot_cost <- PBS %>%
filter(ATC2 == "H02") %>% # filter for "H02"
summarise(total_cost = sum(Cost)) %>% # calculate the total cost of H02 as there are 4 different subgroups
gg_season(total_cost) + # seasonal plot
labs(
title = "Seasonal Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title
)
# Subseries Plot
subseries_plot_cost <- PBS %>%
filter(ATC2 == "H02") %>% # filter for "H02"
summarise(total_cost = sum(Cost)) %>% # calculate the total cost of H02 as there are 4 different subgroups
gg_subseries(total_cost) + # subseries plot
labs(
title = "Subseries Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title
)
# Lag Plot
lag_plot_cost <- PBS %>%
filter(ATC2 == "H02") %>% # filter for "H02"
summarise(total_cost = sum(Cost)) %>% # calculate the total cost of H02 as there are 4 different subgroups
gg_lag(total_cost) + # lag plot
labs(
title = "Lag Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title
axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible
)
# Autocorrelation Plot
acf_plot_cost <- PBS %>%
filter(ATC2 == "H02") %>% # filter for "H02"
summarise(total_cost = sum(Cost)) %>% # calculate the total cost of H02 as there are 4 different subgroups
ACF(total_cost) %>% # ACF plot
autoplot() +
labs(
title = "ACF Plot"
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title
axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible
)
# combine plots into one grid
library(gridExtra)
grid.arrange(
time_plot_cost,
seasonal_plot_cost,
subseries_plot_cost,
lag_plot_cost,
acf_plot_cost,
nrow = 2,
ncol = 3
)
Can you spot any seasonality, cyclicity and trend? Looking at the time plot, there is a strong long-term upward trend. The seasonal plot shows a very strong seasonality trend. Each year has a sharp decrease from January to February and then climbs steadily throughout the year and finished high in December. There’s no obvious cyclicity from the plots.
What do you learn about the series? There is a strong seasonality to this series, as well as a long-term upward trend.
What can you say about the seasonal patterns? Year-over-year February/March are the months where the cost is the lowest, with costs increasing steadily throughout the year (April - November) and the peak being in December and January.
Can you identify any unusual years? It’s difficult to determine unusual years from these plots.
# Time Plot
time_plot_barrels <- us_gasoline %>%
autoplot(Barrels) +
labs(
title = "Time Plot" # add plot title
) +
theme(
plot.title= element_text(hjust = 0.5, face = "bold"), # ceter and bold plot title
y.axis.title = element_text(hjust = 0.5), # center y axis title
x.axis.title = element_text(hjust = 0.5) # center x axis title
)
# Seasonal Plot
seasonal_plot_barrels <- us_gasoline %>%
gg_season(Barrels) + # seasonal plot
labs(
title = "Seasonal Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title
)
# Subseries Plot
subseries_plot_barrels <- us_gasoline %>%
gg_subseries(Barrels) + # subseries plot
labs(
title = "Subseries Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title
axis.text.x = element_text(angle = 45, hjust = 1)
)
# Lag Plot
lag_plot_barrels <- us_gasoline %>%
gg_lag(Barrels) + # lag plot
labs(
title = "Lag Plot" # add plot title
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title
axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible
)
# Autocorrelation Plot
acf_plot_barrels <- us_gasoline %>%
ACF(Barrels, lag_max = 1355) %>% # ACF plot
autoplot()+
labs(
title = "ACF Plot"
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title
axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible
)
# combine plots into one grid
library(gridExtra)
grid.arrange(
time_plot_barrels,
seasonal_plot_barrels,
subseries_plot_barrels,
lag_plot_barrels,
acf_plot_barrels,
nrow = 2,
ncol = 3
)
Can you spot any seasonality, cyclicity and trend? From the time plot, there is a strong long-term upward trend from 1990 through the mid 2000s, then it levels out/slightly declines towards the late 2000s. There is a strong seasonal trend shown on the seaonsal plot. The ACF’s slow decay and wavy shape reflect the persistence and a yearly seasonal cycle. There is no clear cyclicity.
What do you learn about the series? The series is highly persistent, shown by the the lag plot hugging the diagonal.
What can you say about the seasonal patterns? There is a seasonal trend with lows being January - March and October - December, rising phase being March - June, and the high being July - September.
Can you identify any unusual years? There is a larger than usual dip around 1995 and around 2000 on the time plot. The mid-2000s summers are unusually high.