Exercise 2.1

library("fpp3")
## Exploring datasets

# explore aus_production dataset
?aus_production

# explore pelt dataset
?pelt

# explore gafa_stock dataset
?gafa_stock

# explore vic_elec dataset
?vic_elec

From the above R documentation pages:

library(scales)
## Time plot for each series. 

# aus_production 
aus_production %>%
  autoplot(Bricks) +
  labs(
    title = "Australian Brick Production", # add plot title
    x = "Quarter", # add x-axis title
    y = "Millions of Bricks") + # add y-axis title
  theme(
    plot.title = element_text(hjust=0.5, face = "bold") # center and bold plot title
    )

# pelt
pelt %>%
  autoplot(Lynx) +
  labs(
    title = "Canadian Lynx Pelt Trading", # add plot title
    x = "Year", # add x-axis title
    y = "Number of Canadian\n Lynx Pelts Traded") + # add y-axis title
  theme(
    plot.title = element_text(hjust=0.5, face = "bold"), # center and bold plot title
    axis.title.y = element_text(hjust=0.5) # center y-axis label
    )

# gafa_stock
gafa_stock %>%
  autoplot(Close) + # plot
  labs(
    title = "Closing Price of GAFA Stock", 
    x = "Day", 
    y = "USD"
    ) + # add title and x/y axis labels 
  scale_x_date(
    date_breaks = "6 months", 
    date_labels = "%Y-%m"
    ) + # add date breaks 
  scale_y_continuous(
    labels = comma # add commas to y axis
    ) + 
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold title
    axis.title.x = element_text(hjust = 0.5), # center x-axis label
    axis.title.y = element_text(hjust = 0.5), # center y-axis label
    axis.text.x = element_text(angle = 45, hjust=1)  # rotate x-axis ticks
  )

# vic_elec
vic_elec %>%
  autoplot(Demand) +
  labs(
    title = "Electricity Demand in Victoria, Australia", # add plot title
    x = "Time", # add x-axis title
    y = "Electricity Demand (MWh)") + # add y-axis title
  scale_x_datetime(date_breaks = "60 day", date_labels = "%b %d") + # add more tick marks on x-axis
  theme(
    plot.title = element_text(hjust=0.5, face = "bold"), # center and bold plot title
    axis.title.y = element_text(hjust=0.5), # center y-axis label
    axis.text.x = element_text(angle = 45, hjust=1) # rotate x-axis ticks
    )

Exercise 2.2

gafa_stock %>%
  group_by(Symbol) %>% # group by company stock symbol
  filter(Close == max(Close)) %>% # keep only the rows with the peak close
  select(Symbol, Date, Close) # select the Company stock name, date, and closing stock price. 
## # A tsibble: 4 x 3 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date       Close
##   <chr>  <date>     <dbl>
## 1 AAPL   2018-10-03  232.
## 2 AMZN   2018-09-04 2040.
## 3 FB     2018-07-25  218.
## 4 GOOG   2018-07-26 1268.

Exercise 2.3

# read csv file and store in tute1
tute1 <- read.csv("/Users/kristinlussi/Documents/MSDS/DATA 624/tute1.csv")

# Convert data into time series --> using the code from textbook
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)

# Construct time series plots of each of the three series --> using the code from the textbook
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

Checking to see what happens when you don’t use facet_grid in the plot code…

# Construct time series plots of each of the three series --> removing facet_grid
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line()

It puts all three time series plots on the same graph.

Exercise 2.4

library(USgas)
library(ggplot2)

## Create tsibble from us_total
us_total_tsibble <- us_total %>%
  as_tsibble(index = year, key = state)

## Plot the annual natural gas consumption by state for the New England area 
## (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).

us_total_tsibble %>%
  filter(state %in% c("Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut", "Rhode Island")) %>% # filter for the New England states
  ggplot(aes(x=year, y=y, color = state)) + # create ggplot
  geom_line() + # specify line plot
  labs(
    title = "Annual Natural Gas\nConsumption in New England", # add plot title
    x = "Year", # add x-axis title
    y = "Gas Consumption\n(Million Cubic Feet)", # add y-axis title
    color = "State" # adjsut legend title 
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title
    x.axis.title = element_text(hjust = 0.5), # center x axis title
    legend.title = element_text(hjust = 0.5, face = "bold") # center and bold legend title
  )

Exercise 2.5

Part A

library(readxl)

tourism <- read_excel("/Users/kristinlussi/Documents/MSDS/DATA 624/tourism.xlsx")

tourism_tsibble <- tourism %>%
  mutate(Quarter = yearquarter(Quarter)) %>% # convert to yearquarter object
  as_tsibble( # convert dataframe to tsibble
    index = Quarter, # Quarter is the time index
    key = c(Region, State, Purpose)) # use Region, State, and Purpose as keys

head(tourism_tsibble)
## # A tsibble: 6 x 5 [1Q]
## # Key:       Region, State, Purpose [1]
##   Quarter Region   State           Purpose  Trips
##     <qtr> <chr>    <chr>           <chr>    <dbl>
## 1 1998 Q1 Adelaide South Australia Business  135.
## 2 1998 Q2 Adelaide South Australia Business  110.
## 3 1998 Q3 Adelaide South Australia Business  166.
## 4 1998 Q4 Adelaide South Australia Business  127.
## 5 1999 Q1 Adelaide South Australia Business  137.
## 6 1999 Q2 Adelaide South Australia Business  200.

Part B

tourism_tsibble %>%
  group_by(Region, Purpose) %>% # group the tsibble by region and purpose 
  summarise(avg_trips = Trips) %>% # calculate the average trips %>%
  ungroup() %>%
  filter(avg_trips == max(avg_trips)) # keep only the max avg_trips
## # A tsibble: 1 x 4 [1Q]
## # Key:       Region, Purpose [1]
##   Region    Purpose  Quarter avg_trips
##   <chr>     <chr>      <qtr>     <dbl>
## 1 Melbourne Visiting 2017 Q4      985.

The combination with the maximum number of overnight trips on average is Melbourne and Visiting friends and relatives.

Part C

new_tsibble <- tourism %>%
  mutate(Quarter = yearquarter(Quarter)) %>% # convert to yearquarter object
  group_by(Quarter, State) %>% # group by Quarter and State
  summarise(Total_Trips = sum(Trips, na.rm = TRUE), .groups = "drop") %>%  # sum Trips across Region & Purpose
  as_tsibble( # convert dataframe to tsibble 
    index = Quarter, # Quarter is the time index
    key = State # select only State as the key
  )

head(new_tsibble)
## # A tsibble: 6 x 3 [1Q]
## # Key:       State [1]
##   Quarter State Total_Trips
##     <qtr> <chr>       <dbl>
## 1 1998 Q1 ACT          551.
## 2 1998 Q2 ACT          416.
## 3 1998 Q3 ACT          436.
## 4 1998 Q4 ACT          450.
## 5 1999 Q1 ACT          379.
## 6 1999 Q2 ACT          558.

Exercise 2.8

“Total Private”” employed from us_employment

# Time Plot
time_plot_employment <- us_employment %>% 
  filter(Title == "Total Private") %>% # filter for "Total Private"
  autoplot(Employed) + 
  labs(
    title = "Time Plot" # add plot title
  ) +
  theme(
    plot.title= element_text(hjust = 0.5, face = "bold"), # ceter and bold plot title
    y.axis.title = element_text(hjust = 0.5), # center y axis title
    x.axis.title = element_text(hjust = 0.5) # center x axis title
  )

# Seasonal Plot
seasonal_plot_employment <- us_employment %>%
  filter(Title == "Total Private") %>% # filter for "Total Private"
  gg_season(Employed) + # seasonal plot
  labs(
    title = "Seasonal Plot" # add plot title
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title
  )

# Subseries Plot
subseries_plot_employment <-us_employment %>%
  filter(Title == "Total Private") %>% # filter for "Total Private"
  gg_subseries(Employed) + # subseries plot
  labs(
    title = "Subseries Plot" # add plot title
    ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title 
  )

# Lag Plot
lag_plot_employment <- us_employment %>%
  filter(Title == "Total Private") %>% # filter for "Total Private"
  gg_lag(Employed) + # lag plot
  labs(
    title = "Lag Plot" # add plot title
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title 
    axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible 
  )

# Autocorrelation Plot
acf_plot_employment <- us_employment %>%
  filter(Title == "Total Private") %>% # filter for "Total Private"
  ACF(Employed, lag_max = 2000) %>% # ACF plot
  autoplot() +
  labs(
    title = "ACF Plot"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title 
    axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible 
  )

# combine plots into one grid
library(gridExtra)

grid.arrange(
  time_plot_employment, 
  seasonal_plot_employment, 
  subseries_plot_employment, 
  lag_plot_employment, 
  acf_plot_employment,
  nrow = 2,
  ncol = 3
)

Can you spot any seasonality, cyclicity and trend? The time plot shows a clear upward long-term trend in employment.

What do you learn about the series? The series is dominated by the upward trend in employment. There are no strong seasonal trends, but the series does show cyclical dips associated with recessions.

What can you say about the seasonal patterns? There are no strong seasonal patterns by looking at the plots. The upward trend is very strong and overpowers any other patterns that may be present.

Can you identify any unusual years? There are a few unusual years.

  • In the mid 1940s there was a dip on the time plot. There was a recession in 1945, so this dip makes sense.

  • Around 1983 there was a large dip on the time plot. There was a recession in the early 1980s, so this dip makes sense.

  • Around 2008-2010 there was a large dip on the time plot. There was a recession in 2008, so this dip makes sense.

Bricks from aus_production

# Time Plot
time_plot_bricks <- aus_production %>% 
  autoplot(Bricks) + 
  labs(
    title = "Time Plot" # add plot title
  ) +
  theme(
    plot.title= element_text(hjust = 0.5, face = "bold"), # ceter and bold plot title
    y.axis.title = element_text(hjust = 0.5), # center y axis title
    x.axis.title = element_text(hjust = 0.5) # center x axis title
  )

# Seasonal Plot
seasonal_plot_bricks <- aus_production %>%
  gg_season(Bricks) + # seasonal plot
  labs(
    title = "Seasonal Plot" # add plot title
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title
  )

# Subseries Plot
subseries_plot_bricks <- aus_production %>%
  gg_subseries(Bricks) + # subseries plot
  labs(
    title = "Subseries Plot" # add plot title
    ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title 
  )

# Lag Plot
lag_plot_bricks <- aus_production %>%
  gg_lag(Bricks) + # lag plot
  labs(
    title = "Lag Plot" # add plot title
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title 
    axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible 
  )

# Autocorrelation Plot
acf_plot_bricks <- aus_production %>%
  ACF(Bricks, lag_max = 200) %>% # ACF plot
  autoplot()+
  labs(
    title = "ACF Plot"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title 
    axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible 
  )

# combine plots into one grid
library(gridExtra)

grid.arrange(
  time_plot_bricks, 
  seasonal_plot_bricks, 
  subseries_plot_bricks, 
  lag_plot_bricks, 
  acf_plot_bricks,
  nrow = 2,
  ncol = 3
)

Can you spot any seasonality, cyclicity and trend? The brick production series exhibits a clear upward trend until the late 1970s, followed by a downward trend with more volatility. Strong quarterly seasonality is shown with the seasonal plot with higher production in Q2 and Q3. Additionally, there are some longer term cycles that may be tied to construction and economic activity.

What do you learn about the series? There is seasonality with this time series with higher production levels during Q2 and Q3. The series also has cyclical patterns tied to economic activity.

What can you say about the seasonal patterns? Production levels are consistently higher during Q2 and Q3 throughout the time series. This aligns with construction cycles, which are more active in the spring and summer.

Can you identify any unusual years? There are large dips in the following years:

  • Mid 1970s: there was a sharp decline in production, corresponding to a recession and possibly the energy crisis of the 1970s

  • Early 1980s: there was a sharp decline in production, corresponding to the recession

Hare from pelt

# Time Plot
time_plot_hare <- pelt %>% 
  autoplot(Hare) + 
  labs(
    title = "Time Plot" # add plot title
  ) +
  theme(
    plot.title= element_text(hjust = 0.5, face = "bold"), # ceter and bold plot title
    y.axis.title = element_text(hjust = 0.5), # center y axis title
    x.axis.title = element_text(hjust = 0.5) # center x axis title
  )

# Seasonal Plot
# No seasonal plot because the time interval is annual!

# Subseries Plot
subseries_plot_hare <- pelt %>%
  gg_subseries(Hare) + # subseries plot
  labs(
    title = "Subseries Plot" # add plot title
    ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title 
  )

# Lag Plot
lag_plot_hare <- pelt %>%
  gg_lag(Hare) + # lag plot
  labs(
    title = "Lag Plot" # add plot title
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title 
    axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible 
  )

# Autocorrelation Plot
acf_plot_hare <- pelt %>%
  ACF(Hare) %>% # ACF plot
  autoplot() +
  labs(
    title = "ACF Plot"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title 
    axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible 
  )

# combine plots into one grid
library(gridExtra)

grid.arrange(
  time_plot_hare, 
  subseries_plot_hare, 
  lag_plot_hare, 
  acf_plot_hare,
  nrow = 2,
  ncol = 2
)

Can you spot any seasonality, cyclicity and trend? Since this is annual data, there is no seasonality. The time plot shows no long-term upward or downward trends. The time plot, lag plot, and the ACF plot all indicate a cyclical trend.

  • Time Plot: Clearly shows a repeated up and down cycle of about 10 years.

  • Lag Plot: The looping patterns indicate nonlinear cyclical dependence.

  • ACF Plot: Alternating positive and negative spikes indicate a quasi-periodic cycle.

What do you learn about the series? This time series exhibits no long-term trend or seasonality, but shows a strong 10-year cyclical pattern.

What can you say about the seasonal patterns? There are no seasonal patterns since the time series interval is annual.

Can you identify any unusual years? There are two points on the time plot where the population reaches nearly zero.

  • Early 1860s

  • Early 1900s

“H02” Cost from PBS

# Time Plot
time_plot_cost <- PBS %>% 
  filter(ATC2 == "H02") %>% # filter for "Total H02 
  summarise(total_cost = sum(Cost)) %>% # calculate the total cost of H02 as there are 4 different subgroups
  autoplot(total_cost) + 
  labs(
    title = "Time Plot"# add plot title
  ) +
  theme(
    plot.title= element_text(hjust = 0.5, face = "bold"), # ceter and bold plot title
    y.axis.title = element_text(hjust = 0.5), # center y axis title
    x.axis.title = element_text(hjust = 0.5) # center x axis title
  )

# Seasonal Plot
seasonal_plot_cost <- PBS %>%
  filter(ATC2 == "H02") %>% # filter for "H02"
  summarise(total_cost = sum(Cost)) %>% # calculate the total cost of H02 as there are 4 different subgroups
  gg_season(total_cost) + # seasonal plot
  labs(
    title = "Seasonal Plot" # add plot title
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title
  )

# Subseries Plot
subseries_plot_cost <- PBS %>%
  filter(ATC2 == "H02") %>% # filter for "H02"
  summarise(total_cost = sum(Cost)) %>% # calculate the total cost of H02 as there are 4 different subgroups
  gg_subseries(total_cost) + # subseries plot
  labs(
    title = "Subseries Plot" # add plot title
    ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title 
  )

# Lag Plot
lag_plot_cost <- PBS %>%
  filter(ATC2 == "H02") %>% # filter for "H02"
  summarise(total_cost = sum(Cost)) %>% # calculate the total cost of H02 as there are 4 different subgroups
  gg_lag(total_cost) + # lag plot
  labs(
    title = "Lag Plot" # add plot title
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title 
    axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible 
  )

# Autocorrelation Plot
acf_plot_cost <- PBS %>%
  filter(ATC2 == "H02") %>% # filter for "H02" 
  summarise(total_cost = sum(Cost)) %>% # calculate the total cost of H02 as there are 4 different subgroups
  ACF(total_cost) %>% # ACF plot
  autoplot() +
  labs(
    title = "ACF Plot"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title 
    axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible 
  )

# combine plots into one grid
library(gridExtra)

grid.arrange(
  time_plot_cost, 
  seasonal_plot_cost, 
  subseries_plot_cost, 
  lag_plot_cost, 
  acf_plot_cost,
  nrow = 2,
  ncol = 3
)

Can you spot any seasonality, cyclicity and trend? Looking at the time plot, there is a strong long-term upward trend. The seasonal plot shows a very strong seasonality trend. Each year has a sharp decrease from January to February and then climbs steadily throughout the year and finished high in December. There’s no obvious cyclicity from the plots.

What do you learn about the series? There is a strong seasonality to this series, as well as a long-term upward trend.

What can you say about the seasonal patterns? Year-over-year February/March are the months where the cost is the lowest, with costs increasing steadily throughout the year (April - November) and the peak being in December and January.

Can you identify any unusual years? It’s difficult to determine unusual years from these plots.

Barrels from us_gasoline

# Time Plot
time_plot_barrels <- us_gasoline %>% 
  autoplot(Barrels) + 
  labs(
    title = "Time Plot" # add plot title
  ) +
  theme(
    plot.title= element_text(hjust = 0.5, face = "bold"), # ceter and bold plot title
    y.axis.title = element_text(hjust = 0.5), # center y axis title
    x.axis.title = element_text(hjust = 0.5) # center x axis title
  )

# Seasonal Plot
seasonal_plot_barrels <- us_gasoline %>%
  gg_season(Barrels) + # seasonal plot
  labs(
    title = "Seasonal Plot" # add plot title
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold") # center and bold plot title
  )

# Subseries Plot
subseries_plot_barrels <- us_gasoline %>%
  gg_subseries(Barrels) + # subseries plot
  labs(
    title = "Subseries Plot" # add plot title
    ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title 
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Lag Plot
lag_plot_barrels <- us_gasoline %>%
  gg_lag(Barrels) + # lag plot
  labs(
    title = "Lag Plot" # add plot title
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title 
    axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible 
  )

# Autocorrelation Plot
acf_plot_barrels <- us_gasoline %>%
  ACF(Barrels, lag_max = 1355) %>% # ACF plot
  autoplot()+
  labs(
    title = "ACF Plot"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # center and bold plot title 
    axis.text.x = element_text(angle = 45, hjust = 1) # rotate x-axis ticks so they're more visible 
  )

# combine plots into one grid
library(gridExtra)

grid.arrange(
  time_plot_barrels, 
  seasonal_plot_barrels, 
  subseries_plot_barrels, 
  lag_plot_barrels, 
  acf_plot_barrels,
  nrow = 2,
  ncol = 3
)

Can you spot any seasonality, cyclicity and trend? From the time plot, there is a strong long-term upward trend from 1990 through the mid 2000s, then it levels out/slightly declines towards the late 2000s. There is a strong seasonal trend shown on the seaonsal plot. The ACF’s slow decay and wavy shape reflect the persistence and a yearly seasonal cycle. There is no clear cyclicity.

What do you learn about the series? The series is highly persistent, shown by the the lag plot hugging the diagonal.

What can you say about the seasonal patterns? There is a seasonal trend with lows being January - March and October - December, rising phase being March - June, and the high being July - September.

Can you identify any unusual years? There is a larger than usual dip around 1995 and around 2000 on the time plot. The mid-2000s summers are unusually high.