# DO NOT FORGET TO CALL THESE 3 packages FIRST
library(fpp3) # forecasting package
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.5
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.3 ✔ fable 0.4.0
## ✔ ggplot2 3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse) # graphs and tidy
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) # reading excel data
# download the data from the web page.
download.file("http://OTexts.com/fpp3/extrafiles/tourism.xlsx",
tourism_file <- tempfile())
# reads the downloaded tourism data by excel
my_tourism <- readxl::read_excel("C:/Users/vishe/Downloads/tourism.xlsx")
## Your turn: create a tsibble format of the data below, and rename it as my_tourism:
# A1.Answer:
my_tourism <- my_tourism %>%
mutate(Quarter = yearquarter(Quarter)) %>%
as_tsibble(index = Quarter, key = c(Region, State, Purpose))
# View the tsibble
print(my_tourism)
## # A tsibble: 24,320 x 5 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ℹ 24,310 more rows
# A2.Answer:
is_quarterly <- !is.null(as_tsibble(my_tourism)$index)
## Warning: Unknown or uninitialised column: `index`.
start_year <- min(year(as_tsibble(my_tourism)$Quarter))
# part-1 :Check if data is quarterly
if (is_quarterly) {
print("The data is quarterly.")
} else {
print("The data is annual.")
}
## [1] "The data is annual."
# Print the starting year
print(paste("The data starts from:", start_year))
## [1] "The data starts from: 1998"
# part-2: Cross-table of 'State' and 'Purpose'
state_purpose_table <- table(my_tourism$State, my_tourism$Purpose)
# Print the cross-table
print(state_purpose_table)
##
## Business Holiday Other Visiting
## ACT 80 80 80 80
## New South Wales 1040 1040 1040 1040
## Northern Territory 560 560 560 560
## Queensland 960 960 960 960
## South Australia 960 960 960 960
## Tasmania 400 400 400 400
## Victoria 1680 1680 1680 1680
## Western Australia 400 400 400 400
# Count the unique values in 'State' and 'Purpose'
num_states <- n_distinct(my_tourism$State)
num_purposes <- n_distinct(my_tourism$Purpose)
# Print the results
print(paste("There are", num_states, "states in the data."))
## [1] "There are 8 states in the data."
print(paste("There are", num_purposes, "trip purpose categories in the data."))
## [1] "There are 4 trip purpose categories in the data."
#part-3:
# Group the data by 'Region' and 'Purpose'
grouped_data <- my_tourism %>%
as_tibble() %>% # Convert to a tibble to remove the tsibble properties (time effect)
group_by(Region, Purpose)
# View the grouped data
print(grouped_data)
## # A tibble: 24,320 × 5
## # Groups: Region, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ℹ 24,310 more rows
#Part-4:
average_trips <- grouped_data %>%
summarise(Trips = mean(Trips, na.rm = TRUE))
## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.
# View the summarized data
print(average_trips)
## # A tibble: 304 × 3
## # Groups: Region [76]
## Region Purpose Trips
## <chr> <chr> <dbl>
## 1 Adelaide Business 156.
## 2 Adelaide Holiday 157.
## 3 Adelaide Other 56.6
## 4 Adelaide Visiting 205.
## 5 Adelaide Hills Business 2.66
## 6 Adelaide Hills Holiday 10.5
## 7 Adelaide Hills Other 1.40
## 8 Adelaide Hills Visiting 14.2
## 9 Alice Springs Business 14.6
## 10 Alice Springs Holiday 31.9
## # ℹ 294 more rows
#part-5:
max_trips <- average_trips %>%
ungroup() %>%
filter(Trips == max(Trips, na.rm = TRUE)) %>%
slice(1) # Select only the first occurrence if there are ties
# Print the result
print(max_trips)
## # A tibble: 1 × 3
## Region Purpose Trips
## <chr> <chr> <dbl>
## 1 Sydney Visiting 747.
# A3.Answer:
state_tourism <- my_tourism %>%
as_tibble() %>% # Convert to tibble to simplify manipulation
group_by(State, Quarter) %>% # Group by State and Quarter to retain the time index
summarise(Trips = sum(Trips, na.rm = TRUE), .groups = "drop") %>% # Sum the trips for each state
as_tsibble(index = Quarter, key = State) # Convert back to tsibble with Quarter as the time index
# Print the new tsibble
print(state_tourism)
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter Trips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ℹ 630 more rows
# Bricks from aus_production
# Lynx from pelt
# Close from gafa_stock
# Demand from vic_elec
# B1.Answer:
# Required libraries
library(fpp3)
# B1: Plotting the Time Series
# 1. Bricks from aus_production
bricks_plot <- aus_production %>%
autoplot(Bricks) +
ggtitle("Australian Brick Production") +
ylab("Bricks Produced (millions)")
# 2. Lynx from pelt
lynx_plot <- pelt %>%
autoplot(Lynx) +
ggtitle("Annual Lynx Trappings") +
ylab("Number of Lynx Trapped")
# 3. Close from gafa_stock
gafa_stock_plot <- gafa_stock %>%
filter(Symbol == "GOOG") %>% # Example: Plotting Google's stock prices
autoplot(Close) +
ggtitle("Google Stock Closing Prices") +
ylab("Closing Price (USD)")
# 4. Demand from vic_elec
vic_elec_plot <- vic_elec %>%
autoplot(Demand) +
ggtitle("Electricity Demand in Victoria") +
ylab("Demand (MW)")
# Display the plots
print(bricks_plot)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
print(lynx_plot)
print(gafa_stock_plot)
print(vic_elec_plot)
snowy <- my_tourism %>%
filter(Region == "Snowy Mountains")
# View the filtered dataset
print(snowy)
## # A tsibble: 320 x 5 [1Q]
## # Key: Region, State, Purpose [4]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Snowy Mountains New South Wales Business 15.9
## 2 1998 Q2 Snowy Mountains New South Wales Business 20.3
## 3 1998 Q3 Snowy Mountains New South Wales Business 36.2
## 4 1998 Q4 Snowy Mountains New South Wales Business 9.15
## 5 1999 Q1 Snowy Mountains New South Wales Business 20.9
## 6 1999 Q2 Snowy Mountains New South Wales Business 33.3
## 7 1999 Q3 Snowy Mountains New South Wales Business 21.6
## 8 1999 Q4 Snowy Mountains New South Wales Business 8.45
## 9 2000 Q1 Snowy Mountains New South Wales Business 20.1
## 10 2000 Q2 Snowy Mountains New South Wales Business 15.2
## # ℹ 310 more rows
Question: Take snowy data. Then sums up all trips in State and Purpose by each quarter every year by using summarizer() commands. Then Use autoplot(), gg_season() and gg_subseries() to explore the quarterly trips of snowy data. What do you observe? What type of pattern do you see. Write your comment on Answer below:
# C2.Answer:
# 1. autoplot(): Plot the total trips over time for the "Snowy Mountains"
autoplot(snowy, Trips) +
ggtitle("Snowy Mountains - Total Trips Over Time") +
ylab("Number of Trips")
# 2. gg_season(): Visualizing seasonal patterns in the data
gg_season(snowy, Trips) +
ggtitle("Seasonal Plot of Trips in Snowy Mountains") +
ylab("Number of Trips")
# 3. gg_subseries(): Exploring subseries patterns across different quarters
gg_subseries(snowy, Trips) +
ggtitle("Subseries Plot of Trips in Snowy Mountains") +
ylab("Number of Trips")
observation: The autoplot() reveals strong seasonal patterns, with regular peaks likely in winter. The gg_season() plot confirms that trips spike in Q1, hinting at winter sports activity. The gg_subseries() further highlights consistent peaks within specific quarters, reinforcing the seasonal tourism trend.
# D1.Answer:
# gg_lag for Bricks
gg_lag(aus_production, Bricks, geom = "point") +
ggtitle("Lag Plot for Australian Brick Production")
## Warning: Removed 20 rows containing missing values (gg_lag).
# ACF for Bricks
aus_production %>%
ACF(Bricks) %>%
autoplot() +
ggtitle("ACF for Australian Brick Production")
# D1.2: Lynx from pelt
# gg_lag for Lynx
gg_lag(pelt, Lynx, geom = "point") +
ggtitle("Lag Plot for Lynx Trappings")
# ACF for Lynx
pelt %>%
ACF(Lynx) %>%
autoplot() +
ggtitle("ACF for Lynx Trappings")
# D1.3: Victorian Electricity Demand from Electricity Demand
# gg_lag for Demand
# gg_lag for Demand
gg_lag(vic_elec, Demand, geom = "point") +
ggtitle("Lag Plot for Victorian Electricity Demand")
# ACF for Demand
vic_elec %>%
ACF(Demand) %>%
autoplot() +
ggtitle("ACF for Victorian Electricity Demand")
observation : Observations for D1:
Bricks:
- gg_lag(): Indicates strong seasonal
correlation. - ACF(): Spikes every 4
quarters confirm seasonality and persistence.
Lynx:
- gg_lag(): Shows a cyclic pattern,
matching ecological cycles. - ACF(): Peaks
every 10 years indicate cyclic behavior.
Electricity Demand:
- gg_lag(): Highlights daily and weekly
patterns. - ACF(): Peaks at 24-hour
intervals confirm daily seasonality.
The lag plots (gg_lag()) and ACF plots for the Bricks data reveal: Seasonality: A clear repeating pattern at regular intervals, with significant peaks in the ACF plot every 4 quarters, highlighting a strong yearly seasonal component. Cyclicity: No indication of long-term cycles beyond the yearly seasonality, aligning with typical annual production patterns. Trend: The autoplot() shows some variations, but there’s no evidence of a sustained upward or downward trend. Production appears stable, mainly fluctuating due to seasonal changes.
Seasonality: Since the data is annual, the lag plot doesnt show a regular seasonal pattern typical of quarterly or monthly series. However, the ACF plot reveals periodic peaks, indicating some cyclic behavior, though not in a simple yearly cycle. Cyclicity: The ACF plot shows strong peaks roughly every 10 years, suggesting a cyclic pattern. This aligns with ecological data, where animal populations follow multi-year cycles due to factors like predator-prey dynamics. Trend: There’s no clear long-term trend in the Lynx data. The cyclic peaks remain fairly stable, indicating a repeating ecological cycle.
’’’ Seasonality: The lag plot reveals a clear daily pattern with consistent peaks, indicating strong daily seasonality. The ACF plot confirms this with spikes at 24-hour intervals.
Cyclicity: In addition to daily seasonality, the ACF plot hints at weekly cycles, likely reflecting higher demand on certain days, such as weekdays.
Trend: The autoplot() suggests a potential upward trend over time, possibly driven by factors like population growth and changes in usage habits. ’’’
goog <- gafa_stock %>% filter(Symbol == “GOOG”, year(Date) >= 2018) # Filter for Google and dates from 2018 onwards # Calculate the first difference dgoog <- goog %>% mutate(trading_day = row_number()) %>% # Create a sequential trading day index update_tsibble(index = trading_day, regular = TRUE) %>% # Update the tsibble with the new index mutate(diff = difference(Close)) # Calculate the first difference of the “Close” price # Display the first few rows of the result print(dgoog)
autoplot(dgoog, diff) + ggtitle(“First Difference of Google Stock Price (Close)”) + ylab(“Daily Change in Stock Price”)
dgoog %>% ACF(diff) %>% autoplot() + ggtitle(“ACF of First Difference (diff) of Google Stock Price”)
ljung_box_test <- dgoog %>% features(diff, ljung_box, lag = 10)
print(ljung_box_test)
Visual Inspection: If “diff” fluctuates randomly around a constant mean (zero) without trends or patterns, it may indicate white noise.
ACF Plot: Autocorrelation values within the 95% confidence interval suggest no significant autocorrelation, supporting the white noise hypothesis.
Ljung-Box Test: A high p-value (over 0.05) indicates no significant autocorrelation, suggesting the series is white noise.