# DO NOT FORGET TO LOAD THESE 3 PACKAGES FIRST
library(fpp3) # forecasting package
## Warning: package 'fpp3' was built under R version 4.3.3
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.3 ✔ fable 0.4.1
## ✔ ggplot2 3.5.0
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'tsibble' was built under R version 4.3.3
## Warning: package 'tsibbledata' was built under R version 4.3.3
## Warning: package 'feasts' was built under R version 4.3.3
## Warning: package 'fabletools' was built under R version 4.3.3
## Warning: package 'fable' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse) # graphs and tidy
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) # reading Excel files
library(ggplot2)
# Download the data from the book's website to a temporary file
# (mode = "wb" keeps the binary .xlsx intact on Windows)
tourism_file <- tempfile(fileext = ".xlsx")
download.file("http://OTexts.com/fpp3/extrafiles/tourism.xlsx",
              tourism_file, mode = "wb")
# read the downloaded tourism data with readxl
my_tourism <- read_excel(tourism_file)
View(my_tourism)
## Your turn: convert the data below to a tsibble and assign it back to my_tourism:
# A1.Answer:
str(my_tourism)
## tibble [24,320 × 5] (S3: tbl_df/tbl/data.frame)
## $ Quarter: chr [1:24320] "1998-01-01" "1998-04-01" "1998-07-01" "1998-10-01" ...
## $ Region : chr [1:24320] "Adelaide" "Adelaide" "Adelaide" "Adelaide" ...
## $ State : chr [1:24320] "South Australia" "South Australia" "South Australia" "South Australia" ...
## $ Purpose: chr [1:24320] "Business" "Business" "Business" "Business" ...
## $ Trips : num [1:24320] 135 110 166 127 137 ...
my_tourism <- my_tourism %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter, key = c(Region, State, Purpose))
my_tourism
## # A tsibble: 24,320 x 5 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ℹ 24,310 more rows
# A2.Answer:
#### 1. Is it annual or quarterly data? Which year does it start from?
# Check the structure of the data
# Check the range of Quarter values
head(my_tourism$Quarter)
## <yearquarter[6]>
## [1] "1998 Q1" "1998 Q2" "1998 Q3" "1998 Q4" "1999 Q1" "1999 Q2"
## # Year starts on: January
# Answer: the data are quarterly, starting in 1998 Q1.
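# A quick programmatic check (a sketch, not part of the assignment):
# tsibble's interval() — which masks lubridate::interval(), as noted in the
# startup conflicts — reports "1Q" for quarterly data.
interval(my_tourism)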
#### 2. Use the table(State, Purpose) command to see the cross-table. How many States are there in the data? How many trip Purpose categories?
# Create a cross-table
table(my_tourism$State, my_tourism$Purpose)
##
## Business Holiday Other Visiting
## ACT 80 80 80 80
## New South Wales 1040 1040 1040 1040
## Northern Territory 560 560 560 560
## Queensland 960 960 960 960
## South Australia 960 960 960 960
## Tasmania 400 400 400 400
## Victoria 1680 1680 1680 1680
## Western Australia 400 400 400 400
# Count number of unique States
num_states <- length(unique(my_tourism$State))
num_states
## [1] 8
# Count number of unique trip Purposes
num_purposes <- length(unique(my_tourism$Purpose))
num_purposes
## [1] 4
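# As an alternative sketch, both counts can be obtained in one step with
# dplyr's n_distinct():
my_tourism %>%
  as_tibble() %>%
  summarise(States = n_distinct(State), Purposes = n_distinct(Purpose))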
#### 3. Group the data by Region and Purpose using the group_by() command. To drop the time index, convert to a tibble first (i.e., as_tibble() %>% group_by(Region, Purpose)).
# Convert to tibble and group by Region and Purpose
grouped_data <- my_tourism %>%
as_tibble() %>%
group_by(Region, Purpose)
# Display first few rows
grouped_data
## # A tibble: 24,320 × 5
## # Groups: Region, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ℹ 24,310 more rows
#### 4. After grouping the data in #3, use the summarise() function to get the average of Trips for each combination, assigning it as Trips (i.e., summarise(Trips = mean(Trips))).
# Summarize the average number of Trips
summary_data <- grouped_data %>%
summarise(Trips = mean(Trips, na.rm = TRUE))
## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.
# Display summarized data
summary_data
## # A tibble: 304 × 3
## # Groups: Region [76]
## Region Purpose Trips
## <chr> <chr> <dbl>
## 1 Adelaide Business 156.
## 2 Adelaide Holiday 157.
## 3 Adelaide Other 56.6
## 4 Adelaide Visiting 205.
## 5 Adelaide Hills Business 2.66
## 6 Adelaide Hills Holiday 10.5
## 7 Adelaide Hills Other 1.40
## 8 Adelaide Hills Visiting 14.2
## 9 Alice Springs Business 14.6
## 10 Alice Springs Holiday 31.9
## # ℹ 294 more rows
#### 5. Find which Region and Purpose combination had the maximum number of overnight trips on average. Ungroup the data to find the result (i.e., ungroup() %>% filter(Trips == max(Trips))).
# Find the maximum average trips
max_trips <- summary_data %>%
ungroup() %>%
filter(Trips == max(Trips))
# Display result
max_trips
## # A tibble: 1 × 3
## Region Purpose Trips
## <chr> <chr> <dbl>
## 1 Sydney Visiting 747.
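# An equivalent, arguably more idiomatic dplyr sketch using slice_max(),
# which returns the row(s) with the largest Trips value:
summary_data %>%
  ungroup() %>%
  slice_max(Trips, n = 1)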
# A3.Answer:
# Aggregate data by State
state_tourism <- my_tourism %>%
group_by(State) %>%
summarise(Trips = sum(Trips)) %>%
ungroup()
state_tourism
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter Trips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ℹ 630 more rows
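# A quick visual check of the aggregated series (a sketch, not required by
# the question): autoplot() on the keyed tsibble draws one line per State.
autoplot(state_tourism, Trips) +
  labs(title = "Total Overnight Trips by State", y = "Trips")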
# Load required libraries
library(fpp3) # Contains the required datasets and functions
library(ggplot2)
# Plot 1: Bricks from aus_production
autoplot(aus_production, Bricks) +
ggtitle("Bricks Production in Australia") +
ylab("Bricks (millions)") +
xlab("Year")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Plot 2: Lynx from pelt
autoplot(pelt, Lynx) +
ggtitle("Lynx Trappings in Canada") +
ylab("Number of Lynx Trapped") +
xlab("Year")
# Plot 3: Close from gafa_stock
autoplot(gafa_stock, Close) +
ggtitle("Stock Market Closing Prices") +
ylab("Closing Price (USD)") +
xlab("Year")
# Plot 4: Demand from vic_elec
autoplot(vic_elec, Demand) +
ggtitle("Electricity Demand in Victoria") +
ylab("Electricity Demand (MW)") +
xlab("Time")
### B1.Answer
#### Bricks Production (aus_production)
# Shows a clear seasonal pattern, with an upward trend until about 1980 followed by a gradual decline.
# Likely driven by construction industry demand.
#### Lynx Trappings (pelt)
# Highly cyclical with sharp peaks and drops.
# Suggests population cycles in lynx numbers.
#### Stock Market Closing Prices (gafa_stock)
# Upward trend with fluctuations.
# May indicate market growth and volatility.
#### Electricity Demand (vic_elec)
# Strong seasonal pattern (higher demand in summer and winter, when cooling and heating needs peak).
# Suggests dependency on temperature and industrial activity.
# This visual analysis highlights the trend, seasonality, and variability in each dataset.
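# A minimal sketch to double-check the seasonality claim for Bricks
# (assuming the pattern repeats within each year): gg_season() overlays the
# quarters of every year on one plot.
aus_production %>%
  filter(!is.na(Bricks)) %>%
  gg_season(Bricks) +
  labs(title = "Seasonal Plot: Brick Production", y = "Bricks (millions)")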
snowy <- my_tourism %>%
filter(Region == "Snowy Mountains")
snowy
## # A tsibble: 320 x 5 [1Q]
## # Key: Region, State, Purpose [4]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Snowy Mountains New South Wales Business 15.9
## 2 1998 Q2 Snowy Mountains New South Wales Business 20.3
## 3 1998 Q3 Snowy Mountains New South Wales Business 36.2
## 4 1998 Q4 Snowy Mountains New South Wales Business 9.15
## 5 1999 Q1 Snowy Mountains New South Wales Business 20.9
## 6 1999 Q2 Snowy Mountains New South Wales Business 33.3
## 7 1999 Q3 Snowy Mountains New South Wales Business 21.6
## 8 1999 Q4 Snowy Mountains New South Wales Business 8.45
## 9 2000 Q1 Snowy Mountains New South Wales Business 20.1
## 10 2000 Q2 Snowy Mountains New South Wales Business 15.2
## # ℹ 310 more rows
Question: Take the snowy data. Then sum up all trips over State and Purpose for each quarter of every year using summarise(). Then use autoplot(), gg_season() and gg_subseries() to explore the quarterly trips of the snowy data. What do you observe? What type of pattern do you see? Write your comment in the Answer below:
# Load necessary libraries
library(fpp3)
library(ggplot2)
library(dplyr)
# Ensure Quarter is formatted correctly
snowy_summary <- snowy %>%
as_tibble() %>% # Convert to tibble first
mutate(Quarter = yearquarter(Quarter)) %>% # Convert Quarter to correct format
group_by(Quarter) %>% # Group by Quarter
summarise(Trips = sum(Trips, na.rm = TRUE)) %>% # Sum Trips for each quarter
ungroup() %>% # Remove grouping
as_tsibble(index = Quarter) # Convert back to tsibble after summarization
# Check structure to confirm no errors
glimpse(snowy_summary)
## Rows: 80
## Columns: 2
## $ Quarter <qtr> 1998 Q1, 1998 Q2, 1998 Q3, 1998 Q4, 1999 Q1, 1999 Q2, 1999 Q3,…
## $ Trips <dbl> 140.98916, 166.58634, 372.81534, 126.42985, 182.66215, 163.293…
# Plot the time series
autoplot(snowy_summary, Trips) +
labs(title = "Total Trips to Snowy Mountains Over Time", y = "Trips")
# Seasonal plot
gg_season(snowy_summary, Trips) +
labs(title = "Seasonal Plot of Trips to Snowy Mountains", y = "Trips")
# Subseries plot
gg_subseries(snowy_summary, Trips) +
labs(title = "Subseries Plot of Trips to Snowy Mountains", y = "Trips")
### Observations & Answer for C2:
# Time series plot (autoplot()):
# Shows clear seasonal patterns in tourism to the Snowy Mountains region.
# Some peaks and troughs suggest fluctuations in demand, likely due to weather or events.
# Seasonal plot (gg_season()):
# Reveals recurring patterns for each quarter over different years.
# Certain quarters (the winter months) have noticeably higher tourist activity.
# Subseries plot (gg_subseries()):
# Highlights how each quarter behaves separately across multiple years.
# Shows consistent highs and lows, reinforcing a strong seasonal effect.
### Conclusion:
# The Snowy Mountains tourism data exhibit strong seasonality, with peaks in winter driven by snow-related tourism.
# There might be an upward or downward trend depending on external factors such as climate change or policy shifts.
# Understanding these patterns can help tourism businesses plan for demand fluctuations.
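# A hedged sketch to separate the trend and seasonal components claimed
# above: an STL decomposition of the summarised series (default STL settings).
snowy_summary %>%
  model(STL(Trips)) %>%
  components() %>%
  autoplot()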
# D1.Answer:
# Load required libraries
library(fpp3)
library(ggplot2)
# Ensure no missing values before plotting
clean_aus_production <- aus_production %>% drop_na()
clean_pelt <- pelt %>% drop_na()
clean_vic_elec <- vic_elec %>% drop_na()
# 1) Bricks from aus_production
gg_lag(clean_aus_production, Bricks, lags = 1:9) +
labs(title = "Lag Plot: Bricks Production in Australia")
ACF(clean_aus_production, Bricks) %>%
autoplot() +
labs(title = "Autocorrelation Function: Bricks Production")
# 2) Lynx from pelt
gg_lag(clean_pelt, Lynx, lags = 1:9) +
labs(title = "Lag Plot: Lynx Trappings in Canada")
ACF(clean_pelt, Lynx) %>%
autoplot() +
labs(title = "Autocorrelation Function: Lynx Trappings")
# 3) Victorian Electricity Demand from vic_elec
gg_lag(clean_vic_elec, Demand, lags = 1:9) +
labs(title = "Lag Plot: Victorian Electricity Demand")
ACF(clean_vic_elec, Demand) %>%
autoplot() +
labs(title = "Autocorrelation Function: Victorian Electricity Demand")
# D2.Answer (Bricks):
# Seasonality: The ACF plot shows strong correlation at regular lags, indicating seasonal fluctuations in brick production.
# Trend: The time series plot suggests a long-term decline, meaning construction demand may have fallen over time.
# Cyclicality: No strong cyclical pattern, though there are fluctuations tied to external factors such as economic cycles.
# Insight: Brick production is seasonal, likely driven by construction cycles (higher demand in warmer months).
# D2.Answer (Lynx):
# Seasonality: None; the data are annual, so the strong periodic peaks in the lag plot reflect cycles rather than seasons.
# Trend: The population rises and falls over long periods, reflecting the predator-prey cycle.
# Cyclicality: A clear cycle of approximately 10 years, suggesting long-term biological cycles.
# Insight: The data are consistent with the Lotka-Volterra predator-prey model, where lynx populations grow and crash in response to prey availability.
# D2.Answer (Electricity):
# Seasonality: The ACF plot and seasonal plot show strong seasonal dependence, likely due to temperature changes.
# Trend: There is no major long-term trend, but demand fluctuates seasonally.
# Cyclicality: Some minor short-term cycles are present, but they follow daily and annual patterns rather than long-term cycles.
# Insight: Electricity demand follows a clear seasonal pattern, peaking in winter and summer due to heating and cooling needs.
# Final takeaways:
# Bricks → seasonal, with a declining trend driven by construction demand.
# Lynx → cyclical pattern (roughly 10-year cycle), caused by predator-prey dynamics.
# Electricity → highly seasonal; demand fluctuates with temperature.
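# To put numbers on "seasonal with a declining trend", feasts' feat_stl()
# reports trend and seasonal strength on a 0-1 scale (a sketch, not part of
# the original assignment; trailing NAs in Bricks are removed first):
aus_production %>%
  filter(!is.na(Bricks)) %>%
  features(Bricks, feat_stl)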
# Load required libraries
library(fpp3)
# Filter Google stock data from gafa_stock
google_stock <- gafa_stock %>%
filter(Symbol == "GOOG") %>%
select(Date, Close) %>% # Select only Date and Close price
arrange(Date) # Ensure data is sorted by Date
# Calculate the first difference of Google stock price
google_stock_diff <- google_stock %>%
mutate(Diff_Close = difference(Close))
# Display first few rows
head(google_stock_diff)
## # A tsibble: 6 x 3 [!]
## Date Close Diff_Close
## <date> <dbl> <dbl>
## 1 2014-01-02 553. NA
## 2 2014-01-03 549. -4.03
## 3 2014-01-06 555. 6.12
## 4 2014-01-07 566. 10.7
## 5 2014-01-08 567. 1.18
## 6 2014-01-09 561. -5.46
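# A hedged alternative sketch: in finance, the first difference of log
# prices (the daily log return) is often preferred because it is scale-free.
google_stock %>%
  mutate(Log_Return = difference(log(Close)))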
# E2.Answer:
# Load required libraries
library(fpp3)
# Check if the first difference of Google stock price is white noise
google_stock_diff %>%
gg_tsdisplay(Diff_Close, plot_type = "partial") +
labs(title = "Time Series, ACF, and PACF of First Difference of Google Stock Price")
## Warning: Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.
## Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
### E2.Answer:
# The first-differenced Google stock price series (Diff_Close) exhibits characteristics
# of white noise. The ACF plot shows no significant autocorrelations beyond the
# confidence bands, indicating a lack of structure or repeating patterns. A Ljung-Box
# test could confirm this formally: a high p-value (e.g., > 0.05) would indicate that
# any correlations in the series are not statistically significant.
# Visually, the series fluctuates randomly around a constant mean with no apparent
# trend or seasonality. This suggests the differenced series behaves like white noise,
# meaning that Google stock price changes are essentially random and unpredictable,
# with no strong patterns to exploit for forecasting.
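# A minimal sketch of that Ljung-Box test, which was not run above
# (lag = 10 is a common choice for non-seasonal data):
google_stock_diff %>%
  features(Diff_Close, ljung_box, lag = 10)
# A p-value above 0.05 would mean we cannot reject the null hypothesis that
# the differenced series is white noise.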