# DO NOT FORGET TO CALL THESE 3 packages FIRST
library(fpp3) # forecasting package
library(tidyverse) # graphs and tidy
library(readxl) # reading excel data
# Fetch the tourism workbook from the book's website into the working
# directory. mode = "wb" writes the file in binary mode so the .xlsx is not
# corrupted on Windows.
download.file(
  url = "http://OTexts.com/fpp3/extrafiles/tourism.xlsx",
  destfile = "tourism.xlsx",
  mode = "wb"
)
# Load the downloaded workbook as a plain tibble.
my_tourism <- readxl::read_excel(path = "tourism.xlsx")
## Your turn: create a tsibble format of the data below, and rename it as my_tourism:
# A1.Answer:
# Parse Quarter into a yearquarter index, then declare the tsibble structure:
# Quarter is the time index and each State/Region/Purpose combination
# identifies one series.
my_tourism <- as_tsibble(
  mutate(my_tourism, Quarter = yearquarter(Quarter)),
  index = Quarter,
  key = c(State, Region, Purpose)
)
# A2.Answer:
# 1. Inspect the first index values to confirm the frequency and start date:
#    the series begins at 1998 Q1 and advances one quarter at a time.
head(my_tourism[["Quarter"]])
## <yearquarter[6]>
## [1] "1998 Q1" "1998 Q2" "1998 Q3" "1998 Q4" "1999 Q1" "1999 Q2"
## # Year starts on: January
# 2. Cross-tabulate State against Purpose (cell values are row counts).
table(my_tourism[["State"]], my_tourism[["Purpose"]])
##
## Business Holiday Other Visiting
## ACT 80 80 80 80
## New South Wales 1040 1040 1040 1040
## Northern Territory 560 560 560 560
## Queensland 960 960 960 960
## South Australia 960 960 960 960
## Tasmania 400 400 400 400
## Victoria 1680 1680 1680 1680
## Western Australia 400 400 400 400
# Number of distinct States and distinct trip Purposes in the data.
num_states <- length(unique(my_tourism[["State"]]))
num_purposes <- length(unique(my_tourism[["Purpose"]]))
# 3. Average number of trips for every Region/Purpose combination.
#    as_tibble() drops the tsibble class first; otherwise summarise() would
#    also aggregate within each Quarter instead of across the whole period.
#    .groups = "drop" returns an ungrouped tibble, so no residual grouping by
#    Region leaks into later steps and no regrouping message is printed.
avg_trips <- my_tourism %>%
  as_tibble() %>%
  group_by(Region, Purpose) %>%
  summarise(Trips = mean(Trips, na.rm = TRUE), .groups = "drop")
# 4. Keep the Region/Purpose row(s) whose average equals the overall maximum.
#    ungroup() ensures max() is taken over all rows, not within a group.
max_trips <- filter(ungroup(avg_trips), Trips == max(Trips))
max_trips
## # A tibble: 1 × 3
## Region Purpose Trips
## <chr> <chr> <dbl>
## 1 Sydney Visiting 747.
# A3.Answer:
# Total trips per State in each quarter, collapsing Region and Purpose.
# summarise() on a tsibble keeps the Quarter index, so the result is still a
# tsibble, now keyed by State only.
state_tourism <- group_by(my_tourism, State) %>%
  summarise(Trips = sum(Trips)) %>%
  ungroup()
state_tourism
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter Trips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ℹ 630 more rows
# B1.Answer:
# Time plots of the four series, one dataset at a time.

# aus_production: Bricks
aus_production %>%
  autoplot(Bricks)

# pelt: Lynx
pelt %>%
  autoplot(Lynx)

# gafa_stock: Close
gafa_stock %>%
  autoplot(Close)

# vic_elec: Demand
vic_elec %>%
  autoplot(Demand)
#COMMENT:
# Bricks (`aus_production`): Shows a long-term upward trend with noticeable cyclical patterns and increased volatility after 1980, possibly reflecting economic cycles.
# Lynx (`pelt`): Clear cyclical behavior with peaks approximately every 10 years, indicating natural population dynamics or overharvesting effects.
# Close (`gafa_stock`): Strong exponential growth, especially after 2015, with volatility around 2018, reflecting tech stock market shifts.
#Demand (`vic_elec`): Strong seasonal and daily patterns, with peaks likely during extreme weather conditions (winter/summer) and spikes due to peak electricity usage times.
# Restrict the tourism data to the Snowy Mountains region.
snowy <- filter(my_tourism, Region == "Snowy Mountains")
# Question: Take the snowy data. Then sum up all trips in State and Purpose for each quarter of every year by using the summarise() command. Then use autoplot(), gg_season() and gg_subseries() to explore the quarterly trips of the snowy data. What do you observe? What type of pattern do you see? Write your comment in the Answer below:
# C2.Answer:
# Total trips per quarter for each State/Purpose combination in the Snowy
# Mountains data. summarise() on a tsibble always aggregates within each value
# of the index (Quarter) and returns a tsibble keyed by the grouping
# variables, so neither index_by(Quarter) nor a second as_tsibble() call is
# needed. ungroup() clears the leftover grouping, as is done for
# state_tourism.
snowy_summary <- snowy %>%
  group_by(State, Purpose) %>%
  summarise(total_trips = sum(Trips, na.rm = TRUE)) %>%
  ungroup()
# Same object under the name used by the plotting code.
snowy_tsibble <- snowy_summary
# Time plot of the quarterly totals.
snowy_tsibble %>%
  autoplot(total_trips) +
  labs(title = "Total Trips by Quarter", y = "Total Trips")

# Seasonal plot: each year drawn against the quarter of the year.
snowy_tsibble %>%
  gg_season(total_trips) +
  labs(title = "Seasonal Plot of Total Trips by Quarter", y = "Total Trips")

# Subseries plot: one panel per quarter, showing that quarter across years.
snowy_tsibble %>%
  gg_subseries(total_trips) +
  labs(title = "Subseries Plot of Total Trips by Quarter", y = "Total Trips")
#COMMENT:
#Autoplot :Shows an upward trend with seasonal peaks, especially in Q3 and Q4.
#Seasonal Plot (gg_season):Holiday trips peak in Q3, while business trips stay consistent throughout.
#Subseries Plot (gg_subseries): Q3 consistently has the highest trips, confirming strong seasonality.
#The data exhibits strong seasonality in trip purposes, particularly for "Holiday" trips, which consistently peak in Q3 (likely driven by vacation periods). Other purposes like "Business" and "Visiting" show a relatively stable trend with minor fluctuations across quarters. The cyclical nature of the data is clear, with consistent seasonal peaks observed annually.
# D1.Answer:
# For each series: lag scatterplots first, then the correlogram.

# Bricks (aus_production)
aus_production %>%
  gg_lag(Bricks, geom = "point")
autoplot(ACF(aus_production, Bricks)) +
  labs(title = "ACF of Bricks Production")

# Lynx (pelt)
pelt %>%
  gg_lag(Lynx, geom = "point")
autoplot(ACF(pelt, Lynx)) +
  labs(title = "ACF of Lynx Population")

# Demand (vic_elec)
vic_elec %>%
  gg_lag(Demand, geom = "point")
autoplot(ACF(vic_elec, Demand)) +
  labs(title = "ACF of Electricity Demand")
#COMMENT:
# Bricks from aus_production
# Lag Plot: Strong positive correlation with clear seasonality, indicating regular cyclical patterns in production.
# ACF Plot: High autocorrelation, with a slow decay pattern, confirming strong seasonality likely related to quarterly or annual cycles.
# Lynx from pelt
# Lag Plot: Scattered points suggest irregular cycles, which is common in wildlife population dynamics.
# ACF Plot: Cyclical peaks indicate periodic fluctuations in the population, potentially driven by ecological cycles (e.g., predator-prey interactions).
# Victorian Electricity Demand from vic_elec
# Lag Plot: Strong correlation at short lags, showing predictability in daily demand, with more variability at higher lags.
# ACF Plot: High autocorrelation at 24-lag intervals, confirming daily cycles in electricity demand, with some external influences causing variability at longer lags.
# D2.Answer (Bricks from aus_production):
# Seasonality: Clear seasonality observed, with repeating patterns every year.
# Cyclicity: No significant cyclicity beyond seasonality.
# Trend: Upward trend with increasing production over time.
# D2.Answer (Lynx from pelt):
# Seasonality: No strong seasonality detected.
# Cyclicity: Clear cyclic patterns with sharp population booms and busts.
# Trend: No long-term trend, highly volatile.
# D2.Answer (Demand from vic_elec):
# Seasonality: Strong seasonality visible, especially at quarterly intervals.
# Cyclicity: Cyclic patterns related to seasonal demand fluctuations.
# Trend: Gradual upward trend in electricity demand.
# E1. Answer:
# Google (GOOG) daily prices from 2018 onwards. The calendar dates have gaps,
# so the rows are numbered consecutively as trading days and that counter
# becomes the new, regular index. `diff` is the first difference of Close,
# i.e. the day-to-day change in the closing price.
goog <- gafa_stock %>%
  filter(Symbol == "GOOG", year(Date) >= 2018) %>%
  mutate(trading_day = row_number()) %>%
  update_tsibble(index = trading_day, regular = TRUE) %>%
  mutate(diff = difference(Close))
# Time plot of the daily changes in the closing price.
goog %>%
  autoplot(diff) +
  labs(
    title = "First Difference of Google Stock Prices",
    y = "Daily Change in Stock Price"
  )
# COMMENT: The first difference of the Google stock prices shows the daily change in price. This transformation removes trends and helps to stabilize the variance of the series.

# Correlogram of the differenced series.
autoplot(ACF(goog, diff)) +
  labs(title = "ACF of First Differences of Google Stock Prices")
# E2.Answer:
#COMMENT: Based on the ACF plot, the first differenced series is not purely white noise. There are significant correlations at some lags, indicating that the series retains some structure and is not completely random.