library(fpp3)
library(tidyverse)
library(readxl)
library(tsibble)
library(ggplot2)
#create a tsibble format of the data below, and rename it as my_tourism:
# A1.Answer:
# Read the raw tourism spreadsheet (the path is specific to the author's machine)
tourism_data <- read_excel("/Users/farihaarpa/Downloads/tourism-4.xlsx")

# Build the tsibble: parse Quarter into a yearquarter index, then identify
# each series by its Region, State and Purpose.
my_tourism <- as_tsibble(
  mutate(tourism_data, Quarter = yearquarter(Quarter)),
  index = Quarter,
  key = c(Region, State, Purpose)
)

# Inspect column names, column types and the leading values
glimpse(my_tourism)
## Rows: 24,320
## Columns: 5
## Key: Region, State, Purpose [304]
## $ Quarter <qtr> 1998 Q1, 1998 Q2, 1998 Q3, 1998 Q4, 1999 Q1, 1999 Q2, 1999 Q3,…
## $ Region <chr> "Adelaide", "Adelaide", "Adelaide", "Adelaide", "Adelaide", "A…
## $ State <chr> "South Australia", "South Australia", "South Australia", "Sout…
## $ Purpose <chr> "Business", "Business", "Business", "Business", "Business", "B…
## $ Trips <dbl> 135.0777, 109.9873, 166.0347, 127.1605, 137.4485, 199.9126, 16…
Outcome: The my_tourism dataset is now a tsibble with Quarter as the time index and Region, State, and Purpose as keys. The glimpse output will show the structure of the dataset, including column names, data types, and sample values.
### A2. To analyze the data, first view my_tourism set, and see the dates of the data.
# A2.Answer:
# Open the tsibble in the data viewer
view(my_tourism)

# 1. Report the earliest quarter in the index. The <qtr> type of the Quarter
#    column already shows that the data are quarterly rather than annual.
cat("The data starts from:", as.character(min(my_tourism[["Quarter"]])), "\n")
## The data starts from: 1998 Q1
The data are quarterly (the Quarter index has type <qtr>) and start in 1998 Q1.
#### 2. Use the table(State, Purpose) command to see the cross-table. How many States are there in the data? How many Purpose-of-trip categories are there?
# Cross-tabulate the number of observations (rows) per State and Purpose.
# The number of rows in the result is the number of States; the number of
# columns after `State` is the number of Purpose categories.
# as_tibble() drops the tsibble structure first so the count is taken over
# State/Purpose pairs only, and pivot_wider() replaces the superseded
# tidyr::spread().
state_purpose_table <- my_tourism %>%
  as_tibble() %>%
  count(State, Purpose) %>%
  pivot_wider(names_from = Purpose, values_from = n, values_fill = 0)
print(state_purpose_table)
## # A tibble: 8 × 5
## State Business Holiday Other Visiting
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ACT 80 80 80 80
## 2 New South Wales 1040 1040 1040 1040
## 3 Northern Territory 560 560 560 560
## 4 Queensland 960 960 960 960
## 5 South Australia 960 960 960 960
## 6 Tasmania 400 400 400 400
## 7 Victoria 1680 1680 1680 1680
## 8 Western Australia 400 400 400 400
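The table shows the number of observations (rows) for each State and Purpose combination, not the number of trips. There are 8 States and 4 Purpose categories (Business, Holiday, Other, Visiting). Within a state every purpose has the same count because each Region–Purpose series covers the same 80 quarters (24,320 rows / 304 series = 80): for example, ACT has 1 region × 80 quarters = 80 rows per purpose, and Victoria has 21 regions × 80 quarters = 1,680 rows per purpose. The uniform counts therefore reflect a balanced panel, not uniform tourism patterns.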
#### 3. Group the data by Region and Purpose by using the group_by() command. To eliminate the time effect, use tibble format (i.e., as_tibble() %>% group_by(Region, Purpose)).
# Convert to a plain tibble (dropping the time index), then group the rows
# by Region and Purpose.
grouped_data <- group_by(as_tibble(my_tourism), Region, Purpose)
print(grouped_data)
## # A tibble: 24,320 × 5
## # Groups: Region, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ℹ 24,310 more rows
#### 4. After grouping the data in #3, use the summarise() function to get the average of Trips for each combination, and assign it as Trips (i.e., summarise(Trips = mean(Trips))).
# Average Trips within each Region/Purpose group; NA values are ignored
average_trips <- summarise(
  grouped_data,
  Trips = mean(Trips, na.rm = TRUE)
)
print(average_trips)
## # A tibble: 304 × 3
## # Groups: Region [76]
## Region Purpose Trips
## <chr> <chr> <dbl>
## 1 Adelaide Business 156.
## 2 Adelaide Holiday 157.
## 3 Adelaide Other 56.6
## 4 Adelaide Visiting 205.
## 5 Adelaide Hills Business 2.66
## 6 Adelaide Hills Holiday 10.5
## 7 Adelaide Hills Other 1.40
## 8 Adelaide Hills Visiting 14.2
## 9 Alice Springs Business 14.6
## 10 Alice Springs Holiday 31.9
## # ℹ 294 more rows
# 5. Keep the Region/Purpose row(s) whose average Trips equals the overall
#    maximum. Grouping is removed first so that max() is taken over all rows
#    rather than within each Region.
max_trips_region <- filter(ungroup(average_trips), Trips == max(Trips))
print(max_trips_region)
## # A tibble: 1 × 3
## Region Purpose Trips
## <chr> <chr> <dbl>
## 1 Sydney Visiting 747.
# A3.Answer:
# Add up Trips within each State. my_tourism is a tsibble, so the totals are
# kept per Quarter (one row per State and Quarter).
state_tourism <- ungroup(
  summarise(group_by(my_tourism, State), Trips = sum(Trips))
)

# Show the state-level quarterly totals
print(state_tourism)
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter Trips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ℹ 630 more rows
# Time plot of Bricks from aus_production
autoplot(aus_production, Bricks) +
  scale_y_continuous(labels = scales::comma) + # thousands separators on y
  labs(
    x = "Year",
    y = "Bricks Produced",
    title = "Bricks Production Over Time",
    subtitle = "Trend and Seasonality in Bricks Production"
  ) +
  theme_minimal() +
  # Text sizes are applied after theme_minimal() so they are not overridden
  theme(
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12),
    plot.subtitle = element_text(size = 12, color = "gray40"),
    plot.title = element_text(size = 16, face = "bold")
  )
##Bricks production Over Time: The graph titled “Bricks Production Over Time” illustrates the trend and seasonality in brick production from around 1960 to the early 2000s. Key observations include: The y-axis represents the number of bricks produced, while the x-axis represents time in quarters (Q1). There is a strong upward trend in brick production from 1960 to around 1980, peaking at over 600 units. After 1980, production becomes more volatile, showing sharp declines and recoveries. A significant drop is observed in the late 1980s, followed by fluctuating production levels. The graph suggests cyclical patterns, possibly reflecting economic cycles, demand variations, or policy changes affecting construction.
# Time plot of Lynx from pelt, with a red loess smoother as a trend line
autoplot(pelt, Lynx) +
  geom_smooth(method = "loess", color = "red", se = FALSE) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    x = "Year",
    y = "Lynx Population",
    title = "Lynx Population Over Time",
    subtitle = "Cyclical Patterns in Lynx Population"
  ) +
  theme_minimal() +
  # Text sizes are applied after theme_minimal() so they are not overridden
  theme(
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12),
    plot.subtitle = element_text(size = 12, color = "gray40"),
    plot.title = element_text(size = 16, face = "bold")
  )
##The graph titled “Lynx Population Over Time” displays cyclical patterns in the lynx population over several decades. Key observations include: The black line represents the fluctuations in lynx population over time, showing periodic spikes and declines. The red line represents a smoothed trend, indicating a long-term pattern. The population peaks and declines occur in cycles, suggesting a repeating trend over time. The overall trend appears relatively stable with minor fluctuations in the long-term trend.
# Time plot of Close from gafa_stock, restricted to the GOOG symbol
autoplot(gafa_stock %>% filter(Symbol == "GOOG"), Close) +
  labs(
    title = "Google Stock Price Over Time",
    subtitle = "Daily Closing Prices of Google Stock",
    y = "Close Price (USD)",
    x = "Date"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 12, color = "gray40"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  ) +
  scale_y_continuous(labels = scales::dollar) + # Format y-axis as currency
  # Overlay a blue line on top of autoplot()'s default line. `linewidth` is
  # used because `size` is deprecated for lines as of ggplot2 3.4.0.
  geom_line(color = "blue", linewidth = 0.8)
##The graph titled “Google Stock Price Over Time” displays the daily closing prices of Google stock from around 2014 to 2018. Key outcomes from the chart include:
Overall Upward Trend: The stock price shows a significant long-term increase, moving from around $500 in 2014 to over $1,250 at its peak in 2018. Periods of Stability and Growth: There are phases where the stock price remains relatively stable, followed by sharp increases, particularly around 2015-2016. Volatility in Later Years: The stock price exhibits higher volatility after 2017, with sharp rises and falls. Recent Decline: After peaking in 2018, the price experiences a noticeable drop, suggesting potential market corrections or external factors affecting stock performance.
# Time plot of Demand from vic_elec with a translucent fill under the curve
autoplot(vic_elec, Demand) +
  geom_area(fill = "steelblue", alpha = 0.5) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    x = "Time",
    y = "Demand (MW)",
    title = "Victorian Electricity Demand Over Time",
    subtitle = "Half-Hourly Electricity Demand in Victoria"
  ) +
  theme_minimal() +
  # Text sizes are applied after theme_minimal() so they are not overridden
  theme(
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12),
    plot.subtitle = element_text(size = 12, color = "gray40"),
    plot.title = element_text(size = 16, face = "bold")
  )
##The graph titled “Victorian Electricity Demand Over Time” illustrates half-hourly electricity demand in Victoria over a period from 2012 to 2015. Key observations include:
The y-axis represents electricity demand in megawatts (MW), while the x-axis shows the timeline from 2012 to 2015. There is a clear cyclical pattern in demand, with regular fluctuations likely corresponding to daily and seasonal usage variations. Peaks in demand appear to occur periodically, possibly due to extreme weather conditions requiring increased heating or cooling. The shaded area is the fill under the demand curve added by geom_area(); the black line is the demand series itself, and its spikes mark sudden surges in demand.
# Keep only the Snowy Mountains rows, then fill any missing time periods
snowy <- fill_gaps(filter(my_tourism, Region == "Snowy Mountains"))
Question: Take the snowy data. Then sum up all trips across State and Purpose for each quarter of every year by using the summarise() command. Then use autoplot(), gg_season() and gg_subseries() to explore the quarterly trips of the snowy data. What do you observe? What type of pattern do you see? Write your comment in the Answer below:
# Summarize total trips across State & Purpose per quarter.
# This is done before plotting because the exercise asks for all three plots
# to explore the quarterly totals, not the separate per-Purpose series.
snowy_summary <- snowy %>%
  index_by(Quarter) %>%
  summarise(Trips = sum(Trips, na.rm = TRUE)) # ignore NAs added by fill_gaps()

# Seasonal plot of the quarterly totals
gg_season(snowy_summary, Trips) +
  labs(title = "Seasonal Plot of Trips in Snowy Mountains", y = "Trips")

# Subseries plot of the quarterly totals
gg_subseries(snowy_summary, Trips) +
  labs(title = "Subseries Plot of Trips in Snowy Mountains", y = "Trips")

# Time series plot of the quarterly totals
autoplot(snowy_summary, Trips) +
  labs(title = "Quarterly Trips in Snowy Mountains", y = "Trips")
Key Observations: Seasonal plot of trips in Snowy Mountains: The seasonal plot shows the distribution of trips across the four quarters (Q1, Q2, Q3, Q4). Peaks in Q1 (first quarter) likely indicate higher tourism during summer, while lower values in Q3 (third quarter) suggest fewer trips in winter. The plot helps identify seasonal patterns in tourism. Subseries plot of trips in Snowy Mountains: The subseries plot breaks down the trip data by quarter (Q1, Q2, Q3, Q4) over multiple years. It helps identify seasonal patterns and trends within each quarter. For example, higher trips in Q1 (summer) and lower trips in Q3 (winter) suggest strong seasonality. This plot is useful for understanding how tourism varies across different times of the year. Quarterly trips in Snowy Mountains: Trend: The plot may show an overall upward, downward, or stable trend in tourism over the 20-year period (80 quarters starting in 1998 Q1). Seasonality: Regular peaks and troughs suggest seasonal patterns (e.g., higher trips in summer, lower in winter). Cyclical Patterns: Irregular fluctuations may indicate external influences (e.g., economic conditions, events).
# D1.Answer:
# 1. Bricks from aus_production
# Lag plot: Bricks plotted against its own lagged values, drawn as points
gg_lag(aus_production, Bricks, geom = "point") +
  labs(
    x = "Lagged Bricks Production",
    y = "Bricks Production",
    title = "Lag Plot for Bricks Production",
    subtitle = "Relationship Between Bricks Production and Its Lags"
  ) +
  theme_minimal()
The plot highlights a strong autocorrelation in bricks production,
implying that past production levels are predictive of future output.
The seasonal colors suggest potential periodic patterns in the data.
# ACF plot: autocorrelation of Bricks at successive lags
autoplot(ACF(aus_production, Bricks)) +
  labs(
    x = "Lag",
    y = "ACF",
    title = "ACF Plot for Bricks Production",
    subtitle = "Autocorrelation in Bricks Production"
  ) +
  theme_minimal()
ACF plot for Bricks production: Bricks production exhibits a persistent
autocorrelation, suggesting a predictable trend over time. This can
indicate seasonality, trend components, or cyclical patterns, which
should be accounted for in forecasting models.
# 2. Lynx from pelt
# Lag plot: Lynx plotted against its own lagged values, drawn as points
gg_lag(pelt, Lynx, geom = "point") +
  labs(
    x = "Lagged Lynx Population",
    y = "Lynx Population",
    title = "Lag Plot for Lynx Population",
    subtitle = "Relationship Between Lynx Population and Its Lags"
  ) +
  theme_minimal()
Lag Plot for Lynx Population: The lynx population likely follows a cyclical pattern with periodic fluctuations, but the relationship between past and future values weakens over time. This suggests the presence of external ecological factors influencing population changes, such as food availability, predation, or environmental conditions.
# ACF plot: autocorrelation of Lynx at successive lags
autoplot(ACF(pelt, Lynx)) +
  labs(
    x = "Lag",
    y = "ACF",
    title = "ACF Plot for Lynx Population",
    subtitle = "Autocorrelation in Lynx Population"
  ) +
  theme_minimal()
The ACF Plot for Lynx Population shows the autocorrelation of the Lynx population data at different lags. The plot indicates significant autocorrelation at specific lags, suggesting cyclical patterns in the population, likely corresponding to a 10-year cycle influenced by ecological factors like predator-prey dynamics. The decaying pattern in autocorrelation highlights long-term dependencies in the data.
# 3. Victorian Electricity Demand from vic_elec
# Lag plot: Demand plotted against its own lagged values, drawn as points
gg_lag(vic_elec, Demand, geom = "point") +
  labs(
    x = "Lagged Demand",
    y = "Demand",
    title = "Lag Plot for Victorian Electricity Demand",
    subtitle = "Relationship Between Electricity Demand and Its Lags"
  ) +
  theme_minimal()
The lag plot shows the relationship between Victorian electricity demand and its past values across nine time lags. It highlights a strong positive correlation, indicating that past demand significantly influences future demand. The color coding represents different times of the day.
# ACF plot: autocorrelation of Demand at successive lags
autoplot(ACF(vic_elec, Demand)) +
  labs(
    x = "Lag",
    y = "ACF",
    title = "ACF Plot for Victorian Electricity Demand",
    subtitle = "Autocorrelation in Electricity Demand"
  ) +
  theme_minimal()
This ACF (Autocorrelation Function) plot for Victorian electricity
demand shows the correlation between electricity demand and its past
values at different lags. The high autocorrelation at lower lags
suggests strong short-term dependencies, while the repeating pattern
indicates possible seasonal trends. The blue dashed lines represent
significance thresholds.
# D2.Answer (Bricks from aus_production):
# - **Trend**: The ACF plot shows a slow decay in autocorrelation, indicating a potential long-term trend in bricks production.
# - **Seasonality**: The data are quarterly, so annual seasonality appears as significant spikes at lags that are multiples of 4 (4, 8, 12, ...), likely due to seasonal demand for bricks.
# - **Cyclicity**: No clear cyclical pattern is observed beyond the annual seasonality.
# - **Insight**: Bricks production is influenced by both a long-term trend and strong annual seasonality, with higher production likely during certain times of the year.
# D2.Answer (Lynx from pelt):
# - **Trend**: The ACF plot shows a slow decay, suggesting a long-term trend or dependency in the Lynx population.
# - **Seasonality**: No clear seasonal pattern is observed, as the spikes in the ACF plot are not at regular intervals.
# - **Cyclicity**: Significant spikes at lags around 10 and 20 indicate a cyclical pattern, likely corresponding to a 10-year population cycle driven by ecological factors (e.g., predator-prey dynamics).
# - **Insight**: The Lynx population exhibits a cyclical pattern with a period of approximately 10 years, consistent with known ecological cycles.
# D2.Answer (Demand from vic_elec):
# - **Trend**: The ACF plot shows a slow decay, indicating a potential long-term trend in electricity demand.
# - **Seasonality**: Significant spikes at lags 48 (daily) and 336 (weekly) indicate strong daily and weekly seasonality. This reflects regular patterns in electricity usage (e.g., higher demand during the day and lower demand at night).
# - **Cyclicity**: No clear cyclical pattern is observed beyond the daily and weekly seasonality.
# - **Insight**: Electricity demand is highly seasonal, with strong daily and weekly patterns driven by human activity. There may also be a long-term trend due to population growth or changes in energy consumption habits.
Key Observations:
Bricks Production: Dominated by annual seasonality and a long-term trend. Reflects seasonal demand for construction materials. Lynx Population: Exhibits a 10-year cyclical pattern. Reflects ecological dynamics, such as predator-prey relationships. Electricity Demand: Shows strong daily and weekly seasonality. Reflects regular human activity patterns, with potential long-term trends due to population growth or technological changes.
# Build the differenced Google series in a single pipeline:
#   1. keep GOOG rows dated 2018 or later;
#   2. number the rows as consecutive trading days and use that counter as a
#      regular index (the dates skip non-trading days, e.g. 2018-01-05 is
#      followed by 2018-01-08);
#   3. take the first difference of Close.
goog <- gafa_stock %>%
  filter(Symbol == "GOOG", year(Date) >= 2018) %>%
  mutate(trading_day = row_number()) %>%
  update_tsibble(index = trading_day, regular = TRUE) %>%
  mutate(diff = difference(Close))

# Show the first few rows (diff is NA for the first trading day)
head(goog)
## # A tsibble: 6 x 10 [1]
## # Key: Symbol [1]
## Symbol Date Open High Low Close Adj_Close Volume trading_day diff
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl>
## 1 GOOG 2018-01-02 1048. 1067. 1045. 1065 1065 1237600 1 NA
## 2 GOOG 2018-01-03 1064. 1086. 1063. 1082. 1082. 1430200 2 17.5
## 3 GOOG 2018-01-04 1088 1094. 1084. 1086. 1086. 1004600 3 3.92
## 4 GOOG 2018-01-05 1094 1104. 1092 1102. 1102. 1279100 4 15.8
## 5 GOOG 2018-01-08 1102. 1111. 1102. 1107. 1107. 1047600 5 4.71
## 6 GOOG 2018-01-09 1109. 1111. 1101. 1106. 1106. 902500 6 -0.680
# E2.Answer:
# Plot the ACF of the first differences of the Google closing price
ACF(goog, diff) %>%
  autoplot() +
  labs(title = "ACF Plot of Google Stock Price Differences")

# Interpretation
cat("The ACF plot shows no significant autocorrelations, indicating that the differences are consistent with white noise.\n")
## The ACF plot shows no significant autocorrelations, indicating that the differences are consistent with white noise.
Summary of the ACF Plot for Google Stock Price Differences
Purpose: The ACF plot shows the autocorrelation between the first differences of Google’s stock price and its lagged values. It helps determine if the differenced series is stationary (free of trends and seasonality).
Key Observations: Significant Lags: If any bars at lags 1, 2, etc., cross the blue dashed confidence intervals, it indicates that the differenced series still has some dependency on its past values. Stationarity: If most bars are within the confidence intervals, the differenced series is likely stationary, meaning the first differencing has effectively removed trends and seasonality. White Noise: If the bars show no significant patterns and are mostly within the confidence intervals, the differenced series is close to white noise (random fluctuations with no predictable structure). Insights: The ACF plot helps identify whether the first differencing has successfully made the series stationary.