#Import needed libraries
library(tsibbledata) #to use the time series data in it for the exercises.
library(tsibble) # to use function as_tsibble
library(tibble) # to use view function
library(ggplot2)
library(feasts) # to use the functions for graphics like autoplot()
library(readr) # to uses read_csv function
library(dplyr) # to use Filter, mutate function etc
library(tidyr) # to use pivot_longer function
library(USgas) # to use us_total data
library(fpp3) # to use us_gasoline dataset
library(seasonal) # X-13ARIMA-SEATS decomposition
global_economy |>
autoplot(GDP/Population,show.legend = FALSE) +
labs(title= "GDP per capita", y = "$USD")
## Warning: Removed 3242 rows containing missing values or values outside the scale range
## (`geom_line()`).
global_economy |>
mutate(GDPperCapita = GDP/Population) |>
index_by(Year) |>
filter(GDPperCapita == max(GDPperCapita, na.rm = TRUE)) |>
select(Country, GDPperCapita) |>
arrange(Year) |>
ggplot(aes(x = Year, y = GDPperCapita, fill = Country)) +
geom_bar(stat = "identity") +
labs(title= "GDP per capita", y = "$USD") +
scale_x_continuous(breaks = seq(min(global_economy$Year), max(global_economy$Year), by = 5))
# before transformation
global_economy |>
filter(Country == "United States") |>
autoplot(GDP) +
labs(title= "United States GDP", y = "$USD")+
scale_x_continuous(breaks = seq(min(global_economy$Year), max(global_economy$Year), by = 5))+
scale_y_continuous(labels = scales::comma)
# performing population adjustments by dividing GDP by population
global_economy |>
filter(Country == "United States") |>
autoplot(GDP/Population) +
labs(title= "United States GDPperCapita", y = "$USD")+
scale_x_continuous(breaks = seq(min(global_economy$Year), max(global_economy$Year), by = 5))+
scale_y_continuous(labels = scales::comma)
# applying transformation
us_economy <- global_economy |>
filter(Country == "United States") |>
mutate(GDPperCapita = GDP/Population)
lambda <- us_economy |>
features(GDPperCapita, features = guerrero) |>
pull(lambda_guerrero)
us_economy |>
autoplot(box_cox(GDPperCapita, lambda)) +
labs(y = "",title = paste("United States Transformed GDPperCapita with λ = ",round(lambda,2)))+
scale_x_continuous(breaks = seq(min(global_economy$Year), max(global_economy$Year), by = 5))
#original plot
aus_livestock |>
filter( Animal == 'Bulls, bullocks and steers' & State == 'Victoria') |>
autoplot(Count) +
labs(title= "Meat production in Australia for human consumption", y = "Number of animals slaughtered")
# create a refined set
aus_vic_livestock <- aus_livestock |>
filter( Animal == 'Bulls, bullocks and steers' & State == 'Victoria')
# Check distribution
aus_vic_livestock |>
ggplot(aes(x = Count)) +
geom_histogram(bins = 30) +
labs(title = "Distribution of Count")
# Check for heteroscedasticity
aus_vic_livestock |>
ggplot(aes(x = Month, y = Count)) +
geom_boxplot() +
labs(title = "Monthly Distribution of Counts")
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
# box con transformation
lambda <- aus_vic_livestock |>
features(Count, features = guerrero) |>
pull(lambda_guerrero)
aus_vic_livestock |>
autoplot(box_cox(Count, lambda)) +
labs(y = "",title = paste("Meat production in Australia for human consumption with λ = ",round(lambda,2)))
#log transformation
aus_livestock |>
filter( Animal == 'Bulls, bullocks and steers' & State == 'Victoria') |>
autoplot(log(Count)) +
labs(y = "Log(Count)", title= "Meat production in Australia for human consumption (log transformed)")
#original time series
autoplot(vic_elec, Demand)+
ggtitle("Half-hourly electricity demand for Victoria, Australia") +
xlab("Date ") +
ylab("Demand")
# Check distribution
vic_elec |>
ggplot(aes(x = Demand)) +
geom_histogram(bins = 30) +
labs(title = "Distribution of Count")
# Check for heteroscedasticity
vic_elec |>
ggplot(aes(x = Time, y = Demand)) +
geom_boxplot() +
labs(title = "Monthly Distribution of Counts")
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
# using yearmonth class function to transform to a monthly time series
vic_elec_month <- vic_elec |>
mutate(YearMonth = yearmonth(Date)) |>
index_by(YearMonth) |>
summarise(Demand = sum(Demand))
# box con transformation
lambda <- vic_elec_month |>
features(Demand, features = guerrero) |>
pull(lambda_guerrero)
print(lambda)
## [1] -0.8999268
autoplot(vic_elec_month,Demand)+
ggtitle("Half-hourly electricity demand for Victoria, Australia") +
xlab("Month ") +
ylab("Demand")
# Method 2: Simple reciprocal transformation (1/x)
vic_elec_month |>
mutate(Demand_reciprocal = 1/Demand) |>
autoplot(Demand_reciprocal) +
ggtitle("Reciprocal transformed electricity demand") +
xlab("Month") +
ylab("1/Demand")
#original time series
aus_production |>
autoplot(Gas) +
labs ( title = "Quarterly production of Gas in Australia", y = "Gas production in petajoules")
# Check distribution
aus_production |>
ggplot(aes(x = Gas)) +
geom_histogram(bins = 30) +
labs(title = "Distribution of Count")
# Check for heteroscedasticity
aus_production |>
ggplot(aes(x = Quarter, y = Gas)) +
geom_boxplot() +
labs(title = "Monthly Distribution of Counts")
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
# Estimate the lambda
lambda <- aus_production |>
features(Gas, features = guerrero) |>
pull(lambda_guerrero)
aus_production |>
autoplot(box_cox(Gas, lambda)) +
labs(y = "",
title = paste("Transformed gas production with = ",round(lambda,2)))
# perform log transformation
aus_production |>
autoplot(log(Gas)) +
labs ( title = "Quarterly production of Gas in Australia (log transformed)", y = "Log(Gas production in petajoules)")
autoplot(canadian_gas, Volume)+
labs( y = "gas production in billions of cubic metres", title ="Monthly Canadian gas production")
# Check distribution
canadian_gas |>
ggplot(aes(x = Volume)) +
geom_histogram(bins = 30) +
labs(title = "Distribution of Count")
# Check for heteroscedasticity
canadian_gas |>
ggplot(aes(x = Month, y = Volume)) +
geom_boxplot() +
labs(title = "Monthly Distribution of Counts")
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
# Estimate the lambda
lambda <- canadian_gas |>
features(Volume, features = guerrero) |>
pull(lambda_guerrero)
canadian_gas |>
autoplot(box_cox(Volume, lambda)) +
labs(y = "",
title = paste("Transformed gas production with λ = ",round(lambda,2)))
set.seed(12345698)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries,Turnover)+
labs( y = "Retail turnover in $Million AUD", title = "Australian retail trade turnover")
gg_season(myseries,Turnover)+
labs( y = "Retail turnover in $Million AUD", title = "Australian retail trade turnover")
gg_subseries(myseries,Turnover)+
labs( y = "Retail turnover in $Million AUD", title = "Australian retail trade turnover")
# Estimate the lambda
lambda <- myseries |>
features(Turnover, features = guerrero) |>
pull(lambda_guerrero)
myseries |>
autoplot(box_cox(Turnover, lambda)) +
labs(y = "",
title = paste("Transformed Australian retail trade turnover = ",round(lambda,2)))
autoplot(myseries,sqrt(Turnover))+
labs( y = "SQRT(Retail turnover in $Million AUD)", title = "Australian retail trade turnover( SQRT transformed)")
aus_production |>
autoplot(Tobacco) +
labs(title= "Quarterly Production of Tobbaco" , y = "Tobacco and cigarette production in tonnes")
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Generating the lambda
lambda <- aus_production %>%
features(Tobacco,features = guerrero) %>%
pull(lambda_guerrero)
aus_production |>
autoplot(box_cox(Tobacco, lambda)) +
labs(y = "Tobacco and cigarette production in tonnes",
title = paste("Transformed Tobacco production with λ = ",round(lambda,2)))
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
mel_syd_ansett <- ansett |>
filter(Airports == "MEL-SYD" & Class == "Economy")
autoplot(mel_syd_ansett,Passengers) + labs(title = "Passenger numbers on Ansett airline flights" , y = "Total air passengers travelling with Ansett")+
scale_y_continuous(labels = scales::comma)
#Generating the lambda
lambda <- mel_syd_ansett %>%
features(Passengers,features = guerrero) %>%
pull(lambda_guerrero)
mel_syd_ansett |>
autoplot(box_cox(Passengers, lambda)) +
labs(y = "(Total air passengers travelling with Ansett)^2",
title = paste("Square Transformed Passenger numbers on Ansett airline flights with λ = ",round(lambda,2))) +
scale_y_continuous(labels = scales::comma)
#original time series
pedestrian |>
filter(Sensor == "Southern Cross Station") |>
autoplot(Count) + labs(title = "Pedestrian counts in the city of Melbourne", y = "Hourly pedestrian counts")
# Convert to weekly time series
weekly_pedestrian <- pedestrian |>
filter(Sensor == "Southern Cross Station") |>
mutate(YearWeek = yearweek(Date)) |>
index_by(YearWeek) |>
summarise(Count = sum(Count))
lambda <- weekly_pedestrian |>
features(Count,features = guerrero) %>%
pull(lambda_guerrero)
print(lambda)
## [1] -0.1108714
weekly_pedestrian |>
autoplot(box_cox(Count,lambda)) +
labs(title = paste("Log Transformed Pedestrian counts in the city of Melbourne with λ = ",round(lambda,2)),
y = "log(Weekly pedestrian counts)")
Answer: a) Gas production in Australia for the years 2005 to 2010, based on the time series is indicating a upward trend with a seasonality fluctuation. It increases from Q1 to Q3 and decreases in Q4 for these years.
The classical multiplicative decomposition shows the trend cycle is general increasing without the other components. The seasonal index shows the repeating pattern each year. The remainder index shows values around 1.0 indicate normal variation.
The results support the graphical interpretation from part a.
After computing and plotting the seasonal adjusted data, i can see the true trend cycle without the seasonal patterns. Thus better comparison across the year like a dip in 2008 Q2.
After changing one observation 2010 q4 to be an outlier at the middle of the time series and recomputing the seasonally adjusted data,i see that it significantly affected the trend cycle. the trend is not showing that it is upward trend and highlighting a spike.
If i change the outlier to be added to the near end of the time series, there is a difference in where the outlier is placed showing the spike and accordingly a difference in the season adjusted representation.
# gas production last 5 years
gas_aus_production <- tail(aus_production, 5*4) |>
select(Gas)
# plot the time series
autoplot(gas_aus_production, Gas)+labs ( title = "Quarterly production of Gas in Australia", y = "Gas production in petajoules")
#Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices.
gas_aus_production |>
model(
classical_decomposition(Gas, type = "multiplicative")
) |>
components() |>
autoplot() +
labs(title = "Classical multiplicative decomposition of Quarterly production of Gas in Australia")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Compute and plot the seasonally adjusted data.
gas_aus_production |>
model(
classical_decomposition(Gas, type = "multiplicative")
) |>
components() |>
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, color = "Original")) +
geom_line(aes(y = season_adjust, color = "Seasonally Adjusted")) +
labs(title = "Original vs Seasonally Adjusted Gas Production",
y = "Gas Production",
color = "Series") +
scale_color_manual(values = c("Original" = "blue", "Seasonally Adjusted" = "red"))
## Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data.
gas_aus_production_test <- gas_aus_production
gas_aus_production_test$Gas[10] = gas_aus_production_test$Gas[10] +300
## Re Compute and plot the seasonally adjusted data showing the effect of the outlier
gas_aus_production_test |>
model(
classical_decomposition(Gas, type = "multiplicative")
) |>
components() |>
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, color = "Original")) +
geom_line(aes(y = season_adjust, color = "Seasonally Adjusted")) +
labs(title = "Original vs Seasonally Adjusted Gas Production( outlier in middle of series)",
y = "Gas Production",
color = "Series") +
scale_color_manual(values = c("Original" = "blue", "Seasonally Adjusted" = "red"))
#change the outlier to be added to the near end of the time series
gas_aus_production_new <- gas_aus_production
gas_aus_production_new$Gas[19] = gas_aus_production_new$Gas[19] +300
## Re Compute and plot the seasonally adjusted data showing the effect of the outlier
gas_aus_production_new |>
model(
classical_decomposition(Gas, type = "multiplicative")
) |>
components() |>
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, color = "Original")) +
geom_line(aes(y = season_adjust, color = "Seasonally Adjusted")) +
labs(title = "Original vs Seasonally Adjusted Gas Production(outlier in the near end of the series)",
y = "Gas Production",
color = "Series") +
scale_color_manual(values = c("Original" = "blue", "Seasonally Adjusted" = "red"))
Answer: The retail time series data is indicating a changing direction trend as its on a upward trend from 1982 to 2002. Then it is decreasing till 2006 and so on.The seasonality showed the retail turnover is highest in December over the years and relatively lowest in February. Decomposing the series using X-11 revealed the key outliers that was not clearly visible previously. This is evident in the irregular component which shows spikes around 1990 to 2000.
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries,Turnover)+
labs( y = "Retail turnover in $Million AUD", title = "Australian retail trade turnover")
x11_dcmp <- myseries |>
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) |>
components()
autoplot(x11_dcmp) +
labs(title =
"Decomposition of Australian retail trade turnover using X-11.")
Answer: