Packages

unlink("C:/Users/91976/R-4.2.2/library/00LOCK", recursive = TRUE)

library(scales)

## Warning: package 'scales' was built under R version 4.2.3

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.2.3

library(fabletools)

## Warning: package 'fabletools' was built under R version 4.2.3

library(broom)

## Warning: package 'broom' was built under R version 4.2.3

library(fable)

## Warning: package 'fable' was built under R version 4.2.3

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.2.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(ggplot2)
library(tidyr)
library(tsibble)

## Warning: package 'tsibble' was built under R version 4.2.3

## 
## Attaching package: 'tsibble'

## The following object is masked from 'package:lubridate':
## 
##     interval

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

library(tsibbledata)

## Warning: package 'tsibbledata' was built under R version 4.2.3

library(magrittr)

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:tidyr':
## 
##     extract

library(dplR)

## This is dplR version 1.7.4.
## dplR is part of openDendro https://opendendro.org.
## New users can visit https://opendendro.github.io/dplR-workshop/ to get started.

library(forecast)

## Warning: package 'forecast' was built under R version 4.2.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(tsibble)
library(dplyr)
library(ggplot2)
library(feasts)

## Warning: package 'feasts' was built under R version 4.2.3

library(fable)
library(fabletools)
library(broom)

Chapter 7

Question 1

# Load the data
vic_elec <- tsibbledata::vic_elec

# Filter and summarize the data for January 2014
jan14_vic_elec <- vic_elec %>%
  filter(yearmonth(Time) == yearmonth("2014 Jan")) %>%
  index_by(Date = as_date(Time)) %>%
  summarise(
    Demand = sum(Demand),
    Temperature = max(Temperature)
  )


# Plot Demand and Temperature over time
jan14_vic_elec %>%
  gather("Variable", "Value", Demand, Temperature) %>%
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  facet_grid(vars(Variable), scales = "free_y") +
  labs(title = "Electricity Demand and Temperature in January 2014") +
  theme(plot.title = element_text(hjust = 0.5))

# Another scatter plot of Demand vs Temperature without a regression line
jan14_vic_elec %>%
  ggplot(aes(x = Temperature, y = Demand)) +
  geom_point()

# Scatter plot of Demand vs Temperature with a linear regression line
jan14_vic_elec %>%
  ggplot(aes(x = Temperature, y = Demand)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(title = "Electricity Demand vs Temperature") +
  theme(plot.title = element_text(hjust = 0.5))

## `geom_smooth()` using formula = 'y ~ x'

# Fitting the linear model using lm for easier residual diagnostics
fit <- lm(Demand ~ Temperature, data = jan14_vic_elec)

# Producing a residual plot using broom and ggplot2
augmented_fit <- augment(fit)

# Residual plot
augmented_fit %>%
  ggplot(aes(x = .fitted, y = .resid)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(title = "Residual Plot", x = "Fitted Values", y = "Residuals") +
  theme(plot.title = element_text(hjust = 0.5))

# Fit a linear model using tslm
fit <- jan14_vic_elec %>% model(tslm = TSLM(Demand ~ Temperature))

# Extract the residuals
residuals <- fit %>% augment() %>% pull(.innov)

# Create a time index for residuals
residuals_index <- jan14_vic_elec$Date

# Create a data frame with residuals and time index
residuals_df <- data.frame(Date = residuals_index, Residuals = residuals)

ggplot(residuals_df, aes(x = Date, y = Residuals)) +
  geom_line() +
  labs(title = "Innovation Residuals Plot") +
  theme(plot.title = element_text(hjust = 0.5))

#B. Produce a residual plot. Is the model adequate? Are there any outliers or influential observations?
  
res_test <- jan14_vic_elec %>% 
  model(TSLM(Demand ~ Temperature))
res_test %>% 
  gg_tsresiduals()

#The residuals from this section of the data indicate that the model is missing a trend, even though examining a broader data window reveals this isn't an actual pattern. Additionally, the significant variability points to the presence of outliers in the data.


# Model and forecast for Temperature = 15
model_15 <- jan14_vic_elec %>%
  model(tslm_15 = TSLM(Demand ~ Temperature))

forecast_15 <- model_15 %>%
  forecast(new_data(jan14_vic_elec, 1) %>% mutate(Temperature = 15))

# Model and forecast for Temperature = 35
model_35 <- jan14_vic_elec %>%
  model(tslm_35 = TSLM(Demand ~ Temperature))

forecast_35 <- model_35 %>%
  forecast(new_data(jan14_vic_elec, 1) %>% mutate(Temperature = 35))

p <- jan14_vic_elec %>%
  autoplot(Demand) +
  autolayer(forecast_15, series = "Forecast (Temp 15)", PI = FALSE, colour = "blue") +
  autolayer(forecast_35, series = "Forecast (Temp 35)", PI = FALSE, colour = "red") +
  labs(
    title = "Electricity Demand Forecasts",
    subtitle = "Forecasting Demand for Temperature Scenarios",
    x = "Time",
    y = "Demand"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 15),
    legend.title = element_blank(),
    legend.position = "bottom"
  ) +
  scale_color_manual(values = c("Forecast (Temp 15)" = "blue", "Forecast (Temp 35)" = "red"))

## Warning in layer_slabinterval(data = data, mapping = mapping, stat =
## StatInterval, : Ignoring unknown parameters: `series` and `PI`

## Warning in ggplot2::geom_point(mapping = without(mapping, "linetype"), data =
## unpack_data(object[single_row[["TRUE"]], : Ignoring unknown parameters:
## `series` and `PI`

## Warning in layer_slabinterval(data = data, mapping = mapping, stat =
## StatInterval, : Ignoring unknown parameters: `series` and `PI`

## Warning in ggplot2::geom_point(mapping = without(mapping, "linetype"), data =
## unpack_data(object[single_row[["TRUE"]], : Ignoring unknown parameters:
## `series` and `PI`

## Scale for colour_ramp is already present.
## Adding another scale for colour_ramp, which will replace the existing scale.

print(p)

## Warning: Computation failed in `stat_interval()`.
## Caused by error in `trans$transform()`:
## ! `transform_date()` works with objects of class <Date> only

## Warning: Computation failed in `stat_interval()`.
## Caused by error in `trans$transform()`:
## ! `transform_date()` works with objects of class <Date> only

## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
## Inf

## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
## -Inf

## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
## Inf

## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
## -Inf

## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
## Inf

## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
## -Inf

## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.

#D. Give prediction intervals for your forecasts.

fit <- lm(Demand ~ Temperature, data=jan14_vic_elec)

forecast(fit, newdata=data.frame(Temperature=c(15,35)))

##   Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
## 1       151398.4 117127.2 185669.5  97951.22 204845.5
## 2       274484.2 241333.0 307635.5 222783.69 326184.8

#E. Plot Demand vs Temperature for all of the available data in vic_elec aggregated to daily total demand and maximum temperature. What does this say about your model?
  
plot(Demand~Temperature, data=vic_elec, main= "Demand vs. Temp")

#The relationship is not entirely linear, as the 'U' shape suggests likely due to increased heating-related electricity demand at lower temperatures. This non-linearity can distort a linear regression model, resulting in inaccurate forecasts. This could explain the unusual value when forecasting for 15°C.

residuals <- fit %>% augment() %>% pull(.resid)

# Create a time series object
residuals_ts <- ts(residuals)

# Plot ACF
Acf(residuals_ts, main = "ACF of Residuals")

##The forecasts seem reasonable. However we should be aware that there is not much data to support the forecasts at these temperature extremes, especially in that no daily maximum below 20∘C is observed during January (a summer month in Victoria).

Question 4

#The data set souvenirs concerns the monthly sales figures of a shop which opened in January 1987 and sells gifts, souvenirs, and novelties. The shop is situated on the wharf at a beach resort town in Queensland, Australia. The sales volume varies with the seasonal population of tourists. There is a large influx of visitors to the town at Christmas and for the local surfing festival, held every March since 1988. Over time, the shop has expanded its premises, range of products, and staff.

library(forecast)

data("souvenirs", package = "fpp2")

## Warning in data("souvenirs", package = "fpp2"): data set 'souvenirs' not found

library(fpp3)

## ── Attaching packages ─────────────────────────────────────── fpp3 0.5.0.9000 ──

## ✔ tibble 3.2.1

## Warning: package 'tibble' was built under R version 4.2.3

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ magrittr::extract()  masks tidyr::extract()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

souvenirs

## # A tsibble: 84 x 2 [1M]
##       Month Sales
##       <mth> <dbl>
##  1 1987 Jan 1665.
##  2 1987 Feb 2398.
##  3 1987 Mar 2841.
##  4 1987 Apr 3547.
##  5 1987 May 3753.
##  6 1987 Jun 3715.
##  7 1987 Jul 4350.
##  8 1987 Aug 3566.
##  9 1987 Sep 5022.
## 10 1987 Oct 6423.
## # ℹ 74 more rows

autoplot(souvenirs)+ labs(title = "Time Series of Australian Gas Production") +
  theme(plot.title = element_text(hjust = 0.5))

## Plot variable not specified, automatically selected `.vars = Sales`

#We have identified a prominent seasonal pattern in the dataset. Specifically, there is a notable surge in sales during the peak Christmas season, particularly in November and December, followed by a decline in January. Additionally, there is a minor increase in sales in March, coinciding with the Surfing festival, except for the year 1987. These fluctuations appear to amplify as the time series progresses, suggesting a positive upward trend. As the magnitude of the pattern increases with the level, it is essential to consider implementing a log transformation of this variable to stabilize both the pattern and its variance. shorten this

#B. Explain why it is necessary to take logarithms of these data before fitting a model.


souv_num <- souvenirs
souv_num$Sales <- as.numeric(souvenirs$Sales)
hist(souv_num$Sales)

souv_num_log <- souv_num
souv_num_log$Sales <- log(souv_num$Sales)
hist(souv_num_log$Sales)

#Applied log transformations to the data to better approximate a normal distribution.

# Question 4.c
#Fit a regression model to the logarithms of these sales data with a linear trend, seasonal dummies and a “surfing festival” dummy variable.


# Fit a regression model to the sales data
sales_model <- souvenirs %>%
  mutate(festival = month(Month) == 3 & year(Month) != 1987) |>
  model(regression_model = TSLM(log(Sales) ~ trend() + season() + festival))


# Create a plot of the sales data overlaid with fitted values from the model
sales_plot <- souvenirs |>
  autoplot(Sales, col = "Red") +
  geom_line(data = augment(sales_model), aes(y = .fitted), col = "Green")

sales_plot

# Question 4.d
#Plot the residuals against time and against the fitted values. Do these plots reveal any problems with the model?
  
sales_model %>% gg_tsresiduals()

# Scatter plot
augment(sales_model) %>% 
  ggplot(aes(x = .fitted, y = .innov)) + 
  geom_point(color = "dodgerblue", alpha = 0.6) + 
  scale_x_log10() +
  labs(
    title = "Residuals vs Fitted Values",
    x = "Fitted Values (log scale)",
    y = "Residuals",
    caption = "Data Source: Sales Model"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    plot.caption = element_text(hjust = 0, face = "italic"),
    panel.grid.major = element_line(color = "gray", linetype = "dashed"),
    panel.grid.minor = element_line(color = "lightgray", linetype = "dashed")
  )

#The residuals plotted against time seems random in the left part but in the right part it seems not random and has an increasing trend from 1991 to 1994. So I can’t tell it is unbiased or homoscedastic. And the residuals values vary from -0.03 to 0.03.
#The residuals plotted against the fitted values shows random points from -0.03 to 0.03 so that from this plot the model seems to be unbiased and homoscedastic.

#Question 4.e
#Do boxplots of the residuals for each month. Does this reveal any problems with the model?


library(ggplot2)
library(dplyr)
library(lubridate)

augment(sales_model) %>%
  mutate(month = lubridate::month(Month, label = TRUE, abbr = TRUE)) %>%
  ggplot(aes(x = month, y = .innov)) +
  geom_boxplot(fill = "skyblue", color = "darkblue", outlier.color = "red", outlier.shape = 19) +
  labs(
    title = "Innovations by Month",
    x = "Month",
    y = "Innovation Value",
    caption = "Data Source: Sales Model"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.caption = element_text(hjust = 0, face = "italic")
  )

#question 4.f
#What do the values of the coefficients tell you about each variable?

tidy(sales_model) %>% mutate(pceffect = (exp(estimate) - 1))

## # A tibble: 14 × 7
##    .model           term          estimate std.error statistic  p.value pceffect
##    <chr>            <chr>            <dbl>     <dbl>     <dbl>    <dbl>    <dbl>
##  1 regression_model (Intercept)     7.62    0.0742      103.   4.67e-78  2.04e+3
##  2 regression_model trend()         0.0220  0.000827     26.6  2.32e-38  2.23e-2
##  3 regression_model season()year2   0.251   0.0957        2.63 1.06e- 2  2.86e-1
##  4 regression_model season()year3   0.266   0.193         1.38 1.73e- 1  3.05e-1
##  5 regression_model season()year4   0.384   0.0957        4.01 1.48e- 4  4.68e-1
##  6 regression_model season()year5   0.409   0.0957        4.28 5.88e- 5  5.06e-1
##  7 regression_model season()year6   0.449   0.0958        4.69 1.33e- 5  5.66e-1
##  8 regression_model season()year7   0.610   0.0958        6.37 1.71e- 8  8.41e-1
##  9 regression_model season()year8   0.588   0.0959        6.13 4.53e- 8  8.00e-1
## 10 regression_model season()year9   0.669   0.0959        6.98 1.36e- 9  9.53e-1
## 11 regression_model season()year…   0.747   0.0960        7.79 4.48e-11  1.11e+0
## 12 regression_model season()year…   1.21    0.0960       12.6  1.29e-19  2.34e+0
## 13 regression_model season()year…   1.96    0.0961       20.4  3.39e-31  6.12e+0
## 14 regression_model festivalTRUE    0.502   0.196         2.55 1.29e- 2  6.51e-1

#Each month shows a positive and statistically significant impact compared to January, the reference month. Except for March, all months have positive coefficients, likely due to the dummy variable capturing a specific increase in March. Sales in November and December are about 1.2 and 1.9 times higher than in January, respectively. The March dummy variable, indicating a surfing festival, significantly impacts sales, causing a 0.5 unit change. Additionally, there's a significant upward trend, though with a smaller-than-expected coefficient.  

# Question 4.g
#What does the Ljung-Box test tell you about your model?
  
augment(sales_model) %>%
  features(.innov, ljung_box, lag = 24)

## # A tibble: 1 × 3
##   .model           lb_stat lb_pvalue
##   <chr>              <dbl>     <dbl>
## 1 regression_model    112.  2.15e-13

##The residuals exhibit a significant level of serial correlation.


# Question 4.h
##Regardless of your answers to the above questions, use your regression model to predict the monthly sales for 1994, 1995, and 1996. Produce prediction intervals for each of your forecasts.

predicted_souvenirs <- new_data(souvenirs, n = 36) %>%
  mutate(festival = month(Month) == 3)
sales_model %>%
  forecast(new_data = predicted_souvenirs) %>%
  autoplot(souvenirs)

#The model can be improved by addressing the nonlinearity of the trend and dealing with heteroscedasticity. The current model may be overfitting, as indicated by high total sales numbers. While seasonality is well captured, the trend appears too steep. Adding more variables and correcting the trend's nonlinearity could enhance the model.

Question 5

library(fpp3)

us_gasoline %>% autoplot(Barrels)

us_gasoline %>% 
  autoplot(Barrels) +
  labs(
    title = "Weekly Gasoline Consumption in the US",
    x = "Week",
    y = "Barrels"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.title = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 10)
  )

library(fable)
library(ggplot2)

# Define the time series data
us_gasoline <- us_gasoline
gasoline_2004 <- ts(us_gasoline$Barrels, start = (1991), end = (2017), frequency = 52)
gas04 <- window(gasoline_2004, end = 2005)

# Loop through different numbers of Fourier terms
for (num in c(1, 5, 10, 20)) {
  var_name <- paste("fit", as.character(num), "_gasoline_2004", sep = "")
  
  # Fit the model
  assign(var_name,
         tslm(gas04 ~ trend + fourier(gas04, K = num)))
  
  # Plot the data and fitted values
  print(
    autoplot(gas04) +
      autolayer(get(var_name)$fitted.values,
                series = as.character(num)) +
      ggtitle(paste("Gasoline Consumption - Fourier Terms:", num)) +
      ylab("Barrels") +
      guides(colour = guide_legend(title = "Fourier Terms")) +
      theme_minimal() +
      theme(
        plot.title = element_text(face = "bold", size = 14),
        axis.title = element_text(face = "bold", size = 12),
        axis.text = element_text(size = 10),
        legend.title = element_text(face = "bold", size = 10),
        legend.text = element_text(size = 10),
        panel.background = element_rect(fill = "lightblue", color = "black"),
        panel.grid.major = element_line(color = "gray"),
        panel.grid.minor = element_blank(),
        plot.background = element_rect(fill = "lightgray")
      )
  )
}

library(fable)
library(ggplot2)

# Define the time series data
us_gasoline <- us_gasoline
gasoline_2004 <- ts(us_gasoline$Barrels, start = (1991), end = (2017), frequency = 52)
gas04 <- window(gasoline_2004, end = 2005)

# Loop through different numbers of Fourier terms
for (num in c(1, 5, 10, 20)) {
  var_name <- paste("fit", as.character(num), "_gasoline_2004", sep = "")
  
  # Fit the model
  assign(var_name,
         tslm(gas04 ~ trend + fourier(gas04, K = num)))
  
  # Plot the data and fitted values
  print(
    autoplot(gas04) +
      autolayer(get(var_name)$fitted.values,
                series = as.character(num)) +
      ggtitle(paste("Gasoline Consumption - Fourier Terms:", num)) +
      ylab("Barrels") +
      guides(colour = guide_legend(title = "Fourier Terms")) +
      theme_minimal() +
      theme(
        plot.title = element_text(face = "bold", size = 14),
        axis.title = element_text(face = "bold", size = 12),
        axis.text = element_text(size = 10),
        legend.title = element_text(face = "bold", size = 10),
        legend.text = element_text(size = 10),
        panel.background = element_rect(fill = "lightblue", color = "black"),
        panel.grid.major = element_line(color = "gray"),
        panel.grid.minor = element_blank(),
        plot.background = element_rect(fill = "lightgray")
      )
  )
}

#Question 5b

us_gas3 <- us_gasoline |> filter(year(Week) <= 2004)
us_gas3 |> autoplot(Barrels)

fit <- us_gas3 |>
  model(
    fourier1 = TSLM(Barrels ~ trend() + fourier(K = 1)),
    fourier2 = TSLM(Barrels ~ trend() + fourier(K = 2)),
    fourier3 = TSLM(Barrels ~ trend() + fourier(K = 3)),
    fourier4 = TSLM(Barrels ~ trend() + fourier(K = 4)),
    fourier5 = TSLM(Barrels ~ trend() + fourier(K = 5)),
    fourier6 = TSLM(Barrels ~ trend() + fourier(K = 6)),
    fourier7 = TSLM(Barrels ~ trend() + fourier(K = 7)),
    fourier8 = TSLM(Barrels ~ trend() + fourier(K = 8)),
    fourier9 = TSLM(Barrels ~ trend() + fourier(K = 9)),
    fourier10 = TSLM(Barrels ~ trend() + fourier(K = 10)),
    fourier11 = TSLM(Barrels ~ trend() + fourier(K = 11)),
    fourier12 = TSLM(Barrels ~ trend() + fourier(K = 12)),
    fourier13 = TSLM(Barrels ~ trend() + fourier(K = 13)),
    fourier14 = TSLM(Barrels ~ trend() + fourier(K = 14)),
    fourier15 = TSLM(Barrels ~ trend() + fourier(K = 15))
  )
glance(fit) |>
  arrange(AICc) |>
  select(.model, AICc, CV)

## # A tibble: 15 × 3
##    .model      AICc     CV
##    <chr>      <dbl>  <dbl>
##  1 fourier7  -1887. 0.0740
##  2 fourier11 -1885. 0.0742
##  3 fourier8  -1885. 0.0743
##  4 fourier12 -1884. 0.0742
##  5 fourier6  -1884. 0.0744
##  6 fourier9  -1882. 0.0745
##  7 fourier10 -1881. 0.0746
##  8 fourier13 -1881. 0.0745
##  9 fourier14 -1879. 0.0748
## 10 fourier15 -1876. 0.0750
## 11 fourier5  -1862. 0.0766
## 12 fourier4  -1856. 0.0773
## 13 fourier3  -1853. 0.0777
## 14 fourier2  -1814. 0.0819
## 15 fourier1  -1809. 0.0825

##Based on the AICc values, the model fourier7 has the lowest AICc value, 
#suggesting it might be the best model among the ones considered for the data filtered up to 2004.


# 5.c
#Check the residuals of the final model using the gg_tsresiduals() function. 
#Use a Ljung-Box test to check for residual autocorrelation.

fit %>%
  select(fourier7) %>%
  gg_tsresiduals() +
  labs(title = "Residuals of Model with Fourier (K = 7)", x = "Week", y = "Residuals")

# Perform Ljung-Box test for autocorrelation
fit %>%
  augment() %>%
  filter(.model == "fourier7") %>%
  features(.innov, ljung_box, lag = 52)

## # A tibble: 1 × 3
##   .model   lb_stat lb_pvalue
##   <chr>      <dbl>     <dbl>
## 1 fourier7    56.9     0.299

##It is important to acknowledge the presence of a significant issue related to
#type I error. The results of the Ljung-Box test indicate that it is not 
#significant. Even if the Ljung-Box test had produced significant results, 
#it is worth noting that the correlations are very small, and as a result, 
#any impact on the forecasts and prediction intervals would be minimal.

#5.D

#Generate forecasts for the next year of data. Plot these along with the actual data for 2005. Comment on the forecasts.


fc_gasoline_2005 <- forecast(
  fit20_gasoline_2004,
  newdata=data.frame(fourier(
    gasoline_2004, K = 20, h = 52)
  )
)


autoplot(fc_gasoline_2005) +
  autolayer(window(
    gasoline_2004,
    start = 2004,
    end = 2006
  )
  ) +
  scale_x_continuous(limits = c(2004, 2006)) +
  guides(colour = guide_legend(title = "Fourier Transform pairs"))

## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

## Warning: Removed 676 rows containing missing values or values outside the scale range
## (`geom_line()`).

##The accuracy of the forecasts was consistent in the initial six months but 
#became less reliable afterward. Overall, the forecasts were on target, 
#with the noticeable exception of a sharp decline shortly after 2006. 
#It's worth noting that this drop was still within the lower bounds of the 
#forecasts, indicating that the forecasts were able to account for it.

Question 6

#The annual population of Afghanistan is available in the global_economy data set.

global_economy %>%
  filter(Country=="Afghanistan") %>%
  tsibble(key = Code, index = Year)%>%
  autoplot(Population, show.legend =  FALSE)

#Between 1960 and 1980, the population exhibited a gradual increase, followed by a decline during the Soviet-Afghan war (24 Dec 1979 – 15 Feb 1989), and has since experienced a relatively rapid growth. Over the past three decades, there has been an observable nearly linear progression in population growth.

#b. Fit a linear trend model and compare this to a piecewise linear trend model with knots at 1980 and 1989.

global_economy %>%
  filter(Country=="Afghanistan")%>%
  model(TSLM(Population ~ Year)) %>%
  report()

## Series: Population 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -5794518 -2582559   744761  2259222  6036280 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -829292529   49730866  -16.68   <2e-16 ***
## Year            425774      25008   17.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3188000 on 56 degrees of freedom
## Multiple R-squared: 0.8381,  Adjusted R-squared: 0.8352
## F-statistic: 289.9 on 1 and 56 DF, p-value: < 2.22e-16

global_economy %>%
  filter(Country=="Afghanistan")%>%
  filter(Year<1980)%>%
  model(TSLM(Population ~ Year)) %>%
  report()

## Series: Population 
## Model: TSLM 
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -146380.8 -110290.6    -451.2  105877.8  202881.3 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -470734527    8787062  -53.57   <2e-16 ***
## Year            244657       4462   54.84   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 115100 on 18 degrees of freedom
## Multiple R-squared: 0.994,   Adjusted R-squared: 0.9937
## F-statistic:  3007 on 1 and 18 DF, p-value: < 2.22e-16

global_economy %>%
  filter(Country=="Afghanistan")%>%
  filter(Year>1989)%>%
  model(TSLM(Population ~ Year)) %>%
  report()

## Series: Population 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -619234 -212927    6598  234280  612277 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.670e+09  1.640e+07  -101.8   <2e-16 ***
## Year         8.451e+05  8.184e+03   103.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 349800 on 26 degrees of freedom
## Multiple R-squared: 0.9976,  Adjusted R-squared: 0.9975
## F-statistic: 1.066e+04 on 1 and 26 DF, p-value: < 2.22e-16

# c. Generate forecasts from these two models for the five years after the end of the data, and comment on the results.

library(tsibble)
library(dplyr)
library(fable)
library(ggplot2)

# Fit the models
m1 <- global_economy %>%
  filter(Country == "Afghanistan") %>%
  model(TSLM(Population ~ Year))

m2 <- global_economy %>%
  filter(Country == "Afghanistan") %>%
  filter(Year > 1989) %>%
  model(TSLM(Population ~ Year))

forecast(m1, h=5)

## # A fable: 5 x 5 [1Y]
## # Key:     Country, .model [1]
##   Country     .model                   Year          Population     .mean
##   <fct>       <chr>                   <dbl>              <dist>     <dbl>
## 1 Afghanistan TSLM(Population ~ Year)  2018   N(3e+07, 1.1e+13) 29919575.
## 2 Afghanistan TSLM(Population ~ Year)  2019   N(3e+07, 1.1e+13) 30345349.
## 3 Afghanistan TSLM(Population ~ Year)  2020 N(3.1e+07, 1.1e+13) 30771123.
## 4 Afghanistan TSLM(Population ~ Year)  2021 N(3.1e+07, 1.1e+13) 31196897.
## 5 Afghanistan TSLM(Population ~ Year)  2022 N(3.2e+07, 1.1e+13) 31622671.

forecast(m2, h=5)

## # A fable: 5 x 5 [1Y]
## # Key:     Country, .model [1]
##   Country     .model                   Year          Population     .mean
##   <fct>       <chr>                   <dbl>              <dist>     <dbl>
## 1 Afghanistan TSLM(Population ~ Year)  2018 N(3.6e+07, 1.4e+11) 35925602.
## 2 Afghanistan TSLM(Population ~ Year)  2019 N(3.7e+07, 1.4e+11) 36770747.
## 3 Afghanistan TSLM(Population ~ Year)  2020 N(3.8e+07, 1.4e+11) 37615892.
## 4 Afghanistan TSLM(Population ~ Year)  2021 N(3.8e+07, 1.5e+11) 38461037.
## 5 Afghanistan TSLM(Population ~ Year)  2022 N(3.9e+07, 1.5e+11) 39306182.

# 6.c
#Generate forecasts from these two models for the five years after the end of the data, and comment on the results.

library(tsibble)
library(dplyr)
library(fable)
library(ggplot2)

# Fit the model
fit <- global_economy %>%
  filter(Country == "Afghanistan") %>%
  model(TSLM(Population ~ Year))

# Generate forecasts for 5 years ahead
fc <- fit %>%
  forecast(h = "5 years")

# Plotting historical data and forecasts
autoplot(fc) +
  autolayer(global_economy %>%
              filter(Country == "Afghanistan"),
            Population,
            series = "Historical Data",
            color = "#4e79a7",  # Historical data color
            size = 1.2) +  # Line thickness
  labs(
    title = "Population Forecast for Afghanistan",
    x = "Year",
    y = "Population"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold", hjust = 0.5), 
    
    axis.title = element_text(size = 14),  
    legend.position = "bottom",  
    legend.text = element_text(size = 12),  
    panel.grid.major = element_line(color = "#f0f0f0"),  
    panel.background = element_rect(fill = "white"),  
    plot.background = element_rect(fill = "#f0f0f0"), 
    axis.line = element_line(color = "black") 
  ) +
  guides(color = guide_legend(title = "Series"))

## Warning in geom_line(eval_tidy(expr(aes(!!!aes_spec))), data = object, ..., :
## Ignoring unknown parameters: `series`

#The linear model exhibits noticeable inadequacies, characterized by overly wide prediction intervals and conservative point forecasts. Conversely, the piecewise linear model appears visually appealing, yet its prediction intervals may be excessively narrow. This model assumes a consistent continuation of trends post the latest knot, a premise that lacks realism.

Chapter 8

Question 6

##Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts. [Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

global_economy

## # A tsibble: 15,150 x 9 [1Y]
## # Key:       Country [263]
##    Country     Code   Year         GDP Growth   CPI Imports Exports Population
##    <fct>       <fct> <dbl>       <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
##  1 Afghanistan AFG    1960  537777811.     NA    NA    7.02    4.13    8996351
##  2 Afghanistan AFG    1961  548888896.     NA    NA    8.10    4.45    9166764
##  3 Afghanistan AFG    1962  546666678.     NA    NA    9.35    4.88    9345868
##  4 Afghanistan AFG    1963  751111191.     NA    NA   16.9     9.17    9533954
##  5 Afghanistan AFG    1964  800000044.     NA    NA   18.1     8.89    9731361
##  6 Afghanistan AFG    1965 1006666638.     NA    NA   21.4    11.3     9938414
##  7 Afghanistan AFG    1966 1399999967.     NA    NA   18.6     8.57   10152331
##  8 Afghanistan AFG    1967 1673333418.     NA    NA   14.2     6.77   10372630
##  9 Afghanistan AFG    1968 1373333367.     NA    NA   15.2     8.90   10604346
## 10 Afghanistan AFG    1969 1408888922.     NA    NA   15.0    10.1    10854428
## # ℹ 15,140 more rows

china <- global_economy |>
  filter(Country == "China")
china |> autoplot(GDP)

china |> autoplot(GDP)

lambda <- global_economy %>%
  filter(Country == "China") %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

fit_ch <- global_economy %>%
  filter(Country == "China") %>%
  model(
    `ETS` = ETS(GDP ~ error("A") + trend("A") + season("N")),
    `ETS Box-Cox` = ETS(box_cox(GDP, lambda) ~ error("A") + trend("A") + season("N")),
    `ETS Damped` = ETS(GDP ~ error("A") + trend("Ad", phi = 0.8) + season("N")),
    `ETS Log` = ETS(log(GDP) ~ error("A") + trend("A") + season("N"))
  )

fc_ch <- fit_ch %>%
  forecast(h = 15)

fc_ch %>%
  autoplot(global_economy, level = NULL) +
  labs(title = "GDP Forecast for China") +
  guides(colour = guide_legend(title = "Forecast"))

#The damped and ETS models exhibit similar growth trajectories for Chinese GDP, while the log and Box-Cox transformations significantly alter their forecasts. The log transformation performs better than Box-Cox with lambda 0.2, demonstrating considerable forecast amplification. Chinese GDP, characterized by a rising trend without seasonal patterns, benefits from models using additive errors and non-seasonal specifications ("N").

Question 7

#Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

aus_production %>% autoplot(Gas) +
  labs(title = "Australian Gas Production") + 
  ylab("Production (in bcf)") + 
  xlab("Quarter") + theme(plot.title = element_text(hjust = 0.5))

gas_ts <- ts(aus_production$Gas, frequency = 4)
gas_ets <- ets(gas_ts, model = "MAM")

gas_f <- gas_ets %>% forecast(h=12)
gas_f %>% autoplot() +
  labs(title = "Australian Gas Production with Forecast") + 
  ylab("Production (in bcf)") + 
  xlab("Quarter") + theme(plot.title = element_text(hjust = 0.5))

gas_ets_damped <- ets(gas_ts, model = "MAM", damped = TRUE)
gas_ets_damped_f <- gas_ets_damped %>% forecast(h = 12)
gas_ets_damped_f %>% autoplot() +
  labs(title = "Australian Gas Production with Damped Forecast") + 
  ylab("Production (in bcf)") + 
  xlab("Quarter") + theme(plot.title = element_text(hjust = 0.5))

#The non-damped model appears to perform slightly better in this context, likely due to the pronounced strength of the trend observed across most of the historical data.

gas_ets

## ETS(M,A,M) 
## 
## Call:
##  ets(y = gas_ts, model = "MAM") 
## 
##   Smoothing parameters:
##     alpha = 0.6529 
##     beta  = 0.1442 
##     gamma = 0.0978 
## 
##   Initial states:
##     l = 5.9456 
##     b = 0.0706 
##     s = 0.9309 1.1779 1.0749 0.8163
## 
##   sigma:  0.057
## 
##      AIC     AICc      BIC 
## 1680.929 1681.794 1711.389

gas_ets_damped

## ETS(M,Ad,M) 
## 
## Call:
##  ets(y = gas_ts, model = "MAM", damped = TRUE) 
## 
##   Smoothing parameters:
##     alpha = 0.6489 
##     beta  = 0.1551 
##     gamma = 0.0937 
##     phi   = 0.98 
## 
##   Initial states:
##     l = 5.8589 
##     b = 0.0994 
##     s = 0.9282 1.1779 1.0768 0.8171
## 
##   sigma:  0.0573
## 
##      AIC     AICc      BIC 
## 1684.028 1685.091 1717.873

#Multiplicative seasonality is essential due to the growing seasonal fluctuations.

Chapter 9

Question 10

#10.a
#Produce an STL decomposition of the data and describe the trend and seasonality.

us_employment %>% filter(Title == "Mining and Logging") %>%
  model(STL(Employed)) %>%
  components() %>% autoplot()

#b. Do the data need transforming? If so, find a suitable transformation.

us_employment %>% filter(Title == "Mining and Logging") %>%
  model(STL(log(Employed))) %>%
  components() %>% autoplot()

#C
#Are the data stationary? If not, find an appropriate differencing which yields stationary data.

us_employment %>% filter(Title == "Mining and Logging")  %>%
  autoplot(Employed)

us_employment %>% filter(Title == "Mining and Logging") %>%
  features(Employed, unitroot_kpss)

## # A tibble: 1 × 3
##   Series_ID     kpss_stat kpss_pvalue
##   <chr>             <dbl>       <dbl>
## 1 CEU1000000001      2.83        0.01

us_employment %>% filter(Title == "Mining and Logging") %>%
  features(Employed, unitroot_ndiffs)

## # A tibble: 1 × 2
##   Series_ID     ndiffs
##   <chr>          <int>
## 1 CEU1000000001      1

us_employment %>% filter(Title == "Mining and Logging") %>%
  mutate(log_employed = difference(log(Employed), 12)) %>%
  features(log_employed, unitroot_ndiffs)

## # A tibble: 1 × 2
##   Series_ID     ndiffs
##   <chr>          <int>
## 1 CEU1000000001      0

#D Identify a couple of ARIMA models that might be useful in describing the time series. Which of your models is the best according to their AICc values? The stepwise and auto produce the same results (same AIC values) so either model could be chosen.

us_employment %>%
  filter(Title == "Mining and Logging")  %>%
  gg_tsdisplay(difference(Employed), plot_type='partial')

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

fit <- us_employment %>%
  filter(Title == "Mining and Logging") %>%
  model(arima310 = ARIMA(Employed ~ pdq(3,1,0)),
        arima210 = ARIMA(Employed ~ pdq(2,1,0)),
        stepwise = ARIMA(Employed),
        auto = ARIMA(Employed, stepwise=FALSE))
report (fit)

## Warning in report.mdl_df(fit): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.

## # A tibble: 4 × 9
##   Series_ID     .model   sigma2 log_lik   AIC  AICc   BIC ar_roots   ma_roots 
##   <chr>         <chr>     <dbl>   <dbl> <dbl> <dbl> <dbl> <list>     <list>   
## 1 CEU1000000001 arima310   910.  -4670. 9351. 9351. 9381. <cpl [27]> <cpl [0]>
## 2 CEU1000000001 arima210   977.  -4704. 9416. 9416. 9436. <cpl [14]> <cpl [0]>
## 3 CEU1000000001 stepwise   900.  -4664. 9342. 9342. 9376. <cpl [28]> <cpl [0]>
## 4 CEU1000000001 auto       900.  -4664. 9342. 9342. 9376. <cpl [28]> <cpl [0]>

#E) Estimate the parameters of your best model and do diagnostic testing on the residuals. Do the residuals resemble white noise? If not, try to find another ARIMA model which fits better. Generally, the data are white noise as the ACF plot shows the bars settling within the bounds. The residuals are distributed normally, with some outliers.


fit2 <- us_employment %>%
  filter(Title == "Mining and Logging") %>%
  model(ARIMA(Employed)) %>%
  report(fit2)

## Series: Employed 
## Model: ARIMA(4,1,0)(2,0,0)[12] 
## 
## Coefficients:
##           ar1      ar2      ar3     ar4    sar1    sar2
##       -0.3209  -0.2162  -0.0969  0.1100  0.1188  0.2564
## s.e.   0.0321   0.0336   0.0343  0.0328  0.0327  0.0327
## 
## sigma^2 estimated as 900.2:  log likelihood=-4664.08
## AIC=9342.15   AICc=9342.27   BIC=9376.28

fit2 %>% gg_tsresiduals()

augment(fit2) %>% features(.innov, ljung_box, lag=10, dof=2)

## # A tibble: 1 × 4
##   Series_ID     .model          lb_stat lb_pvalue
##   <chr>         <chr>             <dbl>     <dbl>
## 1 CEU1000000001 ARIMA(Employed)    4.60     0.800

# f) Forecast the next 3 years of data. Get the latest figures from https://fred.stlouisfed.org/categories/11 to check the accuracy of your forecasts.

ml_arima <- us_employment %>%
  filter(Title == "Mining and Logging") %>%
  model(ARIMA(Employed))

ml_arima %>%
  forecast(h = "3 years") %>%
  autoplot(us_employment, level = NULL) + labs(title = "US Mining & Logging Employment Forecast") +
  theme(plot.title = element_text(hjust = 0.5))

#G) Eventually, the prediction intervals are so wide that the forecasts are not particularly useful. How many years of forecasts do you think are sufficiently accurate to be usable? I would say that 1 year would be an accurate forecast.

pred_int <- ml_arima %>%
  forecast(h = "3 years") %>%
  mutate(PI = hilo(Employed, level = 95))
pred_int

## # A fable: 36 x 6 [1M]
## # Key:     Series_ID, .model [1]
##    Series_ID     .model          Month     Employed .mean                     PI
##    <chr>         <chr>           <mth>       <dist> <dbl>                 <hilo>
##  1 CEU1000000001 ARIMA(Emplo… 2019 Oct  N(759, 900)  759. [699.8304, 817.4398]95
##  2 CEU1000000001 ARIMA(Emplo… 2019 Nov N(759, 1315)  759. [687.4222, 829.5843]95
##  3 CEU1000000001 ARIMA(Emplo… 2019 Dec N(757, 1603)  757. [678.3775, 835.3445]95
##  4 CEU1000000001 ARIMA(Emplo… 2020 Jan N(753, 1901)  753. [667.7366, 838.6365]95
##  5 CEU1000000001 ARIMA(Emplo… 2020 Feb N(756, 2390)  756. [660.2835, 851.9288]95
##  6 CEU1000000001 ARIMA(Emplo… 2020 Mar N(759, 2781)  759. [655.3179, 862.0389]95
##  7 CEU1000000001 ARIMA(Emplo… 2020 Apr N(761, 3145)  761. [651.2540, 871.0766]95
##  8 CEU1000000001 ARIMA(Emplo… 2020 May N(765, 3520)  765. [648.4519, 881.0059]95
##  9 CEU1000000001 ARIMA(Emplo… 2020 Jun N(770, 3927)  770. [646.8510, 892.4950]95
## 10 CEU1000000001 ARIMA(Emplo… 2020 Jul N(771, 4314)  771. [642.2205, 899.6761]95
## # ℹ 26 more rows

#The initial forecasts look okay, but then the pandemic had a significant downfall on the economy, that is not predicted.

#In my assumption, a couple of months, since like I mentioned, the pandemic led to a significant downfall, but in absence of pandemic, I would say 4 quarters.

Question 11

# Choose one of the following seasonal time series: the Australian production of electricity, cement, or gas (from aus_production).


aus_production %>% autoplot(Electricity) + labs(title = "Australian Electricity Production") +
  theme(plot.title = element_text(hjust = 0.5))

aus_elec <- aus_production %>% select("Quarter", "Electricity")
aus_elec$Electricity <- log(aus_elec$Electricity)
aus_elec %>% autoplot(.vars=Electricity) +
  labs(title = "Australian Electricity Production with Log Transformation") +
  theme(plot.title = element_text(hjust = 0.5))

#11.b
# Are the data stationary? If not, find an appropriate differencing which yields stationary data.

aus_elec %>%
  features(Electricity, unitroot_kpss)

## # A tibble: 1 × 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1      4.26        0.01

aus_elec %>% 
  features(Electricity, unitroot_ndiffs)

## # A tibble: 1 × 1
##   ndiffs
##    <int>
## 1      2

aus_elec %>%
  mutate(log_elec = difference(log(Electricity), 12)) %>%
  features(log_elec, unitroot_ndiffs)

## # A tibble: 1 × 1
##   ndiffs
##    <int>
## 1      1

# 11.c
# Identify a couple of ARIMA models that might be useful in describing the time series. Which of your models is the best according to their AIC values?

aus_elec %>%
  gg_tsdisplay(difference(Electricity), plot_type='partial')

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

elec_fit <- aus_elec %>%
  model(arima310 = ARIMA(Electricity ~ pdq(3,1,0)),
        arima210 = ARIMA(Electricity ~ pdq(2,1,0)),
        stepwise = ARIMA(Electricity),
        auto = ARIMA(Electricity, stepwise=FALSE))
report (elec_fit)

## Warning in report.mdl_df(elec_fit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.

## # A tibble: 4 × 8
##   .model     sigma2 log_lik    AIC   AICc    BIC ar_roots   ma_roots 
##   <chr>       <dbl>   <dbl>  <dbl>  <dbl>  <dbl> <list>     <list>   
## 1 arima310 0.000357    545. -1076. -1076. -1053. <cpl [11]> <cpl [4]>
## 2 arima210 0.000350    546. -1080. -1080. -1060. <cpl [6]>  <cpl [8]>
## 3 stepwise 0.000359    543. -1079. -1078. -1065. <cpl [1]>  <cpl [5]>
## 4 auto     0.000347    547. -1083. -1082. -1063. <cpl [8]>  <cpl [9]>

# D) Estimate the parameters of your best model and do diagnostic testing on the residuals. Do the residuals resemble white noise? If not, try to find another ARIMA model which fits better.

fit11d <- aus_elec %>%
  model(ARIMA(Electricity)) %>%
  report(fit11d)

## Series: Electricity 
## Model: ARIMA(1,1,1)(0,1,1)[4] 
## 
## Coefficients:
##          ar1      ma1     sma1
##       0.4537  -0.8002  -0.6075
## s.e.  0.1320   0.0969   0.0527
## 
## sigma^2 estimated as 0.0003586:  log likelihood=543.29
## AIC=-1078.59   AICc=-1078.4   BIC=-1065.14

fit11d %>% gg_tsresiduals()

augment(fit11d) %>% features(.innov, ljung_box, lag=10, dof=2)

## # A tibble: 1 × 3
##   .model             lb_stat lb_pvalue
##   <chr>                <dbl>     <dbl>
## 1 ARIMA(Electricity)    12.8     0.119

#e) Forecast the next 24 months of data using your preferred model.

elec_arima <- aus_elec %>%
  model(ARIMA(Electricity))

elec_arima %>%
  forecast(h = "2 years") %>%
  autoplot(aus_elec, level = NULL) + labs(title = "24 Month Australian Electricity Production Forecast") +
  theme(plot.title = element_text(hjust = 0.5))

#f) Forecast the next 24 months of data using your preferred model.

ets_compare11 <- aus_elec %>% model(ETS(Electricity))
report(ets_compare11)

## Series: Electricity 
## Model: ETS(M,A,A) 
##   Smoothing parameters:
##     alpha = 0.5160242 
##     beta  = 0.02948315 
##     gamma = 0.3091674 
## 
##   Initial states:
##      l[0]       b[0]        s[0]      s[-1]      s[-2]      s[-3]
##  8.328679 0.02053009 -0.03579961 0.08180292 0.02721019 -0.0732135
## 
##   sigma^2:  0
## 
##       AIC      AICc       BIC 
## -546.2358 -545.3705 -515.7754

#Compare the forecasts obtained using ETS().

ets_compare11 %>% forecast(h=24) %>%
  autoplot(aus_elec) + labs(title = "Forecasted Australian Electricity Production using ETS") +
  theme(plot.title = element_text(hjust = 0.5))

#combining the graphs together


library(fabletools)
library(ggplot2)


aus_elec_decomp <- stl(aus_elec, t.window = 13, s.window = "periodic")

aus_elec_sznadj <- seasadj(aus_elec_decomp)

aus_elec_stl <- stlf(aus_elec_sznadj, method = "arima")
aus_elec_fc <- forecast(aus_elec_stl)
plot(aus_elec_fc)

autoplot(aus_elec_fc) +
  labs(title = "Forecasted Electricity Production", y = "Production") + 
  autolayer(aus_elec_fc, series = "mean", PI = TRUE, alpha = 0.5, fill = "grey") +  # Add prediction intervals
  theme_minimal() +  # Apply a minimal theme for cleaner look
  guides(colour = FALSE)  # Remove legend for series mean

## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Assignment

Anuranjan Saxena

2024-05-16

Packages

Chapter 7

Question 1

Question 4

Question 5

Question 6

Chapter 8

Question 6

Question 7

Chapter 9

Question 10

Question 11