# Cleaning the environment
rm(list = ls())
# Loading the necessary packages
library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.4
## ✔ dplyr       1.1.3     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.3     ✔ fable       0.3.3
## ✔ ggplot2     3.5.0     ✔ fabletools  0.4.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(dplyr)
library(USgas)
library(readxl)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract

Chapter 1

Explore the following four time series

?aus_production
?pelt
?gafa_stock
?vic_elec

1. What is the time interval of each series?

  1. aus_production: Quarterly

  2. pelt: Annual

  3. gafa_stock: Daily (trading days only, so the index is irregular)

  4. vic_elec: Half-hourly (a quick programmatic check follows below)
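
Each interval can also be read directly from the tsibble metadata with tsibble's interval():

interval(aus_production) # 1Q (quarterly)
interval(pelt)           # 1Y (annual)
interval(gafa_stock)     # ! (irregular: trading days only)
interval(vic_elec)       # 30m (half-hourly)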

2. Use autoplot() to produce a time plot of each series.

autoplot(aus_production, Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

autoplot(pelt, Lynx)

autoplot(gafa_stock, Close)

autoplot(vic_elec, Demand)

3. For the last plot, modify the axis labels and title.

plot <- autoplot(vic_elec, Demand) +
  ggtitle("Victoria, Australia Electricity Consumption Trend")
plot + labs(x = "Time (every 30 mins)", y = "Demand (MWh)")

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock

data(gafa_stock)
gafa_stock %>%
  group_by(Symbol) %>%
  filter(Close == max(Close))
## # A tsibble: 4 x 8 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 AAPL   2018-10-03  230.  233.  230.  232.      230. 28654800
## 2 AMZN   2018-09-04 2026. 2050. 2013  2040.     2040.  5721100
## 3 FB     2018-07-25  216.  219.  214.  218.      218. 58954200
## 4 GOOG   2018-07-26 1251  1270. 1249. 1268.     1268.  2405600

Download the file tute1.csv

tute1 <- readr::read_csv("/Users/aritraray/Desktop/tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): Sales, AdBudget, GDP
## date (1): Quarter
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(tute1)
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y") # One panel per series, each with its own y-axis scale

USgas package

us_tg <- us_total |>
  as_tsibble(key = state, index = year)
head(us_tg)
## # A tsibble: 6 x 3 [1Y]
## # Key:       state [1]
##    year state        y
##   <int> <chr>    <int>
## 1  1997 Alabama 324158
## 2  1998 Alabama 329134
## 3  1999 Alabama 337270
## 4  2000 Alabama 353614
## 5  2001 Alabama 332693
## 6  2002 Alabama 379343
us_tg %>%
  filter(state %in% c('Maine', 'Vermont', 'New Hampshire', 'Connecticut', 'Rhode Island')) %>%
  ggplot(aes(x = year, y = y, col = state)) +
  geom_line() +
  scale_y_continuous(labels = scales::comma) # Format y-axis labels with thousands separators

tourism.xlsx

Create a tsibble which is identical to the tourism tsibble
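
The exercise intends this data to be read from the downloaded tourism.xlsx; a minimal sketch with readxl, assuming the file sits in the working directory (the path is hypothetical). The pipeline below uses the bundled tourism tsibble, which contains the same data:

# Hypothetical path; point this at wherever tourism.xlsx was saved
tourism_raw <- readxl::read_excel("tourism.xlsx")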

tourism <- tourism |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(key = c(Region, State, Purpose), index = Quarter) # Trips is a measurement, not a key
head(tourism, 3)
## # A tsibble: 3 x 5 [1Q]
## # Key:       Region, State, Purpose [1]
##   Quarter Region   State           Purpose  Trips
##     <qtr> <chr>    <chr>           <chr>    <dbl>
## 1 1998 Q1 Adelaide South Australia Business  135.
## 2 1998 Q2 Adelaide South Australia Business  110.
## 3 1998 Q3 Adelaide South Australia Business  166.

Find what combination of Region and Purpose had the maximum number of overnight trips on average.

# Average over all quarters: convert to a tibble first, since a tsibble
# keeps its Quarter index through summarise()
max_overnight_trips <- tourism |>
  as_tibble() |>
  group_by(Region, Purpose) |>
  summarise(avg_overnight_trips = mean(Trips), .groups = "drop")
max_overnight_trips |>
  filter(avg_overnight_trips == max(avg_overnight_trips))
## # A tibble: 1 x 3
##   Region Purpose  avg_overnight_trips
##   <chr>  <chr>                  <dbl>
## 1 Sydney Visiting                747.

Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.

tsibble_1 <- tourism %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips)) %>%
  ungroup()
tsibble_1
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
##    State Quarter Trips
##    <chr>   <qtr> <dbl>
##  1 ACT   1998 Q1  551.
##  2 ACT   1998 Q2  416.
##  3 ACT   1998 Q3  436.
##  4 ACT   1998 Q4  450.
##  5 ACT   1999 Q1  379.
##  6 ACT   1999 Q2  558.
##  7 ACT   1999 Q3  449.
##  8 ACT   1999 Q4  595.
##  9 ACT   2000 Q1  600.
## 10 ACT   2000 Q2  557.
## # ℹ 630 more rows

The aus_arrivals data set

autoplot(aus_arrivals, Arrivals)

gg_subseries(aus_arrivals, Arrivals)


The plots reveal notable trends in arrivals from different countries to Australia:

  1. Japanese Arrivals Decline: Between 2000 and 2010, arrivals from Japan to Australia experienced a significant decrease.

  2. UK Arrival Growth and Seasonality: Arrivals from the UK showed an upward trend from 1980 to 2000. From 2000 to 2010, arrivals exhibited consistent seasonality, with a peak in Q1 followed by a decrease in Q2.

  3. Steady Growth in US and NZ Arrivals: Arrivals from the US and New Zealand demonstrated consistent growth over the observed period.
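
The quarterly pattern noted in point 2 is easier to see in a season plot; a sketch using feasts' gg_season() (the labels argument is our choice):

# One line per year with quarters on the x-axis, exposing the Q1 peaks
gg_season(aus_arrivals, Arrivals, labels = "both")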

Chapter 3

Find an appropriate Box-Cox transformation to stabilise the variance

autoplot(aus_production, Tobacco)
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).

lambda_tobacco <- BoxCox.lambda(aus_production$Tobacco)
lambda_tobacco
## [1] 0.7099451
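
The fpp3-style equivalent uses the Guerrero feature, as done for the later series; note that feasts' implementation may return a slightly different lambda than forecast's BoxCox.lambda():

# Estimate the Box-Cox lambda via Guerrero's method, fpp3 style
aus_production %>%
  features(Tobacco, features = guerrero)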
mel_syd_economy <- ansett %>% 
  filter(Class == "Economy", Airports == "MEL-SYD")
mel_syd_economy %>% autoplot(Passengers)

lambda <- mel_syd_economy %>%
  features(Passengers, features = guerrero) %>%
  pull(lambda_guerrero)
mel_syd_economy %>%
  autoplot(box_cox(Passengers, lambda)) +
  labs(y = "",
       title = paste("Box-Cox Transformation with lambda = ", round(lambda,2)))

SCT <- pedestrian %>%
  filter(Sensor == "Southern Cross Station") %>%
  group_by(Sensor) %>%
  index_by(Week = yearweek(Date_Time)) %>%
  summarise(Count = sum(Count))
SCT %>% autoplot(Count)

lambda <- SCT %>%
  features(Count, features = guerrero) %>%
  pull(lambda_guerrero)
SCT %>%
  autoplot(box_cox(Count, lambda)) +
  labs(y = "",
       title = paste("Box-Cox Transformation with lambda = ", round(lambda,2)))

Consider the last five years of the Gas data from aus_production

gas <- tail(aus_production, 5*4) %>% select(Gas)

Plot the time series.

autoplot(gas, Gas)

There is an upward trend in the data, along with a strong seasonal pattern that repeats each year.

Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices.

# Perform a classical multiplicative decomposition
decomposition <- gas %>%
  model(classical_decomposition(Gas, type = "multiplicative"))

decomposition %>%
  components() %>%
  autoplot()

Yes, the decomposition confirms the upward trend (and the regular seasonal pattern) seen in the time plot.

Compute and plot the seasonally adjusted data.

# Extract and plot the seasonally adjusted series from the decomposition
decomposition %>%
  components() %>%
  as_tsibble() %>%
  autoplot(season_adjust) +
  labs(title = "Seasonally Adjusted Australian Gas Production")

Change one observation to be an outlier

gas %>%
  mutate(Gas = if_else(Quarter == yearquarter("2008 Q4"), Gas + 300, Gas)) %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  as_tsibble() %>%
  autoplot(season_adjust)

The outlier produces a sharp spike in 2008 Q4 that distorts the seasonally adjusted series and dampens the apparent upward trend.

Does it make any difference if the outlier is near the end rather than in the middle of the time series?

gas %>%
  mutate(Gas = if_else(Quarter == yearquarter("2010 Q1"), Gas + 300, Gas)) %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  as_tsibble() %>%
  autoplot(season_adjust)

Yes. With the outlier near the end, the distortion is concentrated there: the seasonally adjusted series before the outlier looks nearly flat, and the upward trend is largely obscured.

Civilian labour force in Australia

a. The Australian civilian labour force shows a clear upward trend over the observed period, and the seasonal pattern stays consistent from year to year. The most notable feature is the substantial dip in the remainder component during 1991 and 1992, which coincides with the early-1990s recession in Australia and marks a significant departure from the usual trend and seasonal behaviour.

b. Yes. The plot of the remainder component shows a pronounced dip in 1991 and 1992, a clear deviation from the expected trend and seasonal patterns that is consistent with the recession of that period.
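
The figures discussed above come from an STL decomposition in the textbook; the labour force series itself is not bundled with the fpp3 packages, so here is the same decomposition pattern sketched on a series that is available:

# STL decomposition into trend, seasonal, and remainder components
# (aus_production's Gas series used purely for illustration)
aus_production %>%
  model(STL(Gas ~ season(window = "periodic"))) %>%
  components() %>%
  autoplot()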

Discussion-1 prompt

Overreliance on historical data in statistical forecasting carries numerous dangers that cut across industries and decision-making contexts. One of the primary drawbacks of depending entirely on historical data is the assumption of stationarity, which implies that underlying patterns and correlations remain constant over time. However, in dynamic and evolving situations, this assumption frequently becomes untenable as external factors such as market trends, consumer tastes, and technological breakthroughs change. As a result, historical data may fail to reflect emerging patterns, disruptive events, or structural changes, resulting in inaccurate forecasting models.

The limits of historical data are most apparent in quickly changing markets or environments where the rate of innovation and disruption exceeds the predictive capacity of established forecasting methodologies. For example, in the field of technology, rapid turnover in goods, services, and business models can render historical data obsolete, making it difficult to forecast future developments accurately. Similarly, in financial markets, unexpected geopolitical events, regulatory changes, or economic downturns can cause significant volatility, making historical trends ineffective predictors of future performance.

Solution: Organizations and decision-makers can use a variety of measures to reduce the dangers associated with relying too heavily on previous data. To begin, they can supplement historical data with alternative sources of information, such as real-time data feeds, expert opinions, or scenario studies, to improve forecasting models and capture a greater variety of possible outcomes. Second, new analytical tools such as machine learning algorithms, artificial intelligence, and predictive modeling can improve forecasting models’ ability to detect and respond to changing data patterns and trends. These tools can unearth complicated, non-linear correlations between variables and detect minor variations in market dynamics that standard approaches may miss.

Furthermore, probabilistic forecasting approaches such as Monte Carlo simulations or Bayesian inference can provide a more sophisticated understanding of uncertainty and variability in future events. By estimating the range of possible outcomes and their associated probabilities, decision-makers can make better-informed choices and establish effective risk management strategies.
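
As a concrete sketch of the Monte Carlo idea, fable can simulate many sample paths from a fitted model rather than producing a single point forecast (the ETS model and the two-year horizon are arbitrary choices here):

# Simulate 200 possible futures for quarterly beer production
fit <- aus_production %>% model(ETS(Beer))
sims <- fit %>% generate(h = 8, times = 200)
aus_production %>%
  filter(Quarter >= yearquarter("2005 Q1")) %>%
  ggplot(aes(x = Quarter, y = Beer)) +
  geom_line() +
  geom_line(data = sims, aes(y = .sim, group = .rep),
            alpha = 0.2, colour = "blue")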

Fostering a culture of data-driven decision-making and continual learning within organizations can also help address the limitations of historical data. Organizations can improve forecast accuracy and reliability by encouraging a collaborative approach to forecasting that includes input from a wide range of stakeholders, including domain experts, data scientists, and front-line employees.

Finally, while historical data remains a significant resource for informing decisions and forecasting future outcomes, its limitations must be recognized and supplemented with complementary approaches to manage risks successfully. Organizations that embrace advanced analytical techniques, incorporate alternative sources of information, and develop a culture of constant learning and adaptation can navigate the complexities of fast-changing environments and make more resilient and informed decisions.