#Cleaning the environment
remove(list = ls())
#Installing necessary packages
library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## âś” tibble 3.2.1 âś” tsibble 1.1.4
## âś” dplyr 1.1.3 âś” tsibbledata 0.4.1
## âś” tidyr 1.3.0 âś” feasts 0.3.1
## âś” lubridate 1.9.3 âś” fable 0.3.3
## âś” ggplot2 3.5.0 âś” fabletools 0.4.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## âś– lubridate::date() masks base::date()
## âś– dplyr::filter() masks stats::filter()
## âś– tsibble::intersect() masks base::intersect()
## âś– tsibble::interval() masks lubridate::interval()
## âś– dplyr::lag() masks stats::lag()
## âś– tsibble::setdiff() masks base::setdiff()
## âś– tsibble::union() masks base::union()
library(dplyr)
library(USgas)
library(readxl)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
?(aus_production)
?(pelt)
?(gafa_stock)
?(vic_elec)
aus_production: Quarterly
pelt: Annual
gafa_stock: Days based
vic_elec: Half-hourly based
autoplot(aus_production,Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
autoplot(pelt,Lynx)
autoplot(gafa_stock,Close)
autoplot(vic_elec,Demand)
plot = autoplot(vic_elec,Demand)+ggtitle("Victoria, Australia Electricity Consumption Trend")
plot + labs(x= "Time(every 30 mins)")
filter()
to find what days corresponded to the peak
closing pricedata(gafa_stock)
gafa_stock %>%
group_by(Symbol) %>%
filter(Close == max(Close))
## # A tsibble: 4 x 8 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-10-03 230. 233. 230. 232. 230. 28654800
## 2 AMZN 2018-09-04 2026. 2050. 2013 2040. 2040. 5721100
## 3 FB 2018-07-25 216. 219. 214. 218. 218. 58954200
## 4 GOOG 2018-07-26 1251 1270. 1249. 1268. 1268. 2405600
tute1.csv
tute1 <- readr::read_csv("/Users/aritraray/Desktop/tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Sales, AdBudget, GDP
## date (1): Quarter
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(tute1)
mytimeseries <- tute1 |>
mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(index = Quarter)
mytimeseries |>
pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() +
facet_grid(name ~ ., scales = "free_y") #allows us to create separate plots for the subsets of the dataset used here
USgas
packageus_tg <- us_total|>
mutate(year = year)|>
as_tsibble(key = state, index =year)
head(us_tg)
## # A tsibble: 6 x 3 [1Y]
## # Key: state [1]
## year state y
## <int> <chr> <int>
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343
us_tg %>%
filter(state %in% c('Maine', 'Vermont', 'New Hampshire', 'Connecticut', 'Rhode Island')) %>%
ggplot(aes(x = year, y = y, col = state)) +
geom_line()+
scale_y_continuous(labels = scales::comma) # This line formats y-axis labels
tourism
tsibbletourism<- tourism|>
mutate(Quarter = yearquarter(Quarter))|>
as_tsibble(key = c(Region, State, Purpose, Trips),index =Quarter)
head(tourism, 3)
## # A tsibble: 3 x 5 [1Q]
## # Key: Region, State, Purpose, Trips [3]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 2010 Q1 Adelaide South Australia Business 68.7
## 2 2005 Q2 Adelaide South Australia Business 73.3
## 3 2013 Q2 Adelaide South Australia Business 101.
Region
and
Purpose
had the maximum number of overnight trips on
average.max_overnight_trips <- tourism|>
select(Region, Purpose, Trips)|>
group_by(Region, Purpose)|>
summarize(max_overnight_trips = mean(Trips))
max_overnight_trips|>
filter(max_overnight_trips==max(max_overnight_trips))
## # A tsibble: 76 x 4 [1Q]
## # Key: Region, Purpose [76]
## # Groups: Region [76]
## Region Purpose Quarter max_overnight_trips
## <chr> <chr> <qtr> <dbl>
## 1 Adelaide Visiting 2017 Q1 270.
## 2 Adelaide Hills Visiting 2002 Q4 81.1
## 3 Alice Springs Holiday 1998 Q3 76.5
## 4 Australia's Coral Coast Holiday 2014 Q3 198.
## 5 Australia's Golden Outback Business 2017 Q3 174.
## 6 Australia's North West Business 2016 Q3 297.
## 7 Australia's South West Holiday 2016 Q1 612.
## 8 Ballarat Visiting 2004 Q1 103.
## 9 Barkly Holiday 1998 Q3 37.9
## 10 Barossa Holiday 2006 Q1 51.0
## # ℹ 66 more rows
tsibble_1 <- tourism %>%
group_by(State) %>% summarise(Trips = sum(Trips))%>%
ungroup()
tsibble_1
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter Trips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ℹ 630 more rows
aus_arrivals
data setautoplot(aus_arrivals, Arrivals)
gg_subseries(aus_arrivals, Arrivals)
The plots reveal notable trends in arrivals from different countries to
Australia:
Japanese Arrivals Decline: Between 2000 and 2010, arrivals from Japan to Australia experienced a significant decrease.
UK Arrival Growth and Seasonality: Arrivals from the UK showed an upward trend from 1980 to 2000. From 2000 to 2010, arrivals exhibited consistent seasonality, with a peak in Q1 followed by a decrease in Q2.
Steady Growth in US and NZ Arrivals: Arrivals from the US and New Zealand demonstrated consistent growth over the observed period.
autoplot(aus_production, Tobacco)
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
lambda_tobacco <- BoxCox.lambda(aus_production$Tobacco)
lambda_tobacco
## [1] 0.7099451
mel_syd_economy <- ansett %>%
filter(Class == "Economy", Airports == "MEL-SYD")
mel_syd_economy %>% autoplot(Passengers)
lambda <- mel_syd_economy %>%
features(Passengers, features = guerrero) %>%
pull(lambda_guerrero)
mel_syd_economy %>%
autoplot(box_cox(Passengers, lambda)) +
labs(y = "",
title = paste("Box-Cox Transformation with lambda = ", round(lambda,2)))
SCT <- pedestrian %>%
filter(Sensor == "Southern Cross Station") %>%
group_by(Sensor) %>%
index_by(Week = yearweek(Date_Time)) %>%
summarise(Count = sum(Count))
SCT %>% autoplot(Count)
lambda <- SCT %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
SCT %>%
autoplot(box_cox(Count, lambda)) +
labs(y = "",
title = paste("Box-Cox Transformation with lambda = ", round(lambda,2)))
aus_production
gas <- tail(aus_production, 5*4) %>% select(Gas)
autoplot(gas, Gas)
There is a yearly upward trend in this data.
classical_decomposition
with
type=multiplicative
to calculate the trend-cycle and
seasonal indices.# Perform classical decomposition
decomposition <- stl(gas, s.window = "periodic", t.window = 7)
autoplot(decomposition)
Yes, there is a clear upward trend in the data.
# Extract seasonally adjusted component
seasonally_adjusted <- seasadj(decomposition)
# Plot the seasonally adjusted data
autoplot(seasonally_adjusted) +
labs(title = "Seasonally Adjusted Australia Gas Production")
gas %>%
mutate(Gas = if_else(Quarter==yearquarter("2008Q4"), Gas + 300, Gas)) %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(season_adjust)
There is an odd peak in 2008 Q4 which brings significant change to the trend, reducing the effect of the upward trend.
gas %>%
mutate(Gas = if_else(Quarter==yearquarter("2010Q1"), Gas + 300, Gas)) %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(season_adjust)
This causes significant change as the pattern before the appearance of the outlier is very static and has completely lost the upward trend.
a.The Australian civilian labor force exhibits a discernible upward trend over the observed period. Seasonality remains consistent throughout the years, suggesting recurring patterns in labor force participation across seasons. Notably, a prominent feature is the substantial decline observed in the remainder graph for the years 1991 and 1992. This decline could signify significant fluctuations or irregularities in the data during this period, warranting further investigation to understand the underlying factors contributing to this anomaly.
b. Indeed, the graphical representation of the remainder component vividly portrays a notable dip during the years 1991 and 1992. This distinctive feature underscores a significant deviation from the expected trend and seasonal patterns in the Australian civilian labor force data, prompting further scrutiny to comprehend the underlying factors driving this observed anomaly.
Overreliance on historical data in statistical forecasting carries
numerous dangers that cut across industries and decision-making
contexts. One of the primary drawbacks of depending entirely on
historical data is the assumption of stationarity, which implies that
underlying patterns and correlations remain constant throughout time.
However, in dynamic and evolving situations, this assumption frequently
becomes unworkable as external factors such as market trends, consumer
tastes, and technology breakthroughs change. As a result, historical
data may fail to reflect emerging patterns, disruptive events, or
structural changes, resulting in inaccurate forecasting models.
The limits of historical data are most apparent in quickly changing
markets or environments when the rate of innovation and disruption
exceeds the predictive capacity of established forecasting
methodologies. For example, in the field of technology, the rapid change
of goods, services, and business models can leave old data obsolete,
making it difficult to effectively forecast future developments.
Similarly, in financial markets, unexpected geopolitical events,
regulatory changes, or economic downturns can cause significant
volatility, making historical trends ineffective predictors of future
performance.
Solution: Organizations and decision-makers can use a
variety of measures to reduce the dangers associated with relying too
heavily on previous data. To begin, they can supplement historical data
with alternative sources of information, such as real-time data feeds,
expert opinions, or scenario studies, to improve forecasting models and
capture a greater variety of possible outcomes. Second, new analytical
tools such as machine learning algorithms, artificial intelligence, and
predictive modeling can improve forecasting models’ ability to detect
and respond to changing data patterns and trends. These tools can
unearth complicated, non-linear correlations between variables and
detect minor variations in market dynamics that standard approaches may
miss.
Furthermore, using probabilistic forecasting approaches like Monte
Carlo simulations or Bayesian inference can provide a more sophisticated
understanding of uncertainty and variability in future events.
Decision-makers can make better decisions and establish effective risk
management strategies by estimating the range of probable events and
their related probability.
Fostering a culture of data-driven decision-making and continual
learning within organisations can help to address the constraints of
previous data. Organizations can improve forecast accuracy and
reliability by fostering a collaborative approach to forecasting that
includes input from a wide range of stakeholders, including domain
experts, data scientists, and front-line employees.
Finally, while historical data is still a significant resource for
informing decision-making and forecasting future outcomes, its limits
must be recognized and reinforced by complementary approaches to
successfully manage risks. Organizations that embrace advanced
analytical approaches, incorporate alternative sources of information,
and develop a culture of constant learning and adaptation may negotiate
the complexities of fast changing environments and make more resilient
and informed decisions.