# load libraries
library(fpp3)  
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.3     ✔ fable       0.4.1
## ✔ ggplot2     3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(ggplot2)
library(readxl)   
library(tsibble)  
library(dplyr)  
library(ggplot2)
library(USgas)
# 2.1 : Bricks
?aus_production 
bricks_data <- aus_production %>% select(Quarter, Bricks)

time_interval_bricks <- range(bricks_data$Quarter)  
time_interval_bricks
## <yearquarter[2]>
## [1] "1956 Q1" "2010 Q2"
## # Year starts on: January
autoplot(aus_production, Bricks) +
  ggtitle("Bricks Production in Australia") +
  xlab("Year") + ylab("Bricks Produced")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# 2.1 : Lynx
?pelt  
lynx_data <- pelt %>% select(Year, Lynx)

time_interval_lynx <- range(lynx_data$Year)
time_interval_lynx
## [1] 1845 1935
autoplot(pelt, Lynx) +
  ggtitle("Lynx Population Over Time") +
  xlab("Year") + ylab("Lynx Count")

# 2.1 : Close
?gafa_stock  
close_data <- gafa_stock %>% select(Date, Close)

time_interval_close <- range(close_data$Date) 
time_interval_close
## [1] "2014-01-02" "2018-12-31"
autoplot(gafa_stock, Close) +
  ggtitle("GAFA Stock Closing Prices") +
  xlab("Year") + ylab("Closing Price")

# 2.1 : Demand
?vic_elec  
demand_data <- vic_elec %>% select(Time, Demand)

time_interval_demand <- range(demand_data$Time)  
time_interval_demand
## [1] "2012-01-01 00:00:00 AEDT" "2014-12-31 23:30:00 AEDT"
autoplot(vic_elec, Demand) +
  ggtitle("Electricity Demand in Victoria") +
  xlab("Year") + ylab("Electricity Demand (MW)") +
  theme_minimal() 

#2.2
gafa_tbl <- as_tibble(gafa_stock)

# maximum closing price for each stock
peak_prices <- gafa_tbl %>%
  group_by(Symbol) %>%
  summarize(Max_Close = max(Close, na.rm = TRUE), .groups = "drop")

# days when closing price matched peak price
peak_days <- gafa_tbl %>%
  inner_join(peak_prices, by = "Symbol") %>%
  filter(Close == Max_Close) %>%
  select(Date, Symbol, Close)

peak_days
## # A tibble: 4 × 3
##   Date       Symbol Close
##   <date>     <chr>  <dbl>
## 1 2018-10-03 AAPL    232.
## 2 2018-09-04 AMZN   2040.
## 3 2018-07-25 FB      218.
## 4 2018-07-26 GOOG   1268.
#2.3 
tute1 <- readr::read_csv("tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): Sales, AdBudget, GDP
## date (1): Quarter
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(tute1)
# data to time series
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)

# time series plots of each of the three series
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

#2.4 
library(USgas)
library(fpp3)
library(ggplot2)

# US natural gas consumption data
data("us_total", package = "USgas")

us_gas_tsibble <- us_total %>%
  rename(Consumption = y) %>%  
  as_tsibble(index = year, key = state) 

new_england_states <- c("Maine", "Vermont", "New Hampshire", 
                         "Massachusetts", "Connecticut", "Rhode Island")

new_england_gas <- us_gas_tsibble %>%
  filter(state %in% new_england_states)

autoplot(new_england_gas, Consumption) +
  ggtitle("Annual Natural Gas Consumption in New England (US)") +
  xlab("Year") + ylab("Consumption (Billion Cubic Feet)") +
  facet_wrap(~state, scales = "free_y") +  
  theme_minimal()

#2.5
tourism_data <- read_excel("tourism.xlsx")
head(tourism_data)
## # A tibble: 6 × 5
##   Quarter    Region   State           Purpose  Trips
##   <chr>      <chr>    <chr>           <chr>    <dbl>
## 1 1998-01-01 Adelaide South Australia Business  135.
## 2 1998-04-01 Adelaide South Australia Business  110.
## 3 1998-07-01 Adelaide South Australia Business  166.
## 4 1998-10-01 Adelaide South Australia Business  127.
## 5 1999-01-01 Adelaide South Australia Business  137.
## 6 1999-04-01 Adelaide South Australia Business  200.
tourism_tsibble <- tourism_data %>%
  mutate(Quarter = yearquarter(Quarter)) %>% 
  as_tsibble(index = Quarter, key = c(State, Region, Purpose))  

glimpse(tourism_tsibble)
## Rows: 24,320
## Columns: 5
## Key: State, Region, Purpose [304]
## $ Quarter <qtr> 1998 Q1, 1998 Q2, 1998 Q3, 1998 Q4, 1999 Q1, 1999 Q2, 1999 Q3,…
## $ Region  <chr> "Canberra", "Canberra", "Canberra", "Canberra", "Canberra", "C…
## $ State   <chr> "ACT", "ACT", "ACT", "ACT", "ACT", "ACT", "ACT", "ACT", "ACT",…
## $ Purpose <chr> "Business", "Business", "Business", "Business", "Business", "B…
## $ Trips   <dbl> 150.19812, 99.93268, 129.56512, 101.69897, 95.52491, 229.05762…
# Region and purpose with max average overnight trips
max_avg_trips <- tourism_tsibble %>%
  arrange(Region, Purpose, Quarter) %>%  
  group_by(Region, Purpose) %>%
  summarise(Avg_Trips = mean(Trips, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(Avg_Trips)) %>%
  slice(1)  
## Warning: Current temporal ordering may yield unexpected results.
## ℹ Suggest to sort by `Region`, `Purpose`, `Quarter` first.
max_avg_trips
## # A tsibble: 1 x 4 [1Q]
## # Key:       Region, Purpose [1]
##   Region    Purpose  Quarter Avg_Trips
##   <chr>     <chr>      <qtr>     <dbl>
## 1 Melbourne Visiting 2017 Q4      985.
state_tsibble <- tourism_tsibble %>%
  index_by(Quarter) %>%  
  summarise(Total_Trips = sum(Trips, na.rm = TRUE), .groups = "drop")

state_tsibble
## # A tsibble: 80 x 2 [1Q]
##    Quarter Total_Trips
##      <qtr>       <dbl>
##  1 1998 Q1      23182.
##  2 1998 Q2      20323.
##  3 1998 Q3      19827.
##  4 1998 Q4      20830.
##  5 1999 Q1      22087.
##  6 1999 Q2      21458.
##  7 1999 Q3      19914.
##  8 1999 Q4      20028.
##  9 2000 Q1      22339.
## 10 2000 Q2      19941.
## # ℹ 70 more rows
# 2.8 : Total Private Employment from us_employment

?us_employment

employment_ts <- us_employment %>%
  filter(Title == "Total Private") %>%
  select(Month, Employed)

# Autoplot 
autoplot(employment_ts, Employed) +
  ggtitle("Total Private Employment in the US") +
  xlab("Year") + ylab("Employed (millions)")

# Seasonal plot
gg_season(employment_ts, Employed) +
  ggtitle("Seasonal Plot: Total Private Employment")

# Subseries plot
gg_subseries(employment_ts, Employed) +
  ggtitle("Subseries Plot: Total Private Employment")

# Lag plot
gg_lag(employment_ts, Employed, geom = "point")

# Autocorrelation
ACF(employment_ts, Employed) %>% autoplot()

- upward trend in private employment from 1940 to 2023 - employment numbers steadily increase, reflecting economic growth over time - seasonal variation is relatively small

# 2.8 : Bricks from aus_production
?aus_production

# Autoplot
autoplot(aus_production, Bricks) +
  ggtitle("Bricks Production in Australia") +
  xlab("Year") + ylab("Production (millions)")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Seasonal plot
gg_season(aus_production, Bricks) +
  ggtitle("Seasonal Plot: Bricks Production")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Subseries plot
gg_subseries(aus_production, Bricks) +
  ggtitle("Subseries Plot: Bricks Production")
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Lag plot
gg_lag(aus_production, Bricks, geom = "point")
## Warning: Removed 20 rows containing missing values (gg_lag).

# Autocorrelation
ACF(aus_production, Bricks) %>% autoplot()

- There is a clear growth trend up to 1980, followed by cyclical fluctuations and volatility. - Bricks production exhibits strong seasonal patterns, with higher output in Q2 and Q3.

# 2.8 : Hare Population from pelt
?pelt

# Autoplot - General Trend
autoplot(pelt, Hare) +
  ggtitle("Hare Population Over Time") +
  xlab("Year") + ylab("Hare Count")

# Lag plot - Checks for cyclic dependence
gg_lag(pelt, Hare, geom = "point") +
  ggtitle("Lag Plot: Hare Population")

# Autocorrelation function (ACF) to check periodicity
ACF(pelt, Hare) %>% autoplot() +
  ggtitle("Autocorrelation: Hare Population")

- The hare population follows cyclic fluctuations, with peaks and crashes roughly every 10 years

#2.8: H02 Cost from PBS (Pharmaceutical Benefit Scheme)
?PBS

pbs_h02 <- PBS %>%
  filter(ATC2 == "H02") %>%
  index_by(Month) %>%  
  summarise(Cost = sum(Cost, na.rm = TRUE)) %>%  
  as_tsibble(index = Month)  

# Autoplot - Trend Analysis
autoplot(pbs_h02, Cost) +
  ggtitle("H02 Drug Costs Over Time") +
  xlab("Year") + ylab("Cost (millions)")

# Seasonal Plot
gg_season(pbs_h02, Cost) +
  ggtitle("Seasonal Plot: H02 Costs")

# Subseries Plot
gg_subseries(pbs_h02, Cost) +
  ggtitle("Subseries Plot: H02 Costs")

# Lag Plot
gg_lag(pbs_h02, Cost, geom = "point") +
  ggtitle("Lag Plot: H02 Costs")

# Autocorrelation Function (ACF)
ACF(pbs_h02, Cost) %>% autoplot() +
  ggtitle("Autocorrelation: H02 Costs")

- H02 drug costs show long-term growth with seasonal fluctuations. - Drug costs follow a predictable annual cycle, with peaks in January and December.

#2.8: Barrels from us_gasoline
?us_gasoline

# Autoplot
autoplot(us_gasoline, Barrels) +
  ggtitle("US Gasoline Production Over Time") +
  xlab("Year") + ylab("Barrels (millions)")

# Seasonal plot
gg_season(us_gasoline, Barrels) +
  ggtitle("Seasonal Plot: US Gasoline Production")

# Subseries plot
gg_subseries(us_gasoline, Barrels) +
  ggtitle("Subseries Plot: US Gasoline Production")

# Lag plot
gg_lag(us_gasoline, Barrels, geom = "point")

# Autocorrelation
ACF(us_gasoline, Barrels) %>% autoplot()

- Upward trend
- Strong weekly/seasonal