library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(fpp3)

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tsibble     1.1.6     ✔ feasts      0.4.1
## ✔ tsibbledata 0.4.1     ✔ fable       0.4.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(dplyr)
library(ggplot2)
library(forcats)
library(tsibble)

# Load the global_economy data set by name so it is available to the chunks below.
data("global_economy")

1) global_economy

Which country has the highest GDP per capita? How has this changed over time?

Over time, the nation with the highest GDP per capita shifts. Small, resource-rich economies, like Norway or Luxembourg, typically have high GDP per capita.

# GDP per capita (GDP / Population) over time, one coloured line per country.
# The legend is suppressed: with 263 countries it would crowd out the plot
# panel and leave the lines themselves unreadable.
global_economy |>
  ggplot(aes(x = Year, y = GDP / Population, color = Country)) +
  geom_line(show.legend = FALSE) +
  labs(title = "GDP per Capita Over Time",
       y = "GDP per Capita",
       x = "Year")

## Warning: Removed 3242 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Same GDP-per-capita view drawn with autoplot(); the legend is hidden.
autoplot(global_economy, GDP / Population, show.legend = FALSE) +
  ggtitle("GDP per Capita") +
  xlab("Year")

## Warning: Removed 3242 rows containing missing values or values outside the scale range
## (`geom_line()`).

# GDP per capita over time, drawn as one blue line per country.
# `group = Country` is required: with no grouping aesthetic geom_line() joins
# the observations of every country into a single path that zig-zags
# vertically within each year.
global_economy |>
  mutate(GDP_per_capita = GDP / Population) |>
  ggplot(aes(x = Year, y = GDP_per_capita, group = Country)) +
  geom_line(color = "blue") +  # Set the line color to blue
  labs(title = "GDP per Capita Over Time",
       y = "GDP per Capita",
       x = "Year")

2 Data visualization

2a

United States GDP from global_economy

# Time plot of GDP restricted to the United States rows.
autoplot(
  filter(global_economy, Country == "United States"),
  GDP
) +
  ggtitle("United States GDP") +
  xlab("Year") +
  ylab("GDP (USD)")

2b

Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock.

# Slaughter counts for one animal category in one state.
aus_livestock |>
  filter(State == "Victoria" & Animal == "Bulls, bullocks and steers") |>
  autoplot(Count) +
  ggtitle("Slaughter of Victorian Bulls, Bullocks, and Steers")

#2c Victorian Electricity Demand

# Demand series from vic_elec at its native resolution.
vic_elec |>
  autoplot(Demand) +
  ggtitle("Victorian Electricity Demand")

# Re-index by Date and sum Demand within each date, then plot the totals.
vic_elec |>
  index_by(Date) |>
  summarise(Total_Demand = sum(Demand)) |>
  autoplot(Total_Demand) +
  labs(
    title = "Daily Electricity Demand for Australia",
    subtitle = "State of Victoria"
  )

##2d

# Gas column of aus_production as a time plot.
aus_production |>
  autoplot(Gas) +
  ggtitle("Gas Production Over Time")

# Volume column of canadian_gas as a time plot.
autoplot(canadian_gas, Volume) +
  ggtitle("Canadian Gas Volume")

# Fix the RNG seed, then keep only the rows of one randomly drawn Series ID.
set.seed(12345678)
myseries <- filter(
  aus_retail,
  `Series ID` == sample(aus_retail[["Series ID"]], size = 1)
)

# Reload global_economy and print the tsibble summary with its first rows.
data(global_economy)
global_economy |> print()

## # A tsibble: 15,150 x 9 [1Y]
## # Key:       Country [263]
##    Country     Code   Year         GDP Growth   CPI Imports Exports Population
##    <fct>       <fct> <dbl>       <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
##  1 Afghanistan AFG    1960  537777811.     NA    NA    7.02    4.13    8996351
##  2 Afghanistan AFG    1961  548888896.     NA    NA    8.10    4.45    9166764
##  3 Afghanistan AFG    1962  546666678.     NA    NA    9.35    4.88    9345868
##  4 Afghanistan AFG    1963  751111191.     NA    NA   16.9     9.17    9533954
##  5 Afghanistan AFG    1964  800000044.     NA    NA   18.1     8.89    9731361
##  6 Afghanistan AFG    1965 1006666638.     NA    NA   21.4    11.3     9938414
##  7 Afghanistan AFG    1966 1399999967.     NA    NA   18.6     8.57   10152331
##  8 Afghanistan AFG    1967 1673333418.     NA    NA   14.2     6.77   10372630
##  9 Afghanistan AFG    1968 1373333367.     NA    NA   15.2     8.90   10604346
## 10 Afghanistan AFG    1969 1408888922.     NA    NA   15.0    10.1    10854428
## # ℹ 15,140 more rows

Moving Average, Box-Cox Transformation

A 3 × 5 MA is equivalent to a 7-term weighted MA with weights 1/15, 2/15, 3/15, 3/15, 3/15, 2/15, 1/15 (shown below rounded to three decimals):

# Weights of the 7-term weighted MA, rounded to three decimals.
# They are symmetric about the middle term, which is repeated three times.
weights <- c(0.067, 0.133, rep(0.200, 3), 0.133, 0.067)
print(weights)

## [1] 0.067 0.133 0.200 0.200 0.200 0.133 0.067

Gas production

# Keep the last 5 * 4 = 20 rows of aus_production and only the Gas column
# (the index column is carried along by the tsibble), then plot the series.
gas <- aus_production |>
  tail(n = 5 * 4) |>
  select(Gas)
gas |> autoplot()

## Plot variable not specified, automatically selected `.vars = Gas`

# Fit a multiplicative classical decomposition to Gas and plot its components.
fit <- model(gas, classical_decomposition(Gas, type = "multiplicative"))
autoplot(components(fit))

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Copy the gas series and add 300 to its 10th Gas value to create an outlier.
gas_outlier <- gas |>
  mutate(Gas = replace(Gas, 10, Gas[10] + 300))

print(gas_outlier)

## # A tsibble: 20 x 2 [1Q]
##      Gas Quarter
##    <dbl>   <qtr>
##  1   221 2005 Q3
##  2   180 2005 Q4
##  3   171 2006 Q1
##  4   224 2006 Q2
##  5   233 2006 Q3
##  6   192 2006 Q4
##  7   187 2007 Q1
##  8   234 2007 Q2
##  9   245 2007 Q3
## 10   505 2007 Q4
## 11   194 2008 Q1
## 12   229 2008 Q2
## 13   249 2008 Q3
## 14   203 2008 Q4
## 15   196 2009 Q1
## 16   238 2009 Q2
## 17   252 2009 Q3
## 18   210 2009 Q4
## 19   205 2010 Q1
## 20   236 2010 Q2

Canadian Gas production

# Default time plot of canadian_gas (the plotted variable is chosen automatically).
autoplot(canadian_gas)

## Plot variable not specified, automatically selected `.vars = Volume`

# Add a 300-unit outlier to the final (20th) observation of the gas series.
# Only the Gas column is modified: adding 300 to the whole row would also
# shift the Quarter index and corrupt the time axis of the tsibble.
gas_outlier <- gas
gas_outlier$Gas[20] <- gas_outlier$Gas[20] + 300

# Pedestrian counts for one sensor, totalled per year-week.
# NOTE(review): despite the "daily" in the variable name, the aggregation
# below is weekly (index_by on yearweek).
pedestrian_daily <- pedestrian |>
  filter(Sensor %in% "Southern Cross Station") |>
  index_by(week = yearweek(Date_Time)) |>
  summarise(Weekly_Count = sum(Count, na.rm = TRUE))
pedestrian_daily |> print()

## # A tsibble: 105 x 2 [1W]
##        week Weekly_Count
##      <week>        <int>
##  1 2015 W01        10236
##  2 2015 W02        60134
##  3 2015 W03        71440
##  4 2015 W04        73393
##  5 2015 W05        62535
##  6 2015 W06        79154
##  7 2015 W07        81006
##  8 2015 W08        81497
##  9 2015 W09        82705
## 10 2015 W10        82120
## # ℹ 95 more rows

# Economy-class passengers on the MEL-SYD route only.
filtered_ansett <- ansett |>
  filter(Airports == "MEL-SYD" & Class == "Economy")
filtered_ansett |> print()

## # A tsibble: 282 x 4 [1W]
## # Key:       Airports, Class [1]
##        Week Airports Class   Passengers
##      <week> <chr>    <chr>        <dbl>
##  1 1987 W26 MEL-SYD  Economy      20167
##  2 1987 W27 MEL-SYD  Economy      20161
##  3 1987 W28 MEL-SYD  Economy      19993
##  4 1987 W29 MEL-SYD  Economy      20986
##  5 1987 W30 MEL-SYD  Economy      20497
##  6 1987 W31 MEL-SYD  Economy      20770
##  7 1987 W32 MEL-SYD  Economy      21111
##  8 1987 W33 MEL-SYD  Economy      20675
##  9 1987 W34 MEL-SYD  Economy      22092
## 10 1987 W35 MEL-SYD  Economy      20772
## # ℹ 272 more rows

# Box-Cox lambda for Passengers, taken from the lambda_guerrero column
# returned by the guerrero feature.
lambda_passengers <- features(
  filtered_ansett, Passengers, features = guerrero
)[["lambda_guerrero"]]

print(lambda_passengers)

## [1] 1.999927

# Apply the Box-Cox transformation with the estimated lambda and plot the result.
filtered_ansett |>
  mutate(Passengers_transformed = box_cox(Passengers, lambda_passengers)) |>
  autoplot(Passengers_transformed) +
  ggtitle("Box-Cox Transformed Economy Class Passengers (MEL-SYD)") +
  ylab("Passengers")

## Retail Data Decomposition: applying X-11 decomposition to the retail data

# Pick one retail series at random. The seed is fixed immediately before the
# draw so the same series is selected on every run, independent of any
# earlier random-number use in the session.
set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

autoplot(myseries)

## Plot variable not specified, automatically selected `.vars = Turnover`

# Seasonal plot of the selected retail series.
myseries |> gg_season()

## Plot variable not specified, automatically selected `y = Turnover`

# Subseries plot of the same series.
myseries |> gg_subseries()

## Plot variable not specified, automatically selected `y = Turnover`

# Lag plot drawn with points instead of the default geometry.
myseries |> gg_lag(geom = "point")

## Plot variable not specified, automatically selected `y = Turnover`

# Autocorrelation function of Turnover, plotted as a correlogram.
myseries |>
  ACF(Turnover) |>
  autoplot()

# X-11 decomposition of Turnover via X_13ARIMA_SEATS; keep and plot the components.
myseries_x11 <- components(
  model(myseries, X_13ARIMA_SEATS(Turnover ~ x11()))
)
myseries_x11 |> autoplot()

# GOOG rows from 2018 onward, re-indexed by a consecutive trading-day counter
# so the series is regular, with the first difference of Close added as `diff`.
dgoog <- gafa_stock |>
  filter(Symbol == "GOOG") |>
  filter(year(Date) >= 2018) |>
  mutate(trading_day = row_number()) |>
  update_tsibble(index = trading_day, regular = TRUE) |>
  mutate(diff = difference(Close))

# Description example: a bare string literal at top level, so R auto-prints it
# as the chunk's output.
# NOTE(review): the text discusses civilian labor force data, which no chunk
# visible here analyses - confirm it belongs in this document.
"The decomposition of the civilian labor force data reveals clear seasonal patterns and a trend decline in the early 1990s. The seasonal component shows annual fluctuations, while the trend indicates the economic downturn around the early 1990s, consistent with the recession."

## [1] "The decomposition of the civilian labor force data reveals clear seasonal patterns and a trend decline in the early 1990s. The seasonal component shows annual fluctuations, while the trend indicates the economic downturn around the early 1990s, consistent with the recession."

## Analysis: GDP per Capita and Time Series Analysis

In analyzing GDP per capita, I used the max() function to identify the country with the highest value and visualized its changes over time. This helped me see long-term economic trends and shifts. Using gg_lag(), ACF(), and autoplot(), I examined autocorrelations and trends, confirming the need for Box-Cox transformations to stabilize variance where necessary. I also explored moving averages, finding that a 3×5 MA is equivalent to a 7-term weighted MA. Introducing outliers significantly altered the seasonally adjusted results, with the effect depending on where in the series the outlier was placed. Finally, X-11 decomposition revealed seasonal trends and potential outliers in the retail data.

HW2_data624

Woodelyne Durosier

2025-02-16

1) global_economy

2 Data visualization

2a

2b

Moving Average, Box-Cox Transformation

Gas production