library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tsibble 1.1.6 ✔ feasts 0.4.1
## ✔ tsibbledata 0.4.1 ✔ fable 0.4.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(dplyr)
library(ggplot2)
library(forcats)
library(tsibble)
data("global_economy")
Which country has the highest GDP per capita? How has this changed over time?
The country with the highest GDP per capita changes over time. Small, wealthy economies, such as resource-rich Norway or the financial centre Luxembourg, typically have a high GDP per capita.
# GDP per capita over time, one coloured line per country.
# The legend is suppressed: with 263 countries it would crowd out the panel.
global_economy |>
  ggplot(aes(x = Year, y = GDP / Population, color = Country)) +
  geom_line(show.legend = FALSE) +
  labs(
    title = "GDP per Capita Over Time",
    y = "GDP per Capita",
    x = "Year"
  )
## Warning: Removed 3242 rows containing missing values or values outside the scale range
## (`geom_line()`).
# GDP per capita for every country, drawn with autoplot() and without a legend.
autoplot(global_economy, GDP / Population, show.legend = FALSE) +
  labs(title = "GDP per Capita", x = "Year")
## Warning: Removed 3242 rows containing missing values or values outside the scale range
## (`geom_line()`).
# GDP per capita over time, with every country drawn in the same blue.
# `group = Country` keeps each country as its own line; without it geom_line()
# joins the observations of all countries into a single zig-zag path.
global_economy |>
  mutate(GDP_per_capita = GDP / Population) |>
  ggplot(aes(x = Year, y = GDP_per_capita, group = Country)) +
  geom_line(color = "blue") +
  labs(
    title = "GDP per Capita Over Time",
    y = "GDP per Capita",
    x = "Year"
  )
United States GDP from global_economy
# Time plot of GDP for the United States only.
autoplot(
  filter(global_economy, Country == "United States"),
  GDP
) +
  labs(title = "United States GDP", x = "Year", y = "GDP (USD)")
Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock.
# Slaughter counts for bulls, bullocks and steers in Victoria.
autoplot(
  filter(
    aus_livestock,
    State == "Victoria",
    Animal == "Bulls, bullocks and steers"
  ),
  Count
) +
  labs(title = "Slaughter of Victorian Bulls, Bullocks, and Steers")
# 2c: Victorian electricity demand at the original observation frequency.
vic_elec |>
  autoplot(Demand) +
  labs(title = "Victorian Electricity Demand")
# Total demand per Date: sum every reading that shares a Date, then plot it.
vic_elec |>
  index_by(Date) |>
  summarise(Total_Demand = sum(Demand)) |>
  autoplot(Total_Demand) +
  labs(
    title = "Daily Electricity Demand for Australia",
    subtitle = "State of Victoria"
  )
# 2d: gas production from aus_production.
aus_production |>
  autoplot(Gas) +
  labs(title = "Gas Production Over Time")
# Time plot of the Volume series in canadian_gas.
autoplot(canadian_gas, Volume) +
  labs(title = "Canadian Gas Volume")
# Fix the RNG seed so the same retail series is drawn on every run, then keep
# only the rows whose Series ID matches one randomly chosen ID.
set.seed(12345678)
myseries <- filter(
  aus_retail,
  `Series ID` == sample(aus_retail$`Series ID`, size = 1)
)
# Load the global_economy dataset and show its first rows.
data(list = "global_economy")
print(global_economy)
## # A tsibble: 15,150 x 9 [1Y]
## # Key: Country [263]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1960 537777811. NA NA 7.02 4.13 8996351
## 2 Afghanistan AFG 1961 548888896. NA NA 8.10 4.45 9166764
## 3 Afghanistan AFG 1962 546666678. NA NA 9.35 4.88 9345868
## 4 Afghanistan AFG 1963 751111191. NA NA 16.9 9.17 9533954
## 5 Afghanistan AFG 1964 800000044. NA NA 18.1 8.89 9731361
## 6 Afghanistan AFG 1965 1006666638. NA NA 21.4 11.3 9938414
## 7 Afghanistan AFG 1966 1399999967. NA NA 18.6 8.57 10152331
## 8 Afghanistan AFG 1967 1673333418. NA NA 14.2 6.77 10372630
## 9 Afghanistan AFG 1968 1373333367. NA NA 15.2 8.90 10604346
## 10 Afghanistan AFG 1969 1408888922. NA NA 15.0 10.1 10854428
## # ℹ 15,140 more rows
A 3×5 MA is equivalent to a 7-term weighted moving average with weights 1/15, 2/15, 3/15, 3/15, 3/15, 2/15 and 1/15 (rounded to three decimals below):
# Symmetric 7-term weights: two tail values mirrored around three central 0.2s.
# These are 1/15, 2/15 and 3/15 rounded to three decimals.
outer_weights <- c(0.067, 0.133)
weights <- c(outer_weights, rep(0.200, times = 3), rev(outer_weights))
print(weights)
## [1] 0.067 0.133 0.200 0.200 0.200 0.133 0.067
# Keep the last 5 * 4 = 20 observations of aus_production, Gas column only,
# and plot them (autoplot picks the only measured variable).
gas <- aus_production |>
  tail(n = 5 * 4) |>
  select(Gas)
gas |> autoplot()
## Plot variable not specified, automatically selected `.vars = Gas`
# Fit a multiplicative classical decomposition to the gas series and plot
# its components.
fit <- model(gas, classical_decomposition(Gas, type = "multiplicative"))
autoplot(components(fit))
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Copy of the gas series with an outlier injected: observation 10 is raised
# by 300, all other observations are left unchanged.
gas_outlier <- gas |>
  mutate(Gas = replace(Gas, 10, Gas[10] + 300))
print(gas_outlier)
## # A tsibble: 20 x 2 [1Q]
## Gas Quarter
## <dbl> <qtr>
## 1 221 2005 Q3
## 2 180 2005 Q4
## 3 171 2006 Q1
## 4 224 2006 Q2
## 5 233 2006 Q3
## 6 192 2006 Q4
## 7 187 2007 Q1
## 8 234 2007 Q2
## 9 245 2007 Q3
## 10 505 2007 Q4
## 11 194 2008 Q1
## 12 229 2008 Q2
## 13 249 2008 Q3
## 14 203 2008 Q4
## 15 196 2009 Q1
## 16 238 2009 Q2
## 17 252 2009 Q3
## 18 210 2009 Q4
## 19 205 2010 Q1
## 20 236 2010 Q2
Canadian Gas production
# Time plot of canadian_gas; the plotted variable is chosen automatically.
autoplot(canadian_gas)
## Plot variable not specified, automatically selected `.vars = Volume`
# Copy of the gas series with an outlier of +300 at the last (20th) observation.
# Only the Gas column is modified: adding 300 to the whole row would also
# shift the Quarter index of that observation.
gas_outlier <- gas
gas_outlier$Gas[20] <- gas_outlier$Gas[20] + 300
# Weekly pedestrian totals for the Southern Cross Station sensor.
# Note: despite its name, `pedestrian_daily` holds one row per week.
pedestrian_daily <- summarise(
  index_by(
    filter(pedestrian, Sensor == "Southern Cross Station"),
    week = yearweek(Date_Time)
  ),
  Weekly_Count = sum(Count, na.rm = TRUE)
)
print(pedestrian_daily)
## # A tsibble: 105 x 2 [1W]
## week Weekly_Count
## <week> <int>
## 1 2015 W01 10236
## 2 2015 W02 60134
## 3 2015 W03 71440
## 4 2015 W04 73393
## 5 2015 W05 62535
## 6 2015 W06 79154
## 7 2015 W07 81006
## 8 2015 W08 81497
## 9 2015 W09 82705
## 10 2015 W10 82120
## # ℹ 95 more rows
# Economy-class passengers on the MEL-SYD route only.
filtered_ansett <- filter(
  ansett,
  Airports == "MEL-SYD",
  Class == "Economy"
)
print(filtered_ansett)
## # A tsibble: 282 x 4 [1W]
## # Key: Airports, Class [1]
## Week Airports Class Passengers
## <week> <chr> <chr> <dbl>
## 1 1987 W26 MEL-SYD Economy 20167
## 2 1987 W27 MEL-SYD Economy 20161
## 3 1987 W28 MEL-SYD Economy 19993
## 4 1987 W29 MEL-SYD Economy 20986
## 5 1987 W30 MEL-SYD Economy 20497
## 6 1987 W31 MEL-SYD Economy 20770
## 7 1987 W32 MEL-SYD Economy 21111
## 8 1987 W33 MEL-SYD Economy 20675
## 9 1987 W34 MEL-SYD Economy 22092
## 10 1987 W35 MEL-SYD Economy 20772
## # ℹ 272 more rows
# Box-Cox lambda for Passengers, taken from the guerrero feature.
guerrero_features <- features(filtered_ansett, Passengers, features = guerrero)
lambda_passengers <- guerrero_features$lambda_guerrero
print(lambda_passengers)
## [1] 1.999927
# Plot the passenger series after a Box-Cox transform with the lambda
# computed above.
autoplot(
  mutate(
    filtered_ansett,
    Passengers_transformed = box_cox(Passengers, lambda_passengers)
  ),
  Passengers_transformed
) +
  labs(
    title = "Box-Cox Transformed Economy Class Passengers (MEL-SYD)",
    y = "Passengers"
  )
# Retail data: X-11 decomposition ----
# Re-seed before sampling so this always draws the retail series selected by
# seed 12345678, instead of a series that depends on the RNG state left by
# whatever code ran before.
set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
# Exploratory graphics for the retail series: time plot, seasonal plot,
# subseries plot, lag plot and autocorrelation function.
myseries |> autoplot()
## Plot variable not specified, automatically selected `.vars = Turnover`
myseries |> gg_season()
## Plot variable not specified, automatically selected `y = Turnover`
myseries |> gg_subseries()
## Plot variable not specified, automatically selected `y = Turnover`
myseries |> gg_lag(geom = "point")
## Plot variable not specified, automatically selected `y = Turnover`
autoplot(ACF(myseries, Turnover))
# X-11 decomposition of Turnover; keep the components and plot them.
myseries_x11 <- components(
  model(myseries, X_13ARIMA_SEATS(Turnover ~ x11()))
)
myseries_x11 |> autoplot()
# GOOG prices from 2018 onwards, re-indexed by consecutive trading day so the
# series is regular, plus the first difference of the closing price.
dgoog <- gafa_stock |>
  filter(Symbol == "GOOG" & year(Date) >= 2018) |>
  mutate(trading_day = row_number()) |>
  update_tsibble(index = trading_day, regular = TRUE) |>
  mutate(diff = difference(Close))
# Description example: a sample written interpretation of a decomposition,
# kept as a bare string so that it is echoed when evaluated at top level.
# NOTE(review): the text refers to civilian labor force data, which is not
# analysed in the visible part of this file - confirm it belongs here.
"The decomposition of the civilian labor force data reveals clear seasonal patterns and a trend decline in the early 1990s. The seasonal component shows annual fluctuations, while the trend indicates the economic downturn around the early 1990s, consistent with the recession."
## [1] "The decomposition of the civilian labor force data reveals clear seasonal patterns and a trend decline in the early 1990s. The seasonal component shows annual fluctuations, while the trend indicates the economic downturn around the early 1990s, consistent with the recession."
##Analysis GDP per Capita and Time Series Analysis
In analyzing GDP per capita, I used the max() function to identify the country with the highest value and visualized how it changed over time, which helped me see long-term economic trends and shifts. Using gg_lag(), ACF(), and autoplot(), I examined autocorrelations and trends, confirming the need for Box-Cox transformations to stabilize variance where necessary. I also explored moving averages, finding that a 3×5 MA is equivalent to a 7-term weighted MA. Introducing an outlier significantly altered the seasonally adjusted results, with the effect depending on where in the series the outlier was placed. Finally, X-11 decomposition revealed seasonal patterns and potential outliers in the retail data.