1. Import the data of this flat file into R.

# Load the employment series; the file supplies a text `Date` label column
# and an integer `empces` column.
STCE <- read.csv(file = "C:/Users/marsh/Downloads/stcemployment.csv")

2. Demonstrate that you have a date index that properly shows the month and year for each data point. You may use a dataframe, a tibble, or a tsibble as you wish.

library(fpp3)
## Warning: package 'fpp3' was built under R version 4.4.2
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.3     ✔ fable       0.4.1
## ✔ ggplot2     3.5.1
## Warning: package 'tsibble' was built under R version 4.4.2
## Warning: package 'tsibbledata' was built under R version 4.4.2
## Warning: package 'feasts' was built under R version 4.4.2
## Warning: package 'fabletools' was built under R version 4.4.2
## Warning: package 'fable' was built under R version 4.4.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.5
## ✔ purrr   1.0.2     ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)

# Build the date index from the data itself instead of a hand-typed range.
# The labels in STCE$Date look like "Jan-90"; prefixing a day lets as.Date()
# read each one as the first day of its month. STCE is left untouched here.
# NOTE(review): `%b` matches month abbreviations in the session locale, so
# this assumes an English LC_TIME -- confirm on the machine that knits this.
STCEDF <- data.frame(
  Date = as.Date(paste0("01-", STCE$Date), format = "%d-%b-%y"),
  empces = STCE$empces
)

# Gap-free monthly calendar starting at the first observation and running for
# as many months as there are rows.
date_seq <- seq.Date(
  from = min(STCEDF$Date, na.rm = TRUE),
  by = "month",
  length.out = nrow(STCEDF)
)

# The index is only trustworthy if every label parsed and the months are
# consecutive; warn (rather than stop) so the rest of the report still runs.
if (!isTRUE(all(STCEDF$Date == date_seq))) {
  warning(
    "STCE$Date is not a complete, ordered monthly sequence",
    call. = FALSE
  )
}

# Inspect the imported data: first rows, last rows, and column types.
# The recorded str() output below shows Date is still character at this point.
head(STCE)
##     Date empces
## 1 Jan-90  55800
## 2 Feb-90  56000
## 3 Mar-90  55900
## 4 Apr-90  56500
## 5 May-90  57900
## 6 Jun-90  57800
tail(STCE)
##       Date empces
## 413 May-24  93772
## 414 Jun-24  94317
## 415 Jul-24  94705
## 416 Aug-24  94713
## 417 Sep-24  94029
## 418 Oct-24  94178
str(STCE)
## 'data.frame':    418 obs. of  2 variables:
##  $ Date  : chr  "Jan-90" "Feb-90" "Mar-90" "Apr-90" ...
##  $ empces: int  55800 56000 55900 56500 57900 57800 58300 58200 59800 60600 ...

3. Generate a plot of the raw data. Write a paragraph to describe what you see.

library(ggplot2)
library(dplyr)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# Parse the "Mmm-YY" labels into Date values (first day of each month).
# The guard keeps this chunk safe to re-run: pasting "01-" onto a column that
# is already a Date would make every value fail to parse and become NA.
if (!inherits(STCE$Date, "Date")) {
  STCE$Date <- as.Date(paste0("01-", STCE$Date), format = "%d-%b-%y")
}

# Tag every month with the recession window it falls in, if any
STCER <- STCE %>%
  mutate(Recession = case_when(
    Date >= as.Date("2007-12-01") & Date <= as.Date("2009-06-01") ~ "Great Recession",
    Date >= as.Date("2020-02-01") & Date <= as.Date("2020-04-01") ~ "COVID Recession",
    TRUE ~ "No Recession"
  ))

# Highest observed level; the recession labels are placed relative to it
emp_peak <- max(STCE$empces, na.rm = TRUE)

# Raw series coloured by period. `group = 1` keeps it one continuous line
# even though the colour changes along it.
ggplot(STCER, aes(x = Date, y = empces, color = Recession, group = 1)) +
  # `linewidth` replaces the `size` aesthetic deprecated for lines in ggplot2 3.4.0
  geom_line(linewidth = 1) +
  scale_color_manual(values = c("Great Recession" = "red",
                                "COVID Recession" = "red",
                                "No Recession" = "blue")) +
  labs(title = "Employment Trends Over Time",
       x = "Date",
       y = "Employment",
       color = "Period") +
  theme_minimal() +
  scale_x_date(date_breaks = "5 years", date_labels = "%b-%Y") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Text labels for the two recessions, left-aligned at the given dates
  annotate("text", x = as.Date("2008-06-01"),
           y = emp_peak * 0.99,
           label = "The Great Recession (2007-2009)", color = "red", hjust = 0) +
  annotate("text", x = as.Date("2020-07-01"),
           y = emp_peak * 0.9,
           label = "COVID Recession (2020)", color = "red", hjust = 0)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

4. Write code that takes the annual average for each year, and plot this on a graph as well.

# Re-read the raw file so this section starts from the original text labels
STCE <- read.csv("C:/Users/marsh/Downloads/stcemployment.csv")

# Parse the "Mmm-YY" labels into Date values (first day of each month)
STCE$Date <- as.Date(paste0("01-", STCE$Date), format = "%d-%b-%y")

# Mean employment per calendar year.
# NOTE: the recorded tail() output ends at Oct-24, so the 2024 figure is
# the mean of the months observed so far, not of a full year.
annual_avg <- STCE %>%
  mutate(Year = as.numeric(format(Date, "%Y"))) %>%
  group_by(Year) %>%
  summarize(Average_Employment = mean(empces, na.rm = TRUE), .groups = "drop")

# Drop any row with a missing Year or mean (e.g. from a label that failed to parse)
annual_avg <- na.omit(annual_avg)

# Axis ticks every five years across the observed range
year_breaks <- seq(min(annual_avg$Year, na.rm = TRUE),
                   max(annual_avg$Year, na.rm = TRUE),
                   by = 5)

# Line for the trend, points to mark each yearly mean
ggplot(annual_avg, aes(x = Year, y = Average_Employment)) +
  # `linewidth` replaces the `size` aesthetic deprecated for lines in ggplot2 3.4.0
  geom_line(color = "blue", linewidth = 1) +
  geom_point(color = "red") +
  labs(title = "Trends in Annual Average Employment (STCE Data)",
       x = "Year",
       y = "Average Employment (empces)") +
  theme_minimal() +
  scale_x_continuous(breaks = year_breaks) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

5. Following the examples in Chapter 3, perform all of the following decompositions of these data.

## 1. A moving average of a length you think is best to describe the cycle in these data (section 3.3 in the text)
library(TTR)  # For moving average function
## Warning: package 'TTR' was built under R version 4.4.2
# Centred 12-month moving average (a 2x12-MA): half weight on the two end
# months and full weight on the eleven in between, so the window is symmetric
# around each observation. A trailing average would lag the series and
# misplace its turning points. `stats::` is spelled out because dplyr masks
# filter(). The first and last six months have no complete window and are NA.
ma_weights <- c(0.5, rep(1, 11), 0.5) / 12
STCE$Moving_Avg <- as.numeric(
  stats::filter(STCE$empces, filter = ma_weights, sides = 2)
)

# Original series in grey with the smoothed series drawn over it
ggplot(STCE, aes(x = Date)) +
  geom_line(aes(y = empces), color = "grey", alpha = 0.5) +
  # `linewidth` replaces the `size` aesthetic deprecated for lines in ggplot2 3.4.0
  geom_line(aes(y = Moving_Avg), color = "blue", linewidth = 1) +
  labs(title = "12-Month Moving Average of Employment",
       x = "Year", y = "Employment (empces)") +
  theme_minimal()
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_line()`).

## 2. Classical decomposition (3.4)
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.2
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# Monthly ts object: 12 observations per year, first one in January 1990
ts_emp <- ts(data = STCE$empces, frequency = 12, start = c(1990, 1))

# Classical decomposition, multiplicative form
classical_decomp <- decompose(x = ts_emp, type = "multiplicative")

# Draw the decomposition with a custom title
autoplot(classical_decomp) +
  ggtitle("Classical Decomposition of Employment Data")

## 3. STL decomposition (3.6)
# STL decomposition of the monthly series with a periodic seasonal window
stl_decomp <- stl(x = ts_emp, s.window = "periodic")

# Draw the decomposition with a custom title
autoplot(stl_decomp) +
  ggtitle("STL Decomposition of Employment Data")