1. Import the data of this flat file into R.
# Load the raw employment series (columns Date and empces) from the CSV file
STCE <- read.csv(file = "C:/Users/marsh/Downloads/stcemployment.csv")
2. Demonstrate that you have a date index that properly shows
the month and year for each data point. You may use a dataframe, a
tibble, or a tsibble as you wish.
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.4.2
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.3 ✔ fable 0.4.1
## ✔ ggplot2 3.5.1
## Warning: package 'tsibble' was built under R version 4.4.2
## Warning: package 'tsibbledata' was built under R version 4.4.2
## Warning: package 'feasts' was built under R version 4.4.2
## Warning: package 'fabletools' was built under R version 4.4.2
## Warning: package 'fable' was built under R version 4.4.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Monthly date index sized to the data: one first-of-month date per row of
# STCE, starting in January 1990. Taking the length from nrow(STCE) keeps the
# index aligned with the file, which ends in Oct 2024 rather than Dec 2024.
date_seq <- seq.Date(
  from = as.Date("1990-01-01"),
  by = "month",
  length.out = nrow(STCE)
)
# Pair every observation with its "Mmm-YY" label (e.g., Jan-90, Feb-90)
# instead of an all-NA placeholder column.
# NOTE(review): assumes the rows of STCE are in chronological order with no
# missing months -- confirm against the Date column of the file.
STCEDF <- data.frame(
  Date = format(date_seq, "%b-%y"),
  empces = STCE$empces
)
# Check output
head(STCE)
## Date empces
## 1 Jan-90 55800
## 2 Feb-90 56000
## 3 Mar-90 55900
## 4 Apr-90 56500
## 5 May-90 57900
## 6 Jun-90 57800
# Show the last six observations to confirm where the series ends
tail(STCE, n = 6L)
## Date empces
## 413 May-24 93772
## 414 Jun-24 94317
## 415 Jul-24 94705
## 416 Aug-24 94713
## 417 Sep-24 94029
## 418 Oct-24 94178
# Inspect the column types of the imported data
str(object = STCE)
## 'data.frame': 418 obs. of 2 variables:
## $ Date : chr "Jan-90" "Feb-90" "Mar-90" "Apr-90" ...
## $ empces: int 55800 56000 55900 56500 57900 57800 58300 58200 59800 60600 ...
3. Generate a plot of the raw data. Write a paragraph to describe
what you see.
library(ggplot2)
library(dplyr)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Parse the "Mmm-YY" labels into first-of-month Date values. The guard makes
# this chunk safe to re-run: pasting "01-" onto an already-parsed Date column
# and parsing it again would turn every date into NA.
if (!inherits(STCE$Date, "Date")) {
  STCE$Date <- as.Date(paste0("01-", STCE$Date), format = "%d-%b-%y")
}
# Tag each month with the recession window it falls in, if any
STCER <- STCE %>%
  mutate(Recession = case_when(
    Date >= as.Date("2007-12-01") & Date <= as.Date("2009-06-01") ~
      "Great Recession",
    Date >= as.Date("2020-02-01") & Date <= as.Date("2020-04-01") ~
      "COVID Recession",
    TRUE ~ "No Recession"
  ))
# Create the plot. `group = 1` keeps a single continuous line while the color
# changes with the Recession label.
ggplot(STCER, aes(x = Date, y = empces, color = Recession, group = 1)) +
  # `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
  # for lines
  geom_line(linewidth = 1) +
  scale_color_manual(values = c("Great Recession" = "red",
                                "COVID Recession" = "red",
                                "No Recession" = "blue")) +
  labs(title = "Employment Trends Over Time",
       x = "Date",
       y = "Employment",
       color = "Period") +
  theme_minimal() +
  scale_x_date(date_breaks = "5 years", date_labels = "%b-%Y") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Add textual labels for recessions, placed relative to the series maximum
  annotate("text", x = as.Date("2008-06-01"),
           y = max(STCE$empces, na.rm = TRUE) * 0.99,
           label = "The Great Recession (2007-2009)", color = "red",
           hjust = 0) +
  annotate("text", x = as.Date("2020-07-01"),
           y = max(STCE$empces, na.rm = TRUE) * 0.9,
           label = "COVID Recession (2020)", color = "red", hjust = 0)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

4. Write code that takes the annual average for each year, and plot
this on a graph as well.
# Reload the raw file so Date is back in its original "Mmm-YY" text form
STCE <- read.csv(file = "C:/Users/marsh/Downloads/stcemployment.csv")
# Prefix a day so each label parses as a first-of-month Date
STCE$Date <- as.Date(paste0("01-", STCE$Date), format = "%d-%b-%y")
# Mean employment per calendar year; rows containing NA are dropped at the end.
# NOTE(review): the series ends in Oct-24, so the 2024 mean covers fewer
# months than the other years.
annual_avg <- STCE |>
  mutate(Year = as.numeric(format(Date, "%Y"))) |>
  group_by(Year) |>
  summarise(Average_Employment = mean(empces, na.rm = TRUE)) |>
  ungroup() |>
  na.omit()
# Plot the annual average employment: a blue line with a red point per year
ggplot(annual_avg, aes(x = Year, y = Average_Employment)) +
  # `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
  # for lines
  geom_line(color = "blue", linewidth = 1) +
  geom_point(color = "red") + # Highlight each year's average
  labs(title = "Trends in Annual Average Employment (STCE Data)",
       x = "Year",
       y = "Average Employment (empces)") +
  theme_minimal() +
  # Tick every five years across the observed range of years
  scale_x_continuous(breaks = seq(min(annual_avg$Year, na.rm = TRUE),
                                  max(annual_avg$Year, na.rm = TRUE),
                                  by = 5)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
