# read excel file
predf <- read_excel("BLS Analysis.xlsx", sheet = "Summary")
#process and manipulate data
df <- predf %>%
clean_names() %>%
pivot_longer(cols = 2:5, names_to = "type", values_to = "value") %>%
mutate(initial_release_date = as_date(initial_release_date)) %>%
mutate(revised_release_date = as_date(revised_release_date))
# convert month from character to month format
df$month <- factor(df$month, levels = month.name)
# view data
df
## # A tibble: 48 × 5
## month initial_release_date revised_release_date type value
## <fct> <date> <date> <chr> <dbl>
## 1 January 2023-02-03 2023-03-10 dow_jones_consensus… 187
## 2 January 2023-02-03 2023-03-10 initial_release 517
## 3 January 2023-02-03 2023-03-10 revised_release 472
## 4 January 2023-02-03 2023-03-10 release_variance -45
## 5 February 2023-03-10 2023-04-07 dow_jones_consensus… 225
## 6 February 2023-03-10 2023-04-07 initial_release 311
## 7 February 2023-03-10 2023-04-07 revised_release 248
## 8 February 2023-03-10 2023-04-07 release_variance -63
## 9 March 2023-04-07 2023-05-05 dow_jones_consensus… 238
## 10 March 2023-04-07 2023-05-05 initial_release 236
## # ℹ 38 more rows
This data set was manually created using a combination of
official Bureau of Labor Statistics data and monthly released jobs
reports from 2023. The BLS data, which shows the final(revised) figures,
can be accessed here : https://data.bls.gov/cgi-bin/surveymost Unfortunately,
the link does not currently work when accessed through this document. If
it is not working at time of access, you can copy the link into your
search browser or follow these steps: On the BLS.gov website, navigate
to Data Tools and select BLS Popular Series. Then select the Total
Nonfarm Employment series from the Employment section. Finally, click
Retrieve Data at the bottom. Monthly Job reports were used in data
sourcing because they include the market estimates and
initial(pre-revision) figures. This allows us to see what was expected,
what was reported, and what was revised.
# assign chronological order to value type
type_order <- c("dow_jones_consensus_estimate",
"initial_release",
"revised_release",
"release_variance")
# create bar graph
ggplot(df, aes(x = month, y = value, fill = fct_reorder(type, match(type, type_order)))) +
geom_col(position = "dodge") +
scale_fill_manual(
values = c("dow_jones_consensus_estimate" = "navy",
"initial_release" = "blue",
"revised_release" = "lightblue",
"release_variance" = "orange"),
labels = c("dow_jones_consensus_estimate" = "Dow Jones Consensus Estimate",
"initial_release" = "Initial Release",
"revised_release" = "Revised Release",
"release_variance" = "Release Variance")
) +
guides(fill = guide_legend(title = "Data Type")) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Change in Total Nonfarm Payroll Employment", x = "Month", y = "Value") +
geom_vline(xintercept = seq(0.5, length(unique(df$month)) - 0.5), linetype = "dashed", color = "darkgrey") +
scale_y_continuous(breaks = seq(-150, 550, by = 50), limits = c(-150, 550))
The best way to analyze the full year of BLS Job figures is to
plot each month’s estimate, initial value, revised value, and the size
of the revision. It can be seen that initial values beat expectations
8/12 months, but were revised down 10/11 months (December revision is
NA).
# generate report data frame with summary statistics
report <- df %>%
group_by(month) %>%
mutate(beat_exp = if_else(value[type == "initial_release"] > value[type == "dow_jones_consensus_estimate"], "Yes", "No")) %>%
mutate(rev_percent = round((value[type == "release_variance"] / value[type == "initial_release"]), 2)) %>%
mutate(exp_var_pct = round((((value[type == "initial_release"])-(value[type == "dow_jones_consensus_estimate"])) / value[type == "dow_jones_consensus_estimate"]), 2)) %>%
summarise(beat_exp = first(beat_exp),
exp_var_pct = first(exp_var_pct),
rev_percent = first(rev_percent)) %>%
mutate(avg_exp_var = sum(unique(exp_var_pct))/n_distinct(df$month),
avg_rev_pct = sum(unique(rev_percent))/11)
# view report
print(report)
## # A tibble: 12 × 6
## month beat_exp exp_var_pct rev_percent avg_exp_var avg_rev_pct
## <fct> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 January Yes 1.76 -0.09 0.368 -0.154
## 2 February Yes 0.38 -0.2 0.368 -0.154
## 3 March No -0.01 -0.08 0.368 -0.154
## 4 April Yes 0.41 -0.14 0.368 -0.154
## 5 May Yes 0.78 -0.17 0.368 -0.154
## 6 June No -0.13 -0.5 0.368 -0.154
## 7 July No -0.06 0.26 0.368 -0.154
## 8 August Yes 0.1 -0.12 0.368 -0.154
## 9 September Yes 0.98 -0.22 0.368 -0.154
## 10 October No -0.12 -0.3 0.368 -0.154
## 11 November Yes 0.05 -0.13 0.368 -0.154
## 12 December Yes 0.27 0 0.368 -0.154
Summary statistics are created to tell a high-level story of the jobs
data for 2023. On average, initial reports beat expectations by 36.75%
and were revised down 15.36%.
summary23 <- df %>%
summarise(estimate_sum = sum(value[type == "dow_jones_consensus_estimate"])*1000,
initial_sum = sum(value[type == "initial_release"])*1000,
revised_sum = sum(value[type == "revised_release"])*1000,
variance_sum = -sum(value[type == "release_variance"])*1000
)
summary23 <- summary23 %>%
pivot_longer(cols = 1:4, names_to = "name", values_to = "value")
ggplot(summary23, aes(x = name, y = value, fill = name)) +
geom_bar(stat = "identity") +
scale_fill_manual(
values = c("estimate_sum" = "navy",
"initial_sum" = "blue",
"revised_sum" = "lightblue",
"variance_sum" = "orange"),
labels = c("estimate_sum" = "Dow Jones Consensus Estimate",
"initial_sum" = "Initial Release",
"revised_sum" = "Revised Release",
"variance_sum" = "Release Variance")
)+
scale_y_continuous(labels = comma, breaks = seq(0, 3200000, by = 250000), limits = c(0, 3200000)) +
labs(title = "2023 Nonfarm Payroll Employment Summary", x = "", y = "Change in Nonfarm Payroll") +
theme_minimal()
Looking at the full year picture, initial BLS releases surpassed
estimates by 810,000 jobs. Initial releases were revised down 443,000
jobs, which resulted in a final figure of 367,000 more jobs than
estimated.
# set key to pull data from FRED
api_key <- "21489194ba838be7e47627eb82142f3a"
set_fred_key(api_key)
# load SP500 data from FRED
gscp <- fred(sp500 = "SP500") %>%
filter(date >= "2023-01-01",
date <= "2023-12-31") %>%
filter(!is.na(sp500))
# data set of Initial BLS release dates
bls_release_dates <- unique(df$initial_release_date)
# create data to show how the SP500 performed the subsequent week after a jobs report release
gscp_df <- gscp %>%
mutate(release = if_else(date %in% bls_release_dates, "yes", "no")) %>%
mutate(test = if_else(release == "yes", "yes", lag(release, n=5, default = "no"))) %>%
mutate(month = month(ymd(date))) %>%
filter(test == "yes") %>%
group_by(month) %>%
mutate(return = (sp500[release == "no"] - sp500[release == "yes"])/sp500[release == "yes"]) %>%
filter(release == "yes")
gscp_df
## # A tibble: 10 × 6
## # Groups: month [10]
## date sp500 release test month return
## <date> <dbl> <chr> <chr> <dbl> <dbl>
## 1 2023-02-03 4136. yes yes 2 -0.0111
## 2 2023-03-10 3862. yes yes 3 0.0143
## 3 2023-05-05 4136. yes yes 5 -0.00294
## 4 2023-06-02 4282. yes yes 6 0.00385
## 5 2023-07-07 4399. yes yes 7 0.0242
## 6 2023-08-04 4478. yes yes 8 -0.00312
## 7 2023-09-01 4516. yes yes 9 -0.00627
## 8 2023-10-06 4308. yes yes 10 0.00447
## 9 2023-11-03 4358. yes yes 11 0.0131
## 10 2023-12-08 4604. yes yes 12 0.0249
This table aims to observe how SP500 behaves following an Employment
Situation Release. This is done by calculating the return for the week
that immediately proceeds the release. Because all releases are on the
first Friday of the month, the return is calculated from the next
Monday-Friday.
filtered_gscp <- gscp %>%
mutate(month = month(ymd(date))) %>%
mutate(value_norm = ifelse(is.na(sp500), NA, round((sp500 / max(sp500, na.rm = TRUE)), 2))) %>%
group_by(month) %>%
mutate(return = round(((last(sp500) - first(sp500)) / first(sp500)) * 100, 2)) %>%
ungroup() %>%
mutate(annual_return = round(((last(sp500) - first(sp500)) / first(sp500)) * 100, 2))
# Add a random date for each month in filtered_gscp
filtered_gscp <- filtered_gscp %>%
group_by(month) %>%
mutate(random_date = sample(date, 1)) %>%
ungroup()
# Create a lookup table for month names and corresponding first day of the month
month_lookup <- data.frame(
month = month.name,
date = floor_date(ymd("2022-01-01") + months(0:11), "month")
)
# Merge the beat_exp_test data with the month_lookup table to get the first day of the month
beat_exp_test <- report %>%
select(month, exp_var_pct, rev_percent) %>%
left_join(month_lookup, by = c("month" = "month")) %>%
mutate(exp_pct = round(exp_var_pct * 100, 2)) %>%
mutate(rev_pct = round(rev_percent * 100, 2)) %>%
mutate(month = month(date)) %>%
select(-date)
merged_data <- left_join(filtered_gscp, beat_exp_test, by = "month")
# mutate(pct_norm = ifelse(is.na(pct), NA, round((pct / max(pct, na.rm = TRUE)), 2)))
plot_ly() %>%
add_trace(data = merged_data,
x = ~date, y = ~value_norm,
color = ~factor(month),
text = ~paste("Date: ", date, "<br>Monthly Return: ", return, "%"),
type = 'scatter', mode = 'lines') %>%
add_trace(data = merged_data,
x = ~date, y = ~exp_var_pct,
text = ~paste("Expectation Variance: ", exp_pct, "%"),
type = 'scatter', mode = 'lines',
line = list(color = 'red', dash = 'dash'),
name = 'Expectation Variance') %>%
add_trace(data = merged_data,
x = ~date, y = ~rev_percent,
text = ~paste("Revision Variance: ", rev_pct, "%"),
type = 'scatter', mode = 'lines',
line = list(color = 'orange', dash = 'dash'),
name = 'Revision Variance') %>%
layout(title = "SP500 Performance & Variance",
xaxis = list(title = "Date"),
yaxis = list(title = "Normalized Value"),
dragmode = "zoom", # Enable zoom with mouse
showlegend = TRUE)
Hover your pointer over the plot lines to see series details. To use
the zoom feature, draw a box with your mouse around the area you would
like to zoom. Double-click to return to the normal view. You can isolate
a specific series by double-clicking it in the right side of the plot.
For example, to see just January, double-click the first line with the
“1” next to it.
To get a better understanding of how the
SP500 interacts with releases, we can plot it with various metrics
related to the jobs report. The dotted red line represents the percent
variance of the initial release and the estimate. Negative values
indicate releases below expectations. The orange line represents the
percent change of the revision to each initial release.