# read excel file
predf <- read_excel("BLS Analysis.xlsx", sheet = "Summary")

#process and manipulate data
df <- predf %>% 
  clean_names() %>% 
  pivot_longer(cols = 2:5, names_to = "type", values_to = "value") %>% 
  mutate(initial_release_date = as_date(initial_release_date)) %>% 
  mutate(revised_release_date = as_date(revised_release_date))

# convert month from character to month format
df$month <- factor(df$month, levels = month.name)

# view data
df
## # A tibble: 48 × 5
##    month    initial_release_date revised_release_date type                 value
##    <fct>    <date>               <date>               <chr>                <dbl>
##  1 January  2023-02-03           2023-03-10           dow_jones_consensus…   187
##  2 January  2023-02-03           2023-03-10           initial_release        517
##  3 January  2023-02-03           2023-03-10           revised_release        472
##  4 January  2023-02-03           2023-03-10           release_variance       -45
##  5 February 2023-03-10           2023-04-07           dow_jones_consensus…   225
##  6 February 2023-03-10           2023-04-07           initial_release        311
##  7 February 2023-03-10           2023-04-07           revised_release        248
##  8 February 2023-03-10           2023-04-07           release_variance       -63
##  9 March    2023-04-07           2023-05-05           dow_jones_consensus…   238
## 10 March    2023-04-07           2023-05-05           initial_release        236
## # ℹ 38 more rows


This data set was manually created using a combination of official Bureau of Labor Statistics data and monthly released jobs reports from 2023. The BLS data, which shows the final(revised) figures, can be accessed here : https://data.bls.gov/cgi-bin/surveymost Unfortunately, the link does not currently work when accessed through this document. If it is not working at time of access, you can copy the link into your search browser or follow these steps: On the BLS.gov website, navigate to Data Tools and select BLS Popular Series. Then select the Total Nonfarm Employment series from the Employment section. Finally, click Retrieve Data at the bottom. Monthly Job reports were used in data sourcing because they include the market estimates and initial(pre-revision) figures. This allows us to see what was expected, what was reported, and what was revised.





# assign chronological order to value type
type_order <- c("dow_jones_consensus_estimate",
                "initial_release",
                "revised_release",
                "release_variance")

# create bar graph
ggplot(df, aes(x = month, y = value, fill = fct_reorder(type, match(type, type_order)))) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c("dow_jones_consensus_estimate" = "navy", 
               "initial_release" = "blue", 
               "revised_release" = "lightblue", 
               "release_variance" = "orange"),
    labels = c("dow_jones_consensus_estimate" = "Dow Jones Consensus Estimate",
               "initial_release" = "Initial Release",
               "revised_release" = "Revised Release",
               "release_variance" = "Release Variance")
  ) +
  guides(fill = guide_legend(title = "Data Type")) + 
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Change in Total Nonfarm Payroll Employment", x = "Month", y = "Value") +
  geom_vline(xintercept = seq(0.5, length(unique(df$month)) - 0.5), linetype = "dashed", color = "darkgrey") +
  scale_y_continuous(breaks = seq(-150, 550, by = 50), limits = c(-150, 550))



The best way to analyze the full year of BLS Job figures is to plot each month’s estimate, initial value, revised value, and the size of the revision. It can be seen that initial values beat expectations 8/12 months, but were revised down 10/11 months (December revision is NA).





# generate report data frame with summary statistics
report <- df %>% 
  group_by(month) %>% 
  mutate(beat_exp = if_else(value[type == "initial_release"] > value[type == "dow_jones_consensus_estimate"], "Yes", "No")) %>% 
  mutate(rev_percent = round((value[type == "release_variance"] / value[type == "initial_release"]), 2)) %>% 
  mutate(exp_var_pct = round((((value[type == "initial_release"])-(value[type == "dow_jones_consensus_estimate"])) / value[type == "dow_jones_consensus_estimate"]), 2)) %>% 
  summarise(beat_exp = first(beat_exp),
            exp_var_pct = first(exp_var_pct),
            rev_percent = first(rev_percent)) %>%
  mutate(avg_exp_var = sum(unique(exp_var_pct))/n_distinct(df$month),
         avg_rev_pct = sum(unique(rev_percent))/11)

# view report
print(report)
## # A tibble: 12 × 6
##    month     beat_exp exp_var_pct rev_percent avg_exp_var avg_rev_pct
##    <fct>     <chr>          <dbl>       <dbl>       <dbl>       <dbl>
##  1 January   Yes             1.76       -0.09       0.368      -0.154
##  2 February  Yes             0.38       -0.2        0.368      -0.154
##  3 March     No             -0.01       -0.08       0.368      -0.154
##  4 April     Yes             0.41       -0.14       0.368      -0.154
##  5 May       Yes             0.78       -0.17       0.368      -0.154
##  6 June      No             -0.13       -0.5        0.368      -0.154
##  7 July      No             -0.06        0.26       0.368      -0.154
##  8 August    Yes             0.1        -0.12       0.368      -0.154
##  9 September Yes             0.98       -0.22       0.368      -0.154
## 10 October   No             -0.12       -0.3        0.368      -0.154
## 11 November  Yes             0.05       -0.13       0.368      -0.154
## 12 December  Yes             0.27        0          0.368      -0.154

Summary statistics are created to tell a high-level story of the jobs data for 2023. On average, initial reports beat expectations by 36.75% and were revised down 15.36%.





summary23 <- df %>% 
  summarise(estimate_sum = sum(value[type == "dow_jones_consensus_estimate"])*1000,
            initial_sum = sum(value[type == "initial_release"])*1000,
            revised_sum = sum(value[type == "revised_release"])*1000,
            variance_sum = -sum(value[type == "release_variance"])*1000
            )

summary23 <- summary23 %>% 
  pivot_longer(cols = 1:4, names_to = "name", values_to = "value")

ggplot(summary23, aes(x = name, y = value, fill = name)) +
         geom_bar(stat = "identity") +
  scale_fill_manual(
    values = c("estimate_sum" = "navy", 
               "initial_sum" = "blue", 
               "revised_sum" = "lightblue", 
               "variance_sum" = "orange"),
    labels = c("estimate_sum" = "Dow Jones Consensus Estimate",
               "initial_sum" = "Initial Release",
               "revised_sum" = "Revised Release",
               "variance_sum" = "Release Variance")
  )+
  scale_y_continuous(labels = comma, breaks = seq(0, 3200000, by = 250000), limits = c(0, 3200000)) +
  labs(title = "2023 Nonfarm Payroll Employment Summary", x = "", y = "Change in Nonfarm Payroll") +
  theme_minimal()

Looking at the full year picture, initial BLS releases surpassed estimates by 810,000 jobs. Initial releases were revised down 443,000 jobs, which resulted in a final figure of 367,000 more jobs than estimated.





# set key to pull data from FRED
api_key <- "21489194ba838be7e47627eb82142f3a"
set_fred_key(api_key)

# load SP500 data from FRED
gscp <- fred(sp500 = "SP500") %>% 
  filter(date >= "2023-01-01",
         date <= "2023-12-31") %>% 
  filter(!is.na(sp500))

# data set of Initial BLS release dates
bls_release_dates <- unique(df$initial_release_date)

# create data to show how the SP500 performed the subsequent week after a jobs report release
gscp_df <- gscp %>%
  mutate(release = if_else(date %in% bls_release_dates, "yes", "no")) %>%
  mutate(test = if_else(release == "yes", "yes", lag(release, n=5, default = "no"))) %>% 
  mutate(month = month(ymd(date))) %>% 
  filter(test == "yes") %>% 
  group_by(month) %>% 
  mutate(return = (sp500[release == "no"] - sp500[release == "yes"])/sp500[release == "yes"]) %>% 
  filter(release == "yes")

gscp_df
## # A tibble: 10 × 6
## # Groups:   month [10]
##    date       sp500 release test  month   return
##    <date>     <dbl> <chr>   <chr> <dbl>    <dbl>
##  1 2023-02-03 4136. yes     yes       2 -0.0111 
##  2 2023-03-10 3862. yes     yes       3  0.0143 
##  3 2023-05-05 4136. yes     yes       5 -0.00294
##  4 2023-06-02 4282. yes     yes       6  0.00385
##  5 2023-07-07 4399. yes     yes       7  0.0242 
##  6 2023-08-04 4478. yes     yes       8 -0.00312
##  7 2023-09-01 4516. yes     yes       9 -0.00627
##  8 2023-10-06 4308. yes     yes      10  0.00447
##  9 2023-11-03 4358. yes     yes      11  0.0131 
## 10 2023-12-08 4604. yes     yes      12  0.0249

This table aims to observe how SP500 behaves following an Employment Situation Release. This is done by calculating the return for the week that immediately proceeds the release. Because all releases are on the first Friday of the month, the return is calculated from the next Monday-Friday.





filtered_gscp <- gscp %>% 
  mutate(month = month(ymd(date))) %>% 
  mutate(value_norm = ifelse(is.na(sp500), NA, round((sp500 / max(sp500, na.rm = TRUE)), 2))) %>% 
  group_by(month) %>% 
  mutate(return = round(((last(sp500) - first(sp500)) / first(sp500)) * 100, 2)) %>% 
  ungroup() %>% 
  mutate(annual_return = round(((last(sp500) - first(sp500)) / first(sp500)) * 100, 2))

# Add a random date for each month in filtered_gscp
filtered_gscp <- filtered_gscp %>%
  group_by(month) %>%
  mutate(random_date = sample(date, 1)) %>%
  ungroup()
  
# Create a lookup table for month names and corresponding first day of the month
month_lookup <- data.frame(
  month = month.name,
  date = floor_date(ymd("2022-01-01") + months(0:11), "month")
)

# Merge the beat_exp_test data with the month_lookup table to get the first day of the month
beat_exp_test <- report %>% 
  select(month, exp_var_pct, rev_percent) %>% 
  left_join(month_lookup, by = c("month" = "month")) %>% 
  mutate(exp_pct = round(exp_var_pct * 100, 2)) %>% 
  mutate(rev_pct = round(rev_percent * 100, 2)) %>% 
  mutate(month = month(date)) %>% 
  select(-date)

merged_data <- left_join(filtered_gscp, beat_exp_test, by = "month")
  # mutate(pct_norm = ifelse(is.na(pct), NA, round((pct / max(pct, na.rm = TRUE)), 2)))

plot_ly() %>%
  add_trace(data = merged_data, 
            x = ~date, y = ~value_norm, 
            color = ~factor(month),
            text = ~paste("Date: ", date, "<br>Monthly Return: ", return, "%"),
            type = 'scatter', mode = 'lines') %>%
  
  add_trace(data = merged_data,
            x = ~date, y = ~exp_var_pct,
            text = ~paste("Expectation Variance: ", exp_pct, "%"),
            type = 'scatter', mode = 'lines',
            line = list(color = 'red', dash = 'dash'),
            name = 'Expectation Variance') %>%
   add_trace(data = merged_data,
            x = ~date, y = ~rev_percent,
            text = ~paste("Revision Variance: ", rev_pct, "%"),
            type = 'scatter', mode = 'lines',
            line = list(color = 'orange', dash = 'dash'),
            name = 'Revision Variance') %>%
  
  layout(title = "SP500 Performance & Variance",
         xaxis = list(title = "Date"),
         yaxis = list(title = "Normalized Value"),
         dragmode = "zoom",  # Enable zoom with mouse
         showlegend = TRUE)

Hover your pointer over the plot lines to see series details. To use the zoom feature, draw a box with your mouse around the area you would like to zoom. Double-click to return to the normal view. You can isolate a specific series by double-clicking it in the right side of the plot. For example, to see just January, double-click the first line with the “1” next to it.


To get a better understanding of how the SP500 interacts with releases, we can plot it with various metrics related to the jobs report. The dotted red line represents the percent variance of the initial release and the estimate. Negative values indicate releases below expectations. The orange line represents the percent change of the revision to each initial release.