Tracking NLRB Cases Over Time from 1951 to 2009

This report analyzes trends in the number of cases received by the National Labor Relations Board (NLRB), as published in Table 5A of its annual reports. The cases are organized by the state in which they were reported.

Legend:

Unfair Labor Practice Cases (C series)

All C Cases — Total of all Unfair Labor Practice (ULP) charges filed.
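CA — Charges against an employer alleging unfair labor practices under Section 8(a) of the National Labor Relations Act (e.g., interfering with employee rights, discriminating in hiring or tenure to discourage union membership, or refusing to bargain).
CB — Charges against a labor organization under Section 8(b)(1), (2), (3), (5), or (6) (e.g., restraining or coercing employees in their rights, causing employer discrimination, or refusing to bargain).
CC — Charges against a labor organization alleging unlawful secondary boycotts or related strike activity under Section 8(b)(4)(A), (B), or (C).
CD — Charges against a labor organization involving jurisdictional disputes (work-assignment conflicts) under Section 8(b)(4)(D).
CE — Charges against a labor organization, an employer, or both alleging an unlawful "hot cargo" agreement under Section 8(e).
CG — Charges against a labor organization alleging a strike or picketing at a health care institution without the notice required by Section 8(g).
CP — Charges against a labor organization alleging unlawful recognitional or organizational picketing under Section 8(b)(7).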

Representation Cases (R series)

All R Cases — Total of all representation petitions filed.
RC — Representation–Certification petitions: employees petition to be represented by a union.
RM — Employer petitions for an election to test a union’s majority status.
RD — Decertification petitions: employees seek to remove an existing union as their bargaining representative.

Other Categories

UD Cases — Union Deauthorization cases (employee petitions for a referendum to rescind a union's authority to maintain a union-security agreement).

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggplot2)

library(readxl)
library(purrr)
library(dplyr)

# Workbook holding the state-level case counts
file_path <- "US_States_By_Year_1951_to_2009_New.xlsx"

# Names of every worksheet in the workbook
sheet_names <- excel_sheets(file_path)

# Load each worksheet and tag its rows with the worksheet name in a `year`
# column (the sheet name is used as the year; it is still text at this point)
data_list <- lapply(sheet_names, function(sheet) {
  sheet_data <- read_excel(file_path, sheet = sheet)
  sheet_data[["year"]] <- sheet
  sheet_data
})

# Stack the per-sheet tables into a single data frame
combined_data <- bind_rows(data_list)

# Preview the first rows
head(combined_data)

# A tibble: 6 × 19
  `State Abbreviation` `All Cases`    CA    CB    CC    CD    RC    RM    RD
  <chr>                      <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AK                            20     4     8     3     0     3     0     1
2 AL                           235    43     4     0     1   120     7     3
3 AR                           121    45     2     0     0    64     6     4
4 AZ                           143    33     3     1     0    97     7     1
5 CA                          2470   411   141    27     8   884    42    31
6 CO                           425    66     4     1     0   274     2     7
# ℹ 10 more variables: UA <dbl>, year <chr>, `C Total` <dbl>, `R Total` <dbl>,
#   CE <dbl>, CP <dbl>, UD <dbl>, AC <dbl>, UC <dbl>, CG <dbl>

# Derive the per-row total of the C-series columns (missing counts are skipped
# by na.rm = TRUE) and convert `year` from the sheet-name text to a number
combined_data <- mutate(
  combined_data,
  C_Total = rowSums(across(c(CA, CB, CC, CD, CE, CG, CP)), na.rm = TRUE),
  year = as.numeric(year)
)

# All cases over time, one line per state. The `group` aesthetic is what
# separates the states into individual lines, so no dplyr grouping is needed
# before handing the data to ggplot().
ggplot(
  combined_data,
  aes(x = year, y = `All Cases`, group = `State Abbreviation`)
) +
  geom_line() +
  # Tick every five years across the full span of the data
  scale_x_continuous(breaks = seq(min(combined_data$year),
                                  max(combined_data$year),
                                  by = 5)) +
  labs(title = "All Cases Over Time by State",
       x = "Year",
       y = "All Cases")

# Sanity check: list any rows whose `All Cases` value is missing
filter(combined_data, is.na(`All Cases`))

# A tibble: 0 × 20
# ℹ 20 variables: State Abbreviation <chr>, All Cases <dbl>, CA <dbl>,
#   CB <dbl>, CC <dbl>, CD <dbl>, RC <dbl>, RM <dbl>, RD <dbl>, UA <dbl>,
#   year <dbl>, C Total <dbl>, R Total <dbl>, CE <dbl>, CP <dbl>, UD <dbl>,
#   AC <dbl>, UC <dbl>, CG <dbl>, C_Total <dbl>

# Keep only the years 1975–1980. filter() drops rows where the condition is NA,
# so rows with a missing year cannot slip through as all-NA rows.
filtered_data <- combined_data |>
  filter(year >= 1975, year <= 1980)

# Plot All Cases over time, one line per state. Without the `group` aesthetic
# geom_line() would join every state's value into a single zig-zag path,
# because each year has one row per state.
ggplot(
  filtered_data,
  aes(x = year, y = `All Cases`, group = `State Abbreviation`)
) +
  geom_line() +
  labs(title = "Data from 1975 to 1980")

library(dplyr)
library(ggplot2)

# -------------------------
# 1. Group states into geographic districts and plot total cases over time
# -------------------------

# Lookup from state abbreviation to region. The grouping appears to follow the
# U.S. Census Bureau's four regions; "DC" is included under South on that
# basis. Abbreviations not listed here (e.g. territories, if the data contains
# any) get an NA Region.
region_map <- c(
  # Midwest
  "IL"="Midwest","IN"="Midwest","IA"="Midwest","KS"="Midwest","MI"="Midwest","MN"="Midwest",
  "MO"="Midwest","NE"="Midwest","ND"="Midwest","OH"="Midwest","SD"="Midwest","WI"="Midwest",
  # South
  "AL"="South","AR"="South","DC"="South","DE"="South","FL"="South","GA"="South","KY"="South",
  "LA"="South","MD"="South","MS"="South","NC"="South","OK"="South","SC"="South","TN"="South",
  "TX"="South","VA"="South","WV"="South",
  # Northeast
  "CT"="Northeast","ME"="Northeast","MA"="Northeast","NH"="Northeast","NJ"="Northeast","NY"="Northeast",
  "PA"="Northeast","RI"="Northeast","VT"="Northeast",
  # West
  "AK"="West","AZ"="West","CA"="West","CO"="West","HI"="West","ID"="West","MT"="West",
  "NV"="West","NM"="West","OR"="West","UT"="West","WA"="West","WY"="West"
)

# Attach the region to every row. unname() strips the element names that a
# named-vector lookup would otherwise leave on the new column.
combined_data <- combined_data |>
  mutate(Region = unname(region_map[`State Abbreviation`]))

# Yearly total of `All Cases` per region: stat_summary() sums the state rows
# that share a year within each region and draws one line per region.
# The title names "All Cases" because that is the column being summed; it
# covers every case type in the data, not only unfair labor practice cases.
ggplot(combined_data, aes(x = year, y = `All Cases`, color = Region, group = Region)) +
  stat_summary(fun = sum, geom = "line") +
  # Tick every five years across the full span of the data
  scale_x_continuous(breaks = seq(min(combined_data$year),
                                  max(combined_data$year),
                                  by = 5)) +
  labs(title = "All Cases by Region (1951–2009)",
       x = "Year",
       y = "Total Cases")

# -------------------------
# 2. Which year had the highest number of cases
# -------------------------

# Sum `All Cases` over all states for each year, draw the yearly totals as a
# line, and mark the year(s) with the largest total in red. The title says
# "Total Cases" because the summed column is `All Cases`, which is not limited
# to unfair labor practice cases.
combined_data |>
  group_by(year) |>
  summarise(total_cases = sum(`All Cases`, na.rm = TRUE)) |>
  ggplot(aes(x = year, y = total_cases)) +
  geom_line() +
  # A function passed as `data` is applied to the plot data, so this layer
  # receives only the row(s) holding the maximum yearly total
  geom_point(data = \(totals) filter(totals, total_cases == max(total_cases)),
             color = "red", size = 3) +
  # Tick every five years across the full span of the data
  scale_x_continuous(breaks = seq(min(combined_data$year),
                                  max(combined_data$year),
                                  by = 5)) +
  labs(title = "Total Cases by Year",
       subtitle = "Red dot shows the highest year",
       x = "Year",
       y = "Total Cases")

# -------------------------
# 3. Parse states into groups based on population
# -------------------------
# This assumes you have a 'Population' column

#combined_data <- combined_data %>%
 # mutate(PopGroup = case_when(
  #  Population < 2000000 ~ "Small",
   # Population < 7000000 ~ "Medium",
  #  TRUE ~ "Large"
#  ))
#
#ggplot(combined_data, aes(x = year, y = `All Cases`, color = PopGroup)) +
 # stat_summary(fun = mean, geom = "line") +
#  labs(title = "Average ULP Cases by Population Group",
 #      x = "Year",
  #     y = "Average Cases")

library(ggplot2)
library(dplyr)

# --- 1. RC cases by year ---
# combined_data holds one row per state per year, so the state rows are summed
# within each year before the line is drawn; a plain geom_line() would join
# every state's value into one sawtooth path. `linewidth` replaces the `size`
# argument, which is deprecated for lines since ggplot2 3.4.0.
ggplot(combined_data, aes(x = year, y = RC)) +
  stat_summary(fun = sum, geom = "line", color = "blue", linewidth = 1) +
  # Tick every five years across the full span of the data
  scale_x_continuous(breaks = seq(min(combined_data$year),
                                  max(combined_data$year),
                                  by = 5)) +
  labs(
    title = "RC Cases by Year",
    x = "Year",
    y = "Number of Cases"
  ) +
  theme_minimal()

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

# --- 2. All C Cases by year ---
# combined_data holds one row per state per year, so the state rows are summed
# within each year before the line is drawn; a plain geom_line() would join
# every state's value into one sawtooth path. `linewidth` replaces the `size`
# argument, which is deprecated for lines since ggplot2 3.4.0.
ggplot(combined_data, aes(x = year, y = C_Total)) +
  stat_summary(fun = sum, geom = "line", color = "red", linewidth = 1) +
  # Tick every five years across the full span of the data
  scale_x_continuous(breaks = seq(min(combined_data$year),
                                  max(combined_data$year),
                                  by = 5)) +
  labs(
    title = "All C Cases by Year",
    x = "Year",
    y = "Number of Cases"
  ) +
  theme_minimal()

# --- 3. UD cases by year ---
# combined_data holds one row per state per year, so the state rows are summed
# within each year before the line is drawn; a plain geom_line() would join
# every state's value into one sawtooth path. `linewidth` replaces the `size`
# argument, which is deprecated for lines since ggplot2 3.4.0.
# Rows with a missing UD value are dropped by ggplot2 (with a warning) before
# summing, so a year with no UD data at all contributes no point to the line.
ggplot(combined_data, aes(x = year, y = UD)) +
  stat_summary(fun = sum, geom = "line", color = "darkgreen", linewidth = 1) +
  # Tick every five years across the full span of the data
  scale_x_continuous(breaks = seq(min(combined_data$year),
                                  max(combined_data$year),
                                  by = 5)) +
  labs(
    title = "UD Cases by Year",
    x = "Year",
    y = "Number of Cases"
  ) +
  theme_minimal()

Warning: Removed 663 rows containing missing values or values outside the scale range
(`geom_line()`).