In this report, I calculate and plot unionization trends for doctors and nurses using data from the Current Population Survey (CPS) from January 2003 to August 2024, the latest data available as of October 2024.
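The variable of interest in this report is UNION, which indicates whether, for the current job, the respondent was: 1) a member of a labor union or employee association similar to a union; 2) not a union member but covered by a union or employee association contract; or 3) neither a union member nor covered by a union contract. Note that this numbering only enumerates the categories and is not the numeric coding: in the code below, a value of 1 is labeled "No union coverage", 2 is labeled "Member of labor union", and 3 is labeled "Covered by union but not a member".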
For this analysis, I use the EARNWT weight, as recommended by CPS documentation.
I took the following steps for this task:
# Relabel UNION and add a monthly date column.
# UNION codes 1, 2 and 3 are replaced by descriptive strings; every other
# value becomes NA. `date` is a Date set to the first day of each YEAR/MONTH.
data <- data %>%
  mutate(
    UNION = case_when(
      UNION == 1 ~ "No union coverage",
      UNION == 2 ~ "Member of labor union",
      UNION == 3 ~ "Covered by union but not a member",
      TRUE ~ NA_character_
    ),
    # builds a "YYYY-M-1" string and parses it as a Date
    date = as.Date(paste(YEAR, MONTH, "1", sep = "-"), "%Y-%m-%d")
  )
# Build the health-care sample: employed respondents who are nurses or doctors.
# `nurse` and `doctor` are 0/1 indicator columns derived from OCC2010.
hc.df <- data %>%
  # EMPSTAT 10 = "At work", 12 = "Has job, not at work last week"
  filter(EMPSTAT %in% c(10, 12)) %>%
  # nurse dummy: any of the three nursing OCC2010 codes
  mutate(nurse = ifelse(OCC2010 %in% c(3500, 3600, 3130), 1, 0)) %>%
  # doctor dummy: the single doctor OCC2010 code
  mutate(doctor = ifelse(OCC2010 == 3060, 1, 0)) %>%
  # drop every other occupation
  filter(doctor == 1 | nurse == 1)
# unionized workers per occupation-industry (# of unionized doctors/nurses in each industry)
# I use EARNWT, which is "a person-level weight that should be used in any analysis including one of the following variables: EARNWEEK, HOURWAGE, PAIDHOUR, UNION, UHRSWORKORG, WKSWORKORG, ELIGORG, and OTPAY. For any other analysis using ASEC data, researchers should use ASECWT or for analyses of non-ASEC data, WTFINL."
# union <- hc.df %>%
# filter(is.na(UNION) == FALSE) %>%
# group_by(date, nurse, UNION) %>%
# summarise(count = sum(EARNWT, na.rm = T)) %>%
# mutate(log_count = log(count)) %>%
# arrange(date,UNION)
# Nurse unionization: weighted count of nurses for each date x UNION cell.
#   count     - sum of EARNWT within the cell (missing weights are ignored)
#   log_count - natural log of count; the outcome used by the trend
#               regressions and plots
# Rows with a missing UNION label are excluded before aggregating.
nurses <- hc.df %>%
  filter(!is.na(UNION), nurse == 1) %>%
  group_by(date, UNION) %>%
  # .groups = "drop" returns an ungrouped tibble, so no grouping by date leaks
  # into later steps and summarise() no longer prints its regrouping message
  summarise(count = sum(EARNWT, na.rm = TRUE), .groups = "drop") %>%
  mutate(log_count = log(count)) %>%
  arrange(date, UNION)
# Doctor unionization: weighted count of doctors for each date x UNION cell.
#   count     - sum of EARNWT within the cell (missing weights are ignored)
#   log_count - natural log of count; the outcome used by the trend
#               regressions and plots
# Rows with a missing UNION label are excluded before aggregating.
doctors <- hc.df %>%
  # select on the doctor flag directly; within hc.df (nurses and doctors only)
  # this keeps the same rows as `nurse == 0` but states the intent explicitly
  filter(!is.na(UNION), doctor == 1) %>%
  group_by(date, UNION) %>%
  # .groups = "drop" returns an ungrouped tibble, so no grouping by date leaks
  # into later steps and summarise() no longer prints its regrouping message
  summarise(count = sum(EARNWT, na.rm = TRUE), .groups = "drop") %>%
  mutate(log_count = log(count)) %>%
  arrange(date, UNION)
# creating regressions for doctors by unionization status
# Doctors covered by a union contract but not members.
# Fit a linear trend of log_count on date using only rows dated 2013-01-01
# through 2020-01-01, then evaluate that line (intercept + slope * numeric
# date) for every row of the subset and store it in exp_values.
d.notmem <- filter(doctors, UNION == "Covered by union but not a member")
dreg.notmem <- lm(
  log_count ~ date,
  data = filter(
    d.notmem,
    between(date, as.Date("2013-01-01"), as.Date("2020-01-01"))
  )
)
d.notmem$exp_values <- coef(dreg.notmem)["(Intercept)"] +
  coef(dreg.notmem)["date"] * as.numeric(d.notmem$date)
# Doctors with no union coverage.
# Fit a linear trend of log_count on date using only rows dated 2013-01-01
# through 2020-01-01, then evaluate that line (intercept + slope * numeric
# date) for every row of the subset and store it in exp_values.
d.nocov <- filter(doctors, UNION == "No union coverage")
dreg.nocov <- lm(
  log_count ~ date,
  data = filter(
    d.nocov,
    between(date, as.Date("2013-01-01"), as.Date("2020-01-01"))
  )
)
d.nocov$exp_values <- coef(dreg.nocov)["(Intercept)"] +
  coef(dreg.nocov)["date"] * as.numeric(d.nocov$date)
# Doctors who are union members.
# Fit a linear trend of log_count on date using only rows dated 2013-01-01
# through 2020-01-01, then evaluate that line (intercept + slope * numeric
# date) for every row of the subset and store it in exp_values.
d.member <- filter(doctors, UNION == "Member of labor union")
dreg.member <- lm(
  log_count ~ date,
  data = filter(
    d.member,
    between(date, as.Date("2013-01-01"), as.Date("2020-01-01"))
  )
)
d.member$exp_values <- coef(dreg.member)["(Intercept)"] +
  coef(dreg.member)["date"] * as.numeric(d.member$date)
# Plot doctor log counts by union status.
# Solid lines: observed log_count for each UNION category.
# Dotted lines: the fitted linear trends (exp_values), drawn from 2013 onward.
# Dashed grey vertical line: January 2020.
ggplot(doctors, aes(x = date, y = log_count, color = UNION)) +
  geom_line(aes(y = exp_values, x = date),
            color = "darkblue", linetype = "dotted", linewidth = 0.8,
            data = d.notmem %>% filter(date >= as.Date("2013-01-01"))) +
  geom_line(aes(y = exp_values, x = date),
            color = "red", linetype = "dotted", linewidth = 0.8,
            data = d.member %>% filter(date >= as.Date("2013-01-01"))) +
  geom_line(aes(y = exp_values, x = date),
            color = "darkgreen", linetype = "dotted", linewidth = 0.8,
            data = d.nocov %>% filter(date >= as.Date("2013-01-01"))) +
  # `linewidth` replaces the deprecated `size` argument for lines
  geom_line(linewidth = 0.5) +
  labs(title = "Doctor Unionization (Jan. 2003 to Aug. 2024)",
       x = "",
       y = "Log Count") +
  # colors are named by category so each series keeps its color (matching its
  # dotted trend line) even if a category is missing from the data
  scale_color_manual(
    name = "",
    values = c("Covered by union but not a member" = "#31597d",
               "Member of labor union" = "#9b494f",
               "No union coverage" = "#4b8b3b")
  ) +
  # the intercept must be a Date: the bare expression 2020-01-01 is integer
  # arithmetic (2020 - 1 - 1 = 2018), which does not land on January 2020
  geom_vline(xintercept = as.Date("2020-01-01"),
             linetype = "dashed", color = "grey", linewidth = 0.5) +
  # apply the complete theme first, then the legend tweak, so the tweak is not
  # overwritten by theme_stata()
  theme_stata() +
  theme(legend.position = "bottom")
# creating regressions for nurses by unionization status
# Nurses covered by a union contract but not members.
# Fit a linear trend of log_count on date using only rows dated 2013-01-01
# through 2020-01-01, then evaluate that line (intercept + slope * numeric
# date) for every row of the subset and store it in exp_values.
n.notmem <- filter(nurses, UNION == "Covered by union but not a member")
nreg.notmem <- lm(
  log_count ~ date,
  data = filter(
    n.notmem,
    between(date, as.Date("2013-01-01"), as.Date("2020-01-01"))
  )
)
n.notmem$exp_values <- coef(nreg.notmem)["(Intercept)"] +
  coef(nreg.notmem)["date"] * as.numeric(n.notmem$date)
# Nurses with no union coverage.
# Fit a linear trend of log_count on date using only rows dated 2013-01-01
# through 2020-01-01, then evaluate that line (intercept + slope * numeric
# date) for every row of the subset and store it in exp_values.
n.nocov <- filter(nurses, UNION == "No union coverage")
nreg.nocov <- lm(
  log_count ~ date,
  data = filter(
    n.nocov,
    between(date, as.Date("2013-01-01"), as.Date("2020-01-01"))
  )
)
n.nocov$exp_values <- coef(nreg.nocov)["(Intercept)"] +
  coef(nreg.nocov)["date"] * as.numeric(n.nocov$date)
# Nurses who are union members.
# Fit a linear trend of log_count on date using only rows dated 2013-01-01
# through 2020-01-01, then evaluate that line (intercept + slope * numeric
# date) for every row of the subset and store it in exp_values.
n.member <- filter(nurses, UNION == "Member of labor union")
nreg.member <- lm(
  log_count ~ date,
  data = filter(
    n.member,
    between(date, as.Date("2013-01-01"), as.Date("2020-01-01"))
  )
)
n.member$exp_values <- coef(nreg.member)["(Intercept)"] +
  coef(nreg.member)["date"] * as.numeric(n.member$date)
# Plot nurse log counts by union status.
# Solid lines: observed log_count for each UNION category.
# Dotted lines: the fitted linear trends (exp_values), drawn from 2013 onward.
# Dashed grey vertical line: January 2020.
ggplot(nurses, aes(x = date, y = log_count, color = UNION)) +
  geom_line(aes(y = exp_values, x = date),
            color = "darkblue", linetype = "dotted", linewidth = 0.8,
            data = n.notmem %>% filter(date >= as.Date("2013-01-01"))) +
  geom_line(aes(y = exp_values, x = date),
            color = "red", linetype = "dotted", linewidth = 0.8,
            data = n.member %>% filter(date >= as.Date("2013-01-01"))) +
  geom_line(aes(y = exp_values, x = date),
            color = "darkgreen", linetype = "dotted", linewidth = 0.8,
            data = n.nocov %>% filter(date >= as.Date("2013-01-01"))) +
  # `linewidth` replaces the deprecated `size` argument for lines
  geom_line(linewidth = 0.5) +
  labs(title = "Nurse Unionization (Jan. 2003 to Aug. 2024)",
       x = "",
       y = "Log Count") +
  # colors are named by category so each series keeps its color (matching its
  # dotted trend line) even if a category is missing from the data
  scale_color_manual(
    name = "",
    values = c("Covered by union but not a member" = "#31597d",
               "Member of labor union" = "#9b494f",
               "No union coverage" = "#4b8b3b")
  ) +
  geom_vline(xintercept = as.Date("2020-01-01"),
             linetype = "dashed", color = "grey", linewidth = 0.5) +
  # apply the complete theme first, then the legend tweak, so the tweak is not
  # overwritten by theme_stata()
  theme_stata() +
  theme(legend.position = "bottom")