Set-Up, Data cleaning & wrangling

Load Libraries

library(tidyverse) #data wrangling
library(readxl) #reading xlsx file
library(lubridate) #manipulation of dates
library(vtable) #for summary statistics
library(modelsummary) #cleaner regression output tables
library(RColorBrewer) #more visually appealing charts
library(gridExtra) #to plot graphs side by side, if needed
library(grid)
library(kableExtra) #further enhancements to regression table
library(scales) #Axis manipulation (ggplot)
library(stargazer) #latex output for regression tables

options(scipen=999) #prevent scientific notation

Load Datasets

The following datasets are used in this paper.

Annual data on VBAC rates from 2011-2021 in Oregon and California, which will be stored in the variable df. This dataset is retrieved from PeriStats, an online source for perinatal statistics developed by the March of Dimes Perinatal Data Center.
Utilization Rates for Selected Medical Procedures in California Hospitals, which will be stored as cal. This dataset is retrieved from the Open Data Portal by California Health and Human Services Agency (CalHHS) which provides non-confidential health and human services data. It can be found in the link here. The 3 medical procedures captured by this dataset are: Cesarean Delivery Rate, Uncomplicated; Primary Cesarean Delivery Rate, Uncomplicated and Vaginal Birth After Cesarean Rate, Uncomplicated.
Deomgraphic data of women aged 15-50 who had given birth in the past 12 months from the US Census Bureau. This will be stored as ddf (demographic df)

df <- read.csv("VBAC.csv")
cal <- read.csv("californiaprocedures.csv", na.strings='.') #force empty values (stored as ".") to NA
ddf <- read.csv("Demographics.csv")

df$State <- as.factor(df$State) #convert State and Procedure to factor class
df$Procedure <- as.factor(df$Procedure)

Preliminary charts

Rates of selected procedures in California’s largest three counties

cal <- cal %>% filter(County == "Los Angeles" | County == "San Diego" | County == "Orange")
cal <- na.omit(cal) 
cal$Procedure[cal$Procedure == 'Laparoscopic Cholecystectomy (Gall Bladder Surgery)'] <- 'LC'
cal <- cal %>% group_by(Year, County, Procedure) %>% summarize(average_rate = mean(Rate))
cal$Procedure <- as.factor(cal$Procedure)
cal$County <- as.factor(cal$County)

cal1 <- cal %>% filter(Procedure == "Uncomplicated Cesarean Delivery" | Procedure == "Uncomplicated Primary Cesarean Delivery" | Procedure == "Uncomplicated VBAC")

cal1 %>% ggplot(aes(x=Year, y=average_rate)) + geom_line(aes(color = County), linewidth = .7) +  
  facet_wrap(cal1$Procedure, nrow = 3, scales="free_y") + scale_color_brewer(palette = "RdBu") + theme_dark() +
  ylab("Average Rate (%)") + #scales argument in facet_wrap sets different y axis for each facet
  labs(caption = "Notes: Number of deliveries per 100 deliveries. Excludes deliveries with complications (abnormal presentation,
       preterm delivery, fetal death, multiple gestation diagnoses, and breech procedure).", title = "Figure 3: Annual rates of selected procedures, 2005-2021") + 
  theme(plot.caption.position = "plot", plot.caption = element_text(hjust = 0), plot.title = element_text(hjust = 0.5),
        text=element_text(size=14,  family="serif"))

rm(cal1)

Historical VBAC rates in States near and including California

df0 <- df[-c(101:106),] #remove statewide values for now


df0 %>% filter(Procedure == "VBAC" & Year <= 2021) %>% ggplot(aes(x=Year, y=Rate)) + geom_line(aes(color = State)) + 
        geom_point(aes(color = State)) + ylab("VBAC Rate (%)") + theme(text=element_text(size=14, family="serif"), plot.title = element_text(hjust = 0.5)) + ggtitle("Figure 5: Annual VBAC rates in selected states, 2011 - 2021") + scale_x_continuous(labels=as.character(df0$Year),breaks=df0$Year)

rm(df0) #remove from environment

VBAC rate from 2011-2021, California and Oregon

df1 <- df %>% filter(State == "California" | State == "Oregon")
df1 <- df1 %>% filter(Procedure == "VBAC")

df1oregon <- df1 %>% filter(State == "Oregon")
df1oregon_pre <- df1oregon %>% filter(Year <= 2015)
df1oregon_post <- df1oregon %>% filter(Year >= 2015)

df1 %>% ggplot(aes(y = Rate, x = Year)) + geom_line(aes(color = State), size = .7) +
        geom_point(aes(color = State)) + 
          labs(title = "Figure 6: VBAC Rates in California and Oregon, 2011-2021") +
            ylab("VBAC Rate (%)") +
            geom_vline(xintercept = 2015, linetype="dotted", linewidth=.8) + 
            geom_smooth(data=df1oregon_pre,aes(x=Year,y=Rate),method=lm,se=TRUE, linewidth = .5, linetype="dashed")+
            scale_x_continuous(labels=as.character(df$Year),breaks=df$Year) + 
            theme(text=element_text(size=14, family="serif"), plot.title = element_text(hjust = 0.5))

#scale_x_continuous argument serves to display all tick marks on the x axis(years)

rm(df1oregon, df1oregon_pre, df1oregon_post)

Comparison of VBAC rate statewide and in California, 2016-2021

The years here are restricted to 2016-2021 as statewide VBAC data is only available in these years. Over these 6 years, the VBAC rate gap between California and that in the whole of United States (Average) is narrowing.

df2 <- df %>% filter(State == "California" | State == "United States")
df2 <- df2 %>% filter(Year >= 2016 & Procedure == "VBAC")

ggplot(df2, aes(x = Year, y = Rate, group = State)) + geom_line(aes(linetype = State, color = State), linewidth = .8)+
  geom_point(aes(color = State), size = 1.8)+ theme(legend.position = "top") + scale_colour_brewer(palette = "Dark2") + 
  geom_text(label = paste(df2$Rate,'%'), aes(color = State), nudge_x = 0.4, nudge_y = -.1, check_overlap = T, size = 3.5) + 
    ylab("VBAC Rate (%)") + scale_x_continuous(labels=as.character(df2$Year),breaks=df2$Year) + 
    ggtitle("Figure 4: Annual VBAC rates statewide and in California, 2016-2021") + 
  theme(text=element_text(size=14, family="serif"), plot.title = element_text(hjust = 0.5))

rm(df2)

Main Analysis

Data for DID analysis will be stored in variable df1, which has been created above.

DID charts

df1$treat <- ifelse(df1$State == "California",1,0)
df1$after <- ifelse(df1$Year >=2015,1,0)

df1_plot <- df1
df1_plot$after <- ifelse(df1_plot$after==1, "After 2015","Before 2015")
class(df1_plot$after) #need to change class of "after" from character to factor in order for facet wrap to work

## [1] "character"

df1_plot$after <- as.factor(df1_plot$after)

plot1 <- ggplot(df1_plot, aes(x = State, y = Rate)) +
  geom_point(size = 0.5, alpha = 0.2) + ylab("VBAC Rate (%)") +
  stat_summary(geom = "point", fun = "mean", size = 5, color = "red") +
  facet_wrap(~fct_rev(after)) + xlab("State") #reverse order of facets, if not "After Jan 2017" graph will be on the left

plot1

plot2 <- ggplot(df1_plot, aes(x=State, y=Rate)) + geom_boxplot(aes(color = State)) + facet_wrap(~fct_rev(after)) + xlab("State") +
  stat_summary(fun.y=mean, geom="point", shape=20, size=2, fill = "black") + ylab("VBAC Rate (%)") + 
  theme(axis.text.x = element_text(angle = 90), text=element_text(size=14, family="serif"), plot.title = element_text(hjust = 0.2)) 

  #rotate x axis labels to prevent overlaps

plot2

rm(plot1, df1_plot, plot2)

Standard DID chart

df1_plot <- df1 %>% 
  mutate(treat = factor(treat, labels = c("Oregon", "California")),
         after = factor(after, labels = c("Before 2015", "After 2015"))) %>% 
  group_by(treat, after) %>% 
  summarize(average = mean(Rate),
            se_ave = sd(average) / sqrt(n()),
            upper = average + (1.96 * se_ave),
            lower = average + (-1.96 * se_ave)) 

df1_plot <- df1_plot %>% rename(state = treat)

plot3 <- ggplot(df1_plot, aes(x = after, y = average, color = state)) +
  geom_pointrange(aes(ymin = lower, ymax = upper), size = 1) + 
  geom_line(aes(group = state), size = .8) + xlab("Before/After 2015") + ylab("Average VBAC rate (%)") + 
  scale_colour_brewer(palette = "Paired") +
   theme(text=element_text(size=14, family="serif"), plot.title = element_text(hjust = 0.5)) +
  ggtitle("Figure 7: VBAC rates in Oregon & California, Before & After 2015")

plot3

rm(df1_plot, plot3)

Equation (1), 2011-2021

model<- lm(Rate ~ treat + after + treat * after + Year, data = df1)
summary(model)

## 
## Call:
## lm(formula = Rate ~ treat + after + treat * after + Year, data = df1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6619 -0.4405  0.1250  0.4917  2.1048 
## 
## Coefficients:
##               Estimate Std. Error t value         Pr(>|t|)    
## (Intercept) -1123.7667   226.6281  -4.959         0.000119 ***
## treat         -10.6500     0.6469 -16.463 0.00000000000701 ***
## after          -1.2381     0.8440  -1.467         0.160667    
## Year            0.5667     0.1126   5.032         0.000102 ***
## treat:after     2.4643     0.8109   3.039         0.007414 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9148 on 17 degrees of freedom
## Multiple R-squared:  0.9739, Adjusted R-squared:  0.9678 
## F-statistic: 158.9 on 4 and 17 DF,  p-value: 0.0000000000003182

## repeat DID for other 2 procedures

###Primary CS
df1_pcs <- df %>% filter(State == "California" | State == "Oregon")
df1_pcs <- df1_pcs %>% filter(Procedure == "Primary Cesarean Deliveries" & Year >=2011)
df1_pcs$treat <- ifelse(df1_pcs$State == "California",1,0)
df1_pcs$after <- ifelse(df1_pcs$Year >=2015,1,0)

model_pcs<- lm(Rate ~ treat + after + treat * after + Year, data = df1_pcs)

## Repeat CS
df1_rcs <- df %>% filter(State == "California" | State == "Oregon")
df1_rcs <- df1_rcs %>% filter(Procedure == "Repeat Cesarean Deliveries" & Year >=2011)
df1_rcs$treat <- ifelse(df1_rcs$State == "California",1,0)
df1_rcs$after <- ifelse(df1_rcs$Year >=2015,1,0)

model_rcs<- lm(Rate ~ treat + after + treat * after + Year, data = df1_rcs)

library(gt) #tweak regression output

coef <- c('(Intercept)'='Constant', 'treat'='State', 'after' = 'After') #rename variables
regtable <- modelsummary(stars = c('*' = .1, '**' = .05, '***' = .01), output = "gt",
                         list("VBAC" =model, "Primary Cesarean" = model_pcs,"Repeat Cesarean" = model_rcs), coef_omit = 1, gof_omit = "AIC|BIC|Log|F", 
                         coef_rename = coef_rename(coef)) %>% tab_spanner(label = 'Cesarean Deliveries', columns = 3:4)

regtable %>% tab_style(style = cell_text(color = 'red'),locations = cells_body(rows = 7)) %>%
            tab_footnote(footnote = md("Caution to be exerted when interpreting coefficient of Primary Cesarean"),
            locations = cells_body(rows = 7, columns = 3)) %>%  tab_header(title = md("Results of Equation (1), 2011-2021"))

	VBAC	Cesarean Deliveries
Results of Equation (1), 2011-2021
	VBAC	Primary Cesarean	Repeat Cesarean
State	-10.650***	1.900***	10.650***
	(0.647)	(0.439)	(0.647)
After	-1.238	-0.079	1.238
	(0.844)	(0.573)	(0.844)
Year	0.567***	0.118	-0.567***
	(0.113)	(0.076)	(0.113)
State:After	2.464***	-2.029***¹	-2.464***
	(0.811)	(0.550)	(0.811)
Num.Obs.	22	22	22
R2	0.974	0.584	0.974
R2 Adj.	0.968	0.486	0.968
RMSE	0.80	0.55	0.80
* p < 0.1, p < 0.05, * p < 0.01
¹ Caution to be exerted when interpreting coefficient of Primary Cesarean

Reproduce the regression table in a nicer format using stargazer. The latex output is copied onto latex and reproduced in the paper. Standard errors here will be different from the table above because I used robust SE in the stargazer command.

library(sandwich) #to compute robust SE in stargazer command
cov <- vcovHC(model, type = "HC")
robust.se <- sqrt(diag(cov))

cov1 <- vcovHC(model_pcs, type = "HC")
robust.se1 <- sqrt(diag(cov1))

cov2 <- vcovHC(model_rcs, type = "HC")
robust.se2 <- sqrt(diag(cov2))


stargazer(model, model_pcs, model_rcs,
          title = "Results of Equation (1): DID estimation of rates of selected procedures",
          column.labels = c("VBAC", "Primary Caesarean", "Repeated Caesarean"),
          se = list(robust.se,robust.se1,robust.se2),
          notes = c("Primary Caesarean model to be interpreted with caution", "Robust standard errors in parentheses","lm() function"),
          notes.align = "l", notes.append = T, single.row = T, header = F, omit = "Constant")

## 
## \begin{table}[!htbp] \centering 
##   \caption{Results of Equation (1): DID estimation of rates of selected procedures} 
##   \label{} 
## \begin{tabular}{@{\extracolsep{5pt}}lccc} 
## \\[-1.8ex]\hline 
## \hline \\[-1.8ex] 
##  & \multicolumn{3}{c}{\textit{Dependent variable:}} \\ 
## \cline{2-4} 
## \\[-1.8ex] & \multicolumn{3}{c}{Rate} \\ 
##  & VBAC & Primary Caesarean & Repeated Caesarean \\ 
## \\[-1.8ex] & (1) & (2) & (3)\\ 
## \hline \\[-1.8ex] 
##  treat & $-$10.650$^{***}$ (0.332) & 1.900$^{***}$ (0.358) & 10.650$^{***}$ (0.332) \\ 
##   after & $-$1.238 (0.865) & $-$0.079 (0.579) & 1.238 (0.865) \\ 
##   Year & 0.567$^{***}$ (0.123) & 0.118 (0.089) & $-$0.567$^{***}$ (0.123) \\ 
##   treat:after & 2.464$^{***}$ (0.604) & $-$2.029$^{***}$ (0.469) & $-$2.464$^{***}$ (0.604) \\ 
##  \hline \\[-1.8ex] 
## Observations & 22 & 22 & 22 \\ 
## R$^{2}$ & 0.974 & 0.584 & 0.974 \\ 
## Adjusted R$^{2}$ & 0.968 & 0.486 & 0.968 \\ 
## Residual Std. Error (df = 17) & 0.915 & 0.621 & 0.915 \\ 
## F Statistic (df = 4; 17) & 158.858$^{***}$ & 5.969$^{***}$ & 158.858$^{***}$ \\ 
## \hline 
## \hline \\[-1.8ex] 
## \textit{Note:}  & \multicolumn{3}{l}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\ 
##  & \multicolumn{3}{l}{Primary Caesarean model to be interpreted with caution} \\ 
##  & \multicolumn{3}{l}{Robust standard errors in parentheses} \\ 
##  & \multicolumn{3}{l}{lm() function} \\ 
## \end{tabular} 
## \end{table}

rm(model, model_pcs, model_rcs, model_pcs, regtable, df1_pcs,df1_rcs, robust.se, robust.se1, robust.se2, cov1, cov2, cov)

Equation (1), 2011-2019 (exclude pandemic years 2020 & 2021)

df1_pre <- df1 %>% filter(Year <=2019)
model<- lm(Rate ~ treat + after + treat * after + Year, data = df1_pre)
summary(model)

## 
## Call:
## lm(formula = Rate ~ treat + after + treat * after + Year, data = df1_pre)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0000 -0.3942  0.0000  0.3187  1.6967 
## 
## Coefficients:
##               Estimate Std. Error t value        Pr(>|t|)    
## (Intercept) -1385.3917   268.0302  -5.169        0.000181 ***
## treat         -10.6500     0.5158 -20.647 0.0000000000254 ***
## after          -1.2850     0.7737  -1.661        0.120666    
## Year            0.6967     0.1332   5.231        0.000162 ***
## treat:after     1.6300     0.6920   2.355        0.034875 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7295 on 13 degrees of freedom
## Multiple R-squared:  0.9857, Adjusted R-squared:  0.9813 
## F-statistic: 223.8 on 4 and 13 DF,  p-value: 0.000000000007621

## repeat DID for other 2 procedures

###Primary CS
df1_pcs <- df %>% filter(State == "California" | State == "Oregon")
df1_pcs <- df1_pcs %>% filter(Procedure == "Primary Cesarean Deliveries" & Year <=2019 & Year >= 2011)
df1_pcs$treat <- ifelse(df1_pcs$State == "California",1,0)
df1_pcs$after <- ifelse(df1_pcs$Year >=2015,1,0)

model_pcs<- lm(Rate ~ treat + after + treat * after + Year, data = df1_pcs)

## Repeat CS
df1_rcs <- df %>% filter(State == "California" | State == "Oregon" )
df1_rcs <- df1_rcs %>% filter(Procedure == "Repeat Cesarean Deliveries"& Year <=2019 & Year >= 2011)
df1_rcs$treat <- ifelse(df1_rcs$State == "California",1,0)
df1_rcs$after <- ifelse(df1_rcs$Year >=2015,1,0)

model_rcs<- lm(Rate ~ treat + after + treat * after + Year, data = df1_rcs)

library(gt) #tweak regression output

coef <- c('(Intercept)'='Constant', 'treat'='State', 'after' = 'After') #rename variables
regtable <- modelsummary(stars = c('*' = .1, '**' = .05, '***' = .01), output = "gt",
                         list("VBAC" =model, "Primary Cesarean" = model_pcs,"Repeat Cesarean" = model_rcs), coef_omit = 1, gof_omit = "AIC|BIC|Log|F", 
                         coef_rename = coef_rename(coef)) %>% tab_spanner(label = 'Cesarean Deliveries', columns = 3:4)

regtable %>% tab_style(style = cell_text(color = 'red'),locations = cells_body(rows = 7)) %>%
            tab_footnote(footnote = md("Caution to be exerted when interpreting coefficient of Primary Cesarean"),
            locations = cells_body(rows = 7, columns = 3)) %>%  tab_header(title = md("Results of Equation (1), excluding pandemic years 2020 & 2021"))

	VBAC	Cesarean Deliveries
Results of Equation (1), excluding pandemic years 2020 & 2021
	VBAC	Primary Cesarean	Repeat Cesarean
State	-10.650***	1.900***	10.650***
	(0.516)	(0.318)	(0.516)
After	-1.285	0.410	1.285
	(0.774)	(0.477)	(0.774)
Year	0.697***	-0.073	-0.697***
	(0.133)	(0.082)	(0.133)
State:After	1.630**	-1.580***¹	-1.630**
	(0.692)	(0.426)	(0.692)
Num.Obs.	18	18	18
R2	0.986	0.790	0.986
R2 Adj.	0.981	0.725	0.981
RMSE	0.62	0.38	0.62
* p < 0.1, p < 0.05, * p < 0.01
¹ Caution to be exerted when interpreting coefficient of Primary Cesarean

cov <- vcovHC(model, type = "HC")
robust.se <- sqrt(diag(cov))

cov1 <- vcovHC(model_pcs, type = "HC")
robust.se1 <- sqrt(diag(cov1))

cov2 <- vcovHC(model_rcs, type = "HC")
robust.se2 <- sqrt(diag(cov2))


stargazer(model, model_pcs, model_rcs,
          title = "Results of Equation (1): DID estimation of rates of selected procedures, excluding 2021 & 2020",
          column.labels = c("VBAC", "Primary Caesarean", "Repeated Caesarean"),
          se = list(robust.se,robust.se1,robust.se2),
          notes = c("Primary Caesarean model to be interpreted with caution", "Robust standard errors in parentheses","lm() function"),
          notes.align = "l", notes.append = T, single.row = T, header = F, omit = "Constant")

## 
## \begin{table}[!htbp] \centering 
##   \caption{Results of Equation (1): DID estimation of rates of selected procedures, excluding 2021 & 2020} 
##   \label{} 
## \begin{tabular}{@{\extracolsep{5pt}}lccc} 
## \\[-1.8ex]\hline 
## \hline \\[-1.8ex] 
##  & \multicolumn{3}{c}{\textit{Dependent variable:}} \\ 
## \cline{2-4} 
## \\[-1.8ex] & \multicolumn{3}{c}{Rate} \\ 
##  & VBAC & Primary Caesarean & Repeated Caesarean \\ 
## \\[-1.8ex] & (1) & (2) & (3)\\ 
## \hline \\[-1.8ex] 
##  treat & $-$10.650$^{***}$ (0.265) & 1.900$^{***}$ (0.216) & 10.650$^{***}$ (0.265) \\ 
##   after & $-$1.285$^{*}$ (0.746) & 0.410 (0.549) & 1.285$^{*}$ (0.746) \\ 
##   Year & 0.697$^{***}$ (0.111) & $-$0.073 (0.087) & $-$0.697$^{***}$ (0.111) \\ 
##   treat:after & 1.630$^{***}$ (0.550) & $-$1.580$^{***}$ (0.349) & $-$1.630$^{***}$ (0.550) \\ 
##  \hline \\[-1.8ex] 
## Observations & 18 & 18 & 18 \\ 
## R$^{2}$ & 0.986 & 0.790 & 0.986 \\ 
## Adjusted R$^{2}$ & 0.981 & 0.725 & 0.981 \\ 
## Residual Std. Error (df = 13) & 0.729 & 0.450 & 0.729 \\ 
## F Statistic (df = 4; 13) & 223.805$^{***}$ & 12.222$^{***}$ & 223.805$^{***}$ \\ 
## \hline 
## \hline \\[-1.8ex] 
## \textit{Note:}  & \multicolumn{3}{l}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\ 
##  & \multicolumn{3}{l}{Primary Caesarean model to be interpreted with caution} \\ 
##  & \multicolumn{3}{l}{Robust standard errors in parentheses} \\ 
##  & \multicolumn{3}{l}{lm() function} \\ 
## \end{tabular} 
## \end{table}

rm(df1_pcs, df1_rcs, model, model_pcs, model_rcs, regtable,df1_pre, robust.se, robust.se1, robust.se2, cov1, cov2, cov)

Equation (2), 2011-2021

ddf <- ddf %>% gather(Category, Value, 3:7, factor_key = TRUE)
ddf$after <- ifelse(ddf$Year >= 2015,1,0)
ddf$treat <- ifelse(ddf$State == "Oregon",0,1)

##race

ddf1 <- ddf %>% filter(Category == "BLKBIRTH")
model_race <- lm(Value ~ treat + after + treat * after + Year, data = ddf1)

##education - high school & bachelor's among women aged 15-50 years old who had a birth in the past year 

ddf2 <- ddf %>% filter(Category == "HIGHSCHL")
model_hs <- lm(Value ~ treat + after + treat * after + Year, data = ddf2)

ddf3 <- ddf %>% filter(Category == "BACHELOR")
model_bach <- lm(Value ~ treat + after + treat * after + Year, data = ddf3)

#age

ddf4 <- ddf %>% filter(Category == "ABV35")
model_age <- lm(Value ~ treat + after + treat * after + Year, data = ddf4)

#Graduate or professional degree

ddf5 <- ddf %>% filter(Category == "GRAD")
model_grad <- lm(Value ~ treat + after + treat * after + Year, data = ddf5)

rm(ddf1,ddf2,ddf3, ddf4, ddf5)

table1 <- modelsummary(stars = c('*' = .1, '**' = .05, '***' = .01),
            list("%Blacks" = model_race,"%Above 35" = model_age, "%High School" = model_hs, "%Bachelor's" = model_bach,  
                 "%Graduate/Professional Degree" = model_grad), 
            coef_omit = 1, gof_omit = "AIC|BIC|Log|F", title = "Results of Equation (2), 2011-2021")

header = c(" " = 3, "Education" = 3)
table1 %>%  row_spec(7, color = 'red') %>% add_header_above(header = header)

Results of Equation (2), 2011-2021
			Education
	%Blacks	%Above 35	%High School	%Bachelor’s	%Graduate/Professional Degree
treat	0.037***	0.035***	0.031***	0.071***	0.001
	(0.005)	(0.012)	(0.010)	(0.016)	(0.008)
after	-0.006	0.001	0.011	-0.003	0.008
	(0.006)	(0.015)	(0.013)	(0.021)	(0.010)
Year	0.001	0.010***	0.002	0.011***	0.004***
	(0.001)	(0.002)	(0.002)	(0.003)	(0.001)
treat × after	-0.002	-0.004	-0.018	-0.039*	-0.009
	(0.006)	(0.014)	(0.012)	(0.020)	(0.010)
Num.Obs.	22	22	22	22	22
R2	0.901	0.862	0.547	0.770	0.691
R2 Adj.	0.878	0.829	0.441	0.715	0.618
RMSE	0.01	0.01	0.01	0.02	0.01
* p < 0.1, p < 0.05, * p < 0.01

Reproduce regression table with stargazer

model.lst = list(model_race,model_age,model_hs,model_bach,model_grad)

stargazer(model_race, model_age, model_hs, model_bach, model_grad,
          title = "Results of Equation (2): DID estimation of maternal demographics",
          column.labels = c("Blacks", "Above35", "High School", "Bachelor's", "Graduate/Professional Degree"),
          se=lapply(model.lst, function(x) sqrt(diag(vcovHC(x, type = "HC1")))),
          notes = c("Data is restricted to women with births in the past 12 months", "Dependent variable is measured in percentage of women",
                    "Robust standard errors in parentheses","lm() function"),
          notes.align = "l", notes.append = T, single.row = F, header = F, omit = "Constant")

## 
## \begin{table}[!htbp] \centering 
##   \caption{Results of Equation (2): DID estimation of maternal demographics} 
##   \label{} 
## \begin{tabular}{@{\extracolsep{5pt}}lccccc} 
## \\[-1.8ex]\hline 
## \hline \\[-1.8ex] 
##  & \multicolumn{5}{c}{\textit{Dependent variable:}} \\ 
## \cline{2-6} 
## \\[-1.8ex] & \multicolumn{5}{c}{Value} \\ 
##  & Blacks & Above35 & High School & Bachelor's & Graduate/Professional Degree \\ 
## \\[-1.8ex] & (1) & (2) & (3) & (4) & (5)\\ 
## \hline \\[-1.8ex] 
##  treat & 0.037$^{***}$ & 0.035$^{***}$ & 0.031$^{***}$ & 0.071$^{***}$ & 0.001 \\ 
##   & (0.005) & (0.013) & (0.006) & (0.011) & (0.011) \\ 
##   & & & & & \\ 
##  after & $-$0.006 & 0.001 & 0.011 & $-$0.003 & 0.008 \\ 
##   & (0.006) & (0.019) & (0.011) & (0.019) & (0.012) \\ 
##   & & & & & \\ 
##  Year & 0.001 & 0.010$^{***}$ & 0.002 & 0.011$^{***}$ & 0.004$^{***}$ \\ 
##   & (0.001) & (0.002) & (0.002) & (0.003) & (0.001) \\ 
##   & & & & & \\ 
##  treat:after & $-$0.002 & $-$0.004 & $-$0.018$^{*}$ & $-$0.039$^{**}$ & $-$0.009 \\ 
##   & (0.006) & (0.015) & (0.011) & (0.018) & (0.012) \\ 
##   & & & & & \\ 
## \hline \\[-1.8ex] 
## Observations & 22 & 22 & 22 & 22 & 22 \\ 
## R$^{2}$ & 0.901 & 0.862 & 0.547 & 0.770 & 0.691 \\ 
## Adjusted R$^{2}$ & 0.878 & 0.829 & 0.441 & 0.715 & 0.618 \\ 
## Residual Std. Error (df = 17) & 0.007 & 0.016 & 0.014 & 0.023 & 0.011 \\ 
## F Statistic (df = 4; 17) & 38.881$^{***}$ & 26.470$^{***}$ & 5.141$^{***}$ & 14.195$^{***}$ & 9.493$^{***}$ \\ 
## \hline 
## \hline \\[-1.8ex] 
## \textit{Note:}  & \multicolumn{5}{l}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\ 
##  & \multicolumn{5}{l}{Data is restricted to women with births in the past 12 months} \\ 
##  & \multicolumn{5}{l}{Dependent variable is measured in percentage of women} \\ 
##  & \multicolumn{5}{l}{Robust standard errors in parentheses} \\ 
##  & \multicolumn{5}{l}{lm() function} \\ 
## \end{tabular} 
## \end{table}

rm(ddf, model_hs, model_race, model_bach, table1, model_grad, model_age)

Equation (2), 2011-2019 (exclude pandemic years 2020 & 2021)

## now exclude 2020 and 2021 
ddf <- read.csv("demographics.csv")
ddf <- ddf %>% gather(Category, Value, 3:7, factor_key = TRUE)
ddf$after <- ifelse(ddf$Year >= 2015,1,0)
ddf$treat <- ifelse(ddf$State == "Oregon",0,1)
ddf <- ddf %>% filter(Year <= 2019)

##race

ddf1 <- ddf %>% filter(Category == "BLKBIRTH")
model_race <- lm(Value ~ treat + after + treat * after + Year, data = ddf1)
summary(model_race)

## 
## Call:
## lm(formula = Value ~ treat + after + treat * after + Year, data = ddf1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.008319 -0.005072 -0.001711  0.003199  0.013945 
## 
## Coefficients:
##              Estimate Std. Error t value   Pr(>|t|)    
## (Intercept) -3.559467   2.701693  -1.317      0.210    
## treat        0.036963   0.005199   7.109 0.00000794 ***
## after       -0.010659   0.007799  -1.367      0.195    
## Year         0.001780   0.001342   1.326      0.208    
## treat:after -0.001035   0.006976  -0.148      0.884    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007353 on 13 degrees of freedom
## Multiple R-squared:  0.8967, Adjusted R-squared:  0.8649 
## F-statistic:  28.2 on 4 and 13 DF,  p-value: 0.000002671

##education - high school & bachelor's among women aged 15-50 years old who had a birth in the past year 

ddf2 <- ddf %>% filter(Category == "HIGHSCHL")
model_hs <- lm(Value ~ treat + after + treat * after + Year, data = ddf2)
summary(model_hs)

## 
## Call:
## lm(formula = Value ~ treat + after + treat * after + Year, data = ddf2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.012743 -0.007686 -0.003846  0.005163  0.037850 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -3.561368   5.040910  -0.706  0.49235   
## treat        0.031349   0.009701   3.231  0.00656 **
## after        0.008768   0.014552   0.603  0.55718   
## Year         0.001811   0.002505   0.723  0.48249   
## treat:after -0.011483   0.013015  -0.882  0.39367   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01372 on 13 degrees of freedom
## Multiple R-squared:  0.5957, Adjusted R-squared:  0.4713 
## F-statistic: 4.789 on 4 and 13 DF,  p-value: 0.01352

ddf3 <- ddf %>% filter(Category == "BACHELOR")
model_bach <- lm(Value ~ treat + after + treat * after + Year, data = ddf3)
summary(model_bach)

## 
## Call:
## lm(formula = Value ~ treat + after + treat * after + Year, data = ddf3)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.025308 -0.006787 -0.003699  0.001840  0.060381 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -19.512875   7.988283  -2.443 0.029617 *  
## treat         0.071109   0.015373   4.626 0.000475 ***
## after        -0.006264   0.023060  -0.272 0.790173    
## Year          0.009742   0.003969   2.454 0.028970 *  
## treat:after  -0.021127   0.020625  -1.024 0.324375    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02174 on 13 degrees of freedom
## Multiple R-squared:  0.7851, Adjusted R-squared:  0.719 
## F-statistic: 11.87 on 4 and 13 DF,  p-value: 0.0002786

#age

ddf4 <- ddf %>% filter(Category == "ABV35")
model_age <- lm(Value ~ treat + after + treat * after + Year, data = ddf4)
summary(model_age)

## 
## Call:
## lm(formula = Value ~ treat + after + treat * after + Year, data = ddf4)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.027191 -0.011935  0.003609  0.008889  0.025784 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -18.282500   6.505951  -2.810   0.0147 *
## treat         0.035426   0.012520   2.829   0.0142 *
## after         0.005518   0.018781   0.294   0.7735  
## Year          0.009190   0.003233   2.843   0.0139 *
## treat:after  -0.004124   0.016798  -0.245   0.8099  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01771 on 13 degrees of freedom
## Multiple R-squared:  0.8011, Adjusted R-squared:  0.7399 
## F-statistic: 13.09 on 4 and 13 DF,  p-value: 0.0001714

# grad
ddf5 <- ddf %>% filter(Category == "GRAD")
model_grad <- lm(Value ~ treat + after + treat * after + Year, data = ddf5)
summary(model_grad)

## 
## Call:
## lm(formula = Value ~ treat + after + treat * after + Year, data = ddf5)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.014262 -0.004176 -0.000647  0.002671  0.033292 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.477775   4.285916  -1.511    0.155
## treat        0.001407   0.008248   0.171    0.867
## after        0.010452   0.012372   0.845    0.413
## Year         0.003272   0.002130   1.536    0.148
## treat:after -0.007403   0.011066  -0.669    0.515
## 
## Residual standard error: 0.01166 on 13 degrees of freedom
## Multiple R-squared:  0.5821, Adjusted R-squared:  0.4536 
## F-statistic: 4.528 on 4 and 13 DF,  p-value: 0.01646

rm(ddf1,ddf2,ddf3, ddf4,ddf5)

table1 <- modelsummary(stars = c('*' = .1, '**' = .05, '***' = .01),
            list("%Blacks" = model_race,"%Above 35" = model_age, "%High School" = model_hs, "%Bachelor's" = model_bach, 
                 "Graduate/Professional Degree" = model_age), 
            coef_omit = 1, gof_omit = "AIC|BIC|Log|F", title = "Results of Equation (2), excluding pandemic years (2020 and 2021)")

header = c(" " = 3, "Education" = 3)
table1 %>%  row_spec(7, color = 'red') %>% add_header_above(header = header)

Results of Equation (2), excluding pandemic years (2020 and 2021)
			Education
	%Blacks	%Above 35	%High School	%Bachelor’s	Graduate/Professional Degree
treat	0.037***	0.035**	0.031***	0.071***	0.035**
	(0.005)	(0.013)	(0.010)	(0.015)	(0.013)
after	-0.011	0.006	0.009	-0.006	0.006
	(0.008)	(0.019)	(0.015)	(0.023)	(0.019)
Year	0.002	0.009**	0.002	0.010**	0.009**
	(0.001)	(0.003)	(0.003)	(0.004)	(0.003)
treat × after	-0.001	-0.004	-0.011	-0.021	-0.004
	(0.007)	(0.017)	(0.013)	(0.021)	(0.017)
Num.Obs.	18	18	18	18	18
R2	0.897	0.801	0.596	0.785	0.801
R2 Adj.	0.865	0.740	0.471	0.719	0.740
RMSE	0.01	0.02	0.01	0.02	0.02
* p < 0.1, p < 0.05, * p < 0.01

And again, the regression output using stargazer

stargazer(model_race, model_age, model_hs, model_bach, model_grad,
          title = "Results of Equation (2) excluding pandemic years 2020-2021",
          column.labels = c("Blacks", "Above35", "High School", "Bachelor's", "Graduate/Professional Degree"),
          se=lapply(model.lst, function(x) sqrt(diag(vcovHC(x, type = "HC1")))),
          notes = c("Data is restricted to women with births in the past 12 months", "Dependent variable is measured in percentage of women",
                    "Robust standard errors in parentheses","lm() function"),
          notes.align = "l", notes.append = T, single.row = F, header = F, omit = "Constant")

## 
## \begin{table}[!htbp] \centering 
##   \caption{Results of Equation (2) excluding pandemic years 2020-2021} 
##   \label{} 
## \begin{tabular}{@{\extracolsep{5pt}}lccccc} 
## \\[-1.8ex]\hline 
## \hline \\[-1.8ex] 
##  & \multicolumn{5}{c}{\textit{Dependent variable:}} \\ 
## \cline{2-6} 
## \\[-1.8ex] & \multicolumn{5}{c}{Value} \\ 
##  & Blacks & Above35 & High School & Bachelor's & Graduate/Professional Degree \\ 
## \\[-1.8ex] & (1) & (2) & (3) & (4) & (5)\\ 
## \hline \\[-1.8ex] 
##  treat & 0.037$^{***}$ & 0.035$^{***}$ & 0.031$^{***}$ & 0.071$^{***}$ & 0.001 \\ 
##   & (0.005) & (0.013) & (0.006) & (0.011) & (0.011) \\ 
##   & & & & & \\ 
##  after & $-$0.011$^{*}$ & 0.006 & 0.009 & $-$0.006 & 0.010 \\ 
##   & (0.006) & (0.019) & (0.011) & (0.019) & (0.012) \\ 
##   & & & & & \\ 
##  Year & 0.002$^{***}$ & 0.009$^{***}$ & 0.002 & 0.010$^{***}$ & 0.003$^{***}$ \\ 
##   & (0.001) & (0.002) & (0.002) & (0.003) & (0.001) \\ 
##   & & & & & \\ 
##  treat:after & $-$0.001 & $-$0.004 & $-$0.011 & $-$0.021 & $-$0.007 \\ 
##   & (0.006) & (0.015) & (0.011) & (0.018) & (0.012) \\ 
##   & & & & & \\ 
## \hline \\[-1.8ex] 
## Observations & 18 & 18 & 18 & 18 & 18 \\ 
## R$^{2}$ & 0.897 & 0.801 & 0.596 & 0.785 & 0.582 \\ 
## Adjusted R$^{2}$ & 0.865 & 0.740 & 0.471 & 0.719 & 0.454 \\ 
## Residual Std. Error (df = 13) & 0.007 & 0.018 & 0.014 & 0.022 & 0.012 \\ 
## F Statistic (df = 4; 13) & 28.203$^{***}$ & 13.091$^{***}$ & 4.789$^{**}$ & 11.874$^{***}$ & 4.528$^{**}$ \\ 
## \hline 
## \hline \\[-1.8ex] 
## \textit{Note:}  & \multicolumn{5}{l}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\ 
##  & \multicolumn{5}{l}{Data is restricted to women with births in the past 12 months} \\ 
##  & \multicolumn{5}{l}{Dependent variable is measured in percentage of women} \\ 
##  & \multicolumn{5}{l}{Robust standard errors in parentheses} \\ 
##  & \multicolumn{5}{l}{lm() function} \\ 
## \end{tabular} 
## \end{table}

rm(ddf, model_hs, model_race, model_bach, table1,model_grad, model_age)

Event Study

This is done in Stata (tables.do); Stata code is reproduced here for ease of reference.

*+++++++++++++++++++++++++++++++++++++++ DATA MANIPULATION ++++++++++++++++++++++++++++++++++

import delimited “VBAC.csv”

drop if year < 2011

*create dummy variables

gen after=1 if year >= 2015 replace after=0 if after==.

gen treat=1 if state == “California” replace treat=0 if treat==.

gen did = after * treat

tab year after // check

*filter data

keep if state == “California” | state == “Oregon” keep if procedure == “VBAC”

*+++++++++++++++++++++++++++++++++++++++ SUMMARY STATS ++++++++++++++++++++++++++++++++++

tabstat rate, by(after) stat(n mean min max sd)
tabstat rate, by(state) stat(n mean min max sd)

twoway line rate year if treat == 0, sort || ///
line rate year if treat ==1, sort lpattern(dash) ///
legend(label(1 “Control”) label(2 “Treatment”)) ///
xline(2015) title(“VBAC rates in California and Oregon, 2011-2021”) ///
ytitle(“Annual VBAC rate, %”)

** DID table ** lab def after1 0 “Before 2015” 1 “After 2015”
label values after after1 table state after, nototals statistic(mean rate)

*+++++++++++++++++++++++++++++++++++++++ DID REG ++++++++++++++++++++++++++++++++++

label drop after1 eststo: reg rate i.treat i.after did year
esttab est1,star(* 0.10 ** 0.05 *** 0.01) title(“Results of Equation (1)”) drop(0.treat 0.after) ///
rename(1.treat treat 1.after after did after*treat _cons intercept)

*+++++++++++++++++++++++++++++++++++++++ EVENT STUDY ++++++++++++++++++++++++++++++++++

gen event = year - 2015

** using OLS **
eventdd rate did after treat, ols timevar(event) vce(robust) graph_op(ytitle(“coefficient”) xtitle(“Years from 2015”) title(“Event Study, VBAC”))

** using hdfe **
eventdd rate did, hdfe absorb(state) timevar(event) vce(robust) graph_op(ytitle(“coefficient”) xtitle(“Years from 2015”) title(“Event Study, VBAC”))

ECON740 Code Documentation - VBAC utilisation in California

2023-07-22