# Load required packages
library(did)
library(bacondecomp)
library(fixest)
library(ggplot2)
library(dplyr)
library(tidyr)
library(readr)
library(stringr)
library(purrr)
library(jsonlite)
library(knitr)
library(kableExtra)
library(ggpubr)
library(patchwork)
# Set theme for consistent plotting
theme_set(theme_minimal(base_size = 12))This analysis compares Traditional Two-Way Fixed Effects (TWFE) with the Sant’Anna & Callaway Difference-in-Differences (DiD) estimator for estimating the effect of Slovak research grants (APVV) on publication outcomes using real Web of Science data.
extract_apvv_year_simple <- function(grant_string) {
if (is.na(grant_string) || grant_string == "" || grant_string == "nan") {
return(NA)
}
grant_str <- as.character(grant_string)
# Look specifically for APVV-XXXX-XX pattern (like APVV-0088-12)
pattern1 <- "APVV[-_]\\d{4}[-_](\\d{2})"
match1 <- str_match(grant_str, regex(pattern1, ignore_case = TRUE))
if (!is.na(match1[1,1]) && !is.na(match1[1,2])) {
year_digits <- as.numeric(match1[1,2])
year <- 2000 + year_digits
if (year >= 2000 && year <= 2025) {
return(year)
}
}
# Look for APVV-XX-XXXX pattern (like APVV-15-0496)
pattern2 <- "APVV[-_](\\d{2})[-_]\\d{4}"
match2 <- str_match(grant_str, regex(pattern2, ignore_case = TRUE))
if (!is.na(match2[1,1]) && !is.na(match2[1,2])) {
year_digits <- as.numeric(match2[1,2])
year <- 2000 + year_digits
if (year >= 2000 && year <= 2025) {
return(year)
}
}
return(NA)
}# Test the specific case
test_apvv <- c("APVV-0088-12", "APVV-15-0496", "APVV-0842-11", "APVV-16-0079")
results <- data.frame(
Grant_String = test_apvv,
Extracted_Year = sapply(test_apvv, extract_apvv_year_simple)
)
kable(results, caption = "Grant Year Extraction Validation") %>%
kable_styling(bootstrap_options = "striped", full_width = FALSE)| Grant_String | Extracted_Year | |
|---|---|---|
| APVV-0088-12 | APVV-0088-12 | 2012 |
| APVV-15-0496 | APVV-15-0496 | 2015 |
| APVV-0842-11 | APVV-0842-11 | 2011 |
| APVV-16-0079 | APVV-16-0079 | 2016 |
# Load the real data
data_file <- "/Users/tomasoles/Library/CloudStorage/Dropbox/Research_Publications/slovak_university/wos_results/wos_results_full_sample.json"
load_real_data <- function(file_path) {
cat("Loading data from:", file_path, "\n")
# Load the JSON data
df <- fromJSON(file_path)
# Convert to dataframe if it's a list
if (is.list(df) && !is.data.frame(df)) {
df <- as.data.frame(df)
}
cat("Original data dimensions:", nrow(df), "rows,", ncol(df), "columns\n")
# Keep only relevant document types
keep_types <- c("Article", "Proceedings Paper", "Meeting Abstract")
df <- df %>% filter(document_type %in% keep_types)
cat("After filtering document types:", nrow(df), "rows\n")
return(df)
}
# Load the data
tryCatch({
raw_data <- load_real_data(data_file)
cat("✓ Successfully loaded real data\n")
}, error = function(e) {
cat("Could not load real data:", e$message, "\n")
stop("Data loading failed - please check the file path")
})## Loading data from: /Users/tomasoles/Library/CloudStorage/Dropbox/Research_Publications/slovak_university/wos_results/wos_results_full_sample.json
## Original data dimensions: 359614 rows, 41 columns
## After filtering document types: 314331 rows
## ✓ Successfully loaded real data
# Extract grant years for each author WITH FILTERING (2000-2022)
grant_years <- raw_data %>%
group_by(author_rid) %>%
summarise(
first_grant_year = {
all_grants <- unique(grant_ids[!is.na(grant_ids) & grant_ids != "" & grant_ids != "nan"])
if (length(all_grants) == 0) {
NA
} else {
years <- map_dbl(all_grants, extract_apvv_year_simple)
years <- years[!is.na(years)]
# FILTER: Keep only years between 2000 and 2022
years <- years[years >= 2000 & years <= 2022]
if (length(years) > 0) min(years) else NA
}
},
.groups = 'drop'
)
cat("Grant extraction completed:\n")## Grant extraction completed:
## Authors with grants: 8727
## Authors without grants: 16322
# Show grant year distribution AFTER FILTERING
grant_dist <- grant_years %>%
filter(!is.na(first_grant_year)) %>%
count(first_grant_year) %>%
arrange(first_grant_year)
kable(grant_dist, caption = "Grant Year Distribution (2000-2022 only)") %>%
kable_styling(bootstrap_options = "striped")| first_grant_year | n |
|---|---|
| 2000 | 5 |
| 2001 | 8 |
| 2002 | 1 |
| 2004 | 4 |
| 2006 | 239 |
| 2007 | 613 |
| 2009 | 1 |
| 2010 | 851 |
| 2011 | 903 |
| 2012 | 750 |
| 2014 | 742 |
| 2015 | 1435 |
| 2016 | 657 |
| 2017 | 653 |
| 2018 | 494 |
| 2019 | 390 |
| 2020 | 508 |
| 2021 | 268 |
| 2022 | 205 |
# Merge grant years and create treatment variables
processed_data <- raw_data %>%
left_join(grant_years, by = "author_rid") %>%
mutate(
first.treat = ifelse(is.na(first_grant_year), 0, first_grant_year),
year = as.integer(year),
treated = first.treat > 0
)
cat("Treatment variables created:\n")## Treatment variables created:
## Treated authors: 213401
## Control authors: 100930
# Treatment group summary
treatment_summary <- final_data %>%
distinct(id, first.treat) %>%
count(first.treat) %>%
arrange(first.treat)
kable(treatment_summary, caption = "Treatment Group Distribution") %>%
kable_styling(bootstrap_options = "striped")| first.treat | n |
|---|---|
| 0 | 16322 |
| 2000 | 5 |
| 2001 | 8 |
| 2002 | 1 |
| 2004 | 4 |
| 2006 | 239 |
| 2007 | 613 |
| 2009 | 1 |
| 2010 | 851 |
| 2011 | 903 |
| 2012 | 750 |
| 2014 | 742 |
| 2015 | 1435 |
| 2016 | 657 |
| 2017 | 653 |
| 2018 | 494 |
| 2019 | 390 |
| 2020 | 508 |
| 2021 | 268 |
| 2022 | 205 |
# Publication patterns
pub_stats <- final_data %>%
group_by(treatment_status = ifelse(first.treat == 0, "Control", "Treated")) %>%
summarise(
Authors = n_distinct(id),
Observations = n(),
Mean_Publications = mean(publications),
Median_Publications = median(publications),
Zero_Publications = sum(publications == 0),
.groups = 'drop'
)
kable(pub_stats, caption = "Publication Patterns by Treatment Status") %>%
kable_styling(bootstrap_options = "striped")| treatment_status | Authors | Observations | Mean_Publications | Median_Publications | Zero_Publications |
|---|---|---|---|---|---|
| Control | 16322 | 47777 | 2.112523 | 1 | 0 |
| Treated | 8727 | 65133 | 3.276388 | 2 | 0 |
# Publication trends over time by treatment status
trend_data <- final_data %>%
mutate(cohort_label = ifelse(first.treat == 0, "Never Treated",
paste("Treated", first.treat))) %>%
group_by(year, cohort_label) %>%
summarise(
mean_pubs = mean(publications),
se_pubs = sd(publications)/sqrt(n()),
n_authors = n_distinct(id),
.groups = 'drop'
) %>%
filter(n_authors >= 5) # Filter out small groups for stability
# Plot publication trends
p_trends <- ggplot(trend_data, aes(x = year, y = mean_pubs,
color = cohort_label, group = cohort_label)) +
geom_line(size = 1.2, alpha = 0.8) +
geom_point(size = 1.5) +
geom_ribbon(aes(ymin = mean_pubs - 1.96*se_pubs,
ymax = mean_pubs + 1.96*se_pubs,
fill = cohort_label),
alpha = 0.1, linetype = 0) +
labs(title = "Publication Trends by Treatment Cohort",
subtitle = "Real Web of Science Data - Mean publications with 95% confidence intervals",
x = "Year", y = "Mean Publications",
color = "Treatment Cohort", fill = "Treatment Cohort") +
theme_minimal() +
theme(legend.position = "bottom")
p_trends# Distribution of publications
p_pub_dist <- final_data %>%
ggplot(aes(x = publications, fill = first.treat > 0)) +
geom_histogram(binwidth = 1, alpha = 0.7, position = "identity") +
labs(title = "Distribution of Publications per Author-Year",
x = "Number of Publications", y = "Count",
fill = "Treated") +
theme_minimal() +
scale_x_continuous(limits = c(0, 20)) # Focus on reasonable range
p_pub_dist# Prepare data for TWFE - ensure proper formatting
twfe_data <- final_data %>%
mutate(
post = as.integer(first.treat > 0 & year >= first.treat),
author_factor = as.factor(id),
year_factor = as.factor(year)
)
# Standard TWFE specification
twfe_model <- feols(publications ~ post | author_factor + year_factor,
data = twfe_data)
# Create numeric author ID for Bacon decomposition
twfe_data <- twfe_data %>%
mutate(author_id = as.numeric(factor(id)))
# Print TWFE results
cat("=== TRADITIONAL TWFE RESULTS ===\n")## === TRADITIONAL TWFE RESULTS ===
## OLS estimation, Dep. Var.: publications
## Observations: 104,747
## Fixed-effects: author_factor: 16,887, year_factor: 41
## Standard-errors: IID
## Estimate Std. Error t value Pr(>|t|)
## post 0.429358 0.042919 10.0039 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 2.73233 Adj. R2: 0.648963
## Within R2: 0.001138
# Extract coefficients for plotting
twfe_coefs <- data.frame(
method = "TWFE",
estimate = coef(twfe_model)["post"],
se = se(twfe_model)["post"],
term = "Overall Effect"
)
# Simple event study for TWFE (if we have enough variation)
tryCatch({
# Create relative time indicators for a simple event study
event_study_data <- twfe_data %>%
filter(first.treat > 0) %>%
mutate(
rel_time = year - first.treat,
rel_time_factor = factor(rel_time)
)
# Simple event study with limited leads/lags
twfe_event <- feols(publications ~ i(rel_time_factor, ref = "-1") |
author_factor + year_factor,
data = event_study_data %>%
filter(rel_time >= -3, rel_time <= 3))
cat("\n=== SIMPLE TWFE EVENT STUDY ===\n")
print(summary(twfe_event))
# Extract event study coefficients
twfe_event_coefs <- broom::tidy(twfe_event, conf.int = TRUE) %>%
filter(str_detect(term, "rel_time_factor")) %>%
mutate(
time = as.numeric(str_extract(term, "[0-9+-]+")),
method = "TWFE"
)
}, error = function(e) {
cat("TWFE event study failed:", e$message, "\n")
twfe_event_coefs <- NULL
})##
## === SIMPLE TWFE EVENT STUDY ===
## OLS estimation, Dep. Var.: publications
## Observations: 18,421
## Fixed-effects: author_factor: 4,729, year_factor: 23
## Standard-errors: IID
## Estimate Std. Error t value Pr(>|t|)
## rel_time_factor::-3 0.150650 0.142818 1.054836 0.291519
## rel_time_factor::-2 0.013673 0.118969 0.114931 0.908501
## rel_time_factor::0 -0.105111 0.085989 -1.222369 0.221589
## rel_time_factor::1 -0.219432 0.079809 -2.749473 0.005977 **
## rel_time_factor::2 -0.050284 0.071099 -0.707244 0.479427
## ... 1 variable was removed because of collinearity (rel_time_factor::3)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 2.57147 Adj. R2: 0.63651
## Within R2: 9.44e-4
# Create balanced panel subset for Bacon decomposition
bacon_data <- twfe_data %>%
group_by(author_id) %>%
filter(n() >= 3) %>% # Keep only authors with at least 3 observations
ungroup()
cat("Data for Bacon decomposition:", nrow(bacon_data), "observations\n")## Data for Bacon decomposition: 96894 observations
## Unique authors for Bacon: 12960
# Perform Bacon decomposition with balanced data
tryCatch({
bacon_results <- bacon(publications ~ post,
data = bacon_data,
id_var = "author_id",
time_var = "year")
# Plot Bacon decomposition
p_bacon_weights <- ggplot(bacon_results$weights,
aes(x = weight, y = reorder(type, weight))) +
geom_col(fill = "steelblue", alpha = 0.7) +
labs(title = "Bacon Decomposition: Weights on 2x2 Comparisons",
x = "Weight", y = "Comparison Type") +
theme_minimal()
p_bacon_effects <- ggplot(bacon_results$two_by_twos,
aes(x = estimate, y = reorder(type, estimate))) +
geom_point(size = 3, color = "darkred") +
geom_vline(xintercept = 0, linetype = "dashed") +
labs(title = "Bacon Decomposition: 2x2 Treatment Effects",
x = "Treatment Effect Estimate", y = "Comparison Type") +
theme_minimal()
# Combine Bacon plots
p_bacon_combined <- p_bacon_weights / p_bacon_effects
print(p_bacon_combined)
# Display summary of Bacon decomposition
cat("=== BACON DECOMPOSITION SUMMARY ===\n")
print(bacon_results$omega_weighted)
}, error = function(e) {
cat("Bacon decomposition failed:", e$message, "\n")
# Alternative: Manual Bacon-style analysis
cat("\n=== ALTERNATIVE: MANUAL COHORT ANALYSIS ===\n")
cohort_effects <- twfe_data %>%
filter(first.treat > 0) %>%
group_by(first.treat) %>%
summarise(
pre_mean = mean(publications[year < first.treat]),
post_mean = mean(publications[year >= first.treat]),
effect = post_mean - pre_mean,
n_authors = n_distinct(author_id),
.groups = 'drop'
)
kable(cohort_effects, caption = "Manual Cohort Analysis") %>%
kable_styling(bootstrap_options = "striped")
})## Bacon decomposition failed: Unbalanced Panel
##
## === ALTERNATIVE: MANUAL COHORT ANALYSIS ===
| first.treat | pre_mean | post_mean | effect | n_authors |
|---|---|---|---|---|
| 2000 | NaN | 4.727273 | NaN | 5 |
| 2001 | NaN | 3.966667 | NaN | 8 |
| 2002 | NaN | 4.071429 | NaN | 1 |
| 2004 | NaN | 8.083333 | NaN | 4 |
| 2006 | 8.419491 | 4.624086 | -3.7954053 | 239 |
| 2007 | 2.372777 | 3.407396 | 1.0346196 | 613 |
| 2009 | NaN | 1.500000 | NaN | 1 |
| 2010 | 2.355609 | 3.591031 | 1.2354219 | 851 |
| 2011 | 2.301922 | 3.600478 | 1.2985566 | 903 |
| 2012 | 2.075949 | 3.777201 | 1.7012517 | 750 |
| 2014 | 2.197519 | 3.745480 | 1.5479611 | 742 |
| 2015 | 2.293931 | 3.255987 | 0.9620556 | 1435 |
| 2016 | 2.359375 | 3.086428 | 0.7270533 | 657 |
| 2017 | 2.594901 | 3.221037 | 0.6261357 | 653 |
| 2018 | 2.519621 | 3.045094 | 0.5254733 | 494 |
| 2019 | 2.819005 | 3.019031 | 0.2000266 | 390 |
| 2020 | 2.654369 | 2.583539 | -0.0708298 | 508 |
| 2021 | 2.492857 | 2.613828 | 0.1209709 | 268 |
| 2022 | 2.474771 | 2.734491 | 0.2597207 | 205 |
# Prepare clean data for Sant'Anna & Callaway DiD with proper numeric IDs
did_data_clean <- final_data %>%
mutate(
# Create numeric ID for did package
id_num = as.numeric(factor(id)),
year = as.numeric(year),
first.treat = as.numeric(first.treat)
) %>%
# Filter to reasonable time period for analysis
filter(year >= 2000, year <= 2022) %>%
# Remove any missing values that could cause issues
filter(!is.na(publications), !is.na(year), !is.na(first.treat), !is.na(id_num))
cat("Clean data for Sant'Anna & Callaway DiD analysis:", nrow(did_data_clean), "observations\n")## Clean data for Sant'Anna & Callaway DiD analysis: 88693 observations
## Unique authors: 21612
# Check treatment cohort sizes
cohort_sizes <- did_data_clean %>%
filter(first.treat > 0) %>%
distinct(id_num, first.treat) %>%
count(first.treat) %>%
arrange(first.treat)
cat("Treatment cohort sizes:\n")## Treatment cohort sizes:
## # A tibble: 19 × 2
## first.treat n
## <dbl> <int>
## 1 2000 5
## 2 2001 8
## 3 2002 1
## 4 2004 4
## 5 2006 238
## 6 2007 613
## 7 2009 1
## 8 2010 851
## 9 2011 901
## 10 2012 750
## 11 2014 739
## 12 2015 1407
## 13 2016 618
## 14 2017 605
## 15 2018 421
## 16 2019 296
## 17 2020 346
## 18 2021 153
## 19 2022 105
# Use only cohorts with sufficient observations
viable_cohorts <- cohort_sizes %>%
filter(n >= 10) %>% # At least 10 authors per cohort
pull(first.treat)
cat("Viable cohorts (n >= 10):", viable_cohorts, "\n")## Viable cohorts (n >= 10): 2006 2007 2010 2011 2012 2014 2015 2016 2017 2018 2019 2020 2021 2022
if (length(viable_cohorts) == 0) {
stop("No viable treatment cohorts with sufficient observations")
}
# Filter data to viable cohorts
did_data_final <- did_data_clean %>%
filter(first.treat %in% viable_cohorts | first.treat == 0)
cat("Final Sant'Anna & Callaway DiD data:", nrow(did_data_final), "observations\n")## Final Sant'Anna & Callaway DiD data: 88563 observations
## Final unique authors: 21593
# Estimate group-time average treatment effects with robust settings
tryCatch({
sc_attgt <- att_gt(
yname = "publications",
tname = "year",
idname = "id_num",
gname = "first.treat",
data = did_data_final,
control_group = "nevertreated", # Use never-treated for simplicity
est_method = "reg", # Use regression method (more stable)
anticipation = 0,
allow_unbalanced_panel = TRUE
)
# Display group-time ATTs
cat("=== SANT'ANNA & CALLAWAY GROUP-TIME AVERAGE TREATMENT EFFECTS ===\n")
print(summary(sc_attgt))
# Set flag for successful DiD estimation
sc_success <- TRUE
}, error = function(e) {
cat("Sant'Anna & Callaway DiD estimation failed:", e$message, "\n")
sc_success <- FALSE
})## === SANT'ANNA & CALLAWAY GROUP-TIME AVERAGE TREATMENT EFFECTS ===
##
## Call:
## att_gt(yname = "publications", tname = "year", idname = "id_num",
## gname = "first.treat", data = did_data_final, allow_unbalanced_panel = TRUE,
## control_group = "nevertreated", anticipation = 0, est_method = "reg")
##
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna. "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015>
##
## Group-Time Average Treatment Effects:
## Group Time ATT(g,t) Std. Error [95% Simult. Conf. Band]
## 2006 2001 0.1592 1.1590 -3.9142 4.2327
## 2006 2002 -0.9370 1.4755 -6.1226 4.2486
## 2006 2003 -0.7347 1.2355 -5.0770 3.6076
## 2006 2004 0.1969 1.0652 -3.5469 3.9408
## 2006 2005 0.0218 0.9206 -3.2137 3.2574
## 2006 2006 -4.0194 6.0148 -25.1588 17.1200
## 2006 2007 -4.5862 6.5763 -27.6991 18.5267
## 2006 2008 -4.5020 7.0147 -29.1558 20.1518
## 2006 2009 -4.3782 6.8007 -28.2799 19.5234
## 2006 2010 -4.2104 6.7802 -28.0401 19.6193
## 2006 2011 -4.0404 6.9082 -28.3198 20.2390
## 2006 2012 -3.8434 6.6822 -27.3286 19.6418
## 2006 2013 -3.3246 5.7850 -23.6565 17.0073
## 2006 2014 -2.9281 5.4536 -22.0952 16.2390
## 2006 2015 -3.6955 5.8961 -24.4178 17.0268
## 2006 2016 -3.1337 5.5821 -22.7523 16.4850
## 2006 2017 -3.4915 5.8622 -24.0945 17.1115
## 2006 2018 -2.7322 5.2232 -21.0894 15.6251
## 2006 2019 -2.3230 5.1957 -20.5838 15.9378
## 2006 2020 -2.7346 4.9869 -20.2614 14.7921
## 2006 2021 -2.6853 5.4204 -21.7358 16.3651
## 2006 2022 -3.3943 5.6688 -23.3178 16.5291
## 2007 2001 0.4385 0.2737 -0.5233 1.4003
## 2007 2002 -0.0354 0.2405 -0.8807 0.8099
## 2007 2003 -0.0696 0.2232 -0.8541 0.7149
## 2007 2004 0.0287 0.2493 -0.8473 0.9048
## 2007 2005 -0.2310 0.2244 -1.0198 0.5577
## 2007 2006 0.3787 0.2221 -0.4018 1.1592
## 2007 2007 0.0017 0.2328 -0.8164 0.8197
## 2007 2008 0.0456 0.2332 -0.7740 0.8653
## 2007 2009 0.0999 0.2199 -0.6730 0.8727
## 2007 2010 0.3421 0.2473 -0.5271 1.2113
## 2007 2011 0.0416 0.2322 -0.7745 0.8576
## 2007 2012 0.5789 0.2568 -0.3236 1.4814
## 2007 2013 0.5539 0.2456 -0.3093 1.4171
## 2007 2014 1.1429 0.2888 0.1279 2.1578 *
## 2007 2015 0.8939 0.2722 -0.0628 1.8506
## 2007 2016 0.7595 0.2869 -0.2488 1.7678
## 2007 2017 0.8109 0.2906 -0.2105 1.8323
## 2007 2018 0.6394 0.2807 -0.3472 1.6261
## 2007 2019 0.7294 0.3124 -0.3687 1.8275
## 2007 2020 0.6202 0.2857 -0.3838 1.6243
## 2007 2021 1.0134 0.3352 -0.1645 2.1914
## 2007 2022 0.6431 0.2938 -0.3895 1.6757
## 2010 2001 -0.5469 0.2985 -1.5959 0.5021
## 2010 2002 0.1768 0.3185 -0.9427 1.2963
## 2010 2003 0.1345 0.3335 -1.0378 1.3067
## 2010 2004 -0.0265 0.2574 -0.9311 0.8782
## 2010 2005 0.1695 0.2425 -0.6829 1.0220
## 2010 2006 -0.0124 0.2089 -0.7464 0.7217
## 2010 2007 0.1016 0.2101 -0.6369 0.8401
## 2010 2008 -0.0969 0.2101 -0.8354 0.6417
## 2010 2009 0.3054 0.1677 -0.2841 0.8950
## 2010 2010 0.1792 0.1969 -0.5127 0.8712
## 2010 2011 -0.1090 0.1779 -0.7344 0.5164
## 2010 2012 0.0444 0.1791 -0.5849 0.6737
## 2010 2013 0.3401 0.1677 -0.2492 0.9294
## 2010 2014 0.6831 0.2040 -0.0339 1.4001
## 2010 2015 0.7715 0.2009 0.0656 1.4775 *
## 2010 2016 0.5531 0.1988 -0.1456 1.2518
## 2010 2017 0.7722 0.2379 -0.0639 1.6083
## 2010 2018 0.7742 0.2208 -0.0019 1.5503
## 2010 2019 0.8326 0.2365 0.0015 1.6637 *
## 2010 2020 1.1706 0.2378 0.3348 2.0065 *
## 2010 2021 1.0892 0.2601 0.1750 2.0033 *
## 2010 2022 0.8182 0.2374 -0.0162 1.6526
## 2011 2001 0.5045 0.2559 -0.3947 1.4037
## 2011 2002 0.1088 0.3091 -0.9777 1.1953
## 2011 2003 0.0936 0.2615 -0.8255 1.0127
## 2011 2004 -0.1269 0.2808 -1.1139 0.8601
## 2011 2005 -0.1485 0.2564 -1.0495 0.7526
## 2011 2006 0.2125 0.1961 -0.4766 0.9017
## 2011 2007 0.0290 0.2090 -0.7057 0.7637
## 2011 2008 0.1490 0.2307 -0.6619 0.9599
## 2011 2009 0.2057 0.2179 -0.5603 0.9717
## 2011 2010 -0.2768 0.1790 -0.9060 0.3524
## 2011 2011 0.3341 0.1619 -0.2350 0.9032
## 2011 2012 0.4297 0.1669 -0.1567 1.0162
## 2011 2013 0.4240 0.1765 -0.1961 1.0442
## 2011 2014 0.8770 0.1879 0.2166 1.5374 *
## 2011 2015 0.8398 0.1810 0.2037 1.4758 *
## 2011 2016 0.9777 0.2180 0.2114 1.7441 *
## 2011 2017 0.9275 0.2344 0.1037 1.7514 *
## 2011 2018 1.2614 0.2003 0.5573 1.9655 *
## 2011 2019 1.4452 0.2804 0.4597 2.4307 *
## 2011 2020 1.5562 0.2946 0.5209 2.5914 *
## 2011 2021 1.4485 0.2593 0.5373 2.3597 *
## 2011 2022 1.1954 0.2137 0.4442 1.9466 *
## 2012 2001 0.5433 0.3911 -0.8312 1.9178
## 2012 2002 -0.0423 0.3152 -1.1500 1.0654
## 2012 2003 0.1318 0.2737 -0.8300 1.0936
## 2012 2004 -0.3471 0.3196 -1.4703 0.7762
## 2012 2005 0.3282 0.3139 -0.7750 1.4315
## 2012 2006 -0.0960 0.2288 -0.8999 0.7080
## 2012 2007 0.1515 0.2434 -0.7040 1.0070
## 2012 2008 0.2205 0.2832 -0.7748 1.2159
## 2012 2009 0.0615 0.3070 -1.0176 1.1406
## 2012 2010 -0.1658 0.2383 -1.0035 0.6718
## 2012 2011 -0.0186 0.2128 -0.7663 0.7292
## 2012 2012 0.3763 0.1899 -0.2912 1.0439
## 2012 2013 0.2921 0.1845 -0.3564 0.9406
## 2012 2014 0.8932 0.2142 0.1405 1.6458 *
## 2012 2015 0.6134 0.2048 -0.1062 1.3331
## 2012 2016 0.7477 0.1935 0.0678 1.4276 *
## 2012 2017 1.0660 0.2344 0.2421 1.8900 *
## 2012 2018 1.7533 0.2777 0.7773 2.7292 *
## 2012 2019 1.9810 0.2616 1.0615 2.9006 *
## 2012 2020 2.1254 0.2883 1.1122 3.1386 *
## 2012 2021 2.0916 0.3000 1.0374 3.1458 *
## 2012 2022 1.9581 0.3121 0.8612 3.0550 *
## 2014 2001 0.4163 0.3492 -0.8109 1.6435
## 2014 2002 0.0943 0.3698 -1.2054 1.3939
## 2014 2003 0.1440 0.3868 -1.2154 1.5035
## 2014 2004 -0.4716 0.3685 -1.7668 0.8236
## 2014 2005 0.5588 0.4716 -1.0986 2.2162
## 2014 2006 -0.3626 0.4714 -2.0192 1.2940
## 2014 2007 0.0839 0.3681 -1.2097 1.3776
## 2014 2008 -0.1706 0.3356 -1.3501 1.0089
## 2014 2009 0.1461 0.2365 -0.6852 0.9774
## 2014 2010 0.2863 0.2227 -0.4966 1.0692
## 2014 2011 -0.1214 0.2310 -0.9332 0.6904
## 2014 2012 0.0061 0.2253 -0.7856 0.7979
## 2014 2013 0.4732 0.2880 -0.5391 1.4855
## 2014 2014 0.2564 0.2887 -0.7582 1.2710
## 2014 2015 0.1750 0.2673 -0.7645 1.1146
## 2014 2016 0.0630 0.2766 -0.9090 1.0350
## 2014 2017 0.3833 0.2986 -0.6661 1.4327
## 2014 2018 0.8363 0.3300 -0.3236 1.9963
## 2014 2019 1.1700 0.3698 -0.1297 2.4697
## 2014 2020 1.5260 0.4004 0.1187 2.9334 *
## 2014 2021 1.3199 0.3127 0.2208 2.4190 *
## 2014 2022 1.0404 0.3478 -0.1820 2.2628
## 2015 2001 -0.4980 0.3182 -1.6164 0.6204
## 2015 2002 0.2358 0.2219 -0.5439 1.0155
## 2015 2003 -0.1451 0.2481 -1.0172 0.7270
## 2015 2004 0.2112 0.2604 -0.7041 1.1266
## 2015 2005 -0.1371 0.2660 -1.0718 0.7977
## 2015 2006 0.1143 0.2087 -0.6191 0.8476
## 2015 2007 0.3352 0.2628 -0.5882 1.2587
## 2015 2008 -0.3503 0.2869 -1.3586 0.6580
## 2015 2009 0.3536 0.1613 -0.2133 0.9204
## 2015 2010 0.1902 0.1784 -0.4368 0.8171
## 2015 2011 0.0120 0.1720 -0.5926 0.6166
## 2015 2012 0.0460 0.1493 -0.4787 0.5707
## 2015 2013 0.4456 0.1880 -0.2151 1.1063
## 2015 2014 -0.1164 0.1538 -0.6571 0.4242
## 2015 2015 0.3677 0.1577 -0.1865 0.9220
## 2015 2016 -0.0049 0.1460 -0.5179 0.5081
## 2015 2017 0.2346 0.1585 -0.3225 0.7917
## 2015 2018 0.3754 0.1634 -0.1990 0.9498
## 2015 2019 0.5600 0.1992 -0.1401 1.2601
## 2015 2020 0.5063 0.2002 -0.1973 1.2099
## 2015 2021 0.6392 0.1956 -0.0483 1.3268
## 2015 2022 0.7874 0.2294 -0.0190 1.5938
## 2016 2001 -1.2914 1.7075 -7.2925 4.7097
## 2016 2002 0.5292 0.5814 -1.5141 2.5725
## 2016 2003 -0.8530 0.3369 -2.0370 0.3311
## 2016 2004 0.6031 0.2682 -0.3397 1.5458
## 2016 2005 0.3058 0.3658 -0.9799 1.5915
## 2016 2006 -0.3526 0.4091 -1.7903 1.0851
## 2016 2007 0.0413 0.3526 -1.1979 1.2804
## 2016 2008 -0.1020 0.2181 -0.8684 0.6644
## 2016 2009 0.5685 0.2708 -0.3833 1.5203
## 2016 2010 -0.4189 0.3070 -1.4977 0.6599
## 2016 2011 0.0936 0.3440 -1.1155 1.3027
## 2016 2012 0.2101 0.2467 -0.6570 1.0773
## 2016 2013 -0.0928 0.2075 -0.8220 0.6364
## 2016 2014 0.2063 0.2410 -0.6406 1.0532
## 2016 2015 0.4638 0.3076 -0.6172 1.5448
## 2016 2016 -0.1018 0.2916 -1.1266 0.9229
## 2016 2017 -0.3730 0.3289 -1.5288 0.7828
## 2016 2018 -0.0717 0.3816 -1.4130 1.2695
## 2016 2019 -0.2185 0.3597 -1.4827 1.0456
## 2016 2020 -0.1546 0.3544 -1.4003 1.0911
## 2016 2021 -0.0054 0.3603 -1.2719 1.2610
## 2016 2022 0.1113 0.3843 -1.2392 1.4618
## 2017 2001 -0.0606 0.7160 -2.5771 2.4558
## 2017 2002 0.9889 0.5287 -0.8693 2.8471
## 2017 2003 -0.6116 0.8113 -3.4632 2.2399
## 2017 2004 -0.1621 0.6147 -2.3224 1.9982
## 2017 2005 -0.2961 0.4100 -1.7372 1.1450
## 2017 2006 0.2130 0.3243 -0.9266 1.3527
## 2017 2007 0.0775 0.5744 -1.9414 2.0964
## 2017 2008 -0.4098 0.5715 -2.4186 1.5989
## 2017 2009 0.9771 0.3815 -0.3638 2.3179
## 2017 2010 -0.6814 0.3591 -1.9434 0.5805
## 2017 2011 0.1006 0.2953 -0.9371 1.1383
## 2017 2012 -0.0518 0.2566 -0.9538 0.8502
## 2017 2013 0.4942 0.2843 -0.5049 1.4933
## 2017 2014 0.0910 0.2690 -0.8544 1.0363
## 2017 2015 0.1836 0.2630 -0.7408 1.1081
## 2017 2016 0.1663 0.2587 -0.7430 1.0757
## 2017 2017 -0.2616 0.2497 -1.1391 0.6159
## 2017 2018 -0.0062 0.2788 -0.9859 0.9735
## 2017 2019 -0.0586 0.2695 -1.0057 0.8885
## 2017 2020 -0.0214 0.2740 -0.9844 0.9416
## 2017 2021 -0.0277 0.3431 -1.2336 1.1782
## 2017 2022 0.1796 0.4938 -1.5557 1.9150
## 2018 2001 -2.0694 0.8946 -5.2134 1.0746
## 2018 2002 1.1818 0.5978 -0.9193 3.2828
## 2018 2003 0.4291 0.6563 -1.8777 2.7358
## 2018 2004 -1.3008 0.5086 -3.0884 0.4869
## 2018 2005 0.4283 0.3380 -0.7595 1.6161
## 2018 2006 0.0634 0.3889 -1.3035 1.4302
## 2018 2007 -0.0564 0.4109 -1.5004 1.3877
## 2018 2008 0.4211 0.5821 -1.6247 2.4670
## 2018 2009 0.0059 0.5916 -2.0733 2.0851
## 2018 2010 -0.3342 0.3041 -1.4031 0.7348
## 2018 2011 1.2435 1.0201 -2.3419 4.8289
## 2018 2012 -1.2274 1.0554 -4.9367 2.4818
## 2018 2013 0.2111 0.2334 -0.6092 1.0315
## 2018 2014 0.3243 0.3299 -0.8354 1.4839
## 2018 2015 -0.1561 0.2859 -1.1610 0.8488
## 2018 2016 0.6241 0.5030 -1.1438 2.3920
## 2018 2017 -0.5982 0.4495 -2.1779 0.9815
## 2018 2018 0.2102 0.2672 -0.7289 1.1494
## 2018 2019 0.0955 0.2810 -0.8922 1.0831
## 2018 2020 0.5032 0.3283 -0.6506 1.6570
## 2018 2021 0.4377 0.3518 -0.7986 1.6740
## 2018 2022 0.4727 0.3817 -0.8687 1.8141
## 2019 2001 2.4163 0.2428 1.5629 3.2697 *
## 2019 2002 -1.8682 0.4916 -3.5961 -0.1404 *
## 2019 2003 -0.5077 0.5150 -2.3179 1.3024
## 2019 2004 0.9630 0.7156 -1.5522 3.4781
## 2019 2005 -0.0429 1.2427 -4.4105 4.3248
## 2019 2006 -0.8151 1.2021 -5.0400 3.4098
## 2019 2007 0.3497 0.3408 -0.8482 1.5476
## 2019 2008 0.2848 0.4368 -1.2505 1.8200
## 2019 2009 -0.1339 0.3444 -1.3444 1.0765
## 2019 2010 -0.0284 0.2897 -1.0466 0.9898
## 2019 2011 0.6240 0.7670 -2.0718 3.3197
## 2019 2012 -0.3927 0.4909 -2.1179 1.3326
## 2019 2013 0.6966 0.4378 -0.8422 2.2354
## 2019 2014 -0.5113 0.4512 -2.0970 1.0743
## 2019 2015 0.1435 0.4551 -1.4561 1.7431
## 2019 2016 0.9086 0.4572 -0.6983 2.5155
## 2019 2017 -0.9114 0.4891 -2.6303 0.8074
## 2019 2018 0.0459 0.3632 -1.2306 1.3224
## 2019 2019 -0.5336 0.3059 -1.6088 0.5416
## 2019 2020 -0.0025 0.3865 -1.3609 1.3559
## 2019 2021 0.4136 0.3888 -0.9528 1.7800
## 2019 2022 -0.0661 0.3623 -1.3394 1.2073
## 2020 2001 -0.5670 0.5083 -2.3534 1.2193
## 2020 2002 0.4056 0.4572 -1.2012 2.0124
## 2020 2003 0.7065 0.5737 -1.3097 2.7228
## 2020 2004 0.1031 0.7987 -2.7040 2.9102
## 2020 2005 -0.1000 0.9572 -3.4641 3.2641
## 2020 2006 -0.3218 0.8536 -3.3217 2.6781
## 2020 2007 0.2853 0.5355 -1.5969 2.1675
## 2020 2008 -0.6405 0.3598 -1.9050 0.6241
## 2020 2009 0.9243 0.4545 -0.6730 2.5217
## 2020 2010 -0.1049 0.4413 -1.6558 1.4460
## 2020 2011 -0.4790 0.4425 -2.0341 1.0761
## 2020 2012 -0.3833 0.2854 -1.3863 0.6196
## 2020 2013 0.0973 0.3218 -1.0338 1.2284
## 2020 2014 0.5823 0.3436 -0.6253 1.7898
## 2020 2015 0.5108 0.4985 -1.2412 2.2628
## 2020 2016 -0.2055 0.6234 -2.3965 1.9855
## 2020 2017 -0.5278 0.6665 -2.8704 1.8149
## 2020 2018 0.6030 0.3219 -0.5284 1.7344
## 2020 2019 -0.3564 0.3151 -1.4638 0.7509
## 2020 2020 0.0029 0.2805 -0.9831 0.9889
## 2020 2021 -0.3051 0.3193 -1.4272 0.8170
## 2020 2022 -0.0233 0.4849 -1.7275 1.6810
## 2021 2001 0.9330 1.0365 -2.7101 4.5760
## 2021 2002 -1.7849 1.1232 -5.7325 2.1627
## 2021 2003 -0.2697 0.3141 -1.3735 0.8342
## 2021 2004 0.0531 0.2474 -0.8165 0.9227
## 2021 2005 1.3492 0.7282 -1.2102 3.9086
## 2021 2006 -0.4293 0.9214 -3.6678 2.8092
## 2021 2007 -0.2639 0.9248 -3.5141 2.9862
## 2021 2008 -0.2380 0.2686 -1.1818 0.7059
## 2021 2009 0.2206 0.4094 -1.2183 1.6594
## 2021 2010 0.9539 0.4742 -0.7126 2.6205
## 2021 2011 -0.6597 0.7736 -3.3787 2.0594
## 2021 2012 0.2795 0.6223 -1.9077 2.4667
## 2021 2013 -0.3611 0.5110 -2.1568 1.4347
## 2021 2014 0.3546 0.3463 -0.8623 1.5716
## 2021 2015 -0.1772 0.3559 -1.4281 1.0737
## 2021 2016 -0.0866 0.2638 -1.0136 0.8405
## 2021 2017 0.4094 0.3800 -0.9263 1.7451
## 2021 2018 -0.2541 0.3855 -1.6088 1.1007
## 2021 2019 0.2768 0.3559 -0.9739 1.5275
## 2021 2020 -0.2310 0.3023 -1.2935 0.8315
## 2021 2021 0.4905 0.6597 -1.8282 2.8091
## 2021 2022 0.4805 0.8118 -2.3727 3.3336
## 2022 2001 -0.2337 0.8592 -3.2534 2.7859
## 2022 2002 -0.6182 0.4374 -2.1557 0.9192
## 2022 2003 1.0637 0.9977 -2.4429 4.5702
## 2022 2004 -0.5469 1.1351 -4.5363 3.4425
## 2022 2005 -0.4794 0.4102 -1.9211 0.9623
## 2022 2006 0.2612 0.3321 -0.9060 1.4284
## 2022 2007 1.0095 0.9083 -2.1826 4.2017
## 2022 2008 -0.9400 0.9658 -4.3343 2.4543
## 2022 2009 0.4559 0.4731 -1.2067 2.1185
## 2022 2010 -0.3925 0.4085 -1.8283 1.0433
## 2022 2011 0.4877 0.3475 -0.7336 1.7090
## 2022 2012 -0.1848 0.2997 -1.2380 0.8684
## 2022 2013 0.4599 0.4229 -1.0266 1.9463
## 2022 2014 0.0681 0.4707 -1.5861 1.7224
## 2022 2015 -0.0618 0.4045 -1.4834 1.3598
## 2022 2016 1.3574 0.7813 -1.3886 4.1035
## 2022 2017 -1.2489 0.6598 -3.5676 1.0699
## 2022 2018 -0.0142 0.4046 -1.4363 1.4079
## 2022 2019 -0.2513 0.4018 -1.6635 1.1609
## 2022 2020 0.2667 0.5536 -1.6790 2.2124
## 2022 2021 0.5161 0.5147 -1.2927 2.3250
## 2022 2022 0.1612 0.3094 -0.9261 1.2484
## ---
## Signif. codes: `*' confidence band does not cover 0
##
## P-value for pre-test of parallel trends assumption: 0
## Control Group: Never Treated, Anticipation Periods: 0
## Estimation Method: Outcome Regression
## NULL
# Check if we have successful Sant'Anna & Callaway model
if (exists("sc_success") && sc_success) {
# Aggregate to dynamic effects with error handling
tryCatch({
sc_dynamic <- aggte(sc_attgt, type = "dynamic", na.rm = TRUE)
cat("=== DYNAMIC TREATMENT EFFECTS (SANT'ANNA & CALLAWAY) ===\n")
print(summary(sc_dynamic))
# Extract dynamic effects from Sant'Anna & Callaway DiD
sc_dynamic_df <- data.frame(
time = sc_dynamic$egt,
estimate = sc_dynamic$att.egt,
se = sc_dynamic$se.egt,
method = "Sant'Anna & Callaway"
) %>%
mutate(
conf.low = estimate - 1.96 * se,
conf.high = estimate + 1.96 * se
)
# Plot Sant'Anna & Callaway dynamic effects
p_sc_dynamic <- ggdid(sc_dynamic) +
labs(title = "Dynamic Treatment Effects - Sant'Anna & Callaway Estimator",
subtitle = "Event study plot from robust DiD estimation") +
theme_minimal()
print(p_sc_dynamic)
# Compare with TWFE if available
if (exists("twfe_event_coefs") && !is.null(twfe_event_coefs)) {
# Combine TWFE and Sant'Anna & Callaway dynamic effects
dynamic_comparison <- bind_rows(
twfe_event_coefs %>% select(time, estimate, conf.low, conf.high, method),
sc_dynamic_df %>% select(time, estimate, conf.low, conf.high, method)
) %>%
filter(!is.na(time))
# Plot comparison
p_dynamic_comparison <- ggplot(dynamic_comparison,
aes(x = time, y = estimate,
color = method, fill = method)) +
geom_point(position = position_dodge(width = 0.3), size = 2.5) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
position = position_dodge(width = 0.3),
width = 0.2) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
geom_vline(xintercept = -0.5, linetype = "dashed", color = "gray") +
labs(title = "Comparison: Dynamic Treatment Effects",
subtitle = "TWFE vs Sant'Anna & Callaway Estimator with Slovak Grant Data",
x = "Time to Treatment", y = "Treatment Effect (Publications)",
color = "Method", fill = "Method") +
theme_minimal() +
theme(legend.position = "bottom")
print(p_dynamic_comparison)
}
}, error = function(e) {
cat("Dynamic effects aggregation failed:", e$message, "\n")
})
} else {
cat("Cannot compute dynamic effects - Sant'Anna & Callaway model failed to estimate\n")
# Create alternative visualization
p_treatment_timing <- final_data %>%
filter(first.treat > 0) %>%
distinct(id, first.treat) %>%
ggplot(aes(x = first.treat)) +
geom_histogram(binwidth = 1, fill = "steelblue", alpha = 0.7) +
labs(title = "Distribution of Treatment Timing",
subtitle = "When authors received their first grant",
x = "Year of First Grant", y = "Number of Authors") +
theme_minimal()
print(p_treatment_timing)
}## === DYNAMIC TREATMENT EFFECTS (SANT'ANNA & CALLAWAY) ===
##
## Call:
## aggte(MP = sc_attgt, type = "dynamic", na.rm = TRUE)
##
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna. "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015>
##
##
## Overall summary of ATT's based on event-study/dynamic aggregation:
## ATT Std. Error [ 95% Conf. Int.]
## 0.1616 0.8333 -1.4717 1.7949
##
##
## Dynamic Effects:
## Event time Estimate Std. Error [95% Simult. Conf. Band]
## -21 -0.2337 0.8722 -2.7066 2.2392
## -20 0.3017 0.6121 -1.4339 2.0372
## -19 -0.5921 0.4205 -1.7842 0.6001
## -18 0.8410 0.2361 0.1715 1.5105 *
## -17 -0.9250 0.3395 -1.8877 0.0376
## -16 0.3012 0.2745 -0.4770 1.0793
## -15 0.1068 0.4479 -1.1631 1.3766
## -14 -0.3934 0.2084 -0.9844 0.1976
## -13 0.0135 0.1435 -0.3932 0.4203
## -12 -0.0085 0.1077 -0.3138 0.2968
## -11 0.3137 0.1146 -0.0112 0.6386
## -10 -0.0487 0.1177 -0.3824 0.2851
## -9 0.0003 0.0993 -0.2811 0.2817
## -8 0.0792 0.0957 -0.1921 0.3505
## -7 0.0338 0.1022 -0.2558 0.3234
## -6 0.0282 0.0855 -0.2142 0.2706
## -5 0.1041 0.0781 -0.1173 0.3255
## -4 0.0922 0.0828 -0.1426 0.3270
## -3 0.0047 0.0814 -0.2261 0.2354
## -2 0.1275 0.0758 -0.0873 0.3423
## -1 0.0577 0.0723 -0.1474 0.2628
## 0 0.0360 0.1840 -0.4856 0.5575
## 1 -0.0824 0.1984 -0.6450 0.4801
## 2 0.0902 0.2107 -0.5071 0.6876
## 3 0.2064 0.2192 -0.4150 0.8279
## 4 0.3381 0.2336 -0.3241 1.0002
## 5 0.5077 0.2486 -0.1971 1.2125
## 6 0.6772 0.2614 -0.0640 1.4183
## 7 0.9587 0.2519 0.2446 1.6729 *
## 8 1.0203 0.3180 0.1188 1.9217 *
## 9 0.9739 0.4385 -0.2694 2.2172
## 10 1.0502 0.3939 -0.0665 2.1668
## 11 0.6012 0.5291 -0.8989 2.1013
## 12 0.2898 0.7397 -1.8073 2.3869
## 13 -0.2029 1.4373 -4.2779 3.8721
## 14 -0.0348 1.4106 -4.0341 3.9645
## 15 -0.2878 1.5194 -4.5955 4.0200
## 16 -3.3943 5.6500 -19.4129 12.6242
## ---
## Signif. codes: `*' confidence band does not cover 0
##
## Control Group: Never Treated, Anticipation Periods: 0
## Estimation Method: Outcome Regression
## NULL
# Create overall comparison table
overall_comparison <- data.frame(
Method = "TWFE",
Estimate = coef(twfe_model)["post"],
SE = se(twfe_model)["post"],
Observations = nobs(twfe_model),
stringsAsFactors = FALSE
)
if (exists("sc_success") && sc_success) {
# Aggregate overall effects from Sant'Anna & Callaway with error handling
tryCatch({
sc_overall <- aggte(sc_attgt, type = "group", na.rm = TRUE)
overall_comparison <- bind_rows(
overall_comparison,
data.frame(
Method = "Sant'Anna & Callaway",
Estimate = sc_overall$overall.att,
SE = sc_overall$overall.se,
Observations = sc_overall$DIDparams$n
)
)
cat("=== OVERALL AVERAGE TREATMENT EFFECT (SANT'ANNA & CALLAWAY) ===\n")
print(summary(sc_overall))
}, error = function(e) {
cat("Overall Sant'Anna & Callaway aggregation failed:", e$message, "\n")
})
}## === OVERALL AVERAGE TREATMENT EFFECT (SANT'ANNA & CALLAWAY) ===
##
## Call:
## aggte(MP = sc_attgt, type = "group", na.rm = TRUE)
##
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna. "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015>
##
##
## Overall summary of ATT's based on group/cohort aggregation:
## ATT Std. Error [ 95% Conf. Int.]
## 0.3861 0.1792 0.0349 0.7373 *
##
##
## Group Effects:
## Group Estimate Std. Error [95% Simult. Conf. Band]
## 2006 -3.5308 5.9879 -20.2297 13.1682
## 2007 0.5573 0.2092 -0.0261 1.1407
## 2010 0.6092 0.1610 0.1603 1.0581 *
## 2011 0.9764 0.1506 0.5564 1.3963 *
## 2012 1.2635 0.1722 0.7833 1.7437 *
## 2014 0.7523 0.2706 -0.0024 1.5069
## 2015 0.4332 0.1451 0.0286 0.8378 *
## 2016 -0.1163 0.3121 -0.9865 0.7540
## 2017 -0.0327 0.2495 -0.7285 0.6632
## 2018 0.3439 0.2446 -0.3382 1.0259
## 2019 -0.0471 0.2895 -0.8544 0.7601
## 2020 -0.1085 0.3150 -0.9869 0.7699
## 2021 0.4855 0.6492 -1.3249 2.2958
## 2022 0.1612 0.3282 -0.7541 1.0764
## ---
## Signif. codes: `*' confidence band does not cover 0
##
## Control Group: Never Treated, Anticipation Periods: 0
## Estimation Method: Outcome Regression
## NULL
# Calculate confidence intervals and significance
overall_comparison <- overall_comparison %>%
mutate(
CI_lower = Estimate - 1.96 * SE,
CI_upper = Estimate + 1.96 * SE,
Significant = ifelse(CI_lower > 0 | CI_upper < 0, "Yes", "No"),
Percent_Change = (Estimate / mean(final_data$publications)) * 100
)
kable(overall_comparison, digits = 3,
caption = "Overall Treatment Effect Comparison - Sant'Anna & Callaway vs TWFE") %>%
kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
row_spec(which(overall_comparison$Significant == "Yes"),
background = "#E6F3FF")| Method | Estimate | SE | Observations | CI_lower | CI_upper | Significant | Percent_Change | |
|---|---|---|---|---|---|---|---|---|
| post | TWFE | 0.429 | 0.043 | 104747 | 0.345 | 0.513 | Yes | 15.423 |
| …2 | Sant’Anna & Callaway | 0.386 | 0.179 | 21593 | 0.035 | 0.737 | Yes | 13.869 |
# Create a comprehensive visualization dashboard
visualization_list <- list()
# 1. Treatment cohort distribution
p_cohort_dist <- final_data %>%
filter(first.treat > 0) %>%
distinct(id, first.treat) %>%
ggplot(aes(x = first.treat)) +
geom_histogram(binwidth = 1, fill = "steelblue", alpha = 0.7) +
labs(title = "A) Distribution of First Grant Years",
x = "Year of First Grant", y = "Number of Authors") +
theme_minimal()
# 2. Publication trends by treatment status
p_trends_simple <- final_data %>%
filter(year >= 1995 & year <= 2024) %>%
group_by(year, treated = first.treat > 0) %>%
summarise(mean_publications = mean(publications), .groups = 'drop') %>%
ggplot(aes(x = year, y = mean_publications, color = treated)) +
geom_line(size = 1) +
geom_point() +
labs(title = "B) Publication Trends by Treatment Status",
x = "Year", y = "Mean Publications", color = "Treated") +
theme_minimal()
# Arrange basic plots
basic_dashboard <- p_cohort_dist | p_trends_simple
print(basic_dashboard)# Add overall comparison if available
if (nrow(overall_comparison) > 1) {
p_overall_compare <- overall_comparison %>%
ggplot(aes(x = Method, y = Estimate, fill = Method)) +
geom_col(alpha = 0.7) +
geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper), width = 0.2) +
labs(title = "C) Overall Treatment Effect Comparison",
subtitle = "TWFE vs Sant'Anna & Callaway Estimator",
x = "Method", y = "Treatment Effect (Publications)") +
theme_minimal() +
theme(legend.position = "none")
print(p_overall_compare)
}# Prepare data for Sant'Anna & Callaway event study
event_study_data <- final_data %>%
mutate(
id_num = as.numeric(factor(id)),
year = as.numeric(year),
first_treat = as.numeric(first.treat)
) %>%
filter(
year >= 1995 & year <= 2020,
first_treat >= 2000,
!is.na(publications),
!is.na(year),
!is.na(first_treat)
)
cat("Sant'Anna & Callaway Event Study Dataset:\n")## Sant'Anna & Callaway Event Study Dataset:
## Time period: 1995 - 2020
## Observations: 44918
## Unique authors: 7420
# Estimate event study using Sant'Anna & Callaway estimator
sc_event_attgt <- att_gt(
yname = "publications",
tname = "year",
idname = "id_num",
gname = "first_treat",
data = event_study_data,
control_group = "notyettreated",
est_method = "dr",
anticipation = 0,
allow_unbalanced_panel = TRUE
)
# Aggregate to event study
sc_event_study <- aggte(
sc_event_attgt,
type = "dynamic",
na.rm = TRUE,
min_e = -10,
max_e = 10
)
cat("=== SANT'ANNA & CALLAWAY EVENT STUDY RESULTS ===\n")## === SANT'ANNA & CALLAWAY EVENT STUDY RESULTS ===
##
## Call:
## aggte(MP = sc_event_attgt, type = "dynamic", min_e = -10, max_e = 10,
## na.rm = TRUE)
##
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna. "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015>
##
##
## Overall summary of ATT's based on event-study/dynamic aggregation:
## ATT Std. Error [ 95% Conf. Int.]
## 0.1945 0.3589 -0.5089 0.8978
##
##
## Dynamic Effects:
## Event time Estimate Std. Error [95% Simult. Conf. Band]
## -10 0.0994 0.1402 -0.2835 0.4823
## -9 -0.0770 0.1234 -0.4140 0.2600
## -8 0.1245 0.1369 -0.2494 0.4984
## -7 -0.0585 0.1177 -0.3799 0.2628
## -6 -0.0058 0.1022 -0.2849 0.2733
## -5 0.0769 0.0870 -0.1607 0.3144
## -4 0.0320 0.0903 -0.2147 0.2788
## -3 -0.0135 0.0848 -0.2451 0.2181
## -2 0.0255 0.0924 -0.2270 0.2780
## -1 -0.0528 0.0875 -0.2920 0.1863
## 0 -0.0016 0.2031 -0.5563 0.5531
## 1 -0.2148 0.2239 -0.8263 0.3968
## 2 -0.0514 0.2528 -0.7419 0.6390
## 3 0.0216 0.2501 -0.6616 0.7048
## 4 0.0609 0.2586 -0.6456 0.7673
## 5 0.3376 0.3221 -0.5422 1.2174
## 6 0.4181 0.3838 -0.6302 1.4664
## 7 0.6282 0.4433 -0.5827 1.8391
## 8 0.7137 0.4208 -0.4359 1.8634
## 9 0.3313 0.5761 -1.2425 1.9051
## 10 -0.1042 0.7782 -2.2300 2.0215
## ---
## Signif. codes: `*' confidence band does not cover 0
##
## Control Group: Not Yet Treated, Anticipation Periods: 0
## Estimation Method: Doubly Robust
## NULL
# Extract event study data for plotting
es_data <- data.frame(
period = sc_event_study$egt,
estimate = sc_event_study$att.egt,
std_error = sc_event_study$se.egt,
conf_low = sc_event_study$att.egt - 1.96 * sc_event_study$se.egt,
conf_high = sc_event_study$att.egt + 1.96 * sc_event_study$se.egt
)
# Test for pre-trends
pre_periods <- es_data %>% filter(period < 0)
pre_trend_test <- lm(estimate ~ period, data = pre_periods)
pre_trend_p <- summary(pre_trend_test)$coefficients[2, 4]
# Create enhanced plot with Sant'Anna & Callaway labeling
enhanced_plot <- ggplot(es_data, aes(x = period, y = estimate)) +
# Background shading for pre/post periods
annotate("rect", xmin = -10.5, xmax = -0.5, ymin = -Inf, ymax = Inf,
alpha = 0.03, fill = "red") +
annotate("rect", xmin = -0.5, xmax = 10.5, ymin = -Inf, ymax = Inf,
alpha = 0.03, fill = "blue") +
# Zero line
geom_hline(yintercept = 0, linetype = "dashed", color = "black", alpha = 0.5) +
# Treatment line
geom_vline(xintercept = -0.5, linetype = "solid", color = "darkblue", size = 1) +
# Confidence intervals
geom_ribbon(aes(ymin = conf_low, ymax = conf_high),
fill = "#2E86AB", alpha = 0.3) +
# Point estimates
geom_line(color = "#2E86AB", size = 1.5) +
geom_point(color = "#2E86AB", size = 4, fill = "white", shape = 21, stroke = 1.5) +
# Scale and labels
scale_x_continuous(
breaks = seq(-10, 10, by = 2),
labels = function(x) ifelse(x < 0, paste0(abs(x), "Y"),
ifelse(x > 0, paste0(x, "Y"), "Treatment\nYear")),
expand = expansion(mult = 0.05)
) +
labs(
title = "Dynamic Effects of Research Grants on Scientific Publications",
subtitle = "Sant'Anna & Callaway (2021) Estimator • Two-Decade Analysis",
x = "\nYears Relative to Grant Receipt",
y = "Treatment Effect \n (Additional Publications per Year)",
caption = paste(
"Method: Sant'Anna & Callaway (2021) Robust DiD Estimator • Control: Not-Yet-Treated Authors •",
"Pre-trend p-value:", round(pre_trend_p, 4),
"\nData: Web of Science • Period: 1995-2020"
)
) +
theme_minimal(base_size = 16) +
theme(
plot.title = element_text(face = "bold", size = 22, hjust = 0.5, margin = margin(b = 10)),
plot.subtitle = element_text(size = 16, hjust = 0.5, color = "gray40", margin = margin(b = 20)),
plot.caption = element_text(size = 12, color = "gray50", hjust = 1, margin = margin(t = 15)),
axis.title = element_text(face = "bold", size = 14),
axis.text = element_text(size = 12),
axis.text.x = element_text(angle = 0, hjust = 0.5, lineheight = 0.8),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray85", linewidth = 0.4),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA),
plot.margin = margin(1, 1, 1, 1, "cm")
)
# Add methodological annotation
enhanced_plot <- enhanced_plot
print(enhanced_plot)# Get cohort-specific estimates using Sant'Anna & Callaway
sc_cohort_effects <- aggte(sc_event_attgt, type = "group", na.rm = TRUE)
sc_cohort_data <- data.frame(
cohort = sc_cohort_effects$egt,
att = sc_cohort_effects$att.egt,
se = sc_cohort_effects$se.egt
) %>%
mutate(
conf_low = att - 1.96 * se,
conf_high = att + 1.96 * se,
significant = ifelse(conf_low > 0 | conf_high < 0, "Yes", "No")
)
# Plot cohort-specific effects
cohort_plot <- ggplot(sc_cohort_data, aes(x = factor(cohort), y = att)) +
geom_pointrange(aes(ymin = conf_low, ymax = conf_high, color = significant),
size = 1, fatten = 2) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
scale_color_manual(values = c("Yes" = "darkgreen", "No" = "gray60")) +
labs(
title = "Cohort-Specific Average Treatment Effects",
subtitle = "Sant'Anna & Callaway Estimator by Treatment Cohort",
x = "Treatment Cohort (Year)",
y = "Average Treatment Effect on Publications",
color = "Statistically\nSignificant"
) +
theme_minimal(base_size = 14) +
theme(
legend.position = "top",
axis.text.x = element_text(angle = 45, hjust = 1)
)
print(cohort_plot)