# Load required packages
library(did)
library(bacondecomp)
library(fixest)
library(ggplot2)
library(dplyr)
library(tidyr)
library(readr)
library(stringr)
library(purrr)
library(jsonlite)
library(knitr)
library(kableExtra)
library(ggpubr)
library(patchwork)

# Set theme for consistent plotting
theme_set(theme_minimal(base_size = 12))

Executive Summary

This analysis compares Traditional Two-Way Fixed Effects (TWFE) with the Sant’Anna & Callaway Difference-in-Differences (DiD) estimator for estimating the effect of Slovak research grants (APVV) on publication outcomes using real Web of Science data.

1. Data Preparation and Grant Extraction

extract_apvv_year_simple <- function(grant_string) {
  if (is.na(grant_string) || grant_string == "" || grant_string == "nan") {
    return(NA)
  }
  
  grant_str <- as.character(grant_string)
  
  # Look specifically for APVV-XXXX-XX pattern (like APVV-0088-12)
  pattern1 <- "APVV[-_]\\d{4}[-_](\\d{2})"
  match1 <- str_match(grant_str, regex(pattern1, ignore_case = TRUE))
  
  if (!is.na(match1[1,1]) && !is.na(match1[1,2])) {
    year_digits <- as.numeric(match1[1,2])
    year <- 2000 + year_digits
    if (year >= 2000 && year <= 2025) {
      return(year)
    }
  }
  
  # Look for APVV-XX-XXXX pattern (like APVV-15-0496)
  pattern2 <- "APVV[-_](\\d{2})[-_]\\d{4}"
  match2 <- str_match(grant_str, regex(pattern2, ignore_case = TRUE))
  
  if (!is.na(match2[1,1]) && !is.na(match2[1,2])) {
    year_digits <- as.numeric(match2[1,2])
    year <- 2000 + year_digits
    if (year >= 2000 && year <= 2025) {
      return(year)
    }
  }
  
  return(NA)
}

Grant Extraction Validation

# Test the specific case
test_apvv <- c("APVV-0088-12", "APVV-15-0496", "APVV-0842-11", "APVV-16-0079")

results <- data.frame(
  Grant_String = test_apvv,
  Extracted_Year = sapply(test_apvv, extract_apvv_year_simple)
)

kable(results, caption = "Grant Year Extraction Validation") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
Grant Year Extraction Validation
Grant_String Extracted_Year
APVV-0088-12 APVV-0088-12 2012
APVV-15-0496 APVV-15-0496 2015
APVV-0842-11 APVV-0842-11 2011
APVV-16-0079 APVV-16-0079 2016

2. Data Loading and Preparation

# Load the real data
data_file <- "/Users/tomasoles/Library/CloudStorage/Dropbox/Research_Publications/slovak_university/wos_results/wos_results_full_sample.json"

load_real_data <- function(file_path) {
  cat("Loading data from:", file_path, "\n")
  
  # Load the JSON data
  df <- fromJSON(file_path)
  
  # Convert to dataframe if it's a list
  if (is.list(df) && !is.data.frame(df)) {
    df <- as.data.frame(df)
  }
  
  cat("Original data dimensions:", nrow(df), "rows,", ncol(df), "columns\n")
  
  # Keep only relevant document types
  keep_types <- c("Article", "Proceedings Paper", "Meeting Abstract")
  df <- df %>% filter(document_type %in% keep_types)
  
  cat("After filtering document types:", nrow(df), "rows\n")
  
  return(df)
}

# Load the data
tryCatch({
  raw_data <- load_real_data(data_file)
  cat("✓ Successfully loaded real data\n")
}, error = function(e) {
  cat("Could not load real data:", e$message, "\n")
  stop("Data loading failed - please check the file path")
})
## Loading data from: /Users/tomasoles/Library/CloudStorage/Dropbox/Research_Publications/slovak_university/wos_results/wos_results_full_sample.json 
## Original data dimensions: 359614 rows, 41 columns
## After filtering document types: 314331 rows
## ✓ Successfully loaded real data

3. Data Processing Pipeline

Step 1: Grant Year Extraction with Filtering (2000-2022)

# Extract grant years for each author WITH FILTERING (2000-2022)
grant_years <- raw_data %>%
  group_by(author_rid) %>%
  summarise(
    first_grant_year = {
      all_grants <- unique(grant_ids[!is.na(grant_ids) & grant_ids != "" & grant_ids != "nan"])
      if (length(all_grants) == 0) {
        NA
      } else {
        years <- map_dbl(all_grants, extract_apvv_year_simple)
        years <- years[!is.na(years)]
        # FILTER: Keep only years between 2000 and 2022
        years <- years[years >= 2000 & years <= 2022]
        if (length(years) > 0) min(years) else NA
      }
    },
    .groups = 'drop'
  )

cat("Grant extraction completed:\n")
## Grant extraction completed:
cat("  Authors with grants:", sum(!is.na(grant_years$first_grant_year)), "\n")
##   Authors with grants: 8727
cat("  Authors without grants:", sum(is.na(grant_years$first_grant_year)), "\n")
##   Authors without grants: 16322
# Show grant year distribution AFTER FILTERING
grant_dist <- grant_years %>%
  filter(!is.na(first_grant_year)) %>%
  count(first_grant_year) %>%
  arrange(first_grant_year)

kable(grant_dist, caption = "Grant Year Distribution (2000-2022 only)") %>%
  kable_styling(bootstrap_options = "striped")
Grant Year Distribution (2000-2022 only)
first_grant_year n
2000 5
2001 8
2002 1
2004 4
2006 239
2007 613
2009 1
2010 851
2011 903
2012 750
2014 742
2015 1435
2016 657
2017 653
2018 494
2019 390
2020 508
2021 268
2022 205

Step 2: Create Treatment Variables

# Merge grant years and create treatment variables
processed_data <- raw_data %>%
  left_join(grant_years, by = "author_rid") %>%
  mutate(
    first.treat = ifelse(is.na(first_grant_year), 0, first_grant_year),
    year = as.integer(year),
    treated = first.treat > 0
  )

cat("Treatment variables created:\n")
## Treatment variables created:
cat("  Treated authors:", sum(processed_data$treated, na.rm = TRUE), "\n")
##   Treated authors: 213401
cat("  Control authors:", sum(!processed_data$treated, na.rm = TRUE), "\n")
##   Control authors: 100930

Step 3: Create Author-Year Panel for DiD Analysis

# Create publication count per author-year
author_year_pubs <- processed_data %>%
  group_by(author_rid, year) %>%
  summarise(
    publications = n(),
    .groups = 'drop'
  )

# Merge with treatment info
treatment_info <- processed_data %>%
  distinct(author_rid, first.treat, first_grant_year)

final_data <- author_year_pubs %>%
  left_join(treatment_info, by = "author_rid") %>%
  rename(id = author_rid) %>%
  filter(!is.na(year), !is.na(first.treat), !is.na(publications))

cat("Final panel data created:\n")
## Final panel data created:
cat("  Total observations:", nrow(final_data), "\n")
##   Total observations: 112910
cat("  Unique authors:", n_distinct(final_data$id), "\n")
##   Unique authors: 25049
cat("  Time period:", min(final_data$year), "to", max(final_data$year), "\n")
##   Time period: 1979 to 2026

4. Data Quality Assessment

# Treatment group summary
treatment_summary <- final_data %>%
  distinct(id, first.treat) %>%
  count(first.treat) %>%
  arrange(first.treat)

kable(treatment_summary, caption = "Treatment Group Distribution") %>%
  kable_styling(bootstrap_options = "striped")
Treatment Group Distribution
first.treat n
0 16322
2000 5
2001 8
2002 1
2004 4
2006 239
2007 613
2009 1
2010 851
2011 903
2012 750
2014 742
2015 1435
2016 657
2017 653
2018 494
2019 390
2020 508
2021 268
2022 205
# Publication patterns
pub_stats <- final_data %>%
  group_by(treatment_status = ifelse(first.treat == 0, "Control", "Treated")) %>%
  summarise(
    Authors = n_distinct(id),
    Observations = n(),
    Mean_Publications = mean(publications),
    Median_Publications = median(publications),
    Zero_Publications = sum(publications == 0),
    .groups = 'drop'
  )

kable(pub_stats, caption = "Publication Patterns by Treatment Status") %>%
  kable_styling(bootstrap_options = "striped")
Publication Patterns by Treatment Status
treatment_status Authors Observations Mean_Publications Median_Publications Zero_Publications
Control 16322 47777 2.112523 1 0
Treated 8727 65133 3.276388 2 0

6. Traditional TWFE Estimation

# Prepare data for TWFE - ensure proper formatting
twfe_data <- final_data %>%
  mutate(
    post = as.integer(first.treat > 0 & year >= first.treat),
    author_factor = as.factor(id),
    year_factor = as.factor(year)
  )

# Standard TWFE specification
twfe_model <- feols(publications ~ post | author_factor + year_factor, 
                   data = twfe_data)

# Create numeric author ID for Bacon decomposition
twfe_data <- twfe_data %>%
  mutate(author_id = as.numeric(factor(id)))

# Print TWFE results
cat("=== TRADITIONAL TWFE RESULTS ===\n")
## === TRADITIONAL TWFE RESULTS ===
print(summary(twfe_model))
## OLS estimation, Dep. Var.: publications
## Observations: 104,747
## Fixed-effects: author_factor: 16,887,  year_factor: 41
## Standard-errors: IID 
##      Estimate Std. Error t value  Pr(>|t|)    
## post 0.429358   0.042919 10.0039 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 2.73233     Adj. R2: 0.648963
##                 Within R2: 0.001138
# Extract coefficients for plotting
twfe_coefs <- data.frame(
  method = "TWFE",
  estimate = coef(twfe_model)["post"],
  se = se(twfe_model)["post"],
  term = "Overall Effect"
)

# Simple event study for TWFE (if we have enough variation)
tryCatch({
  # Create relative time indicators for a simple event study
  event_study_data <- twfe_data %>%
    filter(first.treat > 0) %>%
    mutate(
      rel_time = year - first.treat,
      rel_time_factor = factor(rel_time)
    )
  
  # Simple event study with limited leads/lags
  twfe_event <- feols(publications ~ i(rel_time_factor, ref = "-1") | 
                       author_factor + year_factor,
                     data = event_study_data %>% 
                       filter(rel_time >= -3, rel_time <= 3))
  
  cat("\n=== SIMPLE TWFE EVENT STUDY ===\n")
  print(summary(twfe_event))
  
  # Extract event study coefficients
  twfe_event_coefs <- broom::tidy(twfe_event, conf.int = TRUE) %>%
    filter(str_detect(term, "rel_time_factor")) %>%
    mutate(
      time = as.numeric(str_extract(term, "[0-9+-]+")),
      method = "TWFE"
    )
  
}, error = function(e) {
  cat("TWFE event study failed:", e$message, "\n")
  twfe_event_coefs <- NULL
})
## 
## === SIMPLE TWFE EVENT STUDY ===
## OLS estimation, Dep. Var.: publications
## Observations: 18,421
## Fixed-effects: author_factor: 4,729,  year_factor: 23
## Standard-errors: IID 
##                      Estimate Std. Error   t value Pr(>|t|)    
## rel_time_factor::-3  0.150650   0.142818  1.054836 0.291519    
## rel_time_factor::-2  0.013673   0.118969  0.114931 0.908501    
## rel_time_factor::0  -0.105111   0.085989 -1.222369 0.221589    
## rel_time_factor::1  -0.219432   0.079809 -2.749473 0.005977 ** 
## rel_time_factor::2  -0.050284   0.071099 -0.707244 0.479427    
## ... 1 variable was removed because of collinearity (rel_time_factor::3)
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 2.57147     Adj. R2: 0.63651
##                 Within R2: 9.44e-4

7. Fixed Bacon Decomposition

# Create balanced panel subset for Bacon decomposition
bacon_data <- twfe_data %>%
  group_by(author_id) %>%
  filter(n() >= 3) %>%  # Keep only authors with at least 3 observations
  ungroup()

cat("Data for Bacon decomposition:", nrow(bacon_data), "observations\n")
## Data for Bacon decomposition: 96894 observations
cat("Unique authors for Bacon:", n_distinct(bacon_data$author_id), "\n")
## Unique authors for Bacon: 12960
# Perform Bacon decomposition with balanced data
tryCatch({
  bacon_results <- bacon(publications ~ post,
                        data = bacon_data,
                        id_var = "author_id",
                        time_var = "year")
  
  # Plot Bacon decomposition
  p_bacon_weights <- ggplot(bacon_results$weights, 
                           aes(x = weight, y = reorder(type, weight))) +
    geom_col(fill = "steelblue", alpha = 0.7) +
    labs(title = "Bacon Decomposition: Weights on 2x2 Comparisons",
         x = "Weight", y = "Comparison Type") +
    theme_minimal()
  
  p_bacon_effects <- ggplot(bacon_results$two_by_twos,
                           aes(x = estimate, y = reorder(type, estimate))) +
    geom_point(size = 3, color = "darkred") +
    geom_vline(xintercept = 0, linetype = "dashed") +
    labs(title = "Bacon Decomposition: 2x2 Treatment Effects",
         x = "Treatment Effect Estimate", y = "Comparison Type") +
    theme_minimal()
  
  # Combine Bacon plots
  p_bacon_combined <- p_bacon_weights / p_bacon_effects
  print(p_bacon_combined)
  
  # Display summary of Bacon decomposition
  cat("=== BACON DECOMPOSITION SUMMARY ===\n")
  print(bacon_results$omega_weighted)
  
}, error = function(e) {
  cat("Bacon decomposition failed:", e$message, "\n")
  
  # Alternative: Manual Bacon-style analysis
  cat("\n=== ALTERNATIVE: MANUAL COHORT ANALYSIS ===\n")
  
  cohort_effects <- twfe_data %>%
    filter(first.treat > 0) %>%
    group_by(first.treat) %>%
    summarise(
      pre_mean = mean(publications[year < first.treat]),
      post_mean = mean(publications[year >= first.treat]),
      effect = post_mean - pre_mean,
      n_authors = n_distinct(author_id),
      .groups = 'drop'
    )
  
  kable(cohort_effects, caption = "Manual Cohort Analysis") %>%
    kable_styling(bootstrap_options = "striped")
})
## Bacon decomposition failed: Unbalanced Panel 
## 
## === ALTERNATIVE: MANUAL COHORT ANALYSIS ===
Manual Cohort Analysis
first.treat pre_mean post_mean effect n_authors
2000 NaN 4.727273 NaN 5
2001 NaN 3.966667 NaN 8
2002 NaN 4.071429 NaN 1
2004 NaN 8.083333 NaN 4
2006 8.419491 4.624086 -3.7954053 239
2007 2.372777 3.407396 1.0346196 613
2009 NaN 1.500000 NaN 1
2010 2.355609 3.591031 1.2354219 851
2011 2.301922 3.600478 1.2985566 903
2012 2.075949 3.777201 1.7012517 750
2014 2.197519 3.745480 1.5479611 742
2015 2.293931 3.255987 0.9620556 1435
2016 2.359375 3.086428 0.7270533 657
2017 2.594901 3.221037 0.6261357 653
2018 2.519621 3.045094 0.5254733 494
2019 2.819005 3.019031 0.2000266 390
2020 2.654369 2.583539 -0.0708298 508
2021 2.492857 2.613828 0.1209709 268
2022 2.474771 2.734491 0.2597207 205

8. Sant’Anna & Callaway DiD Estimation

# Prepare clean data for Sant'Anna & Callaway DiD with proper numeric IDs
did_data_clean <- final_data %>%
  mutate(
    # Create numeric ID for did package
    id_num = as.numeric(factor(id)),
    year = as.numeric(year),
    first.treat = as.numeric(first.treat)
  ) %>%
  # Filter to reasonable time period for analysis
  filter(year >= 2000, year <= 2022) %>%
  # Remove any missing values that could cause issues
  filter(!is.na(publications), !is.na(year), !is.na(first.treat), !is.na(id_num))

cat("Clean data for Sant'Anna & Callaway DiD analysis:", nrow(did_data_clean), "observations\n")
## Clean data for Sant'Anna & Callaway DiD analysis: 88693 observations
cat("Unique authors:", n_distinct(did_data_clean$id_num), "\n")
## Unique authors: 21612
# Check treatment cohort sizes
cohort_sizes <- did_data_clean %>%
  filter(first.treat > 0) %>%
  distinct(id_num, first.treat) %>%
  count(first.treat) %>%
  arrange(first.treat)

cat("Treatment cohort sizes:\n")
## Treatment cohort sizes:
print(cohort_sizes)
## # A tibble: 19 × 2
##    first.treat     n
##          <dbl> <int>
##  1        2000     5
##  2        2001     8
##  3        2002     1
##  4        2004     4
##  5        2006   238
##  6        2007   613
##  7        2009     1
##  8        2010   851
##  9        2011   901
## 10        2012   750
## 11        2014   739
## 12        2015  1407
## 13        2016   618
## 14        2017   605
## 15        2018   421
## 16        2019   296
## 17        2020   346
## 18        2021   153
## 19        2022   105
# Use only cohorts with sufficient observations
viable_cohorts <- cohort_sizes %>%
  filter(n >= 10) %>%  # At least 10 authors per cohort
  pull(first.treat)

cat("Viable cohorts (n >= 10):", viable_cohorts, "\n")
## Viable cohorts (n >= 10): 2006 2007 2010 2011 2012 2014 2015 2016 2017 2018 2019 2020 2021 2022
if (length(viable_cohorts) == 0) {
  stop("No viable treatment cohorts with sufficient observations")
}

# Filter data to viable cohorts
did_data_final <- did_data_clean %>%
  filter(first.treat %in% viable_cohorts | first.treat == 0)

cat("Final Sant'Anna & Callaway DiD data:", nrow(did_data_final), "observations\n")
## Final Sant'Anna & Callaway DiD data: 88563 observations
cat("Final unique authors:", n_distinct(did_data_final$id_num), "\n")
## Final unique authors: 21593
# Estimate group-time average treatment effects with robust settings
tryCatch({
  sc_attgt <- att_gt(
    yname = "publications",
    tname = "year",
    idname = "id_num",
    gname = "first.treat",
    data = did_data_final,
    control_group = "nevertreated",  # Use never-treated for simplicity
    est_method = "reg",  # Use regression method (more stable)
    anticipation = 0,
    allow_unbalanced_panel = TRUE
  )
  
  # Display group-time ATTs
  cat("=== SANT'ANNA & CALLAWAY GROUP-TIME AVERAGE TREATMENT EFFECTS ===\n")
  print(summary(sc_attgt))
  
  # Set flag for successful DiD estimation
  sc_success <- TRUE
  
}, error = function(e) {
  cat("Sant'Anna & Callaway DiD estimation failed:", e$message, "\n")
  sc_success <- FALSE
})
## === SANT'ANNA & CALLAWAY GROUP-TIME AVERAGE TREATMENT EFFECTS ===
## 
## Call:
## att_gt(yname = "publications", tname = "year", idname = "id_num", 
##     gname = "first.treat", data = did_data_final, allow_unbalanced_panel = TRUE, 
##     control_group = "nevertreated", anticipation = 0, est_method = "reg")
## 
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 
## 
## Group-Time Average Treatment Effects:
##  Group Time ATT(g,t) Std. Error [95% Simult.  Conf. Band]  
##   2006 2001   0.1592     1.1590       -3.9142      4.2327  
##   2006 2002  -0.9370     1.4755       -6.1226      4.2486  
##   2006 2003  -0.7347     1.2355       -5.0770      3.6076  
##   2006 2004   0.1969     1.0652       -3.5469      3.9408  
##   2006 2005   0.0218     0.9206       -3.2137      3.2574  
##   2006 2006  -4.0194     6.0148      -25.1588     17.1200  
##   2006 2007  -4.5862     6.5763      -27.6991     18.5267  
##   2006 2008  -4.5020     7.0147      -29.1558     20.1518  
##   2006 2009  -4.3782     6.8007      -28.2799     19.5234  
##   2006 2010  -4.2104     6.7802      -28.0401     19.6193  
##   2006 2011  -4.0404     6.9082      -28.3198     20.2390  
##   2006 2012  -3.8434     6.6822      -27.3286     19.6418  
##   2006 2013  -3.3246     5.7850      -23.6565     17.0073  
##   2006 2014  -2.9281     5.4536      -22.0952     16.2390  
##   2006 2015  -3.6955     5.8961      -24.4178     17.0268  
##   2006 2016  -3.1337     5.5821      -22.7523     16.4850  
##   2006 2017  -3.4915     5.8622      -24.0945     17.1115  
##   2006 2018  -2.7322     5.2232      -21.0894     15.6251  
##   2006 2019  -2.3230     5.1957      -20.5838     15.9378  
##   2006 2020  -2.7346     4.9869      -20.2614     14.7921  
##   2006 2021  -2.6853     5.4204      -21.7358     16.3651  
##   2006 2022  -3.3943     5.6688      -23.3178     16.5291  
##   2007 2001   0.4385     0.2737       -0.5233      1.4003  
##   2007 2002  -0.0354     0.2405       -0.8807      0.8099  
##   2007 2003  -0.0696     0.2232       -0.8541      0.7149  
##   2007 2004   0.0287     0.2493       -0.8473      0.9048  
##   2007 2005  -0.2310     0.2244       -1.0198      0.5577  
##   2007 2006   0.3787     0.2221       -0.4018      1.1592  
##   2007 2007   0.0017     0.2328       -0.8164      0.8197  
##   2007 2008   0.0456     0.2332       -0.7740      0.8653  
##   2007 2009   0.0999     0.2199       -0.6730      0.8727  
##   2007 2010   0.3421     0.2473       -0.5271      1.2113  
##   2007 2011   0.0416     0.2322       -0.7745      0.8576  
##   2007 2012   0.5789     0.2568       -0.3236      1.4814  
##   2007 2013   0.5539     0.2456       -0.3093      1.4171  
##   2007 2014   1.1429     0.2888        0.1279      2.1578 *
##   2007 2015   0.8939     0.2722       -0.0628      1.8506  
##   2007 2016   0.7595     0.2869       -0.2488      1.7678  
##   2007 2017   0.8109     0.2906       -0.2105      1.8323  
##   2007 2018   0.6394     0.2807       -0.3472      1.6261  
##   2007 2019   0.7294     0.3124       -0.3687      1.8275  
##   2007 2020   0.6202     0.2857       -0.3838      1.6243  
##   2007 2021   1.0134     0.3352       -0.1645      2.1914  
##   2007 2022   0.6431     0.2938       -0.3895      1.6757  
##   2010 2001  -0.5469     0.2985       -1.5959      0.5021  
##   2010 2002   0.1768     0.3185       -0.9427      1.2963  
##   2010 2003   0.1345     0.3335       -1.0378      1.3067  
##   2010 2004  -0.0265     0.2574       -0.9311      0.8782  
##   2010 2005   0.1695     0.2425       -0.6829      1.0220  
##   2010 2006  -0.0124     0.2089       -0.7464      0.7217  
##   2010 2007   0.1016     0.2101       -0.6369      0.8401  
##   2010 2008  -0.0969     0.2101       -0.8354      0.6417  
##   2010 2009   0.3054     0.1677       -0.2841      0.8950  
##   2010 2010   0.1792     0.1969       -0.5127      0.8712  
##   2010 2011  -0.1090     0.1779       -0.7344      0.5164  
##   2010 2012   0.0444     0.1791       -0.5849      0.6737  
##   2010 2013   0.3401     0.1677       -0.2492      0.9294  
##   2010 2014   0.6831     0.2040       -0.0339      1.4001  
##   2010 2015   0.7715     0.2009        0.0656      1.4775 *
##   2010 2016   0.5531     0.1988       -0.1456      1.2518  
##   2010 2017   0.7722     0.2379       -0.0639      1.6083  
##   2010 2018   0.7742     0.2208       -0.0019      1.5503  
##   2010 2019   0.8326     0.2365        0.0015      1.6637 *
##   2010 2020   1.1706     0.2378        0.3348      2.0065 *
##   2010 2021   1.0892     0.2601        0.1750      2.0033 *
##   2010 2022   0.8182     0.2374       -0.0162      1.6526  
##   2011 2001   0.5045     0.2559       -0.3947      1.4037  
##   2011 2002   0.1088     0.3091       -0.9777      1.1953  
##   2011 2003   0.0936     0.2615       -0.8255      1.0127  
##   2011 2004  -0.1269     0.2808       -1.1139      0.8601  
##   2011 2005  -0.1485     0.2564       -1.0495      0.7526  
##   2011 2006   0.2125     0.1961       -0.4766      0.9017  
##   2011 2007   0.0290     0.2090       -0.7057      0.7637  
##   2011 2008   0.1490     0.2307       -0.6619      0.9599  
##   2011 2009   0.2057     0.2179       -0.5603      0.9717  
##   2011 2010  -0.2768     0.1790       -0.9060      0.3524  
##   2011 2011   0.3341     0.1619       -0.2350      0.9032  
##   2011 2012   0.4297     0.1669       -0.1567      1.0162  
##   2011 2013   0.4240     0.1765       -0.1961      1.0442  
##   2011 2014   0.8770     0.1879        0.2166      1.5374 *
##   2011 2015   0.8398     0.1810        0.2037      1.4758 *
##   2011 2016   0.9777     0.2180        0.2114      1.7441 *
##   2011 2017   0.9275     0.2344        0.1037      1.7514 *
##   2011 2018   1.2614     0.2003        0.5573      1.9655 *
##   2011 2019   1.4452     0.2804        0.4597      2.4307 *
##   2011 2020   1.5562     0.2946        0.5209      2.5914 *
##   2011 2021   1.4485     0.2593        0.5373      2.3597 *
##   2011 2022   1.1954     0.2137        0.4442      1.9466 *
##   2012 2001   0.5433     0.3911       -0.8312      1.9178  
##   2012 2002  -0.0423     0.3152       -1.1500      1.0654  
##   2012 2003   0.1318     0.2737       -0.8300      1.0936  
##   2012 2004  -0.3471     0.3196       -1.4703      0.7762  
##   2012 2005   0.3282     0.3139       -0.7750      1.4315  
##   2012 2006  -0.0960     0.2288       -0.8999      0.7080  
##   2012 2007   0.1515     0.2434       -0.7040      1.0070  
##   2012 2008   0.2205     0.2832       -0.7748      1.2159  
##   2012 2009   0.0615     0.3070       -1.0176      1.1406  
##   2012 2010  -0.1658     0.2383       -1.0035      0.6718  
##   2012 2011  -0.0186     0.2128       -0.7663      0.7292  
##   2012 2012   0.3763     0.1899       -0.2912      1.0439  
##   2012 2013   0.2921     0.1845       -0.3564      0.9406  
##   2012 2014   0.8932     0.2142        0.1405      1.6458 *
##   2012 2015   0.6134     0.2048       -0.1062      1.3331  
##   2012 2016   0.7477     0.1935        0.0678      1.4276 *
##   2012 2017   1.0660     0.2344        0.2421      1.8900 *
##   2012 2018   1.7533     0.2777        0.7773      2.7292 *
##   2012 2019   1.9810     0.2616        1.0615      2.9006 *
##   2012 2020   2.1254     0.2883        1.1122      3.1386 *
##   2012 2021   2.0916     0.3000        1.0374      3.1458 *
##   2012 2022   1.9581     0.3121        0.8612      3.0550 *
##   2014 2001   0.4163     0.3492       -0.8109      1.6435  
##   2014 2002   0.0943     0.3698       -1.2054      1.3939  
##   2014 2003   0.1440     0.3868       -1.2154      1.5035  
##   2014 2004  -0.4716     0.3685       -1.7668      0.8236  
##   2014 2005   0.5588     0.4716       -1.0986      2.2162  
##   2014 2006  -0.3626     0.4714       -2.0192      1.2940  
##   2014 2007   0.0839     0.3681       -1.2097      1.3776  
##   2014 2008  -0.1706     0.3356       -1.3501      1.0089  
##   2014 2009   0.1461     0.2365       -0.6852      0.9774  
##   2014 2010   0.2863     0.2227       -0.4966      1.0692  
##   2014 2011  -0.1214     0.2310       -0.9332      0.6904  
##   2014 2012   0.0061     0.2253       -0.7856      0.7979  
##   2014 2013   0.4732     0.2880       -0.5391      1.4855  
##   2014 2014   0.2564     0.2887       -0.7582      1.2710  
##   2014 2015   0.1750     0.2673       -0.7645      1.1146  
##   2014 2016   0.0630     0.2766       -0.9090      1.0350  
##   2014 2017   0.3833     0.2986       -0.6661      1.4327  
##   2014 2018   0.8363     0.3300       -0.3236      1.9963  
##   2014 2019   1.1700     0.3698       -0.1297      2.4697  
##   2014 2020   1.5260     0.4004        0.1187      2.9334 *
##   2014 2021   1.3199     0.3127        0.2208      2.4190 *
##   2014 2022   1.0404     0.3478       -0.1820      2.2628  
##   2015 2001  -0.4980     0.3182       -1.6164      0.6204  
##   2015 2002   0.2358     0.2219       -0.5439      1.0155  
##   2015 2003  -0.1451     0.2481       -1.0172      0.7270  
##   2015 2004   0.2112     0.2604       -0.7041      1.1266  
##   2015 2005  -0.1371     0.2660       -1.0718      0.7977  
##   2015 2006   0.1143     0.2087       -0.6191      0.8476  
##   2015 2007   0.3352     0.2628       -0.5882      1.2587  
##   2015 2008  -0.3503     0.2869       -1.3586      0.6580  
##   2015 2009   0.3536     0.1613       -0.2133      0.9204  
##   2015 2010   0.1902     0.1784       -0.4368      0.8171  
##   2015 2011   0.0120     0.1720       -0.5926      0.6166  
##   2015 2012   0.0460     0.1493       -0.4787      0.5707  
##   2015 2013   0.4456     0.1880       -0.2151      1.1063  
##   2015 2014  -0.1164     0.1538       -0.6571      0.4242  
##   2015 2015   0.3677     0.1577       -0.1865      0.9220  
##   2015 2016  -0.0049     0.1460       -0.5179      0.5081  
##   2015 2017   0.2346     0.1585       -0.3225      0.7917  
##   2015 2018   0.3754     0.1634       -0.1990      0.9498  
##   2015 2019   0.5600     0.1992       -0.1401      1.2601  
##   2015 2020   0.5063     0.2002       -0.1973      1.2099  
##   2015 2021   0.6392     0.1956       -0.0483      1.3268  
##   2015 2022   0.7874     0.2294       -0.0190      1.5938  
##   2016 2001  -1.2914     1.7075       -7.2925      4.7097  
##   2016 2002   0.5292     0.5814       -1.5141      2.5725  
##   2016 2003  -0.8530     0.3369       -2.0370      0.3311  
##   2016 2004   0.6031     0.2682       -0.3397      1.5458  
##   2016 2005   0.3058     0.3658       -0.9799      1.5915  
##   2016 2006  -0.3526     0.4091       -1.7903      1.0851  
##   2016 2007   0.0413     0.3526       -1.1979      1.2804  
##   2016 2008  -0.1020     0.2181       -0.8684      0.6644  
##   2016 2009   0.5685     0.2708       -0.3833      1.5203  
##   2016 2010  -0.4189     0.3070       -1.4977      0.6599  
##   2016 2011   0.0936     0.3440       -1.1155      1.3027  
##   2016 2012   0.2101     0.2467       -0.6570      1.0773  
##   2016 2013  -0.0928     0.2075       -0.8220      0.6364  
##   2016 2014   0.2063     0.2410       -0.6406      1.0532  
##   2016 2015   0.4638     0.3076       -0.6172      1.5448  
##   2016 2016  -0.1018     0.2916       -1.1266      0.9229  
##   2016 2017  -0.3730     0.3289       -1.5288      0.7828  
##   2016 2018  -0.0717     0.3816       -1.4130      1.2695  
##   2016 2019  -0.2185     0.3597       -1.4827      1.0456  
##   2016 2020  -0.1546     0.3544       -1.4003      1.0911  
##   2016 2021  -0.0054     0.3603       -1.2719      1.2610  
##   2016 2022   0.1113     0.3843       -1.2392      1.4618  
##   2017 2001  -0.0606     0.7160       -2.5771      2.4558  
##   2017 2002   0.9889     0.5287       -0.8693      2.8471  
##   2017 2003  -0.6116     0.8113       -3.4632      2.2399  
##   2017 2004  -0.1621     0.6147       -2.3224      1.9982  
##   2017 2005  -0.2961     0.4100       -1.7372      1.1450  
##   2017 2006   0.2130     0.3243       -0.9266      1.3527  
##   2017 2007   0.0775     0.5744       -1.9414      2.0964  
##   2017 2008  -0.4098     0.5715       -2.4186      1.5989  
##   2017 2009   0.9771     0.3815       -0.3638      2.3179  
##   2017 2010  -0.6814     0.3591       -1.9434      0.5805  
##   2017 2011   0.1006     0.2953       -0.9371      1.1383  
##   2017 2012  -0.0518     0.2566       -0.9538      0.8502  
##   2017 2013   0.4942     0.2843       -0.5049      1.4933  
##   2017 2014   0.0910     0.2690       -0.8544      1.0363  
##   2017 2015   0.1836     0.2630       -0.7408      1.1081  
##   2017 2016   0.1663     0.2587       -0.7430      1.0757  
##   2017 2017  -0.2616     0.2497       -1.1391      0.6159  
##   2017 2018  -0.0062     0.2788       -0.9859      0.9735  
##   2017 2019  -0.0586     0.2695       -1.0057      0.8885  
##   2017 2020  -0.0214     0.2740       -0.9844      0.9416  
##   2017 2021  -0.0277     0.3431       -1.2336      1.1782  
##   2017 2022   0.1796     0.4938       -1.5557      1.9150  
##   2018 2001  -2.0694     0.8946       -5.2134      1.0746  
##   2018 2002   1.1818     0.5978       -0.9193      3.2828  
##   2018 2003   0.4291     0.6563       -1.8777      2.7358  
##   2018 2004  -1.3008     0.5086       -3.0884      0.4869  
##   2018 2005   0.4283     0.3380       -0.7595      1.6161  
##   2018 2006   0.0634     0.3889       -1.3035      1.4302  
##   2018 2007  -0.0564     0.4109       -1.5004      1.3877  
##   2018 2008   0.4211     0.5821       -1.6247      2.4670  
##   2018 2009   0.0059     0.5916       -2.0733      2.0851  
##   2018 2010  -0.3342     0.3041       -1.4031      0.7348  
##   2018 2011   1.2435     1.0201       -2.3419      4.8289  
##   2018 2012  -1.2274     1.0554       -4.9367      2.4818  
##   2018 2013   0.2111     0.2334       -0.6092      1.0315  
##   2018 2014   0.3243     0.3299       -0.8354      1.4839  
##   2018 2015  -0.1561     0.2859       -1.1610      0.8488  
##   2018 2016   0.6241     0.5030       -1.1438      2.3920  
##   2018 2017  -0.5982     0.4495       -2.1779      0.9815  
##   2018 2018   0.2102     0.2672       -0.7289      1.1494  
##   2018 2019   0.0955     0.2810       -0.8922      1.0831  
##   2018 2020   0.5032     0.3283       -0.6506      1.6570  
##   2018 2021   0.4377     0.3518       -0.7986      1.6740  
##   2018 2022   0.4727     0.3817       -0.8687      1.8141  
##   2019 2001   2.4163     0.2428        1.5629      3.2697 *
##   2019 2002  -1.8682     0.4916       -3.5961     -0.1404 *
##   2019 2003  -0.5077     0.5150       -2.3179      1.3024  
##   2019 2004   0.9630     0.7156       -1.5522      3.4781  
##   2019 2005  -0.0429     1.2427       -4.4105      4.3248  
##   2019 2006  -0.8151     1.2021       -5.0400      3.4098  
##   2019 2007   0.3497     0.3408       -0.8482      1.5476  
##   2019 2008   0.2848     0.4368       -1.2505      1.8200  
##   2019 2009  -0.1339     0.3444       -1.3444      1.0765  
##   2019 2010  -0.0284     0.2897       -1.0466      0.9898  
##   2019 2011   0.6240     0.7670       -2.0718      3.3197  
##   2019 2012  -0.3927     0.4909       -2.1179      1.3326  
##   2019 2013   0.6966     0.4378       -0.8422      2.2354  
##   2019 2014  -0.5113     0.4512       -2.0970      1.0743  
##   2019 2015   0.1435     0.4551       -1.4561      1.7431  
##   2019 2016   0.9086     0.4572       -0.6983      2.5155  
##   2019 2017  -0.9114     0.4891       -2.6303      0.8074  
##   2019 2018   0.0459     0.3632       -1.2306      1.3224  
##   2019 2019  -0.5336     0.3059       -1.6088      0.5416  
##   2019 2020  -0.0025     0.3865       -1.3609      1.3559  
##   2019 2021   0.4136     0.3888       -0.9528      1.7800  
##   2019 2022  -0.0661     0.3623       -1.3394      1.2073  
##   2020 2001  -0.5670     0.5083       -2.3534      1.2193  
##   2020 2002   0.4056     0.4572       -1.2012      2.0124  
##   2020 2003   0.7065     0.5737       -1.3097      2.7228  
##   2020 2004   0.1031     0.7987       -2.7040      2.9102  
##   2020 2005  -0.1000     0.9572       -3.4641      3.2641  
##   2020 2006  -0.3218     0.8536       -3.3217      2.6781  
##   2020 2007   0.2853     0.5355       -1.5969      2.1675  
##   2020 2008  -0.6405     0.3598       -1.9050      0.6241  
##   2020 2009   0.9243     0.4545       -0.6730      2.5217  
##   2020 2010  -0.1049     0.4413       -1.6558      1.4460  
##   2020 2011  -0.4790     0.4425       -2.0341      1.0761  
##   2020 2012  -0.3833     0.2854       -1.3863      0.6196  
##   2020 2013   0.0973     0.3218       -1.0338      1.2284  
##   2020 2014   0.5823     0.3436       -0.6253      1.7898  
##   2020 2015   0.5108     0.4985       -1.2412      2.2628  
##   2020 2016  -0.2055     0.6234       -2.3965      1.9855  
##   2020 2017  -0.5278     0.6665       -2.8704      1.8149  
##   2020 2018   0.6030     0.3219       -0.5284      1.7344  
##   2020 2019  -0.3564     0.3151       -1.4638      0.7509  
##   2020 2020   0.0029     0.2805       -0.9831      0.9889  
##   2020 2021  -0.3051     0.3193       -1.4272      0.8170  
##   2020 2022  -0.0233     0.4849       -1.7275      1.6810  
##   2021 2001   0.9330     1.0365       -2.7101      4.5760  
##   2021 2002  -1.7849     1.1232       -5.7325      2.1627  
##   2021 2003  -0.2697     0.3141       -1.3735      0.8342  
##   2021 2004   0.0531     0.2474       -0.8165      0.9227  
##   2021 2005   1.3492     0.7282       -1.2102      3.9086  
##   2021 2006  -0.4293     0.9214       -3.6678      2.8092  
##   2021 2007  -0.2639     0.9248       -3.5141      2.9862  
##   2021 2008  -0.2380     0.2686       -1.1818      0.7059  
##   2021 2009   0.2206     0.4094       -1.2183      1.6594  
##   2021 2010   0.9539     0.4742       -0.7126      2.6205  
##   2021 2011  -0.6597     0.7736       -3.3787      2.0594  
##   2021 2012   0.2795     0.6223       -1.9077      2.4667  
##   2021 2013  -0.3611     0.5110       -2.1568      1.4347  
##   2021 2014   0.3546     0.3463       -0.8623      1.5716  
##   2021 2015  -0.1772     0.3559       -1.4281      1.0737  
##   2021 2016  -0.0866     0.2638       -1.0136      0.8405  
##   2021 2017   0.4094     0.3800       -0.9263      1.7451  
##   2021 2018  -0.2541     0.3855       -1.6088      1.1007  
##   2021 2019   0.2768     0.3559       -0.9739      1.5275  
##   2021 2020  -0.2310     0.3023       -1.2935      0.8315  
##   2021 2021   0.4905     0.6597       -1.8282      2.8091  
##   2021 2022   0.4805     0.8118       -2.3727      3.3336  
##   2022 2001  -0.2337     0.8592       -3.2534      2.7859  
##   2022 2002  -0.6182     0.4374       -2.1557      0.9192  
##   2022 2003   1.0637     0.9977       -2.4429      4.5702  
##   2022 2004  -0.5469     1.1351       -4.5363      3.4425  
##   2022 2005  -0.4794     0.4102       -1.9211      0.9623  
##   2022 2006   0.2612     0.3321       -0.9060      1.4284  
##   2022 2007   1.0095     0.9083       -2.1826      4.2017  
##   2022 2008  -0.9400     0.9658       -4.3343      2.4543  
##   2022 2009   0.4559     0.4731       -1.2067      2.1185  
##   2022 2010  -0.3925     0.4085       -1.8283      1.0433  
##   2022 2011   0.4877     0.3475       -0.7336      1.7090  
##   2022 2012  -0.1848     0.2997       -1.2380      0.8684  
##   2022 2013   0.4599     0.4229       -1.0266      1.9463  
##   2022 2014   0.0681     0.4707       -1.5861      1.7224  
##   2022 2015  -0.0618     0.4045       -1.4834      1.3598  
##   2022 2016   1.3574     0.7813       -1.3886      4.1035  
##   2022 2017  -1.2489     0.6598       -3.5676      1.0699  
##   2022 2018  -0.0142     0.4046       -1.4363      1.4079  
##   2022 2019  -0.2513     0.4018       -1.6635      1.1609  
##   2022 2020   0.2667     0.5536       -1.6790      2.2124  
##   2022 2021   0.5161     0.5147       -1.2927      2.3250  
##   2022 2022   0.1612     0.3094       -0.9261      1.2484  
## ---
## Signif. codes: `*' confidence band does not cover 0
## 
## P-value for pre-test of parallel trends assumption:  0
## Control Group:  Never Treated,  Anticipation Periods:  0
## Estimation Method:  Outcome Regression
## NULL

9. Dynamic Effects with Sant’Anna & Callaway Estimator

# Check if we have successful Sant'Anna & Callaway model
if (exists("sc_success") && sc_success) {
  
  # Aggregate to dynamic effects with error handling
  tryCatch({
    sc_dynamic <- aggte(sc_attgt, type = "dynamic", na.rm = TRUE)
    
    cat("=== DYNAMIC TREATMENT EFFECTS (SANT'ANNA & CALLAWAY) ===\n")
    print(summary(sc_dynamic))
    
    # Extract dynamic effects from Sant'Anna & Callaway DiD
    sc_dynamic_df <- data.frame(
      time = sc_dynamic$egt,
      estimate = sc_dynamic$att.egt,
      se = sc_dynamic$se.egt,
      method = "Sant'Anna & Callaway"
    ) %>%
      mutate(
        conf.low = estimate - 1.96 * se,
        conf.high = estimate + 1.96 * se
      )
    
    # Plot Sant'Anna & Callaway dynamic effects
    p_sc_dynamic <- ggdid(sc_dynamic) +
      labs(title = "Dynamic Treatment Effects - Sant'Anna & Callaway Estimator",
           subtitle = "Event study plot from robust DiD estimation") +
      theme_minimal()
    
    print(p_sc_dynamic)
    
    # Compare with TWFE if available
    if (exists("twfe_event_coefs") && !is.null(twfe_event_coefs)) {
      # Combine TWFE and Sant'Anna & Callaway dynamic effects
      dynamic_comparison <- bind_rows(
        twfe_event_coefs %>% select(time, estimate, conf.low, conf.high, method),
        sc_dynamic_df %>% select(time, estimate, conf.low, conf.high, method)
      ) %>%
        filter(!is.na(time))
      
      # Plot comparison
      p_dynamic_comparison <- ggplot(dynamic_comparison, 
                                    aes(x = time, y = estimate, 
                                        color = method, fill = method)) +
        geom_point(position = position_dodge(width = 0.3), size = 2.5) +
        geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                      position = position_dodge(width = 0.3), 
                      width = 0.2) +
        geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
        geom_vline(xintercept = -0.5, linetype = "dashed", color = "gray") +
        labs(title = "Comparison: Dynamic Treatment Effects",
             subtitle = "TWFE vs Sant'Anna & Callaway Estimator with Slovak Grant Data",
             x = "Time to Treatment", y = "Treatment Effect (Publications)",
             color = "Method", fill = "Method") +
        theme_minimal() +
        theme(legend.position = "bottom")
      
      print(p_dynamic_comparison)
    }
    
  }, error = function(e) {
    cat("Dynamic effects aggregation failed:", e$message, "\n")
  })
  
} else {
  cat("Cannot compute dynamic effects - Sant'Anna & Callaway model failed to estimate\n")
  
  # Create alternative visualization
  p_treatment_timing <- final_data %>%
    filter(first.treat > 0) %>%
    distinct(id, first.treat) %>%
    ggplot(aes(x = first.treat)) +
    geom_histogram(binwidth = 1, fill = "steelblue", alpha = 0.7) +
    labs(title = "Distribution of Treatment Timing",
         subtitle = "When authors received their first grant",
         x = "Year of First Grant", y = "Number of Authors") +
    theme_minimal()
  
  print(p_treatment_timing)
}
## === DYNAMIC TREATMENT EFFECTS (SANT'ANNA & CALLAWAY) ===
## 
## Call:
## aggte(MP = sc_attgt, type = "dynamic", na.rm = TRUE)
## 
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 
## 
## 
## Overall summary of ATT's based on event-study/dynamic aggregation:  
##     ATT    Std. Error     [ 95%  Conf. Int.] 
##  0.1616        0.8333    -1.4717      1.7949 
## 
## 
## Dynamic Effects:
##  Event time Estimate Std. Error [95% Simult.  Conf. Band]  
##         -21  -0.2337     0.8722       -2.7066      2.2392  
##         -20   0.3017     0.6121       -1.4339      2.0372  
##         -19  -0.5921     0.4205       -1.7842      0.6001  
##         -18   0.8410     0.2361        0.1715      1.5105 *
##         -17  -0.9250     0.3395       -1.8877      0.0376  
##         -16   0.3012     0.2745       -0.4770      1.0793  
##         -15   0.1068     0.4479       -1.1631      1.3766  
##         -14  -0.3934     0.2084       -0.9844      0.1976  
##         -13   0.0135     0.1435       -0.3932      0.4203  
##         -12  -0.0085     0.1077       -0.3138      0.2968  
##         -11   0.3137     0.1146       -0.0112      0.6386  
##         -10  -0.0487     0.1177       -0.3824      0.2851  
##          -9   0.0003     0.0993       -0.2811      0.2817  
##          -8   0.0792     0.0957       -0.1921      0.3505  
##          -7   0.0338     0.1022       -0.2558      0.3234  
##          -6   0.0282     0.0855       -0.2142      0.2706  
##          -5   0.1041     0.0781       -0.1173      0.3255  
##          -4   0.0922     0.0828       -0.1426      0.3270  
##          -3   0.0047     0.0814       -0.2261      0.2354  
##          -2   0.1275     0.0758       -0.0873      0.3423  
##          -1   0.0577     0.0723       -0.1474      0.2628  
##           0   0.0360     0.1840       -0.4856      0.5575  
##           1  -0.0824     0.1984       -0.6450      0.4801  
##           2   0.0902     0.2107       -0.5071      0.6876  
##           3   0.2064     0.2192       -0.4150      0.8279  
##           4   0.3381     0.2336       -0.3241      1.0002  
##           5   0.5077     0.2486       -0.1971      1.2125  
##           6   0.6772     0.2614       -0.0640      1.4183  
##           7   0.9587     0.2519        0.2446      1.6729 *
##           8   1.0203     0.3180        0.1188      1.9217 *
##           9   0.9739     0.4385       -0.2694      2.2172  
##          10   1.0502     0.3939       -0.0665      2.1668  
##          11   0.6012     0.5291       -0.8989      2.1013  
##          12   0.2898     0.7397       -1.8073      2.3869  
##          13  -0.2029     1.4373       -4.2779      3.8721  
##          14  -0.0348     1.4106       -4.0341      3.9645  
##          15  -0.2878     1.5194       -4.5955      4.0200  
##          16  -3.3943     5.6500      -19.4129     12.6242  
## ---
## Signif. codes: `*' confidence band does not cover 0
## 
## Control Group:  Never Treated,  Anticipation Periods:  0
## Estimation Method:  Outcome Regression
## NULL

10. Overall Treatment Effect Comparison

# Create overall comparison table
overall_comparison <- data.frame(
  Method = "TWFE",
  Estimate = coef(twfe_model)["post"],
  SE = se(twfe_model)["post"],
  Observations = nobs(twfe_model),
  stringsAsFactors = FALSE
)

if (exists("sc_success") && sc_success) {
  # Aggregate overall effects from Sant'Anna & Callaway with error handling
  tryCatch({
    sc_overall <- aggte(sc_attgt, type = "group", na.rm = TRUE)
    
    overall_comparison <- bind_rows(
      overall_comparison,
      data.frame(
        Method = "Sant'Anna & Callaway",
        Estimate = sc_overall$overall.att,
        SE = sc_overall$overall.se,
        Observations = sc_overall$DIDparams$n
      )
    )
    
    cat("=== OVERALL AVERAGE TREATMENT EFFECT (SANT'ANNA & CALLAWAY) ===\n")
    print(summary(sc_overall))
    
  }, error = function(e) {
    cat("Overall Sant'Anna & Callaway aggregation failed:", e$message, "\n")
  })
}
## === OVERALL AVERAGE TREATMENT EFFECT (SANT'ANNA & CALLAWAY) ===
## 
## Call:
## aggte(MP = sc_attgt, type = "group", na.rm = TRUE)
## 
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 
## 
## 
## Overall summary of ATT's based on group/cohort aggregation:  
##     ATT    Std. Error     [ 95%  Conf. Int.]  
##  0.3861        0.1792     0.0349      0.7373 *
## 
## 
## Group Effects:
##  Group Estimate Std. Error [95% Simult.  Conf. Band]  
##   2006  -3.5308     5.9879      -20.2297     13.1682  
##   2007   0.5573     0.2092       -0.0261      1.1407  
##   2010   0.6092     0.1610        0.1603      1.0581 *
##   2011   0.9764     0.1506        0.5564      1.3963 *
##   2012   1.2635     0.1722        0.7833      1.7437 *
##   2014   0.7523     0.2706       -0.0024      1.5069  
##   2015   0.4332     0.1451        0.0286      0.8378 *
##   2016  -0.1163     0.3121       -0.9865      0.7540  
##   2017  -0.0327     0.2495       -0.7285      0.6632  
##   2018   0.3439     0.2446       -0.3382      1.0259  
##   2019  -0.0471     0.2895       -0.8544      0.7601  
##   2020  -0.1085     0.3150       -0.9869      0.7699  
##   2021   0.4855     0.6492       -1.3249      2.2958  
##   2022   0.1612     0.3282       -0.7541      1.0764  
## ---
## Signif. codes: `*' confidence band does not cover 0
## 
## Control Group:  Never Treated,  Anticipation Periods:  0
## Estimation Method:  Outcome Regression
## NULL
# Calculate confidence intervals and significance
overall_comparison <- overall_comparison %>%
  mutate(
    CI_lower = Estimate - 1.96 * SE,
    CI_upper = Estimate + 1.96 * SE,
    Significant = ifelse(CI_lower > 0 | CI_upper < 0, "Yes", "No"),
    Percent_Change = (Estimate / mean(final_data$publications)) * 100
  )

kable(overall_comparison, digits = 3, 
      caption = "Overall Treatment Effect Comparison - Sant'Anna & Callaway vs TWFE") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  row_spec(which(overall_comparison$Significant == "Yes"), 
           background = "#E6F3FF")
Overall Treatment Effect Comparison - Sant’Anna & Callaway vs TWFE
Method Estimate SE Observations CI_lower CI_upper Significant Percent_Change
post TWFE 0.429 0.043 104747 0.345 0.513 Yes 15.423
…2 Sant’Anna & Callaway 0.386 0.179 21593 0.035 0.737 Yes 13.869

11. Comprehensive Results Visualization

# Create a comprehensive visualization dashboard
visualization_list <- list()

# 1. Treatment cohort distribution
p_cohort_dist <- final_data %>%
  filter(first.treat > 0) %>%
  distinct(id, first.treat) %>%
  ggplot(aes(x = first.treat)) +
  geom_histogram(binwidth = 1, fill = "steelblue", alpha = 0.7) +
  labs(title = "A) Distribution of First Grant Years",
       x = "Year of First Grant", y = "Number of Authors") +
  theme_minimal()

# 2. Publication trends by treatment status
p_trends_simple <- final_data %>%
  filter(year >= 1995 & year <= 2024) %>%
  group_by(year, treated = first.treat > 0) %>%
  summarise(mean_publications = mean(publications), .groups = 'drop') %>%
  ggplot(aes(x = year, y = mean_publications, color = treated)) +
  geom_line(size = 1) +
  geom_point() +
  labs(title = "B) Publication Trends by Treatment Status",
       x = "Year", y = "Mean Publications", color = "Treated") +
  theme_minimal()

# Arrange basic plots
basic_dashboard <- p_cohort_dist | p_trends_simple
print(basic_dashboard)

# Add overall comparison if available
if (nrow(overall_comparison) > 1) {
  p_overall_compare <- overall_comparison %>%
    ggplot(aes(x = Method, y = Estimate, fill = Method)) +
    geom_col(alpha = 0.7) +
    geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper), width = 0.2) +
    labs(title = "C) Overall Treatment Effect Comparison",
         subtitle = "TWFE vs Sant'Anna & Callaway Estimator",
         x = "Method", y = "Treatment Effect (Publications)") +
    theme_minimal() +
    theme(legend.position = "none")
  
  print(p_overall_compare)
}

Event Study Analysis: Two Decades of Slovak Research Grants

Sant’Anna & Callaway Event Study Estimation

# Prepare data for Sant'Anna & Callaway event study
event_study_data <- final_data %>%
  mutate(
    id_num = as.numeric(factor(id)),
    year = as.numeric(year),
    first_treat = as.numeric(first.treat)
  ) %>%
  filter(
    year >= 1995 & year <= 2020,
    first_treat >= 2000,
    !is.na(publications),
    !is.na(year),
    !is.na(first_treat)
  )

cat("Sant'Anna & Callaway Event Study Dataset:\n")
## Sant'Anna & Callaway Event Study Dataset:
cat("  Time period:", min(event_study_data$year), "-", max(event_study_data$year), "\n")
##   Time period: 1995 - 2020
cat("  Observations:", nrow(event_study_data), "\n")
##   Observations: 44918
cat("  Unique authors:", n_distinct(event_study_data$id_num), "\n")
##   Unique authors: 7420
# Estimate event study using Sant'Anna & Callaway estimator
sc_event_attgt <- att_gt(
  yname = "publications",
  tname = "year",
  idname = "id_num",
  gname = "first_treat",
  data = event_study_data,
  control_group = "notyettreated",
  est_method = "dr",
  anticipation = 0,
  allow_unbalanced_panel = TRUE
)

# Aggregate to event study
sc_event_study <- aggte(
  sc_event_attgt, 
  type = "dynamic", 
  na.rm = TRUE,
  min_e = -10,
  max_e = 10
)

cat("=== SANT'ANNA & CALLAWAY EVENT STUDY RESULTS ===\n")
## === SANT'ANNA & CALLAWAY EVENT STUDY RESULTS ===
print(summary(sc_event_study))
## 
## Call:
## aggte(MP = sc_event_attgt, type = "dynamic", min_e = -10, max_e = 10, 
##     na.rm = TRUE)
## 
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 
## 
## 
## Overall summary of ATT's based on event-study/dynamic aggregation:  
##     ATT    Std. Error     [ 95%  Conf. Int.] 
##  0.1945        0.3589    -0.5089      0.8978 
## 
## 
## Dynamic Effects:
##  Event time Estimate Std. Error [95% Simult.  Conf. Band] 
##         -10   0.0994     0.1402       -0.2835      0.4823 
##          -9  -0.0770     0.1234       -0.4140      0.2600 
##          -8   0.1245     0.1369       -0.2494      0.4984 
##          -7  -0.0585     0.1177       -0.3799      0.2628 
##          -6  -0.0058     0.1022       -0.2849      0.2733 
##          -5   0.0769     0.0870       -0.1607      0.3144 
##          -4   0.0320     0.0903       -0.2147      0.2788 
##          -3  -0.0135     0.0848       -0.2451      0.2181 
##          -2   0.0255     0.0924       -0.2270      0.2780 
##          -1  -0.0528     0.0875       -0.2920      0.1863 
##           0  -0.0016     0.2031       -0.5563      0.5531 
##           1  -0.2148     0.2239       -0.8263      0.3968 
##           2  -0.0514     0.2528       -0.7419      0.6390 
##           3   0.0216     0.2501       -0.6616      0.7048 
##           4   0.0609     0.2586       -0.6456      0.7673 
##           5   0.3376     0.3221       -0.5422      1.2174 
##           6   0.4181     0.3838       -0.6302      1.4664 
##           7   0.6282     0.4433       -0.5827      1.8391 
##           8   0.7137     0.4208       -0.4359      1.8634 
##           9   0.3313     0.5761       -1.2425      1.9051 
##          10  -0.1042     0.7782       -2.2300      2.0215 
## ---
## Signif. codes: `*' confidence band does not cover 0
## 
## Control Group:  Not Yet Treated,  Anticipation Periods:  0
## Estimation Method:  Doubly Robust
## NULL

Enhanced Event Study Plot with Sant’Anna & Callaway Labeling

# Extract event study data for plotting
es_data <- data.frame(
  period = sc_event_study$egt,
  estimate = sc_event_study$att.egt,
  std_error = sc_event_study$se.egt,
  conf_low = sc_event_study$att.egt - 1.96 * sc_event_study$se.egt,
  conf_high = sc_event_study$att.egt + 1.96 * sc_event_study$se.egt
)

# Test for pre-trends
pre_periods <- es_data %>% filter(period < 0)
pre_trend_test <- lm(estimate ~ period, data = pre_periods)
pre_trend_p <- summary(pre_trend_test)$coefficients[2, 4]

# Create enhanced plot with Sant'Anna & Callaway labeling
enhanced_plot <- ggplot(es_data, aes(x = period, y = estimate)) +
  # Background shading for pre/post periods
  annotate("rect", xmin = -10.5, xmax = -0.5, ymin = -Inf, ymax = Inf, 
           alpha = 0.03, fill = "red") +
  annotate("rect", xmin = -0.5, xmax = 10.5, ymin = -Inf, ymax = Inf, 
           alpha = 0.03, fill = "blue") +
  # Zero line
  geom_hline(yintercept = 0, linetype = "dashed", color = "black", alpha = 0.5) +
  # Treatment line
  geom_vline(xintercept = -0.5, linetype = "solid", color = "darkblue", size = 1) +
  # Confidence intervals
  geom_ribbon(aes(ymin = conf_low, ymax = conf_high), 
              fill = "#2E86AB", alpha = 0.3) +
  # Point estimates
  geom_line(color = "#2E86AB", size = 1.5) +
  geom_point(color = "#2E86AB", size = 4, fill = "white", shape = 21, stroke = 1.5) +
  # Scale and labels
  scale_x_continuous(
    breaks = seq(-10, 10, by = 2),
    labels = function(x) ifelse(x < 0, paste0(abs(x), "Y"), 
                               ifelse(x > 0, paste0(x, "Y"), "Treatment\nYear")),
    expand = expansion(mult = 0.05)
  ) +
  labs(
    title = "Dynamic Effects of Research Grants on Scientific Publications",
    subtitle = "Sant'Anna & Callaway (2021) Estimator • Two-Decade Analysis",
    x = "\nYears Relative to Grant Receipt",
    y = "Treatment Effect \n (Additional Publications per Year)",
    caption = paste(
      "Method: Sant'Anna & Callaway (2021) Robust DiD Estimator • Control: Not-Yet-Treated Authors •",
      "Pre-trend p-value:", round(pre_trend_p, 4),
      "\nData: Web of Science • Period: 1995-2020"
    )
  ) +
  theme_minimal(base_size = 16) +
  theme(
    plot.title = element_text(face = "bold", size = 22, hjust = 0.5, margin = margin(b = 10)),
    plot.subtitle = element_text(size = 16, hjust = 0.5, color = "gray40", margin = margin(b = 20)),
    plot.caption = element_text(size = 12, color = "gray50", hjust = 1, margin = margin(t = 15)),
    axis.title = element_text(face = "bold", size = 14),
    axis.text = element_text(size = 12),
    axis.text.x = element_text(angle = 0, hjust = 0.5, lineheight = 0.8),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray85", linewidth = 0.4),
    plot.background = element_rect(fill = "white", color = NA),
    panel.background = element_rect(fill = "white", color = NA),
    plot.margin = margin(1, 1, 1, 1, "cm")
  )

# Add methodological annotation
enhanced_plot <- enhanced_plot 
print(enhanced_plot)

Cohort-Specific Analysis with Sant’Anna & Callaway Estimator

# Get cohort-specific estimates using Sant'Anna & Callaway
sc_cohort_effects <- aggte(sc_event_attgt, type = "group", na.rm = TRUE)

sc_cohort_data <- data.frame(
  cohort = sc_cohort_effects$egt,
  att = sc_cohort_effects$att.egt,
  se = sc_cohort_effects$se.egt
) %>%
  mutate(
    conf_low = att - 1.96 * se,
    conf_high = att + 1.96 * se,
    significant = ifelse(conf_low > 0 | conf_high < 0, "Yes", "No")
  )

# Plot cohort-specific effects
cohort_plot <- ggplot(sc_cohort_data, aes(x = factor(cohort), y = att)) +
  geom_pointrange(aes(ymin = conf_low, ymax = conf_high, color = significant), 
                  size = 1, fatten = 2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  scale_color_manual(values = c("Yes" = "darkgreen", "No" = "gray60")) +
  labs(
    title = "Cohort-Specific Average Treatment Effects",
    subtitle = "Sant'Anna & Callaway Estimator by Treatment Cohort",
    x = "Treatment Cohort (Year)",
    y = "Average Treatment Effect on Publications",
    color = "Statistically\nSignificant"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

print(cohort_plot)