# Load required packages
library(did)
library(bacondecomp)
library(fixest)
library(ggplot2)
library(dplyr)
library(tidyr)
library(readr)
library(stringr)
library(purrr)
library(jsonlite)
library(knitr)
library(kableExtra)
library(ggpubr)
library(patchwork)

# Set theme for consistent plotting
theme_set(theme_minimal(base_size = 12))

Executive Summary

This analysis compares Traditional Two-Way Fixed Effects (TWFE) with the Sant’Anna & Callaway Difference-in-Differences (DiD) estimator for estimating the effect of Slovak research grants (APVV) on publication outcomes using real Web of Science data.

1. Data Preparation and Grant Extraction

extract_apvv_year_simple <- function(grant_string) {
  if (is.na(grant_string) || grant_string == "" || grant_string == "nan") {
    return(NA)
  }
  
  grant_str <- as.character(grant_string)
  
  # Look specifically for APVV-XXXX-XX pattern (like APVV-0088-12)
  pattern1 <- "APVV[-_]\\d{4}[-_](\\d{2})"
  match1 <- str_match(grant_str, regex(pattern1, ignore_case = TRUE))
  
  if (!is.na(match1[1,1]) && !is.na(match1[1,2])) {
    year_digits <- as.numeric(match1[1,2])
    year <- 2000 + year_digits
    if (year >= 2000 && year <= 2025) {
      return(year)
    }
  }
  
  # Look for APVV-XX-XXXX pattern (like APVV-15-0496)
  pattern2 <- "APVV[-_](\\d{2})[-_]\\d{4}"
  match2 <- str_match(grant_str, regex(pattern2, ignore_case = TRUE))
  
  if (!is.na(match2[1,1]) && !is.na(match2[1,2])) {
    year_digits <- as.numeric(match2[1,2])
    year <- 2000 + year_digits
    if (year >= 2000 && year <= 2025) {
      return(year)
    }
  }
  
  return(NA)
}

Grant Extraction Validation

# Test the specific case
test_apvv <- c("APVV-0088-12", "APVV-15-0496", "APVV-0842-11", "APVV-16-0079")

results <- data.frame(
  Grant_String = test_apvv,
  Extracted_Year = sapply(test_apvv, extract_apvv_year_simple)
)

kable(results, caption = "Grant Year Extraction Validation") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)

Grant Year Extraction Validation
	Grant_String	Extracted_Year
APVV-0088-12	APVV-0088-12	2012
APVV-15-0496	APVV-15-0496	2015
APVV-0842-11	APVV-0842-11	2011
APVV-16-0079	APVV-16-0079	2016

2. Data Loading and Preparation

# Load the real data
data_file <- "/Users/tomasoles/Library/CloudStorage/Dropbox/Research_Publications/slovak_university/wos_results/wos_results_full_sample.json"

load_real_data <- function(file_path) {
  cat("Loading data from:", file_path, "\n")
  
  # Load the JSON data
  df <- fromJSON(file_path)
  
  # Convert to dataframe if it's a list
  if (is.list(df) && !is.data.frame(df)) {
    df <- as.data.frame(df)
  }
  
  cat("Original data dimensions:", nrow(df), "rows,", ncol(df), "columns\n")
  
  # Keep only relevant document types
  keep_types <- c("Article", "Proceedings Paper", "Meeting Abstract")
  df <- df %>% filter(document_type %in% keep_types)
  
  cat("After filtering document types:", nrow(df), "rows\n")
  
  return(df)
}

# Load the data
tryCatch({
  raw_data <- load_real_data(data_file)
  cat("✓ Successfully loaded real data\n")
}, error = function(e) {
  cat("Could not load real data:", e$message, "\n")
  stop("Data loading failed - please check the file path")
})

## Loading data from: /Users/tomasoles/Library/CloudStorage/Dropbox/Research_Publications/slovak_university/wos_results/wos_results_full_sample.json 
## Original data dimensions: 359614 rows, 41 columns
## After filtering document types: 314331 rows
## ✓ Successfully loaded real data

3. Data Processing Pipeline

Step 1: Grant Year Extraction with Filtering (2000-2022)

# Extract grant years for each author WITH FILTERING (2000-2022)
grant_years <- raw_data %>%
  group_by(author_rid) %>%
  summarise(
    first_grant_year = {
      all_grants <- unique(grant_ids[!is.na(grant_ids) & grant_ids != "" & grant_ids != "nan"])
      if (length(all_grants) == 0) {
        NA
      } else {
        years <- map_dbl(all_grants, extract_apvv_year_simple)
        years <- years[!is.na(years)]
        # FILTER: Keep only years between 2000 and 2022
        years <- years[years >= 2000 & years <= 2022]
        if (length(years) > 0) min(years) else NA
      }
    },
    .groups = 'drop'
  )

cat("Grant extraction completed:\n")

## Grant extraction completed:

cat("  Authors with grants:", sum(!is.na(grant_years$first_grant_year)), "\n")

##   Authors with grants: 8727

cat("  Authors without grants:", sum(is.na(grant_years$first_grant_year)), "\n")

##   Authors without grants: 16322

# Show grant year distribution AFTER FILTERING
grant_dist <- grant_years %>%
  filter(!is.na(first_grant_year)) %>%
  count(first_grant_year) %>%
  arrange(first_grant_year)

kable(grant_dist, caption = "Grant Year Distribution (2000-2022 only)") %>%
  kable_styling(bootstrap_options = "striped")

Grant Year Distribution (2000-2022 only)
first_grant_year	n
2000	5
2001	8
2002	1
2004	4
2006	239
2007	613
2009	1
2010	851
2011	903
2012	750
2014	742
2015	1435
2016	657
2017	653
2018	494
2019	390
2020	508
2021	268
2022	205

Step 2: Create Treatment Variables

# Merge grant years and create treatment variables
processed_data <- raw_data %>%
  left_join(grant_years, by = "author_rid") %>%
  mutate(
    first.treat = ifelse(is.na(first_grant_year), 0, first_grant_year),
    year = as.integer(year),
    treated = first.treat > 0
  )

cat("Treatment variables created:\n")

## Treatment variables created:

cat("  Treated authors:", sum(processed_data$treated, na.rm = TRUE), "\n")

##   Treated authors: 213401

cat("  Control authors:", sum(!processed_data$treated, na.rm = TRUE), "\n")

##   Control authors: 100930

Step 3: Create Author-Year Panel for DiD Analysis

# Create publication count per author-year
author_year_pubs <- processed_data %>%
  group_by(author_rid, year) %>%
  summarise(
    publications = n(),
    .groups = 'drop'
  )

# Merge with treatment info
treatment_info <- processed_data %>%
  distinct(author_rid, first.treat, first_grant_year)

final_data <- author_year_pubs %>%
  left_join(treatment_info, by = "author_rid") %>%
  rename(id = author_rid) %>%
  filter(!is.na(year), !is.na(first.treat), !is.na(publications))

cat("Final panel data created:\n")

## Final panel data created:

cat("  Total observations:", nrow(final_data), "\n")

##   Total observations: 112910

cat("  Unique authors:", n_distinct(final_data$id), "\n")

##   Unique authors: 25049

cat("  Time period:", min(final_data$year), "to", max(final_data$year), "\n")

##   Time period: 1979 to 2026

4. Data Quality Assessment

# Treatment group summary
treatment_summary <- final_data %>%
  distinct(id, first.treat) %>%
  count(first.treat) %>%
  arrange(first.treat)

kable(treatment_summary, caption = "Treatment Group Distribution") %>%
  kable_styling(bootstrap_options = "striped")

Treatment Group Distribution
first.treat	n
0	16322
2000	5
2001	8
2002	1
2004	4
2006	239
2007	613
2009	1
2010	851
2011	903
2012	750
2014	742
2015	1435
2016	657
2017	653
2018	494
2019	390
2020	508
2021	268
2022	205

# Publication patterns
pub_stats <- final_data %>%
  group_by(treatment_status = ifelse(first.treat == 0, "Control", "Treated")) %>%
  summarise(
    Authors = n_distinct(id),
    Observations = n(),
    Mean_Publications = mean(publications),
    Median_Publications = median(publications),
    Zero_Publications = sum(publications == 0),
    .groups = 'drop'
  )

kable(pub_stats, caption = "Publication Patterns by Treatment Status") %>%
  kable_styling(bootstrap_options = "striped")

Publication Patterns by Treatment Status
treatment_status	Authors	Observations	Mean_Publications	Median_Publications	Zero_Publications
Control	16322	47777	2.112523	1	0
Treated	8727	65133	3.276388	2	0

5. Descriptive Statistics and Trends

# Publication trends over time by treatment status
trend_data <- final_data %>%
  mutate(cohort_label = ifelse(first.treat == 0, "Never Treated", 
                              paste("Treated", first.treat))) %>%
  group_by(year, cohort_label) %>%
  summarise(
    mean_pubs = mean(publications),
    se_pubs = sd(publications)/sqrt(n()),
    n_authors = n_distinct(id),
    .groups = 'drop'
  ) %>%
  filter(n_authors >= 5)  # Filter out small groups for stability

# Plot publication trends
p_trends <- ggplot(trend_data, aes(x = year, y = mean_pubs, 
                                 color = cohort_label, group = cohort_label)) +
  geom_line(size = 1.2, alpha = 0.8) +
  geom_point(size = 1.5) +
  geom_ribbon(aes(ymin = mean_pubs - 1.96*se_pubs, 
                  ymax = mean_pubs + 1.96*se_pubs, 
                  fill = cohort_label), 
              alpha = 0.1, linetype = 0) +
  labs(title = "Publication Trends by Treatment Cohort",
       subtitle = "Real Web of Science Data - Mean publications with 95% confidence intervals",
       x = "Year", y = "Mean Publications",
       color = "Treatment Cohort", fill = "Treatment Cohort") +
  theme_minimal() +
  theme(legend.position = "bottom")

p_trends

# Distribution of publications
p_pub_dist <- final_data %>%
  ggplot(aes(x = publications, fill = first.treat > 0)) +
  geom_histogram(binwidth = 1, alpha = 0.7, position = "identity") +
  labs(title = "Distribution of Publications per Author-Year",
       x = "Number of Publications", y = "Count",
       fill = "Treated") +
  theme_minimal() +
  scale_x_continuous(limits = c(0, 20))  # Focus on reasonable range

p_pub_dist

6. Traditional TWFE Estimation

# Prepare data for TWFE - ensure proper formatting
twfe_data <- final_data %>%
  mutate(
    post = as.integer(first.treat > 0 & year >= first.treat),
    author_factor = as.factor(id),
    year_factor = as.factor(year)
  )

# Standard TWFE specification
twfe_model <- feols(publications ~ post | author_factor + year_factor, 
                   data = twfe_data)

# Create numeric author ID for Bacon decomposition
twfe_data <- twfe_data %>%
  mutate(author_id = as.numeric(factor(id)))

# Print TWFE results
cat("=== TRADITIONAL TWFE RESULTS ===\n")

## === TRADITIONAL TWFE RESULTS ===

print(summary(twfe_model))

## OLS estimation, Dep. Var.: publications
## Observations: 104,747
## Fixed-effects: author_factor: 16,887,  year_factor: 41
## Standard-errors: IID 
##      Estimate Std. Error t value  Pr(>|t|)    
## post 0.429358   0.042919 10.0039 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 2.73233     Adj. R2: 0.648963
##                 Within R2: 0.001138

# Extract coefficients for plotting
twfe_coefs <- data.frame(
  method = "TWFE",
  estimate = coef(twfe_model)["post"],
  se = se(twfe_model)["post"],
  term = "Overall Effect"
)

# Simple event study for TWFE (if we have enough variation)
tryCatch({
  # Create relative time indicators for a simple event study
  event_study_data <- twfe_data %>%
    filter(first.treat > 0) %>%
    mutate(
      rel_time = year - first.treat,
      rel_time_factor = factor(rel_time)
    )
  
  # Simple event study with limited leads/lags
  twfe_event <- feols(publications ~ i(rel_time_factor, ref = "-1") | 
                       author_factor + year_factor,
                     data = event_study_data %>% 
                       filter(rel_time >= -3, rel_time <= 3))
  
  cat("\n=== SIMPLE TWFE EVENT STUDY ===\n")
  print(summary(twfe_event))
  
  # Extract event study coefficients
  twfe_event_coefs <- broom::tidy(twfe_event, conf.int = TRUE) %>%
    filter(str_detect(term, "rel_time_factor")) %>%
    mutate(
      time = as.numeric(str_extract(term, "[0-9+-]+")),
      method = "TWFE"
    )
  
}, error = function(e) {
  cat("TWFE event study failed:", e$message, "\n")
  twfe_event_coefs <- NULL
})

## 
## === SIMPLE TWFE EVENT STUDY ===
## OLS estimation, Dep. Var.: publications
## Observations: 18,421
## Fixed-effects: author_factor: 4,729,  year_factor: 23
## Standard-errors: IID 
##                      Estimate Std. Error   t value Pr(>|t|)    
## rel_time_factor::-3  0.150650   0.142818  1.054836 0.291519    
## rel_time_factor::-2  0.013673   0.118969  0.114931 0.908501    
## rel_time_factor::0  -0.105111   0.085989 -1.222369 0.221589    
## rel_time_factor::1  -0.219432   0.079809 -2.749473 0.005977 ** 
## rel_time_factor::2  -0.050284   0.071099 -0.707244 0.479427    
## ... 1 variable was removed because of collinearity (rel_time_factor::3)
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 2.57147     Adj. R2: 0.63651
##                 Within R2: 9.44e-4

7. Fixed Bacon Decomposition

# Create balanced panel subset for Bacon decomposition
bacon_data <- twfe_data %>%
  group_by(author_id) %>%
  filter(n() >= 3) %>%  # Keep only authors with at least 3 observations
  ungroup()

cat("Data for Bacon decomposition:", nrow(bacon_data), "observations\n")

## Data for Bacon decomposition: 96894 observations

cat("Unique authors for Bacon:", n_distinct(bacon_data$author_id), "\n")

## Unique authors for Bacon: 12960

# Perform Bacon decomposition with balanced data
tryCatch({
  bacon_results <- bacon(publications ~ post,
                        data = bacon_data,
                        id_var = "author_id",
                        time_var = "year")
  
  # Plot Bacon decomposition
  p_bacon_weights <- ggplot(bacon_results$weights, 
                           aes(x = weight, y = reorder(type, weight))) +
    geom_col(fill = "steelblue", alpha = 0.7) +
    labs(title = "Bacon Decomposition: Weights on 2x2 Comparisons",
         x = "Weight", y = "Comparison Type") +
    theme_minimal()
  
  p_bacon_effects <- ggplot(bacon_results$two_by_twos,
                           aes(x = estimate, y = reorder(type, estimate))) +
    geom_point(size = 3, color = "darkred") +
    geom_vline(xintercept = 0, linetype = "dashed") +
    labs(title = "Bacon Decomposition: 2x2 Treatment Effects",
         x = "Treatment Effect Estimate", y = "Comparison Type") +
    theme_minimal()
  
  # Combine Bacon plots
  p_bacon_combined <- p_bacon_weights / p_bacon_effects
  print(p_bacon_combined)
  
  # Display summary of Bacon decomposition
  cat("=== BACON DECOMPOSITION SUMMARY ===\n")
  print(bacon_results$omega_weighted)
  
}, error = function(e) {
  cat("Bacon decomposition failed:", e$message, "\n")
  
  # Alternative: Manual Bacon-style analysis
  cat("\n=== ALTERNATIVE: MANUAL COHORT ANALYSIS ===\n")
  
  cohort_effects <- twfe_data %>%
    filter(first.treat > 0) %>%
    group_by(first.treat) %>%
    summarise(
      pre_mean = mean(publications[year < first.treat]),
      post_mean = mean(publications[year >= first.treat]),
      effect = post_mean - pre_mean,
      n_authors = n_distinct(author_id),
      .groups = 'drop'
    )
  
  kable(cohort_effects, caption = "Manual Cohort Analysis") %>%
    kable_styling(bootstrap_options = "striped")
})

## Bacon decomposition failed: Unbalanced Panel 
## 
## === ALTERNATIVE: MANUAL COHORT ANALYSIS ===

Manual Cohort Analysis
first.treat	pre_mean	post_mean	effect	n_authors
2000	NaN	4.727273	NaN	5
2001	NaN	3.966667	NaN	8
2002	NaN	4.071429	NaN	1
2004	NaN	8.083333	NaN	4
2006	8.419491	4.624086	-3.7954053	239
2007	2.372777	3.407396	1.0346196	613
2009	NaN	1.500000	NaN	1
2010	2.355609	3.591031	1.2354219	851
2011	2.301922	3.600478	1.2985566	903
2012	2.075949	3.777201	1.7012517	750
2014	2.197519	3.745480	1.5479611	742
2015	2.293931	3.255987	0.9620556	1435
2016	2.359375	3.086428	0.7270533	657
2017	2.594901	3.221037	0.6261357	653
2018	2.519621	3.045094	0.5254733	494
2019	2.819005	3.019031	0.2000266	390
2020	2.654369	2.583539	-0.0708298	508
2021	2.492857	2.613828	0.1209709	268
2022	2.474771	2.734491	0.2597207	205

8. Sant’Anna & Callaway DiD Estimation

# Prepare clean data for Sant'Anna & Callaway DiD with proper numeric IDs
did_data_clean <- final_data %>%
  mutate(
    # Create numeric ID for did package
    id_num = as.numeric(factor(id)),
    year = as.numeric(year),
    first.treat = as.numeric(first.treat)
  ) %>%
  # Filter to reasonable time period for analysis
  filter(year >= 2000, year <= 2022) %>%
  # Remove any missing values that could cause issues
  filter(!is.na(publications), !is.na(year), !is.na(first.treat), !is.na(id_num))

cat("Clean data for Sant'Anna & Callaway DiD analysis:", nrow(did_data_clean), "observations\n")

## Clean data for Sant'Anna & Callaway DiD analysis: 88693 observations

cat("Unique authors:", n_distinct(did_data_clean$id_num), "\n")

## Unique authors: 21612

# Check treatment cohort sizes
cohort_sizes <- did_data_clean %>%
  filter(first.treat > 0) %>%
  distinct(id_num, first.treat) %>%
  count(first.treat) %>%
  arrange(first.treat)

cat("Treatment cohort sizes:\n")

## Treatment cohort sizes:

print(cohort_sizes)

## # A tibble: 19 × 2
##    first.treat     n
##          <dbl> <int>
##  1        2000     5
##  2        2001     8
##  3        2002     1
##  4        2004     4
##  5        2006   238
##  6        2007   613
##  7        2009     1
##  8        2010   851
##  9        2011   901
## 10        2012   750
## 11        2014   739
## 12        2015  1407
## 13        2016   618
## 14        2017   605
## 15        2018   421
## 16        2019   296
## 17        2020   346
## 18        2021   153
## 19        2022   105

# Use only cohorts with sufficient observations
viable_cohorts <- cohort_sizes %>%
  filter(n >= 10) %>%  # At least 10 authors per cohort
  pull(first.treat)

cat("Viable cohorts (n >= 10):", viable_cohorts, "\n")

## Viable cohorts (n >= 10): 2006 2007 2010 2011 2012 2014 2015 2016 2017 2018 2019 2020 2021 2022

if (length(viable_cohorts) == 0) {
  stop("No viable treatment cohorts with sufficient observations")
}

# Filter data to viable cohorts
did_data_final <- did_data_clean %>%
  filter(first.treat %in% viable_cohorts | first.treat == 0)

cat("Final Sant'Anna & Callaway DiD data:", nrow(did_data_final), "observations\n")

## Final Sant'Anna & Callaway DiD data: 88563 observations

cat("Final unique authors:", n_distinct(did_data_final$id_num), "\n")

## Final unique authors: 21593

# Estimate group-time average treatment effects with robust settings
tryCatch({
  sc_attgt <- att_gt(
    yname = "publications",
    tname = "year",
    idname = "id_num",
    gname = "first.treat",
    data = did_data_final,
    control_group = "nevertreated",  # Use never-treated for simplicity
    est_method = "reg",  # Use regression method (more stable)
    anticipation = 0,
    allow_unbalanced_panel = TRUE
  )
  
  # Display group-time ATTs
  cat("=== SANT'ANNA & CALLAWAY GROUP-TIME AVERAGE TREATMENT EFFECTS ===\n")
  print(summary(sc_attgt))
  
  # Set flag for successful DiD estimation
  sc_success <- TRUE
  
}, error = function(e) {
  cat("Sant'Anna & Callaway DiD estimation failed:", e$message, "\n")
  sc_success <- FALSE
})

## === SANT'ANNA & CALLAWAY GROUP-TIME AVERAGE TREATMENT EFFECTS ===
## 
## Call:
## att_gt(yname = "publications", tname = "year", idname = "id_num", 
##     gname = "first.treat", data = did_data_final, allow_unbalanced_panel = TRUE, 
##     control_group = "nevertreated", anticipation = 0, est_method = "reg")
## 
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 
## 
## Group-Time Average Treatment Effects:
##  Group Time ATT(g,t) Std. Error [95% Simult.  Conf. Band]  
##   2006 2001   0.1592     1.1590       -3.9142      4.2327  
##   2006 2002  -0.9370     1.4755       -6.1226      4.2486  
##   2006 2003  -0.7347     1.2355       -5.0770      3.6076  
##   2006 2004   0.1969     1.0652       -3.5469      3.9408  
##   2006 2005   0.0218     0.9206       -3.2137      3.2574  
##   2006 2006  -4.0194     6.0148      -25.1588     17.1200  
##   2006 2007  -4.5862     6.5763      -27.6991     18.5267  
##   2006 2008  -4.5020     7.0147      -29.1558     20.1518  
##   2006 2009  -4.3782     6.8007      -28.2799     19.5234  
##   2006 2010  -4.2104     6.7802      -28.0401     19.6193  
##   2006 2011  -4.0404     6.9082      -28.3198     20.2390  
##   2006 2012  -3.8434     6.6822      -27.3286     19.6418  
##   2006 2013  -3.3246     5.7850      -23.6565     17.0073  
##   2006 2014  -2.9281     5.4536      -22.0952     16.2390  
##   2006 2015  -3.6955     5.8961      -24.4178     17.0268  
##   2006 2016  -3.1337     5.5821      -22.7523     16.4850  
##   2006 2017  -3.4915     5.8622      -24.0945     17.1115  
##   2006 2018  -2.7322     5.2232      -21.0894     15.6251  
##   2006 2019  -2.3230     5.1957      -20.5838     15.9378  
##   2006 2020  -2.7346     4.9869      -20.2614     14.7921  
##   2006 2021  -2.6853     5.4204      -21.7358     16.3651  
##   2006 2022  -3.3943     5.6688      -23.3178     16.5291  
##   2007 2001   0.4385     0.2737       -0.5233      1.4003  
##   2007 2002  -0.0354     0.2405       -0.8807      0.8099  
##   2007 2003  -0.0696     0.2232       -0.8541      0.7149  
##   2007 2004   0.0287     0.2493       -0.8473      0.9048  
##   2007 2005  -0.2310     0.2244       -1.0198      0.5577  
##   2007 2006   0.3787     0.2221       -0.4018      1.1592  
##   2007 2007   0.0017     0.2328       -0.8164      0.8197  
##   2007 2008   0.0456     0.2332       -0.7740      0.8653  
##   2007 2009   0.0999     0.2199       -0.6730      0.8727  
##   2007 2010   0.3421     0.2473       -0.5271      1.2113  
##   2007 2011   0.0416     0.2322       -0.7745      0.8576  
##   2007 2012   0.5789     0.2568       -0.3236      1.4814  
##   2007 2013   0.5539     0.2456       -0.3093      1.4171  
##   2007 2014   1.1429     0.2888        0.1279      2.1578 *
##   2007 2015   0.8939     0.2722       -0.0628      1.8506  
##   2007 2016   0.7595     0.2869       -0.2488      1.7678  
##   2007 2017   0.8109     0.2906       -0.2105      1.8323  
##   2007 2018   0.6394     0.2807       -0.3472      1.6261  
##   2007 2019   0.7294     0.3124       -0.3687      1.8275  
##   2007 2020   0.6202     0.2857       -0.3838      1.6243  
##   2007 2021   1.0134     0.3352       -0.1645      2.1914  
##   2007 2022   0.6431     0.2938       -0.3895      1.6757  
##   2010 2001  -0.5469     0.2985       -1.5959      0.5021  
##   2010 2002   0.1768     0.3185       -0.9427      1.2963  
##   2010 2003   0.1345     0.3335       -1.0378      1.3067  
##   2010 2004  -0.0265     0.2574       -0.9311      0.8782  
##   2010 2005   0.1695     0.2425       -0.6829      1.0220  
##   2010 2006  -0.0124     0.2089       -0.7464      0.7217  
##   2010 2007   0.1016     0.2101       -0.6369      0.8401  
##   2010 2008  -0.0969     0.2101       -0.8354      0.6417  
##   2010 2009   0.3054     0.1677       -0.2841      0.8950  
##   2010 2010   0.1792     0.1969       -0.5127      0.8712  
##   2010 2011  -0.1090     0.1779       -0.7344      0.5164  
##   2010 2012   0.0444     0.1791       -0.5849      0.6737  
##   2010 2013   0.3401     0.1677       -0.2492      0.9294  
##   2010 2014   0.6831     0.2040       -0.0339      1.4001  
##   2010 2015   0.7715     0.2009        0.0656      1.4775 *
##   2010 2016   0.5531     0.1988       -0.1456      1.2518  
##   2010 2017   0.7722     0.2379       -0.0639      1.6083  
##   2010 2018   0.7742     0.2208       -0.0019      1.5503  
##   2010 2019   0.8326     0.2365        0.0015      1.6637 *
##   2010 2020   1.1706     0.2378        0.3348      2.0065 *
##   2010 2021   1.0892     0.2601        0.1750      2.0033 *
##   2010 2022   0.8182     0.2374       -0.0162      1.6526  
##   2011 2001   0.5045     0.2559       -0.3947      1.4037  
##   2011 2002   0.1088     0.3091       -0.9777      1.1953  
##   2011 2003   0.0936     0.2615       -0.8255      1.0127  
##   2011 2004  -0.1269     0.2808       -1.1139      0.8601  
##   2011 2005  -0.1485     0.2564       -1.0495      0.7526  
##   2011 2006   0.2125     0.1961       -0.4766      0.9017  
##   2011 2007   0.0290     0.2090       -0.7057      0.7637  
##   2011 2008   0.1490     0.2307       -0.6619      0.9599  
##   2011 2009   0.2057     0.2179       -0.5603      0.9717  
##   2011 2010  -0.2768     0.1790       -0.9060      0.3524  
##   2011 2011   0.3341     0.1619       -0.2350      0.9032  
##   2011 2012   0.4297     0.1669       -0.1567      1.0162  
##   2011 2013   0.4240     0.1765       -0.1961      1.0442  
##   2011 2014   0.8770     0.1879        0.2166      1.5374 *
##   2011 2015   0.8398     0.1810        0.2037      1.4758 *
##   2011 2016   0.9777     0.2180        0.2114      1.7441 *
##   2011 2017   0.9275     0.2344        0.1037      1.7514 *
##   2011 2018   1.2614     0.2003        0.5573      1.9655 *
##   2011 2019   1.4452     0.2804        0.4597      2.4307 *
##   2011 2020   1.5562     0.2946        0.5209      2.5914 *
##   2011 2021   1.4485     0.2593        0.5373      2.3597 *
##   2011 2022   1.1954     0.2137        0.4442      1.9466 *
##   2012 2001   0.5433     0.3911       -0.8312      1.9178  
##   2012 2002  -0.0423     0.3152       -1.1500      1.0654  
##   2012 2003   0.1318     0.2737       -0.8300      1.0936  
##   2012 2004  -0.3471     0.3196       -1.4703      0.7762  
##   2012 2005   0.3282     0.3139       -0.7750      1.4315  
##   2012 2006  -0.0960     0.2288       -0.8999      0.7080  
##   2012 2007   0.1515     0.2434       -0.7040      1.0070  
##   2012 2008   0.2205     0.2832       -0.7748      1.2159  
##   2012 2009   0.0615     0.3070       -1.0176      1.1406  
##   2012 2010  -0.1658     0.2383       -1.0035      0.6718  
##   2012 2011  -0.0186     0.2128       -0.7663      0.7292  
##   2012 2012   0.3763     0.1899       -0.2912      1.0439  
##   2012 2013   0.2921     0.1845       -0.3564      0.9406  
##   2012 2014   0.8932     0.2142        0.1405      1.6458 *
##   2012 2015   0.6134     0.2048       -0.1062      1.3331  
##   2012 2016   0.7477     0.1935        0.0678      1.4276 *
##   2012 2017   1.0660     0.2344        0.2421      1.8900 *
##   2012 2018   1.7533     0.2777        0.7773      2.7292 *
##   2012 2019   1.9810     0.2616        1.0615      2.9006 *
##   2012 2020   2.1254     0.2883        1.1122      3.1386 *
##   2012 2021   2.0916     0.3000        1.0374      3.1458 *
##   2012 2022   1.9581     0.3121        0.8612      3.0550 *
##   2014 2001   0.4163     0.3492       -0.8109      1.6435  
##   2014 2002   0.0943     0.3698       -1.2054      1.3939  
##   2014 2003   0.1440     0.3868       -1.2154      1.5035  
##   2014 2004  -0.4716     0.3685       -1.7668      0.8236  
##   2014 2005   0.5588     0.4716       -1.0986      2.2162  
##   2014 2006  -0.3626     0.4714       -2.0192      1.2940  
##   2014 2007   0.0839     0.3681       -1.2097      1.3776  
##   2014 2008  -0.1706     0.3356       -1.3501      1.0089  
##   2014 2009   0.1461     0.2365       -0.6852      0.9774  
##   2014 2010   0.2863     0.2227       -0.4966      1.0692  
##   2014 2011  -0.1214     0.2310       -0.9332      0.6904  
##   2014 2012   0.0061     0.2253       -0.7856      0.7979  
##   2014 2013   0.4732     0.2880       -0.5391      1.4855  
##   2014 2014   0.2564     0.2887       -0.7582      1.2710  
##   2014 2015   0.1750     0.2673       -0.7645      1.1146  
##   2014 2016   0.0630     0.2766       -0.9090      1.0350  
##   2014 2017   0.3833     0.2986       -0.6661      1.4327  
##   2014 2018   0.8363     0.3300       -0.3236      1.9963  
##   2014 2019   1.1700     0.3698       -0.1297      2.4697  
##   2014 2020   1.5260     0.4004        0.1187      2.9334 *
##   2014 2021   1.3199     0.3127        0.2208      2.4190 *
##   2014 2022   1.0404     0.3478       -0.1820      2.2628  
##   2015 2001  -0.4980     0.3182       -1.6164      0.6204  
##   2015 2002   0.2358     0.2219       -0.5439      1.0155  
##   2015 2003  -0.1451     0.2481       -1.0172      0.7270  
##   2015 2004   0.2112     0.2604       -0.7041      1.1266  
##   2015 2005  -0.1371     0.2660       -1.0718      0.7977  
##   2015 2006   0.1143     0.2087       -0.6191      0.8476  
##   2015 2007   0.3352     0.2628       -0.5882      1.2587  
##   2015 2008  -0.3503     0.2869       -1.3586      0.6580  
##   2015 2009   0.3536     0.1613       -0.2133      0.9204  
##   2015 2010   0.1902     0.1784       -0.4368      0.8171  
##   2015 2011   0.0120     0.1720       -0.5926      0.6166  
##   2015 2012   0.0460     0.1493       -0.4787      0.5707  
##   2015 2013   0.4456     0.1880       -0.2151      1.1063  
##   2015 2014  -0.1164     0.1538       -0.6571      0.4242  
##   2015 2015   0.3677     0.1577       -0.1865      0.9220  
##   2015 2016  -0.0049     0.1460       -0.5179      0.5081  
##   2015 2017   0.2346     0.1585       -0.3225      0.7917  
##   2015 2018   0.3754     0.1634       -0.1990      0.9498  
##   2015 2019   0.5600     0.1992       -0.1401      1.2601  
##   2015 2020   0.5063     0.2002       -0.1973      1.2099  
##   2015 2021   0.6392     0.1956       -0.0483      1.3268  
##   2015 2022   0.7874     0.2294       -0.0190      1.5938  
##   2016 2001  -1.2914     1.7075       -7.2925      4.7097  
##   2016 2002   0.5292     0.5814       -1.5141      2.5725  
##   2016 2003  -0.8530     0.3369       -2.0370      0.3311  
##   2016 2004   0.6031     0.2682       -0.3397      1.5458  
##   2016 2005   0.3058     0.3658       -0.9799      1.5915  
##   2016 2006  -0.3526     0.4091       -1.7903      1.0851  
##   2016 2007   0.0413     0.3526       -1.1979      1.2804  
##   2016 2008  -0.1020     0.2181       -0.8684      0.6644  
##   2016 2009   0.5685     0.2708       -0.3833      1.5203  
##   2016 2010  -0.4189     0.3070       -1.4977      0.6599  
##   2016 2011   0.0936     0.3440       -1.1155      1.3027  
##   2016 2012   0.2101     0.2467       -0.6570      1.0773  
##   2016 2013  -0.0928     0.2075       -0.8220      0.6364  
##   2016 2014   0.2063     0.2410       -0.6406      1.0532  
##   2016 2015   0.4638     0.3076       -0.6172      1.5448  
##   2016 2016  -0.1018     0.2916       -1.1266      0.9229  
##   2016 2017  -0.3730     0.3289       -1.5288      0.7828  
##   2016 2018  -0.0717     0.3816       -1.4130      1.2695  
##   2016 2019  -0.2185     0.3597       -1.4827      1.0456  
##   2016 2020  -0.1546     0.3544       -1.4003      1.0911  
##   2016 2021  -0.0054     0.3603       -1.2719      1.2610  
##   2016 2022   0.1113     0.3843       -1.2392      1.4618  
##   2017 2001  -0.0606     0.7160       -2.5771      2.4558  
##   2017 2002   0.9889     0.5287       -0.8693      2.8471  
##   2017 2003  -0.6116     0.8113       -3.4632      2.2399  
##   2017 2004  -0.1621     0.6147       -2.3224      1.9982  
##   2017 2005  -0.2961     0.4100       -1.7372      1.1450  
##   2017 2006   0.2130     0.3243       -0.9266      1.3527  
##   2017 2007   0.0775     0.5744       -1.9414      2.0964  
##   2017 2008  -0.4098     0.5715       -2.4186      1.5989  
##   2017 2009   0.9771     0.3815       -0.3638      2.3179  
##   2017 2010  -0.6814     0.3591       -1.9434      0.5805  
##   2017 2011   0.1006     0.2953       -0.9371      1.1383  
##   2017 2012  -0.0518     0.2566       -0.9538      0.8502  
##   2017 2013   0.4942     0.2843       -0.5049      1.4933  
##   2017 2014   0.0910     0.2690       -0.8544      1.0363  
##   2017 2015   0.1836     0.2630       -0.7408      1.1081  
##   2017 2016   0.1663     0.2587       -0.7430      1.0757  
##   2017 2017  -0.2616     0.2497       -1.1391      0.6159  
##   2017 2018  -0.0062     0.2788       -0.9859      0.9735  
##   2017 2019  -0.0586     0.2695       -1.0057      0.8885  
##   2017 2020  -0.0214     0.2740       -0.9844      0.9416  
##   2017 2021  -0.0277     0.3431       -1.2336      1.1782  
##   2017 2022   0.1796     0.4938       -1.5557      1.9150  
##   2018 2001  -2.0694     0.8946       -5.2134      1.0746  
##   2018 2002   1.1818     0.5978       -0.9193      3.2828  
##   2018 2003   0.4291     0.6563       -1.8777      2.7358  
##   2018 2004  -1.3008     0.5086       -3.0884      0.4869  
##   2018 2005   0.4283     0.3380       -0.7595      1.6161  
##   2018 2006   0.0634     0.3889       -1.3035      1.4302  
##   2018 2007  -0.0564     0.4109       -1.5004      1.3877  
##   2018 2008   0.4211     0.5821       -1.6247      2.4670  
##   2018 2009   0.0059     0.5916       -2.0733      2.0851  
##   2018 2010  -0.3342     0.3041       -1.4031      0.7348  
##   2018 2011   1.2435     1.0201       -2.3419      4.8289  
##   2018 2012  -1.2274     1.0554       -4.9367      2.4818  
##   2018 2013   0.2111     0.2334       -0.6092      1.0315  
##   2018 2014   0.3243     0.3299       -0.8354      1.4839  
##   2018 2015  -0.1561     0.2859       -1.1610      0.8488  
##   2018 2016   0.6241     0.5030       -1.1438      2.3920  
##   2018 2017  -0.5982     0.4495       -2.1779      0.9815  
##   2018 2018   0.2102     0.2672       -0.7289      1.1494  
##   2018 2019   0.0955     0.2810       -0.8922      1.0831  
##   2018 2020   0.5032     0.3283       -0.6506      1.6570  
##   2018 2021   0.4377     0.3518       -0.7986      1.6740  
##   2018 2022   0.4727     0.3817       -0.8687      1.8141  
##   2019 2001   2.4163     0.2428        1.5629      3.2697 *
##   2019 2002  -1.8682     0.4916       -3.5961     -0.1404 *
##   2019 2003  -0.5077     0.5150       -2.3179      1.3024  
##   2019 2004   0.9630     0.7156       -1.5522      3.4781  
##   2019 2005  -0.0429     1.2427       -4.4105      4.3248  
##   2019 2006  -0.8151     1.2021       -5.0400      3.4098  
##   2019 2007   0.3497     0.3408       -0.8482      1.5476  
##   2019 2008   0.2848     0.4368       -1.2505      1.8200  
##   2019 2009  -0.1339     0.3444       -1.3444      1.0765  
##   2019 2010  -0.0284     0.2897       -1.0466      0.9898  
##   2019 2011   0.6240     0.7670       -2.0718      3.3197  
##   2019 2012  -0.3927     0.4909       -2.1179      1.3326  
##   2019 2013   0.6966     0.4378       -0.8422      2.2354  
##   2019 2014  -0.5113     0.4512       -2.0970      1.0743  
##   2019 2015   0.1435     0.4551       -1.4561      1.7431  
##   2019 2016   0.9086     0.4572       -0.6983      2.5155  
##   2019 2017  -0.9114     0.4891       -2.6303      0.8074  
##   2019 2018   0.0459     0.3632       -1.2306      1.3224  
##   2019 2019  -0.5336     0.3059       -1.6088      0.5416  
##   2019 2020  -0.0025     0.3865       -1.3609      1.3559  
##   2019 2021   0.4136     0.3888       -0.9528      1.7800  
##   2019 2022  -0.0661     0.3623       -1.3394      1.2073  
##   2020 2001  -0.5670     0.5083       -2.3534      1.2193  
##   2020 2002   0.4056     0.4572       -1.2012      2.0124  
##   2020 2003   0.7065     0.5737       -1.3097      2.7228  
##   2020 2004   0.1031     0.7987       -2.7040      2.9102  
##   2020 2005  -0.1000     0.9572       -3.4641      3.2641  
##   2020 2006  -0.3218     0.8536       -3.3217      2.6781  
##   2020 2007   0.2853     0.5355       -1.5969      2.1675  
##   2020 2008  -0.6405     0.3598       -1.9050      0.6241  
##   2020 2009   0.9243     0.4545       -0.6730      2.5217  
##   2020 2010  -0.1049     0.4413       -1.6558      1.4460  
##   2020 2011  -0.4790     0.4425       -2.0341      1.0761  
##   2020 2012  -0.3833     0.2854       -1.3863      0.6196  
##   2020 2013   0.0973     0.3218       -1.0338      1.2284  
##   2020 2014   0.5823     0.3436       -0.6253      1.7898  
##   2020 2015   0.5108     0.4985       -1.2412      2.2628  
##   2020 2016  -0.2055     0.6234       -2.3965      1.9855  
##   2020 2017  -0.5278     0.6665       -2.8704      1.8149  
##   2020 2018   0.6030     0.3219       -0.5284      1.7344  
##   2020 2019  -0.3564     0.3151       -1.4638      0.7509  
##   2020 2020   0.0029     0.2805       -0.9831      0.9889  
##   2020 2021  -0.3051     0.3193       -1.4272      0.8170  
##   2020 2022  -0.0233     0.4849       -1.7275      1.6810  
##   2021 2001   0.9330     1.0365       -2.7101      4.5760  
##   2021 2002  -1.7849     1.1232       -5.7325      2.1627  
##   2021 2003  -0.2697     0.3141       -1.3735      0.8342  
##   2021 2004   0.0531     0.2474       -0.8165      0.9227  
##   2021 2005   1.3492     0.7282       -1.2102      3.9086  
##   2021 2006  -0.4293     0.9214       -3.6678      2.8092  
##   2021 2007  -0.2639     0.9248       -3.5141      2.9862  
##   2021 2008  -0.2380     0.2686       -1.1818      0.7059  
##   2021 2009   0.2206     0.4094       -1.2183      1.6594  
##   2021 2010   0.9539     0.4742       -0.7126      2.6205  
##   2021 2011  -0.6597     0.7736       -3.3787      2.0594  
##   2021 2012   0.2795     0.6223       -1.9077      2.4667  
##   2021 2013  -0.3611     0.5110       -2.1568      1.4347  
##   2021 2014   0.3546     0.3463       -0.8623      1.5716  
##   2021 2015  -0.1772     0.3559       -1.4281      1.0737  
##   2021 2016  -0.0866     0.2638       -1.0136      0.8405  
##   2021 2017   0.4094     0.3800       -0.9263      1.7451  
##   2021 2018  -0.2541     0.3855       -1.6088      1.1007  
##   2021 2019   0.2768     0.3559       -0.9739      1.5275  
##   2021 2020  -0.2310     0.3023       -1.2935      0.8315  
##   2021 2021   0.4905     0.6597       -1.8282      2.8091  
##   2021 2022   0.4805     0.8118       -2.3727      3.3336  
##   2022 2001  -0.2337     0.8592       -3.2534      2.7859  
##   2022 2002  -0.6182     0.4374       -2.1557      0.9192  
##   2022 2003   1.0637     0.9977       -2.4429      4.5702  
##   2022 2004  -0.5469     1.1351       -4.5363      3.4425  
##   2022 2005  -0.4794     0.4102       -1.9211      0.9623  
##   2022 2006   0.2612     0.3321       -0.9060      1.4284  
##   2022 2007   1.0095     0.9083       -2.1826      4.2017  
##   2022 2008  -0.9400     0.9658       -4.3343      2.4543  
##   2022 2009   0.4559     0.4731       -1.2067      2.1185  
##   2022 2010  -0.3925     0.4085       -1.8283      1.0433  
##   2022 2011   0.4877     0.3475       -0.7336      1.7090  
##   2022 2012  -0.1848     0.2997       -1.2380      0.8684  
##   2022 2013   0.4599     0.4229       -1.0266      1.9463  
##   2022 2014   0.0681     0.4707       -1.5861      1.7224  
##   2022 2015  -0.0618     0.4045       -1.4834      1.3598  
##   2022 2016   1.3574     0.7813       -1.3886      4.1035  
##   2022 2017  -1.2489     0.6598       -3.5676      1.0699  
##   2022 2018  -0.0142     0.4046       -1.4363      1.4079  
##   2022 2019  -0.2513     0.4018       -1.6635      1.1609  
##   2022 2020   0.2667     0.5536       -1.6790      2.2124  
##   2022 2021   0.5161     0.5147       -1.2927      2.3250  
##   2022 2022   0.1612     0.3094       -0.9261      1.2484  
## ---
## Signif. codes: `*' confidence band does not cover 0
## 
## P-value for pre-test of parallel trends assumption:  0
## Control Group:  Never Treated,  Anticipation Periods:  0
## Estimation Method:  Outcome Regression
## NULL

9. Dynamic Effects with Sant’Anna & Callaway Estimator

# Check if we have successful Sant'Anna & Callaway model
if (exists("sc_success") && sc_success) {
  
  # Aggregate to dynamic effects with error handling
  tryCatch({
    sc_dynamic <- aggte(sc_attgt, type = "dynamic", na.rm = TRUE)
    
    cat("=== DYNAMIC TREATMENT EFFECTS (SANT'ANNA & CALLAWAY) ===\n")
    print(summary(sc_dynamic))
    
    # Extract dynamic effects from Sant'Anna & Callaway DiD
    sc_dynamic_df <- data.frame(
      time = sc_dynamic$egt,
      estimate = sc_dynamic$att.egt,
      se = sc_dynamic$se.egt,
      method = "Sant'Anna & Callaway"
    ) %>%
      mutate(
        conf.low = estimate - 1.96 * se,
        conf.high = estimate + 1.96 * se
      )
    
    # Plot Sant'Anna & Callaway dynamic effects
    p_sc_dynamic <- ggdid(sc_dynamic) +
      labs(title = "Dynamic Treatment Effects - Sant'Anna & Callaway Estimator",
           subtitle = "Event study plot from robust DiD estimation") +
      theme_minimal()
    
    print(p_sc_dynamic)
    
    # Compare with TWFE if available
    if (exists("twfe_event_coefs") && !is.null(twfe_event_coefs)) {
      # Combine TWFE and Sant'Anna & Callaway dynamic effects
      dynamic_comparison <- bind_rows(
        twfe_event_coefs %>% select(time, estimate, conf.low, conf.high, method),
        sc_dynamic_df %>% select(time, estimate, conf.low, conf.high, method)
      ) %>%
        filter(!is.na(time))
      
      # Plot comparison
      p_dynamic_comparison <- ggplot(dynamic_comparison, 
                                    aes(x = time, y = estimate, 
                                        color = method, fill = method)) +
        geom_point(position = position_dodge(width = 0.3), size = 2.5) +
        geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                      position = position_dodge(width = 0.3), 
                      width = 0.2) +
        geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
        geom_vline(xintercept = -0.5, linetype = "dashed", color = "gray") +
        labs(title = "Comparison: Dynamic Treatment Effects",
             subtitle = "TWFE vs Sant'Anna & Callaway Estimator with Slovak Grant Data",
             x = "Time to Treatment", y = "Treatment Effect (Publications)",
             color = "Method", fill = "Method") +
        theme_minimal() +
        theme(legend.position = "bottom")
      
      print(p_dynamic_comparison)
    }
    
  }, error = function(e) {
    cat("Dynamic effects aggregation failed:", e$message, "\n")
  })
  
} else {
  cat("Cannot compute dynamic effects - Sant'Anna & Callaway model failed to estimate\n")
  
  # Create alternative visualization
  p_treatment_timing <- final_data %>%
    filter(first.treat > 0) %>%
    distinct(id, first.treat) %>%
    ggplot(aes(x = first.treat)) +
    geom_histogram(binwidth = 1, fill = "steelblue", alpha = 0.7) +
    labs(title = "Distribution of Treatment Timing",
         subtitle = "When authors received their first grant",
         x = "Year of First Grant", y = "Number of Authors") +
    theme_minimal()
  
  print(p_treatment_timing)
}

## === DYNAMIC TREATMENT EFFECTS (SANT'ANNA & CALLAWAY) ===
## 
## Call:
## aggte(MP = sc_attgt, type = "dynamic", na.rm = TRUE)
## 
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 
## 
## 
## Overall summary of ATT's based on event-study/dynamic aggregation:  
##     ATT    Std. Error     [ 95%  Conf. Int.] 
##  0.1616        0.8333    -1.4717      1.7949 
## 
## 
## Dynamic Effects:
##  Event time Estimate Std. Error [95% Simult.  Conf. Band]  
##         -21  -0.2337     0.8722       -2.7066      2.2392  
##         -20   0.3017     0.6121       -1.4339      2.0372  
##         -19  -0.5921     0.4205       -1.7842      0.6001  
##         -18   0.8410     0.2361        0.1715      1.5105 *
##         -17  -0.9250     0.3395       -1.8877      0.0376  
##         -16   0.3012     0.2745       -0.4770      1.0793  
##         -15   0.1068     0.4479       -1.1631      1.3766  
##         -14  -0.3934     0.2084       -0.9844      0.1976  
##         -13   0.0135     0.1435       -0.3932      0.4203  
##         -12  -0.0085     0.1077       -0.3138      0.2968  
##         -11   0.3137     0.1146       -0.0112      0.6386  
##         -10  -0.0487     0.1177       -0.3824      0.2851  
##          -9   0.0003     0.0993       -0.2811      0.2817  
##          -8   0.0792     0.0957       -0.1921      0.3505  
##          -7   0.0338     0.1022       -0.2558      0.3234  
##          -6   0.0282     0.0855       -0.2142      0.2706  
##          -5   0.1041     0.0781       -0.1173      0.3255  
##          -4   0.0922     0.0828       -0.1426      0.3270  
##          -3   0.0047     0.0814       -0.2261      0.2354  
##          -2   0.1275     0.0758       -0.0873      0.3423  
##          -1   0.0577     0.0723       -0.1474      0.2628  
##           0   0.0360     0.1840       -0.4856      0.5575  
##           1  -0.0824     0.1984       -0.6450      0.4801  
##           2   0.0902     0.2107       -0.5071      0.6876  
##           3   0.2064     0.2192       -0.4150      0.8279  
##           4   0.3381     0.2336       -0.3241      1.0002  
##           5   0.5077     0.2486       -0.1971      1.2125  
##           6   0.6772     0.2614       -0.0640      1.4183  
##           7   0.9587     0.2519        0.2446      1.6729 *
##           8   1.0203     0.3180        0.1188      1.9217 *
##           9   0.9739     0.4385       -0.2694      2.2172  
##          10   1.0502     0.3939       -0.0665      2.1668  
##          11   0.6012     0.5291       -0.8989      2.1013  
##          12   0.2898     0.7397       -1.8073      2.3869  
##          13  -0.2029     1.4373       -4.2779      3.8721  
##          14  -0.0348     1.4106       -4.0341      3.9645  
##          15  -0.2878     1.5194       -4.5955      4.0200  
##          16  -3.3943     5.6500      -19.4129     12.6242  
## ---
## Signif. codes: `*' confidence band does not cover 0
## 
## Control Group:  Never Treated,  Anticipation Periods:  0
## Estimation Method:  Outcome Regression
## NULL

10. Overall Treatment Effect Comparison

# Create overall comparison table
overall_comparison <- data.frame(
  Method = "TWFE",
  Estimate = coef(twfe_model)["post"],
  SE = se(twfe_model)["post"],
  Observations = nobs(twfe_model),
  stringsAsFactors = FALSE
)

if (exists("sc_success") && sc_success) {
  # Aggregate overall effects from Sant'Anna & Callaway with error handling
  tryCatch({
    sc_overall <- aggte(sc_attgt, type = "group", na.rm = TRUE)
    
    overall_comparison <- bind_rows(
      overall_comparison,
      data.frame(
        Method = "Sant'Anna & Callaway",
        Estimate = sc_overall$overall.att,
        SE = sc_overall$overall.se,
        Observations = sc_overall$DIDparams$n
      )
    )
    
    cat("=== OVERALL AVERAGE TREATMENT EFFECT (SANT'ANNA & CALLAWAY) ===\n")
    print(summary(sc_overall))
    
  }, error = function(e) {
    cat("Overall Sant'Anna & Callaway aggregation failed:", e$message, "\n")
  })
}

## === OVERALL AVERAGE TREATMENT EFFECT (SANT'ANNA & CALLAWAY) ===
## 
## Call:
## aggte(MP = sc_attgt, type = "group", na.rm = TRUE)
## 
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 
## 
## 
## Overall summary of ATT's based on group/cohort aggregation:  
##     ATT    Std. Error     [ 95%  Conf. Int.]  
##  0.3861        0.1792     0.0349      0.7373 *
## 
## 
## Group Effects:
##  Group Estimate Std. Error [95% Simult.  Conf. Band]  
##   2006  -3.5308     5.9879      -20.2297     13.1682  
##   2007   0.5573     0.2092       -0.0261      1.1407  
##   2010   0.6092     0.1610        0.1603      1.0581 *
##   2011   0.9764     0.1506        0.5564      1.3963 *
##   2012   1.2635     0.1722        0.7833      1.7437 *
##   2014   0.7523     0.2706       -0.0024      1.5069  
##   2015   0.4332     0.1451        0.0286      0.8378 *
##   2016  -0.1163     0.3121       -0.9865      0.7540  
##   2017  -0.0327     0.2495       -0.7285      0.6632  
##   2018   0.3439     0.2446       -0.3382      1.0259  
##   2019  -0.0471     0.2895       -0.8544      0.7601  
##   2020  -0.1085     0.3150       -0.9869      0.7699  
##   2021   0.4855     0.6492       -1.3249      2.2958  
##   2022   0.1612     0.3282       -0.7541      1.0764  
## ---
## Signif. codes: `*' confidence band does not cover 0
## 
## Control Group:  Never Treated,  Anticipation Periods:  0
## Estimation Method:  Outcome Regression
## NULL

# Calculate confidence intervals and significance
overall_comparison <- overall_comparison %>%
  mutate(
    CI_lower = Estimate - 1.96 * SE,
    CI_upper = Estimate + 1.96 * SE,
    Significant = ifelse(CI_lower > 0 | CI_upper < 0, "Yes", "No"),
    Percent_Change = (Estimate / mean(final_data$publications)) * 100
  )

kable(overall_comparison, digits = 3, 
      caption = "Overall Treatment Effect Comparison - Sant'Anna & Callaway vs TWFE") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  row_spec(which(overall_comparison$Significant == "Yes"), 
           background = "#E6F3FF")

Overall Treatment Effect Comparison - Sant’Anna & Callaway vs TWFE
	Method	Estimate	SE	Observations	CI_lower	CI_upper	Significant	Percent_Change
post	TWFE	0.429	0.043	104747	0.345	0.513	Yes	15.423
…2	Sant’Anna & Callaway	0.386	0.179	21593	0.035	0.737	Yes	13.869

11. Comprehensive Results Visualization

# Create a comprehensive visualization dashboard
visualization_list <- list()

# 1. Treatment cohort distribution
p_cohort_dist <- final_data %>%
  filter(first.treat > 0) %>%
  distinct(id, first.treat) %>%
  ggplot(aes(x = first.treat)) +
  geom_histogram(binwidth = 1, fill = "steelblue", alpha = 0.7) +
  labs(title = "A) Distribution of First Grant Years",
       x = "Year of First Grant", y = "Number of Authors") +
  theme_minimal()

# 2. Publication trends by treatment status
p_trends_simple <- final_data %>%
  filter(year >= 1995 & year <= 2024) %>%
  group_by(year, treated = first.treat > 0) %>%
  summarise(mean_publications = mean(publications), .groups = 'drop') %>%
  ggplot(aes(x = year, y = mean_publications, color = treated)) +
  geom_line(size = 1) +
  geom_point() +
  labs(title = "B) Publication Trends by Treatment Status",
       x = "Year", y = "Mean Publications", color = "Treated") +
  theme_minimal()

# Arrange basic plots
basic_dashboard <- p_cohort_dist | p_trends_simple
print(basic_dashboard)

# Add overall comparison if available
if (nrow(overall_comparison) > 1) {
  p_overall_compare <- overall_comparison %>%
    ggplot(aes(x = Method, y = Estimate, fill = Method)) +
    geom_col(alpha = 0.7) +
    geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper), width = 0.2) +
    labs(title = "C) Overall Treatment Effect Comparison",
         subtitle = "TWFE vs Sant'Anna & Callaway Estimator",
         x = "Method", y = "Treatment Effect (Publications)") +
    theme_minimal() +
    theme(legend.position = "none")
  
  print(p_overall_compare)
}

Event Study Analysis: Two Decades of Slovak Research Grants

Sant’Anna & Callaway Event Study Estimation

# Prepare data for Sant'Anna & Callaway event study
event_study_data <- final_data %>%
  mutate(
    id_num = as.numeric(factor(id)),
    year = as.numeric(year),
    first_treat = as.numeric(first.treat)
  ) %>%
  filter(
    year >= 1995 & year <= 2020,
    first_treat >= 2000,
    !is.na(publications),
    !is.na(year),
    !is.na(first_treat)
  )

cat("Sant'Anna & Callaway Event Study Dataset:\n")

## Sant'Anna & Callaway Event Study Dataset:

cat("  Time period:", min(event_study_data$year), "-", max(event_study_data$year), "\n")

##   Time period: 1995 - 2020

cat("  Observations:", nrow(event_study_data), "\n")

##   Observations: 44918

cat("  Unique authors:", n_distinct(event_study_data$id_num), "\n")

##   Unique authors: 7420

# Estimate event study using Sant'Anna & Callaway estimator
sc_event_attgt <- att_gt(
  yname = "publications",
  tname = "year",
  idname = "id_num",
  gname = "first_treat",
  data = event_study_data,
  control_group = "notyettreated",
  est_method = "dr",
  anticipation = 0,
  allow_unbalanced_panel = TRUE
)

# Aggregate to event study
sc_event_study <- aggte(
  sc_event_attgt, 
  type = "dynamic", 
  na.rm = TRUE,
  min_e = -10,
  max_e = 10
)

cat("=== SANT'ANNA & CALLAWAY EVENT STUDY RESULTS ===\n")

## === SANT'ANNA & CALLAWAY EVENT STUDY RESULTS ===

print(summary(sc_event_study))

## 
## Call:
## aggte(MP = sc_event_attgt, type = "dynamic", min_e = -10, max_e = 10, 
##     na.rm = TRUE)
## 
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 
## 
## 
## Overall summary of ATT's based on event-study/dynamic aggregation:  
##     ATT    Std. Error     [ 95%  Conf. Int.] 
##  0.1945        0.3589    -0.5089      0.8978 
## 
## 
## Dynamic Effects:
##  Event time Estimate Std. Error [95% Simult.  Conf. Band] 
##         -10   0.0994     0.1402       -0.2835      0.4823 
##          -9  -0.0770     0.1234       -0.4140      0.2600 
##          -8   0.1245     0.1369       -0.2494      0.4984 
##          -7  -0.0585     0.1177       -0.3799      0.2628 
##          -6  -0.0058     0.1022       -0.2849      0.2733 
##          -5   0.0769     0.0870       -0.1607      0.3144 
##          -4   0.0320     0.0903       -0.2147      0.2788 
##          -3  -0.0135     0.0848       -0.2451      0.2181 
##          -2   0.0255     0.0924       -0.2270      0.2780 
##          -1  -0.0528     0.0875       -0.2920      0.1863 
##           0  -0.0016     0.2031       -0.5563      0.5531 
##           1  -0.2148     0.2239       -0.8263      0.3968 
##           2  -0.0514     0.2528       -0.7419      0.6390 
##           3   0.0216     0.2501       -0.6616      0.7048 
##           4   0.0609     0.2586       -0.6456      0.7673 
##           5   0.3376     0.3221       -0.5422      1.2174 
##           6   0.4181     0.3838       -0.6302      1.4664 
##           7   0.6282     0.4433       -0.5827      1.8391 
##           8   0.7137     0.4208       -0.4359      1.8634 
##           9   0.3313     0.5761       -1.2425      1.9051 
##          10  -0.1042     0.7782       -2.2300      2.0215 
## ---
## Signif. codes: `*' confidence band does not cover 0
## 
## Control Group:  Not Yet Treated,  Anticipation Periods:  0
## Estimation Method:  Doubly Robust
## NULL

Enhanced Event Study Plot with Sant’Anna & Callaway Labeling

# Extract event study data for plotting
es_data <- data.frame(
  period = sc_event_study$egt,
  estimate = sc_event_study$att.egt,
  std_error = sc_event_study$se.egt,
  conf_low = sc_event_study$att.egt - 1.96 * sc_event_study$se.egt,
  conf_high = sc_event_study$att.egt + 1.96 * sc_event_study$se.egt
)

# Test for pre-trends
pre_periods <- es_data %>% filter(period < 0)
pre_trend_test <- lm(estimate ~ period, data = pre_periods)
pre_trend_p <- summary(pre_trend_test)$coefficients[2, 4]

# Create enhanced plot with Sant'Anna & Callaway labeling
enhanced_plot <- ggplot(es_data, aes(x = period, y = estimate)) +
  # Background shading for pre/post periods
  annotate("rect", xmin = -10.5, xmax = -0.5, ymin = -Inf, ymax = Inf, 
           alpha = 0.03, fill = "red") +
  annotate("rect", xmin = -0.5, xmax = 10.5, ymin = -Inf, ymax = Inf, 
           alpha = 0.03, fill = "blue") +
  # Zero line
  geom_hline(yintercept = 0, linetype = "dashed", color = "black", alpha = 0.5) +
  # Treatment line
  geom_vline(xintercept = -0.5, linetype = "solid", color = "darkblue", size = 1) +
  # Confidence intervals
  geom_ribbon(aes(ymin = conf_low, ymax = conf_high), 
              fill = "#2E86AB", alpha = 0.3) +
  # Point estimates
  geom_line(color = "#2E86AB", size = 1.5) +
  geom_point(color = "#2E86AB", size = 4, fill = "white", shape = 21, stroke = 1.5) +
  # Scale and labels
  scale_x_continuous(
    breaks = seq(-10, 10, by = 2),
    labels = function(x) ifelse(x < 0, paste0(abs(x), "Y"), 
                               ifelse(x > 0, paste0(x, "Y"), "Treatment\nYear")),
    expand = expansion(mult = 0.05)
  ) +
  labs(
    title = "Dynamic Effects of Research Grants on Scientific Publications",
    subtitle = "Sant'Anna & Callaway (2021) Estimator • Two-Decade Analysis",
    x = "\nYears Relative to Grant Receipt",
    y = "Treatment Effect \n (Additional Publications per Year)",
    caption = paste(
      "Method: Sant'Anna & Callaway (2021) Robust DiD Estimator • Control: Not-Yet-Treated Authors •",
      "Pre-trend p-value:", round(pre_trend_p, 4),
      "\nData: Web of Science • Period: 1995-2020"
    )
  ) +
  theme_minimal(base_size = 16) +
  theme(
    plot.title = element_text(face = "bold", size = 22, hjust = 0.5, margin = margin(b = 10)),
    plot.subtitle = element_text(size = 16, hjust = 0.5, color = "gray40", margin = margin(b = 20)),
    plot.caption = element_text(size = 12, color = "gray50", hjust = 1, margin = margin(t = 15)),
    axis.title = element_text(face = "bold", size = 14),
    axis.text = element_text(size = 12),
    axis.text.x = element_text(angle = 0, hjust = 0.5, lineheight = 0.8),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray85", linewidth = 0.4),
    plot.background = element_rect(fill = "white", color = NA),
    panel.background = element_rect(fill = "white", color = NA),
    plot.margin = margin(1, 1, 1, 1, "cm")
  )

# Add methodological annotation
enhanced_plot <- enhanced_plot 
print(enhanced_plot)

Cohort-Specific Analysis with Sant’Anna & Callaway Estimator

# Get cohort-specific estimates using Sant'Anna & Callaway
sc_cohort_effects <- aggte(sc_event_attgt, type = "group", na.rm = TRUE)

sc_cohort_data <- data.frame(
  cohort = sc_cohort_effects$egt,
  att = sc_cohort_effects$att.egt,
  se = sc_cohort_effects$se.egt
) %>%
  mutate(
    conf_low = att - 1.96 * se,
    conf_high = att + 1.96 * se,
    significant = ifelse(conf_low > 0 | conf_high < 0, "Yes", "No")
  )

# Plot cohort-specific effects
cohort_plot <- ggplot(sc_cohort_data, aes(x = factor(cohort), y = att)) +
  geom_pointrange(aes(ymin = conf_low, ymax = conf_high, color = significant), 
                  size = 1, fatten = 2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  scale_color_manual(values = c("Yes" = "darkgreen", "No" = "gray60")) +
  labs(
    title = "Cohort-Specific Average Treatment Effects",
    subtitle = "Sant'Anna & Callaway Estimator by Treatment Cohort",
    x = "Treatment Cohort (Year)",
    y = "Average Treatment Effect on Publications",
    color = "Statistically\nSignificant"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

print(cohort_plot)

Publication Effects of Slovak Research Grants

Tomas Oles

2025-11-14