# Count the articles per journal-year: keep the distinct rows of the first
# seven columns of `data`, then tally them within each Year x Journal cell.
data_1 <- data[, 1:7] %>%
  distinct() %>%
  group_by(Year, Journal) %>%
  summarise(Number = n())
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
# Create an "Overall" pseudo-journal holding each year's total over all journals.
data_2 <- data_1 %>%
  group_by(Year) %>%
  summarise(Number = sum(Number)) %>%
  # The scalar is recycled to every row, so the number of years is no longer
  # hard-coded (the original `rep("Overall", 26)` breaks for any other count).
  mutate(Journal = "Overall")
# Append the yearly totals to the per-journal counts; columns are matched by
# name, so the differing column order of the two tables does not matter.
data_1 <- bind_rows(data_1, data_2)
rm(data_2)
# Line chart of the "Overall" yearly paper counts, leaving out Year 2024.
no_year_overall <- ggline(
  filter(data_1, Year != 2024, Journal == "Overall"),
  x = "Year",
  y = "Number"
) +
  rotate_x_text()
# Add the title and display the plot.
ggpar(no_year_overall, title = "Number of Papers Per Year (all journal)")
# Drop the temporary table; the name `data_1` is reused further down.
rm(data_1)
# Count the rows of `data` in every Year x Research_Design cell and turn the
# design label into a factor for plotting.
data_1 <- data %>%
  group_by(Year, Research_Design) %>%
  summarise(Number = n()) %>%
  mutate(Research_Design = as.factor(Research_Design))
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
# Plot the yearly number of papers for each research design (Year 2024 is
# left out). Each design gets its own line type and point shape.
no_per_method_year <- data_1 %>%
  filter(Year != 2024) %>%
  ggline(
    "Year", "Number",
    linetype = "Research_Design",
    # Spelled out in full: the former `shap` only reached this argument
    # through R's partial argument matching.
    shape = "Research_Design"
  ) +
  rotate_x_text()
# Relabel the legend and display the plot.
ggpar(no_per_method_year, legend.title = "Research Design")
rm(data_1)
# Summarise `iv_data` by year: for each assumption, count the rows whose
# Examined_* flag equals "yes", add two combined counts and the yearly row
# total, and render the result as a gt table.
iv_by_year <- iv_data %>%
  group_by(Year) %>%
  summarise(
    Exogeneity = sum(Examined_Exogeneity == "yes"),
    Relevance = sum(Examined_Relevance == "yes"),
    Exclusion = sum(Examined_Exclusion == "yes"),
    Monotonicity = sum(Examined_Monotonicity == "yes"),
    # Rows in which all four assumptions were examined.
    All_Examined = sum(
      Examined_Exogeneity == "yes" &
        Examined_Relevance == "yes" &
        Examined_Exclusion == "yes" &
        Examined_Monotonicity == "yes"
    ),
    # Rows in which exogeneity, exclusion and relevance were all examined;
    # monotonicity is not part of this condition.
    EER_Examined = sum(
      Examined_Exogeneity == "yes" &
        Examined_Relevance == "yes" &
        Examined_Exclusion == "yes"
    ),
    Total_Number = n()
  ) %>%
  gt() %>%
  tab_header(title = "Assumption Examinations of IV Design across Years")
# Print the table.
iv_by_year
| Assumption Examinations of IV Design across Years | |||||||
| Year | Exogeneity | Relevance | Exclusion | Monotonicity | All_Examined | EER_Examined | Total_Number |
|---|---|---|---|---|---|---|---|
| 1994 | 1 | 1 | 1 | 0 | 0 | 1 | 1 |
| 1999 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| 2002 | 2 | 2 | 2 | 0 | 0 | 2 | 2 |
| 2003 | 3 | 3 | 2 | 1 | 1 | 2 | 3 |
| 2004 | 1 | 1 | 0 | 0 | 0 | 0 | 1 |
| 2005 | 2 | 2 | 0 | 0 | 0 | 0 | 2 |
| 2006 | 3 | 5 | 2 | 0 | 0 | 2 | 5 |
| 2007 | 4 | 4 | 2 | 0 | 0 | 2 | 5 |
| 2008 | 5 | 5 | 4 | 1 | 1 | 4 | 5 |
| 2009 | 7 | 7 | 6 | 1 | 1 | 6 | 7 |
| 2010 | 9 | 9 | 6 | 0 | 0 | 6 | 9 |
| 2011 | 21 | 20 | 12 | 0 | 0 | 12 | 23 |
| 2012 | 14 | 14 | 8 | 2 | 2 | 8 | 14 |
| 2013 | 34 | 34 | 20 | 0 | 0 | 20 | 34 |
| 2014 | 25 | 26 | 18 | 4 | 3 | 17 | 26 |
| 2015 | 25 | 26 | 18 | 1 | 0 | 18 | 28 |
| 2016 | 43 | 44 | 23 | 1 | 0 | 23 | 45 |
| 2017 | 35 | 36 | 21 | 0 | 0 | 20 | 36 |
| 2018 | 31 | 33 | 23 | 1 | 1 | 22 | 34 |
| 2019 | 28 | 28 | 21 | 1 | 0 | 21 | 28 |
| 2020 | 45 | 44 | 31 | 0 | 0 | 30 | 45 |
| 2021 | 45 | 45 | 32 | 2 | 2 | 31 | 46 |
| 2022 | 36 | 37 | 25 | 0 | 0 | 25 | 37 |
| 2023 | 38 | 40 | 27 | 7 | 6 | 24 | 41 |
| 2024 | 6 | 6 | 4 | 0 | 0 | 4 | 7 |
# Summarise `did_data` by year and render the counts as a gt table.
did_by_year <- did_data %>%
  group_by(Year) %>%
  summarise(
    # A row counts towards the parallel-trend column when either its
    # Examined_Parallel_Trend or its Examined_Exogeneity flag is "yes".
    Parallel_Trend = sum(
      Examined_Parallel_Trend == "yes" |
        Examined_Exogeneity == "yes"
    ),
    No_Anticipation = sum(Examined_No_Anticipation == "yes"),
    # Rows satisfying the parallel-trend condition above and also flagged
    # "yes" for no-anticipation.
    Both_Examined = sum(
      (Examined_Parallel_Trend == "yes" |
        Examined_Exogeneity == "yes") &
        Examined_No_Anticipation == "yes"
    ),
    Total_Number = n()
  ) %>%
  gt() %>%
  tab_header(title = "Assumption Examinations of DiD Design across Years")
# Print the table.
did_by_year
| Assumption Examinations of DiD Design across Years | ||||
| Year | Parallel_Trend | No_Anticipation | Both_Examined | Total_Number |
|---|---|---|---|---|
| 1998 | 1 | 0 | 0 | 1 |
| 2006 | 1 | 1 | 1 | 1 |
| 2008 | 1 | 0 | 0 | 1 |
| 2009 | 3 | 2 | 2 | 3 |
| 2010 | 2 | 2 | 2 | 2 |
| 2011 | 5 | 2 | 2 | 5 |
| 2012 | 6 | 4 | 4 | 6 |
| 2013 | 8 | 3 | 3 | 10 |
| 2014 | 9 | 3 | 3 | 9 |
| 2015 | 12 | 7 | 7 | 13 |
| 2016 | 16 | 4 | 4 | 16 |
| 2017 | 17 | 8 | 8 | 17 |
| 2018 | 24 | 12 | 12 | 25 |
| 2019 | 18 | 6 | 6 | 18 |
| 2020 | 30 | 10 | 10 | 30 |
| 2021 | 24 | 8 | 8 | 24 |
| 2022 | 25 | 12 | 12 | 25 |
| 2023 | 27 | 16 | 16 | 28 |
| 2024 | 12 | 5 | 5 | 13 |
# Summarise `match_data` by year: count the rows whose Examined_* flag equals
# "yes" for each matching assumption, for both together, and the yearly row
# total, then render the result as a gt table.
#
# `na.rm = TRUE` is required: a single missing flag within a year otherwise
# turns that year's whole count into NA.
# NOTE(review): missing flags are therefore treated as "not examined" --
# confirm this is intended, or fill in the missing values in the source data.
match_by_year <- match_data %>%
  group_by(Year) %>%
  summarise(
    Conditional_Exogeneity = sum(
      Examined_Conditional_Exogeneity == "yes",
      na.rm = TRUE
    ),
    Common_Support = sum(Examined_Common_Support == "yes", na.rm = TRUE),
    Both_Examined = sum(
      Examined_Conditional_Exogeneity == "yes" &
        Examined_Common_Support == "yes",
      na.rm = TRUE
    ),
    Total_Number = n()
  ) %>%
  gt() %>%
  # Title typo fixed ("Matchig" -> "Matching").
  tab_header(title = "Assumption Examinations of Matching across Years")
# Print the table.
match_by_year
| Assumption Examinations of Matching across Years | ||||
| Year | Conditional_Exogeneity | Common_Support | Both_Examined | Total_Number |
|---|---|---|---|---|
| 2006 | 1 | 0 | 0 | 1 |
| 2010 | 2 | 1 | 1 | 2 |
| 2011 | 5 | 2 | 2 | 5 |
| 2012 | 3 | 1 | 1 | 3 |
| 2013 | 9 | 5 | 5 | 10 |
| 2014 | 11 | 6 | 6 | 12 |
| 2015 | 17 | 5 | 5 | 17 |
| 2016 | 19 | 13 | 13 | 20 |
| 2017 | 24 | 12 | 12 | 25 |
| 2018 | 26 | 11 | 11 | 29 |
| 2019 | 25 | NA | NA | 26 |
| 2020 | 29 | 13 | 13 | 31 |
| 2021 | 32 | NA | NA | 34 |
| 2022 | 33 | 22 | 22 | 33 |
| 2023 | 57 | 29 | 29 | 58 |
| 2024 | 15 | 8 | 8 | 15 |
# Summarise `rdd_data` by year and render the counts as a gt table.
rdd_by_year <- rdd_data %>%
  group_by(Year) %>%
  summarise(
    Continuity = sum(Examined_Continuity == "yes"),
    No_Perfect_Manipulation = sum(
      Examined_No_Perfect_Manipulation == "yes"
    ),
    # Rows in which at least one of the two flags is "yes".
    Either_Examined = sum(
      Examined_Continuity == "yes" |
        Examined_No_Perfect_Manipulation == "yes"
    ),
    Total_Number = n()
  ) %>%
  gt() %>%
  tab_header(title = "Assumption Examinations of RDD across Years")
# Print the table.
rdd_by_year
| Assumption Examinations of RDD across Years | ||||
| Year | Continuity | No_Perfect_Manipulation | Either_Examined | Total_Number |
|---|---|---|---|---|
| 2017 | 1 | 1 | 1 | 1 |
| 2018 | 2 | 2 | 2 | 2 |
| 2019 | 3 | 2 | 3 | 4 |
| 2020 | 1 | 1 | 1 | 1 |
| 2021 | 1 | 0 | 1 | 1 |
| 2022 | 2 | 2 | 2 | 2 |
| 2023 | 4 | 3 | 4 | 4 |
| 2024 | 1 | 1 | 1 | 1 |
# Per-year totals of the method columns in `iv_data`, rendered as a gt table.
# NOTE(review): the columns are summed directly, so they are presumably
# logical or 0/1 indicators of whether a paper used the method -- confirm.
iv_method_by_year <- iv_data %>%
  group_by(Year) %>%
  summarise(
    Exogeneity_Conceptual_Discussion = sum(
      Conceptual_Discussion_of_Exogeneity
    ),
    Relevance_Fstats = sum(Relevance_Fstats),
    Relevance_Stock_Yogo_Test = sum(Relevance_Stock_Yogo_Test),
    Exclusion_Conceptual_Discussion = sum(
      Conceptual_Discussion_of_Exclusion
    ),
    Validity_Sargan_Hansen_Test = sum(Specification_Sargan_Hansen_Test),
    Validity_Hausman_Test = sum(Specification_Durbin_Wu_Hausman_Test),
    Validity_Placebo_Test = sum(Placebo_Test)
  ) %>%
  gt() %>%
  tab_header(title = "Methods to Examine IV Assumptions")
# Print the table.
iv_method_by_year
| Methods to Examine IV Assumptions | |||||||
| Year | Exogeneity_Conceptual_Discussion | Relevance_Fstats | Relevance_Stock_Yogo_Test | Exclusion_Conceptual_Discussion | Validity_Sargan_Hansen_Test | Validity_Hausman_Test | Validity_Placebo_Test |
|---|---|---|---|---|---|---|---|
| 1994 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
| 1999 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2002 | 2 | 2 | 0 | 0 | 0 | 1 | 1 |
| 2003 | 3 | 2 | 0 | 1 | 0 | 0 | 0 |
| 2004 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
| 2005 | 2 | 2 | 0 | 0 | 0 | 1 | 0 |
| 2006 | 4 | 5 | 0 | 2 | 0 | 1 | 0 |
| 2007 | 4 | 1 | 1 | 1 | 0 | 0 | 0 |
| 2008 | 4 | 4 | 0 | 1 | 0 | 1 | 0 |
| 2009 | 6 | 3 | 1 | 1 | 2 | 2 | 0 |
| 2010 | 7 | 7 | 0 | 1 | 2 | 3 | 0 |
| 2011 | 14 | 16 | 2 | 6 | 4 | 8 | 0 |
| 2012 | 11 | 6 | 1 | 5 | 4 | 5 | 0 |
| 2013 | 28 | 26 | 0 | 10 | 7 | 5 | 2 |
| 2014 | 22 | 19 | 2 | 7 | 6 | 6 | 0 |
| 2015 | 25 | 18 | 3 | 9 | 5 | 5 | 1 |
| 2016 | 38 | 39 | 2 | 7 | 16 | 13 | 3 |
| 2017 | 30 | 27 | 4 | 14 | 8 | 8 | 2 |
| 2018 | 23 | 23 | 3 | 12 | 10 | 5 | 2 |
| 2019 | 20 | 21 | 3 | 15 | 6 | 5 | 2 |
| 2020 | 36 | 33 | 2 | 14 | 11 | 6 | 3 |
| 2021 | 38 | 37 | 6 | 16 | 7 | 8 | 2 |
| 2022 | 32 | 35 | 4 | 11 | 8 | 5 | 2 |
| 2023 | 35 | 33 | 3 | 16 | 9 | 6 | 6 |
| 2024 | 4 | 5 | 0 | 3 | 1 | 1 | 1 |
# Per-year totals of the method columns in `did_data`, rendered as a gt table.
# NOTE(review): the columns are summed and combined with `|`, so they are
# presumably logical or 0/1 indicators -- confirm.
did_method_by_year <- did_data %>%
  group_by(Year) %>%
  summarise(
    Parallel_Trend_Conceptual_Discussion = sum(
      Conceptual_Discussion_Of_Exogeneity
    ),
    # A row is counted once when it has a pre-trend analysis, an event-study
    # plot, or both.
    Parallel_Trend_PreTrend = sum(PreTrend_Analysis | Event_Study_Plot),
    Parallel_Trend_Sensitivity_Analysis = sum(Sensitivity_Analysis),
    No_Anticipation_Conceptual_Discussion = sum(
      Conceptual_Discussion_Of_No_Anticipation
    ),
    Validity_Placebo_Test = sum(Placebo_Test)
  ) %>%
  gt() %>%
  tab_header(title = "Methods to Examine DiD Assumptions")
# Print the table.
did_method_by_year
| Methods to Examine DiD Assumptions | |||||
| Year | Parallel_Trend_Conceptual_Discussion | Parallel_Trend_PreTrend | Parallel_Trend_Sensitivity_Analysis | No_Anticipation_Conceptual_Discussion | Validity_Placebo_Test |
|---|---|---|---|---|---|
| 1998 | 1 | 1 | 0 | 1 | 0 |
| 2006 | 1 | 1 | 0 | 1 | 0 |
| 2008 | 1 | 0 | 0 | 1 | 0 |
| 2009 | 3 | 3 | 0 | 2 | 0 |
| 2010 | 2 | 2 | 0 | 2 | 0 |
| 2011 | 3 | 4 | 0 | 4 | 2 |
| 2012 | 5 | 6 | 0 | 3 | 1 |
| 2013 | 8 | 7 | 0 | 5 | 5 |
| 2014 | 8 | 8 | 0 | 3 | 5 |
| 2015 | 7 | 11 | 0 | 7 | 6 |
| 2016 | 14 | 14 | 2 | 14 | 10 |
| 2017 | 16 | 16 | 0 | 13 | 9 |
| 2018 | 21 | 22 | 2 | 16 | 12 |
| 2019 | 15 | 16 | 1 | 13 | 6 |
| 2020 | 24 | 26 | 2 | 19 | 7 |
| 2021 | 17 | 21 | 2 | 17 | 6 |
| 2022 | 21 | 24 | 1 | 16 | 9 |
| 2023 | 18 | 22 | 2 | 21 | 12 |
| 2024 | 9 | 12 | 0 | 7 | 5 |
# Per-year totals of the method columns in `match_data`, rendered as a gt
# table.
# NOTE(review): the columns are summed directly, so they are presumably
# logical or 0/1 indicators -- confirm.
match_method_by_year <- match_data %>%
  group_by(Year) %>%
  summarise(
    Conditional_Exogeneity_Sensitivity_Analysis = sum(Sensitivity_Analysis),
    Common_Support_Propensity_Scores = sum(Propensity_Score_Distributions),
    Match_Quality_Balance_Test = sum(Balance_Test),
    Match_Quality_Distance_Metrics = sum(Distance_Measures)
  ) %>%
  gt() %>%
  tab_header(title = "Methods to Examine Matching Assumptions")
# Print the table.
match_method_by_year
| Methods to Examine Matching Assumptions | ||||
| Year | Conditional_Exogeneity_Sensitivity_Analysis | Common_Support_Propensity_Scores | Match_Quality_Balance_Test | Match_Quality_Distance_Metrics |
|---|---|---|---|---|
| 2006 | 0 | 1 | 1 | 0 |
| 2010 | 0 | 1 | 2 | 1 |
| 2011 | 0 | 5 | 5 | 2 |
| 2012 | 2 | 3 | 3 | 1 |
| 2013 | 1 | 7 | 9 | 2 |
| 2014 | 1 | 7 | 10 | 5 |
| 2015 | 3 | 8 | 17 | 6 |
| 2016 | 6 | 14 | 19 | 9 |
| 2017 | 4 | 21 | 24 | 7 |
| 2018 | 7 | 19 | 27 | 12 |
| 2019 | 7 | 15 | 24 | 6 |
| 2020 | 3 | 17 | 29 | 10 |
| 2021 | 7 | 20 | 32 | 10 |
| 2022 | 8 | 26 | 32 | 16 |
| 2023 | 8 | 37 | 53 | 22 |
| 2024 | 4 | 8 | 15 | 2 |
Note:
The matching
method has two potential biases: 1) the confounding bias due to the
violation of conditional exogeneity, and 2) the specification bias
due to how samples are trimmed or how propensity scores are calculated
(e.g., with a logit model). The balance tests and the distance measures
are meant to reduce the second source of bias. Therefore, we need to put
them in a different category.
# Per-year totals of the two validation-test columns in `rdd_data`, rendered
# as a gt table.
# NOTE(review): the columns are summed directly, so they are presumably
# logical or 0/1 indicators -- confirm.
rdd_method_by_year <- rdd_data %>%
  group_by(Year) %>%
  summarise(
    Continuity_Balance_Test = sum(Balance_Test),
    Continuity_Density_Test = sum(McCrary_Density_Test)
  ) %>%
  gt() %>%
  tab_header(title = "Methods to Examine RDD Assumptions")
# Print the table.
rdd_method_by_year
| Methods to Examine RDD Assumptions | ||
| Year | Continuity_Balance_Test | Continuity_Density_Test |
|---|---|---|
| 2017 | 0 | 1 |
| 2018 | 2 | 2 |
| 2019 | 1 | 3 |
| 2020 | 1 | 1 |
| 2021 | 1 | 0 |
| 2022 | 1 | 2 |
| 2023 | 4 | 4 |
| 2024 | 1 | 1 |
Note:
The
continuity and no-perfect-manipulation assumptions are the same assumption
viewed and articulated from different perspectives - mathematical properties
of potential outcomes and local experiments, respectively. However, the two
perspectives lead to different approaches to RDD estimation and validation.
For example, the continuity assumption implies that variables that are
related to the focal outcome (but not influenced by the treatment)
should be continuous around the cutoff. Therefore, we examine the
distribution of these variables. In comparison, if RDD is treated as a
local experiment, then pre-determined covariates should be balanced as
in a randomized experiment, and hence the balance test.