Data
# === SAMPLE FILTERS (adjust here) ===
MIN_CAPACITY_MCM <- 1 # Set to NA to skip capacity filter
REQUIRE_CAPACITY <- TRUE # Set to TRUE to drop dams without capacity_mcm
analysis_data <- readRDS(file.path(strategic_dir,
"strategic_placement_analysis_data.rds"))
# Apply sample filters
n_full <- nrow(analysis_data)
if (REQUIRE_CAPACITY) {
analysis_data <- analysis_data %>% filter(!is.na(capacity_mcm))
}
if (!is.na(MIN_CAPACITY_MCM)) {
analysis_data <- analysis_data %>%
filter(is.na(capacity_mcm) | capacity_mcm >= MIN_CAPACITY_MCM)
}
# Ensure decade variable exists
analysis_data <- analysis_data %>%
mutate(decade = floor(as.numeric(year_fin) / 10) * 10)
# Basin grouping for FE
analysis_data <- analysis_data %>%
mutate(
basin_id = as.character(main_riv),
basin_group = ifelse(
dam_id %in% (analysis_data %>%
group_by(main_riv) %>%
filter(n() >= 5) %>%
pull(dam_id)),
as.character(main_riv),
"Other basins"
)
)
# Check which version columns are available
has_v1 <- "suitability_ratio_v1" %in% names(analysis_data)
has_v3 <- "suitability_ratio_v3" %in% names(analysis_data)
# Check if constrained (same-country) columns are available
has_constrained <- "suitability_ratio_c" %in% names(analysis_data) &&
sum(!is.na(analysis_data$suitability_ratio_c)) > 30
has_constrained_v1 <- has_constrained &&
"suitability_ratio_c_v1" %in% names(analysis_data)
has_constrained_v3 <- has_constrained &&
"suitability_ratio_c_v3" %in% names(analysis_data)
# Transboundary subset with along-river border distance
tb_data <- analysis_data %>%
filter(transboundary == 1, !is.na(dam_dist_border))
# Hydro/irrigation-only subsample
hydro_irrig_data <- analysis_data %>%
filter(hydropower == 1 | irrigation == 1)
# Version labels
versions <- c("v1", "v2", "v3")[c(has_v1, TRUE, has_v3)]
1. Sample Overview
This section describes the analysis sample after filtering from the
Global Dam Tracker (GDAT) universe. Dams are retained if they can be
matched to a HydroRIVERS segment, have a recorded completion year
between 1950–2025, and have a known primary purpose. The suitability
computation considers all candidate river segments within a +/-200 km
buffer along the river network, scoring each on hydropower potential
(flow, catchment area, hydraulic head) and population exposure at the
time of construction.
tibble(
Statistic = c("Total dams", "Transboundary", "Domestic",
"TB with along-river border distance",
"Hydropower dams", "Irrigation dams",
"Hydro or Irrigation subsample",
"Mean candidates per dam",
"Median candidates per dam"),
Value = c(
nrow(analysis_data),
sum(analysis_data$transboundary),
sum(1 - analysis_data$transboundary),
nrow(tb_data),
sum(analysis_data$hydropower),
sum(analysis_data$irrigation),
nrow(hydro_irrig_data),
sprintf("%.0f", mean(analysis_data$n_candidates, na.rm = TRUE)),
sprintf("%.0f", median(analysis_data$n_candidates, na.rm = TRUE))
)
) %>%
kbl(caption = "Sample Summary") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
Sample Summary
|
Statistic
|
Value
|
|
Total dams
|
4901
|
|
Transboundary
|
2634
|
|
Domestic
|
2267
|
|
TB with along-river border distance
|
745
|
|
Hydropower dams
|
732
|
|
Irrigation dams
|
1054
|
|
Hydro or Irrigation subsample
|
1786
|
|
Mean candidates per dam
|
156
|
|
Median candidates per dam
|
59
|
analysis_data %>%
count(purpose_category, transboundary) %>%
mutate(transboundary = ifelse(transboundary == 1,
"Transboundary", "Domestic")) %>%
pivot_wider(names_from = transboundary, values_from = n, values_fill = 0) %>%
mutate(Total = Domestic + Transboundary) %>%
arrange(desc(Total)) %>%
kbl(caption = "Dams by Purpose and Transboundary Status") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
Dams by Purpose and Transboundary Status
|
purpose_category
|
Domestic
|
Transboundary
|
Total
|
|
Other
|
692
|
688
|
1380
|
|
Flood Control
|
489
|
656
|
1145
|
|
Irrigation
|
456
|
598
|
1054
|
|
Hydropower
|
336
|
396
|
732
|
|
Water Supply
|
294
|
296
|
590
|
analysis_data %>%
count(decade, transboundary) %>%
mutate(transboundary = ifelse(transboundary == 1,
"Transboundary", "Domestic")) %>%
pivot_wider(names_from = transboundary, values_from = n, values_fill = 0) %>%
mutate(Total = Domestic + Transboundary) %>%
kbl(caption = "Dams by Decade") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
Dams by Decade
|
decade
|
Domestic
|
Transboundary
|
Total
|
|
1950
|
363
|
402
|
765
|
|
1960
|
671
|
705
|
1376
|
|
1970
|
538
|
651
|
1189
|
|
1980
|
383
|
436
|
819
|
|
1990
|
220
|
220
|
440
|
|
2000
|
67
|
131
|
198
|
|
2010
|
24
|
63
|
87
|
|
2020
|
1
|
26
|
27
|
2. Suitability Ratios Across Specifications
I compute three alternative suitability indices to ensure the results
are not driven by a particular functional form:
- V1 (Baseline): Raw product of discharge, upstream
catchment area, and hydraulic head. Heavily right-skewed because a few
candidate sites with large rivers dominate the score.
- V2 (Gradient-weighted): Applies square-root
transforms to flow and catchment area, giving less weight to extreme
values. This is the primary specification.
- V3 (Percentile Ranks): Converts each component to
its within-river-system percentile rank before combining. This ensures
each component contributes roughly equally and yields ratios on a more
interpretable 0–1 scale.
The suitability ratio divides a dam’s actual site score by the best
available score within +/-200 km along the river network. A ratio of 1
means the dam sits at the optimal site; lower values indicate the dam
was placed at a less suitable location relative to nearby
alternatives.
suit_summary <- analysis_data %>%
mutate(tb_label = ifelse(transboundary == 1,
"Transboundary", "Domestic")) %>%
group_by(tb_label) %>%
summarise(
n = n(),
across(
all_of(paste0("suitability_ratio_", versions)),
list(mean = ~mean(.x, na.rm = TRUE),
median = ~median(.x, na.rm = TRUE)),
.names = "{.col}_{.fn}"
),
.groups = "drop"
)
suit_summary %>%
kbl(digits = 3,
caption = "Suitability Ratio by Specification and TB Status") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE) %>%
add_header_above(c(" " = 2,
"V1 (Baseline)" = 2,
"V2 (Gradient)" = 2,
"V3 (Percentile)" = 2))
Suitability Ratio by Specification and TB Status
|
|
V1 (Baseline)
|
V2 (Gradient)
|
V3 (Percentile)
|
|
tb_label
|
n
|
suitability_ratio_v1_mean
|
suitability_ratio_v1_median
|
suitability_ratio_v2_mean
|
suitability_ratio_v2_median
|
suitability_ratio_v3_mean
|
suitability_ratio_v3_median
|
|
Domestic
|
2267
|
0.146
|
0.009
|
0.241
|
0.111
|
0.696
|
0.706
|
|
Transboundary
|
2634
|
0.080
|
0.002
|
0.140
|
0.057
|
0.647
|
0.655
|
Quick t-tests (no controls)
trb <- analysis_data %>% filter(transboundary == 1)
dom <- analysis_data %>% filter(transboundary == 0)
for (v in versions) {
col <- paste0("suitability_ratio_", v)
tt <- t.test(trb[[col]], dom[[col]])
cat(sprintf(
"**%s**: TRB mean = %.3f, DOM mean = %.3f, diff = %.3f (p = %.4f) \n",
toupper(v), tt$estimate[1], tt$estimate[2],
tt$estimate[1] - tt$estimate[2], tt$p.value
))
}
V1: TRB mean = 0.080, DOM mean = 0.146, diff =
-0.066 (p = 0.0000)
V2: TRB mean = 0.140, DOM mean = 0.241, diff = -0.101
(p = 0.0000)
V3: TRB mean = 0.647, DOM mean = 0.696, diff = -0.049
(p = 0.0000)
Across all three specifications, transboundary dams have lower mean
suitability ratios than domestic dams. The gap is largest for V1 and V2,
which are more sensitive to extreme suitability scores on large rivers.
V3 compresses the distribution by construction (percentile ranks are
bounded between 0 and 1), so the raw gap is smaller, but it remains
significant. This pattern is consistent with the hypothesis that
transboundary dams face political or strategic constraints that lead to
placement at sites that are sub-optimal from a purely engineering
perspective.
3. Suitability Ratio Density Plots
The density plots below visualize the full distribution of
suitability ratios for domestic and transboundary dams. Key patterns to
look for: (1) whether the transboundary distribution is shifted leftward
(toward worse sites), and (2) whether the shapes differ—for instance, a
heavier left tail for transboundary dams would suggest a subset of
particularly poorly-sited dams.
plot_density <- function(data, col, vlabel) {
ggplot(data, aes(x = .data[[col]],
fill = factor(transboundary))) +
geom_density(alpha = 0.6) +
geom_vline(xintercept = 1, linetype = "dashed", color = "black") +
scale_fill_manual(
values = c("0" = "steelblue", "1" = "coral"),
labels = c("Domestic", "Transboundary"),
name = ""
) +
labs(
title = paste("Distribution of Suitability Ratios -", vlabel),
subtitle = "Ratio of 1 = dam at optimal location; lower = worse site quality",
x = "Suitability Ratio (Actual / Optimal)",
y = "Density"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")
}
V3 (Percentile Ranks)
plot_density(analysis_data, "suitability_ratio_v3",
"V3 (Percentile Ranks)")

V2 (Gradient-weighted)
plot_density(analysis_data, "suitability_ratio_v2",
"V2 (Gradient-weighted)")

V1 (Baseline)
plot_density(analysis_data, "suitability_ratio_v1",
"V1 (Baseline)")

4. Suitability by Dam Purpose
Different dam purposes may have different site selection criteria.
Hydropower dams, for example, are strongly constrained by topography and
river flow, so siting distortions may be more visible. Irrigation dams
may have more flexibility in placement. The bar charts below show mean
suitability ratios by purpose category, separately for domestic and
transboundary dams, with 95% confidence intervals.
plot_purpose <- function(data, col, vlabel) {
pdata <- data %>%
group_by(transboundary, purpose_category) %>%
summarise(
mean_suit = mean(.data[[col]], na.rm = TRUE),
se_suit = sd(.data[[col]], na.rm = TRUE) / sqrt(n()),
n = n(),
.groups = "drop"
) %>%
filter(n >= 5) %>%
mutate(tb_label = ifelse(transboundary == 1,
"Transboundary", "Domestic"))
ggplot(pdata, aes(x = purpose_category, y = mean_suit,
fill = tb_label)) +
geom_col(position = position_dodge(width = 0.8), width = 0.7) +
geom_errorbar(
aes(ymin = mean_suit - 1.96 * se_suit,
ymax = mean_suit + 1.96 * se_suit),
position = position_dodge(width = 0.8), width = 0.3
) +
scale_fill_manual(values = c("Domestic" = "steelblue",
"Transboundary" = "coral")) +
labs(
title = paste("Suitability by Purpose -", vlabel),
subtitle = "Error bars show 95% confidence intervals",
x = "Dam Purpose", y = "Mean Suitability Ratio", fill = ""
) +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1))
}
V3 (Percentile Ranks)
plot_purpose(analysis_data, "suitability_ratio_v3",
"V3 (Percentile Ranks)")

V2 (Gradient-weighted)
plot_purpose(analysis_data, "suitability_ratio_v2",
"V2 (Gradient-weighted)")

V1 (Baseline)
plot_purpose(analysis_data, "suitability_ratio_v1",
"V1 (Baseline)")

5. Main Regression Results (V2 Baseline)
I present a sequence of increasingly demanding specifications using
V2 (gradient-weighted suitability) as the baseline outcome. The
progression from simple OLS to country fixed effects allows me to assess
how much of the transboundary gap is explained by observable controls,
geographic sorting across continents, and country-level heterogeneity.
Controls include log reservoir capacity, and indicators for hydropower
and irrigation as primary purposes.
model1 <- feols(
suitability_ratio ~ transboundary,
data = analysis_data
)
model2 <- feols(
suitability_ratio ~ transboundary + log_capacity + hydropower + irrigation,
data = analysis_data
)
model3 <- feols(
suitability_ratio ~ transboundary + log_capacity + hydropower + irrigation |
continent + decade,
data = analysis_data,
cluster = ~continent
)
model4 <- feols(
suitability_ratio ~ transboundary + log_capacity + hydropower + irrigation |
admin0 + decade,
data = analysis_data,
cluster = ~admin0
)
etable(model1, model2, model3, model4,
headers = c("Simple", "Controls", "Continent FE", "Country FE"),
title = "Effect of Transboundary Status on Suitability Ratio (V2)")
## model1 model2 model3
## Simple Controls Continent FE
## Dependent Var.: suitability_ratio suitability_ratio suitability_ratio
##
## Constant 0.2408*** (0.0050) 0.1152*** (0.0075)
## transboundary -0.1013*** (0.0068) -0.0910*** (0.0065) -0.0794** (0.0144)
## log_capacity 0.0312*** (0.0017) 0.0318** (0.0055)
## hydropower 0.0561*** (0.0101) 0.0574. (0.0244)
## irrigation 0.0121 (0.0081) -0.0127 (0.0332)
## Fixed-Effects: ------------------- ------------------- ------------------
## continent No No Yes
## decade No No Yes
## admin0 No No No
## _______________ ___________________ ___________________ __________________
## S.E. type IID IID by: continent
## Observations 4,901 4,901 4,901
## R2 0.04288 0.14023 0.18403
## Within R2 -- -- 0.12103
##
## model4
## Country FE
## Dependent Var.: suitability_ratio
##
## Constant
## transboundary -0.0655*** (0.0074)
## log_capacity 0.0307*** (0.0027)
## hydropower 0.0369 (0.0258)
## irrigation -0.0285 (0.0217)
## Fixed-Effects: -------------------
## continent No
## decade Yes
## admin0 Yes
## _______________ ___________________
## S.E. type by: admin0
## Observations 4,901
## R2 0.26802
## Within R2 0.09185
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
b1 <- coef(model1)["transboundary"]
se1 <- se(model1)["transboundary"]
b2 <- coef(model2)["transboundary"]
b3 <- coef(model3)["transboundary"]
p3 <- pvalue(model3)["transboundary"]
b4 <- coef(model4)["transboundary"]
p4 <- pvalue(model4)["transboundary"]
b_cap3 <- coef(model3)["log_capacity"]
b_hydro3 <- coef(model3)["hydropower"]
cat(sprintf(
"The transboundary coefficient is negative and highly significant across all specifications. The simple OLS estimate (%.3f, p < 0.001) shrinks modestly with controls (%.3f) and further with continent and decade fixed effects (%.3f, p = %.3f), but remains significant even with country fixed effects (%.3f, p = %.3f). This last specification compares transboundary and domestic dams *within the same country*, controlling for any country-level differences in engineering capacity, regulatory environment, or river quality. The fact that the penalty persists at the within-country level suggests it is not driven by transboundary dams being concentrated in countries with generally worse dam sites.\n\n",
b1, b2, b3, p3, b4, p4
))
The transboundary coefficient is negative and highly significant
across all specifications. The simple OLS estimate (-0.101, p <
0.001) shrinks modestly with controls (-0.091) and further with
continent and decade fixed effects (-0.079, p = 0.005), but remains
significant even with country fixed effects (-0.065, p = 0.000). This
last specification compares transboundary and domestic dams within
the same country, controlling for any country-level differences in
engineering capacity, regulatory environment, or river quality. The fact
that the penalty persists at the within-country level suggests it is not
driven by transboundary dams being concentrated in countries with
generally worse dam sites.
cat(sprintf(
"The control variables behave as expected: larger dams (higher log capacity) are placed at better sites (%.3f, p < 0.01 across specifications), consistent with larger projects receiving more careful siting. Hydropower dams tend to be at somewhat better sites (%.3f in the continent FE model), consistent with stronger engineering constraints on hydropower siting. The irrigation indicator is small and insignificant.\n",
b_cap3, b_hydro3
))
The control variables behave as expected: larger dams (higher log
capacity) are placed at better sites (0.032, p < 0.01 across
specifications), consistent with larger projects receiving more careful
siting. Hydropower dams tend to be at somewhat better sites (0.057 in
the continent FE model), consistent with stronger engineering
constraints on hydropower siting. The irrigation indicator is small and
insignificant.
6. Heterogeneity by Dam Purpose
I explore whether the transboundary penalty varies by dam purpose.
Hydropower dams are most constrained by physical geography (they need
specific combinations of flow and head), so any political distortion of
siting should be most visible for these dams. Irrigation dams may have
more locational flexibility, potentially diffusing the signal.
Full Sample
hydro_data <- analysis_data %>% filter(hydropower == 1)
irrig_data <- analysis_data %>% filter(irrigation == 1)
model_hydro <- feols(
suitability_ratio ~ transboundary + log_capacity | continent + decade,
data = hydro_data, cluster = ~continent
)
model_irrig <- feols(
suitability_ratio ~ transboundary + log_capacity | continent + decade,
data = irrig_data, cluster = ~continent
)
model_purpose_interact <- feols(
suitability_ratio ~ transboundary * hydropower +
transboundary * irrigation + log_capacity | continent + decade,
data = analysis_data, cluster = ~continent
)
Table 2: Full Sample
etable(model_hydro, model_irrig, model_purpose_interact,
headers = c("Hydropower Only", "Irrigation Only", "Full Sample Interactions"),
title = "Effects by Dam Purpose (Full Sample)")
## model_hydro model_irrig
## Hydropower Only Irrigation Only
## Dependent Var.: suitability_ratio suitability_ratio
##
## transboundary -0.0485* (0.0145) -0.0670 (0.0527)
## log_capacity 0.0219*** (0.0012) 0.0415. (0.0113)
## hydropower
## irrigation
## transboundary x hydropower
## transboundary x irrigation
## Fixed-Effects: ------------------ -----------------
## continent Yes Yes
## decade Yes Yes
## __________________________ __________________ _________________
## S.E.: Clustered by: continent by: continent
## Observations 732 1,054
## R2 0.08431 0.19414
## Within R2 0.03821 0.13432
##
## model_purpose_i..
## Full Sample Interactions
## Dependent Var.: suitability_ratio
##
## transboundary -0.0987* (0.0278)
## log_capacity 0.0316** (0.0056)
## hydropower 0.0316 (0.0295)
## irrigation -0.0417 (0.0618)
## transboundary x hydropower 0.0531. (0.0216)
## transboundary x irrigation 0.0541 (0.0660)
## Fixed-Effects: -----------------
## continent Yes
## decade Yes
## __________________________ _________________
## S.E.: Clustered by: continent
## Observations 4,901
## R2 0.18672
## Within R2 0.12392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
b_hydro <- coef(model_hydro)["transboundary"]
p_hydro <- pvalue(model_hydro)["transboundary"]
se_hydro <- se(model_hydro)["transboundary"]
n_hydro <- nobs(model_hydro)
b_irrig <- coef(model_irrig)["transboundary"]
p_irrig <- pvalue(model_irrig)["transboundary"]
se_irrig <- se(model_irrig)["transboundary"]
n_irrig <- nobs(model_irrig)
b_interact_base <- coef(model_purpose_interact)["transboundary"]
p_interact_base <- pvalue(model_purpose_interact)["transboundary"]
b_interact_hydro <- coef(model_purpose_interact)["transboundary:hydropower"]
p_interact_hydro <- pvalue(model_purpose_interact)["transboundary:hydropower"]
b_interact_irrig <- coef(model_purpose_interact)["transboundary:irrigation"]
p_interact_irrig <- pvalue(model_purpose_interact)["transboundary:irrigation"]
cat(sprintf(
"The split-sample results show that the transboundary penalty is present across dam types but with different precision. For hydropower dams alone (n = %d), the coefficient is %.3f (SE = %.3f, p = %.3f)---significant and negative. For irrigation dams (n = %d), the point estimate is actually larger in magnitude (%.3f) but not individually significant (SE = %.3f, p = %.3f), likely reflecting the small number of continent clusters rather than absence of an effect.\n\n",
n_hydro, b_hydro, se_hydro, p_hydro,
n_irrig, b_irrig, se_irrig, p_irrig
))
The split-sample results show that the transboundary penalty is
present across dam types but with different precision. For hydropower
dams alone (n = 732), the coefficient is -0.048 (SE = 0.015, p =
0.029)—significant and negative. For irrigation dams (n = 1054), the
point estimate is actually larger in magnitude (-0.067) but not
individually significant (SE = 0.053, p = 0.332), likely reflecting the
small number of continent clusters rather than absence of an effect.
net_hydro <- b_interact_base + b_interact_hydro
net_irrig <- b_interact_base + b_interact_irrig
cat(sprintf(
"The full-sample interaction model reveals a base transboundary penalty of %.3f (p = %.3f) for dams with purposes other than hydropower or irrigation. Both `transboundary x hydropower` (+%.3f, p = %.3f) and `transboundary x irrigation` (+%.3f, p = %.3f) are positive, partially offsetting the base penalty. The net transboundary effect for hydropower dams is approximately %.3f and for irrigation dams %.3f. The hydropower interaction is marginally significant, while the irrigation interaction is not---but both point in the same direction: the transboundary siting distortion may be somewhat smaller for dams whose purpose imposes stronger physical-geographic constraints on location.\n",
b_interact_base, p_interact_base,
b_interact_hydro, p_interact_hydro,
b_interact_irrig, p_interact_irrig,
net_hydro, net_irrig
))
The full-sample interaction model reveals a base transboundary
penalty of -0.099 (p = 0.024) for dams with purposes other than
hydropower or irrigation. Both transboundary x hydropower
(+0.053, p = 0.070) and transboundary x irrigation (+0.054,
p = 0.458) are positive, partially offsetting the base penalty. The net
transboundary effect for hydropower dams is approximately -0.046 and for
irrigation dams -0.045. The hydropower interaction is marginally
significant, while the irrigation interaction is not—but both point in
the same direction: the transboundary siting distortion may be somewhat
smaller for dams whose purpose imposes stronger physical-geographic
constraints on location.
Hydro/Irrigation Subsample
To obtain a cleaner comparison between dam types, I restrict to dams
whose primary purpose is either hydropower or irrigation (n = 1786). In
this subsample, the hydropower indicator distinguishes the
two types directly.
model_hi_base <- feols(
suitability_ratio ~ transboundary + log_capacity + hydropower |
continent + decade,
data = hydro_irrig_data,
cluster = ~continent
)
model_hi_interact <- feols(
suitability_ratio ~ transboundary * hydropower + log_capacity |
continent + decade,
data = hydro_irrig_data,
cluster = ~continent
)
Table 2b: Hydro/Irrigation Only
etable(model_hydro, model_irrig, model_hi_base, model_hi_interact,
headers = c("Hydropower Only", "Irrigation Only",
"H/I Combined", "H/I Interaction"),
title = "Effects by Dam Purpose (Hydro/Irrigation Sample)")
## model_hydro model_irrig
## Hydropower Only Irrigation Only
## Dependent Var.: suitability_ratio suitability_ratio
##
## transboundary -0.0485* (0.0145) -0.0670 (0.0527)
## log_capacity 0.0219*** (0.0012) 0.0415. (0.0113)
## hydropower
## transboundary x hydropower
## Fixed-Effects: ------------------ -----------------
## continent Yes Yes
## decade Yes Yes
## __________________________ __________________ _________________
## S.E.: Clustered by: continent by: continent
## Observations 732 1,054
## R2 0.08431 0.19414
## Within R2 0.03821 0.13432
##
## model_hi_base model_hi_interact
## H/I Combined H/I Interaction
## Dependent Var.: suitability_ratio suitability_ratio
##
## transboundary -0.0579 (0.0326) -0.0615 (0.0466)
## log_capacity 0.0323** (0.0063) 0.0322** (0.0062)
## hydropower 0.0744. (0.0290) 0.0693 (0.0434)
## transboundary x hydropower 0.0093 (0.0437)
## Fixed-Effects: ----------------- -----------------
## continent Yes Yes
## decade Yes Yes
## __________________________ _________________ _________________
## S.E.: Clustered by: continent by: continent
## Observations 1,786 1,786
## R2 0.16371 0.16377
## Within R2 0.13655 0.13662
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
b_hi_base <- coef(model_hi_base)["transboundary"]
p_hi_base <- pvalue(model_hi_base)["transboundary"]
has_hi_interact_coef <- "transboundary:hydropower" %in% names(coef(model_hi_interact))
if (has_hi_interact_coef) {
b_hi_interact <- coef(model_hi_interact)["transboundary:hydropower"]
p_hi_interact <- pvalue(model_hi_interact)["transboundary:hydropower"]
cat(sprintf(
"In the hydro/irrigation subsample, the base transboundary effect is %.3f (p = %.3f). The interaction `transboundary x hydropower` is %.3f (p = %.3f), indicating whether the transboundary penalty differs between hydropower and irrigation dams within this cleaner comparison group.\n",
b_hi_base, p_hi_base, b_hi_interact, p_hi_interact
))
} else {
cat(sprintf(
"In the hydro/irrigation subsample, the transboundary effect is %.3f (p = %.3f).\n",
b_hi_base, p_hi_base
))
}
In the hydro/irrigation subsample, the base transboundary effect is
-0.058 (p = 0.150). The interaction
transboundary x hydropower is 0.009 (p = 0.842), indicating
whether the transboundary penalty differs between hydropower and
irrigation dams within this cleaner comparison group.
7. Position Deviation from Optimal
Position deviation measures the distance (in km along the river)
between where a dam was actually built and where the
suitability-maximizing site would have been. Positive values mean the
dam was placed upstream of the optimal site; negative values mean
downstream. If transboundary dams are systematically displaced from
optimal locations—particularly toward upstream positions where the river
is still within the dam-building country’s territory—this would suggest
strategic positioning motivations.
plot_posdev <- function(data, col, vlabel) {
ggplot(data, aes(x = .data[[col]],
fill = factor(transboundary))) +
geom_density(alpha = 0.6) +
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
scale_fill_manual(
values = c("0" = "steelblue", "1" = "coral"),
labels = c("Domestic", "Transboundary"),
name = ""
) +
coord_cartesian(xlim = c(-200, 200)) +
labs(
title = paste("Position Deviation from Optimal -", vlabel),
subtitle = "Positive = upstream of optimal; negative = downstream",
x = "Position Deviation (km)", y = "Density"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")
}
V3 (Percentile Ranks)
plot_posdev(analysis_data, "position_deviation_km_v3",
"V3 (Percentile Ranks)")

V2 (Gradient-weighted)
plot_posdev(analysis_data, "position_deviation_km_v2",
"V2 (Gradient-weighted)")

V1 (Baseline)
plot_posdev(analysis_data, "position_deviation_km_v1",
"V1 (Baseline)")

Table 3: Regressions (Full Sample)
I test whether transboundary status predicts the direction and
magnitude of displacement from the optimal site. The first two models
use position deviation (in km) as a continuous outcome; the last two
model the binary indicator for whether the dam moved downstream. Columns
2 and 4 add interactions with dam purpose to test whether hydropower and
irrigation dams exhibit different displacement patterns from other dam
types.
model_posdev <- feols(
position_deviation_km ~ transboundary + log_capacity + hydropower + irrigation |
continent + decade,
data = analysis_data, cluster = ~continent
)
model_posdev_interact <- feols(
position_deviation_km ~ transboundary * hydropower +
transboundary * irrigation + log_capacity | continent + decade,
data = analysis_data, cluster = ~continent
)
logit_downstream <- feglm(
moved_downstream ~ transboundary + log_capacity + hydropower + irrigation |
continent + decade,
data = analysis_data,
family = binomial(link = "logit")
)
logit_downstream_interact <- feglm(
moved_downstream ~ transboundary * hydropower +
transboundary * irrigation + log_capacity | continent + decade,
data = analysis_data,
family = binomial(link = "logit")
)
etable(model_posdev, model_posdev_interact,
logit_downstream, logit_downstream_interact,
headers = c("Deviation (km)", "Deviation (Interact)",
"Pr(Downstream)", "Pr(Downstream, Interact)"),
title = "Position Deviation from Optimal (V2)")
## model_posdev model_posdev_interact
## Deviation (km) Deviation (Interact)
## Dependent Var.: position_deviation_km position_deviation_km
##
## transboundary 19.50* (4.340) 21.75* (4.828)
## log_capacity -2.993 (1.969) -2.970 (2.029)
## hydropower -16.31* (4.484) -13.58 (6.979)
## irrigation 37.80* (11.32) 41.40 (22.43)
## transboundary x hydropower -5.648 (6.656)
## transboundary x irrigation -6.684 (21.58)
## Fixed-Effects: --------------------- ---------------------
## continent Yes Yes
## decade Yes Yes
## __________________________ _____________________ _____________________
## Family OLS OLS
## S.E.: Clustered by: continent by: continent
## Observations 4,901 4,901
## Squared Cor. 0.09775 0.09816
## Pseudo R2 0.00901 0.00905
## BIC 55,608.6 55,623.3
##
## logit_downstream logit_downstream..
## Pr(Downstream) Pr(Downstream, Interact)
## Dependent Var.: moved_downstream moved_downstream
##
## transboundary -0.1171 (0.1720) -0.2796 (0.1868)
## log_capacity 0.1579 (0.1049) 0.1552 (0.1083)
## hydropower 0.6165*** (0.0675) 0.5677*** (0.1710)
## irrigation -0.5836*** (0.1203) -1.042 (0.7818)
## transboundary x hydropower 0.1383 (0.3708)
## transboundary x irrigation 0.8162 (1.214)
## Fixed-Effects: ------------------- ------------------
## continent Yes Yes
## decade Yes Yes
## __________________________ ___________________ __________________
## Family Logit Logit
## S.E.: Clustered by: continent by: continent
## Observations 4,901 4,901
## Squared Cor. 0.02608 0.02702
## Pseudo R2 0.06709 0.06909
## BIC 1,839.4 1,852.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
b_pos <- coef(model_posdev)["transboundary"]
p_pos <- pvalue(model_posdev)["transboundary"]
b_pos_hydro <- coef(model_posdev)["hydropower"]
p_pos_hydro <- pvalue(model_posdev)["hydropower"]
b_pos_irrig <- coef(model_posdev)["irrigation"]
p_pos_irrig <- pvalue(model_posdev)["irrigation"]
b_pos_int_base <- coef(model_posdev_interact)["transboundary"]
b_pos_int_hydro <- coef(model_posdev_interact)["transboundary:hydropower"]
p_pos_int_hydro <- pvalue(model_posdev_interact)["transboundary:hydropower"]
b_pos_int_irrig <- coef(model_posdev_interact)["transboundary:irrigation"]
p_pos_int_irrig <- pvalue(model_posdev_interact)["transboundary:irrigation"]
b_logit <- coef(logit_downstream)["transboundary"]
p_logit <- pvalue(logit_downstream)["transboundary"]
b_logit_hydro <- coef(logit_downstream)["hydropower"]
b_logit_irrig <- coef(logit_downstream)["irrigation"]
cat("**Position deviation (km):**\n\n")
Position deviation (km):
cat(sprintf(
"Transboundary dams are placed approximately %.1f km **%s** of their optimal site (p = %.3f). %s",
abs(b_pos), ifelse(b_pos > 0, "upstream", "downstream"), p_pos,
ifelse(b_pos > 0,
"This is the key result---transboundary dams are systematically shifted upstream, consistent with capturing water before it crosses the border. ",
"")
))
Transboundary dams are placed approximately 19.5 km
upstream of their optimal site (p = 0.011). This is the
key result—transboundary dams are systematically shifted upstream,
consistent with capturing water before it crosses the border.
cat(sprintf(
"The control variables show sensible patterns: hydropower dams sit %.1f km downstream of optimal (p = %.3f), consistent with hydropower benefiting from cumulative flow further downstream. Irrigation dams sit %.1f km upstream of optimal (p = %.3f), consistent with diverting water at higher elevations for gravity-fed distribution.\n\n",
abs(b_pos_hydro), p_pos_hydro,
abs(b_pos_irrig), p_pos_irrig
))
The control variables show sensible patterns: hydropower dams sit
16.3 km downstream of optimal (p = 0.022), consistent with hydropower
benefiting from cumulative flow further downstream. Irrigation dams sit
37.8 km upstream of optimal (p = 0.029), consistent with diverting water
at higher elevations for gravity-fed distribution.
cat(sprintf(
"**Interactions with purpose:** The interaction terms (`transboundary x hydropower` = %.1f, p = %.3f; `transboundary x irrigation` = %.1f, p = %.3f) are small and insignificant. This means the transboundary upstream shift is roughly the **same magnitude regardless of dam purpose**---it does not differ meaningfully between hydropower, irrigation, and other dams.\n\n",
b_pos_int_hydro, p_pos_int_hydro,
b_pos_int_irrig, p_pos_int_irrig
))
Interactions with purpose: The interaction terms
(transboundary x hydropower = -5.6, p = 0.444;
transboundary x irrigation = -6.7, p = 0.772) are small and
insignificant. This means the transboundary upstream shift is roughly
the same magnitude regardless of dam purpose—it does
not differ meaningfully between hydropower, irrigation, and other
dams.
cat(sprintf(
"**Logit (Pr(Downstream)):** The transboundary coefficient is %.3f but not significant (p = %.3f), so there is no strong evidence that transboundary status shifts the binary probability of being downstream of optimal. The purpose controls are highly significant: hydropower dams are more likely to move downstream (%.3f, p < 0.001) while irrigation dams are less likely (%.3f, p < 0.001). Again, the interactions add nothing.\n",
b_logit, p_logit, b_logit_hydro, b_logit_irrig
))
Logit (Pr(Downstream)): The transboundary
coefficient is -0.117 but not significant (p = 0.496), so there is no
strong evidence that transboundary status shifts the binary probability
of being downstream of optimal. The purpose controls are highly
significant: hydropower dams are more likely to move downstream (0.616,
p < 0.001) while irrigation dams are less likely (-0.584, p <
0.001). Again, the interactions add nothing.
Table 3b: Hydro/Irrigation Subsample
model_posdev_hi <- feols(
position_deviation_km ~ transboundary * hydropower + log_capacity |
continent + decade,
data = hydro_irrig_data,
cluster = ~continent
)
etable(model_posdev_hi,
title = "Position Deviation: Hydro vs Irrigation (H/I Sample)",
headers = c("H/I Interact"))
## model_posdev_hi
## H/I Interact
## Dependent Var.: position_deviation_km
##
## transboundary 20.78 (19.40)
## hydropower -59.03* (13.06)
## log_capacity -0.8538 (0.8109)
## transboundary x hydropower -1.782 (16.05)
## Fixed-Effects: ---------------------
## continent Yes
## decade Yes
## __________________________ _____________________
## S.E.: Clustered by: continent
## Observations 1,786
## R2 0.15757
## Within R2 0.13650
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
b_pos_hi <- coef(model_posdev_hi)["transboundary"]
p_pos_hi <- pvalue(model_posdev_hi)["transboundary"]
has_pos_hi_interact <- "transboundary:hydropower" %in% names(coef(model_posdev_hi))
if (has_pos_hi_interact) {
b_pos_hi_int <- coef(model_posdev_hi)["transboundary:hydropower"]
p_pos_hi_int <- pvalue(model_posdev_hi)["transboundary:hydropower"]
cat(sprintf(
"Within the hydro/irrigation subsample, the base transboundary effect on position deviation is %.2f km (p = %.3f). The `transboundary x hydropower` interaction is %.2f km (p = %.3f), indicating whether hydropower dams are displaced differently from irrigation dams.\n",
b_pos_hi, p_pos_hi, b_pos_hi_int, p_pos_hi_int
))
} else {
cat(sprintf(
"Within the hydro/irrigation subsample, the transboundary effect on position deviation is %.2f km (p = %.3f).\n",
b_pos_hi, p_pos_hi
))
}
Within the hydro/irrigation subsample, the base transboundary effect
on position deviation is 20.78 km (p = 0.344). The
transboundary x hydropower interaction is -1.78 km (p =
0.917), indicating whether hydropower dams are displaced differently
from irrigation dams.
8. Transboundary Dams: Suitability vs Border
Distance
For the subset of transboundary dams where I can measure along-river
distance to the nearest international border crossing, I examine whether
proximity to the border is associated with worse site quality. If the
transboundary suitability penalty is driven by geographic constraints
near borders (e.g., narrow valleys, unfavorable terrain at boundary
locations), I would expect dams closer to the border to have
systematically lower suitability. If instead the penalty reflects
broader political-strategic considerations, the relationship between
border distance and suitability may be weak or absent.
plot_border <- function(data, col, vlabel) {
ggplot(data, aes(x = dam_dist_border, y = .data[[col]])) +
geom_point(alpha = 0.5, color = "coral") +
geom_smooth(method = "lm", se = TRUE, color = "darkred") +
labs(
title = paste("TB Dams: Suitability vs Border Distance -", vlabel),
subtitle = "Along-river distance to nearest border crossing",
x = "Distance to Border (km, along river)",
y = "Suitability Ratio"
) +
theme_minimal(base_size = 13)
}
V3 (Percentile Ranks)
plot_border(tb_data, "suitability_ratio_v3", "V3 (Percentile Ranks)")

V2 (Gradient-weighted)
plot_border(tb_data, "suitability_ratio_v2", "V2 (Gradient-weighted)")

V1 (Baseline)
plot_border(tb_data, "suitability_ratio_v1", "V1 (Baseline)")

Regressions
tb_data <- tb_data %>%
mutate(
log_dam_dist_border = log(dam_dist_border + 1),
border_dist_bin = cut(dam_dist_border,
breaks = c(0, 50, 100, 200, 500, Inf),
labels = c("<50km", "50-100km", "100-200km",
"200-500km", ">500km"))
)
model_tb_lin <- feols(
suitability_ratio ~ dam_dist_border + log_capacity +
hydropower + irrigation | continent + decade,
data = tb_data, cluster = ~continent
)
model_tb_log <- feols(
suitability_ratio ~ log_dam_dist_border + log_capacity +
hydropower + irrigation | continent + decade,
data = tb_data, cluster = ~continent
)
model_tb_bins <- feols(
suitability_ratio ~ border_dist_bin + log_capacity +
hydropower + irrigation | continent + decade,
data = tb_data, cluster = ~continent
)
etable(model_tb_lin, model_tb_log, model_tb_bins,
headers = c("Linear", "Log Distance", "Distance Bins"),
title = "Transboundary Dams: Suitability vs Along-River Border Distance")
## model_tb_lin model_tb_log model_tb_bins
## Linear Log Distance Distance Bins
## Dependent Var.: suitability_ratio suitability_ratio suitability_ratio
##
## dam_dist_border -8.34e-6 (4.26e-5)
## log_capacity 0.0367* (0.0073) 0.0366* (0.0073) 0.0381* (0.0078)
## hydropower 0.0672** (0.0095) 0.0658** (0.0100) 0.0694* (0.0212)
## irrigation -0.0195 (0.0259) -0.0201 (0.0235) -0.0131 (0.0264)
## log_dam_dist_border 0.0008 (0.0007)
## border_dist_bin50-100km -0.0161 (0.0301)
## border_dist_bin100-200km -0.0197 (0.0127)
## border_dist_bin200-500km -0.0252 (0.0163)
## border_dist_bin>500km -0.0213 (0.0415)
## Fixed-Effects: ------------------ ----------------- -----------------
## continent Yes Yes Yes
## decade Yes Yes Yes
## ________________________ __________________ _________________ _________________
## S.E.: Clustered by: continent by: continent by: continent
## Observations 745 745 712
## R2 0.19433 0.19431 0.19271
## Within R2 0.17247 0.17245 0.17301
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
b_lin <- coef(model_tb_lin)["dam_dist_border"]
p_lin <- pvalue(model_tb_lin)["dam_dist_border"]
b_log <- coef(model_tb_log)["log_dam_dist_border"]
p_log <- pvalue(model_tb_log)["log_dam_dist_border"]
cat(sprintf(
"The linear distance coefficient is %.4f (p = %.3f), and the log specification yields %.4f (p = %.3f). ",
b_lin, p_lin, b_log, p_log
))
The linear distance coefficient is -0.0000 (p = 0.857), and the log
specification yields 0.0008 (p = 0.334).
cat("None of the border distance specifications produce significant coefficients, and the binned specification shows no monotonic pattern across distance categories. This null result is informative: the transboundary suitability penalty is not concentrated among dams near the border. Instead, it appears to be a broad feature of transboundary dam siting---consistent with political-strategic considerations that affect project selection and approval processes rather than purely geographic constraints at border locations.\n")
None of the border distance specifications produce significant
coefficients, and the binned specification shows no monotonic pattern
across distance categories. This null result is informative: the
transboundary suitability penalty is not concentrated among dams near
the border. Instead, it appears to be a broad feature of transboundary
dam siting—consistent with political-strategic considerations that
affect project selection and approval processes rather than purely
geographic constraints at border locations.
9. Sensitivity: Transboundary Coefficient Across Specifications
A key concern is whether the transboundary suitability gap is an
artifact of the suitability index construction. To address this, I run
the same regression model (continent and decade fixed effects, standard
errors clustered by continent) using each of the three suitability
specifications as the dependent variable. If the transboundary
coefficient is consistently negative across specifications with very
different functional forms, this increases confidence that the result
reflects a real phenomenon rather than a measurement artifact.
Note that the V3 (percentile) coefficient is expected to be smaller
in magnitude since percentile ranks mechanically compress the scale. The
relevant comparison is the sign, significance, and consistency of the
coefficient across specifications.
sens_models <- list()
sens_coefs <- list()
for (v in versions) {
col <- paste0("suitability_ratio_", v)
fml <- as.formula(paste(col,
"~ transboundary + log_capacity + hydropower + irrigation",
"| continent + decade"))
m <- feols(fml, data = analysis_data, cluster = ~continent)
sens_models[[v]] <- m
sens_coefs[[v]] <- tibble(
spec = v,
coef = coef(m)["transboundary"],
se = se(m)["transboundary"],
pval = pvalue(m)["transboundary"]
)
}
sens_df <- bind_rows(sens_coefs) %>%
mutate(
spec_label = case_when(
spec == "v1" ~ "V1 (Baseline)",
spec == "v2" ~ "V2 (Gradient)",
spec == "v3" ~ "V3 (Percentile)"
),
sig = case_when(
pval < 0.01 ~ "***",
pval < 0.05 ~ "**",
pval < 0.10 ~ "*",
TRUE ~ ""
)
)
sens_df %>%
select(Specification = spec_label,
Coefficient = coef, SE = se,
`p-value` = pval, Sig = sig) %>%
kbl(digits = 4,
caption = "Transboundary Coefficient Across Suitability Specifications") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
Transboundary Coefficient Across Suitability Specifications
|
Specification
|
Coefficient
|
SE
|
p-value
|
Sig
|
|
V1 (Baseline)
|
-0.0527
|
0.0141
|
0.0202
|
**
|
|
V2 (Gradient)
|
-0.0794
|
0.0144
|
0.0053
|
***
|
|
V3 (Percentile)
|
-0.0275
|
0.0107
|
0.0616
|
|
ggplot(sens_df, aes(x = spec_label, y = coef)) +
geom_point(size = 4, color = "coral") +
geom_errorbar(aes(ymin = coef - 1.96 * se,
ymax = coef + 1.96 * se),
width = 0.2, color = "coral") +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
labs(
title = "Transboundary Effect Across Suitability Specifications",
subtitle = "95% CI; negative = lower suitability for transboundary dams",
x = "Specification",
y = "Coefficient on Transboundary"
) +
theme_minimal(base_size = 13)

all_neg <- all(sens_df$coef < 0)
cat(sprintf(
"The transboundary coefficient is %s across all three specifications. ",
ifelse(all_neg, "negative", "not consistently negative")
))
The transboundary coefficient is negative across all three
specifications.
v2_row <- sens_df %>% filter(spec == "v2")
v3_row <- sens_df %>% filter(spec == "v3")
cat(sprintf(
"The V2 gradient-weighted specification shows an effect of %.3f (p = %.3f), while V3 percentile ranks show %.3f (p = %.3f). The consistency across specifications with very different distributional properties supports the interpretation that transboundary dams are genuinely placed at less suitable sites.\n",
v2_row$coef, v2_row$pval, v3_row$coef, v3_row$pval
))
The V2 gradient-weighted specification shows an effect of -0.079 (p =
0.005), while V3 percentile ranks show -0.028 (p = 0.062). The
consistency across specifications with very different distributional
properties supports the interpretation that transboundary dams are
genuinely placed at less suitable sites.
10. Constrained (Same-Country) Suitability
Analysis
A natural concern is that the transboundary suitability penalty is
mechanical: perhaps the best engineering site within +/-200 km happens
to be across the border, and the country simply cannot build there. To
address this, I construct a constrained suitability
ratio (\(SR_c\)) that compares
each dam’s actual site only to the best available site within the
dam’s own country. By construction, \(SR_c \geq SR_u\) since the constrained
optimal is weakly worse than the unconstrained optimal.
If country borders explain the penalty, \(SR_c\) should be much closer to 1 than
\(SR_u\), and the transboundary
coefficient on \(SR_c\) should
attenuate substantially. If instead the penalty reflects within-country
choices, the attenuation should be small.
# Constrained columns available -- proceed with analysis
constrained_versions <- c("v1", "v2", "v3")[c(has_constrained_v1, TRUE, has_constrained_v3)]
Descriptive Comparison
constrained_summary <- analysis_data %>%
filter(!is.na(suitability_ratio_c), !is.na(optimal_outside_country)) %>%
group_by(transboundary) %>%
summarise(
n = n(),
mean_sr_u = mean(suitability_ratio, na.rm = TRUE),
mean_sr_c = mean(suitability_ratio_c, na.rm = TRUE),
median_sr_u = median(suitability_ratio, na.rm = TRUE),
median_sr_c = median(suitability_ratio_c, na.rm = TRUE),
pct_optimal_outside = mean(optimal_outside_country, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
mutate(
tb_label = ifelse(transboundary == 1, "Transboundary", "Domestic"),
sr_gap = mean_sr_c - mean_sr_u
)
constrained_summary %>%
select(Status = tb_label, n,
`Mean SR_u` = mean_sr_u, `Mean SR_c` = mean_sr_c,
`Median SR_u` = median_sr_u, `Median SR_c` = median_sr_c,
`Gap (SR_c - SR_u)` = sr_gap,
`% Optimal Outside` = pct_optimal_outside) %>%
kbl(digits = 3,
caption = "Unconstrained vs Constrained Suitability Ratios (V2)") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
Unconstrained vs Constrained Suitability Ratios (V2)
|
Status
|
n
|
Mean SR_u
|
Mean SR_c
|
Median SR_u
|
Median SR_c
|
Gap (SR_c - SR_u)
|
% Optimal Outside
|
|
Domestic
|
2266
|
0.241
|
0.244
|
0.110
|
0.111
|
0.003
|
0.000
|
|
Transboundary
|
2634
|
0.140
|
0.152
|
0.057
|
0.059
|
0.013
|
9.377
|
forced_domestic <- analysis_data %>%
filter(optimal_outside_country == TRUE, !is.na(suitability_ratio_c))
if (nrow(forced_domestic) > 0) {
cat(sprintf(
"Among dams whose unconstrained optimal is outside the country (n = %d): mean $SR_u$ = %.3f, mean $SR_c$ = %.3f, mean gap = %.3f. These are dams 'forced' to use a domestic site that is worse than the cross-border alternative.\n",
nrow(forced_domestic),
mean(forced_domestic$suitability_ratio, na.rm = TRUE),
mean(forced_domestic$suitability_ratio_c, na.rm = TRUE),
mean(forced_domestic$suitability_ratio_c - forced_domestic$suitability_ratio, na.rm = TRUE)
))
}
Among dams whose unconstrained optimal is outside the country (n =
247): mean \(SR_u\) = 0.175, mean \(SR_c\) = 0.308, mean gap = 0.133. These are
dams ‘forced’ to use a domestic site that is worse than the cross-border
alternative.
% Optimal Outside Country
p_outside_data <- analysis_data %>%
filter(!is.na(suitability_ratio_c), !is.na(optimal_outside_country)) %>%
group_by(transboundary) %>%
summarise(
n = n(),
pct_outside = mean(optimal_outside_country, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
mutate(label = ifelse(transboundary == 1, "Transboundary", "Domestic"))
ggplot(p_outside_data, aes(x = label, y = pct_outside, fill = label)) +
geom_col(width = 0.6) +
geom_text(aes(label = sprintf("%.1f%%", pct_outside)),
vjust = -0.5, size = 4) +
scale_fill_manual(values = c("Domestic" = "steelblue",
"Transboundary" = "coral")) +
labs(
title = "Percentage of Dams with Optimal Location Outside Country",
subtitle = "i.e., country-constrained optimal differs from unconstrained optimal",
x = "", y = "% of Dams"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "none") +
ylim(0, max(p_outside_data$pct_outside) * 1.3)

For domestic dams, the unconstrained and constrained optimals are
virtually always the same (candidates rarely cross borders). For
transboundary dams, only a modest share have their best site outside the
country. This immediately suggests that country borders are not
the primary driver of the suitability penalty.
Constrained Density Plots
plot_constrained_density <- function(data, col_c, col_u, vlabel) {
plot_data <- data %>%
filter(!is.na(.data[[col_c]])) %>%
select(transboundary, SR_u = !!sym(col_u), SR_c = !!sym(col_c)) %>%
pivot_longer(cols = c(SR_u, SR_c),
names_to = "measure", values_to = "ratio")
ggplot(plot_data, aes(x = ratio,
fill = factor(transboundary),
linetype = measure)) +
geom_density(alpha = 0.3) +
geom_vline(xintercept = 1, linetype = "dashed", color = "black") +
scale_fill_manual(
values = c("0" = "steelblue", "1" = "coral"),
labels = c("Domestic", "Transboundary"),
name = ""
) +
scale_linetype_manual(
values = c("SR_u" = "solid", "SR_c" = "dashed"),
labels = c(expression(SR[u]~"(unconstrained)"),
expression(SR[c]~"(same country)")),
name = ""
) +
labs(
title = paste("Unconstrained vs Constrained Suitability -", vlabel),
subtitle = "Solid = unconstrained; Dashed = constrained to same country",
x = "Suitability Ratio", y = "Density"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")
}
V3 (Percentile Ranks)
plot_constrained_density(analysis_data,
"suitability_ratio_c_v3", "suitability_ratio_v3",
"V3 (Percentile Ranks)")

V2 (Gradient-weighted)
plot_constrained_density(analysis_data,
"suitability_ratio_c", "suitability_ratio",
"V2 (Gradient-weighted)")

V1 (Baseline)
plot_constrained_density(analysis_data,
"suitability_ratio_c_v1", "suitability_ratio_v1",
"V1 (Baseline)")

Constrained SR Density (Standalone)
p6_data <- analysis_data %>%
filter(!is.na(suitability_ratio_c), !is.na(optimal_outside_country))
ggplot(p6_data, aes(x = suitability_ratio_c,
fill = factor(transboundary))) +
geom_density(alpha = 0.6) +
geom_vline(xintercept = 1, linetype = "dashed", color = "black") +
scale_fill_manual(
values = c("0" = "steelblue", "1" = "coral"),
labels = c("Domestic", "Transboundary"),
name = ""
) +
labs(
title = "Constrained Suitability Ratio (Same-Country Optimal)",
subtitle = "Ratio of 1 = dam at best location within its own country",
x = expression(SR[c] ~ "(Actual / Optimal in same country)"),
y = "Density"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")

SR_u vs SR_c Scatter
ggplot(p6_data, aes(x = suitability_ratio, y = suitability_ratio_c,
color = factor(transboundary))) +
geom_point(alpha = 0.3, size = 1.5) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "gray40") +
scale_color_manual(
values = c("0" = "steelblue", "1" = "coral"),
labels = c("Domestic", "Transboundary"),
name = ""
) +
labs(
title = "Unconstrained vs Constrained Suitability Ratio",
subtitle = "Points above diagonal = constrained optimal worse than unconstrained (optimal outside country)",
x = expression(SR[u] ~ "(actual / optimal anywhere)"),
y = expression(SR[c] ~ "(actual / optimal in same country)")
) +
coord_equal(xlim = c(0, 1.05), ylim = c(0, 1.05)) +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")

Most points lie on the 45-degree line, meaning the unconstrained and
constrained optimals are the same segment. Points above the diagonal are
dams where the best site is outside the country; these are predominantly
transboundary dams (orange).
Constrained Regressions (V2)
model_c1 <- feols(
suitability_ratio_c ~ transboundary,
data = analysis_data
)
model_c2 <- feols(
suitability_ratio_c ~ transboundary + log_capacity + hydropower + irrigation,
data = analysis_data
)
model_c3 <- feols(
suitability_ratio_c ~ transboundary + log_capacity + hydropower + irrigation |
continent + decade,
data = analysis_data,
cluster = ~continent
)
model_c4 <- feols(
suitability_ratio_c ~ transboundary + log_capacity + hydropower + irrigation |
admin0 + decade,
data = analysis_data,
cluster = ~admin0
)
etable(model_c1, model_c2, model_c3, model_c4,
headers = c("Simple", "Controls", "Continent FE", "Country FE"),
title = "Effect of Transboundary Status on Constrained Suitability Ratio (SR_c)")
## model_c1 model_c2 model_c3
## Simple Controls Continent FE
## Dependent Var.: suitability_ratio_c suitability_ratio_c suitability_ratio_c
##
## Constant 0.2437*** (0.0052) 0.1201*** (0.0079)
## transboundary -0.0914*** (0.0071) -0.0816*** (0.0068) -0.0670** (0.0138)
## log_capacity 0.0301*** (0.0017) 0.0302** (0.0064)
## hydropower 0.0673*** (0.0106) 0.0669. (0.0250)
## irrigation 0.0161. (0.0085) -0.0137 (0.0330)
## Fixed-Effects: ------------------- ------------------- -------------------
## continent No No Yes
## decade No No Yes
## admin0 No No No
## _______________ ___________________ ___________________ ___________________
## S.E. type IID IID by: continent
## Observations 4,901 4,901 4,901
## R2 0.03240 0.12264 0.16998
## Within R2 -- -- 0.10300
##
## model_c4
## Country FE
## Dependent Var.: suitability_ratio_c
##
## Constant
## transboundary -0.0534*** (0.0089)
## log_capacity 0.0293*** (0.0031)
## hydropower 0.0421 (0.0260)
## irrigation -0.0294 (0.0215)
## Fixed-Effects: -------------------
## continent No
## decade Yes
## admin0 Yes
## _______________ ___________________
## S.E. type by: admin0
## Observations 4,901
## R2 0.25465
## Within R2 0.07633
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("**Side-by-side comparison: Unconstrained vs Constrained**\n\n")
## **Side-by-side comparison: Unconstrained vs Constrained**
etable(model3, model_c3, model4, model_c4,
headers = c("SR_u (Cont. FE)", "SR_c (Cont. FE)",
"SR_u (Country FE)", "SR_c (Country FE)"),
title = "Unconstrained vs Constrained Suitability Ratio")
## model3 model_c3 model4
## SR_u (Cont. FE) SR_c (Cont. FE) SR_u (Country FE)
## Dependent Var.: suitability_ratio suitability_ratio_c suitability_ratio
##
## transboundary -0.0794** (0.0144) -0.0670** (0.0138) -0.0655*** (0.0074)
## log_capacity 0.0318** (0.0055) 0.0302** (0.0064) 0.0307*** (0.0027)
## hydropower 0.0574. (0.0244) 0.0669. (0.0250) 0.0369 (0.0258)
## irrigation -0.0127 (0.0332) -0.0137 (0.0330) -0.0285 (0.0217)
## Fixed-Effects: ------------------ ------------------- -------------------
## continent Yes Yes No
## decade Yes Yes Yes
## admin0 No No Yes
## _______________ __________________ ___________________ ___________________
## S.E.: Clustered by: continent by: continent by: admin0
## Observations 4,901 4,901 4,901
## R2 0.18403 0.16998 0.26802
## Within R2 0.12103 0.10300 0.09185
##
## model_c4
## SR_c (Country FE)
## Dependent Var.: suitability_ratio_c
##
## transboundary -0.0534*** (0.0089)
## log_capacity 0.0293*** (0.0031)
## hydropower 0.0421 (0.0260)
## irrigation -0.0294 (0.0215)
## Fixed-Effects: -------------------
## continent No
## decade Yes
## admin0 Yes
## _______________ ___________________
## S.E.: Clustered by: admin0
## Observations 4,901
## R2 0.25465
## Within R2 0.07633
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef_u <- coef(model3)["transboundary"]
coef_c <- coef(model_c3)["transboundary"]
p_c3 <- pvalue(model_c3)["transboundary"]
attenuation <- (1 - coef_c / coef_u) * 100
coef_u4 <- coef(model4)["transboundary"]
coef_c4 <- coef(model_c4)["transboundary"]
p_c4 <- pvalue(model_c4)["transboundary"]
attenuation4 <- (1 - coef_c4 / coef_u4) * 100
cat(sprintf(
"The constrained regressions mirror the unconstrained results closely. The transboundary coefficient on $SR_c$ is negative and significant across all four specifications, following the same attenuation pattern as $SR_u$ when moving from simple OLS to country fixed effects.\n\n"
))
The constrained regressions mirror the unconstrained results closely.
The transboundary coefficient on \(SR_c\) is negative and significant across
all four specifications, following the same attenuation pattern as \(SR_u\) when moving from simple OLS to
country fixed effects.
cat(sprintf(
"**With continent + decade FE:** The transboundary coefficient attenuates from %.4f ($SR_u$) to %.4f ($SR_c$, p = %.3f), a **%.1f%% reduction**. This means approximately **%.0f%% of the siting penalty occurs within the dam's own country**, not due to border constraints.\n\n",
coef_u, coef_c, p_c3, attenuation, 100 - attenuation
))
With continent + decade FE: The transboundary
coefficient attenuates from -0.0794 (\(SR_u\)) to -0.0670 (\(SR_c\), p = 0.008), a 15.6%
reduction. This means approximately 84% of the siting
penalty occurs within the dam’s own country, not due to border
constraints.
cat(sprintf(
"**With country FE:** The coefficient moves from %.4f ($SR_u$) to %.4f ($SR_c$, p = %.3f), an attenuation of **%.1f%%**. Even in the most demanding specification---comparing transboundary and domestic dams within the same country, using only same-country alternatives as the benchmark---the penalty remains large and highly significant. This rules out the mechanical explanation that transboundary dams look worse simply because the best site is across the border.\n",
coef_u4, coef_c4, p_c4, attenuation4
))
With country FE: The coefficient moves from -0.0655
(\(SR_u\)) to -0.0535 (\(SR_c\), p = 0.000), an attenuation of
18.3%. Even in the most demanding
specification—comparing transboundary and domestic dams within the same
country, using only same-country alternatives as the benchmark—the
penalty remains large and highly significant. This rules out the
mechanical explanation that transboundary dams look worse simply because
the best site is across the border.
Optimal Outside Country (LPM)
model_outside <- feols(
optimal_outside_country ~ transboundary + log_capacity + hydropower + irrigation |
continent + decade,
data = analysis_data %>% filter(!is.na(optimal_outside_country)),
cluster = ~continent
)
etable(model_outside,
headers = "Pr(Optimal Outside Country)",
title = "Does Transboundary Status Predict Optimal Being Outside Country?")
## model_outside
## Pr(Optimal Outside Country)
## Dependent Var.: optimal_outside_country
##
## transboundary 0.1053 (0.0639)
## log_capacity -0.0096 (0.0053)
## hydropower 0.0455* (0.0153)
## irrigation 0.0126 (0.0141)
## Fixed-Effects: -----------------------
## continent Yes
## decade Yes
## _______________ _______________________
## S.E.: Clustered by: continent
## Observations 4,900
## R2 0.10226
## Within R2 0.06362
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Sensitivity: Constrained Coefficient Across Specifications
c_sens_models <- list()
c_sens_coefs <- list()
for (v in constrained_versions) {
if (v == "v2") {
col_c <- "suitability_ratio_c"
col_u <- "suitability_ratio"
} else {
col_c <- paste0("suitability_ratio_c_", v)
col_u <- paste0("suitability_ratio_", v)
}
if (!col_c %in% names(analysis_data)) next
fml_u <- as.formula(paste(col_u,
"~ transboundary + log_capacity + hydropower + irrigation | continent + decade"))
fml_c <- as.formula(paste(col_c,
"~ transboundary + log_capacity + hydropower + irrigation | continent + decade"))
m_u <- feols(fml_u, data = analysis_data, cluster = ~continent)
m_c <- feols(fml_c, data = analysis_data, cluster = ~continent)
c_sens_coefs[[v]] <- tibble(
spec = v,
coef_u = coef(m_u)["transboundary"],
se_u = se(m_u)["transboundary"],
coef_c = coef(m_c)["transboundary"],
se_c = se(m_c)["transboundary"],
attenuation_pct = (1 - coef(m_c)["transboundary"] /
coef(m_u)["transboundary"]) * 100
)
}
c_sens_df <- bind_rows(c_sens_coefs) %>%
mutate(spec_label = case_when(
spec == "v1" ~ "V1 (Baseline)",
spec == "v2" ~ "V2 (Gradient)",
spec == "v3" ~ "V3 (Percentile)"
))
c_sens_df %>%
select(Specification = spec_label,
`Coef (SR_u)` = coef_u, `SE (SR_u)` = se_u,
`Coef (SR_c)` = coef_c, `SE (SR_c)` = se_c,
`Attenuation %` = attenuation_pct) %>%
kbl(digits = 4,
caption = "Transboundary Coefficient: Unconstrained vs Constrained Across Specs") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
Transboundary Coefficient: Unconstrained vs Constrained Across Specs
|
Specification
|
Coef (SR_u)
|
SE (SR_u)
|
Coef (SR_c)
|
SE (SR_c)
|
Attenuation %
|
|
V1 (Baseline)
|
-0.0527
|
0.0141
|
-0.0411
|
0.0136
|
21.9403
|
|
V2 (Gradient)
|
-0.0794
|
0.0144
|
-0.0670
|
0.0138
|
15.5689
|
|
V3 (Percentile)
|
-0.0275
|
0.0107
|
-0.0174
|
0.0147
|
36.9104
|
c_plot_df <- c_sens_df %>%
select(spec_label, coef_u, se_u, coef_c, se_c) %>%
pivot_longer(
cols = c(coef_u, coef_c),
names_to = "type", values_to = "coef"
) %>%
mutate(
se = ifelse(type == "coef_u",
c_sens_df$se_u[match(spec_label, c_sens_df$spec_label)],
c_sens_df$se_c[match(spec_label, c_sens_df$spec_label)]),
type_label = ifelse(type == "coef_u",
"Unconstrained (SR_u)", "Constrained (SR_c)")
)
ggplot(c_plot_df, aes(x = spec_label, y = coef,
color = type_label, shape = type_label)) +
geom_point(size = 4, position = position_dodge(width = 0.4)) +
geom_errorbar(aes(ymin = coef - 1.96 * se,
ymax = coef + 1.96 * se),
width = 0.2,
position = position_dodge(width = 0.4)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
scale_color_manual(values = c("Unconstrained (SR_u)" = "coral",
"Constrained (SR_c)" = "steelblue")) +
labs(
title = "Transboundary Coefficient: Unconstrained vs Constrained",
subtitle = "95% CI; attenuation from SR_u to SR_c measures role of country borders",
x = "Suitability Specification",
y = "Coefficient on Transboundary",
color = "", shape = ""
) +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")

Across all specifications, the constrained coefficient (\(SR_c\)) is only modestly smaller than the
unconstrained coefficient (\(SR_u\)).
The consistency of the small attenuation across V1, V2, and V3
reinforces the conclusion that country borders explain only a minor
fraction of the transboundary siting penalty.
11. Robustness Checks
I conduct three robustness exercises: (1) using the suitability
percentile (within-river-system rank) as the dependent variable instead
of the ratio; (2) restricting to dams with at least 10 candidate
segments (ensuring meaningful comparison sets); and (3) restricting to
post-1970 dams (where data quality is higher and the international water
governance regime was more established).
model_pctl <- feols(
suitability_percentile ~ transboundary + log_capacity +
hydropower + irrigation | continent + decade,
data = analysis_data, cluster = ~continent
)
model_high_qual <- feols(
suitability_ratio ~ transboundary + log_capacity +
hydropower + irrigation | continent + decade,
data = analysis_data %>% filter(n_candidates >= 10),
cluster = ~continent
)
model_post1970 <- feols(
suitability_ratio ~ transboundary + log_capacity +
hydropower + irrigation | continent + decade,
data = analysis_data %>% filter(decade >= 1970),
cluster = ~continent
)
etable(model_pctl, model_high_qual, model_post1970,
headers = c("Suitability Pctl", ">=10 Candidates", "Post-1970"),
title = "Robustness Checks (V2)")
## model_pctl model_high_qual model_post1970
## Suitability Pctl >=10 Candidates Post-1970
## Dependent Var.: suitability_percentile suitability_ratio suitability_ratio
##
## transboundary -0.0698** (0.0145) -0.0522* (0.0119) -0.0987** (0.0181)
## log_capacity 0.0638** (0.0099) 0.0368*** (0.0034) 0.0282* (0.0072)
## hydropower 0.1147** (0.0222) 0.0626* (0.0219) 0.0430 (0.0301)
## irrigation -0.1817* (0.0484) -0.0012 (0.0223) -0.0121 (0.0392)
## Fixed-Effects: ---------------------- ------------------ ------------------
## continent Yes Yes Yes
## decade Yes Yes Yes
## _______________ ______________________ __________________ __________________
## S.E.: Clustered by: continent by: continent by: continent
## Observations 4,901 4,743 2,760
## R2 0.31140 0.21150 0.19817
## Within R2 0.27009 0.15566 0.11289
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
b_pctl <- coef(model_pctl)["transboundary"]
p_pctl <- pvalue(model_pctl)["transboundary"]
b_hq <- coef(model_high_qual)["transboundary"]
p_hq <- pvalue(model_high_qual)["transboundary"]
b_70 <- coef(model_post1970)["transboundary"]
p_70 <- pvalue(model_post1970)["transboundary"]
cat(sprintf(
"All three robustness checks confirm the main finding. The suitability percentile specification yields a coefficient of %.3f (p = %.3f), indicating that transboundary dams sit about %.0f percentile points lower in the within-river-system suitability distribution. Restricting to dams with at least 10 candidate sites produces a coefficient of %.3f (p = %.3f), ruling out the concern that the result is driven by dams with very few alternatives. The post-1970 subsample shows a coefficient of %.3f (p = %.3f).\n",
b_pctl, p_pctl, abs(b_pctl) * 100,
b_hq, p_hq,
b_70, p_70
))
All three robustness checks confirm the main finding. The suitability
percentile specification yields a coefficient of -0.070 (p = 0.009),
indicating that transboundary dams sit about 7 percentile points lower
in the within-river-system suitability distribution. Restricting to dams
with at least 10 candidate sites produces a coefficient of -0.052 (p =
0.012), ruling out the concern that the result is driven by dams with
very few alternatives. The post-1970 subsample shows a coefficient of
-0.099 (p = 0.005).
Summary of Key Findings
# Main effect
main_effect <- coef(model3)["transboundary"]
main_pval <- pvalue(model3)["transboundary"]
country_effect <- coef(model4)["transboundary"]
cat(sprintf(
"1. **Transboundary dams are systematically placed at less suitable sites** compared to domestic dams, with the gap robust across three different suitability index constructions (V1 baseline, V2 gradient-weighted, V3 percentile ranks).\n\n"
))
- Transboundary dams are systematically placed at less
suitable sites compared to domestic dams, with the gap robust
across three different suitability index constructions (V1 baseline, V2
gradient-weighted, V3 percentile ranks).
cat(sprintf(
"2. **The penalty survives demanding specifications**: even comparing transboundary and domestic dams within the same country (country fixed effects), transboundary dams have suitability ratios approximately %.1f percentage points lower.\n\n",
abs(country_effect) * 100
))
- The penalty survives demanding specifications: even
comparing transboundary and domestic dams within the same country
(country fixed effects), transboundary dams have suitability ratios
approximately 6.5 percentage points lower.
cat(sprintf(
"3. **The penalty is not about country borders**: constraining the optimal to the dam's own country ($SR_c$) attenuates the transboundary coefficient by only ~%.0f%%. The vast majority of the siting penalty reflects within-country choices, not an inability to access better sites across the border.\n\n",
attenuation
))
- The penalty is not about country borders:
constraining the optimal to the dam’s own country (\(SR_c\)) attenuates the transboundary
coefficient by only ~16%. The vast majority of the siting penalty
reflects within-country choices, not an inability to access better sites
across the border.
cat("4. **The effect is present across dam types**, though the magnitude varies between hydropower and irrigation dams as shown in the interaction models.\n\n")
- The effect is present across dam types, though the
magnitude varies between hydropower and irrigation dams as shown in the
interaction models.
cat(sprintf(
"5. **The post-1970 subsample** shows a transboundary coefficient of %.3f, suggesting the siting distortion has %s over time.\n\n",
b_70,
ifelse(abs(b_70) > abs(main_effect), "not diminished (and may have grown)",
"modestly attenuated but persists")
))
- The post-1970 subsample shows a transboundary
coefficient of -0.099, suggesting the siting distortion has not
diminished (and may have grown) over time.
cat("6. **Border proximity does not explain the gap**: among transboundary dams, along-river distance to the border has no significant relationship with suitability, suggesting the penalty reflects broad political-strategic dynamics rather than geographic constraints at border locations.\n")
- Border proximity does not explain the gap: among
transboundary dams, along-river distance to the border has no
significant relationship with suitability, suggesting the penalty
reflects broad political-strategic dynamics rather than geographic
constraints at border locations.
Report generated with
strategic_placement_report.Rmd