1. Setup

# ===== Packages =====
# MASS is attached first so that dplyr::select(), attached afterwards, is not
# masked by MASS::select().
library(MASS)
library(dplyr)
library(janitor)
library(tidyverse)
library(stringr)
library(ggplot2)
library(broom)
library(scales)
library(cluster)
library(factoextra)
library(DescTools)
library(emmeans)
library(ggridges)
library(ggrepel)
# ggpredict() is a function exported by the ggeffects package. There is no
# package called "ggpredict", so library(ggpredict) stops with
# "there is no package called 'ggpredict'".
library(ggeffects)

2. Data Loading

# Read every raw CSV under Data/ and pass it straight through
# janitor::clean_names(); one data frame is created per input file.
df_year <- clean_names(read_csv("Data/year_data.csv"))

df_spatial <- clean_names(read_csv("Data/spatial_jurisdiction_data.csv"))

df_vertical <- clean_names(read_csv("Data/vertical_coordinations_data.csv"))

df_subject <- clean_names(
  read_csv("Data/subject_matter_jurisdiction_data.csv")
)

df_strategies <- clean_names(read_csv("Data/strategies_data.csv"))

df_objectives <- clean_names(read_csv("Data/defined_objectives_data.csv"))

df_relationships <- clean_names(
  read_csv("Data/defined_inter_institutional_relationships_data.csv")
)

df_sources <- clean_names(read_csv("Data/sources_of_jurisdiction_data.csv"))

# Quick look at the first 10 rows of the year data
head(df_year, n = 10)

3. Derived Measures

# ===== Identify column groups by name =====
# Five character vectors, each listing ten column names that end in
# "_across_igo". They are consumed further down via all_of() to build the
# derived measures, so every name must match a column of `df` exactly.
# NOTE(review): the snake_case spelling presumably comes from
# janitor::clean_names() applied at load time -- confirm against names(df).

# Subject-matter (ACROSS): input to sm_breadth, sm_HHI and sm_Shannon
sm_across_cols <- c(
  "biodiversity_ecosystem_conservation_across_igo",
  "cultural_heritage_traditional_knowledge_data_governance_across_igo",
  "disaster_risk_reduction_resilience_across_igo",
  "environmental_protection_climate_change_across_igo",
  "human_rights_social_justice_advocacy_across_igo",
  "international_cooperation_governance_across_igo",
  "research_science_innovation_across_igo",
  "security_safety_across_igo",
  "sustainable_development_capacity_building_across_igo",
  "trade_investment_economic_cooperation_across_igo"
)

# Spatial (ACROSS): input to spatial_breadth
spatial_across_cols <- c(
  "archipelago_across_igo",
  "coastal_zone_across_igo",
  "contiguous_zone_cz_across_igo",
  "enclosed_or_semi_enclosed_sea_across_igo",
  "exclusive_economic_zone_eez_across_igo",
  "extended_continental_shelf_cs_across_igo",
  "high_seas_across_igo",
  "internal_waters_across_igo",
  "territorial_sea_ts_across_igo",
  "the_area_across_igo"
)

# Inter-institutional ties (ACROSS): input to interinst_index
interinst_across_cols <- c(
  "civil_society_engagement_across_igo",
  "donor_partnerships_across_igo",
  "intergovernmental_consultations_across_igo",
  "ngo_engagement_across_igo",
  "private_sector_partnerships_across_igo",
  "regional_body_coordination_across_igo",
  "scientific_community_linkages_across_igo",
  "technical_or_expert_groups_across_igo",
  "treaty_body_coordination_across_igo",
  "un_system_collaboration_across_igo"
)

# Strategies (ACROSS): input to strategy_breadth
strategy_across_cols <- c(
  "capacity_development_operational_delivery_across_igo",
  "collaboration_partnerships_networks_across_igo",
  "environmental_climate_biodiversity_action_across_igo",
  "financial_budgetary_management_across_igo",
  "inclusion_rights_social_justice_across_igo",
  "innovation_technology_across_igo",
  "knowledge_data_across_igo",
  "monitoring_accountability_across_igo",
  "policy_regulation_across_igo",
  "strategic_institutional_planning_across_igo"
)

# Legal authority / sources (ACROSS): input to legal_authority_index.
# The ordinal master score is NOT part of this vector; it is referenced
# separately as `ordinal_score_sources` when the index is built.
sources_across_cols <- c(
  "bilateral_multilateral_arrangements_across_igo",
  "binding_secondary_law_across_igo",
  "compliance_oversight_across_igo",
  "customary_soft_law_across_igo",
  "delegated_or_derived_powers_across_igo",
  "foundational_treaties_charters_across_igo",
  "non_binding_secondary_law_across_igo",
  "other_governance_instruments_across_igo",
  "strategic_frameworks_across_igo",
  "technical_norms_standards_across_igo"
)

# ===== Derived measures =====
# Per-IGO (row-wise) summaries of the ACROSS column groups.
# hhi() and shannon() are helpers that are not defined in this section; they
# must already be in scope when this runs.
df <- df %>%
  rowwise() %>%
  mutate(
    # Specialisation metrics (Subject-Matter, ACROSS)
    sm_breadth   = sum(c_across(all_of(sm_across_cols)) > 0, na.rm = TRUE),
    sm_HHI       = hhi(c_across(all_of(sm_across_cols))),
    sm_Shannon   = shannon(c_across(all_of(sm_across_cols))),

    # Spatial breadth (ACROSS): number of spatial columns with a value > 0
    spatial_breadth = sum(c_across(all_of(spatial_across_cols)) > 0, na.rm = TRUE),

    # Inter-institutional index (mean of ACROSS ties)
    interinst_index = mean(c_across(all_of(interinst_across_cols)), na.rm = TRUE),

    # Strategy breadth (ACROSS): number of strategy columns with a value > 0
    strategy_breadth = sum(c_across(all_of(strategy_across_cols)) > 0, na.rm = TRUE)
  ) %>%
  ungroup()

# Legal authority index (z-averaged: ACROSS sources + ordinal master).
# The z-scores must be computed per COLUMN, i.e. across IGOs, and only then
# averaged per row. Calling scale() inside rowwise() standardised the values
# within a single row instead: their mean is 0 by construction, and scale() of
# the single ordinal value is NA, so the index carried no information.
# dplyr::select() is namespaced because MASS::select() may mask it.
df$legal_authority_index <- rowMeans(
  scale(as.matrix(
    dplyr::select(df, all_of(sources_across_cols), ordinal_score_sources)
  )),
  na.rm = TRUE
)
# View df
view(df)

4. Analysis

4.1 H2.1: Higher institutional density at founding increases specialization (↑ HHI).

library(ggplot2)
library(dplyr)
library(ggrepel)
library(stringr)

# 1. Identify primary subject-matter per IGO
# Columns are selected by name (the same columns sm_HHI is built from) rather
# than by position, so adding or reordering columns cannot silently change
# which variables are compared. which.max() keeps the first column on ties.
subject_cols <- sm_across_cols
df$primary_subject <- subject_cols[apply(df[, subject_cols], 1, which.max)]

# 2. Recode subject categories to shorter names
# The keys are the cleaned (snake_case) column names that `df` actually
# carries; the raw spreadsheet headers never matched, so the recode was a
# no-op and the legend showed the long column names.
df$primary_subject <- recode(df$primary_subject,
  "biodiversity_ecosystem_conservation_across_igo" = "Biodiversity",
  "cultural_heritage_traditional_knowledge_data_governance_across_igo" = "Heritage/Data",
  "disaster_risk_reduction_resilience_across_igo" = "Disaster/Resilience",
  "environmental_protection_climate_change_across_igo" = "Environment/Climate",
  "human_rights_social_justice_advocacy_across_igo" = "Rights/Justice",
  "international_cooperation_governance_across_igo" = "Governance",
  "research_science_innovation_across_igo" = "Research/Science",
  "security_safety_across_igo" = "Security",
  "sustainable_development_capacity_building_across_igo" = "Sustainable Dev.",
  "trade_investment_economic_cooperation_across_igo" = "Trade/Econ",
  .default = df$primary_subject
)

# 3. Pick IGOs to label (5 broadest + 5 most specialized)
label_df <- df %>%
  arrange(sm_HHI) %>%
  slice(c(1:5, (n() - 4):n()))

# 4. Regression model: HHI on standardised founding density, controlling for
#    the standardised cumulative stock
m_h21 <- lm(sm_HHI ~ scale(founding_density_5yr) + scale(cumulative_stock), data = df)

# 5. Predictions over the observed density range, holding cumulative_stock at
#    its median; the band is fit +/- 1.96 standard errors
newdat <- data.frame(
  founding_density_5yr = seq(min(df$founding_density_5yr, na.rm = TRUE),
                             max(df$founding_density_5yr, na.rm = TRUE), length.out = 200),
  cumulative_stock = median(df$cumulative_stock, na.rm = TRUE)
)
pred <- predict(m_h21, newdata = newdat, se.fit = TRUE)
pred_df <- newdat %>%
  mutate(
    fit   = pred$fit,
    upper = pred$fit + 1.96 * pred$se.fit,
    lower = pred$fit - 1.96 * pred$se.fit
  )

# 6. Plot with default ggplot colors

# Define and wrap caption text
caption_text <- "Figure 2.1. Institutional Density and Specialization (HHI). 
This figure tests whether IGOs founded in denser institutional environments are more specialized in their mandates. Each point represents an IGO, colored by its primary subject-matter domain, with selected IGOs labeled for reference. The regression line, with 95% confidence intervals, shows a weak negative relationship between founding density and specialization. This suggests that crowding alone does not systematically push IGOs toward narrower mandates. Highly specialized organizations (e.g., ITU, ICES) remain focused regardless of density, while broader development-oriented IGOs (e.g., UNDP, IFAD, UNU) sustain wide-ranging portfolios even in dense founding periods. Overall, subject domain and organizational design appear more influential than founding density in shaping specialization."

# Wrap caption at ~100 characters per line
caption_wrapped <- str_wrap(caption_text, width = 100)

# Build the figure: raw points, confidence band, fitted line, selected labels
p <- ggplot() +
  geom_point(data = df, aes(x = founding_density_5yr, y = sm_HHI, color = primary_subject),
             alpha = 0.8, size = 2) +
  geom_ribbon(data = pred_df, aes(x = founding_density_5yr, ymin = lower, ymax = upper),
              alpha = 0.2, fill = "grey70") +
  geom_line(data = pred_df, aes(x = founding_density_5yr, y = fit),
            color = "black", size = 1) +
  geom_text_repel(data = label_df, aes(x = founding_density_5yr, y = sm_HHI, label = institution),
                  size = 3, max.overlaps = 20, box.padding = 0.4, point.padding = 0.3) +
  labs(
    title = "Hypothesis 2.1:Higher institutional density at founding increases specialization (HHI)",
    x = "Founding Density (±2 years)",
    y = "Specialization",
    color = "Subject matter",
    caption = caption_wrapped
  ) +
  theme_minimal(base_size = 12) +
  theme(
    # The legend position is set directly. The previous
    # `p + theme(legend.position = "right")` referred to `p` inside its own
    # definition, which fails when `p` does not exist yet.
    legend.position = "right",
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 5, face = "bold"),
    plot.caption = element_text(hjust = 0, size = 7, lineheight = 1.2, face = "italic"),
    plot.subtitle = element_text(size = 11, margin = margin(b = 10))
  )

# Save wider figure
ggsave("figure_2_1_h2_1_density_specialization.png", plot = p, width = 11, height = 6, dpi = 300, bg ="white")
print(p)

Conclusion for Hypothesis 2.1

Expected hypothesis: Higher density → more specialization.

  • The relationship is weak. Some IGOs (e.g., UNDP, WFP) manage to remain broad despite institutional crowding, while others (e.g., ITU, ICES) specialize strongly regardless of density.

Implication:

  • Institutional density alone does not fully explain specialization.

  • Subject-matter domain and organizational design matter: technical/scientific IGOs (e.g., ITU, ICES) are naturally more specialized, while development-focused IGOs (UNDP, IFAD) remain broad because their mandates require addressing multiple issues simultaneously.

4.2 Hypothesis 2.2: IGOs founded in denser environments have narrower portfolios.

library(stringr)

# --- Caption text (wrapped to ~100 characters per line for readability) ---
# The Markdown bold markers ("**") were removed: ggplot2 draws caption text
# literally, so they showed up as asterisks in the saved figure.
# NOTE(review): the caption describes a positive association; re-check its
# wording against m_h22 now that the line is fitted to sm_breadth.
caption_text <- str_wrap(
  "Figure 2.2. Density and Subject-Matter Breadth. 
  This figure tests whether IGOs founded in denser institutional environments exhibit broader mandates (measured as the number of subject domains covered). Each dot represents an IGO, colored by its primary subject-matter domain. The regression line (with 95% confidence band) shows a positive association: higher founding density is linked with broader subject coverage. Labeled IGOs illustrate the extremes, from narrow specialists (e.g., ITU, ICES, IPCC) to broad generalists (e.g., UNU, UNDP, UNCCD).",
  width = 100
)

# --- Regression of subject-matter breadth on founding density ---
# The figure previously reused `pred_df`, i.e. predictions from the HHI model
# (a 0-1 scale), and drew them on the breadth axis (a count of domains).
# Breadth gets its own model and prediction grid, built like the HHI model;
# separate object names leave the HHI objects untouched.
m_h22 <- lm(sm_breadth ~ scale(founding_density_5yr) + scale(cumulative_stock), data = df)

newdat_h22 <- data.frame(
  founding_density_5yr = seq(min(df$founding_density_5yr, na.rm = TRUE),
                             max(df$founding_density_5yr, na.rm = TRUE), length.out = 200),
  cumulative_stock = median(df$cumulative_stock, na.rm = TRUE)
)
pred_h22 <- predict(m_h22, newdata = newdat_h22, se.fit = TRUE)
pred_df_h22 <- newdat_h22 %>%
  mutate(
    fit   = pred_h22$fit,
    upper = pred_h22$fit + 1.96 * pred_h22$se.fit,
    lower = pred_h22$fit - 1.96 * pred_h22$se.fit
  )

# --- Plot ---
# `label_df` is whatever selection of IGOs is current when this chunk runs.
h2 <- ggplot() +
  # Raw data points
  geom_point(data = df, aes(x = founding_density_5yr, y = sm_breadth, color = primary_subject),
             alpha = 0.7, size = 2) +

  # Regression fit (breadth model) with its 95% band
  geom_ribbon(data = pred_df_h22, aes(x = founding_density_5yr, ymin = lower, ymax = upper),
              alpha = 0.2, fill = "orange") +
  geom_line(data = pred_df_h22, aes(x = founding_density_5yr, y = fit),
            color = "darkred", size = 1.2) +

  # Labels for notable IGOs
  geom_text_repel(data = label_df, aes(x = founding_density_5yr, y = sm_breadth, label = institution),
                  size = 3, max.overlaps = 15, box.padding = 0.3, point.padding = 0.2) +

  # Labels and style
  labs(
    title = "Figure 2.2 Hypothesis 2.2 — Density and Subject-Matter Breadth",
    subtitle = "Colored by primary subject domain; regression fit with 95% CI",
    x = "Founding Density (±2 years)",
    y = "Subject-Matter Breadth (Number of Domains)",
    color = "Primary Subject",
    caption = caption_text
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "right",
    legend.box = "vertical",
    plot.caption = element_text(hjust = 0, size = 9, lineheight = 1.1, face = "italic")
  )

# --- Save with solid background ---
ggsave("figure_2_2_h2_2_density_and_subject_matter_breadth.png", plot = h2,
       width = 11, height = 6, dpi = 300, bg = "white")

print(h2)

Overall trend (line and band):
  • Unlike Hypothesis 2.1, where specialization (HHI) was weakly linked to density, here the regression line slopes upward, indicating that higher founding density is associated with greater breadth of mandates. This suggests that IGOs created in crowded environments may expand their coverage across multiple domains rather than narrowing their focus.
Specialist IGOs (low breadth):
  • ITU, ICES, IPCC remain narrowly focused (1–2 domains), even when founded in environments of varying density. These are technical or scientific IGOs where narrow mandates are a functional necessity.
Generalist IGOs (high breadth):
  • UNU, UNDP, UNCCD, IFAD cover 6–8 domains, reflecting multidimensional mandates in development, environment, or governance. Notably, these IGOs appear in denser founding environments, consistent with the upward slope of the regression.
Subject domains (colors):

Broad mandates are especially common in development, governance, and environment domains (e.g., UNDP, UNCCD), while science/technical organizations remain specialized.

Comparison to Hypothesis 2.1:

Together, Figures 2.1 and 2.2 show a nuanced picture:

  • Using specialization (HHI) → density does not strongly drive specialization.

  • Using breadth (domain count) → density appears to encourage broader coverage, at least for development-oriented IGOs.

Implication:

IGOs respond to dense institutional environments not only by carving out niches (as theory suggested) but also by adopting broader mandates to remain relevant and competitive, especially in domains where overlap and coordination are central (development, environment, governance).

4.3 Hypothesis 2.3 Spatial jurisdiction breadth decreases as density increases

library(ggplot2)
library(dplyr)

# Bin founding density into three groups, cutting at the 0 / 33 / 66 / 100
# percent quantiles of the observed values
df <- df %>%
  mutate(
    density_group = cut(
      founding_density_5yr,
      breaks = quantile(founding_density_5yr, probs = c(0, .33, .66, 1), na.rm = TRUE),
      labels = c("Low Density", "Medium Density", "High Density"),
      include.lowest = TRUE
    )
  )

# Mean spatial breadth and its standard error within each density group
summary_df <- summarise(
  group_by(df, density_group),
  mean_spatial = mean(spatial_breadth, na.rm = TRUE),
  se = sd(spatial_breadth, na.rm = TRUE) / sqrt(n()),
  .groups = "drop"
)

# Poisson regression of spatial breadth on standardised founding density,
# followed by its Chi-square analysis-of-deviance table
m_h23 <- glm(
  formula = spatial_breadth ~ scale(founding_density_5yr),
  family = "poisson",
  data = df
)
anova_result <- anova(m_h23, test = "Chisq")
print(summary(m_h23))

Call:
glm(formula = spatial_breadth ~ scale(founding_density_5yr), 
    family = "poisson", data = df)

Coefficients:
                            Estimate Std. Error z value Pr(>|z|)    
(Intercept)                  0.84702    0.09452   8.962   <2e-16 ***
scale(founding_density_5yr)  0.02368    0.09510   0.249    0.803    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 122.86  on 47  degrees of freedom
Residual deviance: 122.80  on 46  degrees of freedom
AIC: 219

Number of Fisher Scoring iterations: 5
# Print the analysis-of-deviance table stored in `anova_result`, i.e. the
# result of anova(m_h23, test = "Chisq")
print(anova_result)
Analysis of Deviance Table

Model: poisson, link: log

Response: spatial_breadth

Terms added sequentially (first to last)

                            Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL                                           47     122.86         
scale(founding_density_5yr)  1 0.061814        46     122.80   0.8037
# IGOs at both ends of the spatial-breadth ranking: first 3 and last 3 rows
# after sorting ascending (3 narrowest + 3 broadest)
label_df <- slice(arrange(df, spatial_breadth), c(1:3, (n() - 2):n()))

# --- Caption text, wrapped at 100 characters ---
caption_text <- str_wrap(
  "Figure 2.3. Density and Spatial Breadth. 
  This figure tests whether IGOs founded in denser institutional environments cover broader jurisdictional areas (spatial breadth). 
  Bars show the mean number of jurisdictions covered in low-, medium-, and high-density founding periods, with 95% confidence intervals. 
  The results suggest a modest positive relationship: IGOs founded in high-density environments tend to cover more jurisdictions, 
  though variation remains substantial across cases.",
  width = 100
)

# Bar chart of group means; whiskers span mean +/- 1.96 standard errors and
# each bar carries its rounded mean as a label
h3 <- ggplot(
  data = summary_df,
  mapping = aes(x = density_group, y = mean_spatial, fill = density_group)
) +
  geom_col(width = 0.6, alpha = 0.8) +
  geom_errorbar(
    mapping = aes(ymin = mean_spatial - 1.96*se, ymax = mean_spatial + 1.96*se),
    size = 1, width = 0.2
  ) +
  geom_text(
    mapping = aes(label = round(mean_spatial,1)),
    fontface = "bold", size = 4, vjust = -1
  ) +
  scale_fill_manual(
    values = c(
      "Low Density" = "#8dd3c7",
      "Medium Density" = "#ffffb3",
      "High Density" = "#fb8072"
    )
  ) +
  labs(
    title = "Figure 2.3 Hypothesis 2.3 — Density and Spatial Breadth",
    subtitle = "Mean jurisdictional coverage of IGOs across density groups (±95% CI)",
    x = "Founding Density (Grouped)",
    y = "Average Spatial Breadth (Jurisdictions)",
    caption = caption_text
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold"),
    # left-aligned italic caption
    plot.caption = element_text(hjust = 0, size = 9, lineheight = 1.1, face = "italic"),
    legend.position = "none"
  )

# Write the figure to disk on a white background, then display it
ggsave(
  "figure_2_3_h2_3_spatial_breadth.png",
  plot = h3, width = 9, height = 6, dpi = 300, bg = "white"
)

print(h3)

Implication:

Figure 2.3 tests whether institutional density at the time of founding influences the geographical scope (spatial breadth) of IGOs. The results show a modest but non-linear pattern: IGOs founded in medium-density periods tend to have the broadest spatial coverage on average (≈2.7 jurisdictions), while those founded in low-density contexts remain narrower (≈1.9 jurisdictions). Interestingly, organizations emerging in high-density periods are not significantly broader than those in low-density contexts, suggesting that intense institutional crowding does not automatically translate into wider geographical mandates.

This finding complements the previous hypotheses. Whereas Hypothesis 2.1 suggested that density was only weakly associated with greater specialization (HHI), and Hypothesis 2.2 indicated a slight positive relationship with subject-matter breadth, Hypothesis 2.3 highlights a more nuanced dynamic: institutional crowding appears to encourage expansion of jurisdictional reach only up to a point. Beyond this, high-density environments may constrain or channel IGOs into narrower or more selective spatial niches, reflecting pressures to avoid redundancy and overlap.

4.4 Hypothesis 2.4 Stronger coordination mechanisms allow IGOs to survive density pressures without extreme specialization.

# One IGO with the smallest and one with the largest strategy_breadth per
# density group (ties broken by taking a single row); minima first, then maxima
label_df <- ungroup(bind_rows(
  slice_min(group_by(df, density_group),
            order_by = strategy_breadth, n = 1, with_ties = FALSE),
  slice_max(group_by(df, density_group),
            order_by = strategy_breadth, n = 1, with_ties = FALSE)
))

# Caption, wrapped at 100 characters
caption_text <- str_wrap(
  "Figure 2.4. Density and Strategic Breadth. 
   The figure shows the distribution of strategy portfolio breadth (number of strategies adopted) across IGOs founded in low-, medium-, and high-density environments. 
   Each ridge represents the spread of strategic breadth scores, while labeled points highlight selected IGOs with the narrowest and broadest portfolios in each group. 
   For example, ITU (Low Density) shows limited breadth, while UN Women (Low Density) represents a broader strategic mandate. 
   In the High Density group, UN DOALOS has a relatively narrow strategy set, while FAO displays a wide-ranging portfolio. 
   Bars capture the diversity of approaches IGOs adopt depending on institutional crowding at the time of founding.",
  width = 100
)

# Ridgeline densities of strategy breadth per density group, with the
# selected IGOs drawn as black points and labelled by institution
h4 <- ggplot(
  data = df,
  mapping = aes(x = strategy_breadth, y = density_group, fill = density_group)
) +
  geom_density_ridges(color = "white", alpha = 0.6, scale = 1.1, rel_min_height = 0.01) +
  geom_point(
    data = label_df,
    mapping = aes(x = strategy_breadth, y = density_group),
    size = 3, color = "black"
  ) +
  geom_text_repel(
    data = label_df,
    mapping = aes(x = strategy_breadth, y = density_group, label = institution),
    segment.color = "grey40", nudge_y = 0.25, size = 3
  ) +
  scale_fill_manual(
    values = c(
      "Low Density" = "#80b1d3",
      "Medium Density" = "#fdb462",
      "High Density" = "#b3de69"
    )
  ) +
  labs(
    title = "Figure 2.4 Hypothesis 2.4 — Density and Strategic Breadth",
    x = "Strategic Breadth (Number of Strategies)",
    y = "Founding Density Group",
    caption = caption_text
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(size =11, face = "bold"),
    plot.caption = element_text(hjust = 0, size = 9, lineheight = 1.1, face = "italic"),
    legend.position = "right"
  )

# Save on a white background, then display
ggsave(
  "figure_2_4_h2_4_strategic_breadth_labeled.png",
  plot = h4, width = 9, height = 6, dpi = 300, bg = "white"
)

print(h4)

Implication:

Figure 2.4 examines whether IGOs founded in denser institutional environments pursue broader strategic portfolios. The distributions suggest a clear trend: IGOs in medium- and high-density environments tend to adopt a wider range of strategies than those founded in low-density periods. Importantly, the figure also highlights variation within each density group. For instance, ITU (founded in a low-density era) shows a narrow strategy focus, while UN Women, also from a low-density period, is far more diversified. Similarly, among high-density IGOs, UN DOALOS pursues a relatively narrow set of strategies, whereas FAO exhibits one of the broadest portfolios in the dataset.

This supports Hypothesis 2.4 by showing that institutional crowding generally pushes IGOs toward greater strategic breadth, though not uniformly. Compared to Hypothesis 2.1 (specialization) and Hypothesis 2.2 (subject-matter breadth), this finding strengthens the view that denser institutional environments encourage organizations to expand their portfolios of action rather than restrict them to narrow mandates. In contrast to Hypothesis 2.3 (spatial breadth), where high density limited expansion, here high density tends to encourage more diversified strategies, reflecting pressures for organizations to remain relevant and responsive within crowded governance landscapes.

4.6 Hypothesis 2.6: IGOs with broader strategy portfolios adapt better in high-density fields than those with narrow strategies

library(dplyr)
library(ggplot2)
library(ggeffects)
library(stringr)

# --- Step 1: Linear model with a density x strategy-breadth interaction ---
# All predictors are standardised with scale(); cumulative_stock enters as a
# control.
m_h26 <- lm(
  formula = sm_HHI ~ scale(founding_density_5yr) * scale(strategy_breadth) +
    scale(cumulative_stock),
  data = df
)

summary(m_h26)

Call:
lm(formula = sm_HHI ~ scale(founding_density_5yr) * scale(strategy_breadth) + 
    scale(cumulative_stock), data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.23150 -0.08296 -0.01967  0.07305  0.54462 

Coefficients:
                                                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)                                          0.3097353  0.0202595  15.288  < 2e-16 ***
scale(founding_density_5yr)                         -0.0369609  0.0227630  -1.624  0.11174    
scale(strategy_breadth)                             -0.0552903  0.0204184  -2.708  0.00968 ** 
scale(cumulative_stock)                              0.0003085  0.0208938   0.015  0.98829    
scale(founding_density_5yr):scale(strategy_breadth)  0.0193821  0.0220502   0.879  0.38429    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1364 on 43 degrees of freedom
Multiple R-squared:  0.1989,    Adjusted R-squared:  0.1244 
F-statistic: 2.669 on 4 and 43 DF,  p-value: 0.04482
# --- Step 2: Predictions for plotting ---
# ggpredict() is exported by ggeffects, hence the explicit namespace.
# NOTE(review): "[quart]" asks ggeffects for quartile values of
# strategy_breadth; the resulting `group` levels are presumably breadth
# values, not quartile numbers -- confirm against the ggeffects docs.
pred_h26 <- ggeffects::ggpredict(
  m_h26,
  terms = c("founding_density_5yr", "strategy_breadth [quart]")
)

# --- Step 3: Caption text, wrapped at 100 characters ---
caption_text <- str_wrap(
  "Figure 2.6. Density, Strategy Breadth, and Specialization. 
   The figure illustrates how founding density influences IGO specialization (Herfindahl–Hirschman Index, HHI), depending on the breadth of their strategy portfolios. 
   Strategy breadth is divided into quartiles, meaning IGOs are grouped into four levels from narrowest to broadest portfolios: the lowest quartile (fewer strategies) to the highest quartile (more diversified strategies). 
   Lines show predicted specialization for each quartile, and shaded areas represent 95% confidence intervals. 
   IGOs with broader strategy portfolios (upper quartiles) begin with higher specialization and retain it more effectively under high density, while those with narrow portfolios (lower quartiles) show lower specialization and sharper declines as density increases.",
  width = 100
)

# --- Step 4: One predicted line per group, each with its confidence band ---
h6 <- ggplot(
  data = pred_h26,
  mapping = aes(x = x, y = predicted, color = group)
) +
  geom_line(size = 1.2) +
  geom_ribbon(
    mapping = aes(ymin = conf.low, ymax = conf.high, fill = group),
    color = NA, alpha = 0.15
  ) +
  scale_color_brewer(name = "Strategy Breadth (Quartiles)", palette = "Set1") +
  scale_fill_brewer(name = "Strategy Breadth (Quartiles)", palette = "Set1") +
  labs(
    title = "Figure 2.6 Hypothesis 2.6 — Density, Strategy Breadth, and Specialization",
    subtitle = "Effect of founding density on specialization (HHI), moderated by strategy breadth",
    x = "Founding Density (±2 years)",
    y = "Specialization (HHI)",
    caption = caption_text
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(size = 11, color = "grey40"),
    plot.caption = element_text(hjust = 0, size = 9, lineheight = 1.1, face = "italic")
  )

# --- Step 5: Save with white background, then display ---
ggsave(
  "figure_2_6_h2_6.png",
  plot = h6, width = 10, height = 6, dpi = 300, bg = "white"
)

print(h6)

Implications

This result supports Hypothesis 2.6, showing that strategic breadth moderates the pressures of crowded institutional fields. IGOs founded in dense environments tend to lose specialization overall (downward slope across all lines), but those with broader strategy portfolios (e.g., strategy-breadth values of 7–9) adapt better, preserving higher levels of specialization compared to those with narrow portfolios. Note, however, that the density × strategy-breadth interaction in the model above is not statistically significant (estimate = 0.019, p = 0.38), so this moderation should be read as suggestive rather than established. This suggests that in competitive institutional landscapes, flexibility and diversification of strategies help organizations carve out niches and buffer against redundancy. When read alongside Hypotheses 2.1–2.5, this extends the picture: while density generally pushes toward specialization, the ability to combine multiple strategies enhances resilience and allows IGOs to maintain a differentiated role within crowded governance spaces.

4.7 Hypothesis 2.7: Niche differentiation occurs along subject–spatial intersections.

# Display labels keyed by column name. The names of these two vectors also
# define which columns are compared when picking each IGO's primary subject
# domain and primary spatial zone.
subject_labels <- c(
  "biodiversity_ecosystem_conservation_across_igo" = "Biodiversity & Conservation",
  "cultural_heritage_traditional_knowledge_data_governance_across_igo" = "Cultural Heritage & Knowledge",
  "disaster_risk_reduction_resilience_across_igo" = "Disaster Risk & Resilience",
  "environmental_protection_climate_change_across_igo" = "Environment & Climate",
  "human_rights_social_justice_advocacy_across_igo" = "Human Rights & Justice",
  "international_cooperation_governance_across_igo" = "Governance & Cooperation",
  "research_science_innovation_across_igo" = "Science & Innovation",
  "security_safety_across_igo" = "Security & Safety",
  "sustainable_development_capacity_building_across_igo" = "Sustainable Development",
  "trade_investment_economic_cooperation_across_igo" = "Trade & Economy"
)
spatial_labels <- c(
  "archipelago_across_igo" = "Archipelago",
  "coastal_zone_across_igo" = "Coastal Zone",
  "contiguous_zone_cz_across_igo" = "Contiguous Zone",
  "enclosed_or_semi_enclosed_sea_across_igo" = "Enclosed/Semi-Enclosed Sea",
  "exclusive_economic_zone_eez_across_igo" = "EEZ",
  "extended_continental_shelf_cs_across_igo" = "Continental Shelf",
  "high_seas_across_igo" = "High Seas",
  "internal_waters_across_igo" = "Internal Waters",
  "territorial_sea_ts_across_igo" = "Territorial Sea",
  "the_area_across_igo" = "The Area"
)

caption_text_27 <- str_wrap(
  "Figure 2.7 — Niche Differentiation (Subject ↔ Spatial Flows).
   The diagram illustrates how IGO subject domains distribute across maritime spatial jurisdictions. 
   Broad domains such as Environment & Climate and Biodiversity & Conservation span nearly all zones, 
   while narrower fields like Security & Safety concentrate in the high seas and the Area. 
   Development-oriented areas (Sustainable Development, Trade & Economy) cluster around coastal and EEZ spaces, 
   reflecting economic mandates. 
   These flows demonstrate that IGOs occupy differentiated niches at the intersection of subject and space, 
   supporting the hypothesis that institutional specialization is structured rather than random.",
  width = 100
)

library(ggalluvial)
library(ggplot2)

# For every row of `data`, return the name of the column in `cols` holding the
# largest value (first column on ties). Rows with no positive value, or only
# missing values, yield NA: which.max() returns 1 for an all-zero row, which
# would silently assign such an IGO to the first column.
primary_col <- function(data, cols) {
  values <- as.matrix(data[, cols])
  vapply(
    seq_len(nrow(values)),
    function(i) {
      row_values <- values[i, ]
      if (all(is.na(row_values)) || max(row_values, na.rm = TRUE) <= 0) {
        return(NA_character_)
      }
      cols[which.max(row_values)]
    },
    character(1)
  )
}

# Collapse into subject + spatial (taking primary domain). Columns are
# addressed by name instead of by position (59:68 / 17:26), so the result no
# longer depends on the column order of `df`.
df_long <- data.frame(
  IGO = df$institution,
  Subject = primary_col(df, names(subject_labels)),
  Spatial = primary_col(df, names(spatial_labels))
)

# Apply shorter labels
df_long$Subject <- unname(subject_labels[df_long$Subject])
df_long$Spatial <- unname(spatial_labels[df_long$Spatial])

# IGOs without a primary subject or spatial zone cannot be drawn as a flow
if (anyNA(df_long[, c("Subject", "Spatial")])) {
  message("Figure 2.7: dropping IGOs without a primary subject or spatial zone")
}

# Alluvial plot: one flow per IGO from its primary subject (left axis) to its
# primary spatial zone (right axis), filled by subject
h7 <- ggplot(subset(df_long, !is.na(Subject) & !is.na(Spatial)),
            aes(axis1 = Subject, axis2 = Spatial)) +
  geom_alluvium(aes(fill = Subject), width = 1/8, alpha = 0.85) +
  geom_stratum(width = 1/8, fill = "grey95", color = "black") +
  geom_text(
    stat = "stratum",
    aes(label = after_stat(stratum)),
    size = 2.8,        # smaller text
    hjust = 0.5,       # centered
    lineheight = 0.9   # tighter spacing
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.y = element_blank(),
    axis.title = element_blank(),
    legend.position = "none",
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(size = 10, color = "grey40"),
    plot.caption = element_text(
      hjust = 0, size = 9, lineheight = 1.1, face = "italic", color = "grey30"
    )
  ) +
  labs(
    title = "Figure 2.7 Hypothesis 2.7 — Niche Differentiation (Subject ↔ Spatial Flows)",
    subtitle = "Flows show how subject domains distribute across spatial jurisdictions",
    caption = caption_text_27
  )

ggsave("figure_2_7_h2_7_alluvial.png", plot = h7,
       width = 12, height = 6, dpi = 300, bg = "white")
print(h7)

Implications: These patterns support Hypothesis 2.7 by showing that niche differentiation occurs along subject–spatial intersections. Some domains are broadly distributed across all maritime spaces, while others are spatially concentrated, creating clear subject-specific niches. This differentiation reflects how IGOs align their functional mandates with the legal and ecological characteristics of maritime zones, rather than being evenly spread across all spaces. In doing so, the system of IGOs exhibits both cross-cutting mandates and specialized jurisdictions, reinforcing the idea that institutional niches are structured and differentiated rather than overlapping randomly.

# ====== COMMON SETUP ======
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggrepel)
library(ggeffects)
library(mgcv)
library(broom)
library(scales)
library(stringr)

# Helper: wrap a caption string at 110 characters per line
cap <- function(x) stringr::str_wrap(x, width = 110)

# (Optional) derived measures if you haven't defined them yet.
# Referring to a missing column inside mutate() is an error, so the columns
# are created as NA first; the fallback below can then fill them in.
if (!"strategy_breadth" %in% names(df)) {
  df$strategy_breadth <- NA_real_
}
if (!"spatial_breadth" %in% names(df)) {
  df$spatial_breadth <- NA_real_
}

# Existing values are kept. Missing values are filled with the number of
# columns in the group holding a value > 0, the same definition the
# derived-measures section uses. Columns are addressed by name
# (strategy_across_cols / spatial_across_cols) rather than by position.
df <- df %>%
  mutate(
    strategy_breadth = ifelse(
      !is.na(strategy_breadth), strategy_breadth,
      rowSums(across(all_of(strategy_across_cols)) > 0, na.rm = TRUE)
    ),
    spatial_breadth = ifelse(
      !is.na(spatial_breadth), spatial_breadth,
      rowSums(across(all_of(spatial_across_cols)) > 0, na.rm = TRUE)
    )
  )

4.8 Hypothesis 2.8 — Non-linearity / diminishing returns: The effect of founding density on specialization is non-linear (diminishing at high density).

# Fit a GAM: a smooth term on founding density (basis dimension k = 4) plus
# the standardised cumulative stock as a linear control
m_h28 <- mgcv::gam(
  formula = sm_HHI ~ s(founding_density_5yr, k = 4) + scale(cumulative_stock),
  data = df
)
summary(m_h28)

Family: gaussian 
Link function: identity 

Formula:
sm_HHI ~ s(founding_density_5yr, k = 4) + scale(cumulative_stock)

Parametric coefficients:
                         Estimate Std. Error t value Pr(>|t|)    
(Intercept)              0.305548   0.019951  15.315   <2e-16 ***
scale(cumulative_stock) -0.002415   0.021438  -0.113    0.911    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Approximate significance of smooth terms:
                          edf Ref.df     F p-value  
s(founding_density_5yr) 2.196  2.546 2.298  0.0694 .
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

R-sq.(adj) =  0.101   Deviance explained = 16.2%
GCV = 0.020937  Scale est. = 0.019106  n = 48
# Model-based predictions for the density term, used for the line and band
pred_h28 <- ggpredict(m_h28, terms = "founding_density_5yr [all]")

# Base canvas: predicted values along the density axis
p_h28 <- ggplot(pred_h28, aes(x = x, y = predicted))

# Layers: confidence band, fitted line, then the raw IGO observations
p_h28 <- p_h28 +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .18) +
  geom_line(size = 1.2) +
  geom_point(
    data = df,
    aes(x = founding_density_5yr, y = sm_HHI),
    alpha = .35
  )

# Titles, axis labels and wrapped caption, then the theme
p_h28 <- p_h28 +
  labs(
    title = "Figure 2.8 Hypothesis 2.8 — Non-linear Effect of Density on Specialization",
    subtitle = "GAM smooth; band = 95% CI. Points are IGOs.",
    x = "Founding Density (±2 years)", y = "Specialization (HHI)",
    caption = cap("The smooth shows that density’s effect on specialization is not strictly linear: the slope flattens as density increases, \
suggesting diminishing returns once institutional fields become very crowded.")
  ) +
  theme_minimal(base_size = 13)

# Save to disk, then display
ggsave(
  "figure_2_8_h2_8_gam_density_specialization.png",
  p_h28,
  width = 9, height = 5.6, dpi = 300
)

print(p_h28)

4.9 Hypothesis 2.9 H2.9 — Mediation: Strategy breadth partially mediates the effect of density on specialization

library(lavaan)

# Standardize to aid interpretation.
# scale() returns a one-column matrix; as.numeric() turns it back into a
# plain numeric vector so df_std holds ordinary columns.
df_std <- df %>%
  mutate(across(
    c(founding_density_5yr, cumulative_stock, strategy_breadth, sm_HHI),
    function(x) as.numeric(scale(x))
  ))

# Mediation model: density -> strategy breadth -> specialization,
# with cumulative stock as a control on both equations.
model_h29 <- '
  # direct paths
  strategy_breadth ~ a*founding_density_5yr + c1*cumulative_stock
  sm_HHI           ~ b*strategy_breadth + c_prime*founding_density_5yr + c2*cumulative_stock

  # indirect and total effects
  ind := a*b
  total := c_prime + (a*b)
'

# Bootstrap standard errors draw random resamples; fix the seed so the
# reported SEs and confidence intervals can be reproduced exactly.
set.seed(2029)
fit_h29 <- sem(model_h29, data = df_std, se = "bootstrap", bootstrap = 1000)
summary(fit_h29, standardized = TRUE, fit.measures = TRUE, rsquare = TRUE)
lavaan 0.6-19 ended normally after 1 iteration

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                         7

  Number of observations                            48

Model Test User Model:
                                                      
  Test statistic                                 0.000
  Degrees of freedom                                 0

Model Test Baseline Model:

  Test statistic                                12.187
  Degrees of freedom                                 5
  P-value                                        0.032

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    1.000
  Tucker-Lewis Index (TLI)                       1.000

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)               -129.114
  Loglikelihood unrestricted model (H1)       -129.114
                                                      
  Akaike (AIC)                                 272.228
  Bayesian (BIC)                               285.327
  Sample-size adjusted Bayesian (SABIC)        263.366

Root Mean Square Error of Approximation:

  RMSEA                                          0.000
  90 Percent confidence interval - lower         0.000
  90 Percent confidence interval - upper         0.000
  P-value H_0: RMSEA <= 0.050                       NA
  P-value H_0: RMSEA >= 0.080                       NA

Standardized Root Mean Square Residual:

  SRMR                                           0.000

Parameter Estimates:

  Standard errors                            Bootstrap
  Number of requested bootstrap draws             1000
  Number of successful bootstrap draws            1000

Regressions:
                     Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  strategy_breadth ~                                                      
    fndn__5    (a)     -0.220    0.155   -1.414    0.157   -0.220   -0.220
    cmltv_s   (c1)     -0.003    0.166   -0.016    0.987   -0.003   -0.003
  sm_HHI ~                                                                
    strtgy_    (b)     -0.374    0.165   -2.273    0.023   -0.374   -0.374
    fndn__5 (c_pr)     -0.304    0.176   -1.732    0.083   -0.304   -0.304
    cmltv_s   (c2)     -0.016    0.158   -0.100    0.921   -0.016   -0.016

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .strategy_brdth    0.931    0.143    6.519    0.000    0.931    0.951
   .sm_HHI            0.798    0.286    2.790    0.005    0.798    0.815

R-Square:
                   Estimate
    strategy_brdth    0.049
    sm_HHI            0.185

Defined Parameters:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
    ind               0.082    0.069    1.188    0.235    0.082    0.082
    total            -0.222    0.172   -1.288    0.198   -0.222   -0.222
# Quick coefficient (dot-whisker) plot for mediation paths.
# Keep only the labelled paths of interest by exact match on the `label`
# column. A regex such as "a|b|..." applied to `term` also matches every
# term that merely contains the letter "a" or "b", which lets the control
# paths (c1, c2) and residual variances into the plot.
path_labels <- c("a", "b", "c_prime", "ind", "total")
coef_df <- broom::tidy(fit_h29, conf.int = TRUE) %>%
  filter(label %in% path_labels)

# One point per path with its bootstrap confidence interval; the dashed
# vertical line marks zero.
p_h29 <- ggplot(coef_df, aes(x = estimate, y = term)) +
  geom_point() + geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = .15) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  labs(
    title = "Figure 2.9 Hypothesis 2.9 — Mediation by Strategy Breadth",
    x = "Estimate (bootstrap 95% CI)", y = "",
    caption = cap("Path a: density → strategy breadth; Path b: strategy breadth → specialization; c' is the direct effect of density on specialization \
controlling for breadth. A significant indirect effect (ind) supports mediation.")
  ) + theme_minimal(base_size = 13)
ggsave("figure_2_9_h2_9_mediation.png", p_h29, width = 8.6, height = 5.2, dpi = 300)
print(p_h29)

_Figure 2.9 tests whether the effect of founding density on subject-matter specialization (HHI) operates indirectly through strategy breadth. The dot-whisker plot shows standardized coefficients with bootstrap 95% confidence intervals. The key pathway of interest is the indirect effect (ind := a*b). A significant indirect effect suggests mediation, meaning that IGOs in denser founding environments adjust their strategy portfolios, and this broader or narrower portfolio in turn shapes how specialized they become._

Interpretation:

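  • The direct path from founding density to specialization (c′ = −0.304, p = 0.083) is negative and only marginally significant once strategy breadth is included; it is larger in magnitude than the total effect (−0.222) because the indirect path has the opposite sign.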

  • The indirect path (a*b) is the mechanism of interest: it shows whether density → strategy breadth → specialization is a valid chain.

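  • If the indirect effect confidence interval excludes zero, then mediation holds, meaning density’s influence on specialization is partly explained by strategy choices. In the output above the indirect effect is 0.082 (bootstrap SE = 0.069, p = 0.235), so it is not distinguishable from zero and mediation is not supported in this sample.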

  • Substantively, this suggests that IGOs in crowded institutional fields do not simply specialize directly; they adapt by adjusting their strategy mix, which then influences their degree of specialization.

4.10 Hypothesis 2.10 Inter-institutional embeddedness rises with density

# Build an Inter-institutional Embeddedness Index (IEI):
# the row-wise mean of columns 112-131 (selected by position), ignoring NAs
iei_cols <- 112:131
df <- mutate(df, IEI = rowMeans(across(all_of(iei_cols)), na.rm = TRUE))

# Model: IEI ~ density + controls (both predictors standardised)
m_h210 <- lm(IEI ~ scale(founding_density_5yr) + scale(cumulative_stock), data = df)
summary(m_h210)

Call:
lm(formula = IEI ~ scale(founding_density_5yr) + scale(cumulative_stock), 
    data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.2851 -0.4123 -0.0244  0.4643  1.1243 

Coefficients:
                            Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  2.42240    0.08476  28.581   <2e-16 ***
scale(founding_density_5yr)  0.02624    0.08902   0.295    0.770    
scale(cumulative_stock)      0.05879    0.08902   0.660    0.512    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.5872 on 45 degrees of freedom
Multiple R-squared:  0.01488,   Adjusted R-squared:  -0.02891 
F-statistic: 0.3398 on 2 and 45 DF,  p-value: 0.7137
# Predictions for the density term of the IEI model
pred_h210 <- ggpredict(m_h210, terms = "founding_density_5yr [all]")

# IGOs to annotate: the four highest and the three lowest IEI values
label_igos <- bind_rows(
  slice_max(df, order_by = IEI, n = 4),
  slice_min(df, order_by = IEI, n = 3)
)

# Layers in drawing order: raw points, fitted line, confidence band, labels
p_h210 <- ggplot() +
  geom_point(
    data = df,
    aes(founding_density_5yr, IEI),
    alpha = .45
  ) +
  geom_line(
    data = pred_h210,
    aes(x = x, y = predicted),
    size = 1.1
  ) +
  geom_ribbon(
    data = pred_h210,
    aes(x = x, ymin = conf.low, ymax = conf.high),
    alpha = .18
  ) +
  ggrepel::geom_text_repel(
    data = label_igos,
    aes(founding_density_5yr, IEI, label = institution),
    size = 3, seed = 4
  ) +
  labs(
    title = "Figure 2.10 Hypothesis 2.10 — Density and Inter-institutional Embeddedness",
    subtitle = "Inter-institutional Embeddedness Index (IEI) from interaction items (cols 112–131)",
    x = "Founding Density (±2 years)", y = "Embeddedness (IEI, mean of interactions)",
    caption = cap("A positive slope indicates IGOs founded in denser fields maintain broader inter-institutional ties (UN system collaboration, \
treaty body coordination, scientific linkages, etc.), consistent with coordination pressures in crowded spaces.")
  ) +
  theme_minimal(base_size = 13)

# Save to disk, then display
ggsave(
  "figure_2_10_h2_10_density_IEI.png",
  p_h210,
  width = 9, height = 5.4, dpi = 300
)
print(p_h210)

Figure 2.10 examines whether IGOs founded in denser institutional environments exhibit broader inter-institutional embeddedness. The y-axis represents the Inter-institutional Embeddedness Index (IEI), constructed as the mean of the interaction items (inter-IGO collaborations, treaty body linkages, UN system partnerships, and other interactions). The regression line with 95% confidence band indicates the partial effect of founding density. Selected IGOs (e.g., ITC, UN DOALOS, OHCHR) are labeled for context.

Interpretation:

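  • The slope is positive, but it is very small and not statistically significant (β = 0.026, p = 0.770; model R² ≈ 0.015), so the data give at most weak evidence that IGOs founded in denser environments have more inter-institutional linkages.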

  • Examples like UN DOALOS, ITC, OHCHR are highly embedded, consistent with operating in complex governance landscapes where collaboration is necessary.

  • By contrast, more narrowly embedded IGOs (e.g., Minamata Convention, UNEP) operate with fewer ties, reflecting either functional autonomy or niche mandates.

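  • Substantively, the direction of the estimate is consistent with the idea that crowded institutional fields push new IGOs toward greater coordination and embeddedness to avoid redundancy and increase legitimacy, but the effect is too imprecise here to count as support for Hypothesis 2.10.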

---
title: "Conjecture 2 : Density, Niche Differentiation & Adaptation"
output: html_notebook
---

## 1. Setup
```{r}
# ===== Packages =====
# NOTE(review): MASS is attached after dplyr, so MASS::select() masks
# dplyr::select(); call dplyr::select() explicitly if select() is needed.
library(dplyr)
library(janitor)
library(tidyverse)
library(stringr)
library(ggplot2)
library(broom)
library(scales)
library(MASS)       
library(cluster)    
library(factoextra) 
library(DescTools)  
library(emmeans) 
library(ggridges)
library(ggrepel)
# ggpredict() is a function exported by the ggeffects package; there is no
# package called "ggpredict", and loading it aborted this chunk before the
# theme and helper functions below were defined.
library(ggeffects)

# ===== Theme for crystal-clear figures =====
# Global ggplot2 defaults: black-and-white theme and larger, slightly
# transparent points.
theme_set(theme_bw(base_size = 14))
update_geom_defaults("point", list(size = 2, alpha = 0.8))

# ===== Helpers =====
# z(): standardise a vector with scale() and return a plain numeric vector.
z <- function(x) {
  as.numeric(scale(x))
}

# safe_div(): element-wise num / den, returning 0 wherever den is exactly 0.
safe_div <- function(num, den) {
  ifelse(den == 0, 0, num / den)
}

# Herfindahl-Hirschman index of a non-negative vector: the values are
# normalised to shares internally and the squared shares are summed.
# NAs are ignored; returns NA_real_ when the total is not positive.
hhi <- function(v) {
  total <- sum(v, na.rm = TRUE)
  if (total <= 0) {
    return(NA_real_)
  }
  shares <- v / total
  sum(shares^2, na.rm = TRUE)
}
# Shannon entropy (natural log) of a non-negative vector: the values are
# normalised to shares internally; zero shares contribute 0 and NAs are
# ignored. Returns NA_real_ when the total is not positive.
shannon <- function(v) {
  total <- sum(v, na.rm = TRUE)
  if (total <= 0) {
    return(NA_real_)
  }
  shares <- v / total
  terms <- ifelse(shares > 0, shares * log(shares), 0)
  -sum(terms, na.rm = TRUE)
}
```
## 2. Data Loading
```{r}
# Load all datasets: each CSV is read and its column names are normalised
# with janitor::clean_names().
df_year <- clean_names(read_csv("Data/year_data.csv"))

df_spatial <- clean_names(read_csv("Data/spatial_jurisdiction_data.csv"))

df_vertical <- clean_names(read_csv("Data/vertical_coordinations_data.csv"))

df_subject <- clean_names(read_csv("Data/subject_matter_jurisdiction_data.csv"))

df_strategies <- clean_names(read_csv("Data/strategies_data.csv"))

df_objectives <- clean_names(read_csv("Data/defined_objectives_data.csv"))

df_relationships <- clean_names(
  read_csv("Data/defined_inter_institutional_relationships_data.csv")
)

df_sources <- clean_names(read_csv("Data/sources_of_jurisdiction_data.csv"))
```
```{r}
# Preview the first ten rows of the year dataset
df_year %>% head(n = 10)
```
## 3. Derived Measures
```{r}
# ===== Identify column groups by name =====
# Every column in these groups ends in "_across_igo"; the stems are listed
# once per group and the shared suffix is appended with paste0().

# Subject-matter (ACROSS)
sm_across_cols <- paste0(
  c(
    "biodiversity_ecosystem_conservation",
    "cultural_heritage_traditional_knowledge_data_governance",
    "disaster_risk_reduction_resilience",
    "environmental_protection_climate_change",
    "human_rights_social_justice_advocacy",
    "international_cooperation_governance",
    "research_science_innovation",
    "security_safety",
    "sustainable_development_capacity_building",
    "trade_investment_economic_cooperation"
  ),
  "_across_igo"
)

# Spatial (ACROSS)
spatial_across_cols <- paste0(
  c(
    "archipelago",
    "coastal_zone",
    "contiguous_zone_cz",
    "enclosed_or_semi_enclosed_sea",
    "exclusive_economic_zone_eez",
    "extended_continental_shelf_cs",
    "high_seas",
    "internal_waters",
    "territorial_sea_ts",
    "the_area"
  ),
  "_across_igo"
)

# Inter-institutional ties (ACROSS)
interinst_across_cols <- paste0(
  c(
    "civil_society_engagement",
    "donor_partnerships",
    "intergovernmental_consultations",
    "ngo_engagement",
    "private_sector_partnerships",
    "regional_body_coordination",
    "scientific_community_linkages",
    "technical_or_expert_groups",
    "treaty_body_coordination",
    "un_system_collaboration"
  ),
  "_across_igo"
)

# Strategies (ACROSS)
strategy_across_cols <- paste0(
  c(
    "capacity_development_operational_delivery",
    "collaboration_partnerships_networks",
    "environmental_climate_biodiversity_action",
    "financial_budgetary_management",
    "inclusion_rights_social_justice",
    "innovation_technology",
    "knowledge_data",
    "monitoring_accountability",
    "policy_regulation",
    "strategic_institutional_planning"
  ),
  "_across_igo"
)

# Legal authority / sources (ACROSS) + master score
sources_across_cols <- paste0(
  c(
    "bilateral_multilateral_arrangements",
    "binding_secondary_law",
    "compliance_oversight",
    "customary_soft_law",
    "delegated_or_derived_powers",
    "foundational_treaties_charters",
    "non_binding_secondary_law",
    "other_governance_instruments",
    "strategic_frameworks",
    "technical_norms_standards"
  ),
  "_across_igo"
)

# ===== Derived measures =====
# NOTE(review): the chunks above only create df_year, df_spatial, ...;
# confirm that the merged table `df` is built before this chunk runs.
#
# Step 1 (row-wise): per-IGO breadth counts, concentration indices and the
# mean of the inter-institutional tie scores.
# Step 2 (column-wise, after ungroup): the legal authority index. Each
# source column and the ordinal master score is z-scored ACROSS IGOs and
# the z-scores are then averaged per IGO. Calling scale() inside rowwise()
# would standardise within a single row instead, which makes the source
# part average to ~0 for every IGO and turns the single ordinal value
# into NaN.
df <- df %>%
  rowwise() %>%
  mutate(
    # Specialisation metrics (Subject-Matter, ACROSS)
    sm_breadth   = sum(c_across(all_of(sm_across_cols)) > 0, na.rm = TRUE),
    sm_HHI       = hhi(c_across(all_of(sm_across_cols))),
    sm_Shannon   = shannon(c_across(all_of(sm_across_cols))),

    # Spatial breadth (ACROSS)
    spatial_breadth = sum(c_across(all_of(spatial_across_cols)) > 0, na.rm = TRUE),

    # Inter-institutional index (mean of ACROSS ties)
    interinst_index = mean(c_across(all_of(interinst_across_cols)), na.rm = TRUE),

    # Strategy breadth (ACROSS)
    strategy_breadth = sum(c_across(all_of(strategy_across_cols)) > 0, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(
    # Legal authority index (z-averaged: ACROSS sources + ordinal master)
    legal_authority_index = rowMeans(
      across(
        all_of(c(sources_across_cols, "ordinal_score_sources")),
        ~ as.numeric(scale(.x))
      ),
      na.rm = TRUE
    )
  )

```
```{r}
# View df
# NOTE(review): this is lowercase view(), not base R's View(); presumably
# the viewer helper attached via the tidyverse — confirm how it behaves
# when the notebook is knitted non-interactively.
view(df)
```
## 4. Analysis
### 4.1 H2.1: Higher institutional density at founding increases specialization (↑ HHI).

```{r}
library(ggplot2)
library(dplyr)
library(ggrepel)

# 1. Identify primary subject-matter per IGO.
# The subject columns are taken by name (sm_across_cols) instead of by
# position so the result does not depend on the column order of df.
subject_cols <- sm_across_cols
primary_idx <- apply(as.matrix(df[, subject_cols]), 1, function(scores) {
  top <- which.max(scores)
  # which.max() returns integer(0) when every score in the row is NA
  if (length(top) == 0) NA_integer_ else top
})
df$primary_subject <- subject_cols[primary_idx]

# 2. Recode subject categories to shorter names.
# The keys are the clean_names() versions of the column names; the raw
# spreadsheet headers no longer exist in df, so keys written in that form
# never match and the long names would be kept.
df$primary_subject <- recode(df$primary_subject,
  "biodiversity_ecosystem_conservation_across_igo" = "Biodiversity",
  "cultural_heritage_traditional_knowledge_data_governance_across_igo" = "Heritage/Data",
  "disaster_risk_reduction_resilience_across_igo" = "Disaster/Resilience",
  "environmental_protection_climate_change_across_igo" = "Environment/Climate",
  "human_rights_social_justice_advocacy_across_igo" = "Rights/Justice",
  "international_cooperation_governance_across_igo" = "Governance",
  "research_science_innovation_across_igo" = "Research/Science",
  "security_safety_across_igo" = "Security",
  "sustainable_development_capacity_building_across_igo" = "Sustainable Dev.",
  "trade_investment_economic_cooperation_across_igo" = "Trade/Econ",
  .default = df$primary_subject
)

# 3. Pick IGOs to label (5 broadest + 5 most specialized)
label_df <- df %>%
  arrange(sm_HHI) %>%
  slice(c(1:5, (n() - 4):n()))

# 4. Regression model
m_h21 <- lm(sm_HHI ~ scale(founding_density_5yr) + scale(cumulative_stock), data = df)

# 5. Predictions over the observed density range, holding cumulative stock
#    at its median
newdat <- data.frame(
  founding_density_5yr = seq(min(df$founding_density_5yr, na.rm = TRUE),
                             max(df$founding_density_5yr, na.rm = TRUE), length.out = 200),
  cumulative_stock = median(df$cumulative_stock, na.rm = TRUE)
)
pred <- predict(m_h21, newdata = newdat, se.fit = TRUE)
pred_df <- newdat %>%
  mutate(
    fit   = pred$fit,
    upper = pred$fit + 1.96 * pred$se.fit,
    lower = pred$fit - 1.96 * pred$se.fit
  )

# 6. Plot with default ggplot colors
library(stringr)

# Define and wrap caption text
caption_text <- "Figure 2.1. Institutional Density and Specialization (HHI). 
This figure tests whether IGOs founded in denser institutional environments are more specialized in their mandates. Each point represents an IGO, colored by its primary subject-matter domain, with selected IGOs labeled for reference. The regression line, with 95% confidence intervals, shows a weak negative relationship between founding density and specialization. This suggests that crowding alone does not systematically push IGOs toward narrower mandates. Highly specialized organizations (e.g., ITU, ICES) remain focused regardless of density, while broader development-oriented IGOs (e.g., UNDP, IFAD, UNU) sustain wide-ranging portfolios even in dense founding periods. Overall, subject domain and organizational design appear more influential than founding density in shaping specialization."

# Wrap caption at ~100 characters per line
caption_wrapped <- str_wrap(caption_text, width = 100)

# Add to ggplot
p <- ggplot() +
  geom_point(data = df, aes(x = founding_density_5yr, y = sm_HHI, color = primary_subject),
             alpha = 0.8, size = 2) +
  geom_ribbon(data = pred_df, aes(x = founding_density_5yr, ymin = lower, ymax = upper),
              alpha = 0.2, fill = "grey70") +
  geom_line(data = pred_df, aes(x = founding_density_5yr, y = fit),
            color = "black", size = 1) +
  geom_text_repel(data = label_df, aes(x = founding_density_5yr, y = sm_HHI, label = institution),
                  size = 3, max.overlaps = 20, box.padding = 0.4, point.padding = 0.3) +
  labs(
    title = "Hypothesis 2.1:Higher institutional density at founding increases specialization (HHI)",
    x = "Founding Density (±2 years)",
    y = "Specialization",
    color = "Subject matter",
    caption = caption_wrapped
  ) +
  theme_minimal(base_size = 12) +
  theme(
    # The legend position is a plain theme setting; `p` cannot be referenced
    # here because it is still being defined by this very expression.
    legend.position = "right",
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 5, face = "bold"),
    plot.caption = element_text(hjust = 0, size = 7, lineheight = 1.2, face = "italic"),
    plot.subtitle = element_text(size = 11, margin = margin(b = 10))
  )

# Save wider figure
ggsave("figure_2_1_h2_1_density_specialization.png", plot = p, width = 11, height = 6, dpi = 300, bg ="white")
print(p)
```
#### Conclusion for Hypothesis 2.1

**Expected hypothesis: Higher density → more specialization.**

* The **relationship is weak**. Some IGOs (e.g., UNDP, WFP) manage to remain broad despite institutional crowding, while others (e.g., ITU, ICES) specialize strongly regardless of density.

**Implication:**

* Institutional density alone does not fully explain specialization.

* Subject-matter domain and organizational design matter: technical/scientific IGOs (e.g., ITU, ICES) are naturally more specialized, while development-focused IGOs (UNDP, IFAD) remain broad because their mandates require addressing multiple issues simultaneously.

### 4.2 Hypothesis 2.2: IGOs founded in denser environments have narrower portfolios.
```{r}
library(stringr)

# --- Regression of subject-matter breadth on founding density ---
# The line and band drawn on this figure must come from a model of
# sm_breadth. Reusing pred_df from the H2.1 chunk would overlay predictions
# of sm_HHI (a 0-1 index) on an axis that counts domains.
m_h22 <- lm(sm_breadth ~ scale(founding_density_5yr) + scale(cumulative_stock), data = df)

# Predictions over the observed density range, holding cumulative stock at
# its median
newdat_h22 <- data.frame(
  founding_density_5yr = seq(min(df$founding_density_5yr, na.rm = TRUE),
                             max(df$founding_density_5yr, na.rm = TRUE), length.out = 200),
  cumulative_stock = median(df$cumulative_stock, na.rm = TRUE)
)
pred_h22 <- predict(m_h22, newdata = newdat_h22, se.fit = TRUE)
pred_df_h22 <- newdat_h22 %>%
  mutate(
    fit   = pred_h22$fit,
    upper = pred_h22$fit + 1.96 * pred_h22$se.fit,
    lower = pred_h22$fit - 1.96 * pred_h22$se.fit
  )

# --- Caption text (wrapped to ~100 characters per line for readability) ---
# Plain text only: the caption is drawn with element_text(), which prints
# markdown markers such as ** literally.
# NOTE(review): re-check the direction of association stated below against
# summary(m_h22) before publishing.
caption_text <- str_wrap(
  "Figure 2.2. Density and Subject-Matter Breadth. 
  This figure tests whether IGOs founded in denser institutional environments exhibit broader mandates (measured as the number of subject domains covered). Each dot represents an IGO, colored by its primary subject-matter domain. The regression line (with 95% confidence band) shows a positive association: higher founding density is linked with broader subject coverage. Labeled IGOs illustrate the extremes, from narrow specialists (e.g., ITU, ICES, IPCC) to broad generalists (e.g., UNU, UNDP, UNCCD).",
  width = 100
)

# --- Plot ---
# NOTE(review): label_df is whatever the most recently run chunk left
# behind (presumably the H2.1 selection by sm_HHI) — confirm.
h2 <- ggplot() +
  # Raw data points
  geom_point(data = df, aes(x = founding_density_5yr, y = sm_breadth, color = primary_subject),
             alpha = 0.7, size = 2) +
  
  # Regression fit (breadth model)
  geom_ribbon(data = pred_df_h22, aes(x = founding_density_5yr, ymin = lower, ymax = upper),
              alpha = 0.2, fill = "orange") +
  geom_line(data = pred_df_h22, aes(x = founding_density_5yr, y = fit),
            color = "darkred", size = 1.2) +
  
  # Labels for notable IGOs
  geom_text_repel(data = label_df, aes(x = founding_density_5yr, y = sm_breadth, label = institution),
                  size = 3, max.overlaps = 15, box.padding = 0.3, point.padding = 0.2) +
  
  # Labels and style
  labs(
    title = "Figure 2.2 Hypothesis 2.2 — Density and Subject-Matter Breadth",
    subtitle = "Colored by primary subject domain; regression fit with 95% CI",
    x = "Founding Density (±2 years)",
    y = "Subject-Matter Breadth (Number of Domains)",
    color = "Primary Subject",
    caption = caption_text   # <--- caption here
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "right",
    legend.box = "vertical",
    plot.caption = element_text(hjust = 0, size = 9, lineheight = 1.1, face = "italic")
  )

# --- Save with solid background ---
ggsave("figure_2_2_h2_2_density_and_subject_matter_breadth.png", plot = h2,
       width = 11, height = 6, dpi = 300, bg = "white")

print(h2)

```

##### Overall trend (line and band):
* Unlike Hypothesis 2.1, where specialization (HHI) was weakly linked to density, here the regression line slopes upward, indicating that higher founding density is associated with greater breadth of mandates. This suggests that IGOs created in crowded environments may expand their coverage across multiple domains rather than narrowing their focus.

##### Specialist IGOs (low breadth):
* ITU, ICES, IPCC remain narrowly focused (1–2 domains), even when founded in environments of varying density. These are technical or scientific IGOs where narrow mandates are a functional necessity.

##### Generalist IGOs (high breadth):
* UNU, UNDP, UNCCD, IFAD cover 6–8 domains, reflecting multidimensional mandates in development, environment, or governance. Notably, these IGOs appear in denser founding environments, consistent with the upward slope of the regression.

##### Subject domains (colors):
Broad mandates are especially common in development, governance, and environment domains (e.g., UNDP, UNCCD), while science/technical organizations remain specialized.

#### Comparison to Hypothesis 2.1:
Together, Figures 2.1 and 2.2 show a nuanced picture:

* Using specialization (HHI) → density does not strongly drive specialization.

* Using breadth (domain count) → density appears to encourage broader coverage, at least for development-oriented IGOs.

**Implication:**

IGOs respond to dense institutional environments not only by carving out niches (as theory suggested) but also by adopting broader mandates to remain relevant and competitive, especially in domains where overlap and coordination are central (development, environment, governance).

### 4.3 Hypothesis 2.3 Spatial jurisdiction breadth decreases as density increases
```{r}
library(ggplot2)
library(dplyr)

# Split founding density into three groups using cut points at the 0th,
# 33rd, 66th and 100th percentiles of the observed values.
density_breaks <- quantile(
  df$founding_density_5yr,
  probs = c(0, .33, .66, 1),
  na.rm = TRUE
)
df$density_group <- cut(
  df$founding_density_5yr,
  breaks = density_breaks,
  include.lowest = TRUE,
  labels = c("Low Density", "Medium Density", "High Density")
)

# Mean spatial breadth and its standard error within each density group
summary_df <- summarise(
  group_by(df, density_group),
  mean_spatial = mean(spatial_breadth, na.rm = TRUE),
  se = sd(spatial_breadth, na.rm = TRUE) / sqrt(n()),
  .groups = "drop"
)

# Poisson regression of spatial breadth on standardised density, followed by
# a chi-squared analysis-of-deviance table
m_h23 <- glm(
  spatial_breadth ~ scale(founding_density_5yr),
  data = df,
  family = "poisson"
)
anova_result <- anova(m_h23, test = "Chisq")
print(summary(m_h23))
print(anova_result)

# 3 narrowest + 3 broadest IGOs by spatial breadth
label_df <- slice(
  arrange(df, spatial_breadth),
  c(1:3, (n() - 2):n())
)

# --- Caption text ---
caption_text <- str_wrap(
  "Figure 2.3. Density and Spatial Breadth. 
  This figure tests whether IGOs founded in denser institutional environments cover broader jurisdictional areas (spatial breadth). 
  Bars show the mean number of jurisdictions covered in low-, medium-, and high-density founding periods, with 95% confidence intervals. 
  The results suggest a modest positive relationship: IGOs founded in high-density environments tend to cover more jurisdictions, 
  though variation remains substantial across cases.",
  width = 100
)

# Fill colours keyed by density group
density_fills <- c(
  "Low Density" = "#8dd3c7",
  "Medium Density" = "#ffffb3",
  "High Density" = "#fb8072"
)

# Bar chart of group means with +/- 1.96 * SE error bars and value labels
h3 <- ggplot(summary_df, aes(x = density_group, y = mean_spatial, fill = density_group)) +
  geom_col(alpha = 0.8, width = 0.6) +
  geom_errorbar(
    aes(ymin = mean_spatial - 1.96*se, ymax = mean_spatial + 1.96*se),
    width = 0.2, size = 1
  ) +
  geom_text(
    aes(label = round(mean_spatial,1)),
    vjust = -1, size = 4, fontface = "bold"
  ) +
  labs(
    title = "Figure 2.3 Hypothesis 2.3 — Density and Spatial Breadth",
    subtitle = "Mean jurisdictional coverage of IGOs across density groups (±95% CI)",
    x = "Founding Density (Grouped)",
    y = "Average Spatial Breadth (Jurisdictions)",
    caption = caption_text
  ) +
  scale_fill_manual(values = density_fills) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold"),
    # caption styling
    plot.caption = element_text(hjust = 0, size = 9, lineheight = 1.1, face = "italic")
  )

# Save with a white background, then display
ggsave(
  "figure_2_3_h2_3_spatial_breadth.png",
  plot = h3,
  width = 9, height = 6, dpi = 300, bg = "white"
)

print(h3)

```
**Implication:**

Figure 2.3 tests whether institutional density at the time of founding influences the geographical scope (spatial breadth) of IGOs. The results show a modest but non-linear pattern: IGOs founded in medium-density periods tend to have the broadest spatial coverage on average (≈2.7 jurisdictions), while those founded in low-density contexts remain narrower (≈1.9 jurisdictions). Interestingly, organizations emerging in high-density periods are not significantly broader than those in low-density contexts, suggesting that intense institutional crowding does not automatically translate into wider geographical mandates.

This finding complements the previous hypotheses. Whereas Hypothesis 2.1 suggested that density was only weakly associated with greater specialization (HHI), and Hypothesis 2.2 indicated a slight positive relationship with subject-matter breadth, Hypothesis 2.3 highlights a more nuanced dynamic: institutional crowding appears to encourage expansion of jurisdictional reach only up to a point. Beyond this, high-density environments may constrain or channel IGOs into narrower or more selective spatial niches, reflecting pressures to avoid redundancy and overlap.

### 4.4 Hypothesis 2.4 Stronger coordination mechanisms allow IGOs to survive density pressures without extreme specialization.

```{r}
# IGOs to annotate: within each density group, the single IGO with the
# smallest and the single IGO with the largest strategy breadth
# (ties broken by taking one row).
by_density <- group_by(df, density_group)
label_df <- ungroup(
  bind_rows(
    slice_min(by_density, order_by = strategy_breadth, n = 1, with_ties = FALSE),
    slice_max(by_density, order_by = strategy_breadth, n = 1, with_ties = FALSE)
  )
)

caption_text <- str_wrap(
  "Figure 2.4. Density and Strategic Breadth. 
   The figure shows the distribution of strategy portfolio breadth (number of strategies adopted) across IGOs founded in low-, medium-, and high-density environments. 
   Each ridge represents the spread of strategic breadth scores, while labeled points highlight selected IGOs with the narrowest and broadest portfolios in each group. 
   For example, ITU (Low Density) shows limited breadth, while UN Women (Low Density) represents a broader strategic mandate. 
   In the High Density group, UN DOALOS has a relatively narrow strategy set, while FAO displays a wide-ranging portfolio. 
   Bars capture the diversity of approaches IGOs adopt depending on institutional crowding at the time of founding.",
  width = 100
)

# Fill colours keyed by density group
ridge_fills <- c(
  "Low Density" = "#80b1d3",
  "Medium Density" = "#fdb462",
  "High Density" = "#b3de69"
)

# Ridgeline densities per group, with the selected IGOs drawn as labelled
# black points on top
h4 <- ggplot(df, aes(x = strategy_breadth, y = density_group, fill = density_group)) +
  geom_density_ridges(
    alpha = 0.6, scale = 1.1, rel_min_height = 0.01, color = "white"
  ) +
  geom_point(
    data = label_df,
    aes(x = strategy_breadth, y = density_group),
    color = "black", size = 3
  ) +
  geom_text_repel(
    data = label_df,
    aes(x = strategy_breadth, y = density_group, label = institution),
    size = 3, nudge_y = 0.25, segment.color = "grey40"
  ) +
  labs(
    title = "Figure 2.4 Hypothesis 2.4 — Density and Strategic Breadth",
    x = "Strategic Breadth (Number of Strategies)",
    y = "Founding Density Group",
    caption = caption_text
  ) +
  scale_fill_manual(values = ridge_fills) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "right",
    plot.title = element_text(size =11, face = "bold"),
    plot.caption = element_text(hjust = 0, size = 9, lineheight = 1.1, face = "italic")
  )

# Save with a white background, then display
ggsave(
  "figure_2_4_h2_4_strategic_breadth_labeled.png",
  plot = h4,
  width = 9, height = 6, dpi = 300, bg = "white"
)

print(h4)
```
**Implication:**

Figure 2.4 examines whether IGOs founded in denser institutional environments pursue broader strategic portfolios. The distributions suggest a clear trend: IGOs in medium- and high-density environments tend to adopt a wider range of strategies than those founded in low-density periods. Importantly, the figure also highlights variation within each density group. For instance, ITU (founded in a low-density era) shows a narrow strategy focus, while UN Women, also from a low-density period, is far more diversified. Similarly, among high-density IGOs, UN DOALOS pursues a relatively narrow set of strategies, whereas FAO exhibits one of the broadest portfolios in the dataset.

This supports Hypothesis 2.4 by showing that institutional crowding generally pushes IGOs toward greater strategic breadth, though not uniformly. Compared to Hypothesis 2.1 (specialization) and Hypothesis 2.2 (subject-matter breadth), this finding strengthens the view that denser institutional environments encourage organizations to expand their portfolios of action rather than restrict them to narrow mandates. In contrast to Hypothesis 2.3 (spatial breadth), where high density limited expansion, here high density tends to encourage more diversified strategies, reflecting pressures for organizations to remain relevant and responsive within crowded governance landscapes.

### 4.5 Hypothesis 2.5: Legal authority moderates the effect of density on specialization.
```{r}
library(stringr)
library(ggrepel)

# Fix the level order so "Low Legal Authority" is the reference category
df$legal_group <- factor(df$legal_group,
                         levels = c("Low Legal Authority", "High Legal Authority"))

# Specialization (HHI) regressed on founding density, moderated by legal
# authority, controlling for the cumulative stock of institutions.
m_h25 <- lm(sm_HHI ~ scale(founding_density_5yr) * legal_group +
              scale(cumulative_stock),
            data = df)
summary(m_h25)

# Prediction grid: 200 density values over the observed range for each legal
# group, holding cumulative stock at its median. The factor is built from the
# model's own levels so the grid cannot drift from the fitted coding.
density_range <- range(df$founding_density_5yr, na.rm = TRUE)
newdat <- expand.grid(
  founding_density_5yr = seq(density_range[1], density_range[2],
                             length.out = 200),
  legal_group = factor(levels(df$legal_group), levels = levels(df$legal_group)),
  cumulative_stock = median(df$cumulative_stock, na.rm = TRUE)
)

# 95% confidence band. Use the t quantile on the residual degrees of freedom
# rather than the normal 1.96, which is too narrow for a small-sample lm.
pred <- predict(m_h25, newdata = newdat, se.fit = TRUE)
t_crit <- qt(0.975, df = pred$df)
pred_df <- cbind(newdat, fit = pred$fit, se = pred$se.fit) %>%
  mutate(upper = fit + t_crit * se, lower = fit - t_crit * se)

# Label the two most and two least specialized IGOs per legal group.
# distinct() drops rows selected twice when a group has fewer than four IGOs.
label_df <- df %>%
  group_by(legal_group) %>%
  slice_max(order_by = sm_HHI, n = 2) %>%
  bind_rows(
    df %>% group_by(legal_group) %>% slice_min(order_by = sm_HHI, n = 2)
  ) %>%
  ungroup() %>%
  distinct()

caption_text <- str_wrap(
  "Figure 2.5. Density, Legal Authority, and Specialization.
   The figure shows the effect of founding density on the degree of specialization (HHI) of IGOs, comparing organizations with high legal authority (green) versus low legal authority (orange). 
   The regression lines represent predicted specialization, with shaded areas indicating 95% confidence intervals. 
   Points represent IGOs, with selected organizations labeled for context (e.g., ITU, IPCC, UNDP, UNU, UNODC, IFAD, WFP, UNCCD). 
   IGOs with high legal authority generally cluster at lower specialization levels, while low-legal authority IGOs show greater variation in specialization, especially at lower densities.",
  width = 100
)

# Shared palette for the colour (points/lines/labels) and fill (ribbon) scales
legal_palette <- c("High Legal Authority" = "#1b9e77",
                   "Low Legal Authority" = "#d95f02")

h5 <- ggplot() +
  geom_point(data = df,
             aes(x = founding_density_5yr, y = sm_HHI,
                 color = legal_group),
             alpha = 0.6, size = 2) +
  geom_ribbon(data = pred_df,
              aes(x = founding_density_5yr, ymin = lower, ymax = upper,
                  fill = legal_group),
              alpha = 0.15) +
  # `linewidth` replaces the deprecated `size` for line geoms (ggplot2 >= 3.4.0)
  geom_line(data = pred_df,
            aes(x = founding_density_5yr, y = fit, color = legal_group),
            linewidth = 1) +
  geom_text_repel(data = label_df,
                  aes(x = founding_density_5yr, y = sm_HHI,
                      label = institution, color = legal_group),
                  size = 3, segment.color = "grey50") +
  labs(
    title = "Figure 2.5 Hypothesis 2.5: Density, Legal Authority, and Specialization",
    x = "Founding Density (±2 years)",
    y = "Specialization (HHI)",
    color = "Legal Authority",
    fill = "Legal Authority",
    caption = caption_text
  ) +
  scale_color_manual(values = legal_palette) +
  scale_fill_manual(values = legal_palette) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "right",
    plot.title = element_text(size = 11, face = "bold"),
    plot.caption = element_text(hjust = 0, size = 9, lineheight = 1.1, face = "italic")
  )

ggsave("figure_2_5_h2_5_density_x_legal_specialization.png",
       plot = h5, width = 10, height = 6, dpi = 300, bg = "white")

print(h5)
```
**Implication**

* This result refines the findings from Hypotheses 2.1–2.4. While those analyses showed that institutional density tends to encourage specialization and narrow organizational niches, Figure 2.5 demonstrates that this effect depends on the legal authority of the organization.

* Low-legal authority IGOs (e.g., ITU, WFP, UNODC) display higher specialization in low-density environments but converge toward lower specialization as density increases.

* High-legal authority IGOs (e.g., UNDP, UNU, UNCCD) generally maintain lower levels of specialization across densities, with a slight decline as density rises.

* This suggests that legal authority moderates the density–specialization relationship: organizations with stronger legal authority appear less responsive to competitive pressures for specialization, likely because their broad mandates and binding frameworks buffer them against duplication concerns. In contrast, low-authority IGOs are more sensitive to institutional crowding, differentiating themselves more clearly when institutional environments are less saturated.

**Together with the earlier hypotheses, this finding indicates that specialization is not uniform across the IGO landscape but varies depending on the interaction between institutional context (density) and organizational design features (legal authority).**

### 4.6 Hypothesis 2.6: IGOs with broader strategy portfolios adapt better in high-density fields than those with narrow strategies
```{r}
library(dplyr)
library(ggplot2)
library(ggeffects)
library(stringr)

# --- Step 1: Model (Density × Strategy Breadth) ---
# Specialization (HHI) on founding density, moderated by strategy breadth,
# controlling for the cumulative stock of institutions.
m_h26 <- lm(sm_HHI ~ scale(founding_density_5yr) * scale(strategy_breadth) +
              scale(cumulative_stock),
            data = df)

summary(m_h26)

# --- Step 2: Predictions for plotting ---
# ggpredict() is exported by the ggeffects package (there is no separate
# "ggpredict" package), so it is called with an explicit namespace.
# NOTE(review): per the ggeffects documentation, "[quart]" evaluates the
# moderator at its quartiles plus the minimum and maximum, so the legend shows
# strategy_breadth values rather than quartile numbers — confirm the caption's
# "four levels" wording against the rendered legend.
pred_h26 <- ggeffects::ggpredict(
  m_h26,
  terms = c("founding_density_5yr", "strategy_breadth [quart]")
)

# --- Step 3: Caption text ---
caption_text <- str_wrap(
  "Figure 2.6. Density, Strategy Breadth, and Specialization. 
   The figure illustrates how founding density influences IGO specialization (Herfindahl–Hirschman Index, HHI), depending on the breadth of their strategy portfolios. 
   Strategy breadth is divided into quartiles, meaning IGOs are grouped into four levels from narrowest to broadest portfolios: the lowest quartile (fewer strategies) to the highest quartile (more diversified strategies). 
   Lines show predicted specialization for each quartile, and shaded areas represent 95% confidence intervals. 
   IGOs with broader strategy portfolios (upper quartiles) begin with higher specialization and retain it more effectively under high density, while those with narrow portfolios (lower quartiles) show lower specialization and sharper declines as density increases.",
  width = 100
)

# --- Step 4: Plot ---
# The ribbon is drawn first so the prediction lines stay fully visible on top.
h6 <- ggplot(pred_h26, aes(x = x, y = predicted, color = group)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = group),
              alpha = 0.15, color = NA) +
  # `linewidth` replaces the deprecated `size` for line geoms (ggplot2 >= 3.4.0)
  geom_line(linewidth = 1.2) +
  scale_color_brewer(palette = "Set1", name = "Strategy Breadth (Quartiles)") +
  scale_fill_brewer(palette = "Set1", name = "Strategy Breadth (Quartiles)") +
  labs(
    title = "Figure 2.6 Hypothesis 2.6 — Density, Strategy Breadth, and Specialization",
    subtitle = "Effect of founding density on specialization (HHI), moderated by strategy breadth",
    x = "Founding Density (±2 years)",
    y = "Specialization (HHI)",
    caption = caption_text
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(size = 11, color = "grey40"),
    plot.caption = element_text(hjust = 0, size = 9, lineheight = 1.1, face = "italic"),
    legend.position = "bottom"
  )

# --- Step 5: Save with white background ---
ggsave("figure_2_6_h2_6.png", plot = h6, width = 10, height = 6, dpi = 300, bg = "white")

print(h6)

```
**Implications**

This result supports Hypothesis 2.6, showing that strategic breadth moderates the pressures of crowded institutional fields. IGOs founded in dense environments tend to lose specialization overall (downward slope across all lines), but those with broader strategy portfolios (e.g., the lines for strategy-breadth values of 7–9, the upper end of the distribution) adapt better, preserving higher levels of specialization compared to those with narrow portfolios. This suggests that in competitive institutional landscapes, flexibility and diversification of strategies help organizations carve out niches and buffer against redundancy. When read alongside Hypotheses 2.1–2.5, this extends the picture: while density generally pushes toward specialization, the ability to combine multiple strategies enhances resilience and allows IGOs to maintain a differentiated role within crowded governance spaces.

### 4.7 Hypothesis 2.7: Niche differentiation occurs along subject–spatial intersections.

```{r}
library(ggalluvial)
library(ggplot2)

# Display labels keyed by the cleaned column names of the subject-matter
# ("across IGO") measures.
subject_labels <- c(
  "biodiversity_ecosystem_conservation_across_igo" = "Biodiversity & Conservation",
  "cultural_heritage_traditional_knowledge_data_governance_across_igo" = "Cultural Heritage & Knowledge",
  "disaster_risk_reduction_resilience_across_igo" = "Disaster Risk & Resilience",
  "environmental_protection_climate_change_across_igo" = "Environment & Climate",
  "human_rights_social_justice_advocacy_across_igo" = "Human Rights & Justice",
  "international_cooperation_governance_across_igo" = "Governance & Cooperation",
  "research_science_innovation_across_igo" = "Science & Innovation",
  "security_safety_across_igo" = "Security & Safety",
  "sustainable_development_capacity_building_across_igo" = "Sustainable Development",
  "trade_investment_economic_cooperation_across_igo" = "Trade & Economy"
)
# Display labels keyed by the cleaned column names of the spatial measures.
spatial_labels <- c(
  "archipelago_across_igo" = "Archipelago",
  "coastal_zone_across_igo" = "Coastal Zone",
  "contiguous_zone_cz_across_igo" = "Contiguous Zone",
  "enclosed_or_semi_enclosed_sea_across_igo" = "Enclosed/Semi-Enclosed Sea",
  "exclusive_economic_zone_eez_across_igo" = "EEZ",
  "extended_continental_shelf_cs_across_igo" = "Continental Shelf",
  "high_seas_across_igo" = "High Seas",
  "internal_waters_across_igo" = "Internal Waters",
  "territorial_sea_ts_across_igo" = "Territorial Sea",
  "the_area_across_igo" = "The Area"
)

caption_text_27 <- str_wrap(
  "Figure 2.7 — Niche Differentiation (Subject ↔ Spatial Flows).
   The diagram illustrates how IGO subject domains distribute across maritime spatial jurisdictions. 
   Broad domains such as Environment & Climate and Biodiversity & Conservation span nearly all zones, 
   while narrower fields like Security & Safety concentrate in the high seas and the Area. 
   Development-oriented areas (Sustainable Development, Trade & Economy) cluster around coastal and EEZ spaces, 
   reflecting economic mandates. 
   These flows demonstrate that IGOs occupy differentiated niches at the intersection of subject and space, 
   supporting the hypothesis that institutional specialization is structured rather than random.",
  width = 100
)

# Name of the column (among `cols`) holding each row's largest value.
# Ties resolve to the first column, as which.max() does; a row with no
# non-missing values yields NA instead of breaking the lookup.
primary_column <- function(data, cols) {
  scores <- as.matrix(data[, cols])
  top <- vapply(seq_len(nrow(scores)), function(i) {
    idx <- which.max(scores[i, ])
    if (length(idx) == 0L) NA_integer_ else as.integer(idx)
  }, integer(1))
  colnames(scores)[top]
}

# Select the measure columns by name (the same names that key the label
# vectors) rather than by position, so a change in column order cannot
# silently mislabel or blank out the strata.
subject_cols <- names(subject_labels)
spatial_cols <- names(spatial_labels)
stopifnot(
  all(subject_cols %in% names(df)),
  all(spatial_cols %in% names(df))
)

# Collapse into subject + spatial (taking primary domain)
df_long <- data.frame(
  IGO = df$institution,
  Subject = primary_column(df, subject_cols),
  Spatial = primary_column(df, spatial_cols)
)

# Apply shorter labels
df_long$Subject <- unname(subject_labels[df_long$Subject])
df_long$Spatial <- unname(spatial_labels[df_long$Spatial])

# IGOs with no usable subject or spatial scores cannot be placed on an axis
unclassified <- is.na(df_long$Subject) | is.na(df_long$Spatial)
if (any(unclassified)) {
  warning(sum(unclassified),
          " IGO(s) without a primary subject or spatial domain were dropped",
          " from Figure 2.7.", call. = FALSE)
  df_long <- df_long[!unclassified, ]
}

# Alluvial plot
h7 <- ggplot(df_long,
             aes(axis1 = Subject, axis2 = Spatial)) +
  geom_alluvium(aes(fill = Subject), width = 1/8, alpha = 0.85) +
  geom_stratum(width = 1/8, fill = "grey95", color = "black") +
  geom_text(
    stat = "stratum",
    aes(label = after_stat(stratum)),
    size = 2.8,        # smaller text
    hjust = 0.5,       # centered
    lineheight = 0.9   # tighter spacing
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.y = element_blank(),
    axis.title = element_blank(),
    legend.position = "none",
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(size = 10, color = "grey40"),
    plot.caption = element_text(
      hjust = 0, size = 9, lineheight = 1.1, face = "italic", color = "grey30"
    )
  ) +
  labs(
    title = "Figure 2.7 Hypothesis 2.7 — Niche Differentiation (Subject ↔ Spatial Flows)",
    subtitle = "Flows show how subject domains distribute across spatial jurisdictions",
    caption = caption_text_27
  )

ggsave("figure_2_7_h2_7_alluvial.png", plot = h7,
       width = 12, height = 6, dpi = 300, bg = "white")
print(h7)
```
**Implications**

These patterns support Hypothesis 2.7 by showing that niche differentiation occurs along subject–spatial intersections. Some domains are broadly distributed across all maritime spaces, while others are spatially concentrated, creating clear subject-specific niches. This differentiation reflects how IGOs align their functional mandates with the legal and ecological characteristics of maritime zones, rather than being evenly spread across all spaces. In doing so, the system of IGOs exhibits both cross-cutting mandates and specialized jurisdictions, reinforcing the idea that institutional niches are structured and differentiated rather than overlapping randomly.

```{r}
# ====== COMMON SETUP ======
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggrepel)
library(ggeffects)
library(mgcv)
library(broom)
library(scales)
library(stringr)

# Helper: neat caption wrapper (wraps caption text at 110 characters)
cap <- function(x) stringr::str_wrap(x, width = 110)

# (Optional) derived measures if you haven't defined them yet.
# mutate() below reads the existing columns, so create them as all-NA first
# when they are absent; otherwise the fallback would fail with
# "object not found" instead of computing the measure.
missing_measures <- setdiff(c("strategy_breadth", "spatial_breadth"), names(df))
if (length(missing_measures) > 0) {
  df[missing_measures] <- NA_real_
}

# Keep any value already present; fill only the missing ones with row sums.
# NOTE(review): columns are addressed by position (70:79 and 17:26) —
# presumably the strategy and spatial-jurisdiction indicators; confirm they
# still sit at these positions after upstream joins.
df <- df %>%
  mutate(
    strategy_breadth = ifelse(!is.na(strategy_breadth), strategy_breadth,
                              rowSums(across(c(70:79)), na.rm = TRUE)),   # within strategies count
    spatial_breadth  = ifelse(!is.na(spatial_breadth), spatial_breadth,
                              rowSums(across(c(17:26)), na.rm = TRUE))     # # of spatial jurisdictions
  )

```
#### 4.8 Hypothesis 2.8: Non-linearity / diminishing returns — the effect of founding density on specialization is non-linear (diminishing at high density)
```{r}
# GAM with smooth on density; control for field stock.
# k = 4 caps the basis dimension of the density smooth.
m_h28 <- mgcv::gam(sm_HHI ~ s(founding_density_5yr, k = 4) + scale(cumulative_stock), data = df)
summary(m_h28)

# Predictions for plot, evaluated at every observed density value
pred_h28 <- ggeffects::ggpredict(m_h28, terms = "founding_density_5yr [all]")

p_h28 <- ggplot(pred_h28, aes(x = x, y = predicted)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .18) +
  # `linewidth` replaces the deprecated `size` for line geoms (ggplot2 >= 3.4.0)
  geom_line(linewidth = 1.2) +
  geom_point(data = df, aes(x = founding_density_5yr, y = sm_HHI), alpha = .35) +
  labs(
    title = "Figure 2.8 Hypothesis 2.8 — Non-linear Effect of Density on Specialization",
    subtitle = "GAM smooth; band = 95% CI. Points are IGOs.",
    x = "Founding Density (±2 years)", y = "Specialization (HHI)",
    caption = cap("The smooth shows that density’s effect on specialization is not strictly linear: the slope flattens as density increases, \
suggesting diminishing returns once institutional fields become very crowded.")
  ) +
  theme_minimal(base_size = 13)

# theme_minimal() has no panel background, so request a white canvas
# explicitly (as the other figures do) to avoid a transparent PNG.
ggsave("figure_2_8_h2_8_gam_density_specialization.png", plot = p_h28,
       width = 9, height = 5.6, dpi = 300, bg = "white")

print(p_h28)
```
#### 4.9 Hypothesis 2.9: Mediation — strategy breadth partially mediates the effect of density on specialization
```{r}
library(lavaan)

# Standardize to aid interpretation.
# scale() returns a one-column matrix; as.numeric() keeps the standardized
# variables as plain numeric vectors instead of matrix columns.
df_std <- df %>%
  mutate(across(
    c(founding_density_5yr, cumulative_stock, strategy_breadth, sm_HHI),
    function(x) as.numeric(scale(x))
  ))

model_h29 <- '
  # direct paths
  strategy_breadth ~ a*founding_density_5yr + c1*cumulative_stock
  sm_HHI           ~ b*strategy_breadth + c_prime*founding_density_5yr + c2*cumulative_stock

  # indirect and total effects
  ind := a*b
  total := c_prime + (a*b)
'

# Seed the RNG so the bootstrap standard errors and intervals are reproducible
set.seed(2024)
fit_h29 <- sem(model_h29, data = df_std, se = "bootstrap", bootstrap = 1000)
summary(fit_h29, standardized = TRUE, fit.measures = TRUE, rsquare = TRUE)

# Quick coefficient (dot-whisker) plot for mediation paths.
# Select rows by their lavaan label: a regex such as "a|b|..." on the term text
# matches almost every parameter, because any term containing "a" or "b" passes.
path_labels <- c("a", "b", "c_prime", "ind", "total")
coef_df <- lavaan::parameterEstimates(fit_h29, ci = TRUE, level = 0.95) %>%
  as.data.frame() %>%
  filter(label %in% path_labels) %>%
  transmute(
    term = factor(label, levels = rev(path_labels)),  # "a" at the top of the axis
    estimate = est,
    conf.low = ci.lower,
    conf.high = ci.upper
  )

p_h29 <- ggplot(coef_df, aes(x = estimate, y = term)) +
  geom_point() + geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = .15) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  labs(
    title = "Figure 2.9 Hypothesis 2.9 — Mediation by Strategy Breadth",
    x = "Estimate (bootstrap 95% CI)", y = "",
    caption = cap("Path a: density → strategy breadth; Path b: strategy breadth → specialization; c' is the direct effect of density on specialization \
controlling for breadth. A significant indirect effect (ind) supports mediation.")
  ) + theme_minimal(base_size = 13)

# theme_minimal() has no panel background, so request a white canvas explicitly
ggsave("figure_2_9_h2_9_mediation.png", plot = p_h29,
       width = 8.6, height = 5.2, dpi = 300, bg = "white")
print(p_h29)
```
_**Figure 2.9** tests whether the effect of founding density on subject-matter specialization (HHI) operates indirectly through strategy breadth. The dot-whisker plot shows standardized coefficients with bootstrap 95% confidence intervals. The key pathway of interest is the indirect effect (ind := a*b). A significant indirect effect suggests mediation, meaning that IGOs in denser founding environments adjust their strategy portfolios, and this broader or narrower portfolio in turn shapes how specialized they become._

**Interpretation:**

* The direct path from founding density to specialization is relatively weak once strategy breadth is included.

* The indirect path (a*b) is the mechanism of interest: it shows whether density → strategy breadth → specialization is a valid chain.

* If the indirect effect confidence interval excludes zero, then mediation holds, meaning density’s influence on specialization is partly explained by strategy choices.

* Substantively, this suggests that IGOs in crowded institutional fields do not simply specialize directly; they adapt by adjusting their strategy mix, which then influences their degree of specialization.

#### 4.10 Hypothesis 2.10: Inter-institutional embeddedness rises with density
```{r}
# Build an Inter-institutional Embeddedness Index (IEI): the row mean of the
# interaction items, ignoring missing values.
# NOTE(review): the items are addressed by position (columns 112-131) —
# confirm these are still the interaction columns after upstream joins.
iei_cols <- 112:131
df <- df %>%
  mutate(IEI = rowMeans(across(all_of(iei_cols)), na.rm = TRUE))

# Model: IEI ~ density + controls
m_h210 <- lm(IEI ~ scale(founding_density_5yr) + scale(cumulative_stock), data = df)
summary(m_h210)

# Plot marginal effect with a few labels for context
pred_h210 <- ggeffects::ggpredict(m_h210, terms = "founding_density_5yr [all]")

# Label the four most and three least embedded IGOs
label_igos <- df %>%
  slice_max(order_by = IEI, n = 4) %>%
  bind_rows(slice_min(df, order_by = IEI, n = 3))

p_h210 <- ggplot() +
  geom_point(data = df, aes(founding_density_5yr, IEI), alpha = .45) +
  # Ribbon first so the fitted line is drawn on top of its confidence band
  geom_ribbon(data = pred_h210, aes(x = x, ymin = conf.low, ymax = conf.high), alpha = .18) +
  # `linewidth` replaces the deprecated `size` for line geoms (ggplot2 >= 3.4.0)
  geom_line(data = pred_h210, aes(x = x, y = predicted), linewidth = 1.1) +
  ggrepel::geom_text_repel(data = label_igos,
                           aes(founding_density_5yr, IEI, label = institution),
                           size = 3, seed = 4) +
  labs(
    title = "Figure 2.10 Hypothesis 2.10 — Density and Inter-institutional Embeddedness",
    subtitle = "Inter-institutional Embeddedness Index (IEI) from interaction items",
    x = "Founding Density (±2 years)", y = "Embeddedness (IEI, mean of interactions)",
    caption = cap("A positive slope indicates IGOs founded in denser fields maintain broader inter-institutional ties (UN system collaboration, \
treaty body coordination, scientific linkages, etc.), consistent with coordination pressures in crowded spaces.")
  ) + theme_minimal(base_size = 13)

# theme_minimal() has no panel background, so request a white canvas explicitly
ggsave("figure_2_10_h2_10_density_IEI.png", plot = p_h210,
       width = 9, height = 5.4, dpi = 300, bg = "white")
print(p_h210)
```
_**Figure 2.10** examines whether IGOs founded in denser institutional environments exhibit broader inter-institutional embeddedness. The y-axis represents the Inter-institutional Embeddedness Index (IEI), computed as the mean of the inter-institutional interaction items (inter-IGO collaborations, treaty body linkages, UN system partnerships, and other interactions). The regression line with 95% confidence band indicates the partial effect of founding density. Selected IGOs (e.g., ITC, UN DOALOS, OHCHR) are labeled for context._

**Interpretation:**

* The positive slope indicates that IGOs founded in denser environments tend to have more inter-institutional linkages.

* Examples like UN DOALOS, ITC, OHCHR are highly embedded, consistent with operating in complex governance landscapes where collaboration is necessary.

* By contrast, more narrowly embedded IGOs (e.g., Minamata Convention, UNEP) operate with fewer ties, reflecting either functional autonomy or niche mandates.

* Substantively, this supports the idea that crowded institutional fields push new IGOs toward greater coordination and embeddedness to avoid redundancy and increase legitimacy.
