#load libraries
library(dplyr)
library(readr)
library(tidyr)
library(lubridate)
library(fixest)
library(data.table)
library(stringr)
library(sf)
# Turn off scientific notation globally
#options(scipen = 999)

#load relevant files
#readRDS("C:/Users/yoshiara/OneDrive - RAND Corporation/Desktop/Dissertation/eastern drc/panel_p3_yearly.rds")

setwd("C:/Users/yoshiara/OneDrive - RAND Corporation/Desktop/Dissertation/eastern drc")

#Descriptive Stats

##Cross-sectional (2021 only)

###Mining vs. non-mining grids

What do mining vs. non-mining grid cells look like? Table below includes grids covering all of DRC. Some things that stand out: - Mining grids are closer to national borders - Mining grids have more cropland (all types - irrigated, rainfed, other). For all grids, there is more rainfed than irrigated cropland. - Mining grids have fewer permanent bodies of water - Mining grids have higher population – but this could just be because the non-mining grids may capture many unpopulated areas. This matches the pattern for impervious/manmade surfaces but not for nightlight density which is the same for both mining and non-mining grids (weird) - Mining grids have higher conflict counts (across all conflict categories)

#descriptive stats (means table) mining vs non_mining####
# 1) Restrict to latest year where all vars exist (2021)
df_latest <- panel_p3_yearly %>%
  filter(year == 2021) %>%
  mutate(mine_group = ifelse(total_mine_count == 0, "No mines", ">=1 mine"))

# 2) Variables to summarize
vars <- c(
  "dist_to_border",
  "forest","grassland","impervious","irrigated_crops",
  "nightlights","other_crops","permanent_water",
  "rainfed_crops","seasonal_water","shrubland",
  "sparse","swamp","pop", "n_conflicts", "n_battles", "n_protests"
)

# helper: coerce to numeric if needed (handles factors/characters)
to_num <- function(x) {
  if (is.numeric(x)) return(x)
  if (is.factor(x))  return(as.numeric(as.character(x)))
  if (is.character(x)) return(suppressWarnings(as.numeric(x)))
  return(suppressWarnings(as.numeric(x)))
}

# 3) Run t-tests + build results
t_test_results <- lapply(vars, function(v) {
  x <- to_num(df_latest[[v]])
  g <- df_latest$mine_group
  
  # group means
  m0 <- mean(x[g == "No mines"], na.rm = TRUE)
  m1 <- mean(x[g == ">=1 mine"], na.rm = TRUE)
  
  # t-test (Welch)
  tt <- t.test(x ~ g)
  
  data.frame(
    variable = v,
    mean_no_mines = m0,
    mean_mines = m1,
    p_value = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(sig = case_when(
    p_value < 0.001 ~ "***",
    p_value < 0.01  ~ "**",
    p_value < 0.05  ~ "*",
    TRUE ~ ""
  ))

# Turn off scientific notation globally
options(scipen = 999)

# Round numeric columns to 3 decimals
t_test_results_clean <- t_test_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 1) Count grids in each group ---
group_counts <- df_latest %>%
  group_by(mine_group) %>%
  summarise(N_grids = n(), .groups = "drop")

# Extract counts
N_no_mines <- group_counts$N_grids[group_counts$mine_group == "No mines"]
N_mines    <- group_counts$N_grids[group_counts$mine_group == ">=1 mine"]

# --- 2) Add counts to results table ---
t_test_results_clean <- t_test_results %>%
  mutate(
    N_no_mines = N_no_mines,
    N_mines    = N_mines
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 3) View table ---

#turn of pagination for tables
DT::datatable(
  t_test_results_clean,
  options = list(
    pageLength = nrow(t_test_results_clean),  # show all rows
    paging = FALSE             # turn pagination off
  )
)
NA
NA

###Mining vs. non-mining grids (excluding grids with fewer than 25 people)

This is the same table as the one above, except now I’m restricting to grid cells with at least 25 people. The results are generally consistent with the patterns in the previous table.

#descriptive stats (means table) mining vs non_mining (population >= 25)####
# 1) Restrict to latest year where all vars exist (2021)
df_latest_pop <- df_latest %>%
  filter(pop >= 25) 

# 2) Variables to summarize
vars <- c(
  "dist_to_border",
  "forest","grassland","impervious","irrigated_crops",
  "nightlights","other_crops","permanent_water",
  "rainfed_crops","seasonal_water","shrubland",
  "sparse","swamp","pop", "n_conflicts", "n_battles", "n_protests"
)

# helper: coerce to numeric if needed (handles factors/characters)
to_num <- function(x) {
  if (is.numeric(x)) return(x)
  if (is.factor(x))  return(as.numeric(as.character(x)))
  if (is.character(x)) return(suppressWarnings(as.numeric(x)))
  return(suppressWarnings(as.numeric(x)))
}

# 3) Run t-tests + build results
t_test_results <- lapply(vars, function(v) {
  x <- to_num(df_latest_pop[[v]])
  g <- df_latest_pop$mine_group
  
  # group means
  m0 <- mean(x[g == "No mines"], na.rm = TRUE)
  m1 <- mean(x[g == ">=1 mine"], na.rm = TRUE)
  
  # t-test (Welch)
  tt <- t.test(x ~ g)
  
  data.frame(
    variable = v,
    mean_no_mines = m0,
    mean_mines = m1,
    p_value = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(sig = case_when(
    p_value < 0.001 ~ "***",
    p_value < 0.01  ~ "**",
    p_value < 0.05  ~ "*",
    TRUE ~ ""
  ))



# Round numeric columns to 3 decimals
t_test_results_clean <- t_test_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 1) Count grids in each group ---
group_counts <- df_latest_pop %>%
  group_by(mine_group) %>%
  summarise(N_grids = n(), .groups = "drop")

# Extract counts
N_no_mines <- group_counts$N_grids[group_counts$mine_group == "No mines"]
N_mines    <- group_counts$N_grids[group_counts$mine_group == ">=1 mine"]

# --- 2) Add counts to results table ---
t_test_results_clean <- t_test_results %>%
  mutate(
    N_no_mines = N_no_mines,
    N_mines    = N_mines
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 3) View table ---
#turn of pagination for tables
DT::datatable(
  t_test_results_clean,
  options = list(
    pageLength = nrow(t_test_results_clean),  # show all rows
    paging = FALSE             # turn pagination off
  )
)
NA

###Mineral type treatment categories (3TG vs. non-3TG mines)

Here I’m again restricting to mining grids, but comparing grids that mine the 3TG (tin,tantalum, tungsten, and gold) with grids that mine other materials. The 3TG are the minerals covered by the DF Act. This is the alternate identification strategy I want to explore in paper 3. There is only one grid cell with both 3TG and non-3TG mines.Here we see that: - 3TG grids have more crops (irrigated and rainfed) - Much higher population in non-3TG mining grids, which tracks with impervious surfaces and nightlights - More battles in 3TG grids, more protests in non-DF grids

#descriptive stats (means table) df vs non df mining grids####
# 1) Restrict to latest year where all vars exist (2021)
df_latest_mining <- df_latest %>%
  filter(mine_group == ">=1 mine") 

# 2) Variables to summarize
vars <- c(
  "dist_to_border",
  "forest","grassland","impervious","irrigated_crops",
  "nightlights","other_crops","permanent_water",
  "rainfed_crops","seasonal_water","shrubland",
  "sparse","swamp","pop",
  "n_conflicts","n_battles","n_protests"
)

# helper: coerce to numeric if needed (handles factors/characters)
to_num <- function(x) {
  if (is.numeric(x)) return(x)
  if (is.factor(x))  return(as.numeric(as.character(x)))
  if (is.character(x)) return(suppressWarnings(as.numeric(x)))
  return(suppressWarnings(as.numeric(x)))
}

# 3) Run t-tests + build results
t_test_results <- lapply(vars, function(v) {
  x <- to_num(df_latest_mining[[v]])
  g <- df_latest_mining$has_df_mine_dummy
  
  # group means
  m0 <- mean(x[g == 0], na.rm = TRUE)
  m1 <- mean(x[g == 1], na.rm = TRUE)
  
  # t-test (Welch)
  tt <- t.test(x ~ g)
  
  data.frame(
    variable = v,
    mean_non_df = m0,
    mean_df = m1,
    p_value = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(sig = case_when(
    p_value < 0.001 ~ "***",
    p_value < 0.01  ~ "**",
    p_value < 0.05  ~ "*",
    TRUE ~ ""
  ))


# Round numeric columns to 3 decimals
t_test_results_clean <- t_test_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 1) Count grids in each group ---
group_counts <- df_latest_mining %>%
  group_by(has_df_mine_dummy) %>%
  summarise(N_grids = n(), .groups = "drop")

# Extract counts
N_non_df_mines <- group_counts$N_grids[group_counts$has_df_mine_dummy == 0]
N_df_mines    <- group_counts$N_grids[group_counts$has_df_mine_dummy == 1]

# --- 2) Add counts to results table ---
t_test_results_clean <- t_test_results %>%
  mutate(
    N_non_df_mines = N_non_df_mines,
    N_df_mines    = N_df_mines
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 3) View table ---
DT::datatable(
  t_test_results_clean,
  options = list(
    pageLength = nrow(t_test_results_clean),  # show all rows
    paging = FALSE             # turn pagination off
  )
)
NA

###Parker and Vadheim/Dodd-Frank treated grids vs. untreated grids

Now I’m looking only a mining grids, comparing grids that were “treated” per the PV study (i.e. that are located in conflict affected mining areas per the 2011 State Department map) to untreated mining grids per the PV study: - There is less of a population delta than in the previous table (the 3TG vs. non-3TG mining grid comparison). I’m not sure what this means in terms of thinking through PV’s vs. my identification strategy. - No statistically significant difference in mean conflict counts in PV-DF treated vs. untreated mining grids. Again, not sure what that implies.

# helper: coerce to numeric if needed (handles factors/characters)
to_num <- function(x) {
  if (is.numeric(x)) return(x)
  if (is.factor(x))  return(as.numeric(as.character(x)))
  if (is.character(x)) return(suppressWarnings(as.numeric(x)))
  return(suppressWarnings(as.numeric(x)))
}

# 3) Run t-tests + build results
t_test_results <- lapply(vars, function(v) {
  x <- to_num(df_latest_mining[[v]])
  g <- df_latest_mining$policy_terr
  
  # group means
  m0 <- mean(x[g == 0], na.rm = TRUE)
  m1 <- mean(x[g == 1], na.rm = TRUE)
  
  # t-test (Welch)
  tt <- t.test(x ~ g)
  
  data.frame(
    variable = v,
    mean_non_df = m0,
    mean_df = m1,
    p_value = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(sig = case_when(
    p_value < 0.001 ~ "***",
    p_value < 0.01  ~ "**",
    p_value < 0.05  ~ "*",
    TRUE ~ ""
  ))



# Round numeric columns to 3 decimals
t_test_results_clean <- t_test_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 1) Count grids in each group ---
group_counts <- df_latest_mining %>%
  group_by(policy_terr) %>%
  summarise(N_grids = n(), .groups = "drop")

# Extract counts
N_non_df_terr <- group_counts$N_grids[group_counts$policy_terr == 0]
N_df_terr    <- group_counts$N_grids[group_counts$policy_terr == 1]

# --- 2) Add counts to results table ---
t_test_results_clean <- t_test_results %>%
  mutate(
    N_non_df_terr = N_non_df_terr,
    N_df_terr    = N_df_terr
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 3) View table ---
DT::datatable(
  t_test_results_clean,
  options = list(
    pageLength = nrow(t_test_results_clean),  # show all rows
    paging = FALSE             # turn pagination off
  )
)

##Rate of change (comparing 2003-2006 and 2019-2021)

###Mining vs. non-mining grids - More deforestation in mining grids - More cropland growth (irrigated, rainfed) in mining grids - More population growth in mining grids - Larger increase in conflict in mining grids

#annual changes mining v. non-mining####
# -----------------------------
# 0) Choose the yearly variables (edit this list if you want)
# -----------------------------
yearly_vars <- c(
  "forest","grassland","impervious","irrigated_crops",
  "nightlights","other_crops","permanent_water",
  "rainfed_crops","seasonal_water","shrubland","sparse","swamp","pop",
  "n_conflicts","n_battles","n_protests"
)

# helper: coerce to numeric safely (handles factor/character numbers)
to_num <- function(x) {
  if (is.numeric(x)) return(x)
  if (is.factor(x))  return(as.numeric(as.character(x)))
  if (is.character(x)) return(suppressWarnings(as.numeric(x)))
  return(suppressWarnings(as.numeric(x)))
}

# -----------------------------
# 1) Create a static mine group per hex_id (No mines vs >=1 mine)
#    Uses the maximum total_mine_count observed for each hex (robust)
# -----------------------------
mine_group_by_hex <- panel_p3_yearly %>%
  group_by(hex_id) %>%
  summarise(
    total_mine_count_static = max(to_num(total_mine_count), na.rm = TRUE),
    mine_group = ifelse(total_mine_count_static == 0, "No mines", ">=1 mine"),
    .groups = "drop"
  )

# -----------------------------
# 2) Compute hex-level averages in each period, then change (post - pre)
# -----------------------------
hex_period_avgs <- panel_p3_yearly %>%
  select(hex_id, year, all_of(yearly_vars)) %>%
  mutate(year = to_num(year)) %>%
  filter(year %in% c(2003:2006, 2019:2021)) %>%
  mutate(period = ifelse(year <= 2006, "pre_2003_06", "post_2019_21")) %>%
  # ensure vars numeric
  mutate(across(all_of(yearly_vars), to_num)) %>%
  group_by(hex_id, period) %>%
  summarise(across(all_of(yearly_vars), ~ mean(.x, na.rm = TRUE)), .groups = "drop") %>%
  pivot_wider(
    names_from = period,
    values_from = all_of(yearly_vars),
    names_sep = "__"
  )

# compute change for each var: post - pre
# Join mine group onto the wide period-averaged data
tmp <- hex_period_avgs %>%
  left_join(mine_group_by_hex, by = "hex_id")

# Create change columns: post - pre
for (v in yearly_vars) {
  post_col <- paste0(v, "__post_2019_21")
  pre_col  <- paste0(v, "__pre_2003_06")
  tmp[[paste0("chg__", v)]] <- tmp[[post_col]] - tmp[[pre_col]]
}

# Keep only what you need
hex_changes <- tmp %>%
  select(hex_id, mine_group, starts_with("chg__"))

# -----------------------------
# 3) Compare mean changes across mine groups (Welch t-test), build table
# -----------------------------
chg_vars <- paste0("chg__", yearly_vars)

diff_in_changes <- lapply(chg_vars, function(v) {
  x <- hex_changes[[v]]
  g <- hex_changes$mine_group
  
  ok <- !is.na(x) & !is.na(g)
  x <- x[ok]; g <- g[ok]
  
  m0 <- mean(x[g == "No mines"], na.rm = TRUE)
  m1 <- mean(x[g == ">=1 mine"], na.rm = TRUE)
  
  tt <- t.test(x ~ g)  # Welch
  
  data.frame(
    variable = sub("^chg__", "", v),
    mean_change_no_mines = m0,
    mean_change_mines    = m1,
    diff_change          = m1 - m0,
    p_value              = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(
    sig = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01  ~ "**",
      p_value < 0.05  ~ "*",
      TRUE ~ ""
    )
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# group Ns (number of hexes contributing to the change analysis)
group_Ns <- hex_changes %>%
  count(mine_group)

group_Ns

DT::datatable(
  diff_in_changes,
  options = list(
    pageLength = nrow(diff_in_changes),  # show all rows
    paging = FALSE             # turn pagination off
  )
)
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio

###Mining vs. non-mining grids (excluding grids with fewer than 25 people)

Same table as the one above but restricting to grids with at least 25 people. Results generally track with the previous table.

#annual changes mining v. non-mining (pop>=25)####


# -----------------------------------
# 1) Restrict to hexes with pre-period avg pop >= 25
# -----------------------------------

hex_changes_pop25 <- hex_period_avgs %>%
  left_join(mine_group_by_hex, by = "hex_id") %>%
  filter(pop__pre_2003_06 >= 25)

# -----------------------------------
# 2) Create change variables
# -----------------------------------

tmp <- hex_changes_pop25

for (v in yearly_vars) {
  post_col <- paste0(v, "__post_2019_21")
  pre_col  <- paste0(v, "__pre_2003_06")
  tmp[[paste0("chg__", v)]] <- tmp[[post_col]] - tmp[[pre_col]]
}

hex_changes_pop25 <- tmp %>%
  select(hex_id, mine_group, starts_with("chg__"))

# -----------------------------------
# 3) Compare mean changes across mine groups
# -----------------------------------

chg_vars <- paste0("chg__", yearly_vars)

diff_in_changes_pop25 <- lapply(chg_vars, function(v) {
  
  x <- hex_changes_pop25[[v]]
  g <- hex_changes_pop25$mine_group
  
  ok <- !is.na(x) & !is.na(g)
  x <- x[ok]; g <- g[ok]
  
  m0 <- mean(x[g == "No mines"], na.rm = TRUE)
  m1 <- mean(x[g == ">=1 mine"], na.rm = TRUE)
  
  tt <- t.test(x ~ g)
  
  data.frame(
    variable = sub("^chg__", "", v),
    mean_change_no_mines = m0,
    mean_change_mines    = m1,
    diff_change          = m1 - m0,
    p_value              = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(
    sig = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01  ~ "**",
      p_value < 0.05  ~ "*",
      TRUE ~ ""
    )
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# -----------------------------------
# 4) Group counts
# -----------------------------------

group_Ns_pop25 <- hex_changes_pop25 %>%
  count(mine_group)

group_Ns_pop25

DT::datatable(
  diff_in_changes_pop25,
  options = list(
    pageLength = nrow(diff_in_changes_pop25),  # show all rows
    paging = FALSE             # turn pagination off
  )
)
NA

###Mineral type treatment categories (3TG vs. non-3TG mines)

#annual changes df vs. non df mineral grids####
# ------------------------------------------------------------
# 1) Build a hex-level DF grouping variable, and restrict to mining grids
# ------------------------------------------------------------
df_group_by_hex <- panel_p3_yearly %>%
  group_by(hex_id) %>%
  summarise(
    total_mine_count_static = first(na.omit(to_num(total_mine_count))),
    has_df_static           = first(na.omit(to_num(has_df_mine_dummy))),
    df_group = ifelse(has_df_static == 1, "DF mine", "Non-DF mine"),
    .groups = "drop"
  ) %>%
  filter(total_mine_count_static >= 1)

# ------------------------------------------------------------
# 2) Join onto the period-average wide data and compute changes
# ------------------------------------------------------------
tmp <- hex_period_avgs %>%
  inner_join(df_group_by_hex %>% select(hex_id, df_group), by = "hex_id")

# change columns: post - pre
for (v in yearly_vars) {
  post_col <- paste0(v, "__post_2019_21")
  pre_col  <- paste0(v, "__pre_2003_06")
  tmp[[paste0("chg__", v)]] <- tmp[[post_col]] - tmp[[pre_col]]
}

hex_changes_df_vs_nondf <- tmp %>%
  select(hex_id, df_group, starts_with("chg__"))

# ------------------------------------------------------------
# 3) Compare mean changes across DF vs non-DF (Welch t-tests)
# ------------------------------------------------------------
chg_vars <- paste0("chg__", yearly_vars)

diff_in_changes_df_vs_nondf <- lapply(chg_vars, function(v) {
  
  x <- hex_changes_df_vs_nondf[[v]]
  g <- hex_changes_df_vs_nondf$df_group
  
  ok <- !is.na(x) & !is.na(g)
  x <- x[ok]; g <- g[ok]
  
  m0 <- mean(x[g == "Non-DF mine"], na.rm = TRUE)
  m1 <- mean(x[g == "DF mine"], na.rm = TRUE)
  
  tt <- t.test(x ~ g)  # Welch
  
  data.frame(
    variable = sub("^chg__", "", v),
    mean_change_non_df = m0,
    mean_change_df     = m1,
    diff_change        = m1 - m0,
    p_value            = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(
    sig = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01  ~ "**",
      p_value < 0.05  ~ "*",
      TRUE ~ ""
    )
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# ------------------------------------------------------------
# 4) Group counts
# ------------------------------------------------------------
group_Ns_df_vs_nondf <- hex_changes_df_vs_nondf %>%
  count(df_group)

group_Ns_df_vs_nondf

DT::datatable(
  diff_in_changes_df_vs_nondf,
  options = list(
    pageLength = nrow(diff_in_changes_df_vs_nondf),  # show all rows
    paging = FALSE             # turn pagination off
  )
)
NA

###Parker and Vadheim/Dodd-Frank mining grids vs. non-DF mining grids

#annual changes df vs. non df policy grids####
# ------------------------------------------------------------
# 1) Build a hex-level DF grouping variable, and restrict to mining grids
# ------------------------------------------------------------
df_group_by_hex <- panel_p3_yearly %>%
  group_by(hex_id) %>%
  summarise(
    total_mine_count_static = first(na.omit(to_num(total_mine_count))),
    has_policy_static           = first(na.omit(to_num(policy_terr))),
    policy_group = ifelse(has_policy_static == 1, "DF mine", "Non-DF mine"),
    .groups = "drop"
  ) %>%
  filter(total_mine_count_static >= 1)

# ------------------------------------------------------------
# 2) Join onto the period-average wide data and compute changes
# ------------------------------------------------------------
tmp <- hex_period_avgs %>%
  inner_join(df_group_by_hex %>% select(hex_id, policy_group), by = "hex_id")

# change columns: post - pre
for (v in yearly_vars) {
  post_col <- paste0(v, "__post_2019_21")
  pre_col  <- paste0(v, "__pre_2003_06")
  tmp[[paste0("chg__", v)]] <- tmp[[post_col]] - tmp[[pre_col]]
}

hex_changes_df_vs_nondf <- tmp %>%
  select(hex_id, policy_group, starts_with("chg__"))

# ------------------------------------------------------------
# 3) Compare mean changes across DF vs non-DF (Welch t-tests)
# ------------------------------------------------------------
chg_vars <- paste0("chg__", yearly_vars)

diff_in_changes_df_vs_nondf <- lapply(chg_vars, function(v) {
  
  x <- hex_changes_df_vs_nondf[[v]]
  g <- hex_changes_df_vs_nondf$policy_group
  
  ok <- !is.na(x) & !is.na(g)
  x <- x[ok]; g <- g[ok]
  
  m0 <- mean(x[g == "Non-DF mine"], na.rm = TRUE)
  m1 <- mean(x[g == "DF mine"], na.rm = TRUE)
  
  tt <- t.test(x ~ g)  # Welch
  
  data.frame(
    variable = sub("^chg__", "", v),
    mean_change_non_df = m0,
    mean_change_df     = m1,
    diff_change        = m1 - m0,
    p_value            = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(
    sig = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01  ~ "**",
      p_value < 0.05  ~ "*",
      TRUE ~ ""
    )
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# ------------------------------------------------------------
# 4) Group counts
# ------------------------------------------------------------
group_Ns_df_vs_nondf <- hex_changes_df_vs_nondf %>%
  count(policy_group)

group_Ns_df_vs_nondf

DT::datatable(
  diff_in_changes_df_vs_nondf,
  options = list(
    pageLength = nrow(diff_in_changes_df_vs_nondf),  # show all rows
    paging = FALSE             # turn pagination off
  )
)
NA

#Regressions

##Outcome: Pr(Battle Occurence)

###PV baseline model but with grids Looking at the outcome: probability of any battle occurring. Same as PV study but using grid cells instead of territories as the unit of analysis (so a much smaller united of analysis that doesn’t follow political boundaries). Only policy_dummy (indicator for grids in a territory flagged as conflict-affected by State Dep in 2008-2010) is stat sig (at ***, 0.000187). Interaction terms with number of mine variables aren’t.

#any battles, matching PV but at grid vs. territory level####
mod <- feols(
  any_battles ~ policy_dummy + policy_dummy:is_3t_count + policy_dummy:is_gold_count | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
OLS estimation, Dep. Var.: any_battles
Observations: 7,528,572
Fixed-effects: hex_id: 69,709,  time_id: 108
Standard-errors: Clustered (hex_id) 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.010095     Adj. R2: 0.117854
                 Within R2: 2.187e-5

###PV baseline model with grid-level treatment assignment

Now I’m assigning treatment at the grid level, rather than at the territory level, based on whether the grid is in a State Department policy territory + has a 3TG mine. When I assign treatment more granularly, the treatment effect goes away.

#any battles, matching PV but at grid vs. territory level####
mod <- feols(
  any_battles ~ df_policy_grids + df_policy_grids:is_3t_count + df_policy_grids:is_gold_count | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
OLS estimation, Dep. Var.: any_battles
Observations: 7,528,572
Fixed-effects: hex_id: 69,709,  time_id: 108
Standard-errors: Clustered (hex_id) 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.010095     Adj. R2: 0.117839
                 Within R2: 4.347e-6

###Government ban treatment

Now I”m ignoring Dodd-Frank and looking instead at the government ban on artisanal mining that occurred at the same time that DF was passed. Here I’m using the same approach as PV (but at the grid level vs territory level). The ban covered 3 provinces, which encompass 44 of the 51 treated territories in the PV study. There is a reduction in the probability of experiencing a battle in gov_ban provinces.


#what if we just look at the gov ban####
mod <- feols(
  any_battles ~ gov_ban + gov_ban:is_3t_count + gov_ban:is_gold_count | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
OLS estimation, Dep. Var.: any_battles
Observations: 7,528,572
Fixed-effects: hex_id: 69,709,  time_id: 108
Standard-errors: Clustered (hex_id) 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.010095     Adj. R2: 0.11784 
                 Within R2: 5.427e-6

###Government ban assigned at the grid level

Now I’m assigning the same government ban treatment but at the grid level instead of at the territory level, based on whether the grid is in a province targeted by the government ban + has a 3TG mine. When I assign treatment more granularly, the coefficient on the Pr(battle) remains negative, but now the interaction term with the number of gold mines becomes statistically significant (and is positive).

#what if we assign gov ban treatment at the grid level
mod <- feols(
  any_battles ~ gov_ban_grids + gov_ban_grids:is_3t_count + gov_ban_grids:is_gold_count | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
OLS estimation, Dep. Var.: any_battles
Observations: 7,528,572
Fixed-effects: hex_id: 69,709,  time_id: 108
Standard-errors: Clustered (hex_id) 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.010095     Adj. R2: 0.117839
                 Within R2: 4.331e-6

###Both PV-DF treatment and government ban

Now I’m looking at the PV-DF treatment + government ban simultaneously. First regression has treatment assigned at the territory or province level. Second regression has treatment assigned at the grid level. When treatment is assigned at the territory/province level, the two policies work in opposite directions and are both statistically significant. When treatment is assigned at the grid level, they are both negative and stat insignificant.


#what is we look at both df and gov grids
mod <- feols(
  any_battles ~ policy_dummy + gov_ban | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
OLS estimation, Dep. Var.: any_battles
Observations: 7,528,572
Fixed-effects: hex_id: 69,709,  time_id: 108
Standard-errors: Clustered (hex_id) 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.010095     Adj. R2: 0.117856
                 Within R2: 2.34e-5 
#same regression but with treatment assigned at the grid level
mod <- feols(
  any_battles ~ df_policy_grids + gov_ban_grids | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
OLS estimation, Dep. Var.: any_battles
Observations: 7,528,572
Fixed-effects: hex_id: 69,709,  time_id: 108
Standard-errors: Clustered (hex_id) 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.010095     Adj. R2: 0.117837
                 Within R2: 2.95e-6 

###Alternate treatment timing (May 2013)

Now I’m exploring whether using a later treatment starting time changes the original PV results. Specifically, while Dodd-Frank was passed in July 2010, companies didn’t have to file their first conflict mineral disclosure reports until May 2014. I’m using June 2013 – one year before the first company filing – as my alternate treatment start timing. I’m also extending the timeframe from 2012 (PV study) to 2020. All three coefficients of interest are positive and statistically significant.

#May 2013 treatment####

mod <- feols(
  any_battles ~ policy_dummy_2013 + policy_dummy_2013:is_3t_count + policy_dummy_2013:is_gold_count | hex_id + time_id,
  data = full_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
OLS estimation, Dep. Var.: any_battles
Observations: 14,220,636
Fixed-effects: hex_id: 69,709,  time_id: 204
Standard-errors: Clustered (hex_id) 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.015611     Adj. R2: 0.088314
                 Within R2: 1.772e-4

###Mineral type treatment assignment

Now I’m exploring the use of an alternate treatment assignment based on whether the grid cell has a 3TG mine vs non-3TG mine. First regression uses the July 2010 treatment timing, second the June 2013 timing. Both show a positive, statistically significant treatment effect.

#Mineral type treatment and 2010 or 2013 treatment####
#2013
panel_month <- panel_month %>%
  mutate(
    policy_dummy_2013_timeonly = as.integer(time_id >= 201306L)
  )

full_months_grids <- panel_month %>%
  dplyr::filter(
    year >= 2004,
    year <= 2020,
    eastern_terr == 1
  )

#2010
panel_month <- panel_month %>%
  mutate(
    policy_dummy_2010_timeonly = as.integer(time_id >= 201007L)
  )



pv_months_grids <- panel_month %>%
  dplyr::filter(
    year >= 2004,
    year <= 2012,
    eastern_terr == 1
  )

#2013 reg
mod <- feols(
  any_battles ~ policy_dummy_2013_timeonly:has_df_mine_dummy | hex_id + time_id,
  data = full_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
OLS estimation, Dep. Var.: any_battles
Observations: 14,220,636
Fixed-effects: hex_id: 69,709,  time_id: 204
Standard-errors: Clustered (hex_id) 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.015612     Adj. R2: 0.088213
                 Within R2: 6.65e-5 
#2010 reg
mod <- feols(
  any_battles ~ policy_dummy_2010_timeonly:has_df_mine_dummy | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
OLS estimation, Dep. Var.: any_battles
Observations: 7,528,572
Fixed-effects: hex_id: 69,709,  time_id: 108
Standard-errors: Clustered (hex_id) 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.010095     Adj. R2: 0.117844
                 Within R2: 1.032e-5

Here I’m introducing some yearly covariates (population, nightlights and select land use categories). This is the baseline PV specification (2010 treatment, State Department map). Interesting that while the policy summy remains positive and statistically significant, the interaction term on the number of gold mines does not. The only covariate that seems to pop is rainfed crops.

#baseline PV with RS controls

mod <- feols(
  any_battles ~ policy_dummy + policy_dummy:is_3t_count + policy_dummy:is_gold_count + pop + nightlights + irrigated_crops + rainfed_crops + forest | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
OLS estimation, Dep. Var.: any_battles
Observations: 7,528,572
Fixed-effects: hex_id: 69,709,  time_id: 108
Standard-errors: Clustered (hex_id) 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.010092     Adj. R2: 0.118337
                 Within R2: 5.704e-4

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

---
title: "Paper 3/DRC Exploratory Regs"
output: html_notebook
---

```{r}
#load libraries
library(dplyr)
library(readr)
library(tidyr)
library(lubridate)
library(fixest)
library(data.table)
library(stringr)
library(sf)

```

```{r}
# Turn off scientific notation globally
#options(scipen = 999)

#load relevant files
#readRDS("C:/Users/yoshiara/OneDrive - RAND Corporation/Desktop/Dissertation/eastern drc/panel_p3_yearly.rds")

setwd("C:/Users/yoshiara/OneDrive - RAND Corporation/Desktop/Dissertation/eastern drc")
```


#Descriptive Stats

##Cross-sectional (2021 only)

###Mining vs. non-mining grids

What do mining vs. non-mining grid cells look like? Table below includes grids covering all of DRC. Some things that stand out:
- Mining grids are closer to national borders
- Mining grids have more cropland (all types - irrigated, rainfed, other). For all grids, there is more rainfed than irrigated cropland.
- Mining grids have fewer permanent bodies of water
- Mining grids have higher population – but this could just be because the non-mining grids may capture many unpopulated areas. This matches the pattern for impervious/manmade surfaces but not for nightlight density which is the same for both mining and non-mining grids (weird)
- Mining grids have higher conflict counts (across all conflict categories)
```{r}
#descriptive stats (means table) mining vs non_mining####
# 1) Restrict to latest year where all vars exist (2021)
df_latest <- panel_p3_yearly %>%
  filter(year == 2021) %>%
  mutate(mine_group = ifelse(total_mine_count == 0, "No mines", ">=1 mine"))

# 2) Variables to summarize
vars <- c(
  "dist_to_border",
  "forest","grassland","impervious","irrigated_crops",
  "nightlights","other_crops","permanent_water",
  "rainfed_crops","seasonal_water","shrubland",
  "sparse","swamp","pop", "n_conflicts", "n_battles", "n_protests"
)

# helper: coerce to numeric if needed (handles factors/characters)
to_num <- function(x) {
  if (is.numeric(x)) return(x)
  if (is.factor(x))  return(as.numeric(as.character(x)))
  if (is.character(x)) return(suppressWarnings(as.numeric(x)))
  return(suppressWarnings(as.numeric(x)))
}

# 3) Run t-tests + build results
t_test_results <- lapply(vars, function(v) {
  x <- to_num(df_latest[[v]])
  g <- df_latest$mine_group
  
  # group means
  m0 <- mean(x[g == "No mines"], na.rm = TRUE)
  m1 <- mean(x[g == ">=1 mine"], na.rm = TRUE)
  
  # t-test (Welch)
  tt <- t.test(x ~ g)
  
  data.frame(
    variable = v,
    mean_no_mines = m0,
    mean_mines = m1,
    p_value = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(sig = case_when(
    p_value < 0.001 ~ "***",
    p_value < 0.01  ~ "**",
    p_value < 0.05  ~ "*",
    TRUE ~ ""
  ))



# Round numeric columns to 3 decimals
t_test_results_clean <- t_test_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 1) Count grids in each group ---
group_counts <- df_latest %>%
  group_by(mine_group) %>%
  summarise(N_grids = n(), .groups = "drop")

# Extract counts
N_no_mines <- group_counts$N_grids[group_counts$mine_group == "No mines"]
N_mines    <- group_counts$N_grids[group_counts$mine_group == ">=1 mine"]

# --- 2) Add counts to results table ---
t_test_results_clean <- t_test_results %>%
  mutate(
    N_no_mines = N_no_mines,
    N_mines    = N_mines
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 3) View table ---

#turn of pagination for tables
DT::datatable(
  t_test_results_clean,
  options = list(
    pageLength = nrow(t_test_results_clean),  # show all rows
    paging = FALSE             # turn pagination off
  )
)


```
###Mining vs. non-mining grids (excluding grids with fewer than 25 people)

This is the same table as the one above, except now I'm restricting to grid cells with at least 25 people. The results are generally consistent with the patterns in the previous table.
```{r}
#descriptive stats (means table) mining vs non_mining (population >= 25)####
# 1) Restrict to latest year where all vars exist (2021)
df_latest_pop <- df_latest %>%
  filter(pop >= 25) 


# 3) Run t-tests + build results
t_test_results <- lapply(vars, function(v) {
  x <- to_num(df_latest_pop[[v]])
  g <- df_latest_pop$mine_group
  
  # group means
  m0 <- mean(x[g == "No mines"], na.rm = TRUE)
  m1 <- mean(x[g == ">=1 mine"], na.rm = TRUE)
  
  # t-test (Welch)
  tt <- t.test(x ~ g)
  
  data.frame(
    variable = v,
    mean_no_mines = m0,
    mean_mines = m1,
    p_value = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(sig = case_when(
    p_value < 0.001 ~ "***",
    p_value < 0.01  ~ "**",
    p_value < 0.05  ~ "*",
    TRUE ~ ""
  ))



# Round numeric columns to 3 decimals
t_test_results_clean <- t_test_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 1) Count grids in each group ---
group_counts <- df_latest_pop %>%
  group_by(mine_group) %>%
  summarise(N_grids = n(), .groups = "drop")

# Extract counts
N_no_mines <- group_counts$N_grids[group_counts$mine_group == "No mines"]
N_mines    <- group_counts$N_grids[group_counts$mine_group == ">=1 mine"]

# --- 2) Add counts to results table ---
t_test_results_clean <- t_test_results %>%
  mutate(
    N_no_mines = N_no_mines,
    N_mines    = N_mines
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 3) View table ---
#turn of pagination for tables
DT::datatable(
  t_test_results_clean,
  options = list(
    pageLength = nrow(t_test_results_clean),  # show all rows
    paging = FALSE             # turn pagination off
  )
)

```
###Mineral type treatment categories (3TG vs. non-3TG mines)

Here I'm again restricting to mining grids, but comparing grids that mine the 3TG (tin,tantalum, tungsten, and gold) with grids that mine other materials. The 3TG are the minerals covered by the DF Act. This is the alternate identification strategy I want to explore in paper 3. There is only one grid cell with both 3TG and non-3TG mines.Here we see that:
- 3TG grids have more crops (irrigated and rainfed)
- Much higher population in non-3TG mining grids, which tracks with impervious surfaces and nightlights
- More battles in 3TG grids, more protests in non-DF grids
```{r}
#descriptive stats (means table) df vs non df mining grids####
# 1) Restrict to latest year where all vars exist (2021)
df_latest_mining <- df_latest %>%
  filter(mine_group == ">=1 mine") 


# 3) Run t-tests + build results
t_test_results <- lapply(vars, function(v) {
  x <- to_num(df_latest_mining[[v]])
  g <- df_latest_mining$has_df_mine_dummy
  
  # group means
  m0 <- mean(x[g == 0], na.rm = TRUE)
  m1 <- mean(x[g == 1], na.rm = TRUE)
  
  # t-test (Welch)
  tt <- t.test(x ~ g)
  
  data.frame(
    variable = v,
    mean_non_df = m0,
    mean_df = m1,
    p_value = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(sig = case_when(
    p_value < 0.001 ~ "***",
    p_value < 0.01  ~ "**",
    p_value < 0.05  ~ "*",
    TRUE ~ ""
  ))


# Round numeric columns to 3 decimals
t_test_results_clean <- t_test_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 1) Count grids in each group ---
group_counts <- df_latest_mining %>%
  group_by(has_df_mine_dummy) %>%
  summarise(N_grids = n(), .groups = "drop")

# Extract counts
N_non_df_mines <- group_counts$N_grids[group_counts$has_df_mine_dummy == 0]
N_df_mines    <- group_counts$N_grids[group_counts$has_df_mine_dummy == 1]

# --- 2) Add counts to results table ---
t_test_results_clean <- t_test_results %>%
  mutate(
    N_non_df_mines = N_non_df_mines,
    N_df_mines    = N_df_mines
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 3) View table ---
DT::datatable(
  t_test_results_clean,
  options = list(
    pageLength = nrow(t_test_results_clean),  # show all rows
    paging = FALSE             # turn pagination off
  )
)

```
###Parker and Vadheim/Dodd-Frank treated grids vs. untreated grids

Now I'm looking only a mining grids, comparing grids that were "treated" per the PV study (i.e. that are located in conflict affected mining areas per the 2011 State Department map) to untreated mining grids per the PV study:
- There is less of a population delta than in the previous table (the 3TG vs. non-3TG mining grid comparison). I'm not sure what this means in terms of thinking through PV's vs. my identification strategy.
- No statistically significant difference in mean conflict counts in PV-DF treated vs. untreated mining grids. Again, not sure what that implies.
```{r}


# 3) Run t-tests + build results
t_test_results <- lapply(vars, function(v) {
  x <- to_num(df_latest_mining[[v]])
  g <- df_latest_mining$policy_terr
  
  # group means
  m0 <- mean(x[g == 0], na.rm = TRUE)
  m1 <- mean(x[g == 1], na.rm = TRUE)
  
  # t-test (Welch)
  tt <- t.test(x ~ g)
  
  data.frame(
    variable = v,
    mean_non_df = m0,
    mean_df = m1,
    p_value = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(sig = case_when(
    p_value < 0.001 ~ "***",
    p_value < 0.01  ~ "**",
    p_value < 0.05  ~ "*",
    TRUE ~ ""
  ))



# Round numeric columns to 3 decimals
t_test_results_clean <- t_test_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 1) Count grids in each group ---
group_counts <- df_latest_mining %>%
  group_by(policy_terr) %>%
  summarise(N_grids = n(), .groups = "drop")

# Extract counts
N_non_df_terr <- group_counts$N_grids[group_counts$policy_terr == 0]
N_df_terr    <- group_counts$N_grids[group_counts$policy_terr == 1]

# --- 2) Add counts to results table ---
t_test_results_clean <- t_test_results %>%
  mutate(
    N_non_df_terr = N_non_df_terr,
    N_df_terr    = N_df_terr
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# --- 3) View table ---
DT::datatable(
  t_test_results_clean,
  options = list(
    pageLength = nrow(t_test_results_clean),  # show all rows
    paging = FALSE             # turn pagination off
  )
)
```
##Rate of change (comparing 2003-2006 and 2019-2021)

###Mining vs. non-mining grids
- More deforestation in mining grids
- More cropland growth (irrigated, rainfed) in mining grids
- More population growth in mining grids
- Larger increase in conflict in mining grids
```{r}
#annual changes mining v. non-mining####
# -----------------------------
# 0) Choose the yearly variables (edit this list if you want)
# -----------------------------
yearly_vars <- c(
  "forest","grassland","impervious","irrigated_crops",
  "nightlights","other_crops","permanent_water",
  "rainfed_crops","seasonal_water","shrubland","sparse","swamp","pop",
  "n_conflicts","n_battles","n_protests"
)

# helper: coerce to numeric safely (handles factor/character numbers)
to_num <- function(x) {
  if (is.numeric(x)) return(x)
  if (is.factor(x))  return(as.numeric(as.character(x)))
  if (is.character(x)) return(suppressWarnings(as.numeric(x)))
  return(suppressWarnings(as.numeric(x)))
}

# -----------------------------
# 1) Create a static mine group per hex_id (No mines vs >=1 mine)
#    Uses the maximum total_mine_count observed for each hex (robust)
# -----------------------------
mine_group_by_hex <- panel_p3_yearly %>%
  group_by(hex_id) %>%
  summarise(
    total_mine_count_static = max(to_num(total_mine_count), na.rm = TRUE),
    mine_group = ifelse(total_mine_count_static == 0, "No mines", ">=1 mine"),
    .groups = "drop"
  )

# -----------------------------
# 2) Compute hex-level averages in each period, then change (post - pre)
# -----------------------------
hex_period_avgs <- panel_p3_yearly %>%
  select(hex_id, year, all_of(yearly_vars)) %>%
  mutate(year = to_num(year)) %>%
  filter(year %in% c(2003:2006, 2019:2021)) %>%
  mutate(period = ifelse(year <= 2006, "pre_2003_06", "post_2019_21")) %>%
  # ensure vars numeric
  mutate(across(all_of(yearly_vars), to_num)) %>%
  group_by(hex_id, period) %>%
  summarise(across(all_of(yearly_vars), ~ mean(.x, na.rm = TRUE)), .groups = "drop") %>%
  pivot_wider(
    names_from = period,
    values_from = all_of(yearly_vars),
    names_sep = "__"
  )

# compute change for each var: post - pre
# Join mine group onto the wide period-averaged data
tmp <- hex_period_avgs %>%
  left_join(mine_group_by_hex, by = "hex_id")

# Create change columns: post - pre
for (v in yearly_vars) {
  post_col <- paste0(v, "__post_2019_21")
  pre_col  <- paste0(v, "__pre_2003_06")
  tmp[[paste0("chg__", v)]] <- tmp[[post_col]] - tmp[[pre_col]]
}

# Keep only what you need
hex_changes <- tmp %>%
  select(hex_id, mine_group, starts_with("chg__"))

# -----------------------------
# 3) Compare mean changes across mine groups (Welch t-test), build table
# -----------------------------
chg_vars <- paste0("chg__", yearly_vars)

diff_in_changes <- lapply(chg_vars, function(v) {
  x <- hex_changes[[v]]
  g <- hex_changes$mine_group
  
  ok <- !is.na(x) & !is.na(g)
  x <- x[ok]; g <- g[ok]
  
  m0 <- mean(x[g == "No mines"], na.rm = TRUE)
  m1 <- mean(x[g == ">=1 mine"], na.rm = TRUE)
  
  tt <- t.test(x ~ g)  # Welch
  
  data.frame(
    variable = sub("^chg__", "", v),
    mean_change_no_mines = m0,
    mean_change_mines    = m1,
    diff_change          = m1 - m0,
    p_value              = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(
    sig = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01  ~ "**",
      p_value < 0.05  ~ "*",
      TRUE ~ ""
    )
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# group Ns (number of hexes contributing to the change analysis)
group_Ns <- hex_changes %>%
  count(mine_group)

group_Ns

DT::datatable(
  diff_in_changes,
  options = list(
    pageLength = nrow(diff_in_changes),  # show all rows
    paging = FALSE             # turn pagination off
  )
)

```
###Mining vs. non-mining grids (excluding grids with fewer than 25 people)

Same table as the one above but restricting to grids with at least 25 people. Results generally track with the previous table.
```{r}
#annual changes mining v. non-mining (pop>=25)####


# -----------------------------------
# 1) Restrict to hexes with pre-period avg pop >= 25
# -----------------------------------

hex_changes_pop25 <- hex_period_avgs %>%
  left_join(mine_group_by_hex, by = "hex_id") %>%
  filter(pop__pre_2003_06 >= 25)

# -----------------------------------
# 2) Create change variables
# -----------------------------------

tmp <- hex_changes_pop25

for (v in yearly_vars) {
  post_col <- paste0(v, "__post_2019_21")
  pre_col  <- paste0(v, "__pre_2003_06")
  tmp[[paste0("chg__", v)]] <- tmp[[post_col]] - tmp[[pre_col]]
}

hex_changes_pop25 <- tmp %>%
  select(hex_id, mine_group, starts_with("chg__"))

# -----------------------------------
# 3) Compare mean changes across mine groups
# -----------------------------------

chg_vars <- paste0("chg__", yearly_vars)

diff_in_changes_pop25 <- lapply(chg_vars, function(v) {
  
  x <- hex_changes_pop25[[v]]
  g <- hex_changes_pop25$mine_group
  
  ok <- !is.na(x) & !is.na(g)
  x <- x[ok]; g <- g[ok]
  
  m0 <- mean(x[g == "No mines"], na.rm = TRUE)
  m1 <- mean(x[g == ">=1 mine"], na.rm = TRUE)
  
  tt <- t.test(x ~ g)
  
  data.frame(
    variable = sub("^chg__", "", v),
    mean_change_no_mines = m0,
    mean_change_mines    = m1,
    diff_change          = m1 - m0,
    p_value              = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(
    sig = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01  ~ "**",
      p_value < 0.05  ~ "*",
      TRUE ~ ""
    )
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# -----------------------------------
# 4) Group counts
# -----------------------------------

group_Ns_pop25 <- hex_changes_pop25 %>%
  count(mine_group)

group_Ns_pop25

DT::datatable(
  diff_in_changes_pop25,
  options = list(
    pageLength = nrow(diff_in_changes_pop25),  # show all rows
    paging = FALSE             # turn pagination off
  )
)

```

###Mineral type treatment categories (3TG vs. non-3TG mines)

- Less forest loss in 3TG mineral grids, which tracks with lower nightlight, pop and impervious growth
- Higher cropland growth (irrigated, rainfed) in 3TG mineral grids
- Higher growth in conflicts overall and battles but not protests for 3TG mineral grids
```{r}
#annual changes df vs. non df mineral grids####
# ------------------------------------------------------------
# 1) Build a hex-level DF grouping variable, and restrict to mining grids
# ------------------------------------------------------------
df_group_by_hex <- panel_p3_yearly %>%
  group_by(hex_id) %>%
  summarise(
    total_mine_count_static = first(na.omit(to_num(total_mine_count))),
    has_df_static           = first(na.omit(to_num(has_df_mine_dummy))),
    df_group = ifelse(has_df_static == 1, "DF mine", "Non-DF mine"),
    .groups = "drop"
  ) %>%
  filter(total_mine_count_static >= 1)

# ------------------------------------------------------------
# 2) Join onto the period-average wide data and compute changes
# ------------------------------------------------------------
tmp <- hex_period_avgs %>%
  inner_join(df_group_by_hex %>% select(hex_id, df_group), by = "hex_id")

# change columns: post - pre
for (v in yearly_vars) {
  post_col <- paste0(v, "__post_2019_21")
  pre_col  <- paste0(v, "__pre_2003_06")
  tmp[[paste0("chg__", v)]] <- tmp[[post_col]] - tmp[[pre_col]]
}

hex_changes_df_vs_nondf <- tmp %>%
  select(hex_id, df_group, starts_with("chg__"))

# ------------------------------------------------------------
# 3) Compare mean changes across DF vs non-DF (Welch t-tests)
# ------------------------------------------------------------
chg_vars <- paste0("chg__", yearly_vars)

diff_in_changes_df_vs_nondf <- lapply(chg_vars, function(v) {
  
  x <- hex_changes_df_vs_nondf[[v]]
  g <- hex_changes_df_vs_nondf$df_group
  
  ok <- !is.na(x) & !is.na(g)
  x <- x[ok]; g <- g[ok]
  
  m0 <- mean(x[g == "Non-DF mine"], na.rm = TRUE)
  m1 <- mean(x[g == "DF mine"], na.rm = TRUE)
  
  tt <- t.test(x ~ g)  # Welch
  
  data.frame(
    variable = sub("^chg__", "", v),
    mean_change_non_df = m0,
    mean_change_df     = m1,
    diff_change        = m1 - m0,
    p_value            = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(
    sig = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01  ~ "**",
      p_value < 0.05  ~ "*",
      TRUE ~ ""
    )
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# ------------------------------------------------------------
# 4) Group counts
# ------------------------------------------------------------
group_Ns_df_vs_nondf <- hex_changes_df_vs_nondf %>%
  count(df_group)

group_Ns_df_vs_nondf

DT::datatable(
  diff_in_changes_df_vs_nondf,
  options = list(
    pageLength = nrow(diff_in_changes_df_vs_nondf),  # show all rows
    paging = FALSE             # turn pagination off
  )
)

```

###Parker and Vadheim/Dodd-Frank mining grids vs. non-DF mining grids

- No statistically significant difference in conflict count growth in PV-DF treated vs. untreated mining grids.
```{r}
#annual changes df vs. non df policy grids####
# ------------------------------------------------------------
# 1) Build a hex-level DF grouping variable, and restrict to mining grids
# ------------------------------------------------------------
df_group_by_hex <- panel_p3_yearly %>%
  group_by(hex_id) %>%
  summarise(
    total_mine_count_static = first(na.omit(to_num(total_mine_count))),
    has_policy_static           = first(na.omit(to_num(policy_terr))),
    policy_group = ifelse(has_policy_static == 1, "DF mine", "Non-DF mine"),
    .groups = "drop"
  ) %>%
  filter(total_mine_count_static >= 1)

# ------------------------------------------------------------
# 2) Join onto the period-average wide data and compute changes
# ------------------------------------------------------------
tmp <- hex_period_avgs %>%
  inner_join(df_group_by_hex %>% select(hex_id, policy_group), by = "hex_id")

# change columns: post - pre
for (v in yearly_vars) {
  post_col <- paste0(v, "__post_2019_21")
  pre_col  <- paste0(v, "__pre_2003_06")
  tmp[[paste0("chg__", v)]] <- tmp[[post_col]] - tmp[[pre_col]]
}

hex_changes_df_vs_nondf <- tmp %>%
  select(hex_id, policy_group, starts_with("chg__"))

# ------------------------------------------------------------
# 3) Compare mean changes across DF vs non-DF (Welch t-tests)
# ------------------------------------------------------------
chg_vars <- paste0("chg__", yearly_vars)

diff_in_changes_df_vs_nondf <- lapply(chg_vars, function(v) {
  
  x <- hex_changes_df_vs_nondf[[v]]
  g <- hex_changes_df_vs_nondf$policy_group
  
  ok <- !is.na(x) & !is.na(g)
  x <- x[ok]; g <- g[ok]
  
  m0 <- mean(x[g == "Non-DF mine"], na.rm = TRUE)
  m1 <- mean(x[g == "DF mine"], na.rm = TRUE)
  
  tt <- t.test(x ~ g)  # Welch
  
  data.frame(
    variable = sub("^chg__", "", v),
    mean_change_non_df = m0,
    mean_change_df     = m1,
    diff_change        = m1 - m0,
    p_value            = tt$p.value
  )
}) %>%
  bind_rows() %>%
  mutate(
    sig = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01  ~ "**",
      p_value < 0.05  ~ "*",
      TRUE ~ ""
    )
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# ------------------------------------------------------------
# 4) Group counts
# ------------------------------------------------------------
group_Ns_df_vs_nondf <- hex_changes_df_vs_nondf %>%
  count(policy_group)

group_Ns_df_vs_nondf

DT::datatable(
  diff_in_changes_df_vs_nondf,
  options = list(
    pageLength = nrow(diff_in_changes_df_vs_nondf),  # show all rows
    paging = FALSE             # turn pagination off
  )
)

```
#Regressions

##Outcome: Pr(Battle Occurence)

###PV baseline model but with grids
Looking at the outcome: probability of any battle occurring. Same as PV study but using grid cells instead of territories as the unit of analysis (so a much smaller united of analysis that doesn't follow political boundaries). Only policy_dummy (indicator for grids in a territory flagged as conflict-affected by State Dep in 2008-2010) is stat sig (at ***, 0.000187). Interaction terms with number of mine variables aren't.
```{r}
#any battles, matching PV but at grid vs. territory level####
mod <- feols(
  any_battles ~ policy_dummy + policy_dummy:is_3t_count + policy_dummy:is_gold_count | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
```
###PV baseline model with grid-level treatment assignment

Now I'm assigning treatment at the grid level, rather than at the territory level, based on whether the grid is in a State Department policy territory + has a 3TG mine. When I assign treatment more granularly, the treatment effect goes away.
```{r}
#any battles, matching PV but at grid vs. territory level####
mod <- feols(
  any_battles ~ df_policy_grids + df_policy_grids:is_3t_count + df_policy_grids:is_gold_count | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
```
###Government ban treatment

Now I"m ignoring Dodd-Frank and looking instead at the government ban on artisanal mining that occurred at the same time that DF was passed. Here I'm using the same approach as PV (but at the grid level vs territory level). The ban covered 3 provinces, which encompass 44 of the 51 treated territories in the PV study. There is a reduction in the probability of experiencing a battle in gov_ban provinces. 
```{r}

#what if we just look at the gov ban####
mod <- feols(
  any_battles ~ gov_ban + gov_ban:is_3t_count + gov_ban:is_gold_count | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
```

###Government ban assigned at the grid level

Now I'm assigning the same government ban treatment but at the grid level instead of at the territory level, based on whether the grid is in a province targeted by the government ban + has a 3TG mine. When I assign treatment more granularly, the coefficient on the Pr(battle) remains negative, but now the interaction term with the number of gold mines becomes statistically significant (and is positive).
```{r}
#what if we assign gov ban treatment at the grid level
mod <- feols(
  any_battles ~ gov_ban_grids + gov_ban_grids:is_3t_count + gov_ban_grids:is_gold_count | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")

```

###Both PV-DF treatment and government ban

Now I'm looking at the PV-DF treatment + government ban simultaneously. First regression has treatment assigned at the territory or province level. Second regression has treatment assigned at the grid level. When treatment is assigned at the territory/province level, the two policies work in opposite directions and are both statistically significant. When treatment is assigned at the grid level, they are both negative and stat insignificant.
```{r}

#what is we look at both df and gov grids
mod <- feols(
  any_battles ~ policy_dummy + gov_ban | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")

#same regression but with treatment assigned at the grid level
mod <- feols(
  any_battles ~ df_policy_grids + gov_ban_grids | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")

```
###Alternate treatment timing (May 2013)

Now I'm exploring whether using a later treatment starting time changes the original PV results. Specifically, while Dodd-Frank was passed in July 2010, companies didn't have to file their first conflict mineral disclosure reports until May 2014. I'm using June 2013 -- one year before the first company filing -- as my alternate treatment start timing. I'm also extending the timeframe from 2012 (PV study) to 2020. All three coefficients of interest are positive and statistically significant.
```{r}
#May 2013 treatment####

mod <- feols(
  any_battles ~ policy_dummy_2013 + policy_dummy_2013:is_3t_count + policy_dummy_2013:is_gold_count | hex_id + time_id,
  data = full_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")


```
###Mineral type treatment assignment

Now I'm exploring the use of an alternate treatment assignment based on whether the grid cell has a 3TG mine vs non-3TG mine. First regression uses the July 2010 treatment timing, second the June 2013 timing. Both show a positive, statistically significant treatment effect.

```{r}
#Mineral type treatment and 2010 or 2013 treatment####
#2013
panel_month <- panel_month %>%
  mutate(
    policy_dummy_2013_timeonly = as.integer(time_id >= 201306L)
  )

full_months_grids <- panel_month %>%
  dplyr::filter(
    year >= 2004,
    year <= 2020,
    eastern_terr == 1
  )

#2010
panel_month <- panel_month %>%
  mutate(
    policy_dummy_2010_timeonly = as.integer(time_id >= 201007L)
  )



pv_months_grids <- panel_month %>%
  dplyr::filter(
    year >= 2004,
    year <= 2012,
    eastern_terr == 1
  )

#2013 reg
mod <- feols(
  any_battles ~ policy_dummy_2013_timeonly:has_df_mine_dummy | hex_id + time_id,
  data = full_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")

#2010 reg
mod <- feols(
  any_battles ~ policy_dummy_2010_timeonly:has_df_mine_dummy | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")



```
Here I'm introducing some yearly covariates (population, nightlights and select land use categories). This is the baseline PV specification (2010 treatment, State Department map). Interesting that while the policy summy remains positive and statistically significant, the interaction term on the number of gold mines does not. The only covariate that seems to pop is rainfed crops.

```{r}
#baseline PV with RS controls

mod <- feols(
  any_battles ~ policy_dummy + policy_dummy:is_3t_count + policy_dummy:is_gold_count + pop + nightlights + irrigated_crops + rainfed_crops + forest | hex_id + time_id,
  data = pv_months_grids
)

# Cluster-robust summary
summary(mod, cluster = "hex_id")
```

Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Ctrl+Alt+I*.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Ctrl+Shift+K* to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
