# Load required packages / settings.
# NOTE(review): the code in this file also uses `%>%`, group_by(), summarise(),
# mutate(), plm(), vcovHC() and coeftest(); no library() call for the packages
# that provide them is visible here -- confirm they are attached elsewhere.

# Install car only when it is missing, instead of reinstalling on every run.
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car")
}
library(car)

# Strongly prefer fixed notation over scientific notation in printed numbers.
options(scipen = 999)
Look at conflict counts over time:
# Per-year summary of event_id_cnty: total, mean, median, maximum and minimum
# (missing values dropped), plus the number of rows observed in each year.
conflict_by_year <- summarise(
  group_by(subsample_panel_2018_2022, year),
  total_conflicts    = sum(event_id_cnty, na.rm = TRUE),
  avg_conflicts      = mean(event_id_cnty, na.rm = TRUE),
  median_conflicts   = median(event_id_cnty, na.rm = TRUE),
  max_conflicts      = max(event_id_cnty, na.rm = TRUE),
  min_conflicts      = min(event_id_cnty, na.rm = TRUE),
  count_observations = n()
)

# Show the yearly summary table.
print(conflict_by_year)
NA
Which hexgrids are associated with the highest conflict count? Compared to conflict grids overall, these grids tend to be closer to borders – on average, by about 6 km. They also have significantly higher population (48K vs 7K) – impervious and nightlight values follow this pattern. These are likely cities and other high-density urban areas – agriculture and other rural indicators are lower compared to conflict grids overall.
Variable | Mean | Min | Max |
---|---|---|---|
number_conflicts | 30.759 | 6.000 | 154.000 |
min_distance_to_conflict | 0.000 | 0.000 | 0.000 |
distance_to_border | 14,610.049 | 0.000 | 100,480.553 |
road_length | 145,623.429 | 12,055.066 | 273,896.717 |
distance_to_nearest_road | 0.000 | 0.000 | 0.000 |
distance_to_nearest_mine_active_inactive | 40,112.286 | 0.000 | 132,347.179 |
distance_to_active_mine | 61,947.266 | 13,512.491 | 132,347.179 |
distance_to_inactive_mine | 45,089.536 | 0.000 | 138,542.050 |
hexgrid_landscan_pop | 48,072.084 | 562.918 | 165,170.518 |
hexgrid_nightlight | 38.555 | 7.000 | 63.000 |
hexgrid_percent_grassland | 0.124 | 0.000 | 0.779 |
hexgrid_percent_impervious | 0.551 | 0.000 | 1.000 |
hexgrid_percent_irrigated_ag | 0.221 | 0.000 | 0.921 |
hexgrid_percent_permanent_water | 0.001 | 0.000 | 0.023 |
hexgrid_percent_rainfed_ag | 0.000 | 0.000 | 0.000 |
hexgrid_percent_seasonal_water | 0.005 | 0.000 | 0.071 |
neighbor_landscan_pop | 196,182.944 | 2,865.451 | 838,796.937 |
neighbor_nightlight | 32.602 | 5.200 | 62.976 |
neighbor_percent_impervious | 0.347 | 0.000 | 0.970 |
neighbor_percent_irrigated_ag | 0.329 | 0.030 | 0.972 |
neighbor_percent_permanent_water | 0.001 | 0.000 | 0.016 |
neighbor_percent_rainfed_ag | 0.000 | 0.000 | 0.008 |
neighbor_percent_seasonal_water | 0.006 | 0.000 | 0.065 |
active_mine_area | 0.000 | 0.000 | 0.000 |
inactive_mine_area | 30,565.909 | 0.000 | 886,411.366 |
hexgrid_elevation | 799.587 | 267.208 | 1,711.110 |
hexgrid_slope | 4.216 | 0.818 | 23.840 |
mine_flag | 0.034 | 0.000 | 1.000 |
mine_neighbor_flag | 0.034 | 0.000 | 1.000 |
conflict_flag | 1.000 | 1.000 | 1.000 |
conflict_neighbor_flag | 0.621 | 0.000 | 1.000 |
pp_change_landscan_pop | -856.694 | -15,344.775 | 13,058.792 |
pp_change_grassland | -0.011 | -0.202 | 0.034 |
pp_change_impervious | 0.046 | -0.011 | 0.268 |
pp_change_irrigated_ag | -0.034 | -0.256 | 0.085 |
pp_change_rainfed_ag | -0.002 | -0.067 | 0.000 |
pp_change_nightlights | 5.091 | 1.000 | 15.385 |
pp_change_permanent_water | 0.000 | -0.002 | 0.001 |
pp_change_seasonal_water | 0.002 | -0.001 | 0.022 |
combined_mine_area | 30,565.909 | 0.000 | 886,411.366 |
log_active_mine_area | 0.000 | 0.000 | 0.000 |
log_combined_mine_area | 0.472 | 0.000 | 13.695 |
log_distance_to_nearest_mine | 10.056 | 0.000 | 11.793 |
log_distance_to_active_mine | 10.948 | 9.511 | 11.793 |
log_hexgrid_landscan_pop | 9.970 | 6.333 | 12.015 |
log_neighbor_landscan_pop | 11.045 | 7.960 | 13.640 |
log_distance_to_border | 7.442 | 0.000 | 11.518 |
log_road_length | 11.688 | 9.397 | 12.521 |
event_id_cnty | 4.897 | 0.000 | 42.000 |
conflict_occurrence | 0.697 | 0.000 | 1.000 |
mine_dist_irrigated_ag | 2.237 | 0.000 | 9.893 |
mine_dist_irrigated_ag_neighbor | 3.393 | 0.000 | 10.770 |
mine_dist_permanent_water | 0.011 | 0.000 | 0.230 |
mine_dist_permanent_water_neighbor | 0.009 | 0.000 | 0.166 |
mine_dist_seasonal_water | 0.056 | 0.000 | 0.789 |
mine_dist_seasonal_water_neighbor | 0.067 | 0.000 | 0.666 |
mine_dist_border | 75.324 | 0.000 | 130.184 |
distance_to_mine_active_inactive_km | 40.112 | 0.000 | 132.347 |
Basic linear probability model regressing distance to any mine (either active or inactive):
# Pooled linear probability model: conflict_occurrence regressed on the log
# distance to the nearest mine, with no further covariates.
plm_model <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine,
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)

# Coefficient table based on the HC0 robust covariance clustered by group.
plm_model_coeftest_results <- coeftest(
  plm_model,
  vcov. = vcovHC(plm_model, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.03781547 0.00686046 5.5121 0.0000000356 ***
log_distance_to_nearest_mine -0.00276520 0.00062807 -4.4027 0.0000107100 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Treatment coefficient goes up slightly when I add in country and year dummies:
# Same pooled model as before, now with year and country dummies added.
plm_model <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)

# Coefficient table based on the HC0 robust covariance clustered by group.
plm_model_coeftest_results <- coeftest(
  plm_model,
  vcov. = vcovHC(plm_model, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.03000506 0.00810453 3.7023 0.0002139 ***
log_distance_to_nearest_mine -0.00278213 0.00067624 -4.1141 0.000038912631225 ***
factor(year)2019 0.00096046 0.00079229 1.2123 0.2254196
factor(year)2020 0.00424204 0.00094965 4.4670 0.000007947498415 ***
factor(year)2021 0.00728350 0.00103227 7.0558 0.000000000001734 ***
factor(year)2022 0.00344165 0.00088713 3.8795 0.0001048 ***
factor(country)Kazakhstan 0.00929824 0.00201894 4.6055 0.000004122536445 ***
factor(country)Kyrgyzstan 0.00468865 0.00102684 4.5661 0.000004978440550 ***
factor(country)Tajikistan -0.00239841 0.00123810 -1.9372 0.0527294 .
factor(country)Uzbekistan 0.00949321 0.00197446 4.8080 0.000001527972423 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Now I want to see what happens when I start adding in controls, starting with population (landscan population + nightlights and percent impervious which can be thought of as population proxies). The three variables are highly correlated. I’m going to choose one: population because there’s a clear theoretical relationship between population and conflict.
# Collinearity check among population, nightlight and impervious share.
# Correlation of hexgrid population with nightlight (complete cases only).
cor(
  subsample_panel_2018_2022[["hexgrid_landscan_pop"]],
  subsample_panel_2018_2022[["hexgrid_nightlight"]],
  use = "complete.obs"
)
[1] 0.6710751
# Correlation of impervious share with nightlight (complete cases only).
cor(
  subsample_panel_2018_2022[["hexgrid_percent_impervious"]],
  subsample_panel_2018_2022[["hexgrid_nightlight"]],
  use = "complete.obs"
)
[1] 0.7558135
# Correlation of hexgrid population with impervious share (complete cases only).
cor(
  subsample_panel_2018_2022[["hexgrid_landscan_pop"]],
  subsample_panel_2018_2022[["hexgrid_percent_impervious"]],
  use = "complete.obs"
)
[1] 0.8278345
Here’s my regression with population as a covariate. Treatment size goes down; population is highly statistically significant. I also add in neighbor grid population because a densely populated neighboring area could contribute to increased conflict within the hexgrid itself.
# Pooled model with hexgrid and neighbor population added as covariates,
# alongside the year and country dummies.
plm_model <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    hexgrid_landscan_pop + neighbor_landscan_pop +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)

# Coefficient table based on the HC0 robust covariance clustered by group.
plm_model_coeftest_results <- coeftest(
  plm_model,
  vcov. = vcovHC(plm_model, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02284390362 0.00728688440 3.1349 0.0017197 **
log_distance_to_nearest_mine -0.00217854360 0.00060678582 -3.5903 0.0003305 ***
hexgrid_landscan_pop 0.00000624329 0.00000062725 9.9534 < 0.00000000000000022 ***
neighbor_landscan_pop -0.00000042989 0.00000010841 -3.9656 0.000073303017687 ***
factor(year)2019 0.00090828946 0.00079198714 1.1468 0.2514485
factor(year)2020 0.00413873336 0.00094724702 4.3692 0.000012489017277 ***
factor(year)2021 0.00713359992 0.00102859473 6.9353 0.000000000004093 ***
factor(year)2022 0.00322124910 0.00088144586 3.6545 0.0002579 ***
factor(country)Kazakhstan 0.00608887003 0.00185512943 3.2822 0.0010306 **
factor(country)Kyrgyzstan 0.00373047977 0.00091716964 4.0674 0.000047602436885 ***
factor(country)Tajikistan -0.00680872708 0.00125270541 -5.4352 0.000000054933727 ***
factor(country)Uzbekistan -0.00786629946 0.00187173549 -4.2027 0.000026414312362 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Now I want to look at the water-related variables (irrigated agriculture and permanent water, and seasonal water). I’m going to start with adding them to the simple regression (y on x with year and country dummies, leaving out population). Starting with irrigated agriculture – both hexgrid and neighbor variables are statistically significant. Interestingly, neighboring areas have a larger impact on conflict, which suggests that being surrounded by grids with more irrigated agriculture is a bigger contributing factor to conflict than being directly located in a grid with more irrigated agriculture. The treatment effect doesn’t really change compared to the simple regression that doesn’t control for irrigated agriculture. Coefficients on hexgrid irrigated ag and neighbor irrigated ag jump around depending on whether I interact treatment with either of those two variables. Why? Does it matter, especially given that treatment effect stays relatively stable across the models?
Irrigated ag variable and neighbor irrigated ag * distance to mine are jointly highly statistically significant. In other words, conflict is more likely when mines are surrounded by irrigated agricultural land, and the effect of being located closer or farther away from a mine on conflict is conditional on how much of the surrounding land is being used for irrigated agriculture. *************TO-DO:***********see what happens when you increase unit of analysis to hexgrid+its 6 neighbors.
# Add the irrigated-agriculture share of the hexgrid and of its neighbors to
# the dummy-only specification.
plm_model_irrigated_ag <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    hexgrid_percent_irrigated_ag + neighbor_percent_irrigated_ag +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)

# Coefficient table based on this model's own HC0 robust covariance,
# clustered by group.
plm_model_coeftest_results <- coeftest(
  plm_model_irrigated_ag,
  vcov. = vcovHC(plm_model_irrigated_ag, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02895503 0.00804489 3.5992 0.0003195 ***
log_distance_to_nearest_mine -0.00271269 0.00067135 -4.0406 0.000053374793398 ***
hexgrid_percent_irrigated_ag -0.02383097 0.01050122 -2.2694 0.0232503 *
neighbor_percent_irrigated_ag 0.04638282 0.01214436 3.8193 0.0001340 ***
factor(year)2019 0.00097180 0.00079229 1.2266 0.2199895
factor(year)2020 0.00426532 0.00094945 4.4924 0.000007054571850 ***
factor(year)2021 0.00726789 0.00103219 7.0412 0.000000000001925 ***
factor(year)2022 0.00340596 0.00088719 3.8391 0.0001236 ***
factor(country)Kazakhstan 0.00360582 0.00203013 1.7762 0.0757124 .
factor(country)Kyrgyzstan 0.00258093 0.00097882 2.6368 0.0083719 **
factor(country)Tajikistan -0.00588707 0.00135066 -4.3586 0.000013107603175 ***
factor(country)Uzbekistan -0.00277517 0.00306663 -0.9050 0.3654918
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Interaction: log distance to nearest mine x hexgrid irrigated-ag share.
subsample_panel_2018_2022 <- mutate(
  subsample_panel_2018_2022,
  mine_dist_irrigated_ag =
    log_distance_to_nearest_mine * hexgrid_percent_irrigated_ag
)

# Irrigated-ag model extended with the hexgrid interaction term.
plm_model_interaction <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    hexgrid_percent_irrigated_ag + neighbor_percent_irrigated_ag +
    mine_dist_irrigated_ag +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)

# Coefficient table based on the HC0 robust covariance clustered by group.
plm_model_coeftest_results_interaction <- coeftest(
  plm_model_interaction,
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results_interaction)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02339087 0.00861108 2.7164 0.0066020 **
log_distance_to_nearest_mine -0.00224357 0.00071825 -3.1237 0.0017870 **
hexgrid_percent_irrigated_ag 0.00636694 0.02157419 0.2951 0.7679046
neighbor_percent_irrigated_ag 0.04727088 0.01218482 3.8795 0.0001048 ***
mine_dist_irrigated_ag -0.00292824 0.00185452 -1.5790 0.1143478
factor(year)2019 0.00097210 0.00079231 1.2269 0.2198615
factor(year)2020 0.00426862 0.00094949 4.4957 0.000006946247830 ***
factor(year)2021 0.00727100 0.00103230 7.0435 0.000000000001894 ***
factor(year)2022 0.00340883 0.00088729 3.8418 0.0001222 ***
factor(country)Kazakhstan 0.00432266 0.00200137 2.1598 0.0307881 *
factor(country)Kyrgyzstan 0.00310958 0.00102762 3.0260 0.0024790 **
factor(country)Tajikistan -0.00534272 0.00138082 -3.8692 0.0001093 ***
factor(country)Uzbekistan -0.00222293 0.00307758 -0.7223 0.4701138
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Joint significance of the hexgrid irrigated-ag share and its interaction with
# mine distance, using the robust covariance. With no `test` argument given,
# the statistic printed for this call is a Chisq (Wald) statistic.
f_test_results <- linearHypothesis(
  plm_model_interaction,
  c(
    "hexgrid_percent_irrigated_ag = 0",
    "mine_dist_irrigated_ag = 0"
  ),
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)

# Show the joint test table.
print(f_test_results)
Linear hypothesis test:
hexgrid_percent_irrigated_ag = 0
mine_dist_irrigated_ag = 0
Model 1: restricted model
Model 2: conflict_occurrence ~ log_distance_to_nearest_mine + hexgrid_percent_irrigated_ag +
neighbor_percent_irrigated_ag + mine_dist_irrigated_ag +
factor(year) + factor(country)
Note: Coefficient covariance matrix supplied.
Res.Df Df Chisq Pr(>Chisq)
1 62459
2 62457 2 7.4619 0.02397 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Interaction: log distance to nearest mine x neighbor irrigated-ag share.
subsample_panel_2018_2022 <- mutate(
  subsample_panel_2018_2022,
  mine_dist_irrigated_ag_neighbor =
    log_distance_to_nearest_mine * neighbor_percent_irrigated_ag
)

# Irrigated-ag model extended with the neighbor interaction term.
plm_model_interaction <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    hexgrid_percent_irrigated_ag + neighbor_percent_irrigated_ag +
    mine_dist_irrigated_ag_neighbor +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)

# Coefficient table based on the HC0 robust covariance clustered by group.
plm_model_coeftest_results_interaction <- coeftest(
  plm_model_interaction,
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results_interaction)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02129452 0.00817737 2.6041 0.0092143 **
log_distance_to_nearest_mine -0.00206707 0.00068188 -3.0314 0.0024351 **
hexgrid_percent_irrigated_ag -0.02415587 0.01053726 -2.2924 0.0218844 *
neighbor_percent_irrigated_ag 0.08696772 0.03424583 2.5395 0.0111031 *
mine_dist_irrigated_ag_neighbor -0.00379000 0.00280284 -1.3522 0.1763163
factor(year)2019 0.00097276 0.00079231 1.2277 0.2195472
factor(year)2020 0.00427029 0.00094961 4.4969 0.000006908116106 ***
factor(year)2021 0.00727235 0.00103219 7.0456 0.000000000001866 ***
factor(year)2022 0.00341053 0.00088731 3.8437 0.0001213 ***
factor(country)Kazakhstan 0.00454131 0.00198989 2.2822 0.0224815 *
factor(country)Kyrgyzstan 0.00331026 0.00097121 3.4084 0.0006539 ***
factor(country)Tajikistan -0.00515980 0.00131825 -3.9141 0.000090825244282 ***
factor(country)Uzbekistan -0.00200559 0.00303010 -0.6619 0.5080448
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Joint significance of the neighbor irrigated-ag share and its interaction
# with mine distance, using the robust covariance. With no `test` argument
# given, the statistic printed for this call is a Chisq (Wald) statistic.
f_test_results <- linearHypothesis(
  plm_model_interaction,
  c(
    "neighbor_percent_irrigated_ag = 0",
    "mine_dist_irrigated_ag_neighbor = 0"
  ),
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)

# Show the joint test table.
print(f_test_results)
Linear hypothesis test:
neighbor_percent_irrigated_ag = 0
mine_dist_irrigated_ag_neighbor = 0
Model 1: restricted model
Model 2: conflict_occurrence ~ log_distance_to_nearest_mine + hexgrid_percent_irrigated_ag +
neighbor_percent_irrigated_ag + mine_dist_irrigated_ag_neighbor +
factor(year) + factor(country)
Note: Coefficient covariance matrix supplied.
Res.Df Df Chisq Pr(>Chisq)
1 62459
2 62457 2 15.101 0.0005259 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Do I still see this effect when I narrow in on active mines? No, but it could be that the sample is too small to detect an effect. ***************TO-DO:**************** look at GoogleEarth coding notes to see if changing the subsample of active mines changes results.
# Interaction model using log_distance_to_active_mine as the distance regressor
# together with the permanent-water shares and mine_dist_permanent_water_neighbor.
# NOTE(review): the accompanying text asks about the irrigated-ag effect, while
# this formula uses the permanent-water variables -- confirm which was intended.
# NOTE(review): mine_dist_permanent_water_neighbor is computed elsewhere in this
# file from log_distance_to_nearest_mine, not log_distance_to_active_mine --
# confirm the interaction matches the distance measure used here.
plm_model_interaction <- plm(
  conflict_occurrence ~ log_distance_to_active_mine +
    hexgrid_percent_permanent_water + neighbor_percent_permanent_water +
    mine_dist_permanent_water_neighbor +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)

# Coefficient table based on the HC0 robust covariance clustered by group.
plm_model_coeftest_results_interaction <- coeftest(
  plm_model_interaction,
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results_interaction)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02720642 0.01282434 2.1215 0.03389 *
log_distance_to_active_mine -0.00250244 0.00105391 -2.3744 0.01758 *
hexgrid_percent_permanent_water -0.02786297 0.02380785 -1.1703 0.24187
neighbor_percent_permanent_water 0.01106011 0.07699449 0.1436 0.88578
mine_dist_permanent_water_neighbor 0.00152765 0.00566448 0.2697 0.78740
factor(year)2019 0.00096061 0.00079229 1.2124 0.22535
factor(year)2020 0.00424193 0.00094967 4.4667 0.000007959845310 ***
factor(year)2021 0.00728325 0.00103232 7.0552 0.000000000001746 ***
factor(country)Kazakhstan 0.00897678 0.00214299 4.1889 0.000028078183945 ***
factor(country)Kyrgyzstan 0.00595708 0.00123391 4.8278 0.000001384410780 ***
factor(country)Tajikistan -0.00082648 0.00126158 -0.6551 0.51240
factor(country)Uzbekistan 0.00990690 0.00231093 4.2870 0.000018145622383 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Joint significance of the neighbor permanent-water share and its interaction
# term, using the robust covariance. With no `test` argument given, the
# statistic printed for this call is a Chisq (Wald) statistic.
f_test_results <- linearHypothesis(
  plm_model_interaction,
  c(
    "neighbor_percent_permanent_water = 0",
    "mine_dist_permanent_water_neighbor = 0"
  ),
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)

# Show the joint test table.
print(f_test_results)
Linear hypothesis test:
neighbor_percent_permanent_water = 0
mine_dist_permanent_water_neighbor = 0
Model 1: restricted model
Model 2: conflict_occurrence ~ log_distance_to_active_mine + hexgrid_percent_permanent_water +
neighbor_percent_permanent_water + mine_dist_permanent_water_neighbor +
factor(year) + factor(country)
Note: Coefficient covariance matrix supplied.
Res.Df Df Chisq Pr(>Chisq)
1 49966
2 49964 2 0.9059 0.6358
Let’s look now at permanent water. Neither the hexgrid nor the neighbor variable is statistically significant. Including them doesn’t really change the treatment effect. Hexgrid percent permanent water becomes statistically significant when I add the interaction term between hexgrid permanent water and distance to mine. The p-value for the joint significance test of these two variables (a Wald chi-square test, as printed) is slightly above 10%. Again, the treatment effect doesn’t really change when I add these additional controls. Interacting distance to mine with neighbor percent permanent water doesn’t result in any statistically significant results.
# Add the permanent-water share of the hexgrid and of its neighbors to the
# dummy-only specification.
plm_model_permanent_water <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    hexgrid_percent_permanent_water + neighbor_percent_permanent_water +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)

# Coefficient table based on the HC0 robust covariance clustered by group.
# The covariance must come from the model being tested: this call previously
# passed plm_model_irrigated_ag, so the standard errors shown belonged to a
# different specification.
plm_model_coeftest_results <- coeftest(
  plm_model_permanent_water,
  vcov. = vcovHC(plm_model_permanent_water, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02872180 0.00804489 3.5702 0.0003571 ***
log_distance_to_nearest_mine -0.00266923 0.00067135 -3.9759 0.000070214670762 ***
factor(year)2019 0.00096047 0.00079229 1.2123 0.2254160
factor(year)2020 0.00424213 0.00094945 4.4680 0.000007913215197 ***
factor(year)2021 0.00728371 0.00103219 7.0565 0.000000000001729 ***
factor(country)Kazakhstan 0.00938859 0.00203013 4.6246 0.000003762135280 ***
factor(country)Kyrgyzstan 0.00478763 0.00097882 4.8912 0.000001005221518 ***
factor(country)Tajikistan -0.00248718 0.00135066 -1.8415 0.0655614 .
factor(country)Uzbekistan 0.00963483 0.00306663 3.1418 0.0016799 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Interaction: log distance to nearest mine x hexgrid permanent-water share.
subsample_panel_2018_2022 <- mutate(
  subsample_panel_2018_2022,
  mine_dist_permanent_water =
    log_distance_to_nearest_mine * hexgrid_percent_permanent_water
)

# Permanent-water model extended with the hexgrid interaction term.
plm_model_interaction <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    hexgrid_percent_permanent_water + neighbor_percent_permanent_water +
    mine_dist_permanent_water +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)

# Coefficient table based on the HC0 robust covariance clustered by group.
plm_model_coeftest_results_interaction <- coeftest(
  plm_model_interaction,
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results_interaction)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02912276 0.00795408 3.6614 0.0002511 ***
log_distance_to_nearest_mine -0.00270332 0.00066366 -4.0734 0.000046407914344 ***
hexgrid_percent_permanent_water -0.08171053 0.03875370 -2.1085 0.0349964 *
neighbor_percent_permanent_water 0.02847405 0.03390859 0.8397 0.4010640
mine_dist_permanent_water 0.00502165 0.00326369 1.5386 0.1238981
factor(year)2019 0.00096085 0.00079232 1.2127 0.2252509
factor(year)2020 0.00424186 0.00094968 4.4666 0.000007963541380 ***
factor(year)2021 0.00728331 0.00103231 7.0554 0.000000000001744 ***
factor(country)Kazakhstan 0.00937635 0.00208489 4.4973 0.000006897936161 ***
factor(country)Kyrgyzstan 0.00473809 0.00101281 4.6781 0.000002902376824 ***
factor(country)Tajikistan -0.00246758 0.00117398 -2.1019 0.0355684 *
factor(country)Uzbekistan 0.00959254 0.00195878 4.8972 0.000000975210790 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Joint significance of the hexgrid permanent-water share and its interaction
# with mine distance, using the robust covariance. With no `test` argument
# given, the statistic printed for this call is a Chisq (Wald) statistic.
f_test_results <- linearHypothesis(
  plm_model_interaction,
  c(
    "hexgrid_percent_permanent_water = 0",
    "mine_dist_permanent_water = 0"
  ),
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)

# Show the joint test table.
print(f_test_results)
Linear hypothesis test:
hexgrid_percent_permanent_water = 0
mine_dist_permanent_water = 0
Model 1: restricted model
Model 2: conflict_occurrence ~ log_distance_to_nearest_mine + hexgrid_percent_permanent_water +
neighbor_percent_permanent_water + mine_dist_permanent_water +
factor(year) + factor(country)
Note: Coefficient covariance matrix supplied.
Res.Df Df Chisq Pr(>Chisq)
1 49966
2 49964 2 4.4985 0.1055
# Interaction: log distance to nearest mine x neighbor permanent-water share.
subsample_panel_2018_2022 <- mutate(
  subsample_panel_2018_2022,
  mine_dist_permanent_water_neighbor =
    log_distance_to_nearest_mine * neighbor_percent_permanent_water
)

# Permanent-water model extended with the neighbor interaction term.
plm_model_interaction <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    hexgrid_percent_permanent_water + neighbor_percent_permanent_water +
    mine_dist_permanent_water_neighbor +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)

# Coefficient table based on the HC0 robust covariance clustered by group.
plm_model_coeftest_results_interaction <- coeftest(
  plm_model_interaction,
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results_interaction)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02917289 0.00798528 3.6533 0.0002591 ***
log_distance_to_nearest_mine -0.00270768 0.00066637 -4.0633 0.000048457018141 ***
hexgrid_percent_permanent_water -0.02836038 0.02378071 -1.1926 0.2330397
neighbor_percent_permanent_water -0.04319946 0.07240054 -0.5967 0.5507283
mine_dist_permanent_water_neighbor 0.00673715 0.00522401 1.2896 0.1971783
factor(year)2019 0.00096089 0.00079232 1.2128 0.2252276
factor(year)2020 0.00424179 0.00094967 4.4666 0.000007965200195 ***
factor(year)2021 0.00728314 0.00103231 7.0552 0.000000000001746 ***
factor(country)Kazakhstan 0.00937453 0.00208505 4.4961 0.000006937688168 ***
factor(country)Kyrgyzstan 0.00473021 0.00101645 4.6536 0.000003269585948 ***
factor(country)Tajikistan -0.00246069 0.00117159 -2.1003 0.0357073 *
factor(country)Uzbekistan 0.00958973 0.00195948 4.8940 0.000000991001440 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Joint significance of the neighbor permanent-water share and its interaction
# with mine distance, using the robust covariance. With no `test` argument
# given, the statistic printed for this call is a Chisq (Wald) statistic.
f_test_results <- linearHypothesis(
  plm_model_interaction,
  c(
    "neighbor_percent_permanent_water = 0",
    "mine_dist_permanent_water_neighbor = 0"
  ),
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)

# Show the joint test table.
print(f_test_results)
Linear hypothesis test:
neighbor_percent_permanent_water = 0
mine_dist_permanent_water_neighbor = 0
Model 1: restricted model
Model 2: conflict_occurrence ~ log_distance_to_nearest_mine + hexgrid_percent_permanent_water +
neighbor_percent_permanent_water + mine_dist_permanent_water_neighbor +
factor(year) + factor(country)
Note: Coefficient covariance matrix supplied.
Res.Df Df Chisq Pr(>Chisq)
1 49966
2 49964 2 3.1581 0.2062
No statistically significant relationships for seasonal water.
# Add the seasonal-water share of the hexgrid and of its neighbors to the
# dummy-only specification.
plm_model_seasonal_water <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    hexgrid_percent_seasonal_water + neighbor_percent_seasonal_water +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)

# Coefficient table based on this model's own HC0 robust covariance,
# clustered by group.
plm_model_coeftest_results <- coeftest(
  plm_model_seasonal_water,
  vcov. = vcovHC(plm_model_seasonal_water, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02873051 0.00779534 3.6856 0.0002284 ***
log_distance_to_nearest_mine -0.00266768 0.00065015 -4.1032 0.000040816547191 ***
hexgrid_percent_seasonal_water 0.00022641 0.01723529 0.0131 0.9895190
neighbor_percent_seasonal_water -0.00864419 0.02088937 -0.4138 0.6790165
factor(year)2019 0.00096026 0.00079229 1.2120 0.2255168
factor(year)2020 0.00424144 0.00094962 4.4664 0.000007970640422 ***
factor(year)2021 0.00728340 0.00103226 7.0558 0.000000000001739 ***
factor(country)Kazakhstan 0.00959415 0.00213312 4.4977 0.000006884477163 ***
factor(country)Kyrgyzstan 0.00479166 0.00099731 4.8046 0.000001555091054 ***
factor(country)Tajikistan -0.00244710 0.00117398 -2.0844 0.0371247 *
factor(country)Uzbekistan 0.00975006 0.00196089 4.9723 0.000000663968954 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Interaction: log distance to nearest mine x hexgrid seasonal-water share.
subsample_panel_2018_2022 <- mutate(
  subsample_panel_2018_2022,
  mine_dist_seasonal_water =
    log_distance_to_nearest_mine * hexgrid_percent_seasonal_water
)

# Seasonal-water model extended with the hexgrid interaction term.
plm_model_interaction <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    hexgrid_percent_seasonal_water + neighbor_percent_seasonal_water +
    mine_dist_seasonal_water +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)

# Coefficient table based on the HC0 robust covariance clustered by group.
plm_model_coeftest_results_interaction <- coeftest(
  plm_model_interaction,
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results_interaction)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02910441 0.00790325 3.6826 0.0002311 ***
log_distance_to_nearest_mine -0.00269997 0.00065944 -4.0943 0.000042409091129 ***
hexgrid_percent_seasonal_water -0.05681729 0.10366032 -0.5481 0.5836186
neighbor_percent_seasonal_water -0.01193005 0.02310429 -0.5164 0.6056077
mine_dist_seasonal_water 0.00538165 0.00995388 0.5407 0.5887457
factor(year)2019 0.00095864 0.00079243 1.2097 0.2263824
factor(year)2020 0.00424073 0.00094965 4.4656 0.000008002425993 ***
factor(year)2021 0.00728323 0.00103227 7.0555 0.000000000001742 ***
factor(country)Kazakhstan 0.00956221 0.00213503 4.4787 0.000007525378731 ***
factor(country)Kyrgyzstan 0.00476695 0.00100026 4.7657 0.000001887311911 ***
factor(country)Tajikistan -0.00242860 0.00117155 -2.0730 0.0381796 *
factor(country)Uzbekistan 0.00976851 0.00195881 4.9870 0.000000615432905 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Joint significance of the hexgrid seasonal-water share and its interaction
# with mine distance, using the robust covariance. With no `test` argument
# given, the statistic printed for this call is a Chisq (Wald) statistic.
f_test_results <- linearHypothesis(
  plm_model_interaction,
  c(
    "hexgrid_percent_seasonal_water = 0",
    "mine_dist_seasonal_water = 0"
  ),
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)

# Show the joint test table.
print(f_test_results)
Linear hypothesis test:
hexgrid_percent_seasonal_water = 0
mine_dist_seasonal_water = 0
Model 1: restricted model
Model 2: conflict_occurrence ~ log_distance_to_nearest_mine + hexgrid_percent_seasonal_water +
neighbor_percent_seasonal_water + mine_dist_seasonal_water +
factor(year) + factor(country)
Note: Coefficient covariance matrix supplied.
Res.Df Df Chisq Pr(>Chisq)
1 49966
2 49964 2 0.3004 0.8605
# Interaction: log distance to nearest mine x neighbor seasonal-water share.
subsample_panel_2018_2022 <- mutate(
  subsample_panel_2018_2022,
  mine_dist_seasonal_water_neighbor =
    log_distance_to_nearest_mine * neighbor_percent_seasonal_water
)

# Seasonal-water model extended with the neighbor interaction term.
plm_model_interaction <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    hexgrid_percent_seasonal_water + neighbor_percent_seasonal_water +
    mine_dist_seasonal_water_neighbor +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)

# Coefficient table based on the HC0 robust covariance clustered by group.
plm_model_coeftest_results_interaction <- coeftest(
  plm_model_interaction,
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results_interaction)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02867384 0.00790315 3.6282 0.0002857 ***
log_distance_to_nearest_mine -0.00266274 0.00065951 -4.0374 0.000054119108006 ***
hexgrid_percent_seasonal_water 0.00025150 0.01717654 0.0146 0.9883176
neighbor_percent_seasonal_water 0.00236221 0.29062534 0.0081 0.9935149
mine_dist_seasonal_water_neighbor -0.00098914 0.02572960 -0.0384 0.9693340
factor(year)2019 0.00096054 0.00079257 1.2119 0.2255417
factor(year)2020 0.00424159 0.00094980 4.4658 0.000007995307241 ***
factor(year)2021 0.00728346 0.00103234 7.0553 0.000000000001745 ***
factor(country)Kazakhstan 0.00960078 0.00214622 4.4733 0.000007717584001 ***
factor(country)Kyrgyzstan 0.00479480 0.00099427 4.8224 0.000001422510485 ***
factor(country)Tajikistan -0.00245108 0.00118229 -2.0732 0.0381620 *
factor(country)Uzbekistan 0.00974607 0.00196292 4.9651 0.000000688956868 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Joint significance of the neighbor seasonal-water share and its interaction
# with mine distance, using the robust covariance. With no `test` argument
# given, the statistic printed for this call is a Chisq (Wald) statistic.
f_test_results <- linearHypothesis(
  plm_model_interaction,
  c(
    "neighbor_percent_seasonal_water = 0",
    "mine_dist_seasonal_water_neighbor = 0"
  ),
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)

# Show the joint test table.
print(f_test_results)
Linear hypothesis test:
neighbor_percent_seasonal_water = 0
mine_dist_seasonal_water_neighbor = 0
Model 1: restricted model
Model 2: conflict_occurrence ~ log_distance_to_nearest_mine + hexgrid_percent_seasonal_water +
neighbor_percent_seasonal_water + mine_dist_seasonal_water_neighbor +
factor(year) + factor(country)
Note: Coefficient covariance matrix supplied.
Res.Df Df Chisq Pr(>Chisq)
1 49966
2 49964 2 0.1837 0.9123
Distance to border is highly statistically significant. Including the logged version of distance to border as well as an interacted term (distance to border * distance to mine) causes the treatment effect to increase significantly (from about .003 to .01).
# Pooled model: add logged distance to border alongside logged distance
# to the nearest mine, with year and country dummies.
plm_model_borders <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    log_distance_to_border + factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)
# Coefficient tests using the HC0 covariance from vcovHC(), cluster = "group"
plm_model_coeftest_results <- coeftest(
  plm_model_borders,
  vcov. = vcovHC(plm_model_borders, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.04200067 0.00863024 4.8667 0.000001137627425 ***
log_distance_to_nearest_mine -0.00233262 0.00066768 -3.4936 0.0004769 ***
log_distance_to_border -0.00176375 0.00032395 -5.4444 0.000000052163866 ***
factor(year)2019 0.00096046 0.00079229 1.2123 0.2254196
factor(year)2020 0.00424204 0.00094965 4.4670 0.000007947498638 ***
factor(year)2021 0.00728350 0.00103227 7.0558 0.000000000001734 ***
factor(year)2022 0.00344165 0.00088713 3.8795 0.0001048 ***
factor(country)Kazakhstan 0.00722508 0.00203925 3.5430 0.0003959 ***
factor(country)Kyrgyzstan 0.00459895 0.00102857 4.4712 0.000007791589301 ***
factor(country)Tajikistan -0.00546789 0.00139235 -3.9271 0.000086067122272 ***
factor(country)Uzbekistan 0.00895574 0.00198976 4.5009 0.000006778605126 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Interaction term: log distance to nearest mine x log distance to border
subsample_panel_2018_2022 <- subsample_panel_2018_2022 %>%
  mutate(
    mine_dist_border = log_distance_to_nearest_mine * log_distance_to_border
  )
# Pooled model with both main effects and their interaction
plm_model_interaction <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    log_distance_to_border + mine_dist_border +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)
# Coefficient tests using the HC0 covariance from vcovHC(), cluster = "group"
plm_model_coeftest_results_interaction <- coeftest(
  plm_model_interaction,
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results_interaction)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.11782216 0.03434121 3.4309 0.0006019 ***
log_distance_to_nearest_mine -0.00972360 0.00317693 -3.0607 0.0022092 **
log_distance_to_border -0.01065693 0.00365640 -2.9146 0.0035627 **
mine_dist_border 0.00084952 0.00033656 2.5241 0.0116007 *
factor(year)2019 0.00096046 0.00079229 1.2123 0.2254196
factor(year)2020 0.00424204 0.00094965 4.4670 0.000007947498861 ***
factor(year)2021 0.00728350 0.00103227 7.0558 0.000000000001734 ***
factor(year)2022 0.00344165 0.00088713 3.8795 0.0001048 ***
factor(country)Kazakhstan 0.00835169 0.00201494 4.1449 0.000034044063294 ***
factor(country)Kyrgyzstan 0.00563689 0.00105837 5.3260 0.000000100742497 ***
factor(country)Tajikistan -0.00453093 0.00131242 -3.4523 0.0005561 ***
factor(country)Uzbekistan 0.01046177 0.00196827 5.3152 0.000000106907119 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Joint significance test for the border main effect and its interaction
# with distance to mine.
# Note: despite the object name, linearHypothesis() reports a Wald
# chi-square statistic here (its default test), not an F statistic.
f_test_results <- linearHypothesis(
  plm_model_interaction,
  c(
    "log_distance_to_border = 0",
    "mine_dist_border = 0"
  ),
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)
# Show the joint test table
print(f_test_results)
Linear hypothesis test:
log_distance_to_border = 0
mine_dist_border = 0
Model 1: restricted model
Model 2: conflict_occurrence ~ log_distance_to_nearest_mine + log_distance_to_border +
mine_dist_border + factor(year) + factor(country)
Note: Coefficient covariance matrix supplied.
Res.Df Df Chisq Pr(>Chisq)
1 62460
2 62458 2 30.308 0.0000002622 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
What happens when we make a non-parametric version of the treatment variable? Start by creating a non-parametric treatment variable based on 5km buckets:
# Express distance to the nearest (active or inactive) mine in kilometers
# by dividing the original variable (in meters) by 1000.
subsample_panel_2018_2022 <- subsample_panel_2018_2022 %>%
  mutate(
    distance_to_mine_active_inactive_km =
      distance_to_nearest_mine_active_inactive / 1000
  )
# Sanity-check the range of the converted variable
summary(subsample_panel_2018_2022$distance_to_mine_active_inactive_km)
total sum of squares: 124818700
id time
1 0
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00 27.57 53.04 63.02 91.85 219.32
# Bin distance to the nearest mine (km) into 5km-wide buckets up to 50km,
# with everything beyond 50km collected in a single open-ended bucket.
subsample_panel_2018_2022$distance_bucket <- cut(
  subsample_panel_2018_2022$distance_to_mine_active_inactive_km,
  # Break points: 0, 5, 10, ..., 50, Inf
  breaks = c(seq(0, 50, by = 5), Inf),
  # Labels: "0-5km", "5-10km", ..., "45-50km", ">50km"
  labels = c(
    paste0(seq(0, 45, by = 5), "-", seq(5, 50, by = 5), "km"),
    ">50km"
  ),
  right = TRUE, # intervals are closed on the right (0-5km includes 5km)
  include.lowest = TRUE # the first bucket also includes exactly 0km
)
# Number of observations in each bucket
table(subsample_panel_2018_2022$distance_bucket)
0-5km 5-10km 10-15km 15-20km 20-25km 25-30km 30-35km 35-40km 40-45km 45-50km >50km
2485 2435 2785 3005 3230 3400 3320 3270 3030 2765 32975
What is the mean of conflict_occurrence by distance bucket to mine?
# Mean of conflict_occurrence within each 5km distance bucket
mean_conflict_by_bucket <- subsample_panel_2018_2022 %>%
  group_by(distance_bucket) %>%
  summarise(
    mean_conflict = mean(conflict_occurrence, na.rm = TRUE),
    count = n() # number of observations in the bucket
  )
# Show the per-bucket table
print(mean_conflict_by_bucket)
What is the mean count of conflicts by distance bucket to mine?
# Mean of event_id_cnty (the conflict count) within each 5km distance
# bucket. The output column is still called mean_conflict.
mean_count_by_bucket <- subsample_panel_2018_2022 %>%
  group_by(distance_bucket) %>%
  summarise(
    mean_conflict = mean(event_id_cnty, na.rm = TRUE),
    count = n() # number of observations in the bucket
  )
# Show the per-bucket table
print(mean_count_by_bucket)
Redo everything but with 10km buckets
# Bin distance to the nearest mine (km) into 10km-wide buckets up to 50km,
# with everything beyond 50km collected in a single open-ended bucket.
subsample_panel_2018_2022$distance_bucket_10km <- cut(
  subsample_panel_2018_2022$distance_to_mine_active_inactive_km,
  # Break points: 0, 10, 20, 30, 40, 50, Inf
  breaks = c(seq(0, 50, by = 10), Inf),
  # Labels: "0-10km", "10-20km", ..., "40-50km", ">50km"
  labels = c(
    paste0(seq(0, 40, by = 10), "-", seq(10, 50, by = 10), "km"),
    ">50km"
  ),
  right = TRUE, # intervals are closed on the right (0-10km includes 10km)
  include.lowest = TRUE # the first bucket also includes exactly 0km
)
# Number of observations in each bucket
table(subsample_panel_2018_2022$distance_bucket_10km)
0-10km 10-20km 20-30km 30-40km 40-50km >50km
4920 5790 6630 6590 5795 32975
What is the mean of conflict_occurrence by distance bucket to mine?
# Mean of conflict_occurrence within each 10km distance bucket
mean_conflict_by_bucket_10km <- subsample_panel_2018_2022 %>%
  group_by(distance_bucket_10km) %>%
  summarise(
    mean_conflict = mean(conflict_occurrence, na.rm = TRUE),
    count = n() # number of observations in the bucket
  )
# Show the per-bucket table
print(mean_conflict_by_bucket_10km)
What is the mean count of conflicts by distance bucket to mine? It looks like there is a decline, then a spike around 30-40km, then a decline again. What is going on? Could it be that, on average, mines are located close to smaller towns or population centers and the nearest city is in this range (30-40km)? It looks like there is a spike in population in this distance bin (second table): mean population at 30-40km is almost double that at 0-10km, yet the average conflict count is about the same!
# Mean of event_id_cnty (the conflict count) within each 10km distance
# bucket. The output column is still called mean_conflict.
mean_count_by_bucket_10km <- subsample_panel_2018_2022 %>%
  group_by(distance_bucket_10km) %>%
  summarise(
    mean_conflict = mean(event_id_cnty, na.rm = TRUE),
    count = n() # number of observations in the bucket
  )
print(mean_count_by_bucket_10km)

# Mean hexgrid population (hexgrid_landscan_pop) by 10km distance bucket
mean_pop_by_bucket_10km <- subsample_panel_2018_2022 %>%
  group_by(distance_bucket_10km) %>%
  summarise(
    mean_pop = mean(hexgrid_landscan_pop, na.rm = TRUE),
    count = n() # number of observations in the bucket
  )
print(mean_pop_by_bucket_10km)

# Mean hexgrid_percent_irrigated_ag by 10km distance bucket
mean_irrigated_by_bucket_10km <- subsample_panel_2018_2022 %>%
  group_by(distance_bucket_10km) %>%
  summarise(
    mean_irrigated_ag = mean(hexgrid_percent_irrigated_ag, na.rm = TRUE),
    count = n() # number of observations in the bucket
  )
print(mean_irrigated_by_bucket_10km)
Let’s include this non-parametric treatment variable in some regressions. Reference category is >50km from a mine. Note: second two output tables control for population.
# Make ">50km" the reference (first) level of both bucket factors, so the
# bucket coefficients in the regressions are relative to that category.
subsample_panel_2018_2022$distance_bucket <- relevel(
  as.factor(subsample_panel_2018_2022$distance_bucket), ref = ">50km"
)
subsample_panel_2018_2022$distance_bucket_10km <- relevel(
  as.factor(subsample_panel_2018_2022$distance_bucket_10km), ref = ">50km"
)
# Pooled model with the 10km distance buckets as the treatment variable,
# plus year and country dummies.
plm_model <- plm(
  conflict_occurrence ~ factor(distance_bucket_10km) +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)
# Coefficient tests using the HC0 covariance from vcovHC(), cluster = "group"
plm_model_coeftest_results <- coeftest(
  plm_model,
  vcov. = vcovHC(plm_model, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.00318553 0.00057356 -5.5539 0.00000002804748556 ***
factor(distance_bucket_10km)0-10km 0.01099300 0.00282772 3.8876 0.0001014 ***
factor(distance_bucket_10km)10-20km 0.00631845 0.00252180 2.5055 0.0122291 *
factor(distance_bucket_10km)20-30km 0.00435630 0.00199925 2.1790 0.0293374 *
factor(distance_bucket_10km)30-40km 0.00556190 0.00223600 2.4874 0.0128695 *
factor(distance_bucket_10km)40-50km 0.00120081 0.00201965 0.5946 0.5521355
factor(year)2019 0.00096046 0.00079229 1.2123 0.2254196
factor(year)2020 0.00424204 0.00094965 4.4670 0.00000794749930601 ***
factor(year)2021 0.00728350 0.00103227 7.0558 0.00000000000173380 ***
factor(year)2022 0.00344165 0.00088713 3.8795 0.0001048 ***
factor(country)Kazakhstan 0.01033435 0.00200971 5.1422 0.00000027233197179 ***
factor(country)Kyrgyzstan 0.00563064 0.00073186 7.6936 0.00000000000001451 ***
factor(country)Tajikistan -0.00097165 0.00086575 -1.1223 0.2617303
factor(country)Uzbekistan 0.01079633 0.00191789 5.6293 0.00000001817382706 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Pooled model with the 5km distance buckets as the treatment variable,
# plus year and country dummies.
plm_model <- plm(
  conflict_occurrence ~ factor(distance_bucket) +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)
# Coefficient tests using the HC0 covariance from vcovHC(), cluster = "group"
plm_model_coeftest_results <- coeftest(
  plm_model,
  vcov. = vcovHC(plm_model, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.00318553 0.00057356 -5.5539 0.0000000280474947 ***
factor(distance_bucket)0-5km 0.01284881 0.00408631 3.1444 0.0016653 **
factor(distance_bucket)5-10km 0.00910176 0.00374401 2.4310 0.0150591 *
factor(distance_bucket)10-15km 0.00512845 0.00309045 1.6594 0.0970302 .
factor(distance_bucket)15-20km 0.00741935 0.00381163 1.9465 0.0515990 .
factor(distance_bucket)20-25km 0.00659982 0.00291064 2.2675 0.0233643 *
factor(distance_bucket)25-30km 0.00222091 0.00255988 0.8676 0.3856261
factor(distance_bucket)30-35km 0.00540271 0.00323168 1.6718 0.0945698 .
factor(distance_bucket)35-40km 0.00572472 0.00287609 1.9904 0.0465458 *
factor(distance_bucket)40-45km 0.00411078 0.00320832 1.2813 0.2000967
factor(distance_bucket)45-50km -0.00198423 0.00205503 -0.9655 0.3342749
factor(year)2019 0.00096046 0.00079229 1.2123 0.2254196
factor(year)2020 0.00424204 0.00094965 4.4670 0.0000079475004196 ***
factor(year)2021 0.00728350 0.00103227 7.0558 0.0000000000017338 ***
factor(year)2022 0.00344165 0.00088713 3.8795 0.0001048 ***
factor(country)Kazakhstan 0.01033435 0.00200971 5.1422 0.0000002723320373 ***
factor(country)Kyrgyzstan 0.00564952 0.00073085 7.7301 0.0000000000000109 ***
factor(country)Tajikistan -0.00098301 0.00086462 -1.1369 0.2555741
factor(country)Uzbekistan 0.01074894 0.00191532 5.6121 0.0000000200734455 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Same 10km-bucket model, now also controlling for hexgrid population
plm_model <- plm(
  conflict_occurrence ~ factor(distance_bucket_10km) +
    factor(year) + factor(country) + hexgrid_landscan_pop,
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)
# Coefficient tests using the HC0 covariance from vcovHC(), cluster = "group"
plm_model_coeftest_results <- coeftest(
  plm_model,
  vcov. = vcovHC(plm_model, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.00313532946 0.00057147163 -5.4864 0.0000000411764646403 ***
factor(distance_bucket_10km)0-10km 0.00867673103 0.00258735395 3.3535 0.0007984 ***
factor(distance_bucket_10km)10-20km 0.00510403843 0.00232874275 2.1918 0.0284007 *
factor(distance_bucket_10km)20-30km 0.00166694845 0.00171787471 0.9704 0.3318733
factor(distance_bucket_10km)30-40km -0.00195321011 0.00200776033 -0.9728 0.3306414
factor(distance_bucket_10km)40-50km -0.00305747365 0.00188192789 -1.6246 0.1042423
factor(year)2019 0.00089444184 0.00079173287 1.1297 0.2585957
factor(year)2020 0.00411107709 0.00094711920 4.3406 0.0000142309415592184 ***
factor(year)2021 0.00709356491 0.00102823875 6.8988 0.0000000000052958645 ***
factor(year)2022 0.00316346421 0.00088063206 3.5923 0.0003281 ***
factor(country)Kazakhstan 0.00587303154 0.00184461056 3.1839 0.0014538 **
factor(country)Kyrgyzstan 0.00537382682 0.00065844725 8.1614 0.0000000000000003374 ***
factor(country)Tajikistan -0.00633010064 0.00092986082 -6.8076 0.0000000000100148394 ***
factor(country)Uzbekistan -0.01003674532 0.00166712856 -6.0204 0.0000000017497672964 ***
hexgrid_landscan_pop 0.00000467295 0.00000038144 12.2509 < 0.00000000000000022 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 5km-bucket model, also controlling for hexgrid population
plm_model <- plm(
  conflict_occurrence ~ factor(distance_bucket) +
    factor(year) + factor(country) + hexgrid_landscan_pop,
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)
# Coefficient tests using the HC0 covariance from vcovHC(), cluster = "group"
plm_model_coeftest_results <- coeftest(
  plm_model,
  vcov. = vcovHC(plm_model, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.00313530839 0.00057147173 -5.4864 0.0000000411852956053 ***
factor(distance_bucket)0-5km 0.00934445974 0.00367004694 2.5461 0.0108945 *
factor(distance_bucket)5-10km 0.00799271971 0.00354116212 2.2571 0.0240059 *
factor(distance_bucket)10-15km 0.00454172885 0.00276588187 1.6421 0.1005837
factor(distance_bucket)15-20km 0.00562416244 0.00361630370 1.5552 0.1198978
factor(distance_bucket)20-25km 0.00437382054 0.00254290471 1.7200 0.0854356 .
factor(distance_bucket)25-30km -0.00090593118 0.00217966499 -0.4156 0.6776832
factor(distance_bucket)30-35km -0.00127425324 0.00296948348 -0.4291 0.6678402
factor(distance_bucket)35-40km -0.00264415063 0.00254154085 -1.0404 0.2981706
factor(distance_bucket)40-45km 0.00057214365 0.00299014788 0.1913 0.8482576
factor(distance_bucket)45-50km -0.00703335376 0.00197842898 -3.5550 0.0003782 ***
factor(year)2019 0.00089441414 0.00079173112 1.1297 0.2586094
factor(year)2020 0.00411102213 0.00094711977 4.3406 0.0000142348689554029 ***
factor(year)2021 0.00709348521 0.00102824188 6.8987 0.0000000000052995387 ***
factor(year)2022 0.00316334747 0.00088063911 3.5921 0.0003283 ***
factor(country)Kazakhstan 0.00587115943 0.00184453885 3.1830 0.0014583 **
factor(country)Kyrgyzstan 0.00537687426 0.00065799218 8.1716 0.0000000000000003099 ***
factor(country)Tajikistan -0.00632055345 0.00092974628 -6.7981 0.0000000000106920725 ***
factor(country)Uzbekistan -0.01006523496 0.00166673640 -6.0389 0.0000000015605267229 ***
hexgrid_landscan_pop 0.00000467492 0.00000038132 12.2597 < 0.00000000000000022 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Putting together some of the more meaningful explanatory variables based on the above in a single model - with a parametric and non-parametric treatment variable. Why does irrigated ag lose its statistical significance when I add in distance to border and population?
# Combined pooled model with the parametric treatment (log distance to
# nearest mine): population, distance to border (in levels, not logged),
# and the irrigated-ag terms, plus year and country dummies.
plm_model_interaction <- plm(
  conflict_occurrence ~ log_distance_to_nearest_mine +
    hexgrid_landscan_pop + neighbor_landscan_pop +
    distance_to_border +
    hexgrid_percent_irrigated_ag + neighbor_percent_irrigated_ag +
    mine_dist_irrigated_ag_neighbor +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)
# Coefficient tests using the HC0 covariance from vcovHC(), cluster = "group"
plm_model_coeftest_results_interaction <- coeftest(
  plm_model_interaction,
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results_interaction)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.021333362297 0.007773170687 2.7445 0.0060623 **
log_distance_to_nearest_mine -0.001861945976 0.000653106019 -2.8509 0.0043609 **
hexgrid_landscan_pop 0.000006309962 0.000000623731 10.1165 < 0.00000000000000022 ***
neighbor_landscan_pop -0.000000458724 0.000000111755 -4.1047 0.000040532919415 ***
distance_to_border -0.000000073295 0.000000018266 -4.0126 0.000060116277285 ***
hexgrid_percent_irrigated_ag 0.009096198734 0.007681170672 1.1842 0.2363303
neighbor_percent_irrigated_ag -0.000445816893 0.028482485942 -0.0157 0.9875118
mine_dist_irrigated_ag_neighbor -0.000091986560 0.002408976224 -0.0382 0.9695404
factor(year)2019 0.000913727531 0.000792094393 1.1536 0.2486855
factor(year)2020 0.004149721158 0.000947321010 4.3805 0.000011861043449 ***
factor(year)2021 0.007132630182 0.001028359840 6.9359 0.000000000004075 ***
factor(year)2022 0.003215691002 0.000881311309 3.6488 0.0002637 ***
factor(country)Kazakhstan 0.004090256274 0.001776579895 2.3023 0.0213204 *
factor(country)Kyrgyzstan 0.004195987460 0.001022770734 4.1026 0.000040910555720 ***
factor(country)Tajikistan -0.009008886500 0.001317117764 -6.8398 0.000000000008000 ***
factor(country)Uzbekistan -0.012379661185 0.002109647822 -5.8681 0.000000004429880 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Joint significance test for the two neighbor irrigated-ag terms.
# Note: despite the object name, linearHypothesis() reports a Wald
# chi-square statistic here (its default test), not an F statistic.
f_test_results <- linearHypothesis(
  plm_model_interaction,
  c(
    "neighbor_percent_irrigated_ag = 0",
    "mine_dist_irrigated_ag_neighbor = 0"
  ),
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)
# Show the joint test table
print(f_test_results)
Linear hypothesis test:
neighbor_percent_irrigated_ag = 0
mine_dist_irrigated_ag_neighbor = 0
Model 1: restricted model
Model 2: conflict_occurrence ~ log_distance_to_nearest_mine + hexgrid_landscan_pop +
neighbor_landscan_pop + distance_to_border + hexgrid_percent_irrigated_ag +
neighbor_percent_irrigated_ag + mine_dist_irrigated_ag_neighbor +
factor(year) + factor(country)
Note: Coefficient covariance matrix supplied.
Res.Df Df Chisq Pr(>Chisq)
1 62456
2 62454 2 0.0292 0.9855
# Combined pooled model with the non-parametric treatment (10km distance
# buckets): population, distance to border (in levels, not logged), and
# the irrigated-ag terms, plus year and country dummies.
plm_model_interaction <- plm(
  conflict_occurrence ~ factor(distance_bucket_10km) +
    hexgrid_landscan_pop + neighbor_landscan_pop +
    distance_to_border +
    hexgrid_percent_irrigated_ag + neighbor_percent_irrigated_ag +
    mine_dist_irrigated_ag_neighbor +
    factor(year) + factor(country),
  data = subsample_panel_2018_2022,
  model = "pooling",
  na.action = na.exclude
)
# Coefficient tests using the HC0 covariance from vcovHC(), cluster = "group"
plm_model_coeftest_results_interaction <- coeftest(
  plm_model_interaction,
  vcov. = vcovHC(plm_model_interaction, type = "HC0", cluster = "group")
)
print(plm_model_coeftest_results_interaction)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.000707794441 0.000794076774 -0.8913 0.3727488
factor(distance_bucket_10km)0-10km 0.006450149090 0.002604958913 2.4761 0.0132851 *
factor(distance_bucket_10km)10-20km 0.003393484389 0.002207681551 1.5371 0.1242676
factor(distance_bucket_10km)20-30km 0.000213942806 0.001659638361 0.1289 0.8974298
factor(distance_bucket_10km)30-40km -0.003084402952 0.002064068278 -1.4943 0.1350940
factor(distance_bucket_10km)40-50km -0.003197092512 0.001888073145 -1.6933 0.0904015 .
hexgrid_landscan_pop 0.000006301536 0.000000623022 10.1145 < 0.00000000000000022 ***
neighbor_landscan_pop -0.000000452576 0.000000111729 -4.0507 0.000051137076410 ***
distance_to_border -0.000000078445 0.000000016920 -4.6362 0.000003556141640 ***
hexgrid_percent_irrigated_ag 0.008805102743 0.007673907758 1.1474 0.2512175
neighbor_percent_irrigated_ag 0.018864523973 0.028958969124 0.6514 0.5147762
mine_dist_irrigated_ag_neighbor -0.001771756353 0.002445991017 -0.7244 0.4688529
factor(year)2019 0.000914352781 0.000792049074 1.1544 0.2483348
factor(year)2020 0.004152346283 0.000947386356 4.3829 0.000011727418102 ***
factor(year)2021 0.007132647384 0.001028308200 6.9363 0.000000000004064 ***
factor(year)2022 0.003214212862 0.000881104647 3.6479 0.0002646 ***
factor(country)Kazakhstan 0.004744988202 0.001782739371 2.6616 0.0077784 **
factor(country)Kyrgyzstan 0.006490352365 0.000744358786 8.7194 < 0.00000000000000022 ***
factor(country)Tajikistan -0.007466393921 0.001082166483 -6.8995 0.000000000005269 ***
factor(country)Uzbekistan -0.010482712785 0.002016509637 -5.1984 0.000000201594642 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1