library(tidyverse)
library(plm)
library(stargazer)
library(lmtest)
library(sandwich)
library(magrittr)
library(corrr)
library(knitr)
library(kableExtra)
library(PerformanceAnalytics)
library(hhi)

Descriptive statistics

Bump Plot - Top ranking countries recieving aid as a proportion of total aid given THAT YEAR

devtools::install_github("davidsjoberg/ggbump")
library(ggbump)

if(!require(ggflags)) devtools::install_github("rensa/ggflags")
library(ggflags)


pd_df %>% 
  # filter(pop > 999999) %>% 
  group_by(year) %>% 
  mutate(total_annual_budget = sum(budget)) %>% 
  ungroup() %>% 
  group_by(un_code, year) %>% 
  mutate(budget_proportion = budget / total_annual_budget) %>% 
  ungroup() %>% 
  group_by(year) %>%
  mutate(rank_budget = rank(-budget_proportion, ties.method = "random")) %>%
  ungroup() %>% 
  group_by(country_name) %>% 
  mutate(mean_rank = mean(rank_budget)) %>% 
  ungroup()  %>% 
  select(country_name, year, budget_proportion, rank_budget, mean_rank) -> hi

# Only keep top 10 mean recipient countries
bump_df <- hi %>% 
  filter(country_name %in% (hi %>% 
           distinct(country_name, mean_rank) %>% 
           top_n(10, -mean_rank) %>% 
           pull(country_name)))


bump_df$iso2 <- countrycode::countrycode(bump_df$country_name, "country.name", "iso2c")

bump_df$iso2 <- tolower(bump_df$iso2 )

bump_df$country_name  <- with(bump_df, reorder(country_name, mean_rank))


bump_df %>% 
  filter(year != "2013") %>% 
  ggplot(aes(year, rank_budget, 
             group = country_name, 
             color = country_name, 
             fill = country_name)) +
  geom_bump(aes(smooth = 10), 
            size = 1.5, 
            lineend = "round") + 
    geom_flag(data = bump_df %>% 
                filter(year == max(year)), 
            aes(country = iso2),
            size = 8,
            color = "black") +
  scale_y_reverse(breaks = 1:100) + 
  bbplot::bbc_style() + 
  theme(legend.text = element_text(size = 10),
        panel.grid = element_blank(),
        panel.background = element_rect(fill = "#C2C2C2", color = "transparent"),
        plot.background = element_rect(fill = "#C2C2C2"),
        text = element_text(color = "white"))

We see a very big increase in spending to Russia from low of 21st country to among top 5 countries from 2016 onwards

#Looking at top ranking countries with budget as a percentage of GDP per captita

pd_df %>% 
  group_by(country_name, year) %>% 
  mutate(budget_per_GDP = budget / GDP_per_cap_constant_2010 ) %>% 
  ungroup() %>% 
  group_by(year) %>%
  mutate(rank_budget = rank(-budget_per_GDP, ties.method = "min")) %>%
  ungroup() %>% 
  group_by(country_name) %>% 
  mutate(mean_rank = mean(-budget_per_GDP)) %>% 
  ungroup()  %>% 
  select(country_name, year, rank_budget, mean_rank) -> hi2 

# Only keep top 10 mean recipient countries
 bump_df <- hi2 %>%  filter(country_name %in% (hi2 %>% 
           distinct(country_name, mean_rank) %>% 
           top_n(10, -mean_rank) %>% 
           pull(country_name)))

bump_df$iso2 <- countrycode::countrycode(bump_df$country_name, "country.name", "iso2c")
bump_df$iso2 <- tolower(bump_df$iso2)
bump_df$country_name  <- with(bump_df, reorder(country_name, rank_budget))

bump_df %>% 
  filter(year != "2013") %>% 
  ggplot(aes(year, rank_budget, 
             group = country_name, 
             color = country_name, 
             fill = country_name)) +
  geom_bump(aes(smooth = 10), 
            size = 1.5, 
            lineend = "round") + 
    geom_flag(data = bump_df %>% 
             filter(year == max(year)), 
             aes(country = iso2),
             size = 8,
             color = "black") +
  scale_y_reverse(breaks = 1:100) + 
  bbplot::bbc_style() + 
  theme(legend.text = element_text(size = 10),
        panel.grid = element_blank(),
        panel.background = element_rect(fill = "#C2C2C2", color = "transparent"),
        plot.background = element_rect(fill = "#C2C2C2"),
        text = element_text(color = "white"))

pd_df %>% 
  group_by(country_name, year) %>% 
  mutate(budget_per_GDP = budget / GDP_per_cap_constant_2010 ) %>% 
  ungroup() %>% 
  group_by(year) %>%
  mutate(rank_budget = rank(-budget_per_GDP, ties.method = "min")) %>%
  ungroup() %>% 
  group_by(country_name) %>% 
  mutate(mean_rank = mean(budget_per_GDP)) %>% 
  ungroup()  %>% 
  select(country_name, year, rank_budget, mean_rank) %>% 

# Only keep top 10 mean recipient countries
  filter(country_name %in% (hi2 %>% 
           distinct(country_name, mean_rank) %>% 
           top_n(10, -mean_rank) %>% 
           pull(country_name)))

## # A tibble: 70 x 4
##    country_name      year rank_budget mean_rank
##    <chr>            <int>       <int>     <dbl>
##  1 Afghanistan       2017           1    79358.
##  2 Afghanistan       2016           1    79358.
##  3 Afghanistan       2013           1    79358.
##  4 Afghanistan       2014           1    79358.
##  5 Afghanistan       2018           1    79358.
##  6 Afghanistan       2015           1    79358.
##  7 Afghanistan       2019           1    79358.
##  8 Congo, Dem. Rep.  2015           6     7824.
##  9 Congo, Dem. Rep.  2018           3     7824.
## 10 Congo, Dem. Rep.  2016           3     7824.
## # ... with 60 more rows

bump_df$iso2 <- countrycode::countrycode(bump_df$country_name, "country.name", "iso2c")
bump_df$iso2 <- tolower(bump_df$iso2)
bump_df$country_name  <- with(bump_df, reorder(country_name, rank_budget))

bump_df %>% 
  filter(year != "2013") %>% 
  ggplot(aes(year, rank_budget, 
             group = country_name, 
             color = country_name, 
             fill = country_name)) +
  geom_bump(aes(smooth = 10), 
            size = 1.5, 
            lineend = "round") + 
    geom_flag(data = bump_df %>% 
             filter(year == max(year)), 
             aes(country = iso2),
             size = 8,
             color = "black") +
  scale_y_reverse(breaks = 1:100) + 
  bbplot::bbc_style() + 
  theme(legend.text = element_text(size = 10),
        # panel.grid = element_blank(),
        panel.background = element_rect(fill = "#C2C2C2", color = "transparent"),
        plot.background = element_rect(fill = "#C2C2C2"),
        text = element_text(color = "white"))


## Mean spending per country by the US per year

We can see a big drop in the average amount of money that went to each country in 2016! What happened in President Obama's last year? Or did budgets fall after Trump administration?

```r
pd_df %>% 
  filter(year != 2013) %>%
  group_by( year) %>% 
  summarise(mean_budget = mean(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = mean_budget)) + 
  geom_line(size = 2, alpha = 0.7) +
  # geom_smooth(method = "lm") +
    bbplot::bbc_style() + 
   theme(legend.position = "bottom",
         legend.text = element_text(size = 10))

Look at median budget per country per year

Median spending over the years a bit more volatile!

pd_df %>% 
  filter(year != 2013) %>%
  group_by(year) %>% 
  summarise(median_budget = median(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = median_budget)) + 
  geom_line(size = 2, alpha = 0.7) +
  # geom_smooth(method = "lm") +
    bbplot::bbc_style() + 
   theme(legend.position = "bottom",
         legend.text = element_text(size = 10))

## Extent of recipient concentration

Using the Hirschman - Herfindahl Index of Concentration formula

pd_df %>% 
   filter(pop > 999999) %>% 
  # group_by(year) %>% 
  mutate(total_annual_budget = sum(budget)) %>% 
  ungroup() %>% 
  group_by(un_code) %>% 
  mutate(budget_proportion = budget / total_annual_budget) %>% 
  ungroup() %>% 
  # filter(year == 2019) %>% 
  group_by(country_name) %>% 
  summarise(country_prop = sum(budget_proportion, na.rm = TRUE)) -> hhi_df

# Square the percentage "market share" for each country
hhi_df_square <- sapply(hhi_df$country_prop, function(x) x^2)

hhi_sum_df <- hhi_df %>%
  mutate(sum_square = sum(hhi_df_square, na.rm = TRUE))



# hhi_df_country <- hhi_df %>% pull(country_name)
# 
# hhi_df <- data.frame(hhi_df_country, hhi_df_square)
# 
# # Sum all squares together
# 
# hhi(hhi_df, "country_prop")
# 
# View(hhi_sum_square)

Top 10 budget recipient countries summed over the seven years

pd_df %>% 
  group_by(country.x) %>% 
  summarise(country_sum = sum(budget)) %>% 
  arrange(desc(country_sum)) %>% 
  head(n = 10) %>% kbl() %>% kable_paper("hover", full_width = F)

country.x	country_sum
Afghanistan	320227200
Pakistan	271562912
Iraq	93217522
Russia	83698073
India	79224873
China	73776214
Japan	73150831
Kenya	70679051
Indonesia	66611463
Brazil	65248400

Bottom 10 budget recipient countries - summed over the seven years

pd_df %>% 
  group_by(country.x) %>% 
  summarise(country_sum = sum(budget)) %>% 
  arrange(desc(country_sum)) %>% 
  tail(n = 10) %>% kbl() %>% kable_paper("hover", full_width = F)

country.x	country_sum
Vatican City	919013
Guyana	910827
Taiwan	851513
Cape Verde	796005
Guinea-Bissau	627305
Marshall Islands	429697
Palau	415811
The Bahamas	247400
The Gambia	183550
Curacao	5880

Top 10 budget per capita recipient countries summed over the seven years

Excluding population under 1 million

pd_df %>% 
  filter(pop > 999999) %>% 
  mutate(budget_pd = budget / pop ) %>% 
  group_by(country.x) %>% 
  summarise(country_sum = sum(budget_pd, na.rm = TRUE)) %>% 
  arrange(desc(country_sum)) %>% 
  head(n = 10) %>% kbl() %>% kable_paper("hover", full_width = F)

country.x	country_sum
Botswana	13.436588
Cyprus	10.089509
Afghanistan	9.201695
Moldova	7.840552
Georgia	7.756889
Swaziland	6.174381
Palestinian Territories	6.168170
Bahrain	5.852899
Bosnia and Herzegovina	5.382142
Estonia	4.835109

Bottom 10 budget per capita recipient countries summed over the seven years

Excluding population under 1 million

pd_df %>% 
  filter(pop > 999999) %>% 
  mutate(budget_pd = budget / pop ) %>% 
  group_by(country_name) %>% 
  summarise(country_sum = sum(budget_pd, na.rm = TRUE)) %>% 
  arrange(desc(country_sum)) %>% 
  tail(n = 10) %>% kbl() %>% kable_paper("hover", full_width = F)

country_name	country_sum
Indonesia	0.2582964
Nigeria	0.2425019
Angola	0.2399337
Madagascar	0.2259801
Somalia	0.2139121
Philippines	0.2083287
Sudan	0.1459121
Bangladesh	0.1242950
India	0.0603926
China	0.0537257

TOP 10 budget as a percentage of their GDP - summed over the seven years

Excluding population under 1 million

pd_df %>% 
  filter(pop > 999999) %>% 
  mutate(budget_per_GDP = budget / GDP_per_cap_constant_2010 ) %>% 
  group_by(country_name) %>% 
  summarise(country_sum = sum(budget_per_GDP, na.rm = TRUE)) %>% 
  arrange(desc(country_sum)) %>% 
  head(n = 10) %>% kbl() %>% kable_paper("hover", full_width = F)

country_name	country_sum
Afghanistan	555506.84
Pakistan	246565.56
Ethiopia	106876.77
Mozambique	73074.41
Kenya	65855.17
Congo, Dem. Rep.	54764.73
India	45287.30
Tanzania	37525.35
Uganda	28829.60
Zimbabwe	27249.68

Bottom 10 budget as a percentage of their GDP - summed over the seven years

Excluding population under 1 million

pd_df %>% 
  filter(pop > 999999) %>% 
  mutate(budget_per_GDP = budget / GDP_per_cap_constant_2010 ) %>% 
  group_by(country_name) %>% 
  summarise(country_sum = sum(budget_per_GDP, na.rm = TRUE)) %>% 
  arrange(desc(country_sum)) %>% 
  tail(n = 10) %>% kbl() %>% kable_paper("hover", full_width = F)

country_name	country_sum
Netherlands	196.28710
Sweden	160.77004
Equatorial Guinea	142.52215
Qatar	135.63607
Denmark	113.49647
Ireland	98.87856
Norway	81.39777
Switzerland	59.20861
Somalia	0.00000
Syrian Arab Republic	0.00000

TOP 10 budget as a PROPORTION OF TOTAL PD BUDGET ALLOCATION - summed over the seven years

Excluding population under 1 million

Afghanistan, Pakistan and Iraq account for almost 20% of all PD budget spending

pd_df %>% 
   filter(pop > 999999) %>% 
  # group_by(year) %>% 
  mutate(total_annual_budget = sum(budget)) %>% 
  ungroup() %>% 
  group_by(un_code) %>% 
  mutate(budget_proportion = budget / total_annual_budget) %>% 
  ungroup() %>% 
  group_by(country_name) %>% 
  summarise(country_sum = sum(budget_proportion, na.rm = TRUE)) %>% 
  arrange(desc(country_sum)) %>% 
  head(n = 20) %>% kbl() %>% kable_paper("hover", full_width = F)

country_name	country_sum
Afghanistan	0.0934030
Pakistan	0.0792088
Iraq	0.0271894
Russian Federation	0.0244128
India	0.0231081
China	0.0215189
Kenya	0.0206155
Indonesia	0.0194291
Brazil	0.0190315
South Africa	0.0172208
Ukraine	0.0169231
Germany	0.0163838
Japan	0.0163828
Ethiopia	0.0143528
Nigeria	0.0130050
Mexico	0.0123035
Mozambique	0.0118875
Israel	0.0115187
Korea, Rep.	0.0113068
Egypt, Arab Rep.	0.0109773

pd_df %>% 
   filter(pop > 999999) %>% 
  # group_by(year) %>% 
  mutate(total_annual_budget = sum(budget)) %>% 
  ungroup() %>% 
  group_by(un_code) %>% 
  mutate(budget_proportion = budget / total_annual_budget) %>% 
  ungroup() %>% 
  group_by(country_name) %>% 
  summarise(country_sum = sum(budget_proportion, na.rm = TRUE)) %>% 
  mutate(rank_budget = rank(country_sum)) %>%
  ungroup() %>% 
  mutate(var = ifelse(rank_budget < 150, "Rest", country_name)) %>% 
  group_by(var) %>% 
  summarise(sum_bud = sum(country_sum, na.rm = TRUE)) -> df3
  # slice_max(order_by = rank_budget, n = 5)  
  
# df4 <-  df3 %>% 
#     mutate(csum = rev(cumsum(rev(var))), 
#          pos = sum_bud/2 + lead(csum, 1),
#          pos = if_else(is.na(pos), sum_bud/2, pos),
#          csum = round(csum, 2),
#          pos = round(pos, 2))

df3 %>% 
  ggplot(aes(x="", y = sum_bud, 
             fill = fct_inorder(as.factor(var)))) +
  geom_bar(stat = "identity") +
  coord_polar("y", start = 1) +
  scale_fill_brewer(palette = "Set1") +
  guides(fill = guide_legend(title = "Country")) + 
  theme_void() +
  theme(legend.position = "bottom")  + 
  ggtitle("Comparing the rest of the countries (purple) with \n 
  Afghanistan, Iraq, Pakistan proportion of total budget")

  # ggrepel::geom_label_repel(data = df4, aes(y = round(pos, 2), label = paste0(pos, "%")), size = 5, nudge_x = 1, show.legend = FALSE) +

Smallest proportion of budget (EXCLUDING countries under 1 million)

pd_df %>% 
   filter(pop > 999999) %>% 
  mutate(total_annual_budget = sum(budget)) %>% 
  ungroup() %>% 
  group_by(un_code) %>% 
  mutate(budget_proportion = budget / total_annual_budget) %>% 
  ungroup() %>% 
  group_by(country_name) %>% 
  summarise(country_sum = sum(budget_proportion, na.rm = TRUE)) %>% 
  arrange(desc(country_sum)) %>% 
  tail(n = 10) %>% kbl() %>% kable_paper("hover", full_width = F)

country_name	country_sum
Lesotho	0.0011192
Timor-Leste	0.0011032
Mauritius	0.0010210
Somalia	0.0008679
Congo, Rep.	0.0008091
Gabon	0.0007296
Central African Republic	0.0005856
Equatorial Guinea	0.0005139
Gambia, The	0.0005050
Guinea-Bissau	0.0001830

Smallest proportion of budget (including countries under 1 million)

pd_df %>% 
  # filter(pop > 999999) %>% 
  group_by(year) %>% 
  mutate(total_annual_budget = sum(budget)) %>% 
  ungroup() %>% 
  group_by(un_code, year) %>% 
  mutate(budget_proportion = budget / total_annual_budget) %>% 
  ungroup() %>% 
  group_by(country_name) %>% 
  summarise(country_sum = sum(budget_proportion, na.rm = TRUE)) %>% 
  arrange(desc(country_sum)) %>% 
  tail(n = 10) %>% kbl() %>% kable_paper("hover", full_width = F)

country_name	country_sum
Gambia, The	0.0036858
Suriname	0.0029780
Malta	0.0028351
Samoa	0.0024180
Belize	0.0023101
Micronesia, Fed. Sts.	0.0020952
Guyana	0.0018022
Guinea-Bissau	0.0012978
Palau	0.0008973
Marshall Islands	0.0008554

Histogram distribution of total PD budget

library(skimr)
# skim(pd_df)

pd_df %>% 
ggplot(aes(x=log(budget))) + 
  geom_histogram( aes(fill = as.factor(year))) + facet_wrap(~year)

Histogram distribution of PD budget PER CAPITA

pd_df %>% 
ggplot(aes(x=log(budget_pc))) + 
  geom_histogram( aes(fill = as.factor(year))) + facet_wrap(~year)

Looking at Department of State Bureaus across the years.

Looking at the total budget across different Department of State bureau over the years 2014 to 2019

pd_df %>% 
  filter(year != 2013) %>% 
  group_by(bureau, year) %>% 
  summarise(sum_budget = sum(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = sum_budget)) + 
  geom_line(aes(color = bureau), size = 2, alpha = 0.7) +
    bbplot::bbc_style() + 
   theme(legend.position = "bottom",
         legend.text = element_text(size = 10))

Looking at the budget PER CAPITA across different Department of State bureau over the years 2014 to 2019

African Affairs (AF),
East Asian and Pacific Affairs (EAP),
European and Eurasian Affairs (EUR),
Near Eastern Affairs (NEA),
South and Central Asian Affairs (SCA)
and Western Hemisphere Affairs (WHA).

pd_df %>% 
  filter(year != 2013) %>% 
  group_by(bureau, year) %>% 
  summarise(sum_budget = sum(budget_pc, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = sum_budget)) + 
  geom_line(aes(color = bureau), size = 2, alpha = 0.7) +
    bbplot::bbc_style() + 
   theme(legend.position = "bottom",
         legend.text = element_text(size = 10))

Looking at income groups across the years.

Looking at the total budget across different economic groups over the years 2014 to 2019

pd_df %>% 
  filter(year != 2013) %>% 
  filter(!is.na(economy)) %>% 
  group_by(economy, year) %>% 
  summarise(sum_budget = sum(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = sum_budget)) + 
  geom_line(aes(color = economy), size = 2, alpha = 0.7) +
    bbplot::bbc_style() + 
   theme(legend.position = "bottom",
         legend.text = element_text(size = 10))

Looking at the budget PER CAPITA across different economic groups over the years 2014 to 2019

pd_df %>% 
  filter(year != 2013) %>% 
  filter(!is.na(economy)) %>% 
  group_by(economy, year) %>% 
  summarise(sum_budget = sum(budget_pc, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = sum_budget)) + 
  geom_line(aes(color = economy), size = 2, alpha = 0.7) +
    bbplot::bbc_style() + 
   theme(legend.position = "bottom",
         legend.text = element_text(size = 10))

Looking at sum of budget PER CAPITA across different economic groups over the years 2014 to 2019 with populations OVER 1 MILLION

pd_df %>% 
  filter(pop > 1000000) %>% 
  filter(year != 2013) %>% 
  filter(!is.na(economy)) %>% 
  group_by(economy, year) %>% 
  summarise(sum_budget = sum(budget_pc, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = sum_budget)) + 
  geom_line(aes(color = economy), size = 2, alpha = 0.7) +
    bbplot::bbc_style() + 
   theme(legend.position = "bottom",
         legend.text = element_text(size = 10))

Mean budgets per income groups

Looking at mean total budgets specifically in DEVELOPED regions

plyr::count(pd_df$economy)

##                            x freq
## 1    1. Developed region: G7   42
## 2 2. Developed region: nonG7  216
## 3   3. Emerging region: BRIC   28
## 4   4. Emerging region: MIKT   28
## 5    5. Emerging region: G20  126
## 6       6. Developing region  484
## 7  7. Least developed region  287
## 8                       <NA>   11

pd_df %>% 
  filter(year != 2013) %>% 
  filter(!is.na(economy)) %>% 
  filter(economy == "1. Developed region: G7" | economy == "2. Developed region: nonG7") %>% 
  group_by(economy, year) %>% 
  summarise(sum_budget = mean(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = sum_budget)) + 
  geom_line(aes(color = economy), size = 2, alpha = 0.7) +
    bbplot::bbc_style() + 
   theme(legend.position = "bottom",
         legend.text = element_text(size = 10))

Looking at mean total budgets specifically in EMERGING regions

pd_df %>% 
  filter(year != 2013) %>% 
  filter(!is.na(economy)) %>% 
  filter(economy == "3. Emerging region: BRIC" | economy == "4. Emerging region: MIKT" | economy == "5. Emerging region: G20") %>% 
  group_by(economy, year) %>% 
  summarise(sum_budget = mean(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = sum_budget)) + 
  geom_line(aes(color = economy), size = 2, alpha = 0.7) +
    bbplot::bbc_style() + 
   theme(legend.position = "bottom",
         legend.text = element_text(size = 10))

Looking at mean total budgets specifically in developed and least developed regions

pd_df %>% 
  filter(year != 2013) %>% 
  filter(!is.na(economy)) %>% 
  filter(economy == "6. Developing region" | economy ==  "7. Least developed region" ) %>% 
  group_by(economy, year) %>% 
  summarise(sum_budget = mean(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = sum_budget)) + 
  geom_line(aes(color = economy), size = 2, alpha = 0.7) +
    bbplot::bbc_style() + 
   theme(legend.position = "bottom",
         legend.text = element_text(size = 10))

Mean total budget across all income groups

pd_df %>% 
  filter(year != 2013) %>% 
  filter(!is.na(economy)) %>% 
  # filter(economy == "6. Developing region" | economy ==  "7. Least developed region" ) %>% 
  group_by(economy, year) %>% 
  summarise(sum_budget = mean(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = sum_budget)) + 
  geom_line(aes(color = economy), size = 2, alpha = 0.7) +
    bbplot::bbc_style() + 
   theme(legend.position = "bottom",
         legend.text = element_text(size = 10))

Mean budget PER CAPITA across all income groups

pd_df %>% 
  filter(year != 2013) %>% 
  filter(!is.na(economy)) %>% 
  # filter(economy == "6. Developing region" | economy ==  "7. Least developed region" ) %>% 
  group_by(economy, year) %>% 
  summarise(sum_budget = mean(budget_pc, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = sum_budget)) + 
  geom_line(aes(color = economy), size = 2, alpha = 0.7) +
    bbplot::bbc_style() + 
   theme(legend.position = "bottom",
         legend.text = element_text(size = 10))

Mean budget PER CAPITA across all income groups with populations ABOVE 1 million

pd_df %>% 
  filter(pop > 1000000) %>% 
  filter(year != 2013) %>% 
  filter(!is.na(economy)) %>% 
  # filter(economy == "6. Developing region" | economy ==  "7. Least developed region" ) %>% 
  group_by(economy, year) %>% 
  summarise(sum_budget = mean(budget_pc, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = sum_budget)) + 
  geom_line(aes(color = economy), size = 2, alpha = 0.7) +
    bbplot::bbc_style() + 
   theme(legend.position = "bottom",
         legend.text = element_text(size = 10))

Next we can look at total budgets different income groups across the years

Total budgets in each income group

pd_df$income_group_name <- as.factor(pd_df$income_group_name)

pd_df$income_group_name <- factor(pd_df$income_group_name, levels = c("Low Income Country", "Lower Middle Income Country", "Upper Middle Income Country", "High Income Country"))

plyr::count(pd_df$income_group_name)

##                             x freq
## 1          Low Income Country  195
## 2 Lower Middle Income Country  278
## 3 Upper Middle Income Country  333
## 4         High Income Country  357
## 5                        <NA>   59

pd_df %>% 
  # filter(pop > 1000000) %>%
  filter(!is.na(income_group_name)) %>% 
  filter(income_group_name != "NULL") %>% 
  filter(year != 2013) %>% 
  group_by(income_group_name, year) %>% 
  summarise(sum_budget = sum(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = sum_budget)) + 
  # geom_point(alpha = 0.8) + 
  geom_line(aes(color = income_group_name),  size = 2, alpha = 0.7) +
    bbplot::bbc_style() +
  theme(legend.position = "bottom",
        legend.text = element_text(size = 10))

Next we can look at the relationship between total budget and GDP with populations above 1 million

pd_df %>% 
  # filter(year == 2019) %>% 
  filter(pop > 1000000) %>%
  # filter(!is.na(OECD_member)) %>% 
   filter(!is.na(income_group_name)) %>% 
  filter(income_group_name != "NULL") %>% 
  filter(year != 2013) %>% 
  # group_by(income_group_name, year) %>% 
  # summarise(sum_budget = sum(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = log(GDP_per_cap_constant_2010), y = log(budget), color = as.factor(income_group_name))) + 
  geom_point( alpha = 0.3) +
  geom_smooth(method = "glm", se = FALSE, size = 2) +
  facet_wrap(~ year) + 
  bbplot::bbc_style() + 
  theme(legend.position = "bottom",
        legend.text = element_text(size = 10))

Next we can look at the relationship between budget PER CAPITA and GDP with populations above 1 million

pd_df %>% 
  filter(pop > 1000000) %>%
  # filter(!is.na(OECD_member)) %>% 
   filter(!is.na(income_group_name)) %>% 
  filter(income_group_name != "NULL") %>%
  filter(year != 2013) %>% 
  # group_by(income_group_name, year) %>% 
  # summarise(sum_budget = sum(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = log(GDP_per_cap_constant_2010), y = log(budget_pc), color = as.factor(income_group_name))) + 
  geom_point(aes(color = income_group_name), alpha = 0.4) +
  geom_smooth(method = "glm", se = FALSE) +
  facet_wrap(~ year) + 
  bbplot::bbc_style() + 
  theme(legend.position = "bottom",
        legend.text = element_text(size = 10))

library(PerformanceAnalytics)


pd_df %>% 
  filter(year == 2019) %>% 
  # filter(pop > 1000000) %>% 
  select(budget, exports_constant_2010, imports_constant_2010, GDP_constant_2010, pop) %>% 
  mutate(budget_pc = budget / pop, 
         exports_pc = exports_constant_2010 / pop,
         imports_pc = imports_constant_2010 / pop,
         gdp_pc = GDP_constant_2010 / pop) %>% 
  select(budget_pc, exports_pc, imports_pc, gdp_pc, pop) -> pd_corr

Looking at the correlation between budget per capita and exports per capita, imports per capita and GDP per capita in 2015

pd_corr <- apply(pd_corr, 2, log)

chart.Correlation(pd_corr, histogram=TRUE, pch=19)

pd_df$income_group_name <- as.factor(pd_df$income_group_name)

pd_df$income_group_name <- factor(pd_df$income_group_name, levels = c("Low Income Country", "Lower Middle Income Country", "Upper Middle Income Country", "High Income Country"))

plyr::count(pd_df$income_group_name)

##                             x freq
## 1          Low Income Country  195
## 2 Lower Middle Income Country  278
## 3 Upper Middle Income Country  333
## 4         High Income Country  357
## 5                        <NA>   59

pd_df %>% 
  # filter(pop > 1000000) %>%
  filter(!is.na(income_group_name)) %>% 
  filter(income_group_name != "NULL") %>% 
  filter(year != 2013) %>% 
  group_by(income_group_name, year) %>% 
  summarise(sum_budget = sum(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = sum_budget)) + 
  # geom_point(alpha = 0.8) + 
  geom_line(aes(color = income_group_name),  size = 2, alpha = 0.7) +
    bbplot::bbc_style() +
  theme(legend.position = "bottom",
        legend.text = element_text(size = 10))

Next we can look at the relationship between total budget and GDP with populations above 1 million

pd_df %>% 
  # filter(year == 2019) %>% 
  filter(pop > 1000000) %>%
  # filter(!is.na(OECD_member)) %>% 
   filter(!is.na(income_group_name)) %>% 
  filter(income_group_name != "NULL") %>% 
  filter(year != 2013) %>% 
  # group_by(income_group_name, year) %>% 
  # summarise(sum_budget = sum(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = log(GDP_per_cap_constant_2010), y = log(budget), color = as.factor(income_group_name))) + 
  geom_point( alpha = 0.3) +
  geom_smooth(method = "glm", se = FALSE, size = 2) +
  facet_wrap(~ year) + 
  bbplot::bbc_style() + 
  theme(legend.position = "bottom",
        legend.text = element_text(size = 10))

Next we can look at the relationship between budget PER CAPITA and GDP with populations above 1 million

pd_df %>% 
  filter(pop > 1000000) %>%
  # filter(!is.na(OECD_member)) %>% 
   filter(!is.na(income_group_name)) %>% 
  filter(income_group_name != "NULL") %>%
  filter(year != 2013) %>% 
  # group_by(income_group_name, year) %>% 
  # summarise(sum_budget = sum(budget, na.rm = TRUE)) %>% 
  ggplot(aes(x = log(GDP_per_cap_constant_2010), y = log(budget_pc), color = as.factor(income_group_name))) + 
  geom_point(aes(color = income_group_name), alpha = 0.4) +
  geom_smooth(method = "glm", se = FALSE) +
  facet_wrap(~ year) + 
  theme(legend.position = "bottom") + bbplot::bbc_style() + 
  theme(legend.position = "bottom",
        legend.text = element_text(size = 10))

Looking at correlation matrix in 2019

pd_df %>% 
  filter(year == 2015) %>% 
  # filter(pop > 1000000) %>% 
  mutate(budget_pc = budget / pop, 
         exports_pc = exports_constant_2010 / pop,
         imports_pc = imports_constant_2010 / pop,
         ln_budget = log(budget), 
         ln_budget_pc = log(budget_pc),
         ln_manufac_exports = log(manufac_exports + 1),
         ln_manufac_imports = log(manufac_imports + 1),
         ln_energy_exports = log(energy_exports + 1),
         ln_energy_imports = log(energy_imports + 1),
         ln_agri_exports = log(agri_exports + 1),
         ln_agri_imports = log(agri_imports + 1)) %>% 
  select(ln_budget, ln_budget_pc, ln_manufac_exports, ln_manufac_imports, ln_energy_exports, ln_energy_imports, ln_agri_exports, ln_agri_imports) -> trade_corr

Looking at the correlation between budget per capita and different exports from US and imports to US in 2015

# trade_corr <- apply(trade_corr, 2, log)

chart.Correlation(trade_corr, histogram=TRUE, pch=19)

Looking at the exports from US / imports to US and their correlation to PD budgets in Higher Income Countries in 2015

pd_df %>% 
  filter(income_group_name == "High Income Country" | income_group_name ==  "Higher Middle Income Country") %>% 
  filter(year == 2015) %>% 
  # filter(pop > 1000000) %>% 
  mutate(budget_pc = budget / pop, 
         exports_pc = exports_constant_2010 / pop,
         imports_pc = imports_constant_2010 / pop,
         ln_budget = log(budget), 
         ln_budget_pc = log(budget_pc),
         ln_manufac_exports = log(manufac_exports + 1),
         ln_manufac_imports = log(manufac_imports + 1),
         ln_energy_exports = log(energy_exports + 1),
         ln_energy_imports = log(energy_imports + 1),
         ln_agri_exports = log(agri_exports + 1),
         ln_agri_imports = log(agri_imports + 1)) %>% 
  select(ln_budget, ln_budget_pc, ln_manufac_exports, ln_manufac_imports, ln_energy_exports, ln_energy_imports, ln_agri_exports, ln_agri_imports) -> trade_corr

chart.Correlation(trade_corr, histogram=TRUE, pch=19)

Looking at the exports from US / imports to US and their correlation to PD budgets in Lower Income Countries in 2015

pd_df %>% 
  filter(income_group_name == "Low Income Country" | income_group_name ==  "Lower Middle Income Country") %>% 
  filter(year == 2015) %>% 
  # filter(pop > 1000000) %>% 
  mutate(budget_pc = budget / pop, 
         exports_pc = exports_constant_2010 / pop,
         imports_pc = imports_constant_2010 / pop,
         ln_budget = log(budget), 
         ln_budget_pc = log(budget_pc),
         ln_manufac_exports = log(manufac_exports + 1),
         ln_manufac_imports = log(manufac_imports + 1),
         ln_energy_exports = log(energy_exports + 1),
         ln_energy_imports = log(energy_imports + 1),
         ln_agri_exports = log(agri_exports + 1),
         ln_agri_imports = log(agri_imports + 1)) %>% 
  select(ln_budget, ln_budget_pc, ln_manufac_exports, ln_manufac_imports, ln_energy_exports, ln_energy_imports, ln_agri_exports, ln_agri_imports) -> trade_corr

chart.Correlation(trade_corr, histogram=TRUE, pch=19)

Looking at correlation between total budget and polity score, percentage muslim, percentage christian, distance to DC

pd_df %>% 
   filter(year == 2015) %>% 
  # filter(pop > 1000000) %>% 
  mutate(budget_pc = budget / pop) %>% 
  mutate(ln_budget_pc = log(budget_pc)) %>% 
  mutate(ln_budget = log(budget),
         ln_oda_pc = log(oda_per_cap_current + 1)) %>%
  select(ln_budget, ln_budget_pc, ln_oda_pc, percent_muslim, percent_muslim, percent_christian, polyarchy_demo, cso_repress) -> more_corr

# more_corr <- apply(more_corr, 2, log)

chart.Correlation(more_corr, histogram=TRUE, pch=19)

We can look at 2019 only and look at populations over 1 million

The relationship between budget (per capita and total) and US bases, net FDI inflows, US military military aid

pd_df %>% 
  filter(year == 2019) %>% 
  filter(pop > 1000000) %>% 
  mutate(budget_pc = budget / pop) %>% 
  mutate(ln_budget_pc = log(budget_pc)) %>% 
  mutate(ln_budget = log(budget)) %>%
  mutate(ln_bases = log(all_bases  + 1)) %>% 
  mutate(ln_fdi_in_net = log(fdi_in_net  + 1)) %>% 
  mutate(ln_sum_mil_aid = log(sum_mil_aid + 1)) %>% 
  select(ln_budget_pc, ln_budget, ln_bases, ln_fdi_in_net, ln_sum_mil_aid) -> more_corr

# more_corr <- apply(more_corr, 2, log)

chart.Correlation(more_corr, histogram=TRUE, pch=19)

Correlations in Europe

We can look at the relationship between different globalization scores and budgets per capita in Europe (populations over 1 million)

pd_df %>% 
  # filter(year == 2016) %>% 
  filter(continent.x == "Europe") %>% 
  filter(pop > 1000000) %>% 
  select(budget_pc, kof, 
         # kof_df, kof_dj, 
         cultural_kof, 
         # cultural_kof_df, cultural_kof_dj, 
         finance_kof,
         # finance_kof_df, finance_kof_dj, 
         info_kof, 
         # info_kof_df, info_kof_dj, 
         political_kof, 
         # political_kof_df, political_kof_dj
         ) -> kof_corr

kof_corr <- apply(kof_corr, 2, log)

chart.Correlation(kof_corr, histogram=TRUE)

Correlations in Africa

We can look at the relationship between different globalization scores and budgets per capita in Africa (populations over 1 million)

pd_df %>% 
  filter(continent.x == "Africa") %>% 
  filter(pop > 1000000) %>% 
  select(budget_pc, kof, 
         # kof_df, kof_dj, 
         cultural_kof, 
         # cultural_kof_df, cultural_kof_dj, 
         finance_kof,
         # finance_kof_df, finance_kof_dj, 
         info_kof, 
         # info_kof_df, info_kof_dj, 
         political_kof, 
         # political_kof_df, political_kof_dj
         ) -> kof_corr

kof_corr <- apply(kof_corr, 2, log)

chart.Correlation(kof_corr, histogram=TRUE)

Correlations in Asia

We can look at the relationship between different globalization scores and budgets per capita in Asia (populations over 1 million)

pd_df %>% 
  # filter(year == 2016) %>% 
  filter(continent.x == "Asia") %>% 
  filter(pop > 1000000) %>% 
  select(budget_pc, kof, 
         # kof_df, kof_dj, 
         cultural_kof, 
         # cultural_kof_df, cultural_kof_dj, 
         finance_kof,
         # finance_kof_df, finance_kof_dj, 
         info_kof, 
         # info_kof_df, info_kof_dj, 
         political_kof, 
         # political_kof_df, political_kof_dj
         ) -> kof_corr

kof_corr <- apply(kof_corr, 2, log)

chart.Correlation(kof_corr, histogram=TRUE)

Basic demographics

Hypothesis

Null Hyp: that there is NO relationship between US PD spending and GDP, population and democracy scores

Alt Hyp 1: that the US spends more PD budget in countries with higher GDP

Alt Hyp 2: that the US spends more PD budget in countries with lower GDP

Regression - Budget Per Capita

pd_df %<>% 
  distinct(un_code, year, .keep_all = TRUE) 

basic_model_fe_ind <- plm(log(budget_pc) ~ log(GDP_per_cap_constant_2010) + log(pop) + polity_score,  data = pd_df, index = c("un_code", "year"), model = "within", effect = "individual")

basic_model_fe_time <- plm(log(budget_pc) ~ log(GDP_per_cap_constant_2010) + log(pop) + polity_score, data = pd_df, index = c("un_code", "year"), model = "within", effect = "time")

basic_model_fe_two <- plm(log(budget_pc) ~ log(GDP_per_cap_constant_2010) + log(pop) + polity_score,  data = pd_df, index = c("un_code", "year"), model = "within", effect = "twoways")
 
basic_model_re <- plm(log(budget_pc) ~ log(GDP_per_cap_constant_2010) + log(pop) + polity_score, data = pd_df, index = c("un_code", "year"), model = "random")

stargazer(basic_model_fe_ind, basic_model_fe_time, basic_model_fe_two, basic_model_re,
          column.labels =c("FE Individ", "FE Time", "FE Twoway", "RE Swar"),
          type = "text")

## 
## ====================================================================================================================
##                                                                 Dependent variable:                                 
##                                -------------------------------------------------------------------------------------
##                                                                   log(budget_pc)                                    
##                                       FE Individ                 FE Time                 FE Twoway         RE Swar  
##                                          (1)                       (2)                      (3)              (4)    
## --------------------------------------------------------------------------------------------------------------------
## log(GDP_per_cap_constant_2010)        -0.663***                 0.128***                  -0.109            0.075*  
##                                        (0.211)                   (0.015)                  (0.215)          (0.040)  
##                                                                                                                     
## log(pop)                              -7.395***                 -0.501***                -5.417***        -0.561*** 
##                                        (0.373)                   (0.014)                  (0.505)          (0.038)  
##                                                                                                                     
## polity_score                           0.027***                  0.007*                  0.025***           0.013   
##                                        (0.011)                   (0.004)                  (0.009)          (0.008)  
##                                                                                                                     
## Constant                                                                                                   6.444*** 
##                                                                                                            (0.753)  
##                                                                                                                     
## --------------------------------------------------------------------------------------------------------------------
## Observations                            1,109                     1,109                    1,109            1,109   
## R2                                      0.321                     0.572                    0.125            0.184   
## Adjusted R2                             0.205                     0.568                   -0.031            0.181   
## F Statistic                    149.225*** (df = 3; 946) 488.745*** (df = 3; 1099) 44.837*** (df = 3; 940) 242.825***
## ====================================================================================================================
## Note:                                                                                    *p<0.1; **p<0.05; ***p<0.01

Regression - Total Budget

basic_total_fe_ind <- plm(log(budget) ~ log(GDP_per_cap_constant_2010) + log(pop) + polity_score,  data = pd_df, index = c("un_code", "year"), model = "within", effect = "individual")

basic_total_fe_time <- plm(log(budget) ~ log(GDP_per_cap_constant_2010) + log(pop) + polity_score, data = pd_df, index = c("un_code", "year"), model = "within", effect = "time")

basic_total_fe_two <- plm(log(budget) ~ log(GDP_per_cap_constant_2010) + log(pop) + polity_score,  data = pd_df, index = c("un_code", "year"), model = "within", effect = "twoways")
 
basic_total_re <- plm(log(budget) ~ log(GDP_per_cap_constant_2010) + log(pop) + polity_score, data = pd_df, index = c("un_code", "year"), model = "random")

stargazer(basic_total_fe_ind, basic_total_fe_time, basic_total_fe_two, basic_total_re,
          column.labels =c("FE Individ", "FE Time", "FE Twoway", "RE Swar"),
          type = "text")

## 
## ====================================================================================================================
##                                                                 Dependent variable:                                 
##                                -------------------------------------------------------------------------------------
##                                                                     log(budget)                                     
##                                       FE Individ                 FE Time                 FE Twoway         RE Swar  
##                                          (1)                       (2)                      (3)              (4)    
## --------------------------------------------------------------------------------------------------------------------
## log(GDP_per_cap_constant_2010)        -0.663***                 0.128***                  -0.109            0.075*  
##                                        (0.211)                   (0.015)                  (0.215)          (0.040)  
##                                                                                                                     
## log(pop)                              -6.395***                 0.499***                 -4.417***         0.439*** 
##                                        (0.373)                   (0.014)                  (0.505)          (0.038)  
##                                                                                                                     
## polity_score                           0.027***                  0.007*                  0.025***           0.013   
##                                        (0.011)                   (0.004)                  (0.009)          (0.008)  
##                                                                                                                     
## Constant                                                                                                   6.444*** 
##                                                                                                            (0.753)  
##                                                                                                                     
## --------------------------------------------------------------------------------------------------------------------
## Observations                            1,109                     1,109                    1,109            1,109   
## R2                                      0.265                     0.527                    0.089            0.166   
## Adjusted R2                             0.139                     0.523                   -0.074            0.164   
## F Statistic                    113.796*** (df = 3; 946) 407.951*** (df = 3; 1099) 30.461*** (df = 3; 940) 133.374***
## ====================================================================================================================
## Note:                                                                                    *p<0.1; **p<0.05; ***p<0.01

Choosing the correct Model for Panel Data

Testing which panel model is better (random / fixed)

First run the Hausman test.

If the p is smaller than 0.05, use fixed effects

plm::phtest(basic_model_re, basic_model_fe_ind)

## 
##  Hausman Test
## 
## data:  log(budget_pc) ~ log(GDP_per_cap_constant_2010) + log(pop) +  ...
## chisq = 396.22, df = 3, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent

The p value is under 0.05 so we can focus on the fixed effects model

Testing which effects are better (individual / time / twoways)

Run the Lagrange Multiplier Test for time effects

Breusch-Pagan type for unbalanced data

If the p is smaller than 0.05, we use time-fixed effects (null is NO time fixed effects)

Breusch Pagan time effects - Lagrange Mutliplier Test for unbalanced data

If p is smaller than 0.05, use time-fixed effects (null is NO time fixed effects)

plmtest(basic_model_fe_ind, c("time"), type=("bp"))

## 
##  Lagrange Multiplier Test - time effects (Breusch-Pagan) for unbalanced
##  panels
## 
## data:  log(budget_pc) ~ log(GDP_per_cap_constant_2010) + log(pop) +  ...
## chisq = 2498.1, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects

# NB alternatives to Breusch Pagan ("bp")
# honda
# ghm
# kw

F-Test for time fixed effects

The null is that no time-fixed effects are needed

pFtest(basic_model_fe_ind, basic_model_fe_time)

## 
##  F test for individual effects
## 
## data:  log(budget_pc) ~ log(GDP_per_cap_constant_2010) + log(pop) +  ...
## F = 12.834, df1 = 153, df2 = 946, p-value < 2.2e-16
## alternative hypothesis: significant effects

# If p is smaller than 0.05, use time-fixed effects (null is NO time fixed effects)

Correct for heteroskedasticity

Correcting coefficients with robust standard errors

basic_total_fe_time <- plm(log(budget) ~ log(GDP_per_cap_constant_2010) + log(pop) + polity_score + I(polity_score^2) + as.factor(subregion), data = pd_df, index = c("un_code", "year"), model = "within", effect = "time")

usaid_total_fe_time <- plm(log(sum_usaid + 1) ~ log(GDP_per_cap_constant_2010) + log(pop) + polity_score + I(polity_score^2) + as.factor(subregion), data = pd_df, index = c("un_code", "year"), model = "within", effect = "time")

plyr::count(pd_df$subregion)

##                            x freq
## 1  Australia and New Zealand   14
## 2                  Caribbean   51
## 3            Central America   56
## 4               Central Asia   35
## 5             Eastern Africa  112
## 6               Eastern Asia   35
## 7             Eastern Europe   70
## 8                  Melanesia   14
## 9                 Micronesia   21
## 10             Middle Africa   56
## 11           Northern Africa   42
## 12          Northern America    7
## 13           Northern Europe   70
## 14                 Polynesia    7
## 15        South-Eastern Asia   77
## 16             South America   84
## 17           Southern Africa   35
## 18             Southern Asia   42
## 19           Southern Europe   91
## 20            Western Africa  112
## 21              Western Asia  125
## 22            Western Europe   48
## 23                      <NA>   11

stargazer(basic_total_fe_time,
          # coeftest(basic_total_fe_time, vcovHC(basic_total_fe_time, type = "HC0")),
          coeftest(basic_total_fe_time, vcovHC(basic_total_fe_time, type = "HC1")),
          # coeftest(basic_total_fe_time, vcovHC(basic_total_fe_time, type = "HC2")),
          coeftest(basic_total_fe_time, vcovHC(basic_total_fe_time, type = "HC3")),
          
          usaid_total_fe_time, 
          
          type = "text")

## 
## =============================================================================================================
##                                                                 Dependent variable:                          
##                                        ----------------------------------------------------------------------
##                                               log(budget)                               log(sum_usaid + 1)   
##                                                  panel               coefficient              panel          
##                                                 linear                  test                  linear         
##                                                   (1)               (2)       (3)              (4)           
## -------------------------------------------------------------------------------------------------------------
## log(GDP_per_cap_constant_2010)                  0.051**            0.051     0.051          -3.087***        
##                                                 (0.024)           (0.048)   (0.048)          (0.175)         
##                                                                                                              
## log(pop)                                       0.490***          0.490***  0.490***          1.130***        
##                                                 (0.014)           (0.034)   (0.034)          (0.109)         
##                                                                                                              
## polity_score                                   0.029***          0.029***  0.029***          0.171***        
##                                                 (0.005)           (0.008)   (0.008)          (0.034)         
##                                                                                                              
## I(polity_score2)                               -0.002***          -0.002    -0.002          -0.035***        
##                                                 (0.001)           (0.002)   (0.002)          (0.007)         
##                                                                                                              
## as.factor(subregion)Caribbean                   -0.177            -0.177    -0.177           8.050***        
##                                                 (0.201)           (0.245)   (0.249)          (1.745)         
##                                                                                                              
## as.factor(subregion)Central America             -0.269            -0.269*   -0.269*          8.904***        
##                                                 (0.197)           (0.147)   (0.149)          (1.720)         
##                                                                                                              
## as.factor(subregion)Central Asia               0.569***           0.569**   0.569**          8.322***        
##                                                 (0.216)           (0.226)   (0.230)          (1.829)         
##                                                                                                              
## as.factor(subregion)Eastern Africa             -0.470**           -0.470*   -0.470*          4.030**         
##                                                 (0.203)           (0.256)   (0.259)          (1.767)         
##                                                                                                              
## as.factor(subregion)Eastern Asia                -0.145            -0.145    -0.145           5.770***        
##                                                 (0.202)           (0.154)   (0.157)          (1.756)         
##                                                                                                              
## as.factor(subregion)Eastern Europe              -0.097            -0.097    -0.097           5.578***        
##                                                 (0.188)           (0.161)   (0.163)          (1.692)         
##                                                                                                              
## as.factor(subregion)Melanesia                   -0.480*           -0.480    -0.480           4.805**         
##                                                 (0.251)           (0.466)   (0.492)          (2.119)         
##                                                                                                              
## as.factor(subregion)Middle Africa              -0.947***         -0.947*** -0.947***         4.413**         
##                                                 (0.208)           (0.240)   (0.243)          (1.807)         
##                                                                                                              
## as.factor(subregion)Northern Africa            -0.510**           -0.510*   -0.510*          7.172***        
##                                                 (0.209)           (0.287)   (0.291)          (1.790)         
##                                                                                                              
## as.factor(subregion)Northern America            -0.372           -0.372*** -0.372***          3.243          
##                                                 (0.292)           (0.054)   (0.056)          (2.266)         
##                                                                                                              
## as.factor(subregion)Northern Europe            -0.370**          -0.370*** -0.370***          1.016          
##                                                 (0.185)           (0.111)   (0.112)          (1.696)         
##                                                                                                              
## as.factor(subregion)South-Eastern Asia          -0.352*           -0.352*   -0.352*          5.632***        
##                                                 (0.197)           (0.181)   (0.184)          (1.717)         
##                                                                                                              
## as.factor(subregion)South America               -0.332*           -0.332*   -0.332*          4.940***        
##                                                 (0.187)           (0.177)   (0.179)          (1.664)         
##                                                                                                              
## as.factor(subregion)Southern Africa              0.153             0.153     0.153          11.520***        
##                                                 (0.208)           (0.252)   (0.257)          (1.773)         
##                                                                                                              
## as.factor(subregion)Southern Asia                0.210             0.210     0.210           4.130**         
##                                                 (0.213)           (0.556)   (0.564)          (1.816)         
##                                                                                                              
## as.factor(subregion)Southern Europe             -0.016            -0.016    -0.016           4.881***        
##                                                 (0.184)           (0.159)   (0.160)          (1.645)         
##                                                                                                              
## as.factor(subregion)Western Africa             -0.884***         -0.884*** -0.884***          3.291*         
##                                                 (0.199)           (0.228)   (0.230)          (1.743)         
##                                                                                                              
## as.factor(subregion)Western Asia                0.435**           0.435**   0.435**          8.163***        
##                                                 (0.188)           (0.183)   (0.185)          (1.672)         
##                                                                                                              
## as.factor(subregion)Western Europe              -0.373*          -0.373**  -0.373**           -0.462         
##                                                 (0.191)           (0.168)   (0.170)          (1.726)         
##                                                                                                              
## -------------------------------------------------------------------------------------------------------------
## Observations                                     1,109                                         979           
## R2                                               0.650                                        0.657          
## Adjusted R2                                      0.641                                        0.646          
## F Statistic                            87.150*** (df = 23; 1079)                     78.973*** (df = 23; 949)
## =============================================================================================================
## Note:                                                                             *p<0.1; **p<0.05; ***p<0.01

Cultural affinity

Hypothesis

Null Hyp: that there is NO relationship between US PD spending in country j and the cultural affinity between country j and the US

Alt Hyp 1: that the US spends more PD budget in countries with whom it has higher cultural affinity

Alt Hyp 2: that the US spends more PD budget in countries with whom it has lower cultural affinity

Operationalization

Similar religion
Similar legal history
Similar colonial history
“Western” / OECD countries

Regression - Budget Per Capita

culture_model_fe_ind <- plm(log(budget_pc)  ~  agree_with_US_score + cultural_kof + log(dist_to_dc) + percent_christian + as.factor(OECD_member), data = pd_df, index = c("un_code", "year"), model = "within", effect = "individual")

culture_model_fe_time <- plm(log(budget_pc) ~  agree_with_US_score + cultural_kof + log(dist_to_dc) + percent_christian + as.factor(OECD_member), data = pd_df, index = c("un_code", "year"), model = "within", effect = "time")

culture_model_fe_two <- plm(log(budget_pc)  ~  agree_with_US_score + cultural_kof + log(dist_to_dc) + percent_christian + as.factor(OECD_member), data = pd_df, index = c("un_code", "year"), model = "within", effect = "twoways")
 
culture_model_re <- plm(log(budget_pc) ~  agree_with_US_score + cultural_kof + log(dist_to_dc) + percent_christian + as.factor(OECD_member), data = pd_df, index = c("un_code", "year"), model = "random")

stargazer(culture_model_fe_ind, culture_model_fe_time, culture_model_fe_two, culture_model_re,
          column.labels =c("FE Individ", "FE Time", "FE Twoway", "RE Swar"),
          type = "text")

## 
## =======================================================================================================
##                                                       Dependent variable:                              
##                         -------------------------------------------------------------------------------
##                                                         log(budget_pc)                                 
##                               FE Individ                FE Time               FE Twoway        RE Swar 
##                                   (1)                     (2)                    (3)             (4)   
## -------------------------------------------------------------------------------------------------------
## agree_with_US_score            -1.676***               1.231***               -1.294***       -1.490***
##                                 (0.222)                 (0.355)                (0.404)         (0.218) 
##                                                                                                        
## cultural_kof                    -0.004                 0.019***                 0.001         0.020*** 
##                                 (0.010)                 (0.002)                (0.007)         (0.004) 
##                                                                                                        
## log(dist_to_dc)                                         -0.074                                 -0.089  
##                                                         (0.073)                                (0.170) 
##                                                                                                        
## percent_christian              0.152***                  0.001                  0.007           0.003  
##                                 (0.054)                 (0.001)                (0.042)         (0.002) 
##                                                                                                        
## as.factor(OECD_member)1         -0.441                 -0.985***                0.084          -0.323  
##                                 (0.448)                 (0.123)                (0.337)         (0.216) 
##                                                                                                        
## Constant                                                                                       -1.746  
##                                                                                                (1.600) 
##                                                                                                        
## -------------------------------------------------------------------------------------------------------
## Observations                      954                     954                    954             954   
## R2                               0.078                   0.155                  0.013           0.068  
## Adjusted R2                     -0.111                   0.146                 -0.197           0.063  
## F Statistic             16.668*** (df = 4; 791) 34.699*** (df = 5; 943) 2.617** (df = 4; 786) 69.495***
## =======================================================================================================
## Note:                                                                       *p<0.1; **p<0.05; ***p<0.01

Regression - Total Budget

culture_model_fe_ind <- plm(log(budget)  ~  agree_with_US_score + cultural_kof + log(dist_to_dc) + percent_christian + as.factor(OECD_member), data = pd_df, index = c("un_code", "year"), model = "within", effect = "individual")

culture_model_fe_time <- plm(log(budget) ~  agree_with_US_score + cultural_kof + log(dist_to_dc) + percent_christian + as.factor(OECD_member), data = pd_df, index = c("un_code", "year"), model = "within", effect = "time")

culture_model_fe_two <- plm(log(budget)  ~  agree_with_US_score + cultural_kof + log(dist_to_dc) + percent_christian + as.factor(OECD_member), data = pd_df, index = c("un_code", "year"), model = "within", effect = "twoways")
 
culture_model_re <- plm(log(budget) ~  agree_with_US_score + cultural_kof + log(dist_to_dc) + percent_christian + as.factor(OECD_member), data = pd_df, index = c("un_code", "year"), model = "random")

stargazer(culture_model_fe_ind, culture_model_fe_time, culture_model_fe_two, culture_model_re, 
          column.labels =c("FE Individ", "FE Time", "FE Twoway", "RE Swar"),
          type = "text")

## 
## =======================================================================================================
##                                                       Dependent variable:                              
##                         -------------------------------------------------------------------------------
##                                                           log(budget)                                  
##                               FE Individ                FE Time               FE Twoway        RE Swar 
##                                   (1)                     (2)                    (3)             (4)   
## -------------------------------------------------------------------------------------------------------
## agree_with_US_score            -1.612***                -0.357                -1.216***       -1.529***
##                                 (0.214)                 (0.362)                (0.396)         (0.209) 
##                                                                                                        
## cultural_kof                    -0.005                 0.008***                -0.003          0.009** 
##                                 (0.009)                 (0.002)                (0.007)         (0.004) 
##                                                                                                        
## log(dist_to_dc)                                        0.260***                                 0.242  
##                                                         (0.075)                                (0.174) 
##                                                                                                        
## percent_christian              0.152***                -0.004***                0.016          -0.003  
##                                 (0.052)                 (0.001)                (0.041)         (0.002) 
##                                                                                                        
## as.factor(OECD_member)1         -0.467                   0.168                  0.007           0.308  
##                                 (0.432)                 (0.125)                (0.332)         (0.216) 
##                                                                                                        
## Constant                                                                                      12.250***
##                                                                                                (1.633) 
##                                                                                                        
## -------------------------------------------------------------------------------------------------------
## Observations                      960                     960                    960             960   
## R2                               0.078                   0.060                  0.012           0.060  
## Adjusted R2                     -0.111                   0.050                 -0.198           0.056  
## F Statistic             16.869*** (df = 4; 796) 12.099*** (df = 5; 949) 2.453** (df = 4; 791) 61.426***
## =======================================================================================================
## Note:                                                                       *p<0.1; **p<0.05; ***p<0.01

Choosing the correct Model for Panel Data

Testing which panel model is better (random / fixed)

First run the Hausman test.

If the p is smaller than 0.05, use fixed effects

plm::phtest(culture_model_re,  culture_model_fe_ind)

## 
##  Hausman Test
## 
## data:  log(budget) ~ agree_with_US_score + cultural_kof + log(dist_to_dc) +  ...
## chisq = 17.094, df = 4, p-value = 0.001853
## alternative hypothesis: one model is inconsistent

The p value is under 0.05 so we can focus on the fixed effects model

Testing which effects are better (individual / time / twoways)

Run the Lagrange Multiplier Test for time effects

Breusch-Pagan type for unbalanced data

If the p is smaller than 0.05, we use time-fixed effects (null is NO time fixed effects)

plmtest(culture_model_fe_ind, c("time"), type=("bp"))

## 
##  Lagrange Multiplier Test - time effects (Breusch-Pagan) for balanced
##  panels
## 
## data:  log(budget) ~ agree_with_US_score + cultural_kof + log(dist_to_dc) +  ...
## chisq = 518.05, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects

# NB alternatives to Breusch Pagan ("bp")
# honda
# ghm
# kw

pFtest(culture_model_fe_ind, culture_model_fe_time)

## 
##  F test for individual effects
## 
## data:  log(budget) ~ agree_with_US_score + cultural_kof + log(dist_to_dc) +  ...
## F = 19.031, df1 = 153, df2 = 796, p-value < 2.2e-16
## alternative hypothesis: significant effects

# If p is smaller than 0.05, use time-fixed effects (null is NO time fixed effects)

phtest(culture_model_re, culture_model_fe_ind)

## 
##  Hausman Test
## 
## data:  log(budget) ~ agree_with_US_score + cultural_kof + log(dist_to_dc) +  ...
## chisq = 17.094, df = 4, p-value = 0.001853
## alternative hypothesis: one model is inconsistent

Economic motivations and Public Diplomacy Spending

Hypothesis

Null: that there is relationship between US PD spending in country j and the levels of economic activity between country j and the US

Alt 1: that the US spends more PD budget in countries with whom it has more economic interests

Alt 2: that the US spends more PD budget in countries with whom it has fewer economic interests

Operationalization

Distance to DC
Level of trade (imports / exports) with the US
Trade openness (total imports and total exports divided by total GDP)

pd_df %<>% 
   mutate(budget_pc = budget / pop) %>% 
   mutate(energy_imp_pc = energy_imports / pop) %>% 
   mutate(agri_imp_pc = agri_imports / pop) %>% 
   mutate(services_imp_pc = services_imports / pop) %>% 
   mutate(manufac_imp_pc = manufac_imports / pop) %>% 
   
   mutate(energy_exp_pc = energy_exports / pop) %>% 
   mutate(agri_exp_pc = agri_exports / pop) %>% 
   mutate(services_exp_pc = services_exports / pop) %>% 
   mutate(manufac_exp_pc = manufac_exports / pop)

Scatterplot

Is there a a relationship between PD budget per capita and and energy imports per capita to the US per capita?

pd_df %>% 
  filter(year < 2017) -> pd_trade

pd_trade %>% 
  # filter(year == 2013) %>% 
  filter(!is.na(income_group_name)) %>% 
  filter(income_group_name != "NULL") %>% 
  mutate(income_group_name = as.factor(income_group_name)) %>% 
  ggplot(aes(x = log(budget_pc), y = log(energy_imp_pc), group = as.factor(year))) +
  geom_point(aes(color = as.factor(year))) +
  geom_smooth(method = "lm", se = FALSE, aes(color = as.factor(year))) +
  facet_wrap(~region_name)

Is there a a relationship between PD budget per capita and and manufacturing exports per capita to the US per capita?

pd_df %>% 
  filter(year < 2017) -> pd_trade

pd_trade %>% 
  # filter(year == 2013) %>% 
  filter(!is.na(income_group_name)) %>% 
  filter(income_group_name != "NULL") %>% 
  ggplot(aes(x = log(budget_pc), y = log(manufac_exp_pc), group = as.factor(year))) +
  geom_point(aes(color = as.factor(year))) +
  geom_smooth(method = "lm", se = FALSE, aes(color = as.factor(year))) +
  facet_wrap(~income_group_name)

# pd_df %>% 
#   filter(pop > 1000000) -> pd_small
# 
# econ_model_fe_ind <- plm(log(budget_pc) ~  log(trade_openess) + log(GDP_per_cap_constant_2010) + log(pop),
#                          data = pd_small, index = c("un_code", "year"), model = "within", effect = "individual")
# 
# econ_model_fe_time <- plm(log(budget_pc) ~   log(trade_openess) + log(GDP_per_cap_constant_2010) + log(pop), 
#                           data = pd_small, index = c("un_code", "year"), model = "within", effect = "time")
# 
# econ_model_fe_two <- plm(log(budget_pc) ~   log(trade_openess) + log(GDP_per_cap_constant_2010) + log(pop),
#                          data = pd_small, index = c("un_code", "year"), model = "within", effect = "twoways")
#  
# econ_model_re <- plm(log(budget_pc) ~   log(trade_openess) + log(GDP_per_cap_constant_2010) + log(pop), 
#                      data = pd_small, index = c("un_code", "year"), model = "random")
#                      
# stargazer(econ_model_fe_ind, econ_model_fe_time, econ_model_fe_two, econ_model_re, 
#           column.labels =c("FE Individ", "FE Time", "FE Twoway", "RE Swar"),
#           type = "text")
# 
# 
# plm::phtest(econ_model_re,  econ_model_fe_ind)
# 
# plmtest(econ_model_fe_ind, c("time"), type=("bp"))

# NB alternatives to Breusch Pagan ("bp")
# honda
# ghm
# kw

# pFtest(econ_model_fe_ind, econ_model_fe_time)
# If p is smaller than 0.05, use time-fixed effects (null is NO time fixed effects)

The following regressions are from 2013 to 2016

Economic relations

# transform(INCOME = factor(income_group_name ,levels = c("Low Income Country","Lower Middle Income Country","Upper Middle Income Country", "High Income Country", "NULL")), 

# log(energy_exp_pc + 1) + log(agri_exp_pc + 1) + log(manufac_exp_pc + 1) + log(energy_imp_pc + 1) + log(agri_imp_pc + 1) + log(manufac_imp_pc + 1)

econ_model_fe_ind <- plm(log(budget_pc) ~  log(fdi_in_per_cap + 1) + finance_kof + log(energy_imports + 1) + log(manufac_exports + 1), 
                         data = pd_trade, index = c("un_code", "year"), model = "within", effect = "individual")

econ_model_fe_time <- plm(log(budget_pc) ~  log(fdi_in_per_cap + 1) + finance_kof + log(energy_imports + 1) + log(manufac_exports + 1),  
                          data = pd_trade, index = c("un_code", "year"), model = "within", effect = "time")

econ_model_fe_two <- plm(log(budget_pc) ~  log(fdi_in_per_cap + 1) + finance_kof + log(energy_imports + 1) + log(manufac_exports + 1), 
                         data = pd_trade, index = c("un_code", "year"), model = "within", effect = "twoways")
 
econ_model_re <- plm(log(budget_pc) ~  log(fdi_in_per_cap + 1) + finance_kof + log(energy_imports + 1) + log(manufac_exports + 1), 
                     data = pd_trade, index = c("un_code", "year"), model = "random")
                     
stargazer(econ_model_fe_ind, econ_model_fe_time, econ_model_fe_two, econ_model_re, 
          column.labels =c("FE Individ", "FE Time", "FE Twoway", "RE Swar"),
          type = "text")

## 
## ======================================================================================================
##                                                       Dependent variable:                             
##                          -----------------------------------------------------------------------------
##                                                         log(budget_pc)                                
##                                FE Individ                FE Time              FE Twoway       RE Swar 
##                                    (1)                     (2)                   (3)            (4)   
## ------------------------------------------------------------------------------------------------------
## log(fdi_in_per_cap + 1)          0.090*                   0.007                 0.034          0.070  
##                                  (0.052)                 (0.042)               (0.035)        (0.047) 
##                                                                                                       
## finance_kof                     -0.056***               0.034***               -0.009        0.017*** 
##                                  (0.012)                 (0.003)               (0.008)        (0.005) 
##                                                                                                       
## log(energy_imports + 1)          0.069**                 -0.013                 0.027          0.003  
##                                  (0.034)                 (0.016)               (0.023)        (0.024) 
##                                                                                                       
## log(manufac_exports + 1)        0.270***                -0.206***               0.029        -0.117***
##                                  (0.075)                 (0.021)               (0.054)        (0.035) 
##                                                                                                       
## Constant                                                                                     -2.102***
##                                                                                               (0.277) 
##                                                                                                       
## ------------------------------------------------------------------------------------------------------
## Observations                       624                     624                   624            624   
## R2                                0.093                   0.298                 0.009          0.035  
## Adjusted R2                      -0.239                   0.290                -0.363          0.028  
## F Statistic              11.738*** (df = 4; 456) 65.288*** (df = 4; 616) 0.996 (df = 4; 453) 26.636***
## ======================================================================================================
## Note:                                                                      *p<0.1; **p<0.05; ***p<0.01

Choosing the correct Model for Panel Data

Testing which panel model is better (random / fixed)

First run the Hausman test.

If the p is smaller than 0.05, use fixed effects

plm::phtest(econ_model_re,  econ_model_fe_ind)

## 
##  Hausman Test
## 
## data:  log(budget_pc) ~ log(fdi_in_per_cap + 1) + finance_kof + log(energy_imports +  ...
## chisq = 101.03, df = 4, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent

The p value is under 0.05 so we can focus on the fixed effects model

Testing which effects are better (individual / time / twoways)

Run the Lagrange Multiplier Test for time effects

Breusch-Pagan type for unbalanced data

If the p is smaller than 0.05, we use time-fixed effects (null is NO time fixed effects)

plmtest(econ_model_fe_ind, c("time"), type=("bp"))

## 
##  Lagrange Multiplier Test - time effects (Breusch-Pagan) for unbalanced
##  panels
## 
## data:  log(budget_pc) ~ log(fdi_in_per_cap + 1) + finance_kof + log(energy_imports +  ...
## chisq = 1596, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects

# NB alternatives to Breusch Pagan ("bp")
# honda for balanced data
# kw

pFtest(culture_model_fe_ind, culture_model_fe_time)

## 
##  F test for individual effects
## 
## data:  log(budget) ~ agree_with_US_score + cultural_kof + log(dist_to_dc) +  ...
## F = 19.031, df1 = 153, df2 = 796, p-value < 2.2e-16
## alternative hypothesis: significant effects

Compare to Overseas Development Aid

Dependent variable : ODA per capita received

Independent variables : * FDI inflows per capita * Financial globalization

oda_model_fe_ind <- plm(log(oda_per_cap_current + 1) ~  log(fdi_in_per_cap + 1) + finance_kof, data = pd_trade, index = c("un_code", "year"), model = "within", effect = "twoways")

oda_model_fe_time <- plm(log(oda_per_cap_current + 1) ~ log(fdi_in_per_cap  + 1) + finance_kof, data = pd_trade, index = c("un_code", "year"), model = "within", effect = "time")

oda_model_fe_two <- plm(log(oda_per_cap_current + 1) ~  log(fdi_in_per_cap  + 1) + finance_kof, data = pd_trade, index = c("un_code", "year"), model = "within", effect = "twoways")
 
oda_model_re <- plm(log(oda_per_cap_current + 1) ~  log(fdi_in_per_cap  + 1) + finance_kof, data = pd_trade, index = c("un_code", "year"), model = "random")

stargazer(econ_model_fe_ind, econ_model_fe_time, econ_model_fe_two, econ_model_re,
          oda_model_fe_ind, oda_model_fe_time, oda_model_fe_two, oda_model_re, type = "text")

## 
## ==============================================================================================================================================================================
##                                                                                           Dependent variable:                                                                 
##                          -----------------------------------------------------------------------------------------------------------------------------------------------------
##                                                         log(budget_pc)                                                      log(oda_per_cap_current + 1)                      
##                                    (1)                     (2)                   (3)            (4)            (5)                  (6)                   (7)           (8)   
## ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## log(fdi_in_per_cap + 1)          0.090*                   0.007                 0.034          0.070          0.001               0.279***               0.001         0.034  
##                                  (0.052)                 (0.042)               (0.035)        (0.047)        (0.033)              (0.079)               (0.033)       (0.033) 
##                                                                                                                                                                               
## finance_kof                     -0.056***               0.034***               -0.009        0.017***         0.003                0.005                 0.003         0.004  
##                                  (0.012)                 (0.003)               (0.008)        (0.005)        (0.007)              (0.005)               (0.007)       (0.006) 
##                                                                                                                                                                               
## log(energy_imports + 1)          0.069**                 -0.013                 0.027          0.003                                                                          
##                                  (0.034)                 (0.016)               (0.023)        (0.024)                                                                         
##                                                                                                                                                                               
## log(manufac_exports + 1)        0.270***                -0.206***               0.029        -0.117***                                                                        
##                                  (0.075)                 (0.021)               (0.054)        (0.035)                                                                         
##                                                                                                                                                                               
## Constant                                                                                     -2.102***                                                                3.392***
##                                                                                               (0.277)                                                                 (0.311) 
##                                                                                                                                                                               
## ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Observations                       624                     624                   624            624            448                  448                   448           448   
## R2                                0.093                   0.298                 0.009          0.035          0.001                0.037                 0.001         0.009  
## Adjusted R2                      -0.239                   0.290                -0.363          0.028         -0.370                0.026                -0.370         0.005  
## F Statistic              11.738*** (df = 4; 456) 65.288*** (df = 4; 616) 0.996 (df = 4; 453) 26.636*** 0.088 (df = 2; 326) 8.441*** (df = 2; 442) 0.088 (df = 2; 326)  1.904  
## ==============================================================================================================================================================================
## Note:                                                                                                                                              *p<0.1; **p<0.05; ***p<0.01

pd_df %>% 
  # filter(year == 2015) %>% 
  ggplot(aes(x = cso_repress, y = log(budget + 1))) +
  geom_smooth(method = "lm") + 
  geom_text(aes(label = country.x, color = income_group_name), size = 2) +
  facet_wrap(~income_group_name, scales = "free")

China

pd_df %<>% 
  mutate(is_Africa = ifelse(continent.x == "Africa", 1, 0))

china_influence <- plm(log(budget_pc + 1) ~ common_relig_china +
                        as.factor(rta_china) + 
                        log(tradeflow_with_china)*as.factor(is_Africa) +
                        # log(import_to_china_cow) + 
                        # log(export_from_china_cow) + 
                        int_org_with_china,  
                        # log(dist_to_beijing),
                        # as.factor(contiguity_china), 
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "random")


stargazer(china_influence, type = "text")

## 
## ===========================================================================
##                                                     Dependent variable:    
##                                                 ---------------------------
##                                                     log(budget_pc + 1)     
## ---------------------------------------------------------------------------
## common_relig_china                                         1.688           
##                                                           (3.594)          
##                                                                            
## as.factor(rta_china)1                                     -0.128*          
##                                                           (0.077)          
##                                                                            
## log(tradeflow_with_china)                                -0.060***         
##                                                           (0.014)          
##                                                                            
## as.factor(is_Africa)1                                    -0.708***         
##                                                           (0.254)          
##                                                                            
## int_org_with_china                                         0.005           
##                                                           (0.004)          
##                                                                            
## log(tradeflow_with_china):as.factor(is_Africa)1           0.042**          
##                                                           (0.020)          
##                                                                            
## Constant                                                 0.956***          
##                                                           (0.170)          
##                                                                            
## ---------------------------------------------------------------------------
## Observations                                                194            
## R2                                                         0.140           
## Adjusted R2                                                0.112           
## F Statistic                                              30.436***         
## ===========================================================================
## Note:                                           *p<0.1; **p<0.05; ***p<0.01

Russia

pd_df %<>% 
  mutate(is_Africa = ifelse(continent.x == "Africa", 1, 0))

russia_influence <- plm(log(budget_pc + 1) ~ 
                         common_relig_russia +
                          log(dist_to_moscow) +
                         # as.factor(rta_russia) + 
                         as.factor(common_ethn_lang_russia) +
                         as.factor(colonial_relationship_russia) +
                         log(tradeflow_with_russia) + 
                         log(int_org_with_russia),  
                        # log(import_to_china_cow) + 
                        # log(export_from_china_cow) + 
                        # log(dist_to_beijing),
                        # as.factor(contiguity_china), 
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "within", effect = "time")


stargazer(russia_influence, type = "text")

## 
## ====================================================================
##                                              Dependent variable:    
##                                          ---------------------------
##                                              log(budget_pc + 1)     
## --------------------------------------------------------------------
## common_relig_russia                               -1.189**          
##                                                    (0.572)          
##                                                                     
## log(dist_to_moscow)                               -0.127***         
##                                                    (0.030)          
##                                                                     
## as.factor(common_ethn_lang_russia)1                 0.124           
##                                                    (0.095)          
##                                                                     
## as.factor(colonial_relationship_russia)1            0.016           
##                                                    (0.185)          
##                                                                     
## log(tradeflow_with_russia)                        -0.016**          
##                                                    (0.007)          
##                                                                     
## log(int_org_with_russia)                           -0.215           
##                                                    (0.137)          
##                                                                     
## --------------------------------------------------------------------
## Observations                                         191            
## R2                                                  0.159           
## Adjusted R2                                         0.127           
## F Statistic                                5.755*** (df = 6; 183)   
## ====================================================================
## Note:                                    *p<0.1; **p<0.05; ***p<0.01

Military Interests

Scatterplot to look at the relationship between total budget and number of US military bases in each region across the years

pd_df %>% 
  filter(year != 2013) %>%
  filter(country.x != 'Syria') %>% 
  # filter(all_bases != 0) %>% 
  ggplot(aes(x = log(budget), y = log(all_bases))) +
  geom_point(aes(color = bureau), alpha = 0.4) +
  geom_smooth(method = "lm", se = FALSE, aes(color = bureau)) +
  # geom_text(aes(label = country.x, color = bureau)) +
  facet_wrap(~year) + bbplot::bbc_style()

Look at number of US military bases in the country and budget PER CAPITA

pd_df %>% 
  filter(year != 2013) %>%
  filter(country.x != 'Syria') %>% 
  # filter(all_bases != 0) %>% 
  ggplot(aes(x = log(budget_pc), y = log(all_bases))) +
  geom_point(aes(color = bureau), alpha = 0.4) +
  geom_smooth(method = "lm", se = FALSE, aes(color = bureau)) +
  # geom_text(aes(label = country.x, color = bureau)) +
  facet_wrap(~year) + bbplot::bbc_style()

They look very similar, so I look at 2014 more closely

We can look only at countries with bases (remove all countries from the scatterplot with no military bases) in 2014

pd_df %>% 
  filter(country.x != 'Syria') %>% 
  filter(year == 2014) %>% 
  filter(all_bases != 0) %>% 
  ggplot(aes(x = log(budget), y = log(all_bases))) +
  # geom_point(aes(color = bureau), alpha = 0.4) +
  geom_smooth(method = "lm", se = FALSE, aes(color = bureau), alpha = 0.2) +
  geom_text(aes(label = country.x, color = bureau)) +
  facet_wrap(~year)

Regressions - Budget per capita

pd_df %<>% 
  distinct(un_code, year, .keep_all = TRUE)

military_model_time <- plm(log(budget) ~ log(military_exp + 1) + all_bases*as.factor(al_qaeda_main) + log(troops + 1) , 
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "within", effect = "time")

military_model<- plm(log(budget) ~ log(military_exp + 1) + all_bases*as.factor(al_qaeda_main) + log(troops + 1)
                     , 
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "within")

military_model_two <- plm(log(budget) ~log(military_exp + 1) + all_bases*as.factor(al_qaeda_main) + log(troops + 1)
                          , 
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "within", effect = "twoway")

military_model_re <- plm(log(budget) ~ log(military_exp + 1) + all_bases*as.factor(al_qaeda_main) + log(troops + 1)
                         ,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "random")

stargazer(military_model, military_model_time, military_model_two, military_model_re,
          type = "text")

## 
## ================================================================================================================
##                                                                 Dependent variable:                             
##                                     ----------------------------------------------------------------------------
##                                                                     log(budget)                                 
##                                              (1)                    (2)                    (3)            (4)   
## ----------------------------------------------------------------------------------------------------------------
## log(military_exp + 1)                      -0.117*                0.078***               -0.084        0.095*** 
##                                            (0.067)                (0.009)                (0.069)        (0.018) 
##                                                                                                                 
## all_bases                                                          -0.003                               0.032** 
##                                                                   (0.007)                               (0.013) 
##                                                                                                                 
## as.factor(al_qaeda_main)1                                         3.535***                               2.033  
##                                                                   (1.267)                               (2.868) 
##                                                                                                                 
## log(troops + 1)                           -0.041**                0.156***               -0.029          0.007  
##                                            (0.021)                (0.017)                (0.023)        (0.019) 
##                                                                                                                 
## all_bases:as.factor(al_qaeda_main)1                                -0.102                                0.143  
##                                                                   (0.167)                               (0.375) 
##                                                                                                                 
## Constant                                                                                               12.079***
##                                                                                                         (0.353) 
##                                                                                                                 
## ----------------------------------------------------------------------------------------------------------------
## Observations                                 888                    888                    888            888   
## R2                                          0.010                  0.377                  0.004          0.333  
## Adjusted R2                                -0.200                  0.370                 -0.215          0.329  
## F Statistic                         3.673** (df = 2; 732) 106.313*** (df = 5; 877) 1.447 (df = 2; 727) 80.097***
## ================================================================================================================
## Note:                                                                                *p<0.1; **p<0.05; ***p<0.01

plm::phtest(military_model_re, military_model)

## 
##  Hausman Test
## 
## data:  log(budget) ~ log(military_exp + 1) + all_bases * as.factor(al_qaeda_main) +  ...
## chisq = 39.146, df = 2, p-value = 3.159e-09
## alternative hypothesis: one model is inconsistent

pFtest( military_model, military_model_time)

## 
##  F test for individual effects
## 
## data:  log(budget) ~ log(military_exp + 1) + all_bases * as.factor(al_qaeda_main) +  ...
## F = 23.957, df1 = 145, df2 = 732, p-value < 2.2e-16
## alternative hypothesis: significant effects

plmtest(military_model_two, c("ind"), type=("bp"))

## 
##  Lagrange Multiplier Test - (Breusch-Pagan) for unbalanced panels
## 
## data:  log(budget) ~ log(military_exp + 1) + all_bases * as.factor(al_qaeda_main) +  ...
## chisq = 1226.2, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects

Information and media censorship

Looking at the role of information globalization and level of government censorship of the media.

Do we see a negative relatiosni

censor_model_time <- plm(log(budget_pc) ~ gov_censor_media + info_kof,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "within", effect = "time")

censor_model_ind <- plm(log(budget_pc) ~ gov_censor_media + info_kof,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "within", effect = "individual")

censor_model_two <- plm(log(budget_pc) ~ gov_censor_media + info_kof,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "within", effect = "twoways")

censor_model_swar <- plm(log(budget_pc) ~ gov_censor_media + info_kof,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "random")

censor_model_wal <- plm(log(budget_pc) ~ gov_censor_media + info_kof,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "random",
                      random.method = "walhus")

censor_model_ner <- plm(log(budget_pc) ~ gov_censor_media + info_kof,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "random",
                      random.method = "nerlove")

censor_model_amemiya <- plm(log(budget_pc) ~ gov_censor_media + info_kof,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "random",
                      random.method = "amemiya")

stargazer(censor_model_time, censor_model_ind,censor_model_two, censor_model_swar,
          column.labels = c("Time FE", "Individual FE", "Twoway FE","Swamy RE"), type = "text")

## 
## ==============================================================================================
##                                               Dependent variable:                             
##                  -----------------------------------------------------------------------------
##                                                 log(budget_pc)                                
##                          Time FE              Individual FE           Twoway FE      Swamy RE 
##                            (1)                     (2)                   (3)            (4)   
## ----------------------------------------------------------------------------------------------
## gov_censor_media          0.002                 0.213***               0.073**       0.159*** 
##                          (0.025)                 (0.049)               (0.037)        (0.039) 
##                                                                                               
## info_kof                0.024***                -0.056***               0.001          0.002  
##                          (0.003)                 (0.010)               (0.008)        (0.005) 
##                                                                                               
## Constant                                                                             -2.168***
##                                                                                       (0.368) 
##                                                                                               
## ----------------------------------------------------------------------------------------------
## Observations               959                     959                   959            959   
## R2                        0.103                   0.056                 0.005          0.020  
## Adjusted R2               0.097                  -0.135                -0.204          0.018  
## F Statistic      54.717*** (df = 2; 951) 23.488*** (df = 2; 797) 1.988 (df = 2; 792) 19.356***
## ==============================================================================================
## Note:                                                              *p<0.1; **p<0.05; ***p<0.01

phtest(censor_model_swar, censor_model_ind)

## 
##  Hausman Test
## 
## data:  log(budget_pc) ~ gov_censor_media + info_kof
## chisq = 48.81, df = 2, p-value = 2.518e-11
## alternative hypothesis: one model is inconsistent

pFtest(censor_model_ind, censor_model_time)

## 
##  F test for individual effects
## 
## data:  log(budget_pc) ~ gov_censor_media + info_kof
## F = 17.346, df1 = 154, df2 = 797, p-value < 2.2e-16
## alternative hypothesis: significant effects

plm::pooltest(log(budget_pc) ~ gov_censor_media + info_kof,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "within")

## 
##  F statistic
## 
## data:  log(budget_pc) ~ gov_censor_media + info_kof
## F = 1.4723, df1 = 318, df2 = 479, p-value = 6.571e-05
## alternative hypothesis: unstability

pFtest(log(budget_pc) ~ gov_censor_media + info_kof,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "within", effect = "time", type = "ghm")

## 
##  F test for time effects
## 
## data:  log(budget_pc) ~ gov_censor_media + info_kof
## F = 23.937, df1 = 5, df2 = 951, p-value < 2.2e-16
## alternative hypothesis: significant effects

## PUBLIC DIPLOMACY 
# pd_df %>% 
#   filter(year == 2014) %>% 
#   # filter(!is.na(region_name)) %>% 
#   filter(income_group_name != "NULL") %>% 
#   mutate(mil_aid_pc = sum_mil_aid / pop) %>%
#   mutate(mil_exp_pc = military_exp / pop) %>% 
#   ggplot(aes(x = log(budget_pc), y = trade_kof_dj)) + geom_point(aes(color = income_group_name)) + geom_smooth(method = "lm") + facet_wrap(~region_name)

pd_df %>% 
    filter(year != 2019) %>%
    filter(income_group_name != "NULL") %>% 
  filter(!is.na(income_group_name)) %>% 
  ggplot(aes(y = log(budget_pc), x = trade_kof_dj)) + geom_point(aes(color = income_group_name)) + geom_smooth(method = "lm") + facet_wrap(~year)

## ODA 


pd_df %>% 
  filter(year == 2016) %>% 
  ggplot(aes(x = log(oda_per_cap_current), y = trade_kof_dj)) + geom_point(aes(color = region_name)) + geom_smooth(method = "lm") + facet_wrap(~bureau)

Distance to DC and budget PER CAPITA (logged)

pd_df %>% 
  # filter(year == 2014) %>%
  # filter(region_name == "Sub-Saharan Africa") %>% 
  filter(!is.na(region_name)) %>% 
  # filter(continent.x != "Pacific") %>% 
  ggplot(aes(x = log(budget_pc), y = log(dist_to_dc))) + 
  geom_point(aes(color = region_name)) + 
  geom_smooth(method = "lm") + 
  facet_wrap(~year) +
  bbplot::bbc_style() + 
  theme(legend.position = "bottom",
        legend.text = element_text(size = 10))

Distance to Beijing and budget PER CAPITA (logged)

pd_df %>% 
  # filter(year == 2014) %>%
  # filter(region_name == "Sub-Saharan Africa") %>% 
  filter(!is.na(region_name)) %>% 
  # filter(continent.x != "Pacific") %>% 
  ggplot(aes(x = log(budget_pc), y = log(dist_to_beijing))) + 
  geom_point(aes(color = region_name)) + 
  geom_smooth(method = "lm") +
  facet_wrap(~year) +
  bbplot::bbc_style() + 
  theme(legend.position = "bottom",
        legend.text = element_text(size = 10))

Distance to Russia and budget PER CAPITA (logged)

pd_df %>% 
  filter(year == 2014) %>%
  # filter(region_name == "Sub-Saharan Africa") %>% 
  filter(!is.na(region_name)) %>% 
  # filter(continent.x != "Pacific") %>% 
  ggplot(aes(x = log(budget_pc), y = log(dist_to_dc))) + geom_point(aes(color = region_name)) + geom_smooth(method = "lm") + facet_wrap(~bureau) +
  bbplot::bbc_style() +   
  theme(legend.position = "bottom",
        legend.text = element_text(size = 10))

censor_model_time <- plm(log(budget_pc) ~ rule_of_law,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "within", effect = "time")

censor_model_ind <- plm(log(budget_pc) ~rule_of_law,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "within", effect = "individual")

censor_model_two <- plm(log(budget_pc) ~ rule_of_law,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "within", effect = "twoways")

censor_model_swar <- plm(log(budget_pc) ~ rule_of_law,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "random")

censor_model_wal <- plm(log(budget_pc) ~  rule_of_law,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "random",
                      random.method = "walhus")

censor_model_ner <- plm(log(budget_pc) ~ rule_of_law,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "random",
                      random.method = "nerlove")

censor_model_amemiya <- plm(log(budget_pc) ~ rule_of_law,
                      data = pd_df, 
                      index = c("un_code", "year"), 
                      model = "random",
                      random.method = "amemiya")

stargazer(censor_model_time, censor_model_ind,censor_model_two, censor_model_swar, censor_model_wal, censor_model_ner, censor_model_amemiya,  
                    column.labels = c(  "Time FE", "Individual FE", "Twoway FE","Swamy RE", "Walhus RE", "Nerlove RE","Amemiya RE"),
type = "text") %>%  kbl() %>% kable_paper()

## 
## =======================================================================================================================
##                                                         Dependent variable:                                            
##              ----------------------------------------------------------------------------------------------------------
##                                                            log(budget_pc)                                              
##                      Time FE             Individual FE         Twoway FE      Swamy RE  Walhus RE Nerlove RE Amemiya RE
##                        (1)                    (2)                 (3)            (4)       (5)       (6)        (7)    
## -----------------------------------------------------------------------------------------------------------------------
## rule_of_law          0.967***                0.148               0.390        0.658***  0.659***   0.609***   0.649*** 
##                      (0.104)                (0.329)             (0.243)        (0.205)   (0.204)   (0.216)    (0.207)  
##                                                                                                                        
## Constant                                                                      -2.342*** -2.342*** -2.314***  -2.337*** 
##                                                                                (0.138)   (0.138)   (0.148)    (0.140)  
##                                                                                                                        
## -----------------------------------------------------------------------------------------------------------------------
## Observations          1,118                  1,118               1,118          1,118     1,118     1,118      1,118   
## R2                    0.073                 0.0002               0.003          0.010     0.010     0.008      0.009   
## Adjusted R2           0.067                 -0.167              -0.171          0.009     0.009     0.007      0.008   
## F Statistic  86.895*** (df = 1; 1110) 0.204 (df = 1; 957) 2.580 (df = 1; 951) 10.345*** 10.396***  7.922***   9.845*** 
## =======================================================================================================================
## Note:                                                                                       *p<0.1; **p<0.05; ***p<0.01

x

=======================================================================================================================
Dependent variable:
———————————————————————————————————-
log(budget_pc)
Time FE Individual FE Twoway FE Swamy RE Walhus RE Nerlove RE Amemiya RE
`(2) (3) (4) (5) (6) (7) </td>`
———————————————————————————————————————–
rule_of_law 0.967* 0.148 0.390 0.658* 0.659* 0.609* 0.649***
(0.104) (0.329) (0.243) (0.205) (0.204) (0.216) (0.207)

Constant -2.342* -2.342* -2.314* -2.337*
(0.138) (0.138) (0.148) (0.140)

———————————————————————————————————————–
Observations 1,118 1,118 1,118 1,118 1,118 1,118 1,118
R2 0.073 0.0002 0.003 0.010 0.010 0.008 0.009
Adjusted R2 0.067 -0.167 -0.171 0.009 0.009 0.007 0.008
F Statistic 86.895* (df = 1; 1110) 0.204 (df = 1; 957) 2.580 (df = 1; 951) 10.345* 10.396* 7.922* 9.845***
=======================================================================================================================
Note: p<0.1; p<0.05; p<0.01

Scatter Plot

Is there a correlation between PD budgets per capita and level of financial globalization?

Scatterplot

Is there a correlation between PD budgets per capita and manufacturing exports from the US per capita?

We can break it down by region

pd_df %>% 
  mutate(budget_pc = budget / pop) %>% 
  mutate(energy_imp_pc = energy_imports / pop) %>% 
  mutate(agri_imp_pc = agri_imports / pop) %>% 
  mutate(services_imp_pc = services_imports / pop) %>% 
  mutate(manufac_imp_pc = manufac_imports / pop) %>% 
  
  mutate(energy_exp_pc = energy_exports / pop) %>% 
  mutate(agri_exp_pc = agri_exports / pop) %>% 
  mutate(services_exp_pc = services_exports / pop) %>% 
  mutate(manufac_exp_pc = manufac_exports / pop) %>% 

  filter(year == 2016) %>% 
  
  ggplot(aes(x = log(budget_pc), 
             y = log(cultural_kof_dj ))) +
  geom_point(aes(color = bureau)) +
  facet_wrap(~bureau)

Review

Paula

7/20/2021

Descriptive statistics

Bump Plot - Top ranking countries recieving aid as a proportion of total aid given THAT YEAR

Look at median budget per country per year

Top 10 budget recipient countries summed over the seven years

Bottom 10 budget recipient countries - summed over the seven years

Top 10 budget per capita recipient countries summed over the seven years

Excluding population under 1 million

Bottom 10 budget per capita recipient countries summed over the seven years

Excluding population under 1 million

TOP 10 budget as a percentage of their GDP - summed over the seven years

Excluding population under 1 million

Bottom 10 budget as a percentage of their GDP - summed over the seven years

Excluding population under 1 million

TOP 10 budget as a PROPORTION OF TOTAL PD BUDGET ALLOCATION - summed over the seven years

Excluding population under 1 million

Histogram distribution of total PD budget

Histogram distribution of PD budget PER CAPITA

Looking at Department of State Bureaus across the years.

Looking at income groups across the years.

Mean budgets per income groups

Correlations in Europe

Correlations in Africa

Correlations in Asia

Basic demographics

Hypothesis

Regression - Budget Per Capita

Regression - Total Budget

Choosing the correct Model for Panel Data

Testing which panel model is better (random / fixed)

Testing which effects are better (individual / time / twoways)

Breusch Pagan time effects - Lagrange Mutliplier Test for unbalanced data

F-Test for time fixed effects

Correct for heteroskedasticity

Correcting coefficients with robust standard errors

Cultural affinity

Hypothesis

Operationalization

Regression - Budget Per Capita

Regression - Total Budget

Choosing the correct Model for Panel Data

Testing which panel model is better (random / fixed)

Testing which effects are better (individual / time / twoways)

Economic motivations and Public Diplomacy Spending

Hypothesis

Operationalization

Scatterplot

The following regressions are from 2013 to 2016

Choosing the correct Model for Panel Data

Testing which panel model is better (random / fixed)

Testing which effects are better (individual / time / twoways)

Compare to Overseas Development Aid

China

Russia

Military Interests

Regressions - Budget per capita

Information and media censorship

Distance to DC and budget PER CAPITA (logged)

Distance to Beijing and budget PER CAPITA (logged)

Distance to Russia and budget PER CAPITA (logged)

Scatter Plot

Scatterplot