Data available here: https://github.com/dmorgan26/UN-Votes

Source: Erik Voeten “Data and Analyses of Voting in the UN General Assembly” Routledge Handbook of International Organization, edited by Bob Reinalda (published 2017-06-19)

Analysis of the Agreeableness of Member States at the UN General Assembly

library(readr)
unvotes <- read_csv("C:\\Users\\Dan\\Desktop\\R\\Markdowns\\UN-Votes\\votes.csv", col_names=TRUE)

Variable Names

library(dplyr)
glimpse(unvotes)
## Observations: 1,048,575
## Variables: 4
## $ assembly_session <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ rc_id            <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
## $ state_code       <int> 2, 20, 31, 40, 41, 42, 51, 52, 53, 54, 55, 56...
## $ vote             <int> 1, 3, 9, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, ...

Data Cleaning

Removed absences and replaced assembly_session with year. The first assembly vote in the data set was in 1946.

# Keep rows whose vote code is at most 3 (described in the write-up as removing
# absences), derive the calendar year from the session number (session 1 maps
# to 1946), and drop the session column by name rather than by position so the
# step does not depend on column order.
votes_processed <- unvotes %>%
  filter(vote <= 3) %>%
  mutate(year = assembly_session + 1945) %>%
  select(-assembly_session)

glimpse(votes_processed)
## Observations: 722,082
## Variables: 4
## $ rc_id      <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
## $ state_code <int> 2, 20, 40, 41, 42, 70, 90, 91, 92, 93, 94, 95, 100,...
## $ vote       <int> 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ year       <dbl> 1946, 1946, 1946, 1946, 1946, 1946, 1946, 1946, 194...

Replaced state_code with country name

library(countrycode)
votes_processed <- votes_processed %>%
                      mutate(country = countrycode(state_code, "cown", "country.name"))
## Warning in countrycode(state_code, "cown", "country.name"): Some values were not matched unambiguously: 260, 816

The data gave ‘state_code’ as Correlates of War codes

260 = German Federal Republic (commonly known as West Germany). 816 = Vietnam (the Democratic Republic of Vietnam before reunification, and the unified state afterwards).

Other relevant codes

255 = Germany
265 = East Germany
817 = Republic of Vietnam (commonly known as South Vietnam)

# Rows for state code 260. state_code is an integer column, so it is compared
# with a number instead of relying on implicit string coercion.
code_260 <- votes_processed %>%
    filter(state_code == 260)

range(code_260$year)
## [1] 1973 1989

Dataset contained West German votes from 1973-1989

# Rows for state code 265. state_code is an integer column, so it is compared
# with a number instead of relying on implicit string coercion.
code_265 <- votes_processed %>%
    filter(state_code == 265)

range(code_265$year)
## [1] 1973 1989

Dataset contained East German votes from 1973-1989

# Rows for state code 255. state_code is an integer column, so it is compared
# with a number instead of relying on implicit string coercion.
code_255 <- votes_processed %>%
              filter(state_code == 255)

range(code_255$year)
## [1] 1990 2014

Dataset contained (unified) German votes from 1990-2014

There wasn’t any overlap of the dates so I assigned state_code 260 to West Germany

# Label every row with state code 260 explicitly; the code is compared as a
# number because state_code is an integer column.
votes_processed$country[votes_processed$state_code == 260] <- "West Germany"

# Rows for state code 816. state_code is an integer column, so it is compared
# with a number instead of relying on implicit string coercion.
code_816 <- votes_processed %>%
    filter(state_code == 816)

range(code_816$year)
## [1] 1977 2014

Dataset contained Vietnamese votes from 1977-2014

# Rows for state code 817. state_code is an integer column, so it is compared
# with a number instead of relying on implicit string coercion.
code_817 <- votes_processed %>%
              filter(state_code == 817)

dim(code_817)
## [1] 0 5

There was no data for South Vietnam so I assigned state_code 816 to Vietnam

# Label every row with state code 816 explicitly; the code is compared as a
# number because state_code is an integer column.
votes_processed$country[votes_processed$state_code == 816] <- "Vietnam"

sum(is.na(votes_processed$country))
## [1] 0
# The numeric state code is no longer needed once `country` is filled in; drop
# it by name so the step does not depend on the column sitting in position 2.
votes_processed <- votes_processed %>%
  select(-state_code)

glimpse(votes_processed)
## Observations: 722,082
## Variables: 4
## $ rc_id   <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
## $ vote    <int> 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ year    <dbl> 1946, 1946, 1946, 1946, 1946, 1946, 1946, 1946, 1946, ...
## $ country <chr> "United States", "Canada", "Cuba", "Haiti", "Dominican...

Proportion of “Yes” votes

# Overall number of votes and the share of them cast with vote code 1.
percent_yes <- summarize(
  votes_processed,
  total = n(),
  percent_yes = mean(vote == 1)
)

percent_yes
## # A tibble: 1 x 2
##    total percent_yes
##    <int>       <dbl>
## 1 722082       0.797

79.7% of all votes on proposed resolutions were for “Yes”

Which member states were the most/least agreeable?

# Per-country vote count and share of votes with code 1, sorted so the lowest
# share comes first.
by_country <- arrange(
  summarize(
    group_by(votes_processed, country),
    total = n(),
    percent_yes = mean(vote == 1)
  ),
  percent_yes
)

head(by_country)
## # A tibble: 6 x 3
##   country                          total percent_yes
##   <chr>                            <int>       <dbl>
## 1 Zanzibar                             2       0    
## 2 United States                     5298       0.284
## 3 Palau                              829       0.306
## 4 Israel                            4850       0.348
## 5 West Germany                      2067       0.396
## 6 Micronesia (Federated States of)  1390       0.410

Zanzibar was a suspected outlier with only two votes

library(ggplot2)
library(ggthemes)
# Histogram of per-country vote totals in bins of 100 votes.
# The axis-title theme override comes after theme_fivethirtyeight() so that it
# is not overwritten by the complete theme.
ggplot(by_country, aes(x = total)) +
  geom_histogram(binwidth = 100) +
  theme_fivethirtyeight() +
  theme(axis.title = element_text(), axis.title.x = element_text()) +
  labs(
    title = "Frequency distribution of total number of votes cast",
    x = "Total Votes Cast",
    y = "Frequency"
  )

The data showed left skew so I used a log transformation

# Single horizontal box plot of log(total votes) across all countries.
# coord_flip() turns the box on its side, so the "y" label ends up on the
# horizontal axis.
ggplot(by_country, aes(x = "", y = log(total))) +
  geom_boxplot() +
  coord_flip() +
  theme_fivethirtyeight() +
  theme(axis.title = element_text(), axis.title.x = element_text()) +
  labs(
    title = "Distribution of total number of votes cast by country",
    x = "",
    y = "Log(Total Votes)"
  )

The box plot showed three clear outliers, each more than 1.5*IQR below the lower quartile.

by_country %>%
    arrange(total)
## # A tibble: 200 x 3
##    country     total percent_yes
##    <chr>       <int>       <dbl>
##  1 Zanzibar        2       0    
##  2 South Sudan   114       0.649
##  3 Kiribati      145       0.876
##  4 Nauru         615       0.603
##  5 Montenegro    619       0.646
##  6 Tuvalu        629       0.838
##  7 Timor-Leste   755       0.968
##  8 Tonga         828       0.731
##  9 Palau         829       0.306
## 10 Switzerland   918       0.659
## # ... with 190 more rows

I filtered for total votes greater than 150 to remove the three outliers

by_country <- by_country %>%
                    filter(total > 150)

by_country %>%
    arrange(percent_yes)
## # A tibble: 197 x 3
##    country                          total percent_yes
##    <chr>                            <int>       <dbl>
##  1 United States                     5298       0.284
##  2 Palau                              829       0.306
##  3 Israel                            4850       0.348
##  4 West Germany                      2067       0.396
##  5 Micronesia (Federated States of)  1390       0.410
##  6 United Kingdom                    5279       0.428
##  7 France                            5232       0.433
##  8 Marshall Islands                  1520       0.482
##  9 Belgium                           5298       0.494
## 10 Canada                            5316       0.512
## # ... with 187 more rows

The United States, Palau and Israel were the least agreeable member states

by_country %>%
    arrange(desc(percent_yes))
## # A tibble: 197 x 3
##    country              total percent_yes
##    <chr>                <int>       <dbl>
##  1 Seychelles            1757       0.978
##  2 Timor-Leste            755       0.968
##  3 São Tomé & Príncipe   2382       0.967
##  4 Cape Verde            3203       0.959
##  5 Djibouti              3253       0.957
##  6 Guinea-Bissau         2986       0.955
##  7 Comoros               2461       0.946
##  8 Mozambique            3366       0.944
##  9 Yemen                 1586       0.943
## 10 United Arab Emirates  3937       0.943
## # ... with 187 more rows

Seychelles, Timor-Leste and Sao Tome & Principe were the most agreeable member states

Agreeableness of the Assembly Over Time

# Yearly vote count and share of votes with code 1 across the whole Assembly.
by_year <- votes_processed %>%
  group_by(year) %>%
  summarize(total = n(), percent_yes = mean(vote == 1))

# `year` is numeric, so the tick positions are set on a continuous scale.
# Passing numeric limits to scale_x_discrete() only worked as a side effect and
# raises a warning in recent ggplot2 releases.
ggplot(by_year, aes(x = year, y = percent_yes)) +
  geom_line() +
  scale_y_continuous(breaks = seq(0, 1, 0.05), labels = scales::percent) +
  scale_x_continuous(breaks = c(1946, seq(1950, 2010, 5), 2014)) +
  labs(title = "% Yes votes at the UN General Assembly by Year", y = "% Yes", x = "Year") +
  theme_fivethirtyeight()

Investigation of the huge dip

which.min(by_year$percent_yes)
## [1] 19
by_year[17:21,]
## # A tibble: 5 x 3
##    year total percent_yes
##   <dbl> <int>       <dbl>
## 1  1962  4642      0.600 
## 2  1963  3308      0.729 
## 3  1964   112      0.0179
## 4  1965  4382      0.708 
## 5  1966  5868      0.611

The data set showed 1964 total percentage of yes votes as 1.8%. The numbers for 1963 and 1965 were as expected at 72.9% and 70.8% respectively, so I assumed that this was an error and removed the year 1964 from the data set.

# Remove the anomalous 1964 session by value rather than by row position, so
# the step stays correct if the rows of by_year are ever reordered. `year` is
# numeric, so it is compared with a number.
by_year <- by_year %>%
  filter(year != 1964)

votes_processed <- votes_processed %>%
  filter(year != 1964)

# Same chart as before, now without 1964. Tick positions are set on a
# continuous scale because `year` is numeric.
ggplot(by_year, aes(x = year, y = percent_yes)) +
  geom_line() +
  scale_y_continuous(breaks = seq(0, 1, 0.05), labels = scales::percent) +
  scale_x_continuous(breaks = c(1946, seq(1950, 2010, 5), 2014)) +
  labs(title = "% Yes votes at the UN General Assembly by Year", y = "% Yes", x = "Year") +
  theme_fivethirtyeight()

Trend Visualisation - Least Squares Regression

library(scales)
# Scatter of yearly yes-share with a least-squares line (method = "lm").
# Tick positions are set on a continuous scale because `year` is numeric;
# scale_x_discrete() with numeric limits warns in recent ggplot2 releases.
ggplot(by_year, aes(x = year, y = percent_yes)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_x_continuous(breaks = c(1946, seq(1950, 2010, 5), 2014)) +
  scale_y_continuous(limits = c(0.4, 1), breaks = seq(0.4, 1, 0.05), labels = scales::percent) +
  labs(title = "Least Squares % Yes votes at the UN General Assembly by Year", y = "% Yes", x = "Year") +
  theme_fivethirtyeight()

Comparison of Countries

countries_line <- c("United Kingdom", "United States", "India", "China")

# Yearly vote count and yes-share per country. The visible script uses
# by_year_country from here on without defining it anywhere, so it is built
# here from the cleaned votes. It is ungrouped so later steps see a plain table.
by_year_country <- votes_processed %>%
  group_by(year, country) %>%
  summarize(total = n(), percent_yes = mean(vote == 1)) %>%
  ungroup()

# One coloured line per selected country. Tick positions are set on a
# continuous scale because `year` is numeric.
by_year_country %>%
  filter(country %in% countries_line) %>%
  ggplot(aes(x = year, y = percent_yes, col = country)) +
  geom_line() +
  scale_x_continuous(breaks = c(1946, seq(1950, 2010, 5), 2014)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.1), labels = scales::percent) +
  scale_color_discrete(name = "Country") +
  labs(title = "Vote Yes % By Country", y = "% Yes", x = "Year") +
  theme_fivethirtyeight()

Comparing this many countries on a single line plot would be cluttered, so I used a faceted plot instead

countries_facet <- c(
  "United Kingdom", "United States", "India", "China", "Japan",
  "France", "Italy", "Egypt", "Turkey"
)

# One panel per selected country, all sharing a 0-100% vertical axis.
ggplot(
  filter(by_year_country, country %in% countries_facet),
  aes(x = year, y = percent_yes)
) +
  geom_line() +
  facet_wrap(~country) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2), labels = scales::percent) +
  labs(title = "Vote Yes % By Country", y = "% Yes", x = "Year") +
  theme_fivethirtyeight() +
  theme(strip.background = element_rect(fill = "lightgrey"))

Modelling the trend in agreeableness for a given country

library(tidyr)
library(purrr)
library(broom)
by_year_country <- ungroup(by_year_country)

# Fit one linear model per country (percent_yes ~ year) and keep only the
# slope row (term == "year") of each tidied model, giving one estimate of the
# yearly change in yes-share per country.
# NOTE(review): nest(-country) and unnest(tidied) behave differently across
# tidyr versions (whether the data/model list columns survive) - confirm
# against the installed version before changing this pipeline.
country_coefficients <- by_year_country %>%
                        nest(-country) %>%
                    mutate(model = map(data, ~ lm(percent_yes ~ year, data = .)), 
                           tidied = map(model, tidy)) %>%
                        unnest(tidied) %>%
                        filter(term == "year")

Obtained adjusted p-values since I would expect some raw p-values to be < 0.05 by chance when there are 200 models

# Adjust the slope p-values for multiple comparisons and keep the countries
# whose adjusted p-value is below 0.05. p.adjust() is called without a
# `method` argument, so its default method applies (Holm, per the
# stats::p.adjust documentation).
signif_country_coefficients <- country_coefficients %>%
                                  mutate(p.adjusted = p.adjust(p.value)) %>%
                                  filter(p.adjusted < 0.05)

Which countries showed the largest change in agreeableness over time?

signif_country_coefficients %>%
  select(country, estimate) %>%
  arrange(desc(estimate))
## # A tibble: 108 x 2
##    country                    estimate
##    <chr>                         <dbl>
##  1 Tuvalu                      0.0378 
##  2 German Democratic Republic  0.0193 
##  3 Kyrgyzstan                  0.0119 
##  4 Tajikistan                  0.0116 
##  5 Kazakhstan                  0.0115 
##  6 Yemen Arab Republic         0.0115 
##  7 South Africa                0.0115 
##  8 Malawi                      0.0103 
##  9 Dominican Republic          0.00797
## 10 Mongolia                    0.00775
## # ... with 98 more rows

Tuvalu is the nation whose agreeableness increased the most over their membership. The rate of increase is far larger than any other country so warrants further investigation

tuvalu <- by_year_country %>%
          filter(country == "Tuvalu")

max(tuvalu$year) - min(tuvalu$year)
## [1] 13

Tuvalu had only been voting for 13 years. How many votes did this equate to?

# Total number of votes Tuvalu cast across all years. A plain sum of the yearly
# counts gives the same figure as the running-total-then-max detour, and
# returns 0 rather than -Inf with a warning if the table is empty.
tuvalu %>%
    ungroup() %>%
    summarize(total_votes = sum(total))
## # A tibble: 1 x 1
##   total_votes
##         <dbl>
## 1         629

Tuvalu participated in 629 votes, how did this compare to other nations?

summary(by_country$total)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     615    2407    4116    3664    4840    5316

Tuvalu is an outlier here in a data set with significant left skew.

If data is normally distributed I could see whether Tuvalu falls within three standard deviations of the mean

Did the total number of votes cast by each nation follow a normal distribution?

library(ggpubr)
ggqqplot(by_country$total, main = "Q-Q Plot of Total Votes Cast") +
  theme_fivethirtyeight() +
  theme(axis.title = element_text()) + ylab('Votes') + xlab('Theoretical Quantiles')

Shapiro-Wilk test

H0 : The data are normally distributed HA : The data are not normally distributed

shapiro.test(by_country$total)
## 
##  Shapiro-Wilk normality test
## 
## data:  by_country$total
## W = 0.88463, p-value = 3.706e-11

A p-value < 0.05 and near zero suggested that I should reject the null hypothesis that the data are normally distributed. Visual inspection of the Q-Q plot also suggested that the distribution is not normal.

629 votes seems like a large enough sample so I will keep Tuvalu in the data set.

Consequently, Tuvalu is the member state whose agreeableness increased the most.

Whose agreeableness declined the most over time?

signif_country_coefficients %>%
  arrange(estimate)
## # A tibble: 108 x 7
##    country              term  estimate std.error statis~  p.value p.adjus~
##    <chr>                <chr>    <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
##  1 Bosnia & Herzegovina year  -0.0131   0.00264   - 4.96 7.53e- 5 7.69e- 3
##  2 Vanuatu              year  -0.0111   0.00228   - 4.87 3.09e- 5 3.30e- 3
##  3 South Korea          year  -0.00797  0.000947  - 8.42 2.52e- 8 3.67e- 6
##  4 United States        year  -0.00692  0.000663  -10.4  1.31e-15 2.48e-13
##  5 Israel               year  -0.00661  0.000798  - 8.28 1.03e-11 1.72e- 9
##  6 Yemen                year  -0.00395  0.000893  - 4.42 2.18e- 4 2.21e- 2
##  7 Djibouti             year  -0.00155  0.000401  - 3.87 4.41e- 4 4.10e- 2
##  8 Ukraine              year   0.00259  0.000677    3.83 2.85e- 4 2.74e- 2
##  9 Austria              year   0.00259  0.000581    4.46 3.95e- 5 4.15e- 3
## 10 Sweden               year   0.00263  0.000672    3.91 2.24e- 4 2.24e- 2
## # ... with 98 more rows

Bosnia & Herzegovina’s agreeableness declined the most

Votes by Issue

resolutions <- read_csv("C:\\Users\\Dan\\Desktop\\R\\Markdowns\\UN-Votes\\resolutions.csv", col_names=TRUE)

Cleaning to join to votes_processed

Locating NAs in the data set

# Count missing values in every column of `resolutions`. colSums() covers
# however many columns the file has (no hard-coded 15) and labels each count
# with its column name.
nas <- colSums(is.na(resolutions))

nas
##  [1]    0    0  151 2585    0    0    0    0    0   30   30   30   30   30
## [15]   32

The vast majority of the NA values are in the ‘amendment’ column, which is of no interest, so I omitted all NA values

resolutions <- na.omit(resolutions[,-c(4,6:9)])

# Convert the session number to a calendar year, rename the key to rc_id so it
# matches the votes table, and drop the session column. `year` is then moved
# to the front by name, so the reorder no longer depends on the table having
# exactly ten columns.
resolutions_processed <- resolutions %>%
    mutate(year = assembly_session + 1945) %>%
    rename(rc_id = vote_id) %>%
    select(-assembly_session) %>%
    select(year, everything())

votes_joined <- inner_join(votes_processed, resolutions_processed, by=c("rc_id","year"))

votes_joined_processed <- votes_joined %>%
                          rename(col = colonization, hr = human_rights, 
                          mid = israel_palestine, dis = disarmament, 
                          nuke = nuclear_weapons, eco = economic_development)

How agreeable was a given member state on each issue?

library(stringr)
# Plot, per year, the share of "yes" votes (vote code 1) cast by one country on
# resolutions flagged for one issue.
#
# Args:
#   x: Country name, matched against votes_joined_processed$country.
#   y: Issue indicator column, written by the caller as
#      `votes_joined_processed$<code>` where <code> is one of col, hr, mid,
#      dis, nuke, eco. Rows where the indicator equals 1 are kept.
#
# Returns:
#   A ggplot line chart of percent_yes against year.
plot_agreeableness_by_issue <- function(x, y) {
  # Recover the column code from the caller's expression by stripping
  # everything up to the last `$`. The previous fixed character offset broke
  # silently if the data frame was ever renamed.
  issue_expr <- paste(deparse(substitute(y)), collapse = "")
  topic_code <- sub("^.*\\$", "", issue_expr)

  topics <- c(
    col = "Colonisation",
    hr = "Human Rights",
    mid = "Israel Palestine",
    dis = "Arms Control and Disarmament",
    nuke = "Nuclear Weapons",
    eco = "Economic Development"
  )

  # Fall back to the raw code so an unlisted issue does not put "NA" in the title.
  label_output <- if (topic_code %in% names(topics)) {
    topics[[topic_code]]
  } else {
    topic_code
  }

  # One row per year. summarize() replaces mutate(), which repeated the same
  # yearly value on every underlying vote row.
  by_issue_output <- votes_joined_processed %>%
    filter(country == x & y == 1) %>%
    group_by(year) %>%
    summarize(percent_yes = mean(vote == 1))

  # Tick positions are set on a continuous scale because `year` is numeric.
  ggplot(data = by_issue_output, aes(x = year, y = percent_yes)) +
    geom_line() +
    scale_y_continuous(labels = scales::percent) +
    scale_x_continuous(breaks = c(1946, seq(1950, 2010, 5), 2014)) +
    labs(title = sprintf("Agreeableness of %s on the topic of %s", x, label_output)) +
    theme_fivethirtyeight() +
    theme(plot.title = element_text(size = 12)) +
    theme(axis.title = element_text(), axis.title.x = element_text()) +
    ylab("%Yes") +
    xlab("Year")
}

How did the US vote over time on issues of Nuclear Weapons?

plot_agreeableness_by_issue("United States", votes_joined_processed$nuke)

How did France vote over time on issues of Israel Palestine?

plot_agreeableness_by_issue("France", votes_joined_processed$mid)

How did a given country vote on each issue?

votes_gathered <- gather(votes_joined_processed, topic, has_topic, col:eco) %>%
                  filter(has_topic == 1)

votes_tidied <- votes_gathered %>%
  mutate(topic = recode(topic,
                        mid = "Palestinian Conflict",
                        nuke = "Nuclear Weapons and Nuclear Material",
                        dis = "Arms Control and Disarmament",
                        hr = "Human Rights",
                        col = "Colonialism",
                        eco = "Economic Development"))

by_country_year_topic <- votes_tidied %>%
                      group_by(country, year, topic) %>%
                        summarize(total = n(), percent_yes = mean(vote==1)) %>%
                        ungroup()
 
# Plot one country's yearly yes-share, with one panel per topic.
#
# Args:
#   x: Country name, matched against by_country_year_topic$country.
#
# Returns:
#   A ggplot object faceted by topic.
country_vote_all_topics <- function(x) {
  by_country_year_topic %>%
    filter(country == x) %>%
    ggplot(aes(x = year, y = percent_yes)) +
    geom_line() +
    facet_wrap(~topic) +
    labs(title = sprintf("Agreeableness of %s On All Issues", x), x = "Year", y = "% Yes") +
    # `labels` is spelled out in full; the previous `label =` only worked
    # through partial argument matching.
    scale_y_continuous(labels = scales::percent) +
    theme_fivethirtyeight() +
    theme(strip.background = element_rect(fill = "lightgrey")) +
    theme(strip.text = element_text(size = 8))
}

country_vote_all_topics("Belgium")

country_vote_all_topics("Nigeria")

How did the agreeableness of two countries compare over time on each issue?

# Plot the yearly yes-share of two countries side by side, with one panel per
# topic and one coloured line per country.
#
# Args:
#   x: First country name, matched against by_country_year_topic$country.
#   y: Second country name, matched the same way.
#
# Returns:
#   A ggplot object faceted by topic.
two_country_comparison <- function(x, y) {
  by_country_year_topic %>%
    # Set membership reads more directly than two chained equality tests.
    filter(country %in% c(x, y)) %>%
    ggplot(aes(x = year, y = percent_yes, color = country)) +
    geom_line() +
    facet_wrap(~topic) +
    labs(title = paste(x, "vs", y, "Agreeableness On All Issues"), x = "Year", y = "% Yes") +
    # `labels` is spelled out in full; the previous `label =` only worked
    # through partial argument matching.
    scale_y_continuous(labels = scales::percent) +
    scale_color_discrete(name = NULL) +
    theme_fivethirtyeight() +
    theme(strip.background = element_rect(fill = "lightgrey")) +
    theme(strip.text = element_text(size = 8))
}

two_country_comparison("Japan","Australia")

Whose agreeableness increased/decreased the most over time on each issue?

# Fit one linear model per country/topic pair (percent_yes ~ year), keep the
# slope row (term == "year") of each tidied model, adjust the p-values for
# multiple comparisons with p.adjust()'s default method, and retain only the
# pairs whose adjusted p-value is below 0.05. The result holds country, topic
# and the slope estimate.
# NOTE(review): nest(-country, -topic) and unnest(tidied) behave differently
# across tidyr versions - confirm against the installed version before
# changing this pipeline.
country_topic_coefficients <- by_country_year_topic %>%
                              nest(-country, -topic) %>%
                              mutate(model = map(data, ~ lm(percent_yes ~ year, data = .)),
                                      tidied = map(model, tidy)) %>%
                                      unnest(tidied) %>%
                                      filter(term == "year") %>%
                                      mutate(p.adjusted = p.adjust(p.value)) %>%
                                      filter(p.adjusted < 0.05) %>%
                                      select(country, topic, estimate)

country_topic_coefficients %>% arrange(desc(estimate))
## # A tibble: 248 x 3
##    country                    topic                                estima~
##    <chr>                      <chr>                                  <dbl>
##  1 Uzbekistan                 Economic Development                  0.0422
##  2 Tuvalu                     Human Rights                          0.0405
##  3 German Democratic Republic Nuclear Weapons and Nuclear Material  0.0362
##  4 German Democratic Republic Arms Control and Disarmament          0.0350
##  5 Turkmenistan               Human Rights                          0.0341
##  6 Georgia                    Colonialism                           0.0307
##  7 Kyrgyzstan                 Human Rights                          0.0297
##  8 Germany                    Colonialism                           0.0248
##  9 Estonia                    Colonialism                           0.0239
## 10 Latvia                     Colonialism                           0.0213
## # ... with 238 more rows

Uzbek attitudes to Economic Development and Tuvaluan attitudes to Human Rights saw the largest increases in conformity

country_topic_coefficients %>% arrange(estimate)
## # A tibble: 248 x 3
##    country              topic                                estimate
##    <chr>                <chr>                                   <dbl>
##  1 Vanuatu              Palestinian Conflict                  -0.0286
##  2 Bosnia & Herzegovina Human Rights                          -0.0218
##  3 Marshall Islands     Palestinian Conflict                  -0.0201
##  4 Marshall Islands     Human Rights                          -0.0194
##  5 Vanuatu              Colonialism                           -0.0171
##  6 Vanuatu              Human Rights                          -0.0157
##  7 South Korea          Human Rights                          -0.0145
##  8 Malta                Nuclear Weapons and Nuclear Material  -0.0113
##  9 Cameroon             Human Rights                          -0.0103
## 10 United States        Palestinian Conflict                  -0.0102
## # ... with 238 more rows

Vanuatuan attitudes to the Palestinian Conflict and Bosnian attitudes to Human Rights showed the strongest non-conformist shift in voting.