Data available here: https://github.com/dmorgan26/UN-Votes
Source: Erik Voeten “Data and Analyses of Voting in the UN General Assembly” Routledge Handbook of International Organization, edited by Bob Reinalda (published 2017-06-19)
library(readr)
# Read the roll-call votes file; its header row supplies the column names.
unvotes <- read_csv(
  file = "C:\\Users\\Dan\\Desktop\\R\\Markdowns\\UN-Votes\\votes.csv",
  col_names = TRUE
)
library(dplyr)
# Print a compact, column-by-column preview of the raw votes.
glimpse(unvotes)
## Observations: 1,048,575
## Variables: 4
## $ assembly_session <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ rc_id <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
## $ state_code <int> 2, 20, 31, 40, 41, 42, 51, 52, 53, 54, 55, 56...
## $ vote <int> 1, 3, 9, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, ...
Removed absences and replaced assembly_session with year. The first assembly vote in the data set was in 1946.
# Keep only rows whose vote code is 3 or lower, derive the calendar year from
# the session number (session 1 maps to 1946), and drop the session column.
# The column is removed by name rather than by position so that a different
# column order in the input cannot silently remove the wrong column.
votes_processed <- unvotes %>%
  filter(vote <= 3) %>%
  mutate(year = assembly_session + 1945) %>%
  select(-assembly_session)
glimpse(votes_processed)
## Observations: 722,082
## Variables: 4
## $ rc_id <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
## $ state_code <int> 2, 20, 40, 41, 42, 70, 90, 91, 92, 93, 94, 95, 100,...
## $ vote <int> 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ year <dbl> 1946, 1946, 1946, 1946, 1946, 1946, 1946, 1946, 194...
Replaced state_code with country name
library(countrycode)
# Translate the numeric Correlates of War codes ("cown") in state_code into
# country names, stored in a new `country` column.
votes_processed <- mutate(
  votes_processed,
  country = countrycode(
    sourcevar = state_code,
    origin = "cown",
    destination = "country.name"
  )
)
## Warning in countrycode(state_code, "cown", "country.name"): Some values were not matched unambiguously: 260, 816
The data gave ‘state_code’ as Correlates of War codes
260 = German Federal Republic (commonly known as West Germany). 816 = Republic of Vietnam (commonly known as South Vietnam).
Other relevant codes
255 = Germany
265 = East Germany
817 = Vietnam
# Rows recorded under state code 260 and the span of years they cover.
# state_code is an integer column, so it is compared with a number instead of
# relying on implicit coercion to the string "260".
code_260 <- votes_processed %>%
  filter(state_code == 260)
range(code_260$year)
## [1] 1973 1989
Dataset contained West German votes from 1973-1989
# Rows recorded under state code 265 and the span of years they cover.
# state_code is an integer column, so it is compared with a number instead of
# relying on implicit coercion to the string "265".
code_265 <- votes_processed %>%
  filter(state_code == 265)
range(code_265$year)
## [1] 1973 1989
Dataset contained East German votes from 1973-1989
# Rows recorded under state code 255 and the span of years they cover.
# state_code is an integer column, so it is compared with a number instead of
# relying on implicit coercion to the string "255".
code_255 <- votes_processed %>%
  filter(state_code == 255)
range(code_255$year)
## [1] 1990 2014
Dataset contained (unified) German votes from 1990-2014
There wasn’t any overlap of the dates so I assigned state_code 260 to West Germany
# Label every row with state code 260 as "West Germany". The integer column is
# compared with a number rather than a string.
votes_processed$country[votes_processed$state_code == 260] <- "West Germany"
# Rows recorded under state code 816 and the span of years they cover.
code_816 <- votes_processed %>%
  filter(state_code == 816)
range(code_816$year)
## [1] 1977 2014
Dataset contained Vietnamese votes from 1977-2014
# Rows recorded under state code 817; dim() shows how many there are.
# The integer column is compared with a number rather than a string.
code_817 <- votes_processed %>%
  filter(state_code == 817)
dim(code_817)
## [1] 0 5
There was no data recorded under state_code 817, so there was no overlap and I assigned state_code 816 to Vietnam
# Label every row with state code 816 as "Vietnam" (numeric comparison against
# the integer column), then confirm that no country name is left missing.
votes_processed$country[votes_processed$state_code == 816] <- "Vietnam"
sum(is.na(votes_processed$country))
## [1] 0
# The numeric code is no longer needed once every row has a name. Drop it by
# name so the result does not depend on the column's position.
votes_processed <- votes_processed %>%
  select(-state_code)
glimpse(votes_processed)
## Observations: 722,082
## Variables: 4
## $ rc_id <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
## $ vote <int> 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ year <dbl> 1946, 1946, 1946, 1946, 1946, 1946, 1946, 1946, 1946, ...
## $ country <chr> "United States", "Canada", "Cuba", "Haiti", "Dominican...
# Overall vote count and the share of votes coded 1 across the whole data set.
percent_yes <- summarise(
  votes_processed,
  total = n(),
  percent_yes = mean(vote == 1)
)
percent_yes
## # A tibble: 1 x 2
## total percent_yes
## <int> <dbl>
## 1 722082 0.797
79.7% of all votes on proposed resolutions were for “Yes”
# Per-country vote count and share of votes coded 1, sorted so the countries
# with the lowest share come first.
votes_grouped_by_country <- group_by(votes_processed, country)
by_country <- arrange(
  summarise(
    votes_grouped_by_country,
    total = n(),
    percent_yes = mean(vote == 1)
  ),
  percent_yes
)
rm(votes_grouped_by_country)
head(by_country)
## # A tibble: 6 x 3
## country total percent_yes
## <chr> <int> <dbl>
## 1 Zanzibar 2 0
## 2 United States 5298 0.284
## 3 Palau 829 0.306
## 4 Israel 4850 0.348
## 5 West Germany 2067 0.396
## 6 Micronesia (Federated States of) 1390 0.410
Zanzibar was a suspected outlier with only two votes
library(ggplot2)
library(ggthemes)
# Histogram of the number of votes cast per country, in bins of 100 votes.
ggplot(data = by_country, mapping = aes(x = total)) +
  geom_histogram(binwidth = 100) +
  theme_fivethirtyeight() +
  # Presumably restores the axis titles that the theme hides -- confirm.
  theme(axis.title = element_text(), axis.title.x = element_text()) +
  labs(
    title = "Frequency distribution of total number of votes cast",
    x = "Total Votes Cast",
    y = "Frequency"
  )
The data showed left skew so I used a log transformation
# Horizontal box plot of the log of each country's total vote count.
ggplot(data = by_country, mapping = aes(x = "", y = log(total))) +
  geom_boxplot() +
  coord_flip() +
  theme_fivethirtyeight() +
  # Presumably restores the axis titles that the theme hides -- confirm.
  theme(axis.title = element_text(), axis.title.x = element_text()) +
  labs(
    title = "Distribution of total number of votes cast by country",
    x = "",
    y = "Log(Total Votes)"
  )
The box plot showed three clear outliers lying more than 1.5*IQR below the lower quartile.
# List the countries from fewest to most votes cast.
arrange(by_country, total)
## # A tibble: 200 x 3
## country total percent_yes
## <chr> <int> <dbl>
## 1 Zanzibar 2 0
## 2 South Sudan 114 0.649
## 3 Kiribati 145 0.876
## 4 Nauru 615 0.603
## 5 Montenegro 619 0.646
## 6 Tuvalu 629 0.838
## 7 Timor-Leste 755 0.968
## 8 Tonga 828 0.731
## 9 Palau 829 0.306
## 10 Switzerland 918 0.659
## # ... with 190 more rows
I filtered for total votes greater than 150 to remove the three outliers
# Keep only countries with more than 150 recorded votes, then list them from
# the lowest to the highest share of yes votes.
by_country <- filter(by_country, total > 150)
arrange(by_country, percent_yes)
## # A tibble: 197 x 3
## country total percent_yes
## <chr> <int> <dbl>
## 1 United States 5298 0.284
## 2 Palau 829 0.306
## 3 Israel 4850 0.348
## 4 West Germany 2067 0.396
## 5 Micronesia (Federated States of) 1390 0.410
## 6 United Kingdom 5279 0.428
## 7 France 5232 0.433
## 8 Marshall Islands 1520 0.482
## 9 Belgium 5298 0.494
## 10 Canada 5316 0.512
## # ... with 187 more rows
The United States, Palau and Israel were the least agreeable member states
# List the countries from the highest to the lowest share of yes votes.
arrange(by_country, desc(percent_yes))
## # A tibble: 197 x 3
## country total percent_yes
## <chr> <int> <dbl>
## 1 Seychelles 1757 0.978
## 2 Timor-Leste 755 0.968
## 3 São Tomé & Príncipe 2382 0.967
## 4 Cape Verde 3203 0.959
## 5 Djibouti 3253 0.957
## 6 Guinea-Bissau 2986 0.955
## 7 Comoros 2461 0.946
## 8 Mozambique 3366 0.944
## 9 Yemen 1586 0.943
## 10 United Arab Emirates 3937 0.943
## # ... with 187 more rows
Seychelles, Timor-Leste and Sao Tome & Principe were the most agreeable member states
# Yearly vote count and share of votes coded 1.
by_year <- votes_processed %>%
  group_by(year) %>%
  summarize(total = n(), percent_yes = mean(vote == 1))

# Line chart of the yearly yes-share. `year` is numeric, so the tick positions
# are set as breaks on a continuous x scale instead of being passed as limits
# to a discrete scale.
ggplot(by_year, aes(x = year, y = percent_yes)) +
  geom_line() +
  scale_y_continuous(breaks = seq(0, 1, 0.05), labels = scales::percent) +
  scale_x_continuous(breaks = c(1946, seq(1950, 2010, 5), 2014)) +
  labs(
    title = "% Yes votes at the UN General Assembly by Year",
    y = "% Yes",
    x = "Year"
  ) +
  theme_fivethirtyeight()
Investigation of the huge dip
# Row index of the year with the lowest share of yes votes.
which.min(by_year[["percent_yes"]])
## [1] 19
# Show that row together with the two rows on either side of it.
by_year[seq(17, 21), ]
## # A tibble: 5 x 3
## year total percent_yes
## <dbl> <int> <dbl>
## 1 1962 4642 0.600
## 2 1963 3308 0.729
## 3 1964 112 0.0179
## 4 1965 4382 0.708
## 5 1966 5868 0.611
The data set showed the 1964 total percentage of yes votes as 1.8%, based on only 112 votes. The numbers for 1963 and 1965 were as expected at 72.9% and 70.8% respectively, so I assumed that this was an error and removed the year 1964 from the data set.
# Remove 1964 from both the yearly summary and the underlying votes.
# Rows are selected by the year value itself rather than by a hard-coded row
# index, and `year` is compared with a number because the column is numeric.
by_year <- by_year %>%
  filter(year != 1964)
votes_processed <- votes_processed %>%
  filter(year != 1964)
# Redraw the yearly yes-share line chart from the current `by_year` summary.
# `year` is numeric, so tick positions are given as breaks on a continuous
# x scale rather than as limits on a discrete one.
ggplot(by_year, aes(x = year, y = percent_yes)) +
  geom_line() +
  scale_y_continuous(breaks = seq(0, 1, 0.05), labels = scales::percent) +
  scale_x_continuous(breaks = c(1946, seq(1950, 2010, 5), 2014)) +
  labs(
    title = "% Yes votes at the UN General Assembly by Year",
    y = "% Yes",
    x = "Year"
  ) +
  theme_fivethirtyeight()
library(scales)
# Scatter plot of the yearly yes-share with a fitted least-squares line.
# `year` is numeric, so tick positions are given as breaks on a continuous
# x scale rather than as limits on a discrete one.
ggplot(by_year, aes(x = year, y = percent_yes)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_x_continuous(breaks = c(1946, seq(1950, 2010, 5), 2014)) +
  scale_y_continuous(
    limits = c(0.4, 1),
    breaks = seq(0.4, 1, 0.05),
    labels = scales::percent
  ) +
  labs(
    title = "Least Squares % Yes votes at the UN General Assembly by Year",
    y = "% Yes",
    x = "Year"
  ) +
  theme_fivethirtyeight()
# Vote count and share of votes coded 1 for every year/country combination.
by_year_country <- summarise(
  group_by(votes_processed, year, country),
  total = n(),
  percent_yes = mean(vote == 1)
)
UK Votes Over Time
# Yearly yes-share for the United Kingdom only.
# `year` is numeric, so tick positions are given as breaks on a continuous
# x scale rather than as limits on a discrete one.
by_year_country %>%
  filter(country == "United Kingdom") %>%
  ggplot(aes(x = year, y = percent_yes)) +
  geom_line(col = "blue") +
  scale_x_continuous(breaks = c(1946, seq(1950, 2010, 5), 2014)) +
  scale_y_continuous(
    limits = c(0.2, 0.7),
    breaks = seq(0.2, 0.7, 0.05),
    labels = scales::percent
  ) +
  labs(title = "UK % Yes Vote", y = "% Yes", x = "Year") +
  theme_fivethirtyeight()
# Countries drawn together on one colour-coded line chart.
countries_line <- c("United Kingdom", "United States", "India", "China")

# `year` is numeric, so tick positions are given as breaks on a continuous
# x scale rather than as limits on a discrete one.
by_year_country %>%
  filter(country %in% countries_line) %>%
  ggplot(aes(x = year, y = percent_yes, col = country)) +
  geom_line() +
  scale_x_continuous(breaks = c(1946, seq(1950, 2010, 5), 2014)) +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = seq(0, 1, 0.1),
    labels = scales::percent
  ) +
  scale_color_discrete(name = "Country") +
  labs(title = "Vote Yes % By Country", y = "% Yes", x = "Year") +
  theme_fivethirtyeight()
Comparing this many countries on a single line plot would be too cluttered, so I used one facet per country instead
# Countries shown as separate panels, one panel per country.
countries_facet <- c(
  "United Kingdom", "United States", "India", "China", "Japan",
  "France", "Italy", "Egypt", "Turkey"
)

facet_data <- filter(by_year_country, country %in% countries_facet)
ggplot(facet_data, aes(x = year, y = percent_yes)) +
  geom_line() +
  facet_wrap(~country) +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = seq(0, 1, 0.2),
    labels = scales::percent
  ) +
  labs(title = "Vote Yes % By Country", y = "% Yes", x = "Year") +
  theme_fivethirtyeight() +
  theme(strip.background = element_rect(fill = "lightgrey"))
rm(facet_data)
library(tidyr)
library(purrr)
library(broom)
# Drop the grouping left over from the earlier summary so that nesting below
# works on a plain table.
by_year_country <- by_year_country %>% ungroup()

# Fit one linear model of yes-share on year per country, tidy each model into
# a coefficient table, and keep only the slope (the "year" term).
country_coefficients <- by_year_country %>%
  nest(-country) %>%
  mutate(
    model = map(data, function(country_rows) {
      lm(percent_yes ~ year, data = country_rows)
    }),
    tidied = map(model, tidy)
  ) %>%
  unnest(tidied) %>%
  filter(term == "year")
Obtained adjusted p-values since I would expect some raw p-values to be < 0.05 by chance when there are 200 models
# Adjust the slope p-values for multiple testing (Holm, the p.adjust default)
# and keep the countries whose adjusted p-value is below 0.05.
signif_country_coefficients <- filter(
  mutate(country_coefficients, p.adjusted = p.adjust(p.value, method = "holm")),
  p.adjusted < 0.05
)

# Significant slopes, largest increase first.
arrange(
  select(signif_country_coefficients, country, estimate),
  desc(estimate)
)
## # A tibble: 108 x 2
## country estimate
## <chr> <dbl>
## 1 Tuvalu 0.0378
## 2 German Democratic Republic 0.0193
## 3 Kyrgyzstan 0.0119
## 4 Tajikistan 0.0116
## 5 Kazakhstan 0.0115
## 6 Yemen Arab Republic 0.0115
## 7 South Africa 0.0115
## 8 Malawi 0.0103
## 9 Dominican Republic 0.00797
## 10 Mongolia 0.00775
## # ... with 98 more rows
Tuvalu is the nation whose agreeableness increased the most over their membership. The rate of increase is far larger than any other country so warrants further investigation
# Yearly summary rows for Tuvalu and the number of years between its first
# and last recorded vote.
tuvalu <- filter(by_year_country, country == "Tuvalu")
diff(range(tuvalu$year))
## [1] 13
Tuvalu had only been voting for 13 years. How many votes did this equate to?
# Total number of votes cast by Tuvalu, taken as the largest value of the
# running total of its yearly vote counts.
summarize(ungroup(tuvalu), total_votes = max(cumsum(total)))
## # A tibble: 1 x 1
## total_votes
## <dbl>
## 1 629
Tuvalu participated in 629 votes, how did this compare to other nations?
summary(by_country$total)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 615 2407 4116 3664 4840 5316
Tuvalu is an outlier here in a data set with significant left skew.
If data is normally distributed I could see whether Tuvalu falls within three standard deviations of the mean
Did the total number of votes cast by each nation follow a normal distribution?
library(ggpubr)
# Q-Q plot comparing the per-country vote totals against normal quantiles.
ggqqplot(by_country$total, main = "Q-Q Plot of Total Votes Cast") +
  theme_fivethirtyeight() +
  theme(axis.title = element_text()) +
  labs(x = "Theoretical Quantiles", y = "Votes")
Shapiro-Wilk test
H0 : The data are normally distributed HA : The data are not normally distributed
shapiro.test(by_country$total)
##
## Shapiro-Wilk normality test
##
## data: by_country$total
## W = 0.88463, p-value = 3.706e-11
A p-value < 0.05 and near zero suggested that I should reject the null hypothesis that the data are normally distributed. Visual inspection of the Q-Q plot also suggested that the distribution is not normal.
629 votes seems like a large enough sample so I will keep Tuvalu in the data set.
Consequently, Tuvalu is the member state whose agreeableness increased the most.
# Significant slopes, steepest decline first.
arrange(signif_country_coefficients, estimate)
## # A tibble: 108 x 7
## country term estimate std.error statis~ p.value p.adjus~
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Bosnia & Herzegovina year -0.0131 0.00264 - 4.96 7.53e- 5 7.69e- 3
## 2 Vanuatu year -0.0111 0.00228 - 4.87 3.09e- 5 3.30e- 3
## 3 South Korea year -0.00797 0.000947 - 8.42 2.52e- 8 3.67e- 6
## 4 United States year -0.00692 0.000663 -10.4 1.31e-15 2.48e-13
## 5 Israel year -0.00661 0.000798 - 8.28 1.03e-11 1.72e- 9
## 6 Yemen year -0.00395 0.000893 - 4.42 2.18e- 4 2.21e- 2
## 7 Djibouti year -0.00155 0.000401 - 3.87 4.41e- 4 4.10e- 2
## 8 Ukraine year 0.00259 0.000677 3.83 2.85e- 4 2.74e- 2
## 9 Austria year 0.00259 0.000581 4.46 3.95e- 5 4.15e- 3
## 10 Sweden year 0.00263 0.000672 3.91 2.24e- 4 2.24e- 2
## # ... with 98 more rows
Bosnia & Herzegovina's agreeableness declined the most
# Read the resolutions file; its header row supplies the column names.
resolutions <- read_csv(
  file = "C:\\Users\\Dan\\Desktop\\R\\Markdowns\\UN-Votes\\resolutions.csv",
  col_names = TRUE
)
Locating NAs in the data set
# Count the missing values in every column of `resolutions`.
# vapply() visits each column whatever the table's width (no hard-coded column
# count) and guarantees an integer result. unname() keeps the printed vector
# free of column names, matching a plain positional listing.
nas <- unname(
  vapply(resolutions, function(column) sum(is.na(column)), integer(1))
)
nas
## [1] 0 0 151 2585 0 0 0 0 0 30 30 30 30 30
## [15] 32
The vast majority of the NA values are in the ‘amendment’ column, which is of no interest, so I omitted all NA values
# Keep every column except positions 4 and 6 through 9, then drop any row that
# still contains a missing value.
kept_columns <- setdiff(seq_along(resolutions), c(4, 6:9))
resolutions <- na.omit(resolutions[, kept_columns])
rm(kept_columns)
# Derive the calendar year from the session number, rename the vote identifier
# to match the votes table (rc_id), and drop the session column.
# The new `year` column is then moved to the front by name, so the reordering
# stays correct even if the number of columns changes.
resolutions_processed <- resolutions %>%
  mutate(year = assembly_session + 1945) %>%
  rename(rc_id = vote_id) %>%
  select(-assembly_session) %>%
  select(year, everything())
# Attach the resolution details to every vote, keeping only votes whose
# rc_id/year pair also appears in the resolutions table.
votes_joined <- votes_processed %>%
  inner_join(resolutions_processed, by = c("rc_id", "year"))

# Shorten the six issue-indicator column names.
votes_joined_processed <- rename(
  votes_joined,
  col = colonization,
  hr = human_rights,
  mid = israel_palestine,
  dis = disarmament,
  nuke = nuclear_weapons,
  eco = economic_development
)
library(stringr)
# Plot how often a country voted "yes" each year on resolutions about one issue.
#
# Args:
#   x: Country name, matched exactly against the `country` column of
#      votes_joined_processed.
#   y: Issue indicator with one entry per row of votes_joined_processed
#      (rows equal to 1 are kept), passed as `<data frame>$<column>`, e.g.
#      votes_joined_processed$nuke. The column name is recovered from the call
#      expression and used to build the plot title.
#
# Returns:
#   A ggplot line chart of the yearly share of votes coded 1.
plot_agreeableness_by_issue <- function(x, y) {
  # Capture the caller's expression for `y` so the issue name can be read
  # from it.
  issue_expr <- paste(deparse(substitute(y)), collapse = "")
  # Keep only the text after the last `$`. This replaces a fixed character
  # offset that only worked when the data frame was named exactly
  # `votes_joined_processed`.
  issue_key <- sub("^.*\\$", "", issue_expr)

  topics <- c(
    col = "Colonisation",
    hr = "Human Rights",
    mid = "Israel Palestine",
    dis = "Arms Control and Disarmament",
    nuke = "Nuclear Weapons",
    eco = "Economic Development"
  )
  # Fall back to the raw column name so an unknown issue never yields "NA"
  # in the title.
  label_output <- if (issue_key %in% names(topics)) {
    topics[[issue_key]]
  } else {
    issue_key
  }

  # One row per year: summarize() replaces mutate(), which repeated the same
  # yearly value on every individual vote row.
  by_issue_output <- votes_joined_processed %>%
    filter(country == x & y == 1) %>%
    group_by(year) %>%
    summarize(percent_yes = mean(vote == 1))

  ggplot(data = by_issue_output, aes(x = year, y = percent_yes)) +
    geom_line() +
    scale_y_continuous(labels = scales::percent) +
    # `year` is numeric, so tick positions are breaks on a continuous scale.
    scale_x_continuous(breaks = c(1946, seq(1950, 2010, 5), 2014)) +
    labs(
      title = sprintf("Agreeableness of %s on the topic of %s", x, label_output)
    ) +
    theme_fivethirtyeight() +
    theme(
      plot.title = element_text(size = 12),
      axis.title = element_text(),
      axis.title.x = element_text()
    ) +
    ylab("%Yes") +
    xlab("Year")
}
How did the US vote over time on issues of Nuclear Weapons?
plot_agreeableness_by_issue("United States", votes_joined_processed$nuke)
How did France vote over time on issues of Israel Palestine?
plot_agreeableness_by_issue("France", votes_joined_processed$mid)
# Reshape the six issue-indicator columns (col through eco) into topic/has_topic
# pairs and keep only the rows where the indicator is 1.
votes_gathered <- votes_joined_processed %>%
  gather(topic, has_topic, col:eco) %>%
  filter(has_topic == 1)

# Replace the short topic codes with descriptive labels.
votes_tidied <- mutate(
  votes_gathered,
  topic = recode(
    topic,
    col = "Colonialism",
    hr = "Human Rights",
    mid = "Palestinian Conflict",
    dis = "Arms Control and Disarmament",
    nuke = "Nuclear Weapons and Nuclear Material",
    eco = "Economic Development"
  )
)
# Vote count and share of votes coded 1 for every country/year/topic
# combination, returned without any grouping attached.
by_country_year_topic <- ungroup(
  summarise(
    group_by(votes_tidied, country, year, topic),
    total = n(),
    percent_yes = mean(vote == 1)
  )
)
# Plot one country's yearly yes-share, with one panel per topic.
#
# Args:
#   x: Country name, matched exactly against the `country` column of
#      by_country_year_topic.
#
# Returns:
#   A faceted ggplot line chart.
country_vote_all_topics <- function(x) {
  by_country_year_topic %>%
    filter(country == x) %>%
    ggplot(aes(x = year, y = percent_yes)) +
    geom_line() +
    facet_wrap(~ topic) +
    labs(
      title = sprintf("Agreeableness of %s On All Issues", x),
      x = "Year",
      y = "% Yes"
    ) +
    # The argument is spelled out in full as `labels`; the abbreviated
    # `label` only worked through partial argument matching.
    scale_y_continuous(labels = scales::percent) +
    theme_fivethirtyeight() +
    theme(
      strip.background = element_rect(fill = "lightgrey"),
      strip.text = element_text(size = 8)
    )
}
country_vote_all_topics("Belgium")
country_vote_all_topics("Nigeria")
# Compare two countries' yearly yes-share, with one panel per topic and one
# coloured line per country.
#
# Args:
#   x: First country name, matched exactly against the `country` column of
#      by_country_year_topic.
#   y: Second country name, matched the same way.
#
# Returns:
#   A faceted ggplot line chart.
two_country_comparison <- function(x, y) {
  by_country_year_topic %>%
    filter(country %in% c(x, y)) %>%
    ggplot(aes(x = year, y = percent_yes, color = country)) +
    geom_line() +
    facet_wrap(~ topic) +
    labs(
      title = paste(x, "vs", y, "Agreeableness On All Issues"),
      x = "Year",
      y = "% Yes"
    ) +
    # The argument is spelled out in full as `labels`; the abbreviated
    # `label` only worked through partial argument matching.
    scale_y_continuous(labels = scales::percent) +
    scale_color_discrete(name = NULL) +
    theme_fivethirtyeight() +
    theme(
      strip.background = element_rect(fill = "lightgrey"),
      strip.text = element_text(size = 8)
    )
}
two_country_comparison("Japan","Australia")
# Fit one linear model of yes-share on year per country/topic pair and keep
# only the slope (the "year" term) of each model.
topic_slopes <- by_country_year_topic %>%
  nest(-country, -topic) %>%
  mutate(
    model = map(data, function(pair_rows) {
      lm(percent_yes ~ year, data = pair_rows)
    }),
    tidied = map(model, tidy)
  ) %>%
  unnest(tidied) %>%
  filter(term == "year")

# Adjust the slope p-values for multiple testing (Holm, the p.adjust default),
# keep those below 0.05, and retain only the identifying columns and the slope.
country_topic_coefficients <- topic_slopes %>%
  mutate(p.adjusted = p.adjust(p.value, method = "holm")) %>%
  filter(p.adjusted < 0.05) %>%
  select(country, topic, estimate)
rm(topic_slopes)

# Largest increases first.
arrange(country_topic_coefficients, desc(estimate))
## # A tibble: 248 x 3
## country topic estima~
## <chr> <chr> <dbl>
## 1 Uzbekistan Economic Development 0.0422
## 2 Tuvalu Human Rights 0.0405
## 3 German Democratic Republic Nuclear Weapons and Nuclear Material 0.0362
## 4 German Democratic Republic Arms Control and Disarmament 0.0350
## 5 Turkmenistan Human Rights 0.0341
## 6 Georgia Colonialism 0.0307
## 7 Kyrgyzstan Human Rights 0.0297
## 8 Germany Colonialism 0.0248
## 9 Estonia Colonialism 0.0239
## 10 Latvia Colonialism 0.0213
## # ... with 238 more rows
Uzbek attitudes to Economic Development and Tuvaluan attitudes to Human Rights saw the largest increases in conformity
country_topic_coefficients %>% arrange(estimate)
## # A tibble: 248 x 3
## country topic estimate
## <chr> <chr> <dbl>
## 1 Vanuatu Palestinian Conflict -0.0286
## 2 Bosnia & Herzegovina Human Rights -0.0218
## 3 Marshall Islands Palestinian Conflict -0.0201
## 4 Marshall Islands Human Rights -0.0194
## 5 Vanuatu Colonialism -0.0171
## 6 Vanuatu Human Rights -0.0157
## 7 South Korea Human Rights -0.0145
## 8 Malta Nuclear Weapons and Nuclear Material -0.0113
## 9 Cameroon Human Rights -0.0103
## 10 United States Palestinian Conflict -0.0102
## # ... with 238 more rows
Vanuatuan attitudes to the Palestinian Conflict and Bosnian attitudes to Human Rights showed the strongest non-conformist shift in voting.