# Targets: UNDRIP articles 10, 26-30, 32
# Common vector space ----
# Pool the UNGA speeches and the UNDRIP documents into a single corpus so that
# both document-term matrices below are built over one shared vocabulary.
# NOTE(review): columns c(2, 3) / c(1, 3) are selected by position; only the
# `text` column is used below — confirm the other column is the document id.
docs <- bind_rows(ungdc18[, c(2, 3)], undrip[, c(1, 3)])

# Shared vocabulary: tokenise the pooled corpus, then drop terms that occur in
# more than 10% of documents or fewer than 5 times overall.
it <- itoken(docs$text, progressbar = FALSE)
v <- create_vocabulary(it)
v <- prune_vocabulary(v, doc_proportion_max = 0.1, term_count_min = 5)
vectorizer <- vocab_vectorizer(v)

# Matrix 1: UNGA speeches, projected onto the shared vocabulary
d1 <- ungdc18
it1 <- itoken(d1$text)
dtm_ungdc <- create_dtm(it1, vectorizer)
dim(dtm_ungdc)
## [1] 8093 76682

# Matrix 2: UNDRIP documents, projected onto the same vocabulary
d2 <- undrip
it2 <- itoken(d2$text)
dtm_undrip <- create_dtm(it2, vectorizer)
dim(dtm_undrip)
## [1] 47 76682

# Pairwise cosine similarity: one row per speech, one column per UNDRIP document
dtm_cos_sim <- sim2(dtm_ungdc, dtm_undrip, method = "cosine", norm = "l2")
# UNDRIP target articles ----
# For every target UNDRIP document, attach its cosine similarity to each UNGA
# speech as a new column `cos_undrip<k>` on `ungdc18`.
#
# Column k of `dtm_cos_sim` becomes a two-column lookup table
# (`index`, `cos_undrip<k>`) that is left-joined onto `ungdc18`. The join key
# is the row names of the similarity matrix parsed as numbers.
#
# NOTE(review): assumes column k of `dtm_cos_sim` (i.e. row k of `undrip`) is
# UNDRIP article k, and that `ungdc18$index` holds the same ids as the matrix
# row names — confirm. The note at the top of this section lists targets
# 10, 26-30 and 32; 25 is kept here because the original code computed it too.
undrip_targets <- c(10, 25, 26, 27, 28, 29, 30, 32)

for (target in undrip_targets) {
  # Take every row of the matrix instead of a hard-coded 1:8093, so the code
  # keeps working when the number of speeches changes. No sorting is needed:
  # the join below aligns scores by key, not by position.
  target_scores <- data.frame(
    index = as.numeric(rownames(dtm_cos_sim)),
    score = as.numeric(dtm_cos_sim[, target])
  )
  names(target_scores)[2] <- paste0("cos_undrip", target)

  # Explicit key: never join on other columns the two tables happen to share.
  ungdc18 <- left_join(ungdc18, target_scores, by = "index")
}

# Clear Extra Objects
rm(target, target_scores)
# Top 20 for each UNDRIP article ----

# Rank speeches by one score column.
#
# data:      data frame with `year`, `doc_id` and the column named `score_col`.
# score_col: name (string) of the numeric column to rank by.
# from_year: keep only rows whose `year` is >= this value.
# n:         number of rows to return (default 20).
# Returns the `doc_id` and `score_col` columns of the `n` highest-scoring rows,
# in descending order of score.
top_speeches <- function(data, score_col, from_year, n = 20) {
  eligible <- data[which(data$year >= from_year), c("doc_id", score_col)]
  head(arrange(eligible, desc(.data[[score_col]])), n = n)
}

top_undrip10 <- top_speeches(ungdc18, "cos_undrip10", from_year = 2007)
top_undrip25 <- top_speeches(ungdc18, "cos_undrip25", from_year = 2007)
top_undrip26 <- top_speeches(ungdc18, "cos_undrip26", from_year = 2007)
top_undrip27 <- top_speeches(ungdc18, "cos_undrip27", from_year = 2007)
top_undrip28 <- top_speeches(ungdc18, "cos_undrip28", from_year = 2007)
top_undrip29 <- top_speeches(ungdc18, "cos_undrip29", from_year = 2007)
top_undrip30 <- top_speeches(ungdc18, "cos_undrip30", from_year = 2007)
top_undrip32 <- top_speeches(ungdc18, "cos_undrip32", from_year = 2007)

# Sum Cosine Similarity
# The score columns are selected by name rather than by position (22:29), so
# the sum stays correct if columns are added to or removed from `ungdc18`.
# NOTE(review): assumes positions 22:29 were exactly these eight columns —
# confirm against the original data layout.
cos_cols <- paste0("cos_undrip", c(10, 25, 26, 27, 28, 29, 30, 32))
ungdc18$cos_sum <- rowSums(ungdc18[cos_cols])

# NOTE(review): the combined ranking starts at 2008 while the per-article
# rankings above start at 2007; kept as in the original — confirm intent.
top_undrip <- top_speeches(ungdc18, "cos_sum", from_year = 2008)
top_undrip
## doc_id cos_sum
## 1 CAN_72_2017.txt 1.8602805
## 2 DMA_64_2009.txt 0.4986725
## 3 VUT_68_2013.txt 0.4307465
## 4 PRY_63_2008.txt 0.4003682
## 5 BOL_63_2008.txt 0.3971559
## 6 ECU_72_2017.txt 0.3646951
## 7 PRY_69_2014.txt 0.3632261
## 8 BOL_64_2009.txt 0.3612142
## 9 VUT_66_2011.txt 0.3599404
## 10 PER_66_2011.txt 0.3585237
## 11 GTM_64_2009.txt 0.3448412
## 12 GTM_72_2017.txt 0.3338650
## 13 COG_65_2010.txt 0.3325180
## 14 BOL_70_2015.txt 0.3125577
## 15 BOL_65_2010.txt 0.2839817
## 16 AUS_65_2010.txt 0.2612376
## 17 DMA_71_2016.txt 0.2552000
## 18 VUT_70_2015.txt 0.2540069
## 19 SWZ_69_2014.txt 0.2430754
## 20 ECU_64_2009.txt 0.2416352
#stargazer(top_undrip, summary = FALSE)
# CMDist scores per concept ----
# Adds one numeric `cmd_*` column to `ungdc18` for each concept below, holding
# the second column of the `CMDist()` result for that concept.

# The document-term matrix does not depend on the concept, so it is built once
# (term-frequency weighting, terms with sparsity above 0.999 removed) instead
# of being rebuilt for every concept.
ungdc_dtm <- ungdc_unnest %>%
  cast_dtm(
    term = word,
    document = doc_id,
    value = n,
    weighting = tm::weightTf
  ) %>%
  removeSparseTerms(.999)

# Target column on `ungdc18` -> concept word(s) handed to CMDist() as `cw`
cmd_concepts <- c(
  cmd_environment = "environment",
  cmd_indigenous  = "indigenous",
  cmd_indenv      = "indigenous environment",
  cmd_colonialism = "colonialism",
  cmd_corporation = "corporation",
  cmd_indcorp     = "indigenous corporation",
  cmd_freetrade   = "free trade",
  cmd_freemarket  = "free market",
  cmd_nature      = "nature",
  cmd_envsteward  = "environmental stewardship",
  cmd_indself     = "indigenous self-determination",
  cmd_socinq      = "socioeconomic inequality"
)

# NOTE(review): scores are assigned by position, which assumes the rows
# returned by CMDist() are in the same order as the rows of `ungdc18` —
# confirm (a join on the document id would make this explicit).
for (cmd_col in names(cmd_concepts)) {
  cmd_result <- CMDist(ungdc_dtm, cw = cmd_concepts[[cmd_col]], wv = my.wv)
  ungdc18[[cmd_col]] <- as.numeric(cmd_result[[2]])
}

rm(cmd_col, cmd_result)
# CMD to Environmental Stewardship ----
# Scatter of `cmd_envsteward` over time, coloured by UN region (rows with
# region "OTHER" excluded), with a smoother per region and a dashed reference
# line at 1 — the same cut-off used for "highly engaged" below.
ungdc18 %>%
  filter(UN_REGION != "OTHER") %>%
  ggplot(mapping = aes(x = year, y = cmd_envsteward, colour = UN_REGION)) +
  geom_point() +
  geom_smooth() +
  geom_hline(yintercept = 1, linetype = "dashed") +
  labs(title = "Concept Mover Distance to 'Environmental Stewardship'",subtitle = "Subset by UN Region", x = "year", y = "Conceptual Engagement with 'Environmental Stewardship'")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Proportion Engagement with Environmental Stewardship ----
# Per year and region: number of speeches and number of speeches scoring
# above 1. The result stays grouped by `year`, as before.
propeng_envsteward <- ungdc18 %>%
  filter(UN_REGION != "OTHER") %>%
  group_by(year, UN_REGION) %>%
  summarise(
    n_regionyear = n(),
    n_engage = sum(cmd_envsteward > 1),
    .groups = "drop_last"
  )

# Total number of speeches per year across all retained regions
yeartotal <- propeng_envsteward %>%
  group_by(year) %>%
  summarise(n_total = sum(n_regionyear), .groups = "drop")

# Explicit key so the join cannot pick up other shared columns
propeng_envsteward <- full_join(propeng_envsteward, yeartotal, by = "year")

# Stacked area: each region's share of that year's speeches that score above 1
propeng_envsteward %>%
  group_by(year, UN_REGION) %>%
  summarise(propeng_region = n_engage / n_total, .groups = "drop_last") %>%
  ggplot(mapping = aes(x = year, y = propeng_region, fill = UN_REGION)) +
  geom_area() +
  labs(title = "Proportion of Speeches Highly Engaged with 'Environmental Stewardship'", subtitle = "Subset by UN Region", x = "year", y = "Proportion of Highly Engaged Speeches")
# Indigenous Self-determination ----
# Scatter of `cmd_indself` over time, coloured by UN region (rows with region
# "OTHER" excluded), with a smoother per region and a dashed reference line
# at 1 — the same cut-off used for "highly engaged" below.
ungdc18 %>%
  filter(UN_REGION != "OTHER") %>%
  ggplot(mapping = aes(x = year, y = cmd_indself, colour = UN_REGION)) +
  geom_point() +
  geom_smooth() +
  geom_hline(yintercept = 1, linetype = "dashed") +
  labs(title = "Concept Mover Distance to 'Indigenous Self-determination'")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Proportion Engagement with Indigenous Self-Determination ----
# Per year and region: number of speeches and number of speeches scoring
# above 1. The result stays grouped by `year`, as before.
propeng_indself <- ungdc18 %>%
  filter(UN_REGION != "OTHER") %>%
  group_by(year, UN_REGION) %>%
  summarise(
    n_regionyear = n(),
    n_engage = sum(cmd_indself > 1),
    .groups = "drop_last"
  )

# Total number of speeches per year across all retained regions
yeartotal <- propeng_indself %>%
  group_by(year) %>%
  summarise(n_total = sum(n_regionyear), .groups = "drop")

# Explicit key so the join cannot pick up other shared columns
propeng_indself <- full_join(propeng_indself, yeartotal, by = "year")

# Stacked area: each region's share of that year's speeches that score above 1
propeng_indself %>%
  group_by(year, UN_REGION) %>%
  summarise(propeng_region = n_engage / n_total, .groups = "drop_last") %>%
  ggplot(mapping = aes(x = year, y = propeng_region, fill = UN_REGION)) +
  geom_area() +
  labs(title = "Proportion of Speeches Highly Engaged with 'Indigenous Self-Determination'", subtitle = "Subset by UN Region", x = "year", y = "Proportion of Highly Engaged Speeches")
# Socioeconomic Inequality ----
# Scatter of `cmd_socinq` over time, coloured by UN region (rows with region
# "OTHER" excluded), with a smoother per region and a dashed reference line
# at 1 — the same cut-off used for "highly engaged" below.
ungdc18 %>%
  filter(UN_REGION != "OTHER") %>%
  ggplot(mapping = aes(x = year, y = cmd_socinq, colour = UN_REGION)) +
  geom_point() +
  geom_smooth() +
  geom_hline(yintercept = 1, linetype = "dashed") +
  labs(title = "Concept Mover Distance to 'Socioeconomic Inequality'")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Proportion Engagement with Socioeconomic Inequality ----
# Per year and region: number of speeches and number of speeches scoring
# above 1. The result stays grouped by `year`, as before.
propeng_socinq <- ungdc18 %>%
  filter(UN_REGION != "OTHER") %>%
  group_by(year, UN_REGION) %>%
  summarise(
    n_regionyear = n(),
    n_engage = sum(cmd_socinq > 1),
    .groups = "drop_last"
  )

# Total number of speeches per year across all retained regions
yeartotal <- propeng_socinq %>%
  group_by(year) %>%
  summarise(n_total = sum(n_regionyear), .groups = "drop")

# Explicit key so the join cannot pick up other shared columns
propeng_socinq <- full_join(propeng_socinq, yeartotal, by = "year")

# Stacked area: each region's share of that year's speeches that score above 1
propeng_socinq %>%
  group_by(year, UN_REGION) %>%
  summarise(propeng_region = n_engage / n_total, .groups = "drop_last") %>%
  ggplot(mapping = aes(x = year, y = propeng_region, fill = UN_REGION)) +
  geom_area() +
  labs(title = "Proportion of Speeches Highly Engaged with 'Socioeconomic Inequality'", subtitle = "Subset by UN Region", x = "year", y = "Proportion of Highly Engaged Speeches")