Cosine Similarity to UNDRIP:

Targets: UNDRIP articles 10, 25-30, and 32

# Common vector space ----

# Stack the UNGA speeches and the UNDRIP documents into a single corpus so
# that one vocabulary covers both sources.
docs <- bind_rows(ungdc18[, c(2, 3)], undrip[, c(1, 3)])

# Shared vocabulary and vectorizer, built from every document in `docs`.
# Pruning keeps terms that occur at least 5 times overall and appear in at
# most 10% of the documents.
it <- itoken(docs$text, progressbar = FALSE)
v <- create_vocabulary(it)
v <- prune_vocabulary(v, doc_proportion_max = 0.1, term_count_min = 5)
vectorizer <- vocab_vectorizer(v)

# Matrix 1: UNGA speeches, projected onto the shared vocabulary
d1 <- ungdc18
it1 <- itoken(d1$text)
dtm_ungdc <- create_dtm(it1, vectorizer)
dim(dtm_ungdc)

## [1]  8093 76682

# Matrix 2: UNDRIP documents, projected onto the same vocabulary
d2 <- undrip
it2 <- itoken(d2$text)
dtm_undrip <- create_dtm(it2, vectorizer)
dim(dtm_undrip)

## [1]    47 76682

# Cosine similarity: one row per speech, one column per UNDRIP document
dtm_cos_sim <- sim2(dtm_ungdc, dtm_undrip, method = "cosine", norm = "l2")


# UNDRIP target articles ----

# Columns of `dtm_cos_sim` (i.e. rows of `undrip`) that serve as targets.
undrip_targets <- c(10, 25, 26, 27, 28, 29, 30, 32)

# The join below maps each similarity row back to a speech through the row
# names of the similarity matrix, so fail loudly if they are missing instead
# of joining on meaningless positions.
stopifnot(!is.null(rownames(dtm_cos_sim)))

# Add to dataframe ----

# Take every speech row for the target columns in one slice; this avoids
# repeating a hard-coded row count for each article.
target_sims <- as.matrix(dtm_cos_sim[, undrip_targets, drop = FALSE])

# One row per speech: numeric `index` key plus one cos_undrip<k> column per
# target, in the same order as `undrip_targets`.
cos_undrip <- data.frame(
  index = as.numeric(rownames(target_sims)),
  target_sims,
  row.names = NULL
)
names(cos_undrip)[-1] <- paste0("cos_undrip", undrip_targets)

# Single keyed join; the explicit `by` documents the key and guards against
# accidentally joining on any other shared column.
ungdc18 <- left_join(ungdc18, cos_undrip, by = "index")

# Clear Extra Objects
rm(undrip_targets, target_sims, cos_undrip)

Top Cosine Similarity

# Top 20 for each UNDRIP article ----
# For every target article: keep speeches from 2007 onwards, retain the
# document id and that article's similarity, and take the 20 highest scores.

# Top 20 cos_undrip10
top_undrip10 <- ungdc18 %>%
  dplyr::filter(year >= 2007) %>%
  dplyr::select(doc_id, cos_undrip10) %>%
  arrange(desc(cos_undrip10)) %>%
  head(n = 20)

# Top 20 cos_undrip25
top_undrip25 <- ungdc18 %>%
  dplyr::filter(year >= 2007) %>%
  dplyr::select(doc_id, cos_undrip25) %>%
  arrange(desc(cos_undrip25)) %>%
  head(n = 20)

# Top 20 cos_undrip26
top_undrip26 <- ungdc18 %>%
  dplyr::filter(year >= 2007) %>%
  dplyr::select(doc_id, cos_undrip26) %>%
  arrange(desc(cos_undrip26)) %>%
  head(n = 20)

# Top 20 cos_undrip27
top_undrip27 <- ungdc18 %>%
  dplyr::filter(year >= 2007) %>%
  dplyr::select(doc_id, cos_undrip27) %>%
  arrange(desc(cos_undrip27)) %>%
  head(n = 20)

# Top 20 cos_undrip28
top_undrip28 <- ungdc18 %>%
  dplyr::filter(year >= 2007) %>%
  dplyr::select(doc_id, cos_undrip28) %>%
  arrange(desc(cos_undrip28)) %>%
  head(n = 20)

# Top 20 cos_undrip29
top_undrip29 <- ungdc18 %>%
  dplyr::filter(year >= 2007) %>%
  dplyr::select(doc_id, cos_undrip29) %>%
  arrange(desc(cos_undrip29)) %>%
  head(n = 20)

# Top 20 cos_undrip30
top_undrip30 <- ungdc18 %>%
  dplyr::filter(year >= 2007) %>%
  dplyr::select(doc_id, cos_undrip30) %>%
  arrange(desc(cos_undrip30)) %>%
  head(n = 20)

# Top 20 cos_undrip32
top_undrip32 <- ungdc18 %>%
  dplyr::filter(year >= 2007) %>%
  dplyr::select(doc_id, cos_undrip32) %>%
  arrange(desc(cos_undrip32)) %>%
  head(n = 20)

# Sum Cosine Similarity

# Add up the eight UNDRIP similarity columns, selected by name rather than by
# position so the total does not depend on where they sit in ungdc18.
cos_cols <- paste0("cos_undrip", c(10, 25:30, 32))

ungdc18 <- ungdc18 %>%
  mutate(cos_sum = rowSums(.[cos_cols]))

# 20 speeches with the highest summed similarity.
# NOTE(review): this ranking starts at 2008, whereas the per-article top-20
# tables start at 2007 -- confirm the different cut-off is intended.
top_undrip <- head(
  ungdc18[which(ungdc18$year >= 2008), c("doc_id", "cos_sum")] %>%
    arrange(desc(cos_sum)),
  n = 20
)

top_undrip

##             doc_id   cos_sum
## 1  CAN_72_2017.txt 1.8602805
## 2  DMA_64_2009.txt 0.4986725
## 3  VUT_68_2013.txt 0.4307465
## 4  PRY_63_2008.txt 0.4003682
## 5  BOL_63_2008.txt 0.3971559
## 6  ECU_72_2017.txt 0.3646951
## 7  PRY_69_2014.txt 0.3632261
## 8  BOL_64_2009.txt 0.3612142
## 9  VUT_66_2011.txt 0.3599404
## 10 PER_66_2011.txt 0.3585237
## 11 GTM_64_2009.txt 0.3448412
## 12 GTM_72_2017.txt 0.3338650
## 13 COG_65_2010.txt 0.3325180
## 14 BOL_70_2015.txt 0.3125577
## 15 BOL_65_2010.txt 0.2839817
## 16 AUS_65_2010.txt 0.2612376
## 17 DMA_71_2016.txt 0.2552000
## 18 VUT_70_2015.txt 0.2540069
## 19 SWZ_69_2014.txt 0.2430754
## 20 ECU_64_2009.txt 0.2416352

#stargazer(top_undrip, summary = FALSE)

CMD terms

# Concept Mover's Distance (CMD) scores, one ungdc18 column per concept ----

# Build the speech-level document-term matrix once: term counts cast from
# `ungdc_unnest`, with very sparse terms removed. Every concept below is
# scored against this same matrix, so there is no need to rebuild it per
# concept.
ungdc_dtm <- ungdc_unnest %>%
  cast_dtm(
    term = word,
    document = doc_id,
    value = n,
    weighting = tm::weightTf
  ) %>%
  removeSparseTerms(.999)

# Name of the output column in ungdc18 -> concept string handed to CMDist().
cmd_concepts <- c(
  cmd_environment = "environment",
  cmd_indigenous  = "indigenous",
  cmd_indenv      = "indigenous environment",
  cmd_colonialism = "colonialism",
  cmd_corporation = "corporation",
  cmd_indcorp     = "indigenous corporation",
  cmd_freetrade   = "free trade",
  cmd_freemarket  = "free market",
  cmd_nature      = "nature",
  cmd_envsteward  = "environmental stewardship",
  cmd_indself     = "indigenous self-determination",
  cmd_socinq      = "socioeconomic inequality"
)

# NOTE(review): scores are attached to ungdc18 by row position, which assumes
# the documents in `ungdc_unnest` appear in the same order (and number) as the
# rows of ungdc18 -- confirm, or join on the document id instead.
for (cmd_col in names(cmd_concepts)) {
  cmd_scores <- CMDist(ungdc_dtm, cw = cmd_concepts[[cmd_col]], wv = my.wv)
  # The second column of the CMDist() result holds the score for the concept
  # (the first is presumably the document identifier -- verify against the
  # installed CMDist version).
  ungdc18[[cmd_col]] <- as.numeric(unlist(cmd_scores[[2]]))
}

# Drop loop temporaries
rm(cmd_col, cmd_scores)

CMD Figures

# CMD to Environmental Stewardship ----
# Speech-level scores over time, coloured by UN region ("OTHER" excluded).
# The dashed line marks a score of 1, the threshold used below to count a
# speech as highly engaged.
ungdc18 %>%
  dplyr::filter(UN_REGION != "OTHER") %>%
  ggplot(aes(x = year, y = cmd_envsteward, colour = UN_REGION)) +
  geom_point() +
  geom_smooth() +
  geom_hline(yintercept = 1, linetype = "dashed") +
  labs(
    title = "Concept Mover Distance to 'Environmental Stewardship'",
    subtitle = "Subset by UN Region",
    x = "year",
    y = "Conceptual Engagement with 'Environmental Stewardship'"
  )

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Proportion Engagement with Environmental Stewardship ----
# Per region and year: number of speeches and number scoring above 1.
propeng_envsteward <- ungdc18 %>%
  dplyr::filter(UN_REGION != "OTHER") %>%
  group_by(year, UN_REGION) %>%
  summarise(
    n_regionyear = n(),
    n_engage = sum(cmd_envsteward > 1)
  )

## `summarise()` regrouping output by 'year' (override with `.groups` argument)

# Total number of speeches per year across all regions.
yeartotal <- propeng_envsteward %>%
  group_by(year) %>%
  summarise(n_total = sum(n_regionyear))

## `summarise()` ungrouping output (override with `.groups` argument)

propeng_envsteward <- propeng_envsteward %>%
  full_join(yeartotal)

## Joining, by = "year"

# Each region's engaged speeches as a share of all speeches in that year,
# drawn as a stacked area chart.
propeng_envsteward %>%
  group_by(year, UN_REGION) %>%
  summarise(propeng_region = n_engage / n_total) %>%
  ggplot(aes(x = year, y = propeng_region, fill = UN_REGION)) +
  geom_area() +
  labs(
    title = "Proportion of Speeches Highly Engaged with 'Environmental Stewardship'",
    subtitle = "Subset by UN Region",
    x = "year",
    y = "Proportion of Highly Engaged Speeches"
  )

## `summarise()` regrouping output by 'year' (override with `.groups` argument)

# Indigenous Self-determination ----
# Speech-level scores over time, coloured by UN region ("OTHER" excluded).
# The dashed line marks a score of 1, the threshold used below to count a
# speech as highly engaged.
ungdc18 %>%
  dplyr::filter(UN_REGION != "OTHER") %>%
  ggplot(aes(x = year, y = cmd_indself, colour = UN_REGION)) +
  geom_point() +
  geom_smooth() +
  geom_hline(yintercept = 1, linetype = "dashed") +
  labs(title = "Concept Mover Distance to 'Indigenous Self-determination'")

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Proportion Engagement with Indigenous Self-Determination ----
# Per region and year: number of speeches and number scoring above 1.
propeng_indself <- ungdc18 %>%
  dplyr::filter(UN_REGION != "OTHER") %>%
  group_by(year, UN_REGION) %>%
  summarise(
    n_regionyear = n(),
    n_engage = sum(cmd_indself > 1)
  )

## `summarise()` regrouping output by 'year' (override with `.groups` argument)

# Total number of speeches per year across all regions.
yeartotal <- propeng_indself %>%
  group_by(year) %>%
  summarise(n_total = sum(n_regionyear))

## `summarise()` ungrouping output (override with `.groups` argument)

propeng_indself <- propeng_indself %>%
  full_join(yeartotal)

## Joining, by = "year"

# Each region's engaged speeches as a share of all speeches in that year,
# drawn as a stacked area chart.
propeng_indself %>%
  group_by(year, UN_REGION) %>%
  summarise(propeng_region = n_engage / n_total) %>%
  ggplot(aes(x = year, y = propeng_region, fill = UN_REGION)) +
  geom_area() +
  labs(
    title = "Proportion of Speeches Highly Engaged with 'Indigenous Self-Determination'",
    subtitle = "Subset by UN Region",
    x = "year",
    y = "Proportion of Highly Engaged Speeches"
  )

## `summarise()` regrouping output by 'year' (override with `.groups` argument)

# Socioeconomic Inequality ----
# Speech-level scores over time, coloured by UN region ("OTHER" excluded).
# The dashed line marks a score of 1, the threshold used below to count a
# speech as highly engaged.
ungdc18 %>%
  dplyr::filter(UN_REGION != "OTHER") %>%
  ggplot(aes(x = year, y = cmd_socinq, colour = UN_REGION)) +
  geom_point() +
  geom_smooth() +
  geom_hline(yintercept = 1, linetype = "dashed") +
  labs(title = "Concept Mover Distance to 'Socioeconomic Inequality'")

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Proportion Engagement with Socioeconomic Inequality ----
# Per region and year: number of speeches and number scoring above 1.
propeng_socinq <- ungdc18 %>%
  dplyr::filter(UN_REGION != "OTHER") %>%
  group_by(year, UN_REGION) %>%
  summarise(
    n_regionyear = n(),
    n_engage = sum(cmd_socinq > 1)
  )

## `summarise()` regrouping output by 'year' (override with `.groups` argument)

# Total number of speeches per year across all regions.
yeartotal <- propeng_socinq %>%
  group_by(year) %>%
  summarise(n_total = sum(n_regionyear))

## `summarise()` ungrouping output (override with `.groups` argument)

propeng_socinq <- propeng_socinq %>%
  full_join(yeartotal)

## Joining, by = "year"

# Each region's engaged speeches as a share of all speeches in that year,
# drawn as a stacked area chart.
propeng_socinq %>%
  group_by(year, UN_REGION) %>%
  summarise(propeng_region = n_engage / n_total) %>%
  ggplot(aes(x = year, y = propeng_region, fill = UN_REGION)) +
  geom_area() +
  labs(
    title = "Proportion of Speeches Highly Engaged with 'Socioeconomic Inequality'",
    subtitle = "Subset by UN Region",
    x = "year",
    y = "Proportion of Highly Engaged Speeches"
  )

## `summarise()` regrouping output by 'year' (override with `.groups` argument)

Replication File: Global Environmental Governance

Cosine Similarity to UNDRIP:

Top Cosine Similarity

CMD terms

CMD Figures