APPENDIX

This appendix provides supplementary information for the thesis on Topic.

APPENDIX A : Code for generating figures

DATA MANAGEMENT

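The code in this section loads the COP28 and COP29 comment spreadsheets, cleans the comment text (lower-casing, removing URLs and all non-letter characters), and builds the bigram, TF-IDF and sentiment tables that the figures below are drawn from.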

# ============================================================
# Loading Libraries
# ============================================================
library(dplyr)
library(stringr)
library(tidyr)
library(tibble)
library(readxl)
library(ggplot2)
library(scales)
library(tidytext)
library(quanteda)
library(patchwork)

# ============================================================
# Data Preparation and Management
# ============================================================
# Locate the per-thread comment workbooks for each conference in the working
# directory. The patterns are anchored at both ends so that only the real
# workbooks are read: Excel lock files ("~$COP_28_comment_nr_1.xlsx") and
# backups ("...xlsx.bak") also match the unanchored pattern.
cop28_files <- list.files(pattern = "^COP_28_comment_nr_.*\\.xlsx$")
cop29_files <- list.files(pattern = "^COP_29_comment_nr_.*\\.xlsx$")

# Every later step compares the two conferences, so stop here with a clear
# message rather than silently analysing one conference or failing further
# down with an unrelated error.
if (length(cop28_files) == 0 || length(cop29_files) == 0) {
  stop(
    "Comment workbooks missing in ", getwd(), ": found ",
    length(cop28_files), " COP28 and ", length(cop29_files),
    " COP29 file(s); expected 'COP_28_comment_nr_*.xlsx' and ",
    "'COP_29_comment_nr_*.xlsx'.",
    call. = FALSE
  )
}

# Stack the workbooks of each conference and label the rows.
cop28 <- bind_rows(lapply(cop28_files, read_excel)) %>% mutate(session = "COP28")
cop29 <- bind_rows(lapply(cop29_files, read_excel)) %>% mutate(session = "COP29")

# Keep only the conference label (stored twice: `session` is used by the
# tidytext steps, `conf` by the quanteda steps) and the comment text, then
# clean the text.
all_comments <- bind_rows(cop28, cop29) %>%
  transmute(
    session,
    conf = session,
    comment_content = as.character(comment_content)
  ) %>%
  mutate(
    comment_content = str_to_lower(comment_content),
    # Drop URLs before stripping punctuation so their pieces do not survive
    # as words.
    comment_content = str_replace_all(comment_content, "http\\S+|www\\S+", " "),
    # Everything except a-z and whitespace becomes a space; this removes
    # digits, punctuation and non-ASCII letters.
    comment_content = str_replace_all(comment_content, "[^a-z\\s]", " "),
    comment_content = str_squish(comment_content)
  ) %>%
  # Comments that are missing or empty after cleaning carry no tokens.
  filter(!is.na(comment_content), nchar(comment_content) > 0)

# Corpus-specific stopwords: platform boilerplate, conference labels, personal
# and place names, and source-site names that would otherwise dominate the
# bigram counts.
custom_stop <- c(
  "comment", "deleted", "removed", "http", "https", "www", "reddit",
  # The text cleaning replaces digits with spaces before tokenisation, so
  # "cop28" and "cop29" reach the tokenizer as the bare word "cop". The
  # original entries are kept, but "cop" is the form that actually occurs.
  "cop", "cop28", "cop29",
  "bla", "alex", "newman", "baker", "creek", "gates",
  "zayed", "sunak", "anderson", "sultan", "charles", "aliyev", "kerry",
  "starmer", "gore", "pannier", "soylent", "sincerely", "gotta", "rent",
  "kool", "aid", "baku", "karabakh", "azerbaijani", "archived", "version",
  "charts", "ourworldindata", "worldometers", "freedomhouse", "na",
  "thrown", "dioxide", "prematurely", "billion"
)

# Combined lookup: the tidytext `stop_words` table plus the custom list, in
# the same two-column (word, lexicon) layout.
all_stopwords <- bind_rows(
  stop_words,
  tibble(word = custom_stop, lexicon = "custom")
)

# Split every cleaned comment into adjacent word pairs, discard pairs in which
# either word is a stopword or contains a digit, and fold a handful of plural
# phrases onto their singular spelling so both forms are counted together.
bigrams_sep <- unnest_tokens(
  all_comments, bigram, comment_content,
  token = "ngrams", n = 2
)

# Comments with fewer than two words produce an NA bigram.
bigrams_sep <- bigrams_sep[!is.na(bigrams_sep$bigram), ]

bigrams_sep <- bigrams_sep %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(
    !(word1 %in% all_stopwords$word | word2 %in% all_stopwords$word),
    !(str_detect(word1, "\\d") | str_detect(word2, "\\d"))
  ) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  # Anchored patterns replace whole bigrams only; everything else is left
  # untouched.
  mutate(bigram = str_replace_all(bigram, c(
    "^fossil fuels$"         = "fossil fuel",
    "^greenhouse gases$"     = "greenhouse gas",
    "^developing nations$"   = "developing nation",
    "^developing countries$" = "developing country",
    "^carbon emissions$"     = "carbon emission",
    "^gas emissions$"        = "gas emission",
    "^poor nations$"         = "poor nation"
  )))

# Build a whole-word regular expression for a set of theme keywords.
#
# A plain alternation such as "coal|oil|aid" matches inside longer words, so
# "coalition", "soil" and "paid" would all be tagged. The pattern built here
# requires a word boundary on both sides and allows only the common
# inflections (-s, -es, -d, -ed, -ing), e.g. "fund" still matches "funds" and
# "funded" but no longer "fundamental".
theme_word_regex <- function(keywords) {
  paste0("\\b(?:", paste(keywords, collapse = "|"), ")(?:s|es|d|ed|ing)?\\b")
}

# Assign each bigram to the first policy theme whose keyword list it matches
# (the order of the case_when() branches is the precedence) and drop bigrams
# that match no theme.
bigrams_themed <- bigrams_sep %>%
  mutate(theme = case_when(
    str_detect(bigram, theme_word_regex(c(
      "finance", "financing", "investment", "fund", "funding", "ncqg",
      "donor", "aid", "grant", "accelerator", "guilt"
    ))) ~ "finance",
    str_detect(bigram, theme_word_regex(c(
      "equity", "justice", "developing", "poor nation", "global south",
      "vulnerable", "responsibility"
    ))) ~ "justice",
    str_detect(bigram, theme_word_regex(c(
      "renewable", "fossil", "fuel", "oil", "gas", "coal", "solar", "wind",
      "nuclear", "electricity", "greenhouse"
    ))) ~ "energy",
    str_detect(bigram, theme_word_regex(c(
      "adaptation", "resilience", "flood", "drought", "disaster",
      "sea level", "heat wave", "damage", "vulnerability"
    ))) ~ "adaptation",
    str_detect(bigram, theme_word_regex(c(
      "carbon", "market", "pricing", "technology", "ccs", "emission",
      "neutral"
    ))) ~ "markets",
    TRUE ~ NA_character_
  )) %>%
  filter(!is.na(theme))

# Figure 1 input: the ten most frequent themed bigrams in each conference.
# Counting with sort = TRUE fixes the row order that slice_max() uses to break
# ties when with_ties = FALSE.
top_bigrams_plot <- count(bigrams_themed, session, bigram, sort = TRUE)
top_bigrams_plot <- top_bigrams_plot %>%
  group_by(session) %>%
  slice_max(order_by = n, n = 10, with_ties = FALSE) %>%
  ungroup()

# Title-case the labels, then order them by frequency within each conference
# so every facet gets its own bar order.
top_bigrams_plot$bigram <- str_to_title(top_bigrams_plot$bigram)
top_bigrams_plot$bigram <- reorder_within(
  top_bigrams_plot$bigram,
  top_bigrams_plot$n,
  top_bigrams_plot$session
)

# Figure 5 input: rate of every (unthemed) bigram per 10,000 bigrams of its
# conference. `total` is the conference-wide number of bigram occurrences.
bigram_freq <- bigrams_sep %>%
  count(session, bigram) %>%
  add_count(session, wt = n, name = "total") %>%
  mutate(freq_per10k = n / total * 10000)

# quanteda pipeline: one document per comment, built from the cleaned text.
corp <- corpus(all_comments, text_field = "comment_content")

# Tokenise, then narrow the token stream one step at a time.
toks <- tokens(
  corp,
  remove_punct   = TRUE,
  remove_numbers = TRUE,
  remove_symbols = TRUE
)
toks <- tokens_tolower(toks)
toks <- tokens_remove(toks, stopwords("en"))
toks <- tokens_remove(
  toks,
  pattern = c("https", "http", "www", "wikipedia", "wiki", "en")
)
# Keep only tokens of at least three characters.
toks <- tokens_keep(toks, min_nchar = 3)

# NOTE(review): the removals above do not use padding, so words that were
# separated by a removed token become neighbours and can form a bigram here.
# The tidytext pipeline filters stopwords after forming bigrams, so the two
# pipelines may count different phrases. Confirm this is intended.
toks_bg <- tokens_ngrams(toks, n = 2)
dfm_bg  <- dfm(toks_bg)

# Collapse the per-comment rows into one row per conference. The grouping
# vector comes from the same data frame the corpus was built from.
dfm_g <- dfm_group(dfm_bg, groups = all_comments$conf)

# Counts per conference in long form (one row per conference x bigram), used
# for the cross-corpus appearance figures. quanteda joins the two words of a
# bigram with "_"; that is turned back into a space.
counts_long <- convert(dfm_g, to = "data.frame")
counts_long <- pivot_longer(
  counts_long,
  cols = -doc_id, names_to = "bigram", values_to = "count"
)
counts_long <- rename(counts_long, conf = doc_id)
counts_long$bigram <- str_replace_all(counts_long$bigram, "_", " ")

# Total number of bigram occurrences per conference (missing counts ignored).
totals <- count(counts_long, conf, wt = count, name = "total_bigrams")

# TF-IDF on the conference-level matrix, reshaped the same way. Only bigrams
# with a positive weight, and only entries that actually contain a space, are
# kept.
tfidf      <- dfm_tfidf(dfm_g)
tfidf_long <- convert(tfidf, to = "data.frame")
tfidf_long <- tfidf_long %>%
  pivot_longer(cols = -doc_id, names_to = "bigram", values_to = "tf_idf") %>%
  rename(conf = doc_id) %>%
  mutate(bigram = str_replace_all(bigram, "_", " ")) %>%
  filter(tf_idf > 0, str_detect(bigram, " "))

# Policy themes and the keywords that assign a bigram to each of them. The
# order of the list is also the precedence used when a bigram matches
# keywords from more than one theme.
themes <- list(
  finance    = c("finance", "financing", "investment", "ncqg", "fund", "funding"),
  justice    = c("equity", "justice", "climate justice", "cbdrrc", "responsibility"),
  energy     = c("energy", "renewable", "fossil", "fuel", "oil", "gas", "coal"),
  adaptation = c("adaptation", "resilience", "loss", "damage", "vulnerability"),
  markets    = c("carbon", "market", "pricing", "article", "technology", "ccs")
)

# Long (theme, keyword) lookup table, one row per keyword. lengths() always
# returns an integer vector, unlike sapply(), and use.names = FALSE keeps the
# auto-generated names ("finance1", ...) out of the keyword column.
theme_lookup <- tibble(
  theme   = rep(names(themes), times = lengths(themes)),
  keyword = str_to_lower(unlist(themes, use.names = FALSE))
)

theme_order <- names(themes)

# Tag policy-relevant bigrams: pair every TF-IDF row with every theme keyword
# and keep the pairs in which the keyword occurs in the bigram.
# NOTE(review): fixed() matches substrings, so a keyword such as "oil" also
# tags a bigram containing "soil". Confirm that is acceptable.
policy_bigrams_one <- crossing(tfidf_long, theme_lookup)
policy_bigrams_one <- policy_bigrams_one %>%
  filter(str_detect(bigram, fixed(keyword))) %>%
  distinct(conf, bigram, tf_idf, theme)

# A bigram can hit several themes. Ordering the themes as a factor and taking
# the first row per conference x bigram keeps the highest-precedence theme.
policy_bigrams_one$theme <- factor(policy_bigrams_one$theme, levels = theme_order)
policy_bigrams_one <- policy_bigrams_one %>%
  arrange(conf, bigram, theme) %>%
  group_by(conf, bigram) %>%
  slice_head(n = 1) %>%
  ungroup()

# Figure 2 input: the ten highest-TF-IDF policy bigrams per conference, with
# sentence-case labels.
top_policy_phrases_top10 <- policy_bigrams_one %>%
  group_by(conf) %>%
  slice_max(order_by = tf_idf, n = 10, with_ties = FALSE) %>%
  ungroup()
top_policy_phrases_top10$bigram <- str_to_sentence(top_policy_phrases_top10$bigram)

#' Score a fixed list of bigrams in a source and a target conference
#'
#' For every bigram in `terms`, looks up its count and TF-IDF weight in both
#' conferences and converts the counts to rates per 10,000 bigrams.
#'
#' @param terms Character vector of bigrams (words separated by one space).
#' @param source_conf,target_conf Conference labels. Each must match exactly
#'   one row of `totals$conf`.
#' @param counts_long Data frame with columns `conf`, `bigram` and `count`.
#' @param tfidf_long Data frame with columns `conf`, `bigram` and `tf_idf`.
#' @param totals Data frame with columns `conf` and `total_bigrams`.
#' @return A tibble with the columns `bigram`, `src_count`, `tgt_count`,
#'   `src_tfidf`, `tgt_tfidf`, `src_per10k`, `tgt_per10k` and
#'   `carryover_ratio` (target rate divided by source rate; `NA` when the term
#'   never occurs in the source). Rows are sorted by descending target rate,
#'   then descending target count. Terms absent from a conference get a count
#'   of 0 and an `NA` TF-IDF.
score_terms_across <- function(terms, source_conf, target_conf,
                               counts_long, tfidf_long, totals) {
  src_total <- totals$total_bigrams[totals$conf == source_conf]
  tgt_total <- totals$total_bigrams[totals$conf == target_conf]

  # A label that is missing from (or duplicated in) `totals` would otherwise
  # surface as a cryptic recycling error inside mutate().
  if (length(src_total) != 1) {
    stop(
      "`source_conf` ('", source_conf, "') must match exactly one row of ",
      "`totals$conf`; found ", length(src_total), ".",
      call. = FALSE
    )
  }
  if (length(tgt_total) != 1) {
    stop(
      "`target_conf` ('", target_conf, "') must match exactly one row of ",
      "`totals$conf`; found ", length(tgt_total), ".",
      call. = FALSE
    )
  }

  tibble(bigram = terms) %>%
    left_join(counts_long %>% filter(conf == source_conf) %>%
                select(bigram, src_count = count), by = "bigram") %>%
    left_join(counts_long %>% filter(conf == target_conf) %>%
                select(bigram, tgt_count = count), by = "bigram") %>%
    left_join(tfidf_long %>% filter(conf == source_conf) %>%
                select(bigram, src_tfidf = tf_idf), by = "bigram") %>%
    left_join(tfidf_long %>% filter(conf == target_conf) %>%
                select(bigram, tgt_tfidf = tf_idf), by = "bigram") %>%
    mutate(
      # Terms that were not found in a conference simply never occurred there.
      src_count       = replace_na(src_count, 0),
      tgt_count       = replace_na(tgt_count, 0),
      src_per10k      = (src_count / src_total) * 10000,
      tgt_per10k      = (tgt_count / tgt_total) * 10000,
      # Test the count rather than the derived rate to avoid a floating-point
      # comparison; the ratio is undefined without a source occurrence.
      carryover_ratio = if_else(src_count == 0, NA_real_, tgt_per10k / src_per10k)
    ) %>%
    arrange(desc(tgt_per10k), desc(tgt_count))
}

# Hand-picked distinctive phrases of the COP28 corpus.
cop28_terms <- c(
  "renewable share",
  "natural gas",
  "coal power",
  "coal production",
  "coal electricity",
  "coal based",
  "buying oil",
  "blocking progress",
  "baffling oil",
  "authoritarian oil"
)

# Hand-picked distinctive phrases of the COP29 corpus.
cop29_terms <- c(
  "developing nation",
  "developing country",
  "russian gas",
  "oil drilling",
  "expanding coal",
  "developing world",
  "burn coal",
  "actual action",
  "accelerator fund",
  "greenhouse gas",
  "gas emissions"
)

# How often do the COP28 phrases appear in the COP29 corpus?
cop28_in_cop29 <- score_terms_across(
  cop28_terms, "COP28", "COP29",
  counts_long, tfidf_long, totals
)

# And the reverse: COP29 phrases looked up in the COP28 corpus.
cop29_in_cop28 <- score_terms_across(
  cop29_terms, "COP29", "COP28",
  counts_long, tfidf_long, totals
)

# Figure 4 input: source and target rates of the COP28 phrases stacked into
# one `per10k` column, with a legend label saying which corpus each rate
# comes from.
plot_28_shift <- cop28_in_cop29[, c("bigram", "src_per10k", "tgt_per10k")] %>%
  pivot_longer(
    cols = c(src_per10k, tgt_per10k),
    names_to = "where", values_to = "per10k"
  ) %>%
  mutate(
    COP = case_when(
      where == "src_per10k" ~ "COP28 (original corpus)",
      where == "tgt_per10k" ~ "COP29 (appearance of COP28 terms)"
    ),
    bigram = str_to_sentence(bigram)
  )

# Figure 3 input: the same reshaping for the COP29 phrases.
plot_29_shift <- cop29_in_cop28[, c("bigram", "src_per10k", "tgt_per10k")] %>%
  pivot_longer(
    cols = c(src_per10k, tgt_per10k),
    names_to = "where", values_to = "per10k"
  ) %>%
  mutate(
    COP = case_when(
      where == "src_per10k" ~ "COP29 (original corpus)",
      where == "tgt_per10k" ~ "COP28 (appearance of COP29 terms)"
    ),
    bigram = str_to_sentence(bigram)
  )

# Phrases shown in the diverging-shift figure. They are matched against
# `bigram_freq`, whose bigrams have already been folded onto singular forms
# ("gas emissions" is stored as "gas emission"), so each phrase must be
# spelled in that normalised form or it is silently dropped from the figure.
relevant_bigrams <- c(
  "renewable share", "natural gas", "coal power",
  "coal production", "coal electricity", "coal based",
  "buying oil", "blocking progress",
  "developing nation", "developing country", "developing world",
  "greenhouse gas", "gas emission", "russian gas",
  "oil drilling", "burn coal", "expanding coal",
  "accelerator fund"
)

# One row per phrase with its rate per 10,000 bigrams in each conference
# (0 when the phrase is absent from a conference) and the COP28 -> COP29
# change.
diverging_data <- bigram_freq %>%
  filter(bigram %in% relevant_bigrams) %>%
  select(session, bigram, freq_per10k) %>%
  pivot_wider(names_from = session, values_from = freq_per10k, values_fill = 0) %>%
  mutate(
    change    = COP29 - COP28,
    # A change of exactly zero falls into the COP28 label.
    direction = if_else(change > 0,
                        "More prominent in COP29",
                        "More prominent in COP28"),
    bigram    = str_to_sentence(bigram)
  )

# Classify every comment as positive, negative or neutral by comparing its
# number of positive and negative Bing-lexicon words.
comment_sentiment <- all_comments %>%
  mutate(id = row_number()) %>%
  unnest_tokens(word, comment_content) %>%
  # left_join keeps words without a lexicon entry, so comments with no
  # sentiment words still get a row and end up "neutral". The Bing lexicon
  # lists a few words under both polarities; declaring the relationship
  # (as the NRC join in this file does) avoids a spurious many-to-many
  # warning without changing the result.
  left_join(get_sentiments("bing"), by = "word",
            relationship = "many-to-many") %>%
  group_by(session, id) %>%
  summarise(
    pos = sum(sentiment == "positive", na.rm = TRUE),
    neg = sum(sentiment == "negative", na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(label = case_when(
    pos > neg ~ "positive",
    neg > pos ~ "negative",
    TRUE      ~ "neutral"
  ))

# Figure 6 input: share of comments per sentiment label within each
# conference, with capitalised labels in a fixed display order.
sentiment_all <- comment_sentiment %>%
  count(session, label) %>%
  group_by(session) %>%
  mutate(proportion = n / sum(n)) %>%
  ungroup() %>%
  rename(sentiment = label) %>%
  mutate(
    sentiment = str_to_sentence(sentiment),
    sentiment = factor(sentiment, levels = c("Negative", "Neutral", "Positive"))
  )

# NRC lexicon restricted to its emotion categories: the two polarity labels
# are dropped.
nrc_filtered <- get_sentiments("nrc")
nrc_filtered <- nrc_filtered[
  !(nrc_filtered$sentiment %in% c("positive", "negative")),
]

# Figure 7 input: share of each emotion among all emotion-word matches of a
# conference. A word can carry several emotions, so the join is declared
# many-to-many.
nrc_scores <- unnest_tokens(all_comments, word, comment_content) %>%
  inner_join(nrc_filtered, by = "word", relationship = "many-to-many") %>%
  count(session, sentiment) %>%
  mutate(proportion = n / sum(n), .by = session) %>%
  mutate(sentiment = str_to_sentence(sentiment))

Figures in Thesis ….title

Figure 1: Top Thematic Bigrams

# Figure 1: horizontal bars of the most frequent themed bigrams, one panel per
# conference. scale_y_reordered() strips the suffix that reorder_within()
# added to the labels.
twophrases <- ggplot(
  data    = top_bigrams_plot,
  mapping = aes(x = n, y = bigram, fill = session)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(session), scales = "free_y") +
  scale_y_reordered() +
  scale_fill_manual(values = c("COP28" = "#298c8c", "COP29" = "#9fc8c8")) +
  labs(y = "Two Word Policy Phrases", x = "Frequency") +
  theme_minimal(base_size = 12, base_family = "Times New Roman") +
  theme(
    # Text
    axis.title.x       = element_text(size = 13, margin = margin(t = 12)),
    axis.title.y       = element_text(size = 13, margin = margin(r = 15)),
    axis.text          = element_text(size = 12),
    strip.text         = element_text(size = 12),
    # Grid and spacing
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank(),
    panel.spacing.x    = unit(1.5, "lines"),
    plot.margin        = margin(10, 15, 10, 10)
  )

twophrases

ggsave(
  filename = "~/Desktop/Plots_thesis_20260424/twowordpolicyphrases.png",
  plot     = twophrases,
  width    = 9,
  height   = 6,
  dpi      = 300
)

Figure 2: Distinctiveness of Policy Phrases (Bigram TF-IDF)

# Conference colours. These use the same mapping as the other per-conference
# figures in this file (COP28 dark teal, COP29 light teal), so a colour means
# the same conference throughout the thesis.
cop_year_colors <- c("COP28" = "#298c8c", "COP29" = "#9fc8c8")

# Figure 2: the ten most distinctive policy bigrams per conference, ranked by
# TF-IDF. reorder_within() orders the bars inside each facet and
# scale_y_reordered() strips the suffix it adds to the labels.
distinct10 <- ggplot(top_policy_phrases_top10,
                     aes(x = tf_idf,
                         y = reorder_within(bigram, tf_idf, conf),
                         fill = conf)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ conf, scales = "free_y") +
  scale_y_reordered() +
  scale_fill_manual(values = cop_year_colors) +
  labs(x = "TF-IDF (distinctiveness by conference)", y = "Policy Concept") +
  theme_minimal(base_family = "Times New Roman", base_size = 12) +
  theme(
    axis.title.y       = element_text(size = 13, margin = margin(r = 15)),
    axis.title.x       = element_text(size = 13, margin = margin(t = 12)),
    axis.text          = element_text(size = 12),
    panel.spacing.x    = unit(1.5, "lines"),
    strip.text         = element_text(size = 12),
    panel.grid.minor   = element_blank(),
    panel.grid.major.y = element_blank(),
    plot.margin        = margin(10, 15, 10, 10)
  )

distinct10

ggsave(distinct10,
       filename = "~/Desktop/Plots_thesis_20260424/distinct10.png",
       width = 10, height = 7, dpi = 300)

Figure 3: COP29 Distinctive Phrases — Original vs Appearance in COP28

# Figure 3: for each COP29 phrase, its rate in the COP29 corpus next to its
# rate in the COP28 corpus. Phrases are ordered by the larger of their two
# rates.
phrases29 <- plot_29_shift %>%
  ggplot(aes(
    x    = per10k,
    y    = reorder(bigram, per10k, FUN = max),
    fill = COP
  )) +
  geom_col(width = 0.7, position = position_dodge(width = 0.75)) +
  scale_fill_manual(
    values = c(
      "COP29 (original corpus)"           = "#298c8c",
      "COP28 (appearance of COP29 terms)" = "#9fc8c8"
    )
  ) +
  labs(y = "Distinctive Phrases", x = "Frequency (per 10k bigrams)") +
  theme_minimal(base_size = 12, base_family = "Times New Roman") +
  theme(
    # Text
    axis.title.x       = element_text(size = 13, margin = margin(t = 12)),
    axis.title.y       = element_text(size = 13, margin = margin(r = 15)),
    axis.text          = element_text(size = 12),
    strip.text         = element_text(size = 12),
    # Grid and spacing
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank(),
    panel.spacing.x    = unit(1.5, "lines"),
    plot.margin        = margin(10, 15, 10, 10)
  )

phrases29

ggsave(
  filename = "~/Desktop/Plots_thesis_20260424/Phrases29in28.png",
  plot     = phrases29,
  width    = 9,
  height   = 6,
  dpi      = 300
)

Figure 4: COP28 Distinctive Phrases — Original vs Appearance in COP29

# Figure 4: for each COP28 phrase, its rate in the COP28 corpus next to its
# rate in the COP29 corpus. Phrases are ordered by the larger of their two
# rates.
phrases28 <- plot_28_shift %>%
  ggplot(aes(
    x    = per10k,
    y    = reorder(bigram, per10k, FUN = max),
    fill = COP
  )) +
  geom_col(width = 0.7, position = position_dodge(width = 0.75)) +
  scale_fill_manual(
    values = c(
      "COP28 (original corpus)"           = "#298c8c",
      "COP29 (appearance of COP28 terms)" = "#9fc8c8"
    )
  ) +
  labs(y = "Distinctive Phrases", x = "Frequency (per 10k bigrams)") +
  theme_minimal(base_size = 12, base_family = "Times New Roman") +
  theme(
    # Text
    axis.title.x       = element_text(size = 13, margin = margin(t = 12)),
    axis.title.y       = element_text(size = 13, margin = margin(r = 15)),
    axis.text          = element_text(size = 12),
    strip.text         = element_text(size = 12),
    # Grid and spacing
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank(),
    panel.spacing.x    = unit(1.5, "lines"),
    plot.margin        = margin(10, 15, 10, 10)
  )

phrases28

ggsave(
  filename = "~/Desktop/Plots_thesis_20260424/Phrases28in29.png",
  plot     = phrases28,
  width    = 9,
  height   = 6,
  dpi      = 300
)

Figure 5: Direction and Magnitude of Phrase Shift (COP28 → COP29)

# x-axis range for the diverging bars, in steps of 10. A continuous scale
# removes values outside its limits, so a fixed range would silently drop any
# bar that exceeds it. The range is therefore derived from the data and never
# narrower than -30..50, which keeps the layout unchanged while the data stay
# inside that window.
shift_limits <- c(
  floor(min(diverging_data$change, 0, na.rm = TRUE) / 10),
  ceiling(max(diverging_data$change, 0, na.rm = TRUE) / 10)
) * 10
shift_limits <- c(min(shift_limits[1], -30), max(shift_limits[2], 50))

# Figure 5: change in rate (COP29 minus COP28) per phrase, sorted by the size
# of the change and coloured by the conference in which the phrase is more
# prominent.
shift <- ggplot(diverging_data,
                aes(x = change,
                    y = reorder(bigram, change),
                    fill = direction)) +
  geom_col(width = 0.7) +
  # Reference line at "no change".
  geom_vline(xintercept = 0, linewidth = 0.8, color = "grey30") +
  scale_fill_manual(values = c(
    "More prominent in COP29" = "#298c8c",
    "More prominent in COP28" = "#9fc8c8"
  )) +
  scale_x_continuous(
    limits = shift_limits,
    breaks = seq(shift_limits[1], shift_limits[2], by = 10),
    expand = c(0, 0)
  ) +
  labs(
    x    = "Change in Frequency (per 10,000 bigrams)",
    y    = "Bigram Phrase",
    fill = NULL
  ) +
  theme_classic(base_family = "Times New Roman", base_size = 12) +
  theme(
    axis.title.y      = element_text(size = 13, margin = margin(r = 15)),
    axis.title.x      = element_text(size = 13, margin = margin(t = 12)),
    axis.text         = element_text(size = 12),
    axis.line.x       = element_line(color = "grey30", linewidth = 0.5),
    axis.ticks.x      = element_line(color = "grey30"),
    axis.ticks.length = unit(0.25, "cm"),
    panel.grid        = element_blank(),
    plot.margin       = margin(10, 20, 10, 10),
    legend.position   = "bottom",
    legend.text       = element_text(size = 12)
  )

shift

ggsave(shift,
       filename = "~/Desktop/Plots_thesis_20260424/shift.png",
       width = 9, height = 7, dpi = 300)

Figure 6: Sentiment Distribution (Bing Lexicon)

# Figure 6: share of comments per Bing sentiment label, with the two
# conferences side by side.
Sentiment <- ggplot(sentiment_all,
                    aes(x = sentiment, y = proportion, fill = session)) +
  geom_col(position = "dodge", width = 0.6) +
  scale_fill_manual(values = c("COP28" = "#298c8c", "COP29" = "#9fc8c8")) +
  scale_y_continuous(
    # Percent values without the "%" sign; the unit is given in the axis title.
    labels = label_percent(suffix = ""),
    # A continuous scale removes values beyond its limits, so the upper limit
    # grows with the data instead of dropping a bar above 55 %. It stays at
    # 0.55 while every proportion is below that.
    limits = c(0, max(0.55, sentiment_all$proportion, na.rm = TRUE)),
    expand = expansion(mult = c(0, 0.05))
  ) +
  labs(
    x    = "Sentiment Category",
    y    = "Proportion of Comments (Percent)",
    fill = "COP"
  ) +
  theme_minimal(base_family = "Times New Roman", base_size = 12) +
  theme(
    axis.title.y       = element_text(size = 13, margin = margin(r = 15)),
    axis.title.x       = element_text(size = 13, margin = margin(t = 12)),
    axis.text          = element_text(size = 12),
    panel.grid.minor   = element_blank(),
    panel.grid.major.y = element_blank(),
    plot.margin        = margin(10, 15, 10, 10),
    aspect.ratio       = 1.8
  )

Sentiment

ggsave(Sentiment,
       filename = "~/Desktop/Plots_thesis_20260424/Sentimentdistribution.png",
       width = 8, height = 6, dpi = 300)

Figure 7: NRC Emotion Distribution

# Figure 7: share of each NRC emotion among the detected emotion words, with
# the two conferences side by side. Emotions are ordered by the larger of
# their two shares.
emotion <- ggplot(nrc_scores,
                  aes(y = reorder(sentiment, proportion, FUN = max),
                      x = proportion,
                      fill = session)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.75) +
  scale_fill_manual(values = c("COP28" = "#298c8c", "COP29" = "#9fc8c8")) +
  scale_x_continuous(
    # Percent values without the "%" sign; the unit is given in the axis title.
    labels = label_percent(suffix = ""),
    # A continuous scale removes values beyond its limits, so the upper limit
    # grows with the data instead of dropping a bar above 25 %. It stays at
    # 0.25 while every proportion is below that.
    limits = c(0, max(0.25, nrc_scores$proportion, na.rm = TRUE)),
    expand = expansion(mult = c(0, 0.05))
  ) +
  labs(
    x    = "Proportion of detected emotion words (Percent)",
    y    = "NRC Emotion",
    fill = "COP"
  ) +
  theme_minimal(base_family = "Times New Roman", base_size = 12) +
  theme(
    axis.title.y       = element_text(size = 13, margin = margin(r = 15)),
    axis.title.x       = element_text(size = 13, margin = margin(t = 12)),
    axis.text          = element_text(size = 12),
    panel.grid.minor   = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(color = "grey90"),
    plot.margin        = margin(10, 15, 10, 10),
    legend.position    = "right",
    legend.title       = element_text(face = "bold")
  )

emotion

ggsave(emotion,
       filename = "~/Desktop/Plots_thesis_20260424/emotion.png",
       width = 9, height = 6, dpi = 300)

Put Thesis title

Miriam ……..

2026-04-25

APPENDIX

APPENDIX A : Code for generating figures

DATA MANAGEMENT

Figures in Thesis ….title

Figure 1: Top Thematic Bigrams

Figure 2: Distinctiveness of Policy Phrases (Bigram TF-IDF)

Figure 3: COP29 Distinctive Phrases — Original vs Appearance in COP28

Figure 4: COP28 Distinctive Phrases — Original vs Appearance in COP29

Figure 5: Direction and Magnitude of Phrase Shift (COP28 → COP29)

Figure 6: Sentiment Distribution (Bing Lexicon)

Figure 7: NRC Emotion Distribution