---
title: "COST Action CA24121 — LLMs × Knowledge Graphs"
subtitle: "WG2: Domain-specific Tasks of LLMs Based on KGs"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    theme:
      version: 4
      bootswatch: flatly
    navbar:
      - { title: "CA24121", href: "https://www.cost.eu/actions/CA24121/", align: right }
    source_code: embed
---

```{css custom-styles}
/* ── Typography & base ── */
/* Pull in the two web fonts referenced further down: DM Sans and JetBrains Mono. */
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@400;500;700&family=JetBrains+Mono:wght@400;500&display=swap');

/* Page-wide font family and background colour. */
body {
  font-family: 'DM Sans', sans-serif;
  background: #f5f6fa;
}

/* Navbar: dark gradient with a purple bottom border. The background is marked
   !important — presumably so it wins over the theme's own navbar colour. */
.navbar {
  background: linear-gradient(135deg, #1b1f3b 0%, #2d3561 100%) !important;
  border-bottom: 3px solid #6c5ce7;
  box-shadow: 0 2px 12px rgba(27,31,59,0.3);
}

.navbar-brand { font-weight: 700; letter-spacing: 0.6px; }

/* ── Value boxes ── */
/* Rounded, borderless tiles with a soft shadow. */
.value-box {
  border-radius: 10px;
  box-shadow: 0 3px 12px rgba(0,0,0,0.07);
  border: none;
}
.value-box .inner { padding: 14px 18px; }
/* The figure itself is set in the monospace font. */
.value-box .value { font-size: 34px; font-weight: 700; font-family: 'JetBrains Mono', monospace; }
/* Small upper-case caption under the figure. */
.value-box .caption {
  font-size: 12px; font-weight: 600;
  text-transform: uppercase; letter-spacing: 1px;
}

/* ── Chart panels ── */
/* White rounded cards around each chart. */
.chart-wrapper {
  border-radius: 10px;
  box-shadow: 0 2px 10px rgba(0,0,0,0.05);
  background: white;
}
/* Panel heading with a purple underline (same #6c5ce7 as the navbar border). */
.chart-title {
  font-weight: 700; font-size: 13.5px;
  color: #1b1f3b; letter-spacing: 0.3px;
  border-bottom: 2px solid #6c5ce7;
  padding-bottom: 5px;
}

/* ── DT tables ── */
.dataTables_wrapper { font-size: 12px; }
/* Header cells reuse the navbar gradient with white upper-case text. */
table.dataTable thead th {
  background: linear-gradient(135deg, #1b1f3b 0%, #2d3561 100%) !important;
  color: white !important;
  font-weight: 600; font-size: 11px;
  text-transform: uppercase; letter-spacing: 0.5px;
}
/* Row hover highlight and a faint tint on odd rows of striped tables. */
table.dataTable tbody tr:hover { background-color: #f0edff !important; }
table.dataTable.stripe tbody tr.odd { background-color: #fafaff; }
```

```{r setup, include=FALSE}
library(flexdashboard)
library(tidyverse)
library(plotly)
library(DT)
library(scales)

knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)

# ── Colour system ──
# `pal`: named list of hex colours, accessed as pal$<name> by the charts and
# value boxes further down.
pal <- as.list(c(
  navy   = "#1b1f3b",
  purple = "#6c5ce7",
  violet = "#a29bfe",
  teal   = "#00b894",
  coral  = "#e17055",
  amber  = "#fdcb6e",
  slate  = "#636e72",
  rose   = "#d63031",
  blue   = "#0984e3",
  mint   = "#55efc4",
  bg     = "#f5f6fa"
))

# `pal_wg`: one colour per working group, as a character vector named
# "WG1" .. "WG6".
pal_wg <- setNames(
  c("#0984e3", "#6c5ce7", "#00b894", "#fdcb6e", "#e17055", "#d63031"),
  paste0("WG", 1:6)
)

# ── Load data ──
# Semicolon-separated export with double-quoted fields, read as UTF-8 with a
# byte-order mark.
df_all <- read.csv2("WG_applications_export_CA24121.csv",
                    fileEncoding = "UTF-8-BOM", sep = ";",
                    quote = '"', stringsAsFactors = FALSE)

# ── WG column mapping ──
# Every column whose name starts with "WG" is treated as a membership flag.
# Columns are labelled WG1, WG2, ... in the order they appear in the file.
wg_cols <- grep("^WG", names(df_all), value = TRUE)
wg_short <- paste0("WG", seq_along(wg_cols))  # seq_along: safe for zero columns
names(wg_cols) <- wg_short

# The whole dashboard is built around the second WG column; fail early with a
# readable message instead of an obscure error further down.
if (!"WG2" %in% wg_short) {
  stop("Expected at least two WG* columns in the export, found ",
       length(wg_cols), ".", call. = FALSE)
}

wg_labels <- c(
  "WG1" = "Augmenting LLMs with KGs",
  "WG2" = "Domain-specific Tasks (LLMs + KGs)",
  "WG3" = "KG Construction assisted by LLMs",
  "WG4" = "Multilinguality for KGs and LLMs",
  "WG5" = "Bias and Ethics",
  "WG6" = "Evaluation & Validation Frameworks"
)

# ── Convert y/n → logical ──
# %in% (rather than ==) turns a missing answer into FALSE, so NA cannot leak
# into the row sums, filters and counts that rely on these flags.
for (col in wg_cols) {
  df_all[[col]] <- tolower(trimws(df_all[[col]])) %in% "y"
}

# ── Clean fields ──
# country is expected to look like `Name (XX)`; the ISO code and the bare name
# are split out of it. A value without a "(XX)" part is left unchanged in both
# derived columns.
df_all$country      <- gsub('"', '', df_all$country)
df_all$country_iso  <- gsub(".*\\(([A-Z]{2})\\).*", "\\1", df_all$country)
df_all$country_name <- gsub("\\s*\\([A-Z]{2}\\)\\s*", "", df_all$country)
df_all$affiliation  <- gsub('^"|"$', '', df_all$affiliation)
# drop = FALSE keeps a data frame even if only one WG column exists.
df_all$n_wgs        <- rowSums(df_all[, wg_cols, drop = FALSE])

# ── WG2 subset ──
wg2_col <- wg_cols["WG2"]
df <- df_all %>% filter(.data[[unname(wg2_col)]])

# ── Other WG flags for WG2 members ──
# Comma-separated list of every WG a member joined (WG2 included), built
# row by row from the logical flag matrix.
wg_flags <- as.matrix(df[, wg_cols, drop = FALSE])
df$other_wgs <- vapply(
  seq_len(nrow(wg_flags)),
  function(i) paste(wg_short[wg_flags[i, ]], collapse = ", "),
  character(1)
)
```

# WG2 Overview {data-icon="fa-chart-line"}

## Row {data-height=115}

### WG2 Members {.value-box}

```{r}
# KPI tile: number of rows in the WG2 subset.
valueBox(
  value = nrow(df),
  icon  = "fa-users",
  color = pal$purple
)
```

### Approved {.value-box}

```{r}
# KPI tile: WG2 applications with status "approved". na.rm = TRUE keeps a
# single missing status from turning the whole count into NA.
valueBox(sum(df$status == "approved", na.rm = TRUE),
         icon = "fa-check-circle", color = pal$teal)
```

### Pending {.value-box}

```{r}
# KPI tile: WG2 applications with status "submitted" (shown as "Pending").
# na.rm = TRUE keeps a single missing status from turning the count into NA.
valueBox(sum(df$status == "submitted", na.rm = TRUE),
         icon = "fa-clock", color = pal$amber)
```

### Countries {.value-box}

```{r}
# KPI tile: how many different country codes occur in the WG2 subset.
valueBox(
  value = length(unique(df$country_iso)),
  icon  = "fa-globe-europe",
  color = pal$blue
)
```

### Female % {.value-box}

```{r}
# Share of WG2 members whose gender is recorded as "Female", in percent with
# one decimal. %in% counts a missing gender as "not Female" instead of
# producing NA, and the guard avoids a 0/0 = NaN tile for an empty subset.
pf <- if (nrow(df) > 0) {
  round(100 * sum(df$gender %in% "Female") / nrow(df), 1)
} else {
  0
}
valueBox(paste0(pf, "%"), icon = "fa-venus", color = pal$coral)
```

### Young Researchers {.value-box}

```{r}
# Share of WG2 members flagged "y" (any case) in youngResearcher, in percent
# with one decimal. %in% treats a missing flag as "no" instead of producing
# NA, and the guard avoids a 0/0 = NaN tile for an empty subset.
py <- if (nrow(df) > 0) {
  round(100 * sum(tolower(df$youngResearcher) %in% "y") / nrow(df), 1)
} else {
  0
}
valueBox(paste0(py, "%"), icon = "fa-seedling", color = pal$navy)
```

## Row {data-height=410}

### Geographic Distribution (Top 15 Countries)

```{r}
# Up to fifteen most frequent countries in the WG2 subset, re-sorted ascending
# so the largest count is the last row.
geo <- df %>%
  count(country_name, country_iso, sort = TRUE) %>%
  head(15) %>%
  arrange(n)

plot_ly(geo,
  y = ~reorder(paste0(country_name, " (", country_iso, ")"), n),
  x = ~n, type = "bar", orientation = "h",
  marker = list(
    # One gradient step per bar actually drawn: with fewer than 15 countries a
    # fixed 15-colour ramp would be longer than the data and the gradient
    # would stop short of its end colour.
    color = colorRampPalette(c(pal$violet, pal$navy))(nrow(geo)),
    line = list(color = pal$navy, width = 0.5)
  ),
  text = ~n, textposition = "outside",
  hoverinfo = "y+x"
) %>%
  layout(
    xaxis = list(title = "Applicants", gridcolor = "#eee", zeroline = FALSE),
    yaxis = list(title = "", tickfont = list(size = 11)),
    # Wide left margin leaves room for the "Country (XX)" tick labels.
    margin = list(l = 180), plot_bgcolor = "white", paper_bgcolor = "white"
  ) %>% config(displayModeBar = FALSE)
```

### Cross-WG Membership of WG2 Members

```{r}
# For each working group: how many rows of the WG2 subset have that WG flag
# set. The WG2 bar therefore equals the size of the subset itself.
cross <- tibble(
  wg    = wg_short,
  label = wg_labels[wg_short],
  # vapply fixes the result to one number per WG column (sapply's return type
  # depends on its input); na.rm guards against missing flags.
  n     = vapply(wg_cols, function(col) sum(df[[col]], na.rm = TRUE), numeric(1))
)

# Highlight WG2
cross$is_wg2 <- cross$wg == "WG2"

plot_ly(cross,
  x = ~wg, y = ~n, type = "bar",
  marker = list(
    color = ifelse(cross$is_wg2, pal$purple, pal$violet),
    line  = list(color = pal$navy, width = 1)
  ),
  text = ~paste0(wg, ": ", label, "<br><b>", n, "</b> WG2 members also in this WG"),
  hoverinfo = "text"
) %>%
  layout(
    xaxis = list(title = ""),
    yaxis = list(title = "WG2 Members", gridcolor = "#eee"),
    plot_bgcolor = "white", paper_bgcolor = "white",
    # Text marker placed slightly above the tallest bar, over the WG2 column.
    annotations = list(
      x = "WG2", y = max(cross$n) + 5,
      text = "← WG2 total", showarrow = FALSE,
      font = list(size = 10, color = pal$purple)
    )
  ) %>% config(displayModeBar = FALSE)
```

## Row {data-height=390}

### Demographics Breakdown

```{r}
# Gender × YR stacked
# One row per (gender, career-stage) combination with its head count.
demo <- count(
  df,
  Gender = gender,
  YR = ifelse(tolower(youngResearcher) == "y", "Young Researcher", "Established")
)

# Stacked bars: one bar per gender, one segment per career stage.
demo_fig <- plot_ly(
  demo,
  x = ~Gender,
  y = ~n,
  color = ~YR,
  colors = c("Young Researcher" = pal$teal, "Established" = pal$navy),
  type = "bar",
  text = ~n,
  textposition = "inside",
  hoverinfo = "x+text+name"
)

demo_fig <- layout(
  demo_fig,
  barmode = "stack",
  xaxis = list(title = ""),
  yaxis = list(title = "Applicants", gridcolor = "#eee"),
  legend = list(orientation = "h", y = -0.15, x = 0.15),
  plot_bgcolor = "white",
  paper_bgcolor = "white"
)

config(demo_fig, displayModeBar = FALSE)
```

### ITC vs Non-ITC

```{r}
# WG2 members split by the itc flag ("y" in any case → "ITC Country",
# anything else → "Non-ITC"), with counts and percentage shares.
itc_df <- df %>%
  mutate(ITC = ifelse(tolower(itc) == "y", "ITC Country", "Non-ITC")) %>%
  count(ITC) %>%
  mutate(pct = round(100 * n / sum(n), 1))

# Slice colours are taken by position, not by name, so each colour is looked
# up from its label. This keeps "Non-ITC" slate even when it is the only
# category present or the row order changes.
itc_cols <- c("ITC Country" = pal$purple, "Non-ITC" = pal$slate)

plot_ly(itc_df,
  labels = ~ITC, values = ~n, type = "pie", hole = 0.55,
  textinfo = "label+value+percent",
  textfont = list(size = 13, family = "DM Sans"),
  marker = list(
    colors = unname(itc_cols[itc_df$ITC]),
    line = list(color = "white", width = 2)
  )
) %>%
  layout(
    showlegend = FALSE,
    # Text shown in the donut hole: size of the WG2 subset.
    annotations = list(
      text = paste0("<b>", nrow(df), "</b><br>WG2"),
      showarrow = FALSE,
      font = list(size = 15, color = pal$navy)
    ),
    plot_bgcolor = "white", paper_bgcolor = "white"
  ) %>% config(displayModeBar = FALSE)
```

### Multi-WG Engagement

```{r}
# Distribution of "number of WGs joined" across the WG2 subset, with each
# bucket's share in percent (one decimal).
wg_dist <- count(df, n_wgs)
wg_dist$pct <- round(100 * wg_dist$n / sum(wg_dist$n), 1)

# One gradient step per bucket.
grad_ramp <- colorRampPalette(c(pal$violet, pal$purple, pal$navy))
grad_colors <- grad_ramp(nrow(wg_dist))

wg_fig <- plot_ly(
  wg_dist,
  x = ~factor(n_wgs),
  y = ~n,
  type = "bar",
  marker = list(
    color = grad_colors,
    line = list(color = pal$navy, width = 0.8)
  ),
  text = ~paste0(n, " (", pct, "%)"),
  textposition = "outside",
  hoverinfo = "text"
)

wg_fig <- layout(
  wg_fig,
  xaxis = list(title = "Number of WGs joined"),
  yaxis = list(title = "Applicants", gridcolor = "#eee"),
  plot_bgcolor = "white",
  paper_bgcolor = "white"
)

config(wg_fig, displayModeBar = FALSE)
```


# Expertise {data-icon="fa-brain"}

## Row {data-height=460}

### Scientific Expertise Areas (Top 12)

```{r fig.height=5}
# Parse semicolon-separated expertise fields
# All members' entries are pooled into one character vector; empty pieces
# (e.g. from a trailing ";") are dropped.
expertise_list <- df$scientificExpertise %>%
  strsplit(";") %>%
  unlist() %>%
  trimws() %>%
  .[. != ""]

# Frequency table → up to twelve most frequent fields, re-sorted ascending so
# the most frequent one is the last row.
exp_counts <- as.data.frame(table(expertise_list), stringsAsFactors = FALSE) %>%
  rename(field = expertise_list, n = Freq) %>%
  arrange(desc(n)) %>%
  head(12) %>%
  arrange(n)

# Truncate long labels
# Axis labels are cut to 52 characters plus "..."; the hover text below still
# shows the full field name.
exp_counts$label <- ifelse(
  nchar(exp_counts$field) > 55,
  paste0(substr(exp_counts$field, 1, 52), "..."),
  exp_counts$field
)

plot_ly(exp_counts,
  y = ~reorder(label, n), x = ~n,
  type = "bar", orientation = "h",
  marker = list(
    # One gradient step per bar actually drawn, instead of a fixed 12 that is
    # too long whenever fewer than 12 distinct fields exist.
    color = colorRampPalette(c(pal$mint, pal$teal, pal$navy))(nrow(exp_counts)),
    line = list(color = pal$navy, width = 0.5)
  ),
  text = ~paste0("<b>", field, "</b><br>", n, " applicants"),
  hoverinfo = "text"
) %>%
  layout(
    xaxis = list(title = "Applicants", gridcolor = "#eee"),
    yaxis = list(title = "", tickfont = list(size = 10.5)),
    # Wide left margin leaves room for the long field labels.
    margin = list(l = 320),
    plot_bgcolor = "white", paper_bgcolor = "white"
  ) %>% config(displayModeBar = FALSE)
```

### COST Action Experience

```{r}
# Bucket each WG2 member by the sum of the two e-COST counters
# (runningCOSTActionsWGsCount + endedCOSTActionsWGsCount). Values that cannot
# be parsed as numbers are treated as 0.
cost_exp <- df %>%
  mutate(
    running = as.numeric(runningCOSTActionsWGsCount),
    ended   = as.numeric(endedCOSTActionsWGsCount)
  ) %>%
  mutate(
    running = ifelse(is.na(running), 0, running),
    ended   = ifelse(is.na(ended), 0, ended),
    total_cost = running + ended,
    experience = case_when(
      total_cost == 0 ~ "First COST Action",
      total_cost <= 2 ~ "1–2 prior Actions",
      total_cost <= 5 ~ "3–5 prior Actions",
      TRUE            ~ "6+ prior Actions"
    )
  )

# Colour per bucket, keyed by bucket label. The names double as the factor
# levels, which fixes the left-to-right order of the bars.
exp_cols <- c(
  "First COST Action" = pal$violet,
  "1–2 prior Actions" = pal$blue,
  "3–5 prior Actions" = pal$teal,
  "6+ prior Actions"  = pal$navy
)

cost_exp$experience <- factor(cost_exp$experience, levels = names(exp_cols))

cost_summary <- cost_exp %>% count(experience) %>% mutate(pct = round(100 * n / sum(n), 1))

plot_ly(cost_summary,
  x = ~experience, y = ~n, type = "bar",
  marker = list(
    # count() omits buckets with no members, so colours are looked up by
    # label; a fixed four-colour vector would shift onto the wrong bars.
    color = unname(exp_cols[as.character(cost_summary$experience)]),
    line  = list(color = pal$navy, width = 0.8)
  ),
  text = ~paste0(n, " (", pct, "%)"),
  textposition = "outside", hoverinfo = "text"
) %>%
  layout(
    xaxis = list(title = "", tickfont = list(size = 11)),
    yaxis = list(title = "Applicants", gridcolor = "#eee"),
    plot_bgcolor = "white", paper_bgcolor = "white"
  ) %>% config(displayModeBar = FALSE)
```

## Row {data-height=420}

### Expertise × Country Heatmap (Top 6 Expertise × Top 10 Countries)

```{r fig.height=4.5}
# Each member's expertise string split on ";" into trimmed, non-empty field
# names (one character vector per row of df). Splitting once lets the counts
# below match whole field names instead of substrings.
member_fields <- lapply(strsplit(df$scientificExpertise, ";"), function(x) {
  x <- trimws(x)
  x[!is.na(x) & x != ""]
})

# Top 6 expertise fields
top_exp <- as.data.frame(
  table(as.character(unlist(member_fields))),
  stringsAsFactors = FALSE
) %>%
  rename(field = Var1, n = Freq) %>%
  arrange(desc(n)) %>%
  head(6)

# Top 10 countries
top_ctry <- df %>% count(country_iso, sort = TRUE) %>% head(10)

# Build matrix
# One row per (field, country) pair.
heat_data <- expand.grid(
  field = top_exp$field,
  country = top_ctry$country_iso,
  stringsAsFactors = FALSE
)

# Number of members from the country who list exactly this field. An exact
# match avoids counting a field that is merely a substring of a longer field
# name, which a plain substring search over the raw string would do.
heat_data$n <- vapply(seq_len(nrow(heat_data)), function(i) {
  in_country <- member_fields[df$country_iso %in% heat_data$country[i]]
  sum(vapply(in_country, function(x) heat_data$field[i] %in% x, logical(1)))
}, numeric(1))

# Pivot on the full field name so two long names that share their first
# characters cannot collapse into the same row.
mat <- heat_data %>%
  select(field, country, n) %>%
  pivot_wider(names_from = country, values_from = n, values_fill = 0)

mat_vals <- as.matrix(mat[, -1])

# Short labels
# Row labels are cut to 37 characters plus "..."; make.unique keeps labels
# distinct if truncation produces duplicates.
rownames(mat_vals) <- make.unique(ifelse(
  nchar(mat$field) > 40,
  paste0(substr(mat$field, 1, 37), "..."),
  mat$field
))

plot_ly(
  z = mat_vals,
  x = colnames(mat_vals),
  y = rownames(mat_vals),
  type = "heatmap",
  colorscale = list(c(0, "#f5f6fa"), c(0.3, "#a29bfe"), c(0.6, "#6c5ce7"), c(1, "#1b1f3b")),
  text = round(mat_vals, 0),
  hoverinfo = "x+y+z",
  showscale = TRUE,
  colorbar = list(title = "Count", len = 0.5)
) %>%
  layout(
    xaxis = list(title = "", tickfont = list(size = 11)),
    # Reversed y axis puts the first (most frequent) field at the top.
    yaxis = list(title = "", tickfont = list(size = 9.5), autorange = "reversed"),
    margin = list(l = 260, b = 50),
    plot_bgcolor = "white", paper_bgcolor = "white"
  ) %>% config(displayModeBar = FALSE)
```


# Member Directory {data-icon="fa-address-book"}

## Row

### WG2 Members — Full Searchable Directory {data-height=800}

```{r}
# Display table for the directory: one row per WG2 member, sorted by name.
# Expertise is cut to its first 100 characters to keep rows compact.
tbl <- df %>%
  mutate(
    Name      = paste(firstName, lastName),
    Country   = country_name,
    Status    = status,
    Gender    = gender,
    YR        = ifelse(tolower(youngResearcher) == "y", "Yes", "No"),
    ITC       = ifelse(tolower(itc) == "y", "Yes", "No"),
    `All WGs` = other_wgs,
    Expertise = substr(scientificExpertise, 1, 100)
  ) %>%
  # select() renames and orders in one step.
  select(Name, Affiliation = affiliation, Country, Status, Gender, YR, ITC,
         `All WGs`, `WG Count` = n_wgs, Expertise) %>%
  arrange(Name)

datatable(
  tbl,
  filter     = "top",
  rownames   = FALSE,
  extensions = c("Buttons", "Scroller"),
  options    = list(
    dom        = "Bfrtip",
    buttons    = c("csv", "excel", "pdf"),
    scrollY    = "540px",
    scroller   = TRUE,
    pageLength = 150,
    autoWidth  = TRUE,
    # Column widths; targets are zero-based positions in `tbl`.
    columnDefs = list(
      list(width = "130px", targets = 0),
      list(width = "170px", targets = 1),
      list(width = "70px",  targets = 2),
      list(width = "55px",  targets = c(3,4,5,6,8)),
      list(width = "100px", targets = 7),
      list(width = "200px", targets = 9)
    ),
    language = list(
      search = "Search WG2 members:",
      info   = "Showing _START_ to _END_ of _TOTAL_ members"
    )
  )
) %>%
  # Tint the Status cell by application state.
  formatStyle("Status",
    backgroundColor = styleEqual(
      c("approved", "submitted"),
      c("#e8f5e9", "#fff8e1")
    ),
    fontWeight = "bold"
  ) %>%
  # In-cell bar for the WG count. The scale runs from 0 to the number of WG
  # columns found in the export rather than a hard-coded 6.
  formatStyle("WG Count",
    background = styleColorBar(c(0, length(wg_cols)), pal$purple),
    backgroundSize = "92% 65%",
    backgroundRepeat = "no-repeat",
    backgroundPosition = "center"
  )
```


# Action Context {data-icon="fa-sitemap"}

## Row {data-height=400}

### All Working Groups — Membership Size

```{r}
# Size of every working group across the full export (not just the WG2
# subset), largest first.
all_wg <- tibble(
  wg    = wg_short,
  label = wg_labels[wg_short],
  # vapply fixes the result to one number per WG column (sapply's return type
  # depends on its input); na.rm guards against missing flags.
  n     = vapply(wg_cols, function(col) sum(df_all[[col]], na.rm = TRUE), numeric(1))
) %>%
  arrange(desc(n))

plot_ly(all_wg,
  x = ~reorder(wg, n), y = ~n, type = "bar",
  marker = list(
    # WG2 gets the solid accent colour; the other bars use their WG colour
    # with "88" appended to the hex code.
    color = ifelse(all_wg$wg == "WG2",
                   pal$purple,
                   paste0(pal_wg[all_wg$wg], "88")),
    line = list(color = pal$navy, width = 1)
  ),
  text = ~paste0("<b>", wg, "</b>: ", label, "<br>", n, " applicants",
                 ifelse(wg == "WG2", " ← this dashboard", "")),
  hoverinfo = "text"
) %>%
  layout(
    xaxis = list(title = ""),
    yaxis = list(title = "Total Applicants", gridcolor = "#eee"),
    plot_bgcolor = "white", paper_bgcolor = "white"
  ) %>% config(displayModeBar = FALSE)
```

### WG Co-occurrence Matrix (Full Action)

```{r}
# Pairwise co-membership across the full export: cell [i, j] is the number of
# applicants flagged for both WG i and WG j. The diagonal holds each WG's
# total. `hover` carries the matching tooltip text for every cell.
n_wg <- length(wg_cols)
co_mat <- matrix(0, n_wg, n_wg, dimnames = list(wg_short, wg_short))
hover <- matrix("", n_wg, n_wg)

# seq_len() makes both loops no-ops when there are no WG columns.
for (i in seq_len(n_wg)) {
  for (j in seq_len(n_wg)) {
    co_mat[i, j] <- sum(df_all[[wg_cols[i]]] & df_all[[wg_cols[j]]],
                        na.rm = TRUE)
    hover[i, j] <- if (i == j) {
      paste0(wg_short[i], " total: ", co_mat[i, j])
    } else {
      paste0(wg_short[i], " ∩ ", wg_short[j], ": ", co_mat[i, j])
    }
  }
}

# The diagonal is blanked in the colour layer so the WG totals do not stretch
# the colour scale; the numbers are still printed by the annotations below.
co_disp <- co_mat
diag(co_disp) <- NA

p <- plot_ly(
  z = co_disp, x = wg_short, y = wg_short,
  type = "heatmap",
  colorscale = list(c(0,"#f0edff"), c(0.5,"#6c5ce7"), c(1,"#1b1f3b")),
  text = hover, hoverinfo = "text",
  showscale = TRUE,
  colorbar = list(title = "Shared", len = 0.5)
) %>%
  layout(
    xaxis = list(title = "", side = "top"),
    yaxis = list(title = "", autorange = "reversed"),
    margin = list(t = 50),
    plot_bgcolor = "white", paper_bgcolor = "white"
  ) %>% config(displayModeBar = FALSE)

# Print the count inside every cell. Diagonal totals are amber; off-diagonal
# counts switch to white text above a fixed cutoff of 60.
# NOTE(review): 60 is a hard-coded threshold — confirm it still separates
# dark from light cells if the data changes.
for (i in seq_len(n_wg)) {
  for (j in seq_len(n_wg)) {
    cell_colour <- if (i == j) {
      pal$amber
    } else if (co_mat[i, j] > 60) {
      "white"
    } else {
      pal$navy
    }
    p <- p %>% add_annotations(
      x = wg_short[j], y = wg_short[i],
      text = co_mat[i, j], showarrow = FALSE,
      font = list(
        size = 11,
        color = cell_colour,
        family = "JetBrains Mono"
      )
    )
  }
}
p
```

## Row {data-height=250}

### About this Dashboard

**COST Action CA24121 — Unifying Large Language Models and Knowledge Graphs (UniLLMKG)**
<br><br>
This dashboard focuses on **WG2: Domain-specific Tasks of LLMs Based on Knowledge Graphs**, providing an overview of the `r nrow(df)` applicants who selected WG2 as one of their working groups.

**Dashboard contents:**

- **WG2 Overview** — KPIs, geographic distribution, cross-WG membership patterns, demographics, ITC balance, and multi-group engagement.
- **Expertise** — Scientific expertise distribution, COST experience levels, and a country × expertise heatmap.
- **Member Directory** — Searchable, filterable, exportable (CSV/Excel/PDF) table of all WG2 members.
- **Action Context** — Full action WG sizes and co-occurrence heatmap to position WG2 within the broader CA24121 landscape.

**Data:** e-COST export, 12 February 2026 (`r nrow(df_all)` total applicants, `r nrow(df)` in WG2).

**Required R packages:** `flexdashboard`, `tidyverse`, `plotly`, `DT`, `scales`.

*To publish: Knit in RStudio → Publish → RPubs.*