Packages used in R

ipak <- function(pkg){
  # install any listed packages that are not yet present, then load them all
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg))
    install.packages(new.pkg, dependencies = TRUE)
  sapply(pkg, require, character.only = TRUE)
}


packages <- c("readxl","widyr","dplyr","stringr","tm","tidytext","ggplot2",
              "topicmodels","tidyr","textdata","writexl","textclean","igraph",
              "ggraph","tibble","widyr","DescTools","purrr","flextable","officer",
              "ggraph", "psych", "rpart", "rpart.plot", "quanteda")

ipak(packages)  

Prepare data

setwd("C:/Users/Usuario/OneDrive - Universitat de Barcelona/0_RECERCA/Congresos/2025/ECCES/Final")
data <- read_excel("unico.xlsx")
data_cleaned <- data
data_cleaned <- data_cleaned %>%
  rename(`EU Grant` = `EU Grant award in euros (This amount represents the grant awarded after the selection stage and is indicative. Please note that any changes made during or after the project's lifetime will not be reflected here.)`)
data_cleaned <- data_cleaned %>%
  mutate(
    `Key Action` = as.factor(`Key Action`),
    `Action Type` = as.factor(`Action Type`),
    `Funding Year` = as.factor(`Funding Year`),
    `Coordinator's country` = as.factor(`Coordinator's country`),
    `Is Good Practice` = as.factor(`Is Good Practice`),
    Ongoing = as.factor(Ongoing),
    `Topic_Model` = as.factor(`Topic_Model`),  # added for this version
    `EU Grant` = as.numeric(`EU Grant`)
  )
summary(data_cleaned)
##  Key Action                                                Action Type  
##  CIEGP:1546   European policy experimentation in higher education:  10  
##  PCEP :1611   HigherEd                                           :1461  
##  SPR  :  47   Policy_Exp                                         :  37  
##               School                                             :1696  
##                                                                         
##                                                                         
##                                                                         
##   Funding Year Project Identifier Project Title      Project Summary   
##  2023   :722   Length:3204        Length:3204        Length:3204       
##  2024   :593   Class :character   Class :character   Class :character  
##  2022   :302   Mode  :character   Mode  :character   Mode  :character  
##  2014   :298                                                           
##  2020   :295                                                           
##  2019   :237                                                           
##  (Other):757                                                           
##     Topics          Results Platform Project Card
##  Length:3204        Length:3204                  
##  Class :character   Class :character             
##  Mode  :character   Mode  :character             
##                                                  
##                                                  
##                                                  
##                                                  
##  Coordinating organisation name Coordinator's country Participating Countries
##  Length:3204                    ES     : 282          Length:3204            
##  Class :character               IT     : 274          Class :character       
##  Mode  :character               PL     : 258          Mode  :character       
##                                 FR     : 256                                 
##                                 DE     : 242                                 
##                                 NL     : 141                                 
##                                 (Other):1751                                 
##     EU Grant       Is Good Practice Ongoing   Topic_Model   Ethics Category   
##  Min.   :      0   No :1658         N:1593   Topic 1: 623   Length:3204       
##  1st Qu.: 222953   Yes:1546         Y:1611   Topic 2: 574   Class :character  
##  Median : 250000                             Topic 3:1022   Mode  :character  
##  Mean   : 291016                             Topic 4: 582                     
##  3rd Qu.: 400000                             Topic 5: 403                     
##  Max.   :2500000                                                              
##                                                                               
##  AI related project
##  Length:3204       
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
DT::datatable(data_cleaned)

Character or factor variables

cat_vars <- data_cleaned %>%
  select(where(~ is.character(.) | is.factor(.)))
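
As a quick check (a sketch not in the original output; it assumes cat_vars built as above), the most frequent values of each character or factor column can be listed:

cat_vars %>%
  map(~ head(sort(table(.x), decreasing = TRUE), 5))   # five most common values per categorical column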

Topic Modelling on whole corpus

Clean project summaries

data_cleaned <- data_cleaned %>%
  mutate(summary_only_clean = `Project Summary` %>%
           replace_non_ascii() %>%
           str_to_lower() %>%
           replace_contraction() %>%
           replace_symbol() %>%
           replace_number() %>%
           str_replace_all("[[:punct:]]", " ") %>%
           str_squish())
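
To see what the cleaning pipeline does, it can be applied to a single made-up summary (a hypothetical string, not taken from the data):

example <- "The project won't train 1,200 teachers & students in 3 countries!"  # illustration only
example %>%
  replace_non_ascii() %>%
  str_to_lower() %>%
  replace_contraction() %>%
  replace_symbol() %>%
  replace_number() %>%
  str_replace_all("[[:punct:]]", " ") %>%
  str_squish()
# contractions are expanded, symbols and numbers written out as words, punctuation dropped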

Create tokens from text

custom_stopwords <- c(
  stopwords("en"),
  "project", "students", "education", "europe", "school", "higher", "one", "thousand", 
  "educational", "three", "improve", "eu", "based", "needs", "challenges",
  "will", "use", "key", "need", "objectives", "countries", "four", "goal", "e", 
  "objective", "aims", "development", "s", "two", "aim", "develop", 
  "learning", "teachers", "main", "among"
)

tokens_clean <- tokens(data_cleaned$summary_only_clean,
                       what = "word",
                       remove_punct = TRUE,
                       remove_numbers = TRUE) %>%
  tokens_tolower() %>%
  tokens_remove(pattern = custom_stopwords)

Create dfm (document-feature matrix)

dfm_clean <- dfm(tokens_clean)

Trim sparse terms (similar in spirit to Python's min_df = 10, max_df = 0.9; note that min_termfreq filters on total term frequency, whereas sklearn's min_df counts documents)

dfm_clean <- dfm_trim(dfm_clean, min_termfreq = 10, max_docfreq = 0.9, docfreq_type = "prop")
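
A quick look at how many documents and features survive the trimming (a sketch; its output is not shown here):

dim(dfm_clean)   # rows = documents, columns = retained features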

Convert to topicmodels format

dtm_topicmodels <- convert(dfm_clean, to = "topicmodels")

Fit LDA model

lda_model <- LDA(dtm_topicmodels, k = 5, control = list(seed = 42))

Extract top 10 keywords per topic

terms_matrix <- terms(lda_model, 10)
print(terms_matrix)
##       Topic 1         Topic 2     Topic 3       Topic 4      Topic 5     
##  [1,] "digital"       "european"  "skills"      "schools"    "european"  
##  [2,] "european"      "training"  "european"    "skills"     "teaching"  
##  [3,] "social"        "create"    "digital"     "digital"    "innovative"
##  [4,] "future"        "awareness" "new"         "new"        "skills"    
##  [5,] "skills"        "schools"   "training"    "innovative" "climate"   
##  [6,] "children"      "promote"   "social"      "approach"   "knowledge" 
##  [7,] "environmental" "skills"    "competences" "knowledge"  "practices" 
##  [8,] "increase"      "social"    "teaching"    "children"   "inclusive" 
##  [9,] "world"         "teaching"  "young"       "pupils"     "specific"  
## [10,] "ai"            "primary"   "innovative"  "design"     "science"

Plot (dummy counts used here; real counts could be taken from the table(dominant_topic) tally above)

topic_counts <- data.frame(
  Topic = factor(1:5),
  Count = c(620, 570, 1020, 580, 400),
  Keywords = c(
    "digital\neuropean\nsocial\nfuture\nskills\nchildren\nenvironmental\nincrease\nworld\nAI",
    "european\ntraining\ncreate\nawareness\nschools\npromote\nskills\nsocial\nteaching\nprimary",
    "skills\neuropean\ndigital\nnew\ntraining\nsocial\ncompetences\nteaching\nyoung\ninnovative",
    "schools\nskills\ndigital\nnew\ninnovative\napproach\nknowledge\nchildren\npupils\ndesign",
    "european\nteaching\ninnovative\nskills\nclimate\nknowledge\npractices\ninclusive\nspecific\nscience"
  )
)

Optional: topic labels

topic_labels <- tibble(
  topic = factor(1:5),
  label = c(
    "European Digital Future",
    "European awareness",
    "Youth Skills and Social Change",
    "Early Education Digital Innovation",
    "Climate Change, Inclusion & STEAM")
)
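
A sketch of how these optional labels could be attached to the plot data (it assumes Topic and topic encode the same 1-5 numbering):

topic_counts <- topic_counts %>%
  left_join(topic_labels, by = c("Topic" = "topic"))   # adds a label column for axis or facet text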

Plot with keywords

custom_colors <- c("#e69f00", "#d55e00", "#cc79a7", "#56b4e9", "#009e73")

ggplot(topic_counts, aes(x = Keywords, y = Count, fill = Topic)) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  geom_text(aes(label = Count), 
            vjust = 2.5, 
            size = 4) +
  scale_fill_manual(values = custom_colors) +
  labs(
    title = "Distribution of Dominant Topics across Projects",
    x = "Topics (Top Keywords)",
    y = "Number of Projects"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(size = 10, angle = 0, hjust = 0.5, vjust = 0.5),
    axis.text.y = element_text(size = 12),
    plot.title = element_text(face = "bold", size = 16)
  )

AI & Ethics crosstabs [Country, Year]

Projects by country/year and ethics category. Create a shortened year label.

by_group <- data %>%
  group_by(`Coordinator's country`, `Funding Year`, `Ethics Category`) %>%
  summarise(Projects = n(), .groups = "drop") %>%
  mutate(FundingYearShort = substr(as.character(`Funding Year`), 3, 4)) 

Plot using short year labels

ggplot(by_group, aes(x = FundingYearShort, y = Projects, fill = `Ethics Category`)) +
  geom_col(position = "dodge") +
  facet_wrap(~`Coordinator's country`) +
  labs(
    title = "Projects by Country, Year, and Ethics Category",
    x = "Funding Year (last 2 digits)",
    y = "Number of Projects") +
  theme_minimal(base_size = 10)+
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 10, hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

ggplot(by_group, aes(x = `Ethics Category`, y = Projects, fill = `Ethics Category`)) +
  geom_col(position = "dodge") +
  facet_wrap(~`Coordinator's country`) +
  labs(
    title = "Projects by Country and Ethics Category",
    x = "",
    y = "Number of Projects") +
  theme_minimal(base_size = 10)+
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 10, hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Count by country, year, AI and Ethics category

cross_tab <- data_cleaned %>%
  count(`Coordinator's country`, `Funding Year`, `AI Category`, `Ethics Category`) %>%
  rename(Count = n)

Shorten year (last 2 digits)

cross_tab <- cross_tab %>%
  mutate(FundingYearShort = substr(as.character(`Funding Year`), 3, 4))

Plot

ggplot(cross_tab, aes(x = FundingYearShort, y = Count, fill = `Ethics Category`)) +
  geom_col(position = "dodge", width = 0.8) +
  facet_wrap(~ `Coordinator's country`, scales = "free_y", ncol = 6) +
  labs(
    title = "Ethics Classification of AI Projects by Country and Year",
    x = "Funding Year (last 2 digits)",
    y = "Project Count",
    fill = "Ethics Category"
  ) +
  theme_minimal(base_size = 10) +
  theme(legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1, size = 5),
    strip.text = element_text(size = 8),
    plot.title = element_text(face = "bold", size = 14)
  )

Some inferential analyses

Count projects by Ethics and AI category

combine_df <- data_cleaned %>%
  count(`Ethics Category`, `AI Category`)

Plot

ggplot(combine_df, aes(x = `Ethics Category`, y = n, fill = `AI Category`)) +
  geom_col(position = "dodge") + 
  geom_text(aes(label = n),
            position = position_dodge(width = 0.9),
            vjust = -0.5,
            size = 4) +
  labs(
    title = "Projects by Ethics & AI Categories",
    x = "Ethics Category",
    y = "Number of Projects",
    fill = "AI Category"
  ) +
  theme_minimal(base_size = 14)+
  theme(
    axis.text.x = element_text(size = 10, angle = 0, hjust = 0.5, vjust = 0.5),
    axis.text.y = element_text(size = 12),
    plot.title = element_text(face = "bold", size = 16)
  )

Clean and simplify variable names

data_prop <- data_cleaned %>%
  rename(
    Ethics = `Ethics Category`,
    AI = `AI Category`
  ) %>%
  filter(!is.na(Ethics), !is.na(AI))
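
With the simplified names in place, the share of ethics & equity-related projects within each AI category can be tabulated (a sketch, not part of the original output):

data_prop %>%
  count(AI, Ethics) %>%
  group_by(AI) %>%
  mutate(Share = n / sum(n)) %>%   # proportion of each ethics category within an AI category
  ungroup()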

Logistic regression to test whether being an AI-related project (and the funding year) predicts an ethics & equity focus

table(data_cleaned$`Ethics Category`, data_cleaned$`AI Category`)
##                              
##                               AI Related Non AI Related
##   Ethics & Equity Related            122            131
##   Non Ethics & Equity Related       1396           1555
tab <- table(data_cleaned$`Ethics Category`, data_cleaned$`AI Category`)
phi(tab)
## [1] 0
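
The printed 0 is a rounding effect: phi() here comes from the psych package, which rounds to two digits by default, so a very weak association displays as 0. A sketch to view it with more precision:

phi(tab, digits = 4)   # same phi coefficient, shown to four decimal places
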
# Note: `Funding Year` was converted to a factor above, so as.numeric() returns the
# level index rather than the calendar year; as.numeric(as.character(...)) would
# recover the actual years if that scale is preferred.
data_cleaned$`Funding Year` <- as.numeric(data_cleaned$`Funding Year`)


data_cleaned$EthicsCategoryBinary <- 
  ifelse(data_cleaned$`Ethics Category` == "Ethics & Equity Related", 1, 0)

model_2 <- glm(EthicsCategoryBinary ~ `Funding Year` + `AI Category`, data = data_cleaned, family = binomial)
summary(model_2)
## 
## Call:
## glm(formula = EthicsCategoryBinary ~ `Funding Year` + `AI Category`, 
##     family = binomial, data = data_cleaned)
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 -3.30360    0.21855 -15.116  < 2e-16 ***
## `Funding Year`               0.10114    0.02204   4.588 4.47e-06 ***
## `AI Category`Non AI Related  0.14460    0.13683   1.057    0.291    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1770.1  on 3203  degrees of freedom
## Residual deviance: 1747.2  on 3201  degrees of freedom
## AIC: 1753.2
## 
## Number of Fisher Scoring iterations: 5
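
Exponentiating the coefficients gives odds ratios, which are easier to read than log-odds (a sketch; confint() profiles the likelihood, so it may take a moment):

exp(cbind(OR = coef(model_2), confint(model_2)))   # odds ratios with 95% profile CIs
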
model_3 <- glm(EthicsCategoryBinary ~ `Funding Year` + `AI Category` + `EU Grant` , data = data_cleaned, family = binomial)
summary(model_3)
## 
## Call:
## glm(formula = EthicsCategoryBinary ~ `Funding Year` + `AI Category` + 
##     `EU Grant`, family = binomial, data = data_cleaned)
## 
## Coefficients:
##                               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 -3.254e+00  2.419e-01 -13.454  < 2e-16 ***
## `Funding Year`               1.026e-01  2.230e-02   4.603 4.17e-06 ***
## `AI Category`Non AI Related  1.438e-01  1.369e-01   1.050    0.294    
## `EU Grant`                  -2.076e-07  4.462e-07  -0.465    0.642    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1770.1  on 3203  degrees of freedom
## Residual deviance: 1747.0  on 3200  degrees of freedom
## AIC: 1755
## 
## Number of Fisher Scoring iterations: 5
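
Since model_2 is nested in model_3, a likelihood-ratio test (alongside the AIC values already reported) can indicate whether adding `EU Grant` improves fit (a sketch, not part of the original output):

anova(model_2, model_3, test = "Chisq")   # likelihood-ratio test for the added `EU Grant` term
AIC(model_2, model_3)                     # lower AIC indicates the preferred model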