Setting Up the Environment

Loading packages

# Packages
library(sjlabelled)
library(sjPlot)
library(labelled)
library(dplyr)
library(tidyr)
library(purrr)
library(stringr)
library(janitor)
library(ggplot2)
library(forcats)
library(grid)
library(knitr)
library(kableExtra)
library(patchwork) 
library(tibble)

Working directory & file name

# Loading the dataset ----
# The data folder is held in one variable and joined to the file name, so the
# working directory is never changed by this script. (When the document is
# knitted, knitr restores the working directory after each chunk, so a
# setwd() call would not carry over to later chunks anyway.)
# Edit `data_dir` when running on another machine.
data_dir <- "C:/Users/Shobhana/OneDrive - University of Helsinki/Desktop/Data analysis"
# read.csv2() reads ";"-separated files that use "," as the decimal mark.
df_0 <- read.csv2(file.path(data_dir, "Thesis analysis_codebook.csv"))
# str(df_0)

Variable labels

# Attach a descriptive variable label to each coded column k1-k50 of df_0.
# The labelled copy is stored as df_1.
df_1 <- set_variable_labels(
  df_0,
  k1  = "AADM for national security",
  k2  = "Accessibility",
  k3  = "Accountability",
  k4  = "Appeal compensates for no HITL",
  k5  = "Assumption of HITL during appeal process",
  k6  = "Chooses A",
  k7  = "Chooses B",
  k8  = "Chooses system A because it's free",
  k9  = "Chooses system B despite the cost",
  k10 = "AADM for controlled migration",
  k11 = "Cost doesn't matter",
  k12 = "Data privacy concerns",
  k13 = "Delegitimization by negative association to status-quo",
  k14 = "Didn't know about ETA prior to the interview",
  k15 = "Digital authoritarianism/monitoring of public with collected data",
  k16 = "Efficiency of the system",
  k17 = "ETA is unnecessary",
  k18 = "Extensive data collection is fine",
  k19 = "Familiarity with human made decisions",
  k20 = "Final decision by Human is important",
  k21 = "HITL compensates for no appeal",
  k22 = "Knew about ETA prior to the interview",
  k23 = "Legitimization of AADM in border control by comparing to similar existing systems",
  k24 = "Negative efficiency judgement for HITL",
  k25 = "Negative fairness judgement in AADM",
  k26 = "Negative fairness judgement in HITL",
  k27 = "No appeal as redflag",
  k28 = "No opinion on data being collected",
  k29 = "Normalization by comparing to ChatGPT",
  k30 = "Normalization by comparing to growing use of algorithmic systems",
  k31 = "Normalization by comparing to police",
  k32 = "Normalization by comparing to visa-application processes",
  k33 = "Positive efficiency judgement in AADM",
  k34 = "Positive fairness judgement in AADM",
  k35 = "Positive fairness judgement in HITL",
  k36 = "Too intrusive",
  k37 = "Transparency of process",
  k38 = "Transparency of system",
  k39 = "Wants collectively applicable migration policy on a Global scale",
  k40 = "Wants combination of HITL and AADM",
  k41 = "Wants extensive data collection",
  k42 = "Wants extensive data collection for system B",
  k43 = "Wants fully automated system",
  k44 = "Wants HITL",
  k45 = "Wants less intrusive data for system A",
  k46 = "Wants no appeal",
  k47 = "Wants no cost in system B",
  k48 = "Wants to add appeal to system B",
  k49 = "Wants to add cost to system A",
  k50 = "Wants to add HITL for system A"
)

Separating variables starting with ‘k’

# Names of all columns in df_1 that consist of "k" followed only by digits.
k_cols <- names(df_1)[grepl("^k\\d+$", names(df_1))]

Since the ‘main’ variables have a naming pattern (k’number’), we are collecting them with this pattern.

Checking labels

# Store the result of generate_dictionary() for df_1 so the labels assigned
# above can be inspected.
df_dict <- generate_dictionary(df_1)
# knitr::kable(df_dict)

Defining thematic groups

# Lower-case a vector of code names so they compare equal regardless of case.
normk <- function(v) {
  tolower(v)
}
# Thematic groups: each name maps to the k-codes that belong to that theme.
# Every code vector is lower-cased with normk() and then restricted to the
# codes that actually exist in the data (k_cols).
groups <- lapply(
  list(
    `Choice of system`   = c("k6", "k7"),
    `Data`               = c("k28", "k36", "k41", "k42", "k45"),
    `Cost`               = c("k8", "k9", "k11", "k47", "k49"),
    `Appeal`             = c("k4", "k5", "k21", "k27", "k46", "k48"),
    `Decision`           = c("k3", "k18", "k19", "k33", "k37", "k44"),
    `Fairness`           = c("k25", "k26", "k34", "k35"),
    `Efficiency`         = c("k24", "k33"),
    `Legitimization`     = c("k1", "k10", "k23", "k24", "k26", "k29", "k30",
                             "k31", "k32", "k33", "k34"),
    `Delegitimization`   = c("k12", "k13", "k15", "k17", "k19", "k20", "k25",
                             "k35"),
    `Policy perspective` = c("k2", "k3", "k12", "k16", "k37", "k38", "k39"),
    `ETA`                = c("k14", "k22")
  ),
  function(codes) intersect(normk(codes), k_cols)
)

To add a new theme, copy a line, give it a new name, and list the k-codes.

Creating group indicators (ANY + COUNT)

# Add two indicator columns per thematic group.
#
# For every group in `groups_list` that has at least one code column, two
# columns are appended to `data` (names derived with make_clean_names()):
#   <group>_any   - 1L when the row sum of the group's codes is > 0, else 0L
#   <group>_count - the row sum of the group's codes
#
# Arguments:
#   data        data frame containing the code columns.
#   groups_list named list; each element is a character vector of column names.
#   na.rm       passed to rowSums(). The default FALSE keeps the previous
#               behaviour: a missing code makes both indicators NA for that
#               row. Set TRUE to treat missing codes as not mentioned, as the
#               later rowSums() calls in this file do.
#
# Returns `data` with the extra columns. Groups without columns are skipped.
add_group_vars <- function(data, groups_list, na.rm = FALSE) {
  out <- data
  for (g in names(groups_list)) {
    cols <- groups_list[[g]]
    if (length(cols) == 0) next
    # Clean the group name once and reuse it for both column names.
    prefix <- make_clean_names(g)
    # Compute the row sums once; both indicators are derived from them.
    row_counts <- unname(
      rowSums(dplyr::select(out, all_of(cols)), na.rm = na.rm)
    )
    out[[paste0(prefix, "_any")]] <- as.integer(row_counts > 0)
    out[[paste0(prefix, "_count")]] <- row_counts
  }
  out
}
df_grp <- add_group_vars(df_1, groups)

Group prevalence summary and plot

# Summarise how common each thematic group is.
#
# Arguments:
#   data        data frame that already holds the <group>_any / <group>_count
#               columns created by add_group_vars().
#   groups_list the same named list of groups passed to add_group_vars().
#
# Returns a tibble with one row per group, sorted by descending pct_any:
#   group      - group name
#   n_any      - number of rows with <group>_any == 1
#   pct_any    - share of rows with <group>_any == 1 (0-1, NAs ignored)
#   mean_count - mean of <group>_count (NAs ignored)
#
# Groups whose indicator columns are absent are skipped. add_group_vars()
# creates no columns for a group with no codes, and summarising the missing
# columns would otherwise give n_any = 0, pct_any = NA and a warning.
group_summary <- function(data, groups_list) {
  summary_tbl <- map_dfr(names(groups_list), function(g) {
    prefix <- make_clean_names(g)
    any_col <- paste0(prefix, "_any")
    count_col <- paste0(prefix, "_count")
    if (!all(c(any_col, count_col) %in% names(data))) {
      return(NULL)
    }
    tibble(
      group      = g,
      n_any      = sum(data[[any_col]], na.rm = TRUE),
      pct_any    = mean(data[[any_col]], na.rm = TRUE),
      mean_count = mean(data[[count_col]], na.rm = TRUE)
    )
  })
  # Nothing to summarise: return an empty table with the expected columns.
  if (nrow(summary_tbl) == 0) {
    return(tibble(
      group      = character(),
      n_any      = integer(),
      pct_any    = double(),
      mean_count = double()
    ))
  }
  arrange(summary_tbl, desc(pct_any))
}

# Group prevalence table, formatted for display: percentage as text with one
# decimal, mean count rounded to two decimals.
grp_summ <- group_summary(df_grp, groups)
grp_summ$pct_any <- scales::percent(grp_summ$pct_any, accuracy = 0.1)
grp_summ$mean_count <- round(grp_summ$mean_count, 2)

grp_summ

## # A tibble: 11 × 4
##    group              n_any pct_any mean_count
##    <chr>              <int> <chr>        <dbl>
##  1 Choice of system      35 100.0%        1   
##  2 Cost                  34 97.1%         1.06
##  3 Appeal                33 94.3%         1.37
##  4 ETA                   32 91.4%         0.91
##  5 Legitimization        30 85.7%         2   
##  6 Data                  29 82.9%         0.91
##  7 Delegitimization      28 80.0%         1.71
##  8 Policy perspective    27 77.1%         1.37
##  9 Decision              25 71.4%         1.14
## 10 Fairness              24 68.6%         1   
## 11 Efficiency             9 25.7%         0.29

Data Analysis

Choice of system

# Codes belonging to the "Choice of system" theme.
choice_cols <- groups[["Choice of system"]]

# One row per system with the number of participants coded as choosing it.
choice_counts <- tibble(
  choice = c("Chooses_A", "Chooses_B"),
  count  = c(sum(df_grp$k6, na.rm = TRUE), sum(df_grp$k7, na.rm = TRUE))
)

# Bar chart of the two counts, with the count printed above each bar.
ggplot(data = choice_counts,
       mapping = aes(x = choice, y = count, fill = choice)) +
  geom_col() +
  geom_text(aes(label = count), vjust = -0.3, size = 4) +
  scale_x_discrete(labels = c("Chooses_A" = "System A",
                              "Chooses_B" = "System B")) +
  coord_cartesian(ylim = c(1, 25)) +
  labs(
    title = "Choice of System (System A vs. System B)",
    x = "System Choice",
    y = "Count"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Cost variable

# Display label -> code column for each cost-related code.
cost_codes <- c(
  "Chose system A because it is free" = "k8",
  "Chose system B despite the cost"   = "k9",
  "Cost does not matter"              = "k11",
  "Wants no cost in system B"         = "k47",
  "Wants to add cost to system A"     = "k49"
)

# Number of participants coded 1 on each cost code (missing values ignored).
cost_table <- tibble(
  `Cost variable` = names(cost_codes),
  `Number of participants` = as.integer(
    colSums(df_grp[unname(cost_codes)] == 1, na.rm = TRUE)
  )
)

# Styled table of the counts.
kable(
  cost_table,
  caption   = "Cost-related considerations in system preference",
  col.names = c("Cost-related variable", "Number of participants"),
  align     = "c"
) %>%
  kable_styling(full_width = FALSE, position = "center", font_size = 12) %>%
  row_spec(0, bold = TRUE) %>%
  column_spec(1, bold = TRUE)

Cost-related considerations in system preference
Cost-related variable	Number of participants
Chose system A because it is free	19
Chose system B despite the cost	11
Cost does not matter	1
Wants no cost in system B	1
Wants to add cost to system A	5

Relation of cost vs choice of system A

# Why participants chose System A ----
# `free_A` is counted only among participants who chose System A, so that
# `other_A = total_A - free_A` is always a valid, non-negative count. Before,
# every k8 == 1 row was counted whether or not k6 == 1.
chose_A <- df_grp$k6 == 1
total_A <- sum(chose_A, na.rm = TRUE)                    # 23 per original note
free_A  <- sum(chose_A & df_grp$k8 == 1, na.rm = TRUE)   # 19 per original note
other_A <- total_A - free_A                              # 23 - 19 = 4

df_donut <- data.frame(
  Reason = c("Chose system A because it's free", "Chose system A for other reasons"),
  Count = c(free_A, other_A)
)

# Donut chart: a stacked bar at x = 2 in polar coordinates; the x limits
# starting at 0.5 leave the hole in the middle.
ggplot(df_donut, aes(x = 2, y = Count, fill = Reason)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y") +
  geom_text(aes(label = Count),
            position = position_stack(vjust = 0.5),
            size = 5, color = "black", fontface = "bold") +
  scale_fill_manual(values = c("skyblue", "lightgreen")) +
  xlim(0.5, 2.5) +
  theme_void() +
  labs(
    title = "Why Participants Chose System A",
    fill = "Reason for Choosing A"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11)
  )

Knowledge of ETA

# Prior knowledge of ETA: counts of k22 ("Yes") and k14 ("No"), plus each
# count's share of the two combined, in percent.
eta_counts <- tibble(
  choice = c("Yes", "No"),
  count  = c(sum(df_grp$k22, na.rm = TRUE), sum(df_grp$k14, na.rm = TRUE))
)
eta_counts$pct <- eta_counts$count / sum(eta_counts$count) * 100

# Pie chart of the two shares with the percentage printed in each slice.
ggplot(data = eta_counts, mapping = aes(x = "", y = pct, fill = choice)) +
  geom_col(width = 1) +
  geom_text(aes(label = paste0(round(pct, 1), "%")),
            position = position_stack(vjust = 0.5),
            size = 5) +
  coord_polar(theta = "y") +
  theme_void() +
  labs(
    title = "                      Prior knowledge of ETA",
    fill = "Had prior knowledge of ETA"
  )

Preferred decision-making mechanism (AADM vs HITL)

# Row-level flags: AADM_any is 1 when k43 is coded; HITL_any is 1 when any of
# k20, k44, k50 or k40 is coded. Missing codes count as 0.
hitl_codes <- c("k20", "k44", "k50", "k40")
df_grp$AADM_any <- as.integer(rowSums(df_grp["k43"], na.rm = TRUE) > 0)
df_grp$HITL_any <- as.integer(rowSums(df_grp[hitl_codes], na.rm = TRUE) > 0)

# Participants flagged for exactly one of the two mechanisms.
aadm_only <- df_grp$AADM_any == 1 & df_grp$HITL_any == 0
hitl_only <- df_grp$HITL_any == 1 & df_grp$AADM_any == 0

df_chart <- tibble(
  pref_group = c("AADM only", "HITL"),
  n = c(sum(aadm_only, na.rm = TRUE), sum(hitl_only, na.rm = TRUE))
)

df_chart

## # A tibble: 2 × 2
##   pref_group     n
##   <chr>      <int>
## 1 AADM only      4
## 2 HITL          31

# Pie chart of the AADM-only vs HITL counts, with the count in each slice.
ggplot(data = df_chart, mapping = aes(x = 1, y = n, fill = pref_group)) +
  geom_col(width = 1, color = "white") +
  geom_text(aes(label = n),
            position = position_stack(vjust = 0.5),
            size = 5, fontface = "bold") +
  coord_polar(theta = "y") +
  xlim(0.5, 1.5) +
  scale_fill_manual(values = c("AADM only" = "pink", "HITL" = "lightgreen")) +
  labs(title = "Preference distribution: AADM only vs HITL", fill = "") +
  theme_void() +
  theme(plot.title = element_text(hjust = 0.5, size = 14, face = "bold"))

Preferred decision-making mechanism vs. choice of system

# Classify each participant by preferred decision-making mechanism and by
# chosen system. case_when() takes the first matching rule, so k43 ("AADM")
# wins over the HITL codes and k6 ("Chooses A") wins over k7.
wants_hitl <- with(df_grp, k20 == 1 | k44 == 1 | k50 == 1 | k40 == 1)

df_grp$decision_pref <- case_when(
  df_grp$k43 == 1 ~ "AADM",
  wants_hitl      ~ "HITL",
  TRUE            ~ "No explicit comments"
)

df_grp$system_choice <- case_when(
  df_grp$k6 == 1 ~ "Chooses A",
  df_grp$k7 == 1 ~ "Chooses B",
  TRUE           ~ "No choice"
)


# Cross-tabulation of decision preference (rows) by system choice (columns),
# restricted to participants who chose A or B, with row and column totals.
pref_levels   <- c("AADM", "HITL", "No explicit comments")
choice_levels <- c("Chooses A", "Chooses B")

tab_display <- df_grp %>%
  filter(system_choice %in% choice_levels) %>%
  count(decision_pref, system_choice) %>%
  mutate(
    decision_pref = factor(decision_pref, levels = pref_levels),
    system_choice = factor(system_choice, levels = choice_levels)
  ) %>%
  arrange(decision_pref) %>%
  pivot_wider(names_from = system_choice, values_from = n, values_fill = 0) %>%
  adorn_totals(where = c("row", "col"))

# Styled table of the cross-tabulation.
kable(
  tab_display,
  align     = "c",
  col.names = c("Decision Preference", "Chooses A", "Chooses B", "Total")
) %>%
  kable_styling(full_width = FALSE, position = "center", font_size = 12) %>%
  row_spec(0, bold = TRUE) %>%
  column_spec(1, bold = TRUE)

Decision Preference	Chooses A	Chooses B	Total
AADM	4	0	4
HITL	19	12	31
Total	23	12	35

Fisher’s test

# Counts per decision preference x system choice, for A/B choosers only.
tab_for_test <- df_grp %>%
  filter(system_choice %in% c("Chooses A", "Chooses B")) %>%
  count(decision_pref, system_choice)

# Contingency table built from the counts, then Fisher's exact test on it.
tab_matrix <- xtabs(n ~ decision_pref + system_choice, data = tab_for_test)

fisher.test(tab_matrix)

## 
##  Fisher's Exact Test for Count Data
## 
## data:  tab_matrix
## p-value = 0.2752
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.3511545       Inf
## sample estimates:
## odds ratio 
##        Inf

Fisher’s Exact Test (p = 0.2752) shows no statistically significant association between the preferred decision-making mechanism (AADM vs. HITL) and the system chosen (System A vs. System B). In other words, in this sample participants’ preference for AADM or HITL does not reliably predict whether they chose System A or System B. Because only four participants preferred AADM, the test has little power to detect an association, so this result should not be read as evidence that none exists.

System A vs preferred decision-making mechanism

# System A choosers, counted by preferred decision-making mechanism.
# Participants with no explicit preference get NA and are dropped.
df_A <- df_grp %>%
  filter(system_choice == "Chooses A") %>%
  mutate(
    pref_group = case_when(
      decision_pref == "AADM" ~ "Fully automated",
      decision_pref %in% c("HITL") ~ "HITL",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(pref_group)) %>%
  count(pref_group)

# Bar chart of the counts. The y axis starts at 0: with a lower limit of 1
# and the default axis expansion the visible range started just above zero,
# so the bars were drawn without their zero baseline.
ggplot(df_A, aes(x = pref_group, y = n, fill = pref_group)) +
  geom_col(width = 0.6) +
  geom_text(aes(label = n), vjust = -0.5, size = 5) +
  scale_fill_manual(values = c("Fully automated" = "#8DA0CB",
                               "HITL" = "#FC8D62")) +
  labs(
    title = "Among System A choosers: Fully automated vs HITL preference",
    x = "Decision preference",
    y = "Count"
  ) +
  theme_minimal() +
  coord_cartesian(ylim = c(0, 20)) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.3, face = "bold")
  )

System choice vs. appeal preference

# Appeal stance per participant. The first matching rule wins, so anyone with
# one of k4, k5, k27 or k48 is "Appeal preference" even if k21 or k46 is also
# coded. Participants with none of these codes get NA.
supports_appeal   <- with(df_grp, k4 == 1 | k5 == 1 | k27 == 1 | k48 == 1)
accepts_no_appeal <- with(df_grp, k21 == 1 | k46 == 1)

df_grp$appeal_pref <- case_when(
  supports_appeal   ~ "Appeal preference",
  accepts_no_appeal ~ "No appeal preference",
  TRUE              ~ NA_character_
)



# Counts of appeal stance within each system choice (A/B choosers only).
appeal_by_choice <- df_grp %>%
  filter(system_choice %in% c("Chooses A", "Chooses B")) %>%
  count(system_choice, appeal_pref)

# Dodged bar chart of those counts, with the count above each bar.
ggplot(data = appeal_by_choice,
       mapping = aes(x = system_choice, y = n, fill = appeal_pref)) +
  geom_col(position = "dodge", width = 0.6) +
  geom_text(aes(label = n), position = position_dodge(0.6), vjust = -0.3) +
  scale_fill_manual(values = c("Appeal preference" = "#8DA0CB",
                               "No appeal preference" = "#FC8D62")) +
  coord_cartesian(ylim = c(1, 25)) +
  labs(
    title = "Appeal Preference by System Choice",
    x = "System Choice",
    y = "Count",
    fill = "Appeal Preference"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

Fisher’s Exact Test

# Wide table: one row per system choice, one column per appeal stance.
# Participants without a coded appeal stance are excluded.
appeal_levels <- c("Appeal preference", "No appeal preference")

tab_fisher <- df_grp %>%
  filter(
    system_choice %in% c("Chooses A", "Chooses B"),
    appeal_pref %in% appeal_levels
  ) %>%
  count(system_choice, appeal_pref) %>%
  tidyr::pivot_wider(names_from = appeal_pref, values_from = n, values_fill = 0)

# Keep only the count columns as a matrix for the test.
tab_matrix <- as.matrix(dplyr::select(tab_fisher, -1))

fisher.test(tab_matrix)

## 
##  Fisher's Exact Test for Count Data
## 
## data:  tab_matrix
## p-value = 0.02199
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  1.06376     Inf
## sample estimates:
## odds ratio 
##        Inf

# Styled table of the counts used in the Fisher test.
kable(
  tab_fisher,
  col.names = c("System Choice", "Appeal Preference", "No Appeal Preference"),
  align = "c"
) %>%
  kable_styling(full_width = FALSE, position = "center")

System Choice	Appeal Preference	No Appeal Preference
Chooses A	23	0
Chooses B	7	3

Fisher’s Exact Test revealed a statistically significant association between system choice and appeal preference (p = 0.02199), indicating that appeal support is not randomly distributed across system preference groups. Specifically, all 23 participants who chose System A supported the availability of appeal mechanisms, whereas among those who chose System B and expressed an appeal stance, 7 of 10 supported appeal and 3 did not. This suggests that System A choosers place greater emphasis on procedural safeguards and contestability in automated decision-making processes, although the small number of System B choosers calls for caution.

Preferred decision-making mechanism vs. appeal preference

Is this analysis necessary, or are the previous visualization and Fisher’s Exact Test enough? I am unsure, because this analysis is not about System A vs. System B, and the visualization below does not really add anything.

# Appeal stance within each preferred decision-making mechanism (AADM / HITL).
# The title and x-axis label now describe the variable actually plotted
# (decision_pref); they previously said "System Choice", copied from the
# system-choice plot.
df_grp %>%
  filter(decision_pref %in% c("AADM", "HITL")) %>%
  count(decision_pref, appeal_pref) %>%
  ggplot(aes(x = decision_pref, y = n, fill = appeal_pref)) +
  geom_col(position = "dodge", width = 0.6) +
  geom_text(aes(label = n), position = position_dodge(0.6), vjust = -0.3) +
  labs(
    title = "Appeal Preference by Preferred Decision-Making Mechanism",
    x = "Preferred decision-making mechanism",
    y = "Count",
    fill = "Appeal Preference"
  ) +
  scale_fill_manual(values = c("Appeal preference" = "#8DA0CB",
                               "No appeal preference" = "#FC8D62")) +
  theme_minimal() +
  coord_cartesian(ylim = c(1, 30)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

Fairness vs. efficiency

Fairness judgement

# Fairness flags:
#   Fair_AADM = positive fairness of AADM (k34) or negative fairness of HITL (k26)
#   Fair_HITL = positive fairness of HITL (k35) or negative fairness of AADM (k25)
# fairness_preference combines the two flags into one label per participant.
df_grp$Fair_AADM <- as.integer(df_grp$k34 == 1 | df_grp$k26 == 1)
df_grp$Fair_HITL <- as.integer(df_grp$k35 == 1 | df_grp$k25 == 1)

df_grp$fairness_preference <- case_when(
  df_grp$Fair_AADM == 1 & df_grp$Fair_HITL == 0 ~ "AADM is fairer",
  df_grp$Fair_AADM == 0 & df_grp$Fair_HITL == 1 ~ "HITL is fairer",
  df_grp$Fair_AADM == 1 & df_grp$Fair_HITL == 1 ~ "Both perceived fair",
  TRUE ~ "No fairness judgment"
)

# Counts per fairness label, excluding participants without any judgment.
df_fair_plot <- df_grp %>%
  filter(fairness_preference != "No fairness judgment") %>%
  count(fairness_preference)

# Bar chart of the counts. The y axis starts at 0: with a lower limit of 1
# and the default axis expansion the visible range began above zero, which
# cut off the base of every bar.
ggplot(df_fair_plot, aes(x = fairness_preference,
                         y = n,
                         fill = fairness_preference)) +
  geom_col(width = 0.6) +
  geom_text(aes(label = n), vjust = -0.4, size = 5) +
  scale_fill_manual(values = c(
    "AADM is fairer" = "lightgreen",
    "HITL is fairer" = "skyblue",
    "Both perceived fair" = "purple"
  )) +
  labs(
    title = "Fairness Judgments: Which System is Perceived as Fairer?",
    x = "Fairness Preference",
    y = "Number of Participants"
  ) +
  theme_minimal() +
  coord_cartesian(ylim = c(0, 15)) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
  )

Efficiency judgement

# Efficiency flags. Eff_AADM is 1 when k33 or k24 is coded. Eff_HITL is a
# constant 0 because no code expresses HITL efficiency.
df_grp$Eff_AADM <- as.integer(df_grp$k33 == 1 | df_grp$k24 == 1)
df_grp$Eff_HITL <- rep(0L, nrow(df_grp))

df_grp$efficiency_preference <- case_when(
  df_grp$Eff_AADM == 1 ~ "AADM is more efficient",
  TRUE                 ~ "No efficiency judgment"
)

# Number of participants who judged AADM to be more efficient, as a one-row
# data frame with a display label. An earlier assignment to df_eff_plot was
# overwritten straight away by this one, so it has been dropped.
df_eff_plot <- df_grp %>%
  filter(efficiency_preference == "AADM is more efficient") %>%
  count() %>%
  mutate(label = "AADM is more efficient")

# Single-category donut chart; the title carries the message.
ggplot(df_eff_plot, aes(x = 2, y = n, fill = label)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y") +
  xlim(0.5, 2.5) +
  scale_fill_manual(values = "#81C784") +
  labs(
    title = "Efficiency-Based Preference: 
    100% of Those Who Had Efficiency Judgement Prefer AADM",
    fill = ""
  ) +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  )

Fairness and Efficiency Reasoning by Preferred decision-making mechanism

# Panel 1: participants coded k33 (positive efficiency judgement in AADM),
# counted per decision preference.
panel1 <- df_grp %>%
  filter(!is.na(decision_pref)) %>%
  group_by(decision_pref) %>%
  summarise(
    Count = sum(k33 == 1, na.rm = TRUE),
    .groups = "drop"
  )

# Display order of the reasons in panels 2 and 3.
reason_levels <- c(
  "Negative efficiency of HITL",
  "Negative fairness of AADM",
  "Negative fairness of HITL",
  "Positive fairness of AADM",
  "Positive fairness of HITL"
)

# Helper: count each fairness/efficiency reason among the participants whose
# decision_pref equals `pref`. Returns a long table with columns Reason (a
# factor ordered by reason_levels), Count and System (= supporters_label).
# Panels 2 and 3 previously repeated this pipeline verbatim.
count_reasons <- function(data, pref, supporters_label) {
  data %>%
    filter(decision_pref == pref) %>%
    summarise(
      `Negative efficiency of HITL` = sum(k24 == 1, na.rm = TRUE),
      `Negative fairness of AADM`   = sum(k25 == 1, na.rm = TRUE),
      `Negative fairness of HITL`   = sum(k26 == 1, na.rm = TRUE),
      `Positive fairness of AADM`   = sum(k34 == 1, na.rm = TRUE),
      `Positive fairness of HITL`   = sum(k35 == 1, na.rm = TRUE)
    ) %>%
    pivot_longer(cols = everything(),
                 names_to = "Reason",
                 values_to = "Count") %>%
    mutate(
      Reason = factor(Reason, levels = reason_levels),
      System = supporters_label
    )
}

# Panel 2: reasons given by AADM supporters; panel 3: by HITL supporters.
panel2 <- count_reasons(df_grp, "AADM", "AADM supporters")
panel3 <- count_reasons(df_grp, "HITL", "HITL supporters")


# Panel 1 plot: positive-efficiency counts per decision preference.
# Only one coord_cartesian() is kept. A second, hard-coded call
# (ylim = c(1, 10)) used to replace the data-driven limits below and, with
# the default expansion, cut the axis off above zero.
p1 <- ggplot(panel1, aes(x = decision_pref, y = Count)) +
  geom_col(fill = "#4C79C0") +
  geom_text(aes(label = Count), vjust = -0.3, size = 4) +
  # Zero-based axis with headroom for the count labels.
  coord_cartesian(ylim = c(0, max(panel1$Count) + 2)) +
  labs(
    x = "",
    y = "Number of participants",
    title = "AADM vs HITL: Positive efficiency of AADM"
  ) +
  theme_minimal() +
  theme(
    plot.title  = element_text(hjust = 0.5, face = "bold"),
    axis.text.x = element_text(angle = 0, hjust = 0.5)
  )

## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.

# Panel 2 plot: reasons given by AADM supporters. The y range is taken from
# the larger of the panel2/panel3 counts, plus headroom for the labels.
p2 <- ggplot(data = panel2, mapping = aes(x = Reason, y = Count)) +
  geom_col(fill = "#4C79C0") +
  geom_text(aes(label = Count), vjust = -0.3, size = 4) +
  labs(title = "AADM supporters", x = "", y = " ") +
  coord_cartesian(ylim = c(0, max(panel3$Count, panel2$Count) + 2)) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 55, hjust = 1),
    plot.title  = element_text(hjust = 0.5, face = "bold")
  )


# Panel 3 plot: reasons given by HITL supporters.
# Only one coord_cartesian() is kept. A second, hard-coded call
# (ylim = c(1, 20)) used to replace the limits below, so this panel no longer
# shared its y range with the AADM-supporters panel drawn beside it.
p3 <- ggplot(panel3, aes(x = Reason, y = Count)) +
  geom_col(fill = "#4C79C0") +
  geom_text(aes(label = Count), vjust = -0.3, size = 4) +
  # Same zero-based range as the AADM-supporters panel.
  coord_cartesian(ylim = c(0, max(panel3$Count, panel2$Count) + 2)) +
  labs(
    x = "",
    y = "Number of participants",
    title = "HITL supporters"
  ) +
  theme_minimal() +
  theme(
    plot.title  = element_text(hjust = 0.5, face = "bold"),
    axis.text.x = element_text(angle = 55, hjust = 1)
  )

## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.

# Layout: p1 on top, p2 and p3 side by side underneath, with a common title.
panel_layout <- p1 / (p2 | p3)

panel_layout +
  plot_annotation(
    theme = theme(plot.title = element_text(hjust = 0.5, face = "bold")),
    title = "Fairness and Efficiency Reasoning 
    by Preferred decision-making mechanism
    "
  )

Table: Fairness vs. efficiency judgements about AADM and HITL

# Display label -> code column for each fairness code.
fairness_codes <- c(
  "Negative fairness in AADM" = "k25",
  "Negative fairness in HITL" = "k26",
  "Positive fairness in AADM" = "k34",
  "Positive fairness in HITL" = "k35"
)

# Number of participants coded 1 on each fairness code (missing ignored).
fairness_table <- tibble(
  `Fairness variable` = names(fairness_codes),
  `Number of participants` = as.integer(
    colSums(df_grp[unname(fairness_codes)] == 1, na.rm = TRUE)
  )
)

# Styled table of the counts.
kable(
  fairness_table,
  caption = "Fairness-related judgements about AADM and HITL",
  align = "c",
  col.names = c("Fairness variable", "Number of participants")
) %>%
  kable_styling(full_width = FALSE, position = "center", font_size = 12) %>%
  row_spec(0, bold = TRUE) %>%
  column_spec(1, bold = TRUE)

Fairness-related judgements about AADM and HITL
Fairness variable	Number of participants
Negative fairness in AADM	14
Negative fairness in HITL	5
Positive fairness in AADM	7
Positive fairness in HITL	9

# Display label -> code column for each efficiency code.
efficiency_codes <- c(
  "Negative efficiency of HITL" = "k24",
  "Positive efficiency of AADM" = "k33"
)

# Number of participants coded 1 on each efficiency code (missing ignored).
efficiency_table <- tibble(
  `Efficiency variable` = names(efficiency_codes),
  `Number of participants` = as.integer(
    colSums(df_grp[unname(efficiency_codes)] == 1, na.rm = TRUE)
  )
)

# Styled table of the counts.
kable(
  efficiency_table,
  caption = "Efficiency-related judgements about AADM and HITL",
  align = "c",
  col.names = c("Efficiency variable", "Number of participants")
) %>%
  kable_styling(full_width = FALSE, position = "center", font_size = 12) %>%
  row_spec(0, bold = TRUE) %>%
  column_spec(1, bold = TRUE)

Efficiency-related judgements about AADM and HITL
Efficiency variable	Number of participants
Negative efficiency of HITL	1
Positive efficiency of AADM	9

Table: Fairness/efficiency typology vs Preferred decision-making mechanism

# Typology of reasoning: did the participant use any efficiency code
# (k24, k33) and/or any fairness code (k25, k26, k34, k35)?
df_grp$any_efficiency <- as.integer(df_grp$k24 == 1 | df_grp$k33 == 1)
df_grp$any_fairness <- as.integer(
  df_grp$k25 == 1 | df_grp$k26 == 1 | df_grp$k34 == 1 | df_grp$k35 == 1
)

# Everything not matched by the first three rules (both flags 0, or a flag
# that is NA) falls through to the "Neither" label.
df_grp$reasoning_category <- case_when(
  df_grp$any_efficiency == 1 & df_grp$any_fairness == 0 ~ "Efficiency",
  df_grp$any_efficiency == 0 & df_grp$any_fairness == 1 ~ "Fairness",
  df_grp$any_efficiency == 1 & df_grp$any_fairness == 1 ~ "Both fairness & efficiency",
  TRUE ~ "Neither efficiency nor fairness"
)
# Counts per decision preference x reasoning category, limited to the three
# substantive categories; combinations with no participants are filled with 0.
# The exclusion now compares against the category's real label. The previous
# comparison with "Neither" never matched, so those rows were silently kept.
# To list them on purpose, remove the second filter condition and add the
# label to the complete() call.
table_df <- df_grp %>%
  filter(
    !is.na(decision_pref),
    reasoning_category != "Neither efficiency nor fairness"
  ) %>%
  count(decision_pref, reasoning_category) %>%
  complete(
    decision_pref,
    reasoning_category = c("Efficiency",
                           "Fairness",
                           "Both fairness & efficiency"),
    fill = list(n = 0)
  ) %>%
  arrange(decision_pref, reasoning_category)

# Styled table of the typology counts.
kable(
  table_df,
  caption = "Efficiency/Fairness Typology by Preferred Decision-Making Mechanism",
  col.names = c("Decision Preference",
                "Reasoning Category",
                "Number of participants"),
  align = c("l", "l", "c"),
  booktabs = TRUE
) %>%
  kable_styling(full_width = FALSE, position = "center")

Efficiency/Fairness Typology by Preferred Decision-Making Mechanism
Decision Preference	Reasoning Category	Number of participants
AADM	Both fairness & efficiency	1
AADM	Efficiency	0
AADM	Fairness	2
AADM	Neither efficiency nor fairness	1
HITL	Both fairness & efficiency	6
HITL	Efficiency	2
HITL	Fairness	15
HITL	Neither efficiency nor fairness	8

Heatmap: Fairness/efficiency typology vs Preferred decision-making mechanism

# Heatmap data: counts for every decision preference x reasoning category
# cell; combinations with no participants are filled with 0.
heatmap_prefs <- c("HITL", "AADM")
heatmap_categories <- c(
  "Both fairness & efficiency",
  "Efficiency",
  "Fairness",
  "Neither efficiency nor fairness"
)
# Order in which the categories appear along the x axis.
heatmap_category_order <- c(
  "Efficiency",
  "Both fairness & efficiency",
  "Fairness",
  "Neither efficiency nor fairness"
)

plot_df <- df_grp %>%
  filter(!is.na(decision_pref)) %>%
  count(decision_pref, reasoning_category) %>%
  tidyr::complete(
    decision_pref = heatmap_prefs,
    reasoning_category = heatmap_categories,
    fill = list(n = 0)
  ) %>%
  mutate(
    reasoning_category = factor(reasoning_category,
                                levels = heatmap_category_order)
  )

# Heatmap of the typology counts; empty cells are left without a number.
ggplot(data = plot_df,
       mapping = aes(x = reasoning_category, y = decision_pref, fill = n)) +
  geom_tile(color = "lightblue") +
  geom_text(aes(label = ifelse(n == 0, " ", n)),
            size = 4.5, fontface = "bold") +
  scale_fill_gradient(low = "lightblue", high = "lightgreen") +
  coord_fixed() +
  labs(
    title = "Efficiency/Fairness Typology by Preferred Decision-Making Mechanism",
    x = "Reasoning Type",
    y = "Preferred System",
    fill = "Count"
  ) +
  theme_minimal() +
  theme(
    panel.grid  = element_blank(),
    plot.title  = element_text(hjust = 0.5, face = "bold"),
    axis.text.x = element_text(angle = 0, hjust = 0.5)
  )

Legitimization/delegitimization of AADM

# Codes treated as legitimizing / delegitimizing arguments about AADM.
legit_codes <- c("k1", "k10", "k23", "k24", "k26", "k29", "k30", "k31",
                 "k32", "k33", "k34")
delegit_codes <- c("k12", "k13", "k15", "k17", "k19", "k20", "k25", "k35")

# Flag participants who used at least one code of each kind (missing codes
# count as 0), then combine the two flags into a single legitimacy view.
df_grp <- df_grp %>%
  mutate(
    Legitimization = as.integer(
      rowSums(across(all_of(legit_codes)), na.rm = TRUE) > 0
    ),
    Delegitimization = as.integer(
      rowSums(across(all_of(delegit_codes)), na.rm = TRUE) > 0
    ),
    legitimacy_view = case_when(
      Legitimization == 1 & Delegitimization == 0 ~ "Legitimizing",
      Legitimization == 0 & Delegitimization == 1 ~ "Delegitimizing",
      Legitimization == 1 & Delegitimization == 1 ~ "Mixed",
      TRUE ~ "No legitimacy argument"
    )
  )
#df_grp %>%
#  count(legitimacy_view) %>%
#  mutate(pct = round(n / sum(n) * 100, 1))

# Distribution of legitimacy views: counts plus share of all participants.
legitimacy_dist <- count(df_grp, legitimacy_view)
legitimacy_dist$Percentage <- paste0(
  round(legitimacy_dist$n / sum(legitimacy_dist$n) * 100, 1), "%"
)

# Styled table of the distribution.
kable(
  legitimacy_dist,
  caption = "Distribution of Legitimization vs Delegitimization Reasoning",
  col.names = c("Legitimacy View", "Number of Participants", "Percentage"),
  align = "c"
) %>%
  kable_styling(full_width = FALSE, position = "center", font_size = 12) %>%
  row_spec(0, bold = TRUE) %>%
  column_spec(1, bold = TRUE)

Distribution of Legitimization vs Delegitimization Reasoning
Legitimacy View	Number of Participants	Percentage
Delegitimizing	4	11.4%
Legitimizing	6	17.1%
Mixed	24	68.6%
No legitimacy argument	1	2.9%

A majority of participants (68.6%) fell into the Mixed legitimacy view, indicating that most respondents did not take an extreme position. Rather, they simultaneously legitimize AADM, e.g. by linking it to national security and controlled migration, and delegitimize it, because it violates the norm that a human holds decision-making authority and because of data privacy concerns and the risk of digital authoritarianism. This suggests that public perceptions of automated decision-making systems are complex and conditional rather than simply supportive or resistant, and that they depend on various cognitive and normative factors.

Legitimization/delegitimization arguments

# Codes counted as legitimizing / delegitimizing arguments.
legit_vars <- c("k1", "k10", "k23", "k24", "k26", "k29", "k30", "k31", "k32",
                "k33", "k34")
delegit_vars <- c("k12", "k13", "k15", "k17", "k19", "k20", "k25", "k35")

# Long table with one row per code: number of participants coded 1 (missing
# values ignored) and the argument group the code belongs to.
legit_counts <- tibble(
  code  = legit_vars,
  n     = as.integer(colSums(df_grp[legit_vars] == 1, na.rm = TRUE)),
  group = "Legitimization"
)

delegit_counts <- tibble(
  code  = delegit_vars,
  n     = as.integer(colSums(df_grp[delegit_vars] == 1, na.rm = TRUE)),
  group = "Delegitimization"
)

leg_counts_all <- bind_rows(legit_counts, delegit_counts)


# Short axis labels for the argument codes, keyed by code name:
# legitimizing codes first, then delegitimizing codes.
legit_code_labels <- c(
  k1  = "AADM for national security",
  k10 = "AADM for controlled migration",
  k23 = "Legitimization via existing AADM systems",
  k24 = "Negative efficiency judgement for HITL",
  k26 = "Negative fairness judgement in HITL",
  k29 = "Normalization via ChatGPT",
  k30 = "Normalization via algorithmic systems",
  k31 = "Normalization via police",
  k32 = "Normalization via visa processes",
  k33 = "Positive efficiency judgement in AADM",
  k34 = "Positive fairness judgement in AADM"
)

delegit_code_labels <- c(
  k12 = "Data privacy concerns",
  k13 = "Negative association with status quo",
  k15 = "Digital authoritarianism",
  k17 = "ETA is unnecessary",
  k19 = "Familiarity (to human decision-making)",
  k20 = "Final decision by human is important",
  k25 = "Negative fairness judgement in AADM",
  k35 = "Positive fairness judgement in HITL"
)

code_labels <- c(legit_code_labels, delegit_code_labels)

# Horizontal bar chart of the argument counts, one facet per argument group;
# codes are ordered by count and shown with their short labels.
ggplot(data = leg_counts_all,
       mapping = aes(x = reorder(code, n), y = n, fill = group)) +
  geom_col(show.legend = FALSE) +
  scale_x_discrete(labels = code_labels) +
  facet_wrap(~ group, scales = "free_y") +
  coord_flip() +
  labs(
    x = "Arguments",
    y = "Number of Participants",
    title = "
Delegitimization vs. Legitimization Arguments for AADM"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

Data collection

# Display label -> code column for each data-collection code.
data_view_codes <- c(
  "No opinion on data being collected"                 = "k28",
  "Too intrusive"                                      = "k36",
  "Wanted extensive data collection"                   = "k41",
  "Wanted extensive data collection for system B"      = "k42",
  "Wanted less intrusive data collection for system A" = "k45"
)

# Number of participants coded 1 on each data-collection code.
df_data_table <- tibble(
  `Data View` = names(data_view_codes),
  Count = as.integer(
    colSums(df_grp[unname(data_view_codes)] == 1, na.rm = TRUE)
  )
)

# Styled table of the counts.
kable(
  df_data_table,
  caption = "Participant Views on Data Collection",
  col.names = c("Data View", "Number of Participants"),
  align = "l"
) %>%
  kable_styling(full_width = FALSE, position = "center", font_size = 12) %>%
  row_spec(0, bold = TRUE)

Participant Views on Data Collection
Data View	Number of Participants
No opinion on data being collected	3
Too intrusive	16
Wanted extensive data collection	10
Wanted extensive data collection for system B	1
Wanted less intrusive data collection for system A	2

(Legitimacy) reasons behind data collection preferences

# Data-collection preference per participant: k41/k42 mean "extensive",
# otherwise k36/k45 mean "less intrusive"; everyone else is dropped.
with_data_pref <- df_grp %>%
  mutate(
    data_pref = case_when(
      k41 == 1 | k42 == 1 ~ "Extensive data collection preference",
      k36 == 1 | k45 == 1 ~ "Less intrusive data collection preference",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(data_pref))

# Within each preference group, count how many participants gave each of the
# four legitimacy-related reasons, in long format for plotting.
df_plot <- with_data_pref %>%
  summarise(
    `AADM for national security`    = sum(k1 == 1, na.rm = TRUE),
    `AADM for controlled migration` = sum(k10 == 1, na.rm = TRUE),
    `Data privacy concerns`         = sum(k12 == 1, na.rm = TRUE),
    `Digital authoritarianism`      = sum(k15 == 1, na.rm = TRUE),
    .by = data_pref
  ) %>%
  pivot_longer(cols = -data_pref, names_to = "Reason", values_to = "Count")

# Faceted bar plot: reasons within each data preference group, one facet per
# group stacked vertically, with the count printed above each bar.
ggplot(data = df_plot, mapping = aes(x = Reason, y = Count)) +
  geom_col(fill = "#4C79C0") +
  geom_text(aes(label = Count), vjust = -0.3, size = 4) +
  coord_cartesian(ylim = c(0, max(df_plot$Count) + 2)) +
  facet_wrap(~ data_pref, ncol = 1) +
  labs(
    title = "Reasons & Concerns Behind Data Collection Preferences",
    x = "",
    y = "Number of participants"
  ) +
  theme_minimal() +
  theme(
    strip.text  = element_text(face = "bold"),
    axis.text.x = element_text(angle = 55, hjust = 1),
    plot.title  = element_text(hjust = 0.5, face = "bold")
  )

Policy perspective (preference)

# Display label for each policy-related code, keyed by code name.
policy_labels <- c(
  k2  = "Accessibility",
  k3  = "Accountability",
  k12 = "Data privacy",
  k16 = "Efficiency of the system",
  k37 = "Transparency of process",
  k38 = "Transparency of system",
  k39 = "Globally applicable migration policy"
)
policy_vars <- names(policy_labels)

# One row per policy code: number of participants coded 1 (missing values
# ignored) and the code's display label.
policy_counts <- tibble(
  code  = policy_vars,
  n     = as.integer(colSums(df_grp[policy_vars] == 1, na.rm = TRUE)),
  label = unname(policy_labels[policy_vars])
)

# Horizontal bar chart of the policy counts, ordered by count, with the count
# printed beside each bar; the axis is extended by one to fit the labels.
ggplot(data = policy_counts, mapping = aes(x = reorder(label, n), y = n)) +
  geom_col(fill = "#4CAF50") +
  geom_text(aes(label = n), hjust = -0.2, size = 4) +
  expand_limits(y = max(policy_counts$n) + 1) +
  coord_flip() +
  labs(
    x = "Policy recommendations",
    y = "Number of participants",
    title = "Policy Preferences: 
    Qualities Considered Important for AADM Legitimacy"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

Master’s Thesis 2026

Shobhana Sharma

2026-05-05