Setup, Filter, General Descriptives

# 1. why Kendall's tau? Ties? spearson tho seems more granular?
# 2. cannot do t test on the thing? Need non-parametric? think about Olivier's transformation to z; probabily fine.
# 3. some how only estimate a weight for positive ranking upward? Only counts when using that to rank upward?
# 4. let's say that the x axis is tau, what the y axis? the 


# Load required package
library(gtools)  # for permutations

# Generate all permutations of 1:6
perms <- permutations(n = 6, r = 6)
ref <- 1:6
n <- length(ref)

# Initialize vectors
kendall_vals <- numeric(nrow(perms))
spearman_vals <- numeric(nrow(perms))
footrule_vals <- numeric(nrow(perms))

# Compute all metrics
for (i in 1:nrow(perms)) {
  p <- perms[i, ]
  
  # Kendall's tau
  kendall_vals[i] <- cor(p, ref, method = "kendall")
  
  # Spearman's rho
  spearman_vals[i] <- cor(p, ref, method = "spearman")
  
  # Footrule distance (normalized to range [–1, 1])
  raw_footrule <- sum(abs(p - ref))
  max_footrule <- sum(abs(rev(ref) - ref))  # = 18 for n = 6
  norm_footrule <- 1 - (2 * raw_footrule / max_footrule)
  footrule_vals[i] <- norm_footrule
}

# Count unique values
cat("Unique Kendall tau values:   ", length(unique(kendall_vals)), "\n")
## Unique Kendall tau values:    16
cat("Unique Spearman rho values:  ", length(unique(spearman_vals)), "\n")
## Unique Spearman rho values:   36
cat("Unique Footrule scores:      ", length(unique(footrule_vals)), "\n")
## Unique Footrule scores:       10
## Getting started ##

# Set WD
#setwd("C:/Users/Arian/Documents/GitHub/SAB-Lab/Dynamic Rank Order Processes Tracing (DROPT)/Shape and Lottery Experiments/Lottery Study_Preferences/Arians-Sandbox")

# Get both data files; pick and choose from each
data_num <- read.csv("DROPT_Lotteries_full_num.csv") %>%
  dplyr::slice(3:n())
data_text <- read.csv("DROPT_Lotteries_full_text.csv") %>%
  dplyr::slice(3:n())

# set numeric file as base
dat <- data_num

# Convert all columns that are not yet numeric but can be to numeric
dat[] <- lapply(dat, function(col) {
  if (is.character(col) || is.factor(col)) {
    suppressWarnings(num_col <- as.numeric(as.character(col)))
    # If most values are not NA after conversion, assume it's a valid numeric column
    if (sum(!is.na(num_col)) > 0.8 * length(num_col)) {
      return(num_col)
    } else {
      return(col)  # leave it as-is
    }
  } else {
    return(col)
  }
})
## Setup: Merge data frames - take some of the demographic information from the text df
dat$birthYear <- as.numeric(as.character(data_text$age))
dat$age <- 2025 - dat$birthYear
dat$gender <- factor(data_text$gender)
dat$gender_binary <- ifelse(dat$gender == "Male", "Male",
                             ifelse(dat$gender == "Female", "Female", NA))
dat$education <- data_text$education
dat$education_num <- as.numeric(data_num$education)
dat$income <- ifelse(dat$income != 10, as.numeric(dat$income), NA)


## necessary recoding
correct_answer <- "1,2,3"

dat <- dat %>%
  mutate(dose.coded = ifelse(Dose == correct_answer, "Correct", "Incorrect"))
dat$attn1.coded <- ifelse(dat$attn1 == 2, "Correct", "Incorrect")

dat$dose.coded <- as.factor(dat$dose.coded)
dat$attn1.coded <- as.factor(dat$attn1.coded)

dat$bonus_belief [dat$bonus_belief== 1] = 'Yes'
dat$bonus_belief [dat$bonus_belief== 0] = 'No'

dat$bonus_belief <- factor(dat$bonus_belief, levels = names(sort(table(dat$bonus_belief), decreasing = TRUE)))

dat <- dat %>%
  mutate(StartDate = as.Date(StartDate, format = "%Y-%m-%d")) %>% 
  rename(Bot_score = "Q_RecaptchaScore") %>% 
  rename(Fraud_score = "Q_RelevantIDFraudScore") %>%
  rename(Duration = "Duration..in.seconds.") 


## Apply filters

dat <- dat %>%
  filter(PROLIFIC_PID != "") %>%
  filter(DistributionChannel != "preview") %>% #
  filter(Bot_score >= 0.50) %>% #
  #filter(Fraud_score <= 30) %>%
  filter(attn1.coded == "Correct") #


# failed attention
# bots (0.5)
# fraudulent (score?)
# tau less than 0.5, but only exclude in payoff and prob

filter_summary <- data.frame(
  Filter_Name = c(
    'Test Runs (Preview)',
    'Bot_score =< 0.50',
    'Attention Check failed',
    'Sum'
  ),
  People_Filtered_Out = c(1,3,6,10)
)

filter_summary 
## Create long data frames
dat_long1 <- dat %>%
  pivot_longer(
    cols = matches("Set1_L[1-6]_(Prob|Amt)"),
    names_to = c("lottery", ".value"),
    names_pattern = "Set1_(L[1-6])_(Prob|Amt)"
  ) %>%
  mutate(item.f = case_when(
    lottery == "L1" ~ "Pr6_Amt1",
    lottery == "L2" ~ "Pr5_Amt2",
    lottery == "L3" ~ "Pr4_Amt3",
    lottery == "L4" ~ "Pr3_Amt4",
    lottery == "L5" ~ "Pr2_Amt5",
    lottery == "L6" ~ "Pr1_Amt6"
  ))

dat_long2 <- dat %>%
  pivot_longer(
    cols = matches("Set2_L[1-6]_(Prob|Amt)"),
    names_to = c("lottery", ".value"),
    names_pattern = "Set2_(L[1-6])_(Prob|Amt)"
  ) %>%
  mutate(item.f = case_when(
    lottery == "L1" ~ "Pr6_Amt1",
    lottery == "L2" ~ "Pr5_Amt2",
    lottery == "L3" ~ "Pr4_Amt3",
    lottery == "L4" ~ "Pr3_Amt4",
    lottery == "L5" ~ "Pr2_Amt5",
    lottery == "L6" ~ "Pr1_Amt6"
  ))


# write a function for dfs later
process_task <- function(data, rank_column, task_label) {
  # Extract the initial order
  initial_order <- sub("^\\{([^}]*)\\}.*", "\\1", data[[rank_column]]) # captures the content within the first {} in the string; we will apply this to the RankProcess column; Using double bracket to capture a vector

  initial_order <- gsub("0; ", "", initial_order) # remove the 0 timestamp
  initial_order_split <- strsplit(initial_order, ",") # separate the strings into list

  # Identify unique items
  unique_items <- unique(unlist(initial_order_split))

  # Create a data frame to store initial ranks
  initial_positions_df <- data.frame(matrix(ncol = length(unique_items), nrow = length(initial_order_split))) # nrow is the number of respondents
  names(initial_positions_df) <- paste0("initial.items_", unique_items)

  # Fill initial ranks
  for (i in seq_along(initial_order_split)) {
    initial_order <- initial_order_split[[i]] # for each respondent, extract the string of initial order
    for (j in seq_along(initial_order)) {
      item <- initial_order[j]
      initial_positions_df[i, paste0("initial.items_", item)] <- j
    }
  }

  data <- cbind(data, initial_positions_df)
  assign(paste0("initial.dat_", task_label), data, envir = .GlobalEnv)

  cor_results <- data.frame(
    item = paste0("rank_", task_label, "_", unique_items),
    initial_item = paste0("initial.items_", unique_items),
    correlation = NA_real_,
    p_value = NA_real_
  )

  cor_results <- cor_results %>% # it is important to do this step by Task, because IDs are only unique and consistent within each quiz.
    rowwise() %>%
    dplyr::mutate(
      correlation = cor.test(data[[item]], data[[initial_item]])$estimate,
      p_value = cor.test(data[[item]], data[[initial_item]])$p.value
    ) %>%
    dplyr::mutate(sig = p_value < 0.05)

  cor_results$task <- task_label
  cor_results
}

# List of datasets, rank columns, and task labels
tasks <- list(
  list(data = dat, rank_column = "RankProcess_Prob", task_label = "prob"),
  list(data = dat, rank_column = "RankProcess_Amount", task_label = "amount"),
  list(data = dat, rank_column = "RankProcess_Prefer1", task_label = "pref1"),
  list(data = dat, rank_column = "RankProcess_Prefer2", task_label = "pref2")
)


all_results <- bind_rows(lapply(tasks, function(t) {
  process_task(t$data, t$rank_column, t$task_label)
}))

New Prep

### Collection of Wrapper functions that can be applied to all dfs ###

## COUNT ##

get_DROPT_count <- function(dat, rank_col, rank_all_col, item_numbers, item_labels,
                            condition, initial_dat, dat_long) {
  # Load required packages
  library(dplyr)
  library(tidyr)
  library(rlang)
  library(stringr)
  
  # Validate inputs
  if (length(item_numbers) != length(item_labels)) {
    stop("item_numbers and item_labels must be the same length.")
  }
  
  item_map <- setNames(item_labels, item_numbers)
  
  # --- Helper: Create Rank Process ---
  create_RankProcess <- function(data, col_name) {
    col_sym <- sym(col_name)
    data %>%
      select(ResponseId, !!col_sym) %>%
      separate_rows(!!col_sym, sep = "}") %>%
      mutate(!!col_sym := gsub("[{}]", "", !!col_sym)) %>%
      filter(!!col_sym != "") %>%
      separate(!!col_sym, into = c("timing", "order"), sep = ";") %>%
      mutate(order = trimws(order)) %>%
      group_by(ResponseId) %>%
      mutate(step = row_number() - 1) %>%
      select(step, everything()) %>%
      ungroup()
  }
  
  # --- Helper: Create Rank Process All ---
  create_RankProcess_all <- function(data, col_name) {
    col_sym <- sym(col_name)
    data %>%
      select(ResponseId, !!col_sym) %>%
      separate_rows(!!col_sym, sep = "}") %>%
      mutate(!!col_sym := gsub("[{}]", "", !!col_sym)) %>%
      filter(!!col_sym != "") %>%
      separate(!!col_sym, into = c("timing", "order_all"), sep = ";")
  }
  
  # --- Helper: Process and Clean Data ---
  process_RankProcess_data <- function(rank_data, rank_all_data, item_map) {
    item_numbers <- names(item_map)
    
    processed <- rank_data %>%
      left_join(rank_all_data, by = c("ResponseId", "timing")) %>%
      mutate(order_all = trimws(order_all),
             item_moved = as.numeric(sub(",.*", "", order_all))) %>%
      ungroup() %>%
      mutate(item.f = as.factor(item_map[as.character(item_moved)]))
    
    na_subj <- processed %>%
      filter(is.na(item_moved)) %>%
      pull(ResponseId)
    
    is_valid_order <- grepl("^\\d+(,\\d+){5}$", processed$order_all)
    bug_respondents <- processed %>%
      filter(is_valid_order & timing != 0) %>%
      pull(ResponseId)
    
    cleaned <- processed %>%
      filter(!ResponseId %in% c(na_subj, bug_respondents))
    
    drag_counts <- cleaned %>%
      filter(step != 0) %>%
      group_by(ResponseId) %>%
      dplyr::summarize(
        !!!setNames(
          lapply(item_numbers, function(x) {
            rlang::expr(sum(item_moved == !!as.numeric(x)))
          }),
          paste0("item_", item_numbers, "_moved.N")
        ),
        .groups = "drop"
      )
    
    return(list(
      cleaned_data = cleaned,
      bug_respondent = unique(bug_respondents),
      na_subj = unique(na_subj),
      drag_and_drop_count = drag_counts
    ))
  }
  
  # --- Main workflow ---
  rank_data <- create_RankProcess(dat, rank_col)
  rank_all_data <- create_RankProcess_all(dat, rank_all_col)
  result <- process_RankProcess_data(rank_data, rank_all_data, item_map)
  
  # --- Create long-format data ---
  drag_and_drop_count_long <- result$drag_and_drop_count %>%
    pivot_longer(
      cols = starts_with("item_"),
      names_to = c("item_number", ".value"),
      names_sep = "_moved\\."
    ) %>%
    mutate(
      condition = condition,
      item_number = as.numeric(gsub("item_", "", item_number)),
      item.f = as.factor(item_map[as.character(item_number)])
    ) %>%
    mutate(
      rank.Amount = case_when(
        item.f == "Pr6_Amt1" ~ 1,
        item.f == "Pr5_Amt2" ~ 2,
        item.f == "Pr4_Amt3" ~ 3,
        item.f == "Pr3_Amt4" ~ 4,
        item.f == "Pr2_Amt5" ~ 5,
        item.f == "Pr1_Amt6" ~ 6
      ),
      rank.Prob = case_when(
        item.f == "Pr6_Amt1" ~ 6,
        item.f == "Pr5_Amt2" ~ 5,
        item.f == "Pr4_Amt3" ~ 4,
        item.f == "Pr3_Amt4" ~ 3,
        item.f == "Pr2_Amt5" ~ 2,
        item.f == "Pr1_Amt6" ~ 1
      )
    ) %>%
    left_join(initial_dat %>% select(ResponseId, starts_with("initial.items_")), by = "ResponseId") %>%
    mutate(
      initial.rank = case_when(
        item.f == "Pr6_Amt1" ~ initial.items_49,
        item.f == "Pr5_Amt2" ~ initial.items_50,
        item.f == "Pr4_Amt3" ~ initial.items_64,
        item.f == "Pr3_Amt4" ~ initial.items_65,
        item.f == "Pr2_Amt5" ~ initial.items_67,
        item.f == "Pr1_Amt6" ~ initial.items_68
      ),
      initial.rank.r = relevel(factor(7 - initial.rank), ref = 6),
      N_ind = ifelse(N == 0, 0, 1)
    ) %>%
    select(-c(starts_with("initial.items_"))) %>%
    left_join(dat_long %>% select(ResponseId, item.f, Prob, Amt), by = c("ResponseId", "item.f"))
  
  result$drag_and_drop_count_long <- drag_and_drop_count_long
  
  return(result)
}

## DISTANCE ##

get_DROPT_distance <- function(
    data,
    items,
    item_labels,
    order_col = "order",
    step_col = "step",
    timing_col = "timing",
    item_f_col = "item.f",
    response_id_col = "ResponseId",
    condition_label = "Condition"
) {
  # Step 1: Split 'order' into ranks
  data_processed <- data %>%
    group_by(.data[[response_id_col]]) %>%
    mutate(parts = str_split(.data[[order_col]], ",")) %>%
    mutate(
      Rank1 = sapply(parts, function(x) x[1]),
      Rank2 = sapply(parts, function(x) x[2]),
      Rank3 = sapply(parts, function(x) x[3]),
      Rank4 = sapply(parts, function(x) x[4]),
      Rank5 = sapply(parts, function(x) x[5]),
      Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA))
    ) %>%
    select(-parts) %>%
    ungroup()
  
  # Step 2: Add current_* columns
  for (item in items) {
    data_processed[[paste0("current_", item)]] <- NA_integer_
  }
  
  data_processed <- data_processed %>%
    rowwise() %>%
    mutate(across(
      starts_with("current_"),
      ~ {
        item_number <- str_remove(cur_column(), "current_")
        case_when(
          Rank1 == item_number ~ 1,
          Rank2 == item_number ~ 2,
          Rank3 == item_number ~ 3,
          Rank4 == item_number ~ 4,
          Rank5 == item_number ~ 5,
          Rank6 == item_number ~ 6,
          TRUE ~ 1
        )
      }
    )) %>%
    ungroup()
  
  # Step 3: Add lagged positions
  for (item in items) {
    data_processed[[paste0("last_", item)]] <- lag(data_processed[[paste0("current_", item)]])
  }
  
  # Step 4: Compute movement direction
  data_processed <- data_processed %>%
    group_by(.data[[response_id_col]]) %>%
    rowwise() %>%
    mutate(
      current_item_moved = get(paste0("current_", get("item_moved"))),
      last_item_moved = get(paste0("last_", get("item_moved"))),
      move_direction = case_when(
        is.na(last_item_moved) ~ "no_change",
        current_item_moved < last_item_moved ~ "up",
        current_item_moved > last_item_moved ~ "down",
        TRUE ~ "no_change"
      )
    ) %>%
    ungroup()
  
  # Step 5: Filter step != 0
  data_processed <- data_processed %>%
    filter(.data[[step_col]] != 0)
  
  # Step 6: Distance and time difference
  distance_data <- data_processed %>%
    separate(.data[[timing_col]], into = c("drag_time", "drop_time"), sep = ", ", convert = TRUE) %>%
    mutate(across(starts_with("current_"), as.numeric),
           across(starts_with("last_"), as.numeric),
           DD_diff = drop_time - drag_time,
           condition = condition_label)
  
  for (item in items) {
    distance_data[[paste0("distance_", item)]] <- distance_data[[paste0("current_", item)]] - distance_data[[paste0("last_", item)]]
  }
  
  distance_data <- distance_data %>%
    select(drag_time, DD_diff, starts_with("distance_"), all_of(c(order_col, item_f_col, step_col, response_id_col, "condition")))
  
  # Step 7: Keep only latest step per item.f
  distance_data <- distance_data %>%
    group_by(.data[[response_id_col]]) %>%
    arrange(.data[[step_col]]) %>%
    filter(!duplicated(.data[[item_f_col]])) %>%
    ungroup()
  
  # Step 8: Create full grid of items per respondent
  unique_ids <- distance_data %>%
    distinct(.data[[response_id_col]]) %>%
    pull()
  
  distance_grid <- expand.grid(
    ResponseId = unique_ids,
    item.f = item_labels
  )
  
  # Step 9: Merge and compute final distance columns
  full_df <- distance_grid %>%
    left_join(distance_data, by = c("ResponseId", "item.f")) %>%
    arrange(ResponseId) %>%
    mutate(distance = case_when(
      item.f == item_labels[1] ~ distance_49,
      item.f == item_labels[2] ~ distance_50,
      item.f == item_labels[3] ~ distance_64,
      item.f == item_labels[4] ~ distance_65,
      item.f == item_labels[5] ~ distance_67,
      item.f == item_labels[6] ~ distance_68,
      TRUE ~ 0
    ),
    distance = replace_na(distance, 0),
    distance.abs = abs(distance),
    distance.r = distance * -1)
  
  return(full_df)
}


## ORDER ##

get_DROPT_order <- function(
    data,
    dat_long,
    initial_data,
    item_labels,
    item_codes,
    condition_label = "condition",
    step_col = "step",
    response_id_col = "ResponseId",
    item_col = "item.f",
    item_moved_col = "item_moved",
    prefix_initial = "initial.items_",
    condition_value = "condition_name"  # example: "pref1"
) {
  # Step 1: Filter and order
  touch_order <- data %>%
    filter(.data[[step_col]] != 0) %>%
    group_by(.data[[response_id_col]]) %>%
    arrange(.data[[step_col]]) %>%
    filter(!duplicated(.data[[item_moved_col]])) %>%
    mutate(order = row_number()) %>%
    ungroup() %>%
    mutate(!!condition_label := condition_value)
  
  # Step 2: Expand to all items × respondent
  long_df <- expand_grid(
    !!response_id_col := unique(touch_order[[response_id_col]]),
    !!item_col := unique(touch_order[[item_col]])
  )
  
  # Step 3: Add in order and condition
  order_max <- long_df %>%
    left_join(touch_order %>% select(all_of(c(response_id_col, item_col, "order"))),
              by = c(response_id_col, item_col)) %>%
    left_join(touch_order %>%
                select(all_of(c(response_id_col, condition_label))) %>%
                filter(!duplicated(.data[[response_id_col]])),
              by = response_id_col) %>%
    group_by(.data[[response_id_col]]) %>%
    dplyr::summarize(max_order = max(order, na.rm = TRUE), .groups = "drop")
  
  long_df <- long_df %>%
    left_join(touch_order %>% select(all_of(c(response_id_col, item_col, "order"))),
              by = c(response_id_col, item_col)) %>%
    left_join(touch_order %>%
                select(all_of(c(response_id_col, condition_label))) %>%
                filter(!duplicated(.data[[response_id_col]])),
              by = response_id_col) %>%
    left_join(order_max, by = response_id_col) %>%
    mutate(order = if_else(!is.na(order), order, max_order + 1))
  
  # Step 4: Add rank.Amount and rank.Prob
  long_df <- long_df %>%
    mutate(
      rank.Amount = case_when(
        !!sym(item_col) == item_labels[1] ~ 1,
        !!sym(item_col) == item_labels[2] ~ 2,
        !!sym(item_col) == item_labels[3] ~ 3,
        !!sym(item_col) == item_labels[4] ~ 4,
        !!sym(item_col) == item_labels[5] ~ 5,
        !!sym(item_col) == item_labels[6] ~ 6
      ),
      rank.Prob = case_when(
        !!sym(item_col) == item_labels[1] ~ 6,
        !!sym(item_col) == item_labels[2] ~ 5,
        !!sym(item_col) == item_labels[3] ~ 4,
        !!sym(item_col) == item_labels[4] ~ 3,
        !!sym(item_col) == item_labels[5] ~ 2,
        !!sym(item_col) == item_labels[6] ~ 1
      )
    )
  
  # Step 5: Add initial rank
  long_df <- long_df %>%
    left_join(initial_data %>%
                select(all_of(c(response_id_col, paste0(prefix_initial, item_codes)))),
              by = response_id_col) %>%
    mutate(
      initial.rank = case_when(
        !!sym(item_col) == item_labels[1] ~ .data[[paste0(prefix_initial, item_codes[1])]],
        !!sym(item_col) == item_labels[2] ~ .data[[paste0(prefix_initial, item_codes[2])]],
        !!sym(item_col) == item_labels[3] ~ .data[[paste0(prefix_initial, item_codes[3])]],
        !!sym(item_col) == item_labels[4] ~ .data[[paste0(prefix_initial, item_codes[4])]],
        !!sym(item_col) == item_labels[5] ~ .data[[paste0(prefix_initial, item_codes[5])]],
        !!sym(item_col) == item_labels[6] ~ .data[[paste0(prefix_initial, item_codes[6])]]
      ),
      initial.rank.r = 7 - initial.rank,
      initial.rank.r = relevel(factor(initial.rank.r), ref = 6)
    ) %>%
    select(-starts_with(prefix_initial))
  
  # Step 6: Merge with dat_long
  long_df <- long_df %>%
    left_join(dat_long %>%
                select(all_of(c(response_id_col, item_col, "Prob", "Amt"))),
              by = c(response_id_col, item_col))
  
  return(long_df)
}
### AMOUNT ###

## COUNT ##

result_amount <- get_DROPT_count(
  dat = dat,
  rank_col = "RankProcess_Amount",
  rank_all_col = "RankProcess_all_Amount",
  item_numbers = c(49, 50, 64, 65, 67, 68),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition = "Amount",
  initial_dat = initial.dat_amount,
  dat_long = dat_long1
)

# Extract outputs as needed
RankProcess_Amount <- result_amount$cleaned_data
bug_respondent_Amount <- result_amount$bug_respondent
na_subj_Amount <- result_amount$na_subj
drag_and_drop_count_Amount <- result_amount$drag_and_drop_count
drag_and_drop_count_Amount_long <- result_amount$drag_and_drop_count_long

# Create summary df and filter for tau > 0.5

Summary_data_Amount<- expand_grid(
  ResponseId = unique(RankProcess_Amount$ResponseId),
  item.f = unique(RankProcess_Amount$item.f))

Summary_data_Amount<-Summary_data_Amount%>%
  mutate(rank.amount=
           case_when(
             item.f=="Pr6_Amt1" ~1,
             item.f=="Pr5_Amt2" ~ 2,
             item.f== "Pr4_Amt3" ~ 3,
             item.f== "Pr3_Amt4" ~ 4,
             item.f == "Pr2_Amt5" ~ 5,
             item.f == "Pr1_Amt6" ~6),
         rank.Prob=case_when(
           item.f=="Pr6_Amt1" ~6,
           item.f=="Pr5_Amt2" ~ 5,
           item.f== "Pr4_Amt3" ~ 4,
           item.f== "Pr3_Amt4" ~ 3,
           item.f == "Pr2_Amt5" ~ 2,
           item.f == "Pr1_Amt6" ~1
         ))%>%
  left_join(dat%>%select(ResponseId,starts_with("rank_amount_")),by="ResponseId")%>%
  mutate(Subj.rank=case_when(
    item.f=="Pr6_Amt1" ~ rank_amount_49,
    item.f=="Pr5_Amt2" ~ rank_amount_50,
    item.f== "Pr4_Amt3" ~ rank_amount_64,
    item.f== "Pr3_Amt4" ~ rank_amount_65,
    item.f == "Pr2_Amt5" ~ rank_amount_67,
    item.f == "Pr1_Amt6" ~ rank_amount_68))%>%
  select(-c(starts_with("rank_amount_")))%>%
  group_by(ResponseId) %>%
  mutate(Tau = -cor(Subj.rank, rank.amount, method = "kendall")) %>%
  ungroup()

# Check for tau less than 0.5 - used later
Amount_tau_less0.5 <- Summary_data_Amount %>%
  filter(Tau <= 0.5) %>%
  distinct(ResponseId) %>%
  pull(ResponseId)

## Count: fix long df ##

# Further recoding of the long df
drag_and_drop_count_Amount_long<-drag_and_drop_count_Amount_long%>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  mutate(rank.Amount=case_when(
    item.f=="Pr6_Amt1" ~1,
    item.f=="Pr5_Amt2" ~ 2,
    item.f== "Pr4_Amt3" ~ 3,
    item.f== "Pr3_Amt4" ~ 4,
    item.f == "Pr2_Amt5" ~ 5,
    item.f == "Pr1_Amt6" ~6
  ),
  rank.Prob=case_when(
    item.f=="Pr6_Amt1" ~6,
    item.f=="Pr5_Amt2" ~ 5,
    item.f== "Pr4_Amt3" ~ 4,
    item.f== "Pr3_Amt4" ~ 3,
    item.f == "Pr2_Amt5" ~ 2,
    item.f == "Pr1_Amt6" ~1))%>%
  left_join(initial.dat_amount%>%select(ResponseId,starts_with("initial.items_")),by="ResponseId")%>%
  mutate(initial.rank=case_when(
    item.f=="Pr6_Amt1" ~ initial.items_49,
    item.f=="Pr5_Amt2" ~ initial.items_50,
    item.f=="Pr4_Amt3" ~ initial.items_64,
    item.f=="Pr3_Amt4" ~ initial.items_65,
    item.f=="Pr2_Amt5" ~ initial.items_67,
    item.f=="Pr1_Amt6" ~ initial.items_68
  ),
  initial.rank.r=relevel(factor(7-initial.rank), ref = 6),
  N_ind=case_when(
    N==0~0,
    TRUE~1)
  )%>%
  select(-c(starts_with("initial.items_")))


## DISTANCE ##

Distance_Amount.cleanup.df <- get_DROPT_distance(
  data = RankProcess_Amount,
  items = c("49", "50", "64", "65", "67", "68"),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition_label = "Amount"
)

## ORDER ##

touch_order_analysis.long_Amount <- get_DROPT_order(
  data = RankProcess_Amount,
  dat_long = dat_long1,
  initial_data = initial.dat_amount,
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  item_codes = c("49", "50", "64", "65", "67", "68"),
  condition_value = "Amount"
)
### PROBABILITY ###

## COUNT ##

result_prob <- get_DROPT_count(
  dat = dat,
  rank_col = "RankProcess_Prob",
  rank_all_col = "RankProcess_all_Prob",
  item_numbers = c(49, 50, 64, 65, 67, 68),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition = "Prob",
  initial_dat = initial.dat_prob,
  dat_long = dat_long1
)

# Extract outputs as needed
RankProcess_Prob <- result_prob$cleaned_data
bug_respondent_Prob <- result_prob$bug_respondent
na_subj_Prob <- result_prob$na_subj
drag_and_drop_count_Prob <- result_prob$drag_and_drop_count
drag_and_drop_count_Prob_long <- result_prob$drag_and_drop_count_long

# Create summary df and filter for tau > 0.5
Summary_data_Prob<- expand_grid(
 ResponseId = unique(RankProcess_Prob$ResponseId),
 item.f = unique(RankProcess_Prob$item.f))

Summary_data_Prob<-Summary_data_Prob%>%
  mutate(rank.Amount=
           case_when(
    item.f=="Pr6_Amt1" ~1,
    item.f=="Pr5_Amt2" ~ 2,
    item.f== "Pr4_Amt3" ~ 3,
    item.f== "Pr3_Amt4" ~ 4,
    item.f == "Pr2_Amt5" ~ 5,
    item.f == "Pr1_Amt6" ~6),
         rank.prob=case_when(
    item.f=="Pr6_Amt1" ~6,
    item.f=="Pr5_Amt2" ~ 5,
    item.f== "Pr4_Amt3" ~ 4,
    item.f== "Pr3_Amt4" ~ 3,
    item.f == "Pr2_Amt5" ~ 2,
    item.f == "Pr1_Amt6" ~1
         ))%>%
  left_join(dat%>%select(ResponseId,starts_with("rank_prob_")),by="ResponseId")%>%
  mutate(Subj.rank=case_when(
    item.f=="Pr6_Amt1" ~ rank_prob_49,
    item.f=="Pr5_Amt2" ~ rank_prob_50,
    item.f== "Pr4_Amt3" ~ rank_prob_64,
    item.f== "Pr3_Amt4" ~ rank_prob_65,
    item.f == "Pr2_Amt5" ~ rank_prob_67,
    item.f == "Pr1_Amt6" ~ rank_prob_68))%>%
  select(-c(starts_with("rank_prob_")))%>%
  group_by(ResponseId) %>%
  mutate(Tau =- cor(Subj.rank, rank.prob, method = "kendall")) %>%
  ungroup()

# Check for tau less than 0.5 - used later
Prob_tau_less0.5 <- Summary_data_Prob %>%
  filter(Tau <= 0.5) %>%
  distinct(ResponseId) %>%
  pull(ResponseId)

## Count: fix long df ##

# Further recoding of the long df
drag_and_drop_count_Prob_long<-drag_and_drop_count_Prob_long%>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  mutate(rank.Prob=case_when(
    item.f=="Pr6_Amt1" ~1,
    item.f=="Pr5_Amt2" ~ 2,
    item.f== "Pr4_Amt3" ~ 3,
    item.f== "Pr3_Amt4" ~ 4,
    item.f == "Pr2_Amt5" ~ 5,
    item.f == "Pr1_Amt6" ~6
  ),
  rank.Prob=case_when(
    item.f=="Pr6_Amt1" ~6,
    item.f=="Pr5_Amt2" ~ 5,
    item.f== "Pr4_Amt3" ~ 4,
    item.f== "Pr3_Amt4" ~ 3,
    item.f == "Pr2_Amt5" ~ 2,
    item.f == "Pr1_Amt6" ~1))%>%
  left_join(initial.dat_prob%>%
  select(ResponseId,starts_with("initial.items_")),by="ResponseId")%>%
  mutate(initial.rank=case_when(
    item.f=="Pr6_Amt1" ~ initial.items_49,
    item.f=="Pr5_Amt2" ~ initial.items_50,
    item.f=="Pr4_Amt3" ~ initial.items_64,
    item.f=="Pr3_Amt4" ~ initial.items_65,
    item.f=="Pr2_Amt5" ~ initial.items_67,
    item.f=="Pr1_Amt6" ~ initial.items_68
  ),
  initial.rank.r=relevel(factor(7-initial.rank), ref = 6),
  N_ind=case_when(
    N==0~0,
    TRUE~1)
  )%>%
  select(-c(starts_with("initial.items_")))


## DISTANCE ##

Distance_Prob.cleanup.df <- get_DROPT_distance(
  data = RankProcess_Prob,
  items = c("49", "50", "64", "65", "67", "68"),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition_label = "Prob"
)

## ORDER ##

touch_order_analysis.long_Prob <- get_DROPT_order(
  data = RankProcess_Prob,
  dat_long = dat_long1,
  initial_data = initial.dat_prob,
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  item_codes = c("49", "50", "64", "65", "67", "68"),
  condition_value = "Prob"
)
### PREFERENCE 1 ###

## COUNT ##

result_prefer1 <- get_DROPT_count(
  dat = dat,
  rank_col = "RankProcess_Prefer1",
  rank_all_col = "RankProcess_all_Prefer1",
  item_numbers = c(49, 50, 64, 65, 67, 68),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition = "Prefer1",
  initial_dat = initial.dat_pref1,
  dat_long = dat_long1
)

# Extract outputs as needed
RankProcess_Prefer1 <- result_prefer1$cleaned_data
bug_respondent_Prefer1 <- result_prefer1$bug_respondent
na_subj_Prefer1 <- result_prefer1$na_subj
drag_and_drop_count_Prefer1 <- result_prefer1$drag_and_drop_count
drag_and_drop_count_Prefer1_long <- result_prefer1$drag_and_drop_count_long

## DISTANCE ##

Distance_pref1.cleanup.df <- get_DROPT_distance(
  data = RankProcess_Prefer1,
  items = c("49", "50", "64", "65", "67", "68"),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition_label = "Pref1"
)

## ORDER ##

touch_order_analysis.long_pref1 <- get_DROPT_order(
  data = RankProcess_Prefer1,
  dat_long = dat_long1,
  initial_data = initial.dat_pref1,
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  item_codes = c("49", "50", "64", "65", "67", "68"),
  condition_value = "Pref1"
)
### PREFERENCE 2 ###

result_prefer2 <- get_DROPT_count(
  dat = dat,
  rank_col = "RankProcess_Prefer2",
  rank_all_col = "RankProcess_all_Prefer2",
  item_numbers = c(49, 50, 64, 65, 67, 68),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition = "Prefer2",
  initial_dat = initial.dat_pref2,
  dat_long = dat_long2
)

# Extract outputs as needed
RankProcess_Prefer2 <- result_prefer2$cleaned_data
bug_respondent_Prefer2 <- result_prefer2$bug_respondent
na_subj_Prefer2 <- result_prefer2$na_subj
drag_and_drop_count_Prefer2 <- result_prefer2$drag_and_drop_count
drag_and_drop_count_Prefer2_long <- result_prefer2$drag_and_drop_count_long

## DISTANCE ##

Distance_pref2.cleanup.df <- get_DROPT_distance(
  data = RankProcess_Prefer2,
  items = c("49", "50", "64", "65", "67", "68"),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition_label = "Pref2"
)

## ORDER ##

touch_order_analysis.long_pref2 <- get_DROPT_order(
  data = RankProcess_Prefer2,
  dat_long = dat_long2,
  initial_data = initial.dat_pref2,
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  item_codes = c("49", "50", "64", "65", "67", "68"),
  condition_value = "Pref2"
)
### Combine Preference Rankings into a Master df, which is in the long format ###

# load function
make.rm <-
  function (constant, repeated, data, contrasts) 
  {
    if (!missing(constant) && is.vector(constant)) {
      if (!missing(repeated) && is.vector(repeated)) {
        if (!missing(data)) {
          dd <- dim(data)
          replen <- length(repeated)
          if (missing(contrasts)) 
            contrasts <- ordered(sapply(paste("T", 1:length(repeated), 
                                              sep = ""), rep, dd[1]))
          else contrasts <- matrix(sapply(contrasts, rep, 
                                          dd[1]), ncol = dim(contrasts)[2])
          if (length(constant) == 1) 
            cons.col <- rep(data[, constant], replen)
          else cons.col <- lapply(data[, constant], rep, 
                                  replen)
          new.df <- data.frame(cons.col, repdat = as.vector(data.matrix(data[, 
                                                                             repeated])), contrasts)
          return(new.df)
        }
      }
    }
    cat("Usage: make.rm(constant, repeated, data [, contrasts])\n")
    cat("\tWhere 'constant' is a vector of indices of non-repeated data and\n")
    cat("\t'repeated' is a vector of indices of the repeated measures data.\n")
  }


# Preselect columns
set1amt_cols <- dat %>%
  select(starts_with("Set1_L") & ends_with("_Amt")) %>%
  colnames()

set1prob_cols <- dat %>%
  select(starts_with("Set1_L") & ends_with("_Prob")) %>%
  colnames()

set1rank_cols <- dat %>%
  select(starts_with("rank_pref1")) %>%
  colnames()

set2amt_cols <- dat %>%
  select(starts_with("Set2_L") & ends_with("_Amt")) %>%
  colnames()

set2prob_cols <- dat %>%
  select(starts_with("Set2_L") & ends_with("_Prob")) %>%
  colnames()

set2rank_cols <- dat %>%
  select(starts_with("rank_pref2")) %>%
  colnames()

amtrank_cols <- dat %>%
  select(starts_with("rank_prob_")) %>%
  colnames()

probrank_cols <- dat %>%
  select(starts_with("rank_amount_")) %>%
  colnames()

# Now call the make.rm function and create the first df
master_df <- make.rm(
  constant = c("ResponseId", "age", "gender_binary", "education_num", "income"),
  repeated = set1amt_cols,
  data = dat
)

# rename repdat column
master_df <- master_df %>%
  rename(set1_amt = "repdat")

# since the order of the other columns is identical, we can just create a new set for all the other columns
set1prob.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = set1prob_cols,
  data = dat
)

set1rank.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = set1rank_cols,
  data = dat
)

set2amt.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = set2amt_cols,
  data = dat
)

set2prob.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = set2prob_cols,
  data = dat
)

set2rank.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = set2rank_cols,
  data = dat
)

amtranks.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = amtrank_cols,
  data = dat
)

probranks.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = probrank_cols,
  data = dat
)

# Add the columns from the other repeated measures data frames

master_df$set1_prob <- set1prob.rm$repdat
master_df$set1_rank <- set1rank.rm$repdat
master_df$set2_amt <- set2amt.rm$repdat
master_df$set2_prob <- set2prob.rm$repdat
master_df$set2_rank <- set2rank.rm$repdat
master_df$amt_subj_rank <- amtranks.rm$repdat
master_df$prob_subj_rank <- probranks.rm$repdat

# Add labels for the bets
master_df$bet_label[master_df$contrasts == "T1"] <- "Pr6_Amt1"
master_df$bet_label[master_df$contrasts == "T2"] <- "Pr5_Amt2"
master_df$bet_label[master_df$contrasts == "T3"] <- "Pr4_Amt3"
master_df$bet_label[master_df$contrasts == "T4"] <- "Pr3_Amt4"
master_df$bet_label[master_df$contrasts == "T5"] <- "Pr2_Amt5"
master_df$bet_label[master_df$contrasts == "T6"] <- "Pr1_Amt6"

### Finally, add the 3F measures from the data frames created before
# Set 1: Touch count df to add touch count (numeric and binary)
master_df <- master_df %>%
  left_join(
    drag_and_drop_count_Prefer1_long %>%
      select(ResponseId, item.f, N, N_ind) %>%
      rename(
        bet_label = item.f,
        set1_touch_count = N,
        set1_touch_count_binary = N_ind
      ),
    by = c("ResponseId", "bet_label")
  )

# Set 1: Touch order (order and max order)
master_df <- master_df %>%
  left_join(
    touch_order_analysis.long_pref1 %>%
      select(ResponseId, item.f, order, max_order) %>%
      rename(
        bet_label = item.f,
        set1_order = order,
        set1_max_order = max_order
      ),
    by = c("ResponseId", "bet_label")
  )


# Set 1: Drag distance
master_df <- master_df %>%
  left_join(
    Distance_pref1.cleanup.df %>%
      select(ResponseId, item.f, distance.r) %>%
      rename(
        bet_label = item.f,
        set1_drag_distance.r = distance.r
      ),
    by = c("ResponseId", "bet_label")
  )

# Set 1: initial order
# first, create a tmp df.
dat_tmp1 <- dat %>%
  select(ResponseId, RankProcess_Prefer1)

dat_extracted1 <- dat_tmp1 %>%
  mutate(
    item_string = str_extract(RankProcess_Prefer1, "\\{[^}]*\\}"),  # get first curly bracket contents
    item_string = str_remove_all(item_string, "\\{|\\}"),           # remove braces
    item_string = str_split(item_string, ";") %>% 
                  sapply(function(x) x[2]),                          # get text after semicolon
    item_numbers = str_split(item_string, ",")                      # split into vector of numbers
  )

dat_tmp1_long <- dat_extracted1 %>%
  select(ResponseId, item_numbers) %>%
  unnest(item_numbers) %>%
  group_by(ResponseId) %>%
  mutate(
    item_number = as.integer(str_trim(item_numbers)),
    set1_initial_order = row_number(),
    bet_label = case_when(
      item_number == 49 ~ "Pr6_Amt1",
      item_number == 50 ~ "Pr5_Amt2",
      item_number == 64 ~ "Pr4_Amt3",
      item_number == 65 ~ "Pr3_Amt4",
      item_number == 67 ~ "Pr2_Amt5",
      item_number == 68 ~ "Pr1_Amt6",
      TRUE ~ NA_character_
    )
  ) %>%
  select(ResponseId, item_number, set1_initial_order, bet_label) %>%
  ungroup()

# merge with master_df
master_df <- master_df %>%
  left_join(
    dat_tmp1_long %>%
      select(ResponseId, bet_label, set1_initial_order),
    by = c("ResponseId", "bet_label")
  )

### Set 2 3F
# Set 2: Touch count df to add touch count (numeric and binary)
master_df <- master_df %>%
  left_join(
    drag_and_drop_count_Prefer2_long %>%
      select(ResponseId, item.f, N, N_ind) %>%
      rename(
        bet_label = item.f,
        set2_touch_count = N,
        set2_touch_count_binary = N_ind
      ),
    by = c("ResponseId", "bet_label")
  )

# Set 2: Touch order (order and max order)
master_df <- master_df %>%
  left_join(
    touch_order_analysis.long_pref2 %>%
      select(ResponseId, item.f, order, max_order) %>%
      rename(
        bet_label = item.f,
        set2_order = order,
        set2_max_order = max_order
      ),
    by = c("ResponseId", "bet_label")
  )


# Set 2: Drag distance
master_df <- master_df %>%
  left_join(
    Distance_pref2.cleanup.df %>%
      select(ResponseId, item.f, distance.r) %>%
      rename(
        bet_label = item.f,
        set2_drag_distance.r = distance.r
      ),
    by = c("ResponseId", "bet_label")
  )

# Set 2: initial order
# first, create a tmp df.
dat_tmp2 <- dat %>%
  select(ResponseId, RankProcess_Prefer2)

dat_extracted2 <- dat_tmp2 %>%
  mutate(
    item_string = str_extract(RankProcess_Prefer2, "\\{[^}]*\\}"),  # get first curly bracket contents
    item_string = str_remove_all(item_string, "\\{|\\}"),           # remove braces
    item_string = str_split(item_string, ";") %>% 
                  sapply(function(x) x[2]),                          # get text after semicolon
    item_numbers = str_split(item_string, ",")                      # split into vector of numbers
  )

dat_tmp2_long <- dat_extracted2 %>%
  select(ResponseId, item_numbers) %>%
  unnest(item_numbers) %>%
  group_by(ResponseId) %>%
  mutate(
    item_number = as.integer(str_trim(item_numbers)),
    set2_initial_order = row_number(),
    bet_label = case_when(
      item_number == 49 ~ "Pr6_Amt1",
      item_number == 50 ~ "Pr5_Amt2",
      item_number == 64 ~ "Pr4_Amt3",
      item_number == 65 ~ "Pr3_Amt4",
      item_number == 67 ~ "Pr2_Amt5",
      item_number == 68 ~ "Pr1_Amt6",
      TRUE ~ NA_character_
    )
  ) %>%
  select(ResponseId, item_number, set2_initial_order, bet_label) %>%
  ungroup()

# merge with master_df
master_df <- master_df %>%
  left_join(
    dat_tmp2_long %>%
      select(ResponseId, bet_label, set2_initial_order),
    by = c("ResponseId", "bet_label")
  )

# add centered values for amount and probability
master_df$set1_amt.c <- scale(master_df$set1_amt)
master_df$set2_amt.c <- scale(master_df$set2_amt)
master_df$set1_prob.c <- scale(master_df$set1_prob)
master_df$set2_prob.c <- scale(master_df$set2_prob)
### PREP: Binary Data ###

binary_cols <- dat %>%
  select(starts_with("Binary")) %>%
  colnames()

# Now call the make.rm function and create the first df
binary.rm <- make.rm(
  constant = c("ResponseId", "age", "gender_binary", "education_num", "income"),
  repeated = binary_cols,
  data = dat
)

# rename repdat column
binary.rm <- binary.rm %>%
  rename(binary_choice = "repdat")

binary.rm$bet_number[binary.rm$contrasts == "T1"] <- "Bet1"
binary.rm$bet_number[binary.rm$contrasts == "T2"] <- "Bet2"
binary.rm$bet_number[binary.rm$contrasts == "T3"] <- "Bet3"
binary.rm$bet_number[binary.rm$contrasts == "T4"] <- "Bet4"
binary.rm$bet_number[binary.rm$contrasts == "T5"] <- "Bet5"

binary.rm$Chose_P_Bet <- ifelse(binary.rm$binary_choice == 2, 1, 0)

Data Recording Check

Sometimes, recording errors occur using the DROP-T Method. The code below shows how many observations are “thrown out” and why.

  • Bug Respondents: Bug respondents have a recording error in the RankProcess_all column. For instance, they have a comma at the end of a string, which shouldn’t happen
  • NA Respondents: After coding, these respondents have errors in the RankProcess column, such as two commas after one another.
# Amount
n_bug_amount <- length(bug_respondent_Amount)
n_na_amount  <- length(na_subj_Amount)

# Probability
n_bug_prob <- length(bug_respondent_Prob)
n_na_prob  <- length(na_subj_Prob)

# Preference 1
n_bug_pref1 <- length(bug_respondent_Prefer1)
n_na_pref1  <- length(na_subj_Prefer1)

# Preference 2
n_bug_pref2 <- length(bug_respondent_Prefer2)
n_na_pref2  <- length(na_subj_Prefer2)

# Table
bug_table <- data.frame(
  Task = c("Amount", "Probability", "Preference 1", "Preference 2"),
  Bug = c(n_bug_amount, n_bug_prob, n_bug_pref1, n_bug_pref2),
  `NA` = c(n_na_amount,  n_na_prob,  n_na_pref1,  n_na_pref2)
)

print(bug_table)
##           Task Bug NA.
## 1       Amount   1   5
## 2  Probability   2   7
## 3 Preference 1   2   7
## 4 Preference 2   4   5

Descriptive Statistics of Respondent Effort in the Task

Drag-and-drop count

Definition: The number of drag-and-drop actions (recorded with the RankCount) - only counts when the a drag-and-drop action results in an order change

### plot ###

plot_quiz <- function(data, column, title, item_n) {
  n_obs <- nrow(data)  # Total number of rows
  mean_val <- mean(data[[column]], na.rm = TRUE)  # Calculate mean
  median_val <- median(data[[column]], na.rm = TRUE)  # Calculate median
  subtitle_text <- paste0("Item.N = ", item_n, "; Subj.N = ", n_obs,
                          ", Mean = ", round(mean_val, 2),
                          ", Median = ", round(median_val, 2))
  
  ggplot(data, aes(x = "", y = !!sym(column))) + 
    geom_violin(fill = "lightblue", alpha = 0.5) + 
    geom_boxplot(width = 0.1, color = "black", alpha = 0.8) + 
    geom_jitter(width = 0.1, size = 1.5, color = "black", alpha = 0.6)+ 
    theme_minimal() + 
    labs(
      title = title,
      subtitle = subtitle_text,
      x = "",
      y = "Drag count"
    ) + 
    theme(plot.title = element_text(hjust = 0.5, face="bold"), plot.subtitle = element_text(hjust = 0.5)) +
    scale_y_continuous(breaks = seq(0, 10, by = 1), limits = c(0, 10))
}

# Create plots for each quiz

Quiz_WarmUp <- plot_quiz(dat, "RankCount_WarmUp", "Warm Up", item_n = 5)
Quiz_Prob  <- plot_quiz(dat, "RankCount_Prob", "Probability", item_n = 6)
Quiz_Amt <- plot_quiz(dat, "RankCount_Amount", "Amount", item_n = 6)
Quiz_pref1  <- plot_quiz(dat, "RankCount_Prefer1", "Preference (1st)", item_n = 6)
Quiz_Pref2  <- plot_quiz(dat, "RankCount_Prefer2", "Preference (2nd)", item_n = 6)

# Combine all plots into one graph
combined_plot <- (Quiz_WarmUp | Quiz_Prob | Quiz_Amt | Quiz_pref1| Quiz_Pref2)
combined_plot

Correlation between initial and final rank

  • RankProcess data format: {0, initial rank order}{timestamp, new rank order}, etc.
  • low correlations between initial and final rank for all 6 items across quiz conditions.
summary_stats <- all_results %>%
  group_by(task) %>%
  summarise(
    Mean = round(mean(correlation, na.rm = TRUE), 2),
    Median = round(median(correlation, na.rm = TRUE), 2),
    N = n()
  )

# summary_stats

all_results$task <- as.factor(all_results$task)
all_results$task <- factor(all_results$task, levels = c("amount", "prob", "pref1", "pref2"))



combined_plot <- ggplot(all_results, aes(x = task, y = correlation)) +
  geom_violin(fill = "lightblue", alpha = 0.5) +
  geom_jitter(aes(color = sig), width = 0.1, size = 1.5, alpha = 0.6) +
  scale_color_manual(
    values = c("TRUE" = "red", "FALSE" = "black"),
    labels = c("ns.", "p<.05"),
    name = "p value"
  ) +
  theme_minimal() +
  labs(
    title = " Correlations for Initial and Final Ranks Across Tasks",
        subtitle = paste(
      " Color Task: Mean =", summary_stats$Mean[1], ", Median =", summary_stats$Median[1], ", N =", summary_stats$N[1], "\n",
      " Prob Task: Mean =", summary_stats$Mean[2], ", Median =", summary_stats$Median[2], ", N =", summary_stats$N[2], "\n"
    ),
    x = "Task",
    y = "Correlation"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

# Print the plot
print(combined_plot)

Rank Quiz Page Duration (seconds)

# each extract a dataset for each task and then do the psych mean thing

summarize_task <- function(data, column_name, task_name) {
  data %>%
    summarise(
      Task = task_name,
      Mean_t = mean(.data[[column_name]], na.rm = TRUE),
      Median_t = median(.data[[column_name]], na.rm = TRUE),
      SD = sd(.data[[column_name]], na.rm = TRUE),
      Min = min(.data[[column_name]], na.rm = TRUE),
      Max = max(.data[[column_name]], na.rm = TRUE),
      N = sum(!is.na(.data[[column_name]]))
    )
}

# Apply the function to each dataset


summary_warmup <- summarize_task(dat, "t_warmup_Page.Submit", "WarmUp")
summary_prob <- summarize_task(dat, "t_rank_prob_Page.Submit", "Probability")
summary_amt <- summarize_task(dat, "t_rank_amount_Page.Submit", "Amount")
summary_pref1 <- summarize_task(dat, "t_rank_pref1_Page.Submit", "Preference 1ST")
summary_pref2 <- summarize_task(dat, "t_rank_pref2_Page.Submit", "Preference 2ND")
# summary_Binary <- summarize_task(dat, "t_Prob_Page.Submit", "Prob")



# Combine all summaries into one table
all_summaries <- bind_rows(summary_warmup,summary_prob, summary_amt, summary_pref1,summary_pref2 )


# t.test(dat$t_Prob_Page.Submit,dat$rank_color_t_Page.Submit)
all_summaries

Preliminary Checks

Jitter Randomization Check

1ST Preference Ranking Task

# --- True lottery values and jitter ranges ---
prob <- c(5, 9, 17, 29, 54, 94)
amount <- c(56.7, 31.5, 17.5, 9.7, 5.4, 2.9)

bounded_jitter <- list(
  prob = c(2, 2, 6, 6, 18, 5),
  amt  = c(15.95, 9.25, 4.75, 3.05, 1.25, 1.25)
)

# --- Mapping from Set1_L1–L6 to correct lottery ---
lottery_mapping <- tibble(
  input_lottery = 1:6,
  lottery = factor(7 - input_lottery)
)

# --- Reshape wide-format jitter data to long format ---
jitter_check <- bind_rows(lapply(1:6, function(i) {
  tibble(
    ResponseId = dat$ResponseId,
    input_lottery = i,
    prob = dat[[paste0("Set1_L", i, "_Prob")]],
    amount = dat[[paste0("Set1_L", i, "_Amt")]]
  )
})) %>%
  left_join(lottery_mapping, by = "input_lottery") %>%
  select(ResponseId, lottery, prob, amount) %>%
  filter(!is.na(prob))

# --- Jitter bounds and true values per lottery ---
jitter_bounds <- tibble(
  lottery = factor(1:6),
  prob_min = prob - bounded_jitter$prob,
  prob_max = prob + bounded_jitter$prob,
  amt_min  = amount - bounded_jitter$amt,
  amt_max  = amount + bounded_jitter$amt,
  true_prob = prob,
  true_amount = amount
)

# --- Plot: Probability ---
ggplot(jitter_check, aes(x = lottery, y = prob)) +
  geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
  geom_point(data = jitter_bounds, aes(y = true_prob), color = "red", size = 3) +
  geom_linerange(data = jitter_bounds, aes(ymin = prob_min, ymax = prob_max), color = "red", size = 0.8) +
  theme_minimal(base_size = 13) +
  labs(
    title = "Probability of Win per Lottery",
    subtitle = "Black = actual shown values; Red line = intended jitter range",
    x = "Lottery ID", y = "Probability (%)"
  )

# --- Plot: Amount ---
ggplot(jitter_check, aes(x = lottery, y = amount)) +
  geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
  geom_point(data = jitter_bounds, aes(y = true_amount), color = "red", size = 3) +
  geom_linerange(data = jitter_bounds, aes(ymin = amt_min, ymax = amt_max), color = "red", size = 0.8) +
  theme_minimal(base_size = 13) +
  labs(
    title = "Amount to Win per Lottery",
    subtitle = "Black = actual shown values; Red line = intended jitter range",
    x = "Lottery ID", y = "Amount ($)"
  )

# --- Flag violations ---
jitter_check_flagged <- jitter_check %>%
  left_join(jitter_bounds, by = "lottery") %>%
  mutate(
    prob_out_of_range = prob < prob_min | prob > prob_max,
    amount_out_of_range = amount < amt_min | amount > amt_max
  ) %>%
  select(
    ResponseId, lottery,
    prob, prob_min, prob_max, prob_out_of_range,
    amount, amt_min, amt_max, amount_out_of_range,
    true_prob, true_amount
  )

jitter_check_flagged%>%
  filter(prob_out_of_range=="TRUE"|amount_out_of_range=="TRUE") #empty table - good!
# --- Compute EV and summary ---
jitter_check <- jitter_check %>%
  mutate(ev = (prob / 100) * amount)

ev_summary <- jitter_check %>%
  group_by(lottery) %>%
  summarise(ev_mean = mean(ev, na.rm = TRUE))

# --- Plot: Expected Value with reference line ---
ggplot(jitter_check, aes(x = lottery, y = ev)) +
  geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
  geom_point(data = ev_summary, aes(x = lottery, y = ev_mean), color = "red", size = 3) +
  geom_hline(yintercept = 2.835, linetype = "dashed", color = "blue", linewidth = 1) +
  theme_minimal(base_size = 13) +
  labs(
    title = "Expected Value per Lottery",
    subtitle = "Black = individual EVs; Red = mean EV; Blue dashed = intended EV = 2.835",
    x = "Lottery ID", y = "Expected Value ($)"
  )

2ND Preference Ranking Task

# --- Step 0: True values ---

prob <- c(3, 6, 15, 31, 63, 84)
amount <- c(93.4, 47.7, 18.7, 9.1, 4.4, 3.4)

# BOUNDED jitter ranges used for each lottery (for visual comparison)
bounded_jitter <- list(
  prob = c(2, 1, 8, 8, 11, 10),
  amt  = c(22, 23.7, 5.3, 4.3, 0.4, 0.4)
)

# --- Step 1: Reverse mapping: Set1_L1 = lottery 6, ..., Set1_L6 = lottery 1 ---
lottery_mapping <- tibble(
  input_lottery = 1:6,
  lottery = factor(7 - input_lottery)  # Reverse order
)

# --- Step 2: Reshape wide-format data into long-format jitter_check ---
jitter_check <- bind_rows(lapply(1:6, function(i) {
  tibble(
    ResponseId = dat$ResponseId,
    input_lottery = i,
    prob = dat[[paste0("Set2_L", i, "_Prob")]],
    amount = dat[[paste0("Set2_L", i, "_Amt")]]
  )
})) %>%
  left_join(lottery_mapping, by = "input_lottery") %>%
  select(ResponseId, lottery, prob, amount) %>%
  filter(!is.na(prob))

# --- Step 3: Jitter bounds and true value per lottery ---
jitter_bounds <- tibble(
  lottery = factor(1:6),
  prob = c(3, 6, 15, 31, 63, 84),
  prob_min = prob - bounded_jitter$prob,
  prob_max = prob + bounded_jitter$prob,
  amount = c(93.4, 47.7, 18.7, 9.1, 4.4, 3.4),
  amt_min = amount - bounded_jitter$amt,
  amt_max = amount + bounded_jitter$amt
)

# -------------------------------
# Plot 1: Probability (dots + jitter range + true center)
# -------------------------------
ggplot(jitter_check, aes(x = lottery, y = prob)) +
  geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +  # actual shown values
  geom_point(data = jitter_bounds, aes(y = prob), color = "red", size = 3) +  # true center
  geom_linerange(data = jitter_bounds, aes(ymin = prob_min, ymax = prob_max), color = "red", size = 0.8) +  # expected jitter range
  theme_minimal(base_size = 13) +
  labs(
    title = "Probability of Win per Lottery",
    subtitle = "Black = actual shown values;  Red line = intended jitter range",
    x = "Lottery ID", y = "Probability (%)"
  )

# -------------------------------
# Plot 2: Amount (dots + jitter range + true center)
# -------------------------------
ggplot(jitter_check, aes(x = lottery, y = amount)) +
  geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
  geom_point(data = jitter_bounds, aes(y = amount), color = "red", size = 3) +
  geom_linerange(data = jitter_bounds, aes(ymin = amt_min, ymax = amt_max), color = "red", size = 0.8) +
  theme_minimal(base_size = 13) +
  labs(
    title = "Amount to Win per Lottery",
    subtitle = "Black = actual shown values; RRed line = intended jitter range",
    x = "Lottery ID", y = "Amount ($)"
  )

jitter_bounds_renamed <- jitter_bounds %>%
  rename(
    true_prob = prob,
    true_amount = amount
  )

jitter_check_flagged<-jitter_check %>%
  left_join(jitter_bounds_renamed, by = "lottery") %>%
  mutate(
    prob_out_of_range = prob < prob_min | prob > prob_max,
    amount_out_of_range = amount < amt_min | amount > amt_max
  ) %>%
  select(
    ResponseId, lottery,
    prob, prob_min, prob_max, prob_out_of_range,
    amount, amt_min, amt_max, amount_out_of_range,
    true_prob, true_amount
  )
jitter_check_flagged%>%
  filter(prob_out_of_range=="TRUE"|amount_out_of_range=="TRUE")
jitter_check <- jitter_check %>%
  mutate(ev = (prob / 100) * amount)
ev_summary <- jitter_check %>%
  group_by(lottery) %>%
  summarise(ev_mean = mean(ev, na.rm = TRUE))
ggplot(jitter_check, aes(x = lottery, y = ev)) +
  geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
  geom_point(data = ev_summary, aes(x = lottery, y = ev_mean), color = "red", size = 3) +
  geom_hline(yintercept = 2.835, linetype = "dashed", color = "blue", linewidth = 1) +
  theme_minimal(base_size = 13) +
  labs(
    title = "Expected Value per Lottery",
    subtitle = "Black = individual EVs; Red = mean EV; Blue dashed = intended EV = 2.835",
    x = "Lottery ID", y = "Expected Value ($)"
  )

Warm Up Task

  • The majority of participants passed the warm-up test on their first attempt.
  • Reminder: Each participant is given up to 3 attempts to pass.
ggplot(dat, aes(x = factor(WarmUpAttempt_N+1))) +
  geom_bar(fill = "steelblue", color = "black", alpha = 0.8) +
  scale_x_discrete(limits = as.character(1:3)) +
  labs(
    title = "Distribution of Warm-Up Attempts",
    x = "Number of Warm-Up Attempts",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 13)
  )

Attention and Dosage Question

  • ATTN: I work 28 hours a day (TRUE/FALSE/UNSURE)
  • DOSE: After participants completed ranking by probability, amount, and how much they like each lottery, we asked them to identify/recall the tasks they had previously ranked
dat_long2 <- dat %>%
  pivot_longer(cols = c(dose.coded, attn1.coded), names_to = "Question", values_to = "Response")

ggplot(dat_long2, aes(x = Response, fill = Question)) +
  geom_bar(position = "dodge") +  # Bar plot using counts
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5, size = 5) +  # Add count labels
  facet_wrap(~Question, scales = "free_x") +  # Separate plots for dose.coded and attn1.coded
  labs(x = "Response", y = "Count", title = "Count of Correct & Incorrect Responses") +
  theme_bw() +
  ylim(0, 200)  # Set y-axis limit

dose.wrong.subj<-dat%>%filter(dose.coded=="Incorrect")%>%pull(ResponseId)

# Display the actual things people select

Belief in Bonus

After the second ranking by “how much you like each lottery” question, we asked participants “Do you believe that you have the chance to win a bonus in the task you just completed?”

ggplot(dat%>%filter(!is.na(bonus_belief)), aes(x = bonus_belief)) +
  geom_bar() +  # Use counts instead of proportions
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5, size = 5) +  # Add count labels
  labs(x = "Belief in Getting Bonus", y = "Count", title = "Did you believe that your responses in the ranking task you\njust completed will affect your bonus payment?") +
  theme_bw() +
  theme(legend.position = "none") + # Tilt x-axis labels
ylim(c(0,200))

Device Used During the Ranking Quiz

Possible Answers: + Mouse (wired or wireless) + Trackpad (touchpad) + Touchscreen (finger or stylus) + Others

dat$device [dat$device== 1] = 'Mouse (Wired or Wireless)'
dat$device [dat$device== 2] = 'Trackpad (touchpad)'
dat$device [dat$device == 4] = 'Touchscreen (finger or stylus)'
dat$device [dat$device == 3] = 'Other'

dat$device <- factor(dat$device, levels = names(sort(table(dat$device), decreasing = TRUE)))

ggplot(dat%>%filter(!is.na(device)), aes(x = device)) +
  geom_bar() +  # Use counts instead of proportions
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5, size = 5) +  # Add count labels
  labs(x = "Device Type", y = "Count", title = "Distribution of Ranking Approaches") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") + # Tilt x-axis labels
ylim(c(0,150))

Technical Issues

“Did you run into any technical issue during the survey?”

tech_issues <- sum(dat$technical == 1)

In total, 2 people say they had technical issues.

They say:

  • I was about completing the study at first when the page refreshed and I had to start again.
  • I had a pop up clicked on it and went back again to my work

Study Purpose

“In a few sentences, please explain what you think the purpose of this study is. If you are not sure, please give your best guess.”

# install.packages("reactable")
library(reactable)
# Filter out missing or blank responses
purpose_table <- dat %>%
  filter(!is.na(purpose_open), purpose_open != "") %>%
  select(purpose_open)

# Create interactive table
reactable(purpose_table,
          searchable = TRUE,
          pagination = TRUE,
          defaultPageSize = 10,
          highlight = TRUE,
          bordered = TRUE,
          striped = TRUE,
          columns = list(
          purpose_open = colDef(name = "Open-Ended Responses: Purpose")
          ))

Accuracy in Probability and Payoff Ranking Tasks

Amount Task: 87% (169/194) Tau = 1
Probability condition: 90% (174/194) Tau = 1

# Check how many people per group have a tau of 1
# Summary_data %>%
#   filter(Tau == 1) %>%
#   group_by(Group) %>%
#   summarise(Count = n())

# Create a combined summary data frame for the both tasks. 
Summary_data <- data.frame(
  Tau = c(Summary_data_Prob$Tau, Summary_data_Amount$Tau),
  Group = rep(c("Prob", "Amount"), c(length(Summary_data_Prob$Tau), length(Summary_data_Amount$Tau))),
  ResponseId = c(Summary_data_Prob$ResponseId, Summary_data_Amount$ResponseId)  # Add ResponseId
) %>%
  filter(!duplicated(paste(ResponseId,Group)))

# now, compute mean values for the combined df
mean_values <- Summary_data %>%
  group_by(Group) %>%
  dplyr::summarize(mean_Tau = mean(Tau, na.rm = TRUE))

# Create violin plots for the combined df
ggplot(Summary_data, aes(x = Group, y = Tau, fill = Group)) +
  geom_violin(trim = FALSE, alpha = 0.5) +  # Violin plot with transparency
  geom_jitter(width = 0.1, alpha = 0.5, size = 1.5) +  # Add jitter points
  stat_summary(fun = mean, geom = "point", shape = 23, size = 4, fill = "white") +  # Show mean as point
  geom_text(data = mean_values, aes(x = Group, y = mean_Tau, label = sprintf("%.2f", mean_Tau)),
            hjust=2, fontface = "bold", size = 5, Amount = "black") +  # Add mean text labels
  scale_fill_manual(values = c("steelblue", "darkorange")) +  # Custom Amounts
  labs(
       x = "Condition",
       y = "Tau") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none",  # Remove redundant legend
        axis.title = element_text(face = "bold"),
        axis.text = element_text(face = "bold"))

It appears that in the Amount Condition, several people misunderstood the task (and ranked the items the other way around), dragging the average tau down more than in the Prob task.

Move Direction

# Amount
n_up_amount <- Distance_Amount.cleanup.df %>%
                        filter(distance.r>0) %>%
                        nrow()
n_down_amount  <- Distance_Amount.cleanup.df %>%
                        filter(distance.r<0) %>%
                        nrow()
n_none_amount  <- Distance_Amount.cleanup.df %>%
                        filter(distance.r==0) %>%
                        nrow()

# Probability
n_up_prob <- Distance_Prob.cleanup.df %>%
                        filter(distance.r>0) %>%
                        nrow()
n_down_prob  <- Distance_Prob.cleanup.df %>%
                        filter(distance.r<0) %>%
                        nrow()
n_none_prob  <- Distance_Prob.cleanup.df %>%
                        filter(distance.r==0) %>%
                        nrow()

# Preference 1
n_up_pref1 <- Distance_pref1.cleanup.df %>%
                        filter(distance.r>0) %>%
                        nrow()
n_down_pref1  <- Distance_pref1.cleanup.df %>%
                        filter(distance.r<0) %>%
                        nrow()
n_none_pref1  <- Distance_pref1.cleanup.df %>%
                        filter(distance.r==0) %>%
                        nrow()

# Preference 2
n_up_pref2 <- Distance_pref2.cleanup.df %>%
                        filter(distance.r>0) %>%
                        nrow()
n_down_pref2  <- Distance_pref2.cleanup.df %>%
                        filter(distance.r<0) %>%
                        nrow()
n_none_pref2  <- Distance_pref2.cleanup.df %>%
                        filter(distance.r==0) %>%
                        nrow()

# Table
moving_table <- data.frame(
  Task = c("Amount", "Probability", "Preference 1", "Preference 2"),
  Move_up = c(n_up_amount, n_up_prob, n_up_pref1, n_up_pref2),
  Move_down = c(n_down_amount, n_down_prob, n_down_pref1, n_down_pref2),
  Not_moved = c(n_none_amount, n_none_prob, n_none_pref1, n_none_pref2)
)

print(moving_table)
##           Task Move_up Move_down Not_moved
## 1       Amount     603        45       480
## 2  Probability     623        51       436
## 3 Preference 1     630        56       430
## 4 Preference 2     630        41       445

Ranking by Amount and Probability: 3F Analysis

In our 3F Hypothesis we argue that participants will rank items scoring higher on an attribute first, more frequently, and further up.

Note on rank coding: Throughout the note book, 6 refers to the highest rank (at the top) and 1 refers to the lowest (at the bottom)

DV1: Drag Count

Distribution of Drag Count by Item

The following set of histograms illustrates the number of times each item is touched.

## Amount: Count the number of times each item was touched
drag_drop_counts_Amount <- drag_and_drop_count_Amount_long %>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  count(item.f,N) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100,
         condition="Amount")%>%
  ungroup()

## Probability: Count the number of times each item was touched
drag_drop_counts_Prob <- drag_and_drop_count_Prob_long %>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  count(item.f,N) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100,
         condition="Prob")%>%
  ungroup()

# Combine both counts into one data frame
drag_drop_counts_combined<-rbind(drag_drop_counts_Amount,drag_drop_counts_Prob)

# Plot the combined data frame
ggplot(drag_drop_counts_combined, aes(x = factor(N), y = n)) +
  geom_bar(
    stat = "identity",
    color = "black"
  ) +
  geom_text(
    aes(
      label = paste0(n, " (", round(percentage, 1), "%)")),
    vjust = -0.5,
    size = 5,
    fontface="bold"
  ) +
  labs(
    title = "Drag Count by Item and Quiz Condition",
    x = "Drag Count",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),  # Increased size and bold text
    plot.title = element_text(hjust = 0.5),
    axis.title = element_text(size = 12),  # Adjust axis titles size if needed
    axis.text = element_text(size = 10)    # Adjust axis labels size if needed
  ) +
  facet_wrap(~ item.f * condition,ncol=2) +
  ylim(0, 200)

Model-free visualizations

The following plot illustrates the mean number (with standard errors) of drag counts for each item, grouped by condition. Note: this is based on indicator variables, i.e., whether an item was touched or not.

# Create a summary data frame for AMOUNT; this time, code as binary whether an item was touched or not
summary_data_Amount_ind<- drag_and_drop_count_Amount_long %>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  mutate(N=case_when(
    N==0~0,
    TRUE~1
  ))%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_drop_mean = mean(N, na.rm = TRUE),
            drag_drop_sd = sd(N, na.rm = TRUE),
            n = n(),
            se = drag_drop_sd / sqrt(n),
            .groups = "drop")

# Create a summary data frame for PROB; this time, code as binary whether an item was touched or not
summary_data_Prob_ind<- drag_and_drop_count_Prob_long %>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  mutate(N=case_when(
    N==0~0,
    TRUE~1
  ))%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_drop_mean = mean(N, na.rm = TRUE),
            drag_drop_sd = sd(N, na.rm = TRUE),
            n = n(),
            se = drag_drop_sd / sqrt(n),
            .groups = "drop")

# Combine the two data frames into one
summary_data_combined_ind <- bind_rows(summary_data_Amount_ind, summary_data_Prob_ind)

# Define colors for the six bets. Logic: Strong green is indicative of the $-Bet; the red color is complementary and signals the focus on probability. Use this color scheme throughout

bet_colors <- c(
  "Pr6_Amt1" = "#d73027",  # Strong red
  "Pr5_Amt2" = "#f46d43",  # Medium red
  "Pr4_Amt3" = "#fdae61",  # Light red (orange-ish)
  "Pr3_Amt4" = "#a6d96a",  # Light green
  "Pr2_Amt5" = "#66bd63",  # Medium green
  "Pr1_Amt6" = "#1a9850"   # Strong green
)

summary_data_combined_ind$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)


# Plot
ggplot(summary_data_combined_ind, aes(x = condition, y = drag_drop_mean, 
                                      group = item.f, color = item.f, shape = item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = drag_drop_mean - se,
      ymax = drag_drop_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Count",
    title = "Mean Drag Count by Condition"
  ) +
  scale_color_manual(values = bet_colors) +  
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )

# Calculate correlation - valid for both dfs
cor_result <- cor.test(dat_long1$Prob, dat_long1$Amt)
cor_estimate <- round(cor_result$estimate, 2)
# Create a summary df for amount for subsequent plotting
summary_data_Amount <- drag_and_drop_count_Amount_long%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_mean = mean(N_ind, na.rm = TRUE),
            drag_sd = sd(N_ind, na.rm = TRUE),
            n = n(),
            se = drag_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))

summary_data_Prob <- drag_and_drop_count_Prob_long%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_mean = mean(N_ind, na.rm = TRUE),
            drag_sd = sd(N_ind, na.rm = TRUE),
            n = n(),
            se = drag_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))
# Create plot with correlation in the caption
ggplot(summary_data_Amount, aes(x = Avg.Amount, y = Avg.Prob, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(hjust = 0, nudge_x = 1) +  # nudges text to the right
  xlim(min(summary_data_Amount$Avg.Amount), max(summary_data_Amount$Avg.Amount) + 10) +
  theme_minimal() +
  labs(
    title = "Attributes of Lotteries: Ranking by Amount",
    x = "Mean Amount",
    y = "Mean Probability",
    subtitle = paste("Correlation in dat.long: r =", cor_estimate)
  ) +
  theme(
    axis.title = element_text(face = "bold"), 
    plot.subtitle = element_text(hjust = 0.5), 
    plot.title = element_text(face = "bold", hjust = 0.5)
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")

Amount Task

# Create a graph showing the correlation between the amount and mean number of drags per item in the amount task
ggplot(summary_data_Amount, aes(x = Avg.Amount, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Amount Attribute", subtitle = "Amount Task", x = "Avg. Amt", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

# Create a graph showing the correlation between the probability and mean number of drags per item in the probability task
ggplot(summary_data_Amount, aes(x = Avg.Prob, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Prob Attribute", subtitle = "Prob Task", x = "Avg. Prob", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Prob Task

# Create a graph showing the correlation between the amount and mean number of drags per item in the amount task
ggplot(summary_data_Prob, aes(x = Avg.Amount, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Amount Attribute", subtitle = "Prob Task", x = "Avg. Amt", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

# Create a graph showing the correlation between the probability and mean number of drags per item in the probability task
ggplot(summary_data_Prob, aes(x = Avg.Prob, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Prob Attribute", subtitle = "Prob Task", x = "Avg. Prob", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Regressions

Nested Models

  • We created nested versions of the rank variables by coding them as follows:
    • When the condition matches the nested variable (e.g., condition == “Amount” for Amount/Prob rank [nested within Amount] ), we used mean centered value. This helps reduce multicollinearity with the condition variable, see Collinearity Check below.
    • When the condition does not match, the value was set to 0.
# Prep for nested model
drag_and_drop_count_long.combined<-rbind(drag_and_drop_count_Amount_long, drag_and_drop_count_Prob_long)%>%
  mutate(Prob.c=Prob-mean(Prob),
         Amount.c=Amt-mean(Amt))

drag_and_drop_count_long.combined<-drag_and_drop_count_long.combined%>%
  mutate(Amt.Nested_Amount=case_when(
    condition == "Amount" ~ Amt,
    condition == "Prob" ~ 0
  ),
  Amt.Nested_Prob=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~  Amt
  ),
  Prob.Nested_Amount=case_when(
    condition == "Amount" ~ Prob,
    condition == "Prob" ~ 0
  ),
  Prob.Nested_Prob=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Prob
  ),
  Amt.Nested_Amount.c=case_when(
    condition == "Amount" ~Amount.c,
    condition == "Prob" ~ 0
  ),
  Amt.Nested_Prob.c=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Amount.c
  ),
  Prob.Nested_Amount.c=case_when(
    condition == "Amount" ~Prob.c,
    condition == "Prob" ~ 0
  ),
  Prob.Nested_Prob.c=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Prob.c
  )
)
M1<-glmer(N_ind~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+(1|ResponseId),
          drag_and_drop_count_long.combined,
          family=binomial)

M2<-glmer(N_ind~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+initial.rank.r+(1|ResponseId),
          drag_and_drop_count_long.combined,
          family=binomial)

tab_model(M1,M2,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount [Nested in Amount]",
              "Amount [Nested in Probability]",
              "Probability [Nested in Amount]",
              "Probability [Nested in Probability]",
              "Condition [Probability]",
              "Initial Rank [6]","Initial Rank [5]","Initial Rank [4]","Initial Rank [3]","Initial Rank [2]"),
          dv.labels = 
            c("Drag and Drop Count<br>(coded as binary DV; only Subject RE)",
              "Drag and Drop Count<br>(coded as binary DV; Subject RE and initial Position)"))
  Drag and Drop Count
(coded as binary DV; only Subject RE)
Drag and Drop Count
(coded as binary DV; Subject RE and initial Position)
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
Intercept 1.38 0.10 4.55 <0.001 0.07 0.01 -13.95 <0.001
Amount [Nested in Amount] 1.01 0.01 1.63 0.103 1.02 0.01 2.83 0.005
Amount [Nested in Probability] 0.96 0.01 -7.21 <0.001 0.95 0.01 -7.82 <0.001
Probability [Nested in Amount] 0.97 0.00 -8.11 <0.001 0.96 0.00 -9.36 <0.001
Probability [Nested in Probability] 1.01 0.00 2.04 0.041 1.02 0.00 3.60 <0.001
Condition Probability 1.18 0.12 1.63 0.103 1.19 0.15 1.36 0.172
Initial Rank [6] 115.59 31.03 17.70 <0.001
Initial Rank [5] 94.67 24.62 17.50 <0.001
Initial Rank [4] 49.66 12.17 15.94 <0.001
Initial Rank [3] 30.26 7.10 14.54 <0.001
Initial Rank [2] 10.10 2.23 10.49 <0.001
Random Effects
σ2 3.29 3.29
τ00 0.00 ResponseId 0.14 ResponseId
ICC   0.04
N 189 ResponseId 189 ResponseId
Observations 2118 2118
Marginal R2 / Conditional R2 0.215 / NA 0.572 / 0.590

The first model has singularity issues; the second is fine.

Collinearity Checks

Model 1

car::vif(M1)
##  Amt.Nested_Amount.c    Amt.Nested_Prob.c Prob.Nested_Amount.c 
##             1.939544             1.972481             1.928123 
##   Prob.Nested_Prob.c            condition 
##             1.994129             1.032145

Model 2

car::vif(M2)
##                          GVIF Df GVIF^(1/(2*Df))
## Amt.Nested_Amount.c  2.074821  1        1.440424
## Amt.Nested_Prob.c    2.021856  1        1.421920
## Prob.Nested_Amount.c 2.021591  1        1.421827
## Prob.Nested_Prob.c   2.095049  1        1.447429
## condition            1.065344  1        1.032155
## initial.rank.r       1.407624  5        1.034782

DV2: Drag Order

Drag order is the sequence in which items are dragged and dropped. Items that are not dragged are assigned a value of (1 + the total number of dragged items). For example, if a participant moves item A three times, item B twice, and item C once, while items D, E, and F remain untouched, the drag order is: * A = 1, B = 2, C = 3, D = E = F = 4. * This coding approach simplifies cases where an item is touched multiple times (as in the example). However, as seen, it is relatively rare that participants drag the same item repeatedly, justifying this simplification.

touch_order_analysis_Amount<-RankProcess_Amount%>%
  filter(step!=0)%>%
  group_by(ResponseId)%>%
  arrange(step)%>%
  filter(!duplicated(item_moved))%>% # retains only the first instance
  mutate(order=row_number())%>%
  ungroup()%>%
  mutate(condition="Amount")


touch_order_analysis.long_Amount <- expand_grid(
  ResponseId = unique(touch_order_analysis_Amount$ResponseId),
  item.f = unique(touch_order_analysis_Amount$item.f)
)

order_max.SUBJ_Amount<-touch_order_analysis.long_Amount%>%
  left_join(touch_order_analysis_Amount%>%select(ResponseId,item.f,order),by=c("ResponseId","item.f"))%>%
  left_join(touch_order_analysis_Amount%>%select(ResponseId,condition)%>%filter(!duplicated(ResponseId)),by=c("ResponseId"))%>%
  group_by(ResponseId)%>%
  dplyr::summarize(max_order=max(order,na.rm = T))

touch_order_analysis.long_Amount<-touch_order_analysis.long_Amount%>%
  left_join(touch_order_analysis_Amount%>%select(ResponseId,item.f,order),by=c("ResponseId","item.f"))%>%
  left_join(touch_order_analysis_Amount%>%select(ResponseId,condition)%>%filter(!duplicated(ResponseId)),by=c("ResponseId"))%>%left_join(order_max.SUBJ_Amount,by="ResponseId")%>%
  mutate(order = case_when(!is.na(order)~order,
                           TRUE~max_order+1))

touch_order_analysis_Prob<-RankProcess_Prob%>%
  filter(step!=0)%>%
  group_by(ResponseId)%>%
  arrange(step)%>%
  filter(!duplicated(item_moved))%>%
  mutate(order=row_number())%>%
  ungroup()%>%
  mutate(condition="Prob")



touch_order_analysis.long_Prob <- expand_grid(
  ResponseId = unique(touch_order_analysis_Prob$ResponseId),
  item.f = unique(touch_order_analysis_Prob$item.f)
)


order_max.SUBJ_Prob<-touch_order_analysis.long_Prob%>%
  left_join(touch_order_analysis_Prob%>%select(ResponseId,item.f,order),by=c("ResponseId","item.f"))%>%
  left_join(touch_order_analysis_Prob%>%select(ResponseId,condition)%>%filter(!duplicated(ResponseId)),by=c("ResponseId"))%>%
  group_by(ResponseId)%>%
  dplyr::summarize(max_order=max(order,na.rm = T))

touch_order_analysis.long_Prob<-touch_order_analysis.long_Prob%>%
  left_join(touch_order_analysis_Prob%>%select(ResponseId,item.f,order),by=c("ResponseId","item.f"))%>%
  left_join(touch_order_analysis_Prob%>%select(ResponseId,condition)%>%filter(!duplicated(ResponseId)),by=c("ResponseId"))%>%left_join(order_max.SUBJ_Prob,by="ResponseId")%>%
  mutate(order = case_when(!is.na(order)~order,
                           TRUE~max_order+1))



# length(unique(touch_order_analysis.long_A$ResponseId)) #142, good.
# touch_order_analysis.long_A%>%
#   group_by(ResponseId)%>%
#   summarize(n_6=n_distinct(item.f))%>%
#    filter(n_6!=6) # none, good
# psych::describe(drag_order_analysis.long$order) # between 1 and 6, good.

Distribution of Drag Order by Item

touch_order_Amount <- touch_order_analysis.long_Amount %>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  count(item.f,order,condition) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100)%>%
  ungroup()

touch_order_Prob <- touch_order_analysis.long_Prob %>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  count(item.f,order,condition) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100)%>%
  ungroup()
touch_order_combined<-rbind(touch_order_Prob,touch_order_Amount)


ggplot(touch_order_combined, aes(x = factor(order), y = n)) +
  geom_bar(
    stat = "identity",
    color = "black"
  ) +
  geom_text(
    aes(
      label = paste0(n, " (", round(percentage, 1), "%)")
    ),
    vjust = -0.5,
    size = 5,
    fontface="bold"
  ) +
  labs(
    title = "Drag Order by item and Condition",
    x = "Drag Order",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),  # Facet label adjustments
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  ) +
  facet_wrap(~ item.f * condition,ncol = 2) +
  ylim(0, 150)

Distribution of Mean Drag Order

mean_order.subj_Prob <- touch_order_analysis.long_Prob %>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  group_by(ResponseId)%>%
  mutate(mean_order = mean(order),
         condition="Prob")%>%
  ungroup()
mean_order.subj_Amount<- touch_order_analysis.long_Amount %>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  group_by(ResponseId)%>%
  mutate(mean_order = mean(order),
         condition="Amount")%>%
  ungroup()


mean_order.subj_combined<-rbind(mean_order.subj_Amount,mean_order.subj_Prob)

ggplot(mean_order.subj_combined, aes(x = mean_order)) +
  geom_density(fill = "lightblue", color = "black", alpha = 0.5) +
  geom_rug(sides = "b", color = "blue") +  # Rug plot along the bottom (x-axis) for individual data points
  labs(
    title = "Density Plot of Mean Drag Order",
    x = "Mean Drag Order",
    y = "Density"
  ) +
  facet_grid(~condition)

Model-free Visualization

summary_data_Amount<- touch_order_analysis.long_Amount%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop")


summary_data_Prob <- touch_order_analysis.long_Prob%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop")


summary_data_combined <- bind_rows(summary_data_Amount, summary_data_Prob)

summary_data_combined_ind$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)



ggplot(summary_data_combined, aes(x = condition, y = order_mean, 
                                      group = item.f, color = item.f, shape = item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = order_mean - se,
      ymax = order_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Order",
    title = "Mean Drag Order by Condition"
  ) +
  scale_color_manual(values = bet_colors) +  
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )

Amount Task

summary_data_Amount <- touch_order_analysis.long_Amount%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))


ggplot(summary_data_Amount, aes(x = Avg.Amount, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Amount Attribute", subtitle = "Amount Task", x = "Avg. Amt", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")

ggplot(summary_data_Amount, aes(x = Avg.Prob, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Prob Attribute", subtitle = "Amount Task", x = "Avg. Prob", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Prob Task

summary_data_Prob <- touch_order_analysis.long_Prob%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))


ggplot(summary_data_Prob, aes(x = Avg.Amount, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Amount Attribute", subtitle = "Prob Task", x = "Avg. Amt", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")

ggplot(summary_data_Prob, aes(x = Avg.Prob, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Prob Attribute", subtitle = "Prob Task", x  = "Avg. Prob", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Regressions

Amount and Prob Attribute values are centered before being entered into the model.

touch_order_analysis.long_Amount$condition<-"Amount"
touch_order_analysis.long_Prob$condition<-"Prob"

touch_order_analysis.long.combined<-rbind(touch_order_analysis.long_Prob, touch_order_analysis.long_Amount)%>%
  mutate(Prob.c=Prob-mean(Prob),
         Amount.c=Amt-mean(Amt))

Nested Models

##### Nested Model
touch_order_analysis.long.combined<-touch_order_analysis.long.combined%>%
  mutate(Amt.Nested_Amount=case_when(
    condition == "Amount" ~Amt,
    condition == "Prob" ~ 0
  ),
  Amt.Nested_Prob=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Amt
  ),
  Prob.Nested_Amount=case_when(
    condition == "Amount" ~Prob,
    condition == "Prob" ~ 0
  ),
  Prob.Nested_Prob=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Prob
  ),
  Amt.Nested_Amount.c=case_when(
    condition == "Amount" ~Amount.c,
    condition == "Prob" ~ 0
  ),
  Amt.Nested_Prob.c=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Amount.c
  ),
  Prob.Nested_Amount.c=case_when(
    condition == "Amount" ~Prob.c,
    condition == "Prob" ~ 0
  ),
  Prob.Nested_Prob.c=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Prob.c
  ))

M1<-clmm(factor(order)~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition + (1|ResponseId),touch_order_analysis.long.combined)

M2<-clmm(factor(order)~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+initial.rank.r+(1|ResponseId),touch_order_analysis.long.combined)

tab_model(M1,M2,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("1|2","2|3", "3|4", "4|5", "5|6",
              "Amount [Nested in Amount]",
              "Amount [Nested in Probability]",
              "Probability [Nested in Amount]",
              "Probability [Nested in Probability]",
              "Condition [Probability]",
              "Initial Rank [6]","Initial Rank [5]","Initial Rank [4]","Initial Rank [3]","Initial Rank [2]"),
          dv.labels = 
            c("Drag and Drop Order<br>(coded as binary DV; only Subject RE)",
              "Drag and Drop Order<br>(coded as binary DV; Subject RE and initial Position)"))
  Drag and Drop Order
(coded as binary DV; only Subject RE)
Drag and Drop Order
(coded as binary DV; Subject RE and initial Position)
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
1|2 0.12 0.01 -26.06 <0.001 0.01 0.00 -31.83 <0.001
2|3 0.52 0.03 -10.24 <0.001 0.03 0.00 -24.48 <0.001
3|4 1.82 0.11 9.59 <0.001 0.16 0.02 -14.92 <0.001
4|5 7.49 0.55 27.35 <0.001 0.91 0.11 -0.77 0.439
5|6 66.39 9.27 30.06 <0.001 14.23 2.26 16.68 <0.001
Amount [Nested in Amount] 0.95 0.00 -11.69 <0.001 0.94 0.00 -12.70 <0.001
Amount [Nested in Probability] 1.01 0.00 3.44 0.001 1.02 0.00 5.12 <0.001
Probability [Nested in Amount] 1.01 0.00 3.86 <0.001 1.02 0.00 6.42 <0.001
Probability [Nested in Probability] 0.96 0.00 -14.50 <0.001 0.95 0.00 -16.12 <0.001
Condition Probability 1.03 0.08 0.33 0.738 1.06 0.08 0.74 0.461
Initial Rank [6] 0.04 0.01 -20.73 <0.001
Initial Rank [5] 0.04 0.01 -21.08 <0.001
Initial Rank [4] 0.04 0.01 -20.08 <0.001
Initial Rank [3] 0.05 0.01 -19.44 <0.001
Initial Rank [2] 0.10 0.01 -15.74 <0.001
Random Effects
σ2 3.29 3.29
τ00 0.00 ResponseId 0.04 ResponseId
ICC   0.01
N 193 ResponseId 193 ResponseId
Observations 2238 2238
Marginal R2 / Conditional R2 0.366 / NA 0.552 / 0.558

Collinearity Check

This is not possible for the clmm() family; have to find a different way

# car::vif(M1)
# car::vif(M2)

DV3: Drag Distance

Coding of Distance

After each dragging item, the distance is calculated as Last Position - Current Position of the dragged item. Positive values indicate that the item was ranked up (e.g., moved from 2nd place to the 1st place), and positive values indicate that the item is ranked down (e.g., moved from 1st place to the second place). items that are not dragged for each respondent are assigned a distance of 0

Distribution of Drag Distance

summary_stats <- Distance_Amount.cleanup.df %>%
  group_by(item.f) %>%
  dplyr::summarize(
    mean_distance = mean(distance, na.rm = TRUE),
    median_distance = median(distance, na.rm = TRUE)
  )

Distance_Amount.cleanup.df$item.f<- factor(Distance_Amount.cleanup.df$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)




ggplot(Distance_Amount.cleanup.df ,
       aes(x = -distance, fill = item.f)) +
  geom_histogram(binwidth = 1, alpha = 0.3, position = "identity") +
  labs(
    title = "Distribution of Drag Distance - Amount Task",
    x = "Distance",
    y = "Count",
    fill = "item"
  ) +
  theme_minimal()+
  facet_grid(~item.f)+
  xlim(6,-6)+
    scale_fill_manual(values = bet_colors)

summary_stats <- Distance_Prob.cleanup.df %>%
  group_by(item.f) %>%
  dplyr::summarize(
    mean_distance = mean(distance, na.rm = TRUE),
    median_distance = median(distance, na.rm = TRUE)
  )


 Distance_Prob.cleanup.df$item.f<- factor(Distance_Prob.cleanup.df$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)

ggplot(Distance_Prob.cleanup.df ,
       aes(x = -distance, fill = item.f)) +
  geom_histogram(binwidth = 1, alpha = 0.3, position = "identity") +
  labs(
    title = "Distribution of Drag Distance - Prob Task",
    x = "Distance",
    y = "Count",
    fill = "item"
  ) +
  theme_minimal()+
  facet_grid(~item.f)+
    scale_fill_manual(values = bet_colors)+
  xlim(6,-6)

Model-Free Visualization

Distance_Amount_cleanup.df.test<-Distance_Amount.cleanup.df%>%
  select(ResponseId, item.f,distance,distance.abs)%>%
  mutate(condition="Amount")

Distance_Prob_cleanup.df.test<-Distance_Prob.cleanup.df%>%
  select(ResponseId, item.f,distance,distance.abs)%>%
  mutate(condition="Prob")

Distance_cleanup.df.combined<-rbind(Distance_Amount_cleanup.df.test,Distance_Prob_cleanup.df.test)

summary_distance_data <- Distance_cleanup.df.combined %>%
  mutate(condition=as.factor(condition),
         distance.abs=(distance))%>%
  group_by(condition, item.f) %>%
  dplyr::summarize(
    distance_mean = -mean(distance, na.rm = TRUE),
    distance_sd = sd(distance, na.rm = TRUE),
    n = n(),
    se = distance_sd / sqrt(n),
    .groups = "drop"
  )


summary_data_combined_ind$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)


ggplot(summary_distance_data, aes(x = condition, y = distance_mean, group = item.f, color = item.f,shape=item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = distance_mean - se,
      ymax = distance_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Distance",
    title = "Mean Drag Distance by Condition"
  ) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )+
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
    scale_color_manual(values = bet_colors)

If we plot the ABSOLUTE VALUE of Distance:

summary_distance_data <- Distance_cleanup.df.combined %>%
  mutate(condition=as.factor(condition),
         distance.abs=abs(distance))%>%
  group_by(condition, item.f) %>%
  dplyr::summarize(
    distance_mean = mean(distance.abs, na.rm = TRUE),
    distance_sd = sd(distance.abs, na.rm = TRUE),
    n = n(),
    se = distance_sd / sqrt(n),
    .groups = "drop"
  )

summary_data_combined_ind$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)

ggplot(summary_distance_data, aes(x = condition, y = distance_mean, group = item.f, color = item.f,shape=item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = distance_mean - se,
      ymax = distance_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Order",
    title = "Mean Drag Order by Condition"
  ) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )+
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
    scale_color_manual(values = bet_colors)

Amount Task

Distance_Amount.cleanup.df<-Distance_Amount.cleanup.df%>%
    filter(ResponseId%notin%Amount_tau_less0.5)%>%
  mutate(rank.Amount=case_when(
    item.f=="Pr6_Amt1" ~1,
    item.f=="Pr5_Amt2" ~ 2,
    item.f== "Pr4_Amt3" ~ 3,
    item.f== "Pr3_Amt4" ~ 4,
    item.f == "Pr2_Amt5" ~ 5,
    item.f == "Pr1_Amt6" ~6
  ),
  rank.Prob=case_when(
    item.f=="Pr6_Amt1" ~6,
    item.f=="Pr5_Amt2" ~ 5,
    item.f== "Pr4_Amt3" ~ 4,
    item.f== "Pr3_Amt4" ~ 3,
    item.f == "Pr2_Amt5" ~ 2,
    item.f == "Pr1_Amt6" ~1))%>%
  left_join(initial.dat_amount%>%select(ResponseId,starts_with("initial.items_")),by="ResponseId")%>%
  mutate(initial.rank=case_when(
    item.f=="Pr6_Amt1" ~ initial.items_49,
    item.f=="Pr5_Amt2" ~ initial.items_50,
    item.f=="Pr4_Amt3" ~ initial.items_64,
    item.f=="Pr3_Amt4" ~ initial.items_65,
    item.f=="Pr2_Amt5" ~ initial.items_67,
    item.f=="Pr1_Amt6" ~ initial.items_68
  ),
  initial.rank.r=7-initial.rank,
  initial.rank.r = relevel(factor(initial.rank.r), ref = 6)
  )%>%
  select(-c(starts_with("initial.items_")))%>%
  left_join(dat_long1%>%select(ResponseId,item.f,Prob,Amt),by=c("ResponseId","item.f"))


summary_data_Amount <- Distance_Amount.cleanup.df%>%
  dplyr::group_by(item.f) %>%
  dplyr::summarize(distance_mean = mean(distance, na.rm = TRUE),
            distance_sd = sd(distance, na.rm = TRUE),
            n = n(),
            se = distance_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))
  • Model Specification: Drag Count predicted by Amount and Prob attribute rank
  • Note: A negative sign was added to the distance DV. So a positive coefficient indicates that a higher value of the predictor contributes to the item being ranked further up
ggplot(summary_data_Amount, aes(x = Avg.Amount, y = -distance_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Distance and Amount Attribute", subtitle = "Amount Task", x = "Avg. Amt", y = "Avg. Drag Distance") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

ggplot(summary_data_Amount, aes(x = Avg.Prob, y = -distance_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Distance and Prob Attribute", subtitle = "Amount Task", x = "Avg. Prob", y = "Avg. Drag Distance") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Prob Task

Distance_Prob.cleanup.df<-Distance_Prob.cleanup.df%>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  mutate(rank.Amount=case_when(
    item.f=="Pr6_Amt1" ~1,
    item.f=="Pr5_Amt2" ~ 2,
    item.f== "Pr4_Amt3" ~ 3,
    item.f== "Pr3_Amt4" ~ 4,
    item.f == "Pr2_Amt5" ~ 5,
    item.f == "Pr1_Amt6" ~6
  ),
  rank.Prob=case_when(
    item.f=="Pr6_Amt1" ~6,
    item.f=="Pr5_Amt2" ~ 5,
    item.f== "Pr4_Amt3" ~ 4,
    item.f== "Pr3_Amt4" ~ 3,
    item.f == "Pr2_Amt5" ~ 2,
    item.f == "Pr1_Amt6" ~1))%>%
  left_join(initial.dat_prob%>%select(ResponseId,starts_with("initial.items_")),by="ResponseId")%>%
  mutate(initial.rank=case_when(
    item.f=="Pr6_Amt1" ~ initial.items_49,
    item.f=="Pr5_Amt2" ~ initial.items_50,
    item.f=="Pr4_Amt3" ~ initial.items_64,
    item.f=="Pr3_Amt4" ~ initial.items_65,
    item.f=="Pr2_Amt5" ~ initial.items_67,
    item.f=="Pr1_Amt6" ~ initial.items_68
  ),
  initial.rank.r=7-initial.rank,
  initial.rank.r = relevel(factor(initial.rank.r), ref = 6)
  )%>%
  select(-c(starts_with("initial.items_")))%>%
  left_join(dat_long1%>%select(ResponseId,item.f,Prob,Amt),by=c("ResponseId","item.f"))



summary_data_Prob <- Distance_Prob.cleanup.df%>%
  dplyr::group_by(item.f) %>%
  dplyr::summarize(distance_mean = mean(distance, na.rm = TRUE),
            distance_sd = sd(distance, na.rm = TRUE),
            n = n(),
            se = distance_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))
ggplot(summary_data_Prob, aes(x = Avg.Amount, y = -distance_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Distance and Amount Attribute", subtitle = "Prob Task", x = "Avg. Amt", y = "Avg. Drag Distance") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

ggplot(summary_data_Prob, aes(x = Avg.Prob, y = -distance_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Distance and Prob Attribute", subtitle = "Prob Task", x = "Avg. Prob", y = "Avg. Drag Distance") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Regressions

Amount and Prob Attribute Ranks are centered before being entered into the model.

Distance_Amount.cleanup.df$condition<-"Amount"
Distance_Prob.cleanup.df$condition<-"Prob"
Distance.cleanup.combined<-rbind(Distance_Amount.cleanup.df,Distance_Prob.cleanup.df)%>%
  mutate(Prob.c=Prob-mean(Prob),
         Amount.c=Amt-mean(Amt))

Nested Models

##### Nested Model

Distance.cleanup.combined<-Distance.cleanup.combined%>%
  mutate(Amt.Nested_Amount=case_when(
    condition == "Amount" ~Amt,
    condition == "Prob" ~ 0
  ),
  Amt.Nested_Prob=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Amt
  ),
  Prob.Nested_Amount=case_when(
    condition == "Amount" ~Prob,
    condition == "Prob" ~ 0
  ),
  Prob.Nested_Prob=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Prob
  ),
  Amt.Nested_Amount.c=case_when(
    condition == "Amount" ~Amount.c,
    condition == "Prob" ~ 0
  ),
  Amt.Nested_Prob.c=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Amount.c
  ),
  Prob.Nested_Amount.c=case_when(
    condition == "Amount" ~ Prob.c,
    condition == "Prob" ~ 0
  ),
  Prob.Nested_Prob.c=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Prob.c
  ))

M1<-lmer(distance.r~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+(1|ResponseId),Distance.cleanup.combined)
M2<-lmer(distance.r~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+initial.rank.r+(1|ResponseId),Distance.cleanup.combined)

tab_model(
  M1, M2,
  dv.labels = c(
    "Drag and Drop Distance<br>(Only Subject RE)",
    "Drag and Drop Distance<br>(Subject RE + Added Initial Position)"
  ),
  pred.labels =
            c("Intercept",
              "Amount [Nested in Amount]",
              "Amount [Nested in Probability]",
              "Probability [Nested in Amount]",
              "Probability [Nested in Probability]",
              "Condition [Probability]",
              "Initial Rank [6]","Initial Rank [5]","Initial Rank [4]","Initial Rank [3]","Initial Rank [2]"),
  show.se = T,
  show.ci = F,
  show.stat = T
)
  Drag and Drop Distance
(Only Subject RE)
Drag and Drop Distance
(Subject RE + Added Initial Position)
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 1.11 0.05 23.68 <0.001 -0.24 0.06 -4.23 <0.001
Amount [Nested in Amount] 0.02 0.00 8.95 <0.001 0.02 0.00 14.10 <0.001
Amount [Nested in Probability] -0.02 0.00 -8.71 <0.001 -0.02 0.00 -12.16 <0.001
Probability [Nested in Amount] -0.02 0.00 -9.61 <0.001 -0.02 0.00 -15.99 <0.001
Probability [Nested in Probability] 0.01 0.00 8.96 <0.001 0.02 0.00 16.37 <0.001
Condition Probability 0.01 0.05 0.15 0.881 0.00 0.03 0.12 0.903
Initial Rank [6] 2.69 0.06 48.01 <0.001
Initial Rank [5] 2.12 0.06 37.89 <0.001
Initial Rank [4] 1.57 0.06 28.01 <0.001
Initial Rank [3] 1.14 0.06 20.39 <0.001
Initial Rank [2] 0.57 0.06 10.10 <0.001
Random Effects
σ2 1.46 0.55
τ00 0.14 ResponseId 0.23 ResponseId
ICC 0.09 0.29
N 189 ResponseId 189 ResponseId
Observations 2118 2118
Marginal R2 / Conditional R2 0.330 / 0.389 0.673 / 0.768

Collinearity Check

car::vif(M1)
##  Amt.Nested_Amount.c    Amt.Nested_Prob.c Prob.Nested_Amount.c 
##             2.032084             2.036722             2.032065 
##   Prob.Nested_Prob.c            condition 
##             2.036704             1.000001
car::vif(M2)
##                          GVIF Df GVIF^(1/(2*Df))
## Amt.Nested_Amount.c  2.045435  1        1.430187
## Amt.Nested_Prob.c    2.050920  1        1.432103
## Prob.Nested_Amount.c 2.049421  1        1.431580
## Prob.Nested_Prob.c   2.049804  1        1.431714
## condition            1.000004  1        1.000002
## initial.rank.r       1.017056  5        1.001693

Ranking Over Time

Distance_Amount<-RankProcess_Amount %>%
  group_by(ResponseId)%>%
  mutate(
    parts = str_split(order, ",")
  ) %>%
  mutate(
    Rank1 = sapply(parts, function(x) x[1]),  # Extract before 1st comma
    Rank2 = sapply(parts, function(x) x[2]),  # Extract before 2nd comma
    Rank3 = sapply(parts, function(x) x[3]),  # Extract before 3rd comma
    Rank4 = sapply(parts, function(x) x[4]),  # Extract before 4th comma
    Rank5 = sapply(parts, function(x) x[5]),  # Extract before 5th comma
    Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA))  # Extract after 5th comma
  ) %>%
  select(-parts)

items_Amount <- c("49", "50", "64", "65", "67", "68")
for (item in items_Amount) {
  Distance_Amount[[paste0("current_", item)]] <- NA_integer_
}


Distance_Amount <- Distance_Amount %>%
  rowwise() %>%
  mutate(
    across(
      starts_with("current_"),
      ~ {
        item_number <- str_remove(cur_column(), "current_")  # Extract the item number
        case_when(
          Rank1 == item_number ~ 1,
          Rank2 == item_number ~ 2,
          Rank3 == item_number ~ 3,
          Rank4 == item_number ~ 4,
          Rank5 == item_number ~ 5,
          Rank6 == item_number ~ 6,
          TRUE ~ 1
        )
      }
    )
  ) %>%
  ungroup()


for (item in items_Amount) {
  Distance_Amount[[paste0("last_", item)]] <- lag(Distance_Amount[[paste0("current_", item)]])
}


Distance_Amount<-Distance_Amount%>%
  group_by(ResponseId)%>%
  rowwise() %>%
  mutate(
    current_item_moved = get(paste0("current_", item_moved)),  
    last_item_moved = get(paste0("last_", item_moved)),        
    move_direction = case_when(
      is.na(last_item_moved) ~ "no_change",  
      current_item_moved < last_item_moved ~ "up",
      current_item_moved > last_item_moved ~ "down",
      TRUE ~ "no_change"
    )
  ) %>%
  ungroup()

Distance_Amount <- Distance_Amount %>%
  group_by(ResponseId)%>%
  filter(step!=0)

TimeAnalysis.Amount<-Distance_Amount%>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  separate(timing, into = c("drag_time", "drop_time"), sep = ", ", convert = TRUE)%>%
  mutate(DD_diff=drop_time-drag_time,
         condition="Amount")%>%
  select(step,ResponseId,condition,item.f,drag_time,drop_time,DD_diff,current_49:current_68)

duplicated.n<-nrow(TimeAnalysis.Amount)
item<-c("Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")


TimeAnalysis.Amount <- TimeAnalysis.Amount %>%
  uncount(weights = 6) 

TimeAnalysis.Amount$item.f<- rep(item, times = duplicated.n)
TimeAnalysis.Amount<-TimeAnalysis.Amount%>%
  mutate(current_rank=case_when(
    item.f=="Pr6_Amt1" ~ current_49,
    item.f=="Pr5_Amt2" ~ current_50,
    item.f=="Pr4_Amt3" ~ current_64,
    item.f=="Pr3_Amt4" ~ current_65,
    item.f=="Pr2_Amt5" ~ current_67,
    item.f=="Pr1_Amt6" ~ current_68
  ))%>%
  select(-c(current_49:current_68))

item_Amounts <- c(
  "Pr6_Amt1" = "#d73027",  # Strong red
  "Pr5_Amt2" = "#f46d43",  # Medium red
  "Pr4_Amt3" = "#fdae61",  # Light red (orange-ish)
  "Pr3_Amt4" = "#a6d96a",  # Light green
  "Pr2_Amt5" = "#66bd63",  # Medium green
  "Pr1_Amt6" = "#1a9850"   # Strong green
)

item_shapes <- c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                 "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                 "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)
# TimeAnalysis.Amount #one more step here to expand the dataset

TimeAnalysis.Amount.expand <- expand_grid(
  ResponseId = unique(TimeAnalysis.Amount$ResponseId),
  step = unique(TimeAnalysis.Amount$step),
  item.f = unique(TimeAnalysis.Amount$item.f)
) %>%
  left_join(
    TimeAnalysis.Amount %>% select(ResponseId, step, item.f, current_rank),
    by = c("ResponseId", "step", "item.f")
  ) %>%
  arrange(ResponseId, item.f, step) %>%
  group_by(ResponseId, item.f) %>%
  fill(current_rank, .direction = "down")%>%
  ungroup()


Summary.Amount <- TimeAnalysis.Amount.expand %>%
  group_by(step, item.f) %>%
  dplyr::summarize(mean.current_rank = mean(current_rank),
            sd.current_rank = sd(current_rank),
            n = n(),  
            se = sd.current_rank / sqrt(n),  
            .groups = "drop")



initial.rank<-touch_order_analysis.long_Amount%>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  group_by(item.f)%>%
  mutate(initial.rank=as.numeric(initial.rank))%>%
  dplyr::summarize(mean.current_rank = 7-mean(initial.rank),
            sd.current_rank = sd(initial.rank),
            n = n(),  
            se = sd.current_rank / sqrt(n),  
            .groups = "drop")%>%
  mutate(step=0)

Summary.Amount<-rbind(Summary.Amount,
                     initial.rank)

ggplot(Summary.Amount, aes(x = step, y = mean.current_rank, 
                          color = item.f,  shape = item.f)) +
  geom_line(size = 1) +  
  geom_point(size = 6, fill = "white") +  
  geom_errorbar(aes(ymin = mean.current_rank - se, ymax = mean.current_rank + se), 
                width = 0.3, size = 1.2, alpha = 0.8) + 
  scale_color_manual(values = item_Amounts) +  
  scale_shape_manual(values = item_shapes) +  
  labs(title = "Mean Rank by Step (Amount Task)",
       x = "Step",
       y = "Mean Rank",
       color = "Item",
       linetype = "Item",
       shape = "Item") +
  theme_minimal() +  # Clean theme
  theme(legend.position = "right",
    axis.title.x = element_text(face = "bold", size = 14),  # Bold x-axis label
    axis.title.y = element_text(face = "bold", size = 14),  # Bold y-axis label
    axis.text.x = element_text(face = "bold", size = 12),   # Bold x-axis text
    axis.text.y = element_text(face = "bold", size = 12)    # Bold y-axis text
  )+
  scale_y_continuous(breaks = 6:1) +
  scale_x_continuous(breaks = 0:8) 

Distance_Prob<-RankProcess_Prob %>%
  group_by(ResponseId)%>%
  mutate(
    # Split the string into parts based on commas
    parts = str_split(order, ",")
  ) %>%
  mutate(
    Rank1 = sapply(parts, function(x) x[1]),  # Extract before 1st comma
    Rank2 = sapply(parts, function(x) x[2]),  # Extract before 2nd comma
    Rank3 = sapply(parts, function(x) x[3]),  # Extract before 3rd comma
    Rank4 = sapply(parts, function(x) x[4]),  # Extract before 4th comma
    Rank5 = sapply(parts, function(x) x[5]),  # Extract before 5th comma
    Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA))  # Extract after 5th comma
  ) %>%
  select(-parts)

items_Prob <- c("49", "50", "64", "65", "67", "68")
for (item in items_Prob) {
  Distance_Prob[[paste0("current_", item)]] <- NA_integer_
}


Distance_Prob <- Distance_Prob %>%
  rowwise() %>%
  mutate(
    across(
      starts_with("current_"),
      ~ {
        item_number <- str_remove(cur_column(), "current_")  # Extract the item number
        case_when(
          Rank1 == item_number ~ 1,
          Rank2 == item_number ~ 2,
          Rank3 == item_number ~ 3,
          Rank4 == item_number ~ 4,
          Rank5 == item_number ~ 5,
          Rank6 == item_number ~ 6,
          TRUE ~ 1 # Distance_A %>% mutate(NA_count = rowSums(is.na(select(., starts_with("current_"))))); this code somehow results in the first item always gets an NA, so manually fix this error
        )
      }
    )
  ) %>%
  ungroup()


for (item in items_Prob) {
  Distance_Prob[[paste0("last_", item)]] <- lag(Distance_Prob[[paste0("current_", item)]])
}


Distance_Prob<-Distance_Prob%>%
  group_by(ResponseId)%>%
  rowwise() %>%
  mutate(
    current_item_moved = get(paste0("current_", item_moved)),  # Get the rank of the moved item from current columns
    last_item_moved = get(paste0("last_", item_moved)),        # Get the rank of the moved item from last columns
    # Determine the movement direction; we should not see any "no_change"
    move_direction = case_when(
      is.na(last_item_moved) ~ "no_change",  
      current_item_moved < last_item_moved ~ "up",
      current_item_moved > last_item_moved ~ "down",
      TRUE ~ "no_change"
    )
  ) %>%
  ungroup()


Distance_Prob <- Distance_Prob %>%
  group_by(ResponseId)%>%
  filter(step!=0) # need to retain step 0 for steps that come before


TimeAnalysis.Prob<-Distance_Prob%>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  separate(timing, into = c("drag_time", "drop_time"), sep = ", ", convert = TRUE)%>%
  mutate(DD_diff=drop_time-drag_time,
         condition="Prob")%>%
  select(step,ResponseId,condition,item.f,drag_time,drop_time,DD_diff,current_49:current_68)

duplicated.n<-nrow(TimeAnalysis.Prob)
item<-c("Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")

TimeAnalysis.Prob <- TimeAnalysis.Prob %>%
  uncount(weights = 6) 
TimeAnalysis.Prob$item.f<- rep(item, times = duplicated.n)

TimeAnalysis.Prob<-TimeAnalysis.Prob%>%
  mutate(current_rank=case_when(
    item.f=="Pr6_Amt1" ~ current_49,
    item.f=="Pr5_Amt2" ~ current_50,
    item.f=="Pr4_Amt3" ~ current_64,
    item.f=="Pr3_Amt4" ~ current_65,
    item.f=="Pr2_Amt5" ~ current_67,
    item.f=="Pr1_Amt6" ~ current_68
  ))%>%
  select(-c(current_49:current_68))

item_Amounts <- c(
  "Pr6_Amt1" = "#d73027",  # Strong red
  "Pr5_Amt2" = "#f46d43",  # Medium red
  "Pr4_Amt3" = "#fdae61",  # Light red (orange-ish)
  "Pr3_Amt4" = "#a6d96a",  # Light green
  "Pr2_Amt5" = "#66bd63",  # Medium green
  "Pr1_Amt6" = "#1a9850"   # Strong green
)

item_shapes <- c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                 "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                 "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)

TimeAnalysis.Prob.expand <- expand_grid(
  ResponseId = unique(TimeAnalysis.Prob$ResponseId),
  step = unique(TimeAnalysis.Prob$step),
  item.f = unique(TimeAnalysis.Prob$item.f)
) %>%
  left_join(
    TimeAnalysis.Prob %>% select(ResponseId, step, item.f, current_rank),
    by = c("ResponseId", "step", "item.f")
  ) %>%
  arrange(ResponseId, item.f, step) %>%
  group_by(ResponseId, item.f) %>%
  fill(current_rank, .direction = "down")%>%
  ungroup()


Summary.Prob <- TimeAnalysis.Prob.expand %>%
  group_by(step, item.f) %>%
  dplyr::summarize(mean.current_rank = mean(current_rank),
            sd.current_rank = sd(current_rank),
            n = n(),  
            se = sd.current_rank / sqrt(n),  
            .groups = "drop")


initial.rank<-touch_order_analysis.long_Prob%>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  group_by(item.f)%>%
  mutate(initial.rank=as.numeric(initial.rank))%>%
  dplyr::summarize(mean.current_rank = 7- mean(initial.rank),
            sd.current_rank = sd(initial.rank),
            n = n(),  
            se = sd.current_rank / sqrt(n),  
            .groups = "drop")%>%
  mutate(step=0)

Summary.Prob<-rbind(Summary.Prob,
                     initial.rank)

ggplot(Summary.Prob, aes(x = step, y = mean.current_rank, 
                          color = item.f,  shape = item.f)) +
  geom_line(size = 1) +  
  geom_point(size = 6, fill = "white") +  
  geom_errorbar(aes(ymin = mean.current_rank - se, ymax = mean.current_rank + se), 
                width = 0.3, size = 1.2, alpha = 0.8) + 
  scale_color_manual(values = item_Amounts) +  
  scale_shape_manual(values = item_shapes) +  
  labs(title = "Mean Rank by Step (Prob Task)",
       x = "Step",
       y = "Mean Rank",
       color = "Item",
       linetype = "Item",
       shape = "Item") +
  theme_minimal() +  # Clean theme
  theme(legend.position = "right",
    axis.title.x = element_text(face = "bold", size = 14),  # Bold x-axis label
    axis.title.y = element_text(face = "bold", size = 14),  # Bold y-axis label
    axis.text.x = element_text(face = "bold", size = 12),   # Bold x-axis text
    axis.text.y = element_text(face = "bold", size = 12)    # Bold y-axis text
  )+
  scale_y_continuous(breaks = 6:1) +
  scale_x_continuous(breaks = 0:10) 

Correlation between 3F Measures

This only concerns the ranking-by-preference tasks

Preference 1 Ranking

# Display correlation matrix
print(round(cor(master_df[, c("set1_touch_count", "set1_order", "set1_drag_distance.r")], 
                use = "pairwise.complete.obs"), 2))
##                      set1_touch_count set1_order set1_drag_distance.r
## set1_touch_count                 1.00      -0.48                 0.51
## set1_order                      -0.48       1.00                -0.47
## set1_drag_distance.r             0.51      -0.47                 1.00
# Create SPLOM
ggpairs(master_df, 
        columns = c("set1_touch_count", "set1_order", "set1_drag_distance.r"),
        lower = list(continuous = "smooth"), 
        diag = list(continuous = "densityDiag"), 
        upper = list(continuous = "cor")) +
  theme_minimal()

Preference 2 Ranking

# Display correlation matrix
print(round(cor(master_df[, c("set2_touch_count", "set2_order", "set2_drag_distance.r")], 
                use = "pairwise.complete.obs"), 2))
##                      set2_touch_count set2_order set2_drag_distance.r
## set2_touch_count                 1.00      -0.49                 0.54
## set2_order                      -0.49       1.00                -0.51
## set2_drag_distance.r             0.54      -0.51                 1.00
# Create SPLOM
ggpairs(master_df, 
        columns = c("set2_touch_count", "set2_order", "set2_drag_distance.r"),
        lower = list(continuous = "smooth"), 
        diag = list(continuous = "densityDiag"), 
        upper = list(continuous = "cor")) +
  theme_minimal()

DV: Subjective Rank

Regress the subjective ranks on the amount and probability tasks.

Amount

amt_rob <- lm_robust(amt_subj_rank ~ set1_amt.c + set1_prob.c, data = master_df, cluster = ResponseId, se_type = "stata")

amt_lmer <- lmer(amt_subj_rank ~ set1_amt.c + set1_prob.c + (1 | ResponseId), data = master_df)

amt_clmm <- clmm(factor(amt_subj_rank) ~ set1_amt.c + set1_prob.c + (1 | ResponseId), data = master_df)

tab_model(amt_rob, amt_lmer, amt_clmm,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount (centered)",
              "Probability (centered)",
              "1|2","2|3", "3|4", "4|5", "5|6"),
          dv.labels = 
            c("DV = Amount (subj. Rank)<br>lm_robust",
              "DV = Amount (subj. Rank)<br>linear, repeated",
              "DV = Amount (subj. Rank)<br>ordinal, repeated"))
  DV = Amount (subj. Rank)
lm_robust
DV = Amount (subj. Rank)
linear, repeated
DV = Amount (subj. Rank)
ordinal, repeated
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Odds Ratios std. Error Statistic p
Intercept 3.50 0.01 502.70 <0.001 3.50 0.01 255.13 <0.001
Amount (centered) 0.79 0.03 31.40 <0.001 0.79 0.02 40.52 <0.001 55.33 NaN NaN NaN
Probability (centered) -0.98 0.02 -47.78 <0.001 -0.98 0.02 -50.24 <0.001 0.00 NaN NaN NaN
1|2 0.00 NaN NaN NaN
2|3 0.03 NaN NaN NaN
3|4 5.34 NaN NaN NaN
4|5 895.14 NaN NaN NaN
5|6 204562.15 NaN NaN NaN
Random Effects
σ2   0.22 3.29
τ00   0.00 ResponseId 0.00 ResponseId
N   194 ResponseId 194 ResponseId
Observations 1164 1164 1164
R2 / R2 adjusted 0.925 / 0.925 0.925 / NA 0.978 / NA

Probability

prob_rob <- lm_robust(prob_subj_rank ~ set1_amt.c + set1_prob.c, data = master_df, cluster = ResponseId, se_type = "stata")

prob_lmer <- lmer(prob_subj_rank ~ set1_amt.c + set1_prob.c + (1 | ResponseId), data = master_df)

prob_clmm <- clmm(factor(prob_subj_rank) ~ set1_amt.c + set1_prob.c + (1 | ResponseId), data = master_df)

tab_model(prob_rob, prob_lmer, prob_clmm,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount (centered)",
              "Probability (centered)",
              "1|2","2|3", "3|4", "4|5", "5|6"),
          dv.labels = 
            c("DV = Prob (subj. Rank)<br>lm_robust",
              "DV = Prob (subj. Rank)<br>linear, repeated",
              "DV = Prob (subj. Rank)<br>ordinal, repeated"))
  DV = Prob (subj. Rank)
lm_robust
DV = Prob (subj. Rank)
linear, repeated
DV = Prob (subj. Rank)
ordinal, repeated
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Odds Ratios std. Error Statistic p
Intercept 3.50 0.01 568.20 <0.001 3.50 0.03 122.35 <0.001
Amount (centered) -0.75 0.04 -20.34 <0.001 -0.75 0.04 -18.34 <0.001 0.08 NaN NaN NaN
Probability (centered) 0.77 0.05 15.27 <0.001 0.77 0.04 18.85 <0.001 10.31 NaN NaN NaN
1|2 0.01 NaN NaN NaN
2|3 0.13 NaN NaN NaN
3|4 1.07 NaN NaN NaN
4|5 8.71 NaN NaN NaN
5|6 152.85 NaN NaN NaN
Random Effects
σ2   0.95 3.29
τ00   0.00 ResponseId 0.00 ResponseId
N   194 ResponseId 194 ResponseId
Observations 1164 1164 1164
R2 / R2 adjusted 0.674 / 0.674 0.674 / NA 0.859 / NA

Ranking by Preference Analysis

Descriptives - 1ST Preference Task

rank_data <- dat %>%
  select(
    `49` = rank_pref1_49,
    `50` = rank_pref1_50,
    `64` = rank_pref1_64,
    `65` = rank_pref1_65,
    `67` = rank_pref1_67,
    `68` = rank_pref1_68
  ) %>%
  pivot_longer(
    everything(),
    names_to = "item_moved",
    values_to = "Subj_rank"
  ) %>%
  mutate(
    item_moved = as.integer(item_moved),
    item_label = case_when(
      item_moved == 49 ~ "Pr6_Amt1",
      item_moved == 50 ~ "Pr5_Amt2",
      item_moved == 64 ~ "Pr4_Amt3",
      item_moved == 65 ~ "Pr3_Amt4",
      item_moved == 67 ~ "Pr2_Amt5",
      item_moved == 68 ~ "Pr1_Amt6"
    ),
    item_label = factor(item_label, levels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"))
  )


ggplot(rank_data, aes(x = item_label, y = Subj_rank)) +  # reverse coded so 6 = top rank
  geom_violin(trim = FALSE, fill = "lightblue", color = "darkblue", alpha = 0.5) +
  geom_jitter(width = 0.15, height = 0, alpha = 0.6, size = 1.5, color = "black") +
  stat_summary(fun = mean, geom = "point", shape = 21, size = 3, fill = "red", color = "red") +
  labs(
    title = "Distribution of pref1 Ranks by Item",
    x = "Item",
    y = "Rank"
  ) +
  scale_y_continuous(breaks = 1:6, limits = c(1, 6)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

What’s the distribution of final ranks?

# Step 1: Define the mapping
id_to_label <- c(
  "49" = "Pr6_Amt1",
  "50" = "Pr5_Amt2",
  "64" = "Pr4_Amt3",
  "65" = "Pr3_Amt4",
  "67" = "Pr2_Amt5",
  "68" = "Pr1_Amt6"
)

# Step 2: Select relevant columns and count number of 1s
rank_counts1 <- dat %>%
  select(starts_with("rank_pref1_")) %>%
  summarise(across(everything(), ~ sum(. == 1, na.rm = TRUE))) %>%
  pivot_longer(cols = everything(), names_to = "column", values_to = "n_order_1") %>%
  mutate(
    id = sub("rank_pref1_", "", column),
    item.f = id_to_label[id]
  ) %>%
  filter(!is.na(item.f))  # Only keep mapped items


# Step 3: Plot
ggplot(rank_counts1, aes(x = item.f, y = n_order_1, fill = item.f)) +
  geom_col() +
  scale_fill_manual(values = bet_colors) +
  labs(
    title = "Count of Participants ranking the\nItems on first Position (Preference Task 1)",
    x = "Item",
    y = "Count (Rank = 1)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "none"
  )

How many people touched which lottery first?

order1_counts <- touch_order_analysis.long_pref1 %>%
  filter(order == 1) %>%            # Keep only rows where order == 1
  count(item.f, name = "n_order_1") # Count how many times each item.f appears

# Plot
ggplot(order1_counts, aes(x = item.f, y = n_order_1, fill = item.f)) +
  geom_col() +
  scale_fill_manual(values = bet_colors) +
  labs(
    title = "Count of Participants touching\neach Item first (Preference Task 1)",
    x = "Item",
    y = "Count (Order = 1)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "none"
  )

How do the ranking Profiles for these people look like?

# Only consider participants in the same "category", i.e., who arrived at the same final ranking. Let's look at the ones who ranked the highest amount/probability highest

# Only consider participants who ranked the highest Amount first
selected_ids_Pr1_Amt6 <- dat %>%
  filter(rank_pref1_68 == 1) %>%
  pull(ResponseId)

# Only consider participants who ranked the highest Probability first
selected_ids_Pr6_Amt1 <- dat %>%
  filter(rank_pref1_49 == 1) %>%
  pull(ResponseId)

# Remaining IDs
excluded_ids <- union(selected_ids_Pr1_Amt6, selected_ids_Pr6_Amt1)
remaining_ids <- dat %>%
  filter(!ResponseId %in% excluded_ids) %>%
  pull(ResponseId)


# Create Distance_Prefer2
Distance_Prefer1<-RankProcess_Prefer1 %>%
  group_by(ResponseId)%>%
  mutate(
    # Split the string into parts based on commas
    parts = str_split(order, ",")
  ) %>%
  mutate(
    Rank1 = sapply(parts, function(x) x[1]),  # Extract before 1st comma
    Rank2 = sapply(parts, function(x) x[2]),  # Extract before 2nd comma
    Rank3 = sapply(parts, function(x) x[3]),  # Extract before 3rd comma
    Rank4 = sapply(parts, function(x) x[4]),  # Extract before 4th comma
    Rank5 = sapply(parts, function(x) x[5]),  # Extract before 5th comma
    Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA))  # Extract after 5th comma
  ) %>%
  select(-parts)

items_Prefer1 <- c("49", "50", "64", "65", "67", "68")
for (item in items_Prefer1) {
  Distance_Prefer1[[paste0("current_", item)]] <- NA_integer_
}

Distance_Prefer1 <- Distance_Prefer1 %>%
  rowwise() %>%
  mutate(
    across(
      starts_with("current_"),
      ~ {
        item_number <- str_remove(cur_column(), "current_")  # Extract the item number
        case_when(
          Rank1 == item_number ~ 1,
          Rank2 == item_number ~ 2,
          Rank3 == item_number ~ 3,
          Rank4 == item_number ~ 4,
          Rank5 == item_number ~ 5,
          Rank6 == item_number ~ 6,
          TRUE ~ 1 
        )
      }
    )
  ) %>%
  ungroup()

for (item in items_Prefer1) {
  Distance_Prefer1[[paste0("last_", item)]] <- lag(Distance_Prefer1[[paste0("current_", item)]])
}

Distance_Prefer1<-Distance_Prefer1%>%
  group_by(ResponseId)%>%
  rowwise() %>%
  mutate(
    current_item_moved = get(paste0("current_", item_moved)),  # Get the rank of the moved item from current columns
    last_item_moved = get(paste0("last_", item_moved)),        # Get the rank of the moved item from last columns
    # Determine the movement direction; we should not see any "no_change"
    move_direction = case_when(
      is.na(last_item_moved) ~ "no_change",  
      current_item_moved < last_item_moved ~ "up",
      current_item_moved > last_item_moved ~ "down",
      TRUE ~ "no_change"
    )
  ) %>%
  ungroup()


# Filter Distance_Prefer2 for those ResponseIds
Ranking_per_Step_Pr1_Amt6 <- Distance_Prefer1 %>%
  filter(ResponseId %in% selected_ids_Pr1_Amt6)

Ranking_per_Step_Pr6_Amt1 <- Distance_Prefer1 %>%
  filter(ResponseId %in% selected_ids_Pr6_Amt1)

Ranking_per_Step_remaining <- Distance_Prefer1 %>%
  filter(ResponseId %in% remaining_ids)


## Create new R columns
# Initialize R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

# Function to fill R columns
fill_R_columns <- function(order_string, target_value) {
  order_string <- gsub("[[:space:][:cntrl:]]", "", order_string)  # Clean spaces and newlines
  order_numbers <- strsplit(order_string, ",")[[1]]
  position_of_value <- which(order_numbers == as.character(target_value))
  
  if (length(position_of_value) == 0) {
    return(NA)
  } else if (position_of_value == 1) {
    return(1)
  } else {
    return(position_of_value)
  }
}

# Apply function to fill R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )

Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )

Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )


# Ensure at least 9 rows per ResponseId
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 10) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()

Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 10) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()

Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 8) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()


## Summary DFs
summary_ranks_step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
  filter(step < 10) %>%
  group_by(step) %>%
  dplyr::summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),

    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

summary_ranks_step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>%
  filter(step < 10) %>%
  group_by(step) %>%
  dplyr::summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),

    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

summary_ranks_step_remaining <- Ranking_per_Step_remaining %>%
  filter(step < 8) %>%
  group_by(step) %>%
  dplyr::summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),

    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

## Long DFs
summary_ranks_step_Pr1_Amt6_long <- summary_ranks_step_Pr1_Amt6 %>%
  pivot_longer(
    cols = -step,
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

summary_ranks_step_Pr6_Amt1_long <- summary_ranks_step_Pr6_Amt1 %>%
  pivot_longer(
    cols = -step,
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

summary_ranks_step_remaining_long <- summary_ranks_step_remaining %>%
  pivot_longer(
    cols = -step,
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

# Shapes
bet_shapes <-  c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)

# Create plot
S_Bet_Profile1 <- ggplot(summary_ranks_step_Pr1_Amt6_long, 
                        aes(x = step, y = mean_rank)) +   
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se), 
                width = 0.2, color = "black") +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +  
  labs(x = "Ranking Step", y = "Mean Rank", 
       title = "Mean of Ranks by Step - $-Bet Profile\n(Preference Task 1)", 
       color = "Label", shape = "Label", fill = "Label") +
  theme_minimal() +
  theme(legend.position = "bottom", 
        plot.title = element_text(hjust = 0.5))


P_Bet_Profile1 <- ggplot(summary_ranks_step_Pr6_Amt1_long, 
                        aes(x = step, y = mean_rank)) +   
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se), 
                width = 0.2, color = "black") +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +  
  labs(x = "Ranking Step", y = "Mean Rank", 
       title = "Mean of Ranks by Step - P-Bet Profile\n(Preference Task 1)", 
       color = "Label", shape = "Label", fill = "Label") +
  theme_minimal() +
  theme(legend.position = "bottom", 
        plot.title = element_text(hjust = 0.5))


remaining_Profile1 <- ggplot(summary_ranks_step_remaining_long, 
                        aes(x = step, y = mean_rank)) +   
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se), 
                width = 0.2, color = "black") +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +
  labs(x = "Ranking Step", y = "Mean Rank", 
       title = "Mean of Ranks by Step - Unclear Profile\n(Preference Task 1)", 
       color = "Label", shape = "Label", fill = "Label") +
  theme_minimal() +
  theme(legend.position = "bottom", 
        plot.title = element_text(hjust = 0.5))


# Suppress individual legends
S_Bet_Profile1 <- S_Bet_Profile1 + theme(legend.position = "bottom")
P_Bet_Profile1 <- P_Bet_Profile1 + theme(legend.position = "bottom")

# Combine the plots with shared legend
combined_plot1 <- S_Bet_Profile1 + P_Bet_Profile1 +
  plot_layout(guides = "collect") & 
  theme(legend.position = "bottom")

# Display combined plot
combined_plot1

remaining_Profile1

Distribution of Drag Count by item

The following set of histograms illustrates the number of times each item is touched.

drag_drop_counts_Prefer1 <- drag_and_drop_count_Prefer1_long %>%
  count(item.f,N) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100,
         condition="Amount")%>%
  ungroup()


ggplot(drag_drop_counts_Prefer1, aes(x = factor(N), y = n)) +
  geom_bar(
    stat = "identity",
    color = "black"
  ) +
  geom_text(
    aes(
      label = paste0(n, " (", round(percentage, 1), "%)")
    ),
    vjust = -0.5,
    size = 5,
    fontface="bold") +
  labs(
    title = "Drag Count by item and Quiz Condition",
    x = "Drag Count",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),  # Increased size and bold text
    plot.title = element_text(hjust = 0.5),
    axis.title = element_text(size = 12),  # Adjust axis titles size if needed
    axis.text = element_text(size = 10)    # Adjust axis labels size if needed
  ) +
  facet_wrap(~ item.f,ncol=2) +
  ylim(0, 150)

Correlation with Attribute Values

Note on attribute rank coding: Across Amount and Prob Tasks, greater value indicates higher rank (i.e., 1=item in the bottom and 6=item at the top.)

summary_data_Prefer1 <- drag_and_drop_count_Prefer1_long%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_mean = mean(N_ind, na.rm = TRUE),
            drag_sd = sd(N_ind, na.rm = TRUE),
            n = n(),
            se = drag_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))

ggplot(summary_data_Prefer1, aes(x = Avg.Amount, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Amount Attribute", subtitle = "1ST Preference Task", x = "Avg. Amt", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

ggplot(summary_data_Prefer1, aes(x = Avg.Prob, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Prob Attribute", subtitle = "1ST Preference Task", x = "Avg. Prob", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Distribution of Drag Order by item

touch_order_pref1 <- touch_order_analysis.long_pref1 %>%
  count(item.f,order,condition) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100)%>%
  ungroup()

ggplot(touch_order_pref1, aes(x = factor(order), y = n)) +
  geom_bar(
    stat = "identity",
    color = "black"
  ) +
  geom_text(
    aes(
      label = paste0(n, " (", round(percentage, 1), "%)")
    ),
    vjust = -0.5,
    size = 5,
    fontface="bold"
  ) +
  labs(
    title = "Drag Order by item and Condition",
    x = "Drag Order",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),  # Facet label adjustments
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  ) +
  facet_wrap(~ item.f ,ncol = 2) +
  ylim(0, 100)

Correlation with Attribute Values

summary_data_pref1 <- touch_order_analysis.long_pref1%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))

ggplot(summary_data_pref1, aes(x = Avg.Amount, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Amount Attribute", subtitle = "1ST Preference Task", x = "Avg. Amt", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")

ggplot(summary_data_pref1, aes(x = Avg.Prob, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Prob Attribute", subtitle = "1ST Preference Task", x = "Avg. Prob", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Distribution of Drag Distance

summary_stats <- Distance_pref1.cleanup.df %>%
  group_by(item.f) %>%
  dplyr::summarize(
    mean_distance = mean(distance, na.rm = TRUE),
    median_distance = median(distance, na.rm = TRUE)
  )

Distance_pref1.cleanup.df$item.f<- factor(Distance_pref2.cleanup.df$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)



ggplot(Distance_pref1.cleanup.df ,
       aes(x = -distance, fill = item.f)) +
  geom_histogram(binwidth = 1, alpha = 0.3, position = "identity") +
  labs(
    title = "Distribution of Drag Distance - pref1 Task",
    x = "Distance",
    y = "Count",
    fill = "item"
  ) +
  theme_minimal()+
  facet_grid(~item.f)+
  xlim(6,-6)+
    scale_fill_manual(values = bet_colors)

Descriptives - 2ND Preference Task

rank_data2 <- dat %>%
  select(
    `49` = rank_pref2_49,
    `50` = rank_pref2_50,
    `64` = rank_pref2_64,
    `65` = rank_pref2_65,
    `67` = rank_pref2_67,
    `68` = rank_pref2_68
  ) %>%
  pivot_longer(
    everything(),
    names_to = "item_moved",
    values_to = "Subj_rank"
  ) %>%
  mutate(
    item_moved = as.integer(item_moved),
    item_label = case_when(
      item_moved == 49 ~ "Pr6_Amt1",
      item_moved == 50 ~ "Pr5_Amt2",
      item_moved == 64 ~ "Pr4_Amt3",
      item_moved == 65 ~ "Pr3_Amt4",
      item_moved == 67 ~ "Pr2_Amt5",
      item_moved == 68 ~ "Pr1_Amt6"
    ),
    item_label = factor(item_label, levels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"))
  )


ggplot(rank_data2, aes(x = item_label, y = Subj_rank)) +  # reverse coded so 6 = top rank
  geom_violin(trim = FALSE, fill = "lightblue", color = "darkblue", alpha = 0.5) +
  geom_jitter(width = 0.15, height = 0, alpha = 0.6, size = 1.5, color = "black") +
  stat_summary(fun = mean, geom = "point", shape = 21, size = 3, fill = "red", color = "red") +
  labs(
    title = "Distribution of pref2 Ranks by Item",
    x = "Item",
    y = "Rank"
  ) +
  scale_y_continuous(breaks = 1:6, limits = c(1, 6)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

What’s the distribution of final ranks?

# Step 1: Define the mapping
id_to_label <- c(
  "49" = "Pr6_Amt1",
  "50" = "Pr5_Amt2",
  "64" = "Pr4_Amt3",
  "65" = "Pr3_Amt4",
  "67" = "Pr2_Amt5",
  "68" = "Pr1_Amt6"
)

# Step 2: Select relevant columns and count number of 1s
rank_counts <- dat %>%
  select(starts_with("rank_pref2_")) %>%
  summarise(across(everything(), ~ sum(. == 1, na.rm = TRUE))) %>%
  pivot_longer(cols = everything(), names_to = "column", values_to = "n_order_1") %>%
  mutate(
    id = sub("rank_pref2_", "", column),
    item.f = id_to_label[id]
  ) %>%
  filter(!is.na(item.f))  # Only keep mapped items


# Step 3: Plot
ggplot(rank_counts, aes(x = item.f, y = n_order_1, fill = item.f)) +
  geom_col() +
  scale_fill_manual(values = bet_colors) +
  labs(
    title = "Count of Participants ranking the\nItems on first Position",
    x = "Item",
    y = "Count (Rank = 1)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "none"
  )

How many people touched which lottery first?

order1_counts <- touch_order_analysis.long_pref2 %>%
  filter(order == 1) %>%            # Keep only rows where order == 1
  count(item.f, name = "n_order_1") # Count how many times each item.f appears

# Plot
ggplot(order1_counts, aes(x = item.f, y = n_order_1, fill = item.f)) +
  geom_col() +
  scale_fill_manual(values = bet_colors) +
  labs(
    title = "Count of Participants touching\neach Item first",
    x = "Item",
    y = "Count (Order = 1)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "none"
  )

How do the ranking Profiles for these people look like?

# Only consider participants in the same "category", i.e., who arrived at the same final ranking. Let's look at the ones who ranked the highest amount/probability highest

# Only consider participants who ranked the highest Amount first
selected_ids_Pr1_Amt6 <- dat %>%
  filter(rank_pref2_68 == 1) %>%
  pull(ResponseId)

# Only consider participants who ranked the highest Probability first
selected_ids_Pr6_Amt1 <- dat %>%
  filter(rank_pref2_49 == 1) %>%
  pull(ResponseId)

# Remaining IDs
excluded_ids <- union(selected_ids_Pr1_Amt6, selected_ids_Pr6_Amt1)
remaining_ids <- dat %>%
  filter(!ResponseId %in% excluded_ids) %>%
  pull(ResponseId)


# Create Distance_Prefer2
Distance_Prefer2<-RankProcess_Prefer2 %>%
  group_by(ResponseId)%>%
  mutate(
    # Split the string into parts based on commas
    parts = str_split(order, ",")
  ) %>%
  mutate(
    Rank1 = sapply(parts, function(x) x[1]),  # Extract before 1st comma
    Rank2 = sapply(parts, function(x) x[2]),  # Extract before 2nd comma
    Rank3 = sapply(parts, function(x) x[3]),  # Extract before 3rd comma
    Rank4 = sapply(parts, function(x) x[4]),  # Extract before 4th comma
    Rank5 = sapply(parts, function(x) x[5]),  # Extract before 5th comma
    Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA))  # Extract after 5th comma
  ) %>%
  select(-parts)

items_Prefer2 <- c("49", "50", "64", "65", "67", "68")
for (item in items_Prefer2) {
  Distance_Prefer2[[paste0("current_", item)]] <- NA_integer_
}

Distance_Prefer2 <- Distance_Prefer2 %>%
  rowwise() %>%
  mutate(
    across(
      starts_with("current_"),
      ~ {
        item_number <- str_remove(cur_column(), "current_")  # Extract the item number
        case_when(
          Rank1 == item_number ~ 1,
          Rank2 == item_number ~ 2,
          Rank3 == item_number ~ 3,
          Rank4 == item_number ~ 4,
          Rank5 == item_number ~ 5,
          Rank6 == item_number ~ 6,
          TRUE ~ 1 
        )
      }
    )
  ) %>%
  ungroup()

for (item in items_Prefer2) {
  Distance_Prefer2[[paste0("last_", item)]] <- lag(Distance_Prefer2[[paste0("current_", item)]])
}

Distance_Prefer2<-Distance_Prefer2%>%
  group_by(ResponseId)%>%
  rowwise() %>%
  mutate(
    current_item_moved = get(paste0("current_", item_moved)),  # Get the rank of the moved item from current columns
    last_item_moved = get(paste0("last_", item_moved)),        # Get the rank of the moved item from last columns
    # Determine the movement direction; we should not see any "no_change"
    move_direction = case_when(
      is.na(last_item_moved) ~ "no_change",  
      current_item_moved < last_item_moved ~ "up",
      current_item_moved > last_item_moved ~ "down",
      TRUE ~ "no_change"
    )
  ) %>%
  ungroup()


# Filter Distance_Prefer2 for those ResponseIds
Ranking_per_Step_Pr1_Amt6 <- Distance_Prefer2 %>%
  filter(ResponseId %in% selected_ids_Pr1_Amt6)

Ranking_per_Step_Pr6_Amt1 <- Distance_Prefer2 %>%
  filter(ResponseId %in% selected_ids_Pr6_Amt1)

Ranking_per_Step_remaining <- Distance_Prefer2 %>%
  filter(ResponseId %in% remaining_ids)


## Create new R columns
# Initialize R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

# Function to fill R columns
fill_R_columns <- function(order_string, target_value) {
  order_string <- gsub("[[:space:][:cntrl:]]", "", order_string)  # Clean spaces and newlines
  order_numbers <- strsplit(order_string, ",")[[1]]
  position_of_value <- which(order_numbers == as.character(target_value))
  
  if (length(position_of_value) == 0) {
    return(NA)
  } else if (position_of_value == 1) {
    return(1)
  } else {
    return(position_of_value)
  }
}

# Apply function to fill R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )

Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )

Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )




# Ensure at least 9 rows per ResponseId
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 10) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()

Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 10) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()

Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 8) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()


## Summary DFs
summary_ranks_step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
  filter(step < 10) %>%
  group_by(step) %>%
  dplyr::summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),

    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

summary_ranks_step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>%
  filter(step < 10) %>%
  group_by(step) %>%
  dplyr::summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),

    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

summary_ranks_step_remaining <- Ranking_per_Step_remaining %>%
  filter(step < 8) %>%
  group_by(step) %>%
  dplyr::summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),

    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

## Long DFs
summary_ranks_step_Pr1_Amt6_long <- summary_ranks_step_Pr1_Amt6 %>%
  pivot_longer(
    cols = -step,
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

summary_ranks_step_Pr6_Amt1_long <- summary_ranks_step_Pr6_Amt1 %>%
  pivot_longer(
    cols = -step,
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

summary_ranks_step_remaining_long <- summary_ranks_step_remaining %>%
  pivot_longer(
    cols = -step,
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

# Shapes
bet_shapes <-  c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)

# Create plot
S_Bet_Profile <- ggplot(summary_ranks_step_Pr1_Amt6_long, 
                        aes(x = step, y = mean_rank)) +   
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se), 
                width = 0.2, color = "black") +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +  # 👈 Fixed y-axis
  labs(x = "Ranking Step", y = "Mean Rank", 
       title = "Mean of Ranks by Step - $-Bet Profile", 
       color = "Label", shape = "Label", fill = "Label") +
  theme_minimal() +
  theme(legend.position = "bottom", 
        plot.title = element_text(hjust = 0.5))


P_Bet_Profile <- ggplot(summary_ranks_step_Pr6_Amt1_long, 
                        aes(x = step, y = mean_rank)) +   
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se), 
                width = 0.2, color = "black") +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +  # Fixed y-axis
  labs(x = "Ranking Step", y = "Mean Rank", 
       title = "Mean of Ranks by Step - P-Bet Profile", 
       color = "Label", shape = "Label", fill = "Label") +
  theme_minimal() +
  theme(legend.position = "bottom", 
        plot.title = element_text(hjust = 0.5))


remaining_Profile <- ggplot(summary_ranks_step_remaining_long, 
                        aes(x = step, y = mean_rank)) +   
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se), 
                width = 0.2, color = "black") +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +  # 👈 Fixed y-axis
  labs(x = "Ranking Step", y = "Mean Rank", 
       title = "Mean of Ranks by Step - Unclear Profile", 
       color = "Label", shape = "Label", fill = "Label") +
  theme_minimal() +
  theme(legend.position = "bottom", 
        plot.title = element_text(hjust = 0.5))


# Suppress individual legends
S_Bet_Profile <- S_Bet_Profile + theme(legend.position = "bottom")
P_Bet_Profile <- P_Bet_Profile + theme(legend.position = "bottom")

# Combine the plots with shared legend
combined_plot <- S_Bet_Profile + P_Bet_Profile +
  plot_layout(guides = "collect") & 
  theme(legend.position = "bottom")

# Display combined plot
combined_plot

remaining_Profile

Distribution of Drag Count by item

The following set of histograms illustrates the number of times each item is touched.

drag_drop_counts_Prefer2 <- drag_and_drop_count_Prefer2_long %>%
  count(item.f,N) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100,
         condition="Amount")%>%
  ungroup()


ggplot(drag_drop_counts_Prefer2, aes(x = factor(N), y = n)) +
  geom_bar(
    stat = "identity",
    color = "black"
  ) +
  geom_text(
    aes(
      label = paste0(n, " (", round(percentage, 1), "%)")
    ),
    vjust = -0.5,
    size = 5,
    fontface="bold") +
  labs(
    title = "Drag Count by item and Quiz Condition",
    x = "Drag Count",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),  # Increased size and bold text
    plot.title = element_text(hjust = 0.5),
    axis.title = element_text(size = 12),  # Adjust axis titles size if needed
    axis.text = element_text(size = 10)    # Adjust axis labels size if needed
  ) +
  facet_wrap(~ item.f,ncol=2) +
  ylim(0, 150)

Correlation with Attribute Values

Note on attribute rank coding: Across Amount and Prob Tasks, greater value indicates higher rank (i.e., 1=item in the bottom and 6=item at the top.)

summary_data_Prefer2 <- drag_and_drop_count_Prefer2_long%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_mean = mean(N_ind, na.rm = TRUE),
            drag_sd = sd(N_ind, na.rm = TRUE),
            n = n(),
            se = drag_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))

ggplot(summary_data_Prefer2, aes(x = Avg.Amount, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Amount Attribute", subtitle = "2ND Preference Task", x = "Avg. Amt", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

ggplot(summary_data_Prefer2, aes(x = Avg.Prob, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Prob Attribute", subtitle = "2ND Preference Task", x = "Avg. Prob", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Distribution of Drag Order by item

touch_order_pref2 <- touch_order_analysis.long_pref2 %>%
  count(item.f,order,condition) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100)%>%
  ungroup()

ggplot(touch_order_pref2, aes(x = factor(order), y = n)) +
  geom_bar(
    stat = "identity",
    color = "black"
  ) +
  geom_text(
    aes(
      label = paste0(n, " (", round(percentage, 1), "%)")
    ),
    vjust = -0.5,
    size = 5,
    fontface="bold"
  ) +
  labs(
    title = "Drag Order by item and Condition",
    x = "Drag Order",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),  # Facet label adjustments
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  ) +
  facet_wrap(~ item.f ,ncol = 2) +
  ylim(0, 100)

Correlation with Attribute Values

summary_data_pref2 <- touch_order_analysis.long_pref2%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))
ggplot(summary_data_pref2, aes(x = Avg.Amount, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Amount Attribute", subtitle = "2ND Preference Task", x = "Avg. Amt", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")

ggplot(summary_data_pref2, aes(x = Avg.Prob, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Prob Attribute", subtitle = "2ND Preference Task", x = "Avg. Prob", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Distribution of Drag Distance

summary_stats <- Distance_pref2.cleanup.df %>%
  group_by(item.f) %>%
  dplyr::summarize(
    mean_distance = mean(distance, na.rm = TRUE),
    median_distance = median(distance, na.rm = TRUE)
  )

Distance_pref2.cleanup.df$item.f<- factor(Distance_pref2.cleanup.df$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)



ggplot(Distance_pref2.cleanup.df ,
       aes(x = -distance, fill = item.f)) +
  geom_histogram(binwidth = 1, alpha = 0.3, position = "identity") +
  labs(
    title = "Distribution of Drag Distance - pref2 Task",
    x = "Distance",
    y = "Count",
    fill = "item"
  ) +
  theme_minimal()+
  facet_grid(~item.f)+
  xlim(6,-6)+
    scale_fill_manual(values = bet_colors)

Shared Visualizations

DV 1: Drag Count

summary_data_Prefer1_ind<- drag_and_drop_count_Prefer1_long %>%
  mutate(N=case_when(
    N==0~0,
    TRUE~1
  ))%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_drop_mean = mean(N, na.rm = TRUE),
            drag_drop_sd = sd(N, na.rm = TRUE),
            n = n(),
            se = drag_drop_sd / sqrt(n),
            .groups = "drop")

summary_data_Prefer2_ind<- drag_and_drop_count_Prefer2_long %>%
  mutate(N=case_when(
    N==0~0,
    TRUE~1
  ))%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_drop_mean = mean(N, na.rm = TRUE),
            drag_drop_sd = sd(N, na.rm = TRUE),
            n = n(),
            se = drag_drop_sd / sqrt(n),
            .groups = "drop")

# Combine the two data frames into one
summary_data_pref_combined <- bind_rows(summary_data_Prefer1_ind, summary_data_Prefer2_ind)

# 
summary_data_pref_combined$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)


# Plot
ggplot(summary_data_pref_combined, aes(x = condition, y = drag_drop_mean, 
                                      group = item.f, color = item.f, shape = item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = drag_drop_mean - se,
      ymax = drag_drop_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Count",
    title = "Mean Drag Count by Condition"
  ) +
  scale_color_manual(values = bet_colors) +  
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )

DV 2: Drag Order

summary_data_Prefer1<- touch_order_analysis.long_pref1%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop")


summary_data_Prefer2 <- touch_order_analysis.long_pref2%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop")


summary_data_pref_combined <- bind_rows(summary_data_Prefer1, summary_data_Prefer2)

summary_data_pref_combined$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)



ggplot(summary_data_pref_combined, aes(x = condition, y = order_mean, 
                                      group = item.f, color = item.f, shape = item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = order_mean - se,
      ymax = order_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Order",
    title = "Mean Drag Order by Condition"
  ) +
  scale_color_manual(values = bet_colors) +  
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )

DV 3: Drag Distance

Distance_pref1_cleanup.df.test<-Distance_pref1.cleanup.df%>%
  select(ResponseId, item.f,distance,distance.abs)%>%
  mutate(condition="Pref1")

Distance_pref2_cleanup.df.test<-Distance_pref2.cleanup.df%>%
  select(ResponseId, item.f,distance,distance.abs)%>%
  mutate(condition="Pref2")

Distance_cleanup.df.combined<-rbind(Distance_pref1_cleanup.df.test,Distance_pref2_cleanup.df.test)

summary_distance_data_pref <- Distance_cleanup.df.combined %>%
  mutate(condition=as.factor(condition),
         distance.abs=(distance))%>%
  group_by(condition, item.f) %>%
  dplyr::summarize(
    distance_mean = -mean(distance, na.rm = TRUE),
    distance_sd = sd(distance, na.rm = TRUE),
    n = n(),
    se = distance_sd / sqrt(n),
    .groups = "drop"
  )


summary_distance_data_pref$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)


ggplot(summary_distance_data_pref, aes(x = condition, y = distance_mean, group = item.f, color = item.f,shape=item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = distance_mean - se,
      ymax = distance_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Distance",
    title = "Mean Drag Distance by Condition"
  ) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )+
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
    scale_color_manual(values = bet_colors)

Shared Regressions

### Prep: Add scaled amount and probability ###

# Count
drag_and_drop_count_Prefer1_long$Prob.c <- scale(drag_and_drop_count_Prefer1_long$Prob)
drag_and_drop_count_Prefer1_long$Amt.c <- scale(drag_and_drop_count_Prefer1_long$Amt)
drag_and_drop_count_Prefer2_long$Prob.c <- scale(drag_and_drop_count_Prefer1_long$Prob)
drag_and_drop_count_Prefer2_long$Amt.c <- scale(drag_and_drop_count_Prefer1_long$Amt)

# Order
touch_order_analysis.long_pref1$Prob.c <- scale(touch_order_analysis.long_pref1$Prob)
touch_order_analysis.long_pref1$Amt.c <- scale(touch_order_analysis.long_pref1$Amt)
touch_order_analysis.long_pref2$Prob.c <- scale(touch_order_analysis.long_pref1$Prob)
touch_order_analysis.long_pref2$Amt.c <- scale(touch_order_analysis.long_pref1$Amt)

# Distance
distance1_df <- master_df %>%
  select(c(1:6, 8:9, 13:21))

distance1_df$prob <- distance1_df$set1_prob 
distance1_df$amt <- distance1_df$set1_amt
distance1_df$initial_order <- distance1_df$set1_initial_order

distance2_df <- master_df %>%
  select(c(1:5, 10:12, 15, 22:27))

distance2_df$prob <- distance2_df$set2_prob 
distance2_df$amt <- distance2_df$set2_amt
distance2_df$initial_order <- distance2_df$set2_initial_order

distance1_df$amt.c <- scale(distance1_df$amt)
distance1_df$prob.c <- scale(distance1_df$prob)
distance2_df$amt.c <- scale(distance2_df$amt)
distance2_df$prob.c <- scale(distance2_df$prob)

DV 1: Drag Count

M1 <- glmer(
  N_ind ~ Prob.c + Amt.c + initial.rank.r + (1 | ResponseId),
  data = drag_and_drop_count_Prefer1_long,
  #subset = item.f != "Pr6_Amt1",
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
)

M2<-glmer(
  N_ind ~ Prob.c + Amt.c + initial.rank.r + (1 | ResponseId),
  data = drag_and_drop_count_Prefer2_long,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
)


tab_model(
  M1, M2,
  dv.labels = c(
    "Drag and Drop Count<br>Preference Task 1",
    "Drag and Drop Count<br>Preference Task 2"
  ),
  pred.labels = c(
    "Intercept",
    "Probability",
    "Amount",
    "Initial Rank [6]",
    "Initial Rank [5]",
    "Initial Rank [4]",
    "Initial Rank [3]",
    "Initial Rank [2]"
  ),
  show.se = T, 
  show.ci = F,
  show.stat = T
)
  Drag and Drop Count
Preference Task 1
Drag and Drop Count
Preference Task 2
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
Intercept 0.27 0.05 -6.95 <0.001 0.15 0.03 -8.58 <0.001
Probability 0.97 0.10 -0.35 0.726 1.13 0.12 1.15 0.249
Amount 0.66 0.06 -4.26 <0.001 0.77 0.08 -2.62 0.009
Initial Rank [6] 14.76 3.93 10.10 <0.001 26.85 7.94 11.13 <0.001
Initial Rank [5] 11.01 2.85 9.27 <0.001 24.70 7.24 10.94 <0.001
Initial Rank [4] 12.73 3.32 9.74 <0.001 20.96 6.02 10.59 <0.001
Initial Rank [3] 7.61 1.88 8.20 <0.001 14.74 4.12 9.61 <0.001
Initial Rank [2] 3.86 0.92 5.66 <0.001 7.01 1.89 7.23 <0.001
Random Effects
σ2 3.29 3.29
τ00 0.25 ResponseId 0.18 ResponseId
ICC 0.07 0.05
N 186 ResponseId 186 ResponseId
Observations 1116 1116
Marginal R2 / Conditional R2 0.225 / 0.280 0.298 / 0.335

DV 2: Drag Order

Ordinal Models

  • ordinal model might be most suitable given the nature of the data
  • however, these models have issues (see below)
M1<-clmm(factor(order)~Amt+Prob+initial.rank.r+(1|ResponseId),touch_order_analysis.long_pref1)
M2<-clmm(factor(order)~Amt+Prob+initial.rank.r+(1|ResponseId),touch_order_analysis.long_pref2)

tab_model(M1,M2,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("1|2","2|3", "3|4", "4|5", "5|6",
              "Amount",
              "Probability",
              "Initial Rank [6]","Initial Rank [5]","Initial Rank [4]","Initial Rank [3]","Initial Rank [2]"),
          dv.labels = 
            c("Drag and Drop Order<br>Preference Task 1",
              "Drag and Drop Order<br>Preference Task 2"))
  Drag and Drop Order
Preference Task 1
Drag and Drop Order
Preference Task 2
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
1|2 0.02 NaN NaN NaN 0.01 NaN NaN NaN
2|3 0.07 NaN NaN NaN 0.03 NaN NaN NaN
3|4 0.22 NaN NaN NaN 0.10 NaN NaN NaN
4|5 0.71 NaN NaN NaN 0.38 NaN NaN NaN
5|6 3.95 NaN NaN NaN 2.67 NaN NaN NaN
Amount 1.00 NaN NaN NaN 0.99 NaN NaN NaN
Probability 0.98 NaN NaN NaN 0.98 NaN NaN NaN
Initial Rank [6] 0.15 NaN NaN NaN 0.10 NaN NaN NaN
Initial Rank [5] 0.15 NaN NaN NaN 0.10 NaN NaN NaN
Initial Rank [4] 0.14 NaN NaN NaN 0.09 NaN NaN NaN
Initial Rank [3] 0.20 NaN NaN NaN 0.12 NaN NaN NaN
Initial Rank [2] 0.26 NaN NaN NaN 0.21 NaN NaN NaN
Random Effects
σ2 3.29 3.29
τ00 0.00 ResponseId 0.00 ResponseId
N 186 ResponseId 186 ResponseId
Observations 1116 1116
Marginal R2 / Conditional R2 0.195 / NA 0.255 / NA



lm_robust

Maybe not an ideal model, however, it will return results:

M1<-lm_robust(order ~ Amt+Prob+initial.rank.r, data = touch_order_analysis.long_pref1, cluster = ResponseId,se_type = "stata")

M2<-lm_robust(order ~ Amt+Prob+initial.rank.r, data = touch_order_analysis.long_pref2, cluster = ResponseId,se_type = "stata")

tab_model(M1,M2,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount",
              "Probability",
              "Initial Rank [6]","Initial Rank [5]","Initial Rank [4]","Initial Rank [3]","Initial Rank [2]"),
          dv.labels = c("Drag and Drop Order<br>Preference Task 1", "Drag and Drop Order<br>Preference Task 2"))
  Drag and Drop Order
Preference Task 1
Drag and Drop Order
Preference Task 2
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 4.47 0.15 30.06 <0.001 5.00 0.13 38.81 <0.001
Amount 0.00 0.00 0.89 0.372 -0.00 0.00 -2.87 0.004
Probability -0.01 0.00 -4.91 <0.001 -0.02 0.00 -8.19 <0.001
Initial Rank [6] -1.39 0.13 -10.94 <0.001 -1.54 0.12 -12.75 <0.001
Initial Rank [5] -1.35 0.14 -9.68 <0.001 -1.59 0.11 -14.19 <0.001
Initial Rank [4] -1.48 0.13 -11.50 <0.001 -1.61 0.12 -12.92 <0.001
Initial Rank [3] -1.16 0.14 -8.50 <0.001 -1.47 0.12 -12.15 <0.001
Initial Rank [2] -0.95 0.14 -6.75 <0.001 -1.06 0.12 -8.90 <0.001
Observations 1116 1116
R2 / R2 adjusted 0.184 / 0.179 0.248 / 0.243

DV 3: Drag Distance

distance_set1 <- lmer(set1_drag_distance.r ~ prob + amt + factor(initial_order) +
                      (1 | ResponseId),
                      data = distance1_df)

distance_set2 <- lmer(set2_drag_distance.r ~ prob + amt + factor(initial_order) +
                      (1 | ResponseId),
                      data = distance2_df)

tab_model(
  distance_set1, distance_set2,
  dv.labels = c(
    "Drag and Drop Distance<br>Preference Task 1",
    "Drag and Drop Distance<br>Preference Task 2"
  ),
  pred.labels =
            c("Intercept",
              "Probability",
              "Amount",
              "Initial Rank [2]","Initial Rank [3]","Initial Rank [4]","Initial Rank [5]","Initial Rank [6]"),
  show.se = T,
  show.ci = F,
  show.stat = T
)
  Drag and Drop Distance
Preference Task 1
Drag and Drop Distance
Preference Task 2
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept -0.17 0.14 -1.26 0.207 -0.31 0.13 -2.47 0.014
Probability 0.01 0.00 3.49 <0.001 0.01 0.00 6.65 <0.001
Amount -0.01 0.00 -3.73 <0.001 -0.00 0.00 -2.44 0.015
Initial Rank [2] 0.65 0.12 5.28 <0.001 0.49 0.12 4.16 <0.001
Initial Rank [3] 1.06 0.12 8.59 <0.001 1.03 0.12 8.80 <0.001
Initial Rank [4] 1.63 0.12 13.18 <0.001 1.49 0.12 12.76 <0.001
Initial Rank [5] 2.01 0.12 16.24 <0.001 2.05 0.12 17.59 <0.001
Initial Rank [6] 2.75 0.12 22.25 <0.001 2.57 0.12 22.10 <0.001
Random Effects
σ2 1.42 1.26
τ00 0.18 ResponseId 0.24 ResponseId
ICC 0.11 0.16
N 186 ResponseId 186 ResponseId
Observations 1116 1116
Marginal R2 / Conditional R2 0.370 / 0.440 0.388 / 0.485

Time Analysis - Prefence 1

We want to check whether probability and amount affect the 3F patterns differently over time. For this analysis, we might want to consider redoing the coding of the data frame. The reason for this is that we currently only consider the first time an item is touched: In the prior analyses, that item has as “order” the first time it was touch (not the second), and the same for drag distance: only the first distance is considered. For the “over-time-analysis”, we could consider several orders of being touched and dragged. For now, going with the old (and pre-registered) analysis scheme, I will drop duplicate observations (i.e., only the first “order” and “distance”) are considered.

We are removing 83 observations from such “duplicates” (i.e., items touched more than once), which corresponds to ~11%. Additionally, we cut after step 5 (cuts 59 obs from 32 people).

Drag Distance

Comment Previously, I ran models without the random slope. Then, the interactions were stronger for amount and step.

# First, create data frames to measure the impact of amount and probability on the 3F measures

# Join master_df to RankProcess_Prefer1 using ResponseId and item.f / bet_label
time_df_pref1 <- RankProcess_Prefer1 %>%
  left_join(
    master_df %>%
      select(ResponseId, bet_label, set1_amt, set1_prob, set1_order, set1_drag_distance.r),
    by = c("ResponseId", "item.f" = "bet_label")
  ) %>%
  rename(Amt = set1_amt, Prob = set1_prob, drag_order = set1_order, distance.r = set1_drag_distance.r) %>%
  filter(step != 0)

time_df_pref1$Amt.c <- scale(time_df_pref1$Amt)
time_df_pref1$Prob.c <- scale(time_df_pref1$Prob)

#sum(duplicated(time_df_pref1[, c("ResponseId", "item.f")])) #83
#sum(time_df_pref1$step > 5, na.rm = TRUE) #59 obs
#length(unique(time_df_pref1$ResponseId[time_df_pref1$step > 5])) #from 32 unique IDs


# Check for duplicates and remove them
duplicated_rows <- duplicated(time_df_pref1[, c("ResponseId", "item.f")])
time_df_pref1_unique <- time_df_pref1 %>%
  distinct(ResponseId, item.f, .keep_all = TRUE)

# regressions
time1 <- lmer(distance.r ~ Amt.c + Prob.c + factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref1_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time2 <- lmer(distance.r ~ Amt.c * factor(step) + Prob.c +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref1_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time3 <- lmer(distance.r ~ Amt.c + Prob.c * factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref1_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

tab_model(time1, time2, time3,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount (centered)",
              "Probability (centered)",
              "Step [2]", "Step [3]", "Step [4]", "Step [5]",
              "Amount x Step [2]", "Amount x Step [3]", "Amount x Step   [4]", "Amount x Step [5]",
              "Prob x Step [2]", "Prob x Step [3]", "Prob x Step   [4]", "Prob x Step [5]"),
          dv.labels = 
            c("DV = Distance<br>(Pref 1, reverse-coded)",
              "DV = Distance<br>(Pref 1, reverse-coded)",
              "DV = Distance<br>(Pref 1, reverse-coded)"))
  DV = Distance
(Pref 1, reverse-coded)
DV = Distance
(Pref 1, reverse-coded)
DV = Distance
(Pref 1, reverse-coded)
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 2.35 0.12 19.43 <0.001 2.38 0.12 19.62 <0.001 2.31 0.13 18.44 <0.001
Amount (centered) -0.20 0.10 -1.94 0.053 0.00 0.15 0.02 0.983 -0.22 0.11 -2.03 0.042
Probability (centered) 0.12 0.10 1.23 0.220 0.16 0.10 1.62 0.106 0.18 0.12 1.50 0.134
Step [2] -0.36 0.16 -2.27 0.023 -0.36 0.16 -2.26 0.024 -0.33 0.16 -2.05 0.041
Step [3] -0.67 0.18 -3.79 <0.001 -0.74 0.18 -4.16 <0.001 -0.56 0.18 -3.06 0.002
Step [4] -0.88 0.19 -4.53 <0.001 -0.91 0.19 -4.69 <0.001 -0.94 0.20 -4.66 <0.001
Step [5] -0.95 0.24 -3.93 <0.001 -0.99 0.24 -4.13 <0.001 -1.00 0.25 -3.98 <0.001
Amount x Step [2] -0.08 0.18 -0.45 0.656
Amount x Step [3] -0.55 0.21 -2.59 0.010
Amount x Step [4] -0.28 0.22 -1.27 0.205
Amount x Step [5] -0.16 0.27 -0.59 0.556
Prob x Step [2] -0.05 0.15 -0.35 0.726
Prob x Step [3] 0.18 0.22 0.82 0.410
Prob x Step [4] -0.36 0.21 -1.67 0.095
Prob x Step [5] -0.41 0.28 -1.44 0.152
Random Effects
σ2 1.76 1.76 1.72
τ00 0.23 ResponseId 0.25 ResponseId 0.24 ResponseId
τ11 0.38 ResponseId.Amt.c 0.29 ResponseId.Amt.c 0.42 ResponseId.Amt.c
0.32 ResponseId.Prob.c 0.34 ResponseId.Prob.c 0.35 ResponseId.Prob.c
ρ01 0.97 1.00 0.89
0.39 0.36 0.32
ICC 0.30   0.32
N 186 ResponseId 186 ResponseId 186 ResponseId
Observations 653 653 653
Marginal R2 / Conditional R2 0.091 / 0.362 0.145 / NA 0.098 / 0.383

Drag Order

Comment I am not sure in how far these models with the current coding make sense. Naturally, with larger numbers of “step”, the “order” will increase (this is just the nature of coding). It might make more sense, if we allow several “orders” to be assigned to an item, for instance, if it is touched first and fifth, it should have two values for order. However, since, on average most items are only touched once, this analysis might not work well in terms of modelling.

# regressions
time1 <- lmer(drag_order ~ Amt.c + Prob.c + factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref1_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time2 <- lmer(drag_order ~ Amt.c * factor(step) + Prob.c +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref1_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time3 <- lmer(drag_order ~ Amt.c + Prob.c * factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref1_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

tab_model(time1, time2, time3,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount (centered)",
              "Probability (centered)",
              "Step [2]", "Step [3]", "Step [4]", "Step [5]",
              "Amount x Step [2]", "Amount x Step [3]", "Amount x Step   [4]", "Amount x Step [5]",
              "Prob x Step [2]", "Prob x Step [3]", "Prob x Step   [4]", "Prob x Step [5]"),
          dv.labels = 
            c("DV = Order<br>(Pref 1, reverse-coded)",
              "DV = Order<br>(Pref 1, reverse-coded)",
              "DV = Order<br>(Pref 1, reverse-coded)"))
  DV = Order
(Pref 1, reverse-coded)
DV = Order
(Pref 1, reverse-coded)
DV = Order
(Pref 1, reverse-coded)
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 1.01 0.02 55.08 <0.001 1.01 0.02 55.71 <0.001 1.00 0.02 52.27 <0.001
Amount (centered) -0.00 0.01 -0.20 0.839 -0.01 0.02 -0.71 0.478 -0.01 0.01 -0.49 0.627
Probability (centered) -0.01 0.01 -0.84 0.400 -0.01 0.01 -1.13 0.259 -0.00 0.02 -0.28 0.783
Step [2] 0.97 0.02 49.04 <0.001 0.97 0.02 48.78 <0.001 0.98 0.02 46.61 <0.001
Step [3] 1.91 0.02 84.46 <0.001 1.91 0.02 84.92 <0.001 1.91 0.02 81.52 <0.001
Step [4] 2.81 0.02 113.53 <0.001 2.81 0.02 114.79 <0.001 2.83 0.03 111.68 <0.001
Step [5] 3.64 0.03 116.50 <0.001 3.63 0.03 117.28 <0.001 3.62 0.03 113.57 <0.001
Amount x Step [2] 0.01 0.02 0.51 0.613
Amount x Step [3] 0.03 0.03 1.32 0.189
Amount x Step [4] -0.07 0.03 -2.80 0.005
Amount x Step [5] 0.10 0.03 3.26 0.001
Prob x Step [2] 0.00 0.02 0.06 0.951
Prob x Step [3] -0.04 0.03 -1.44 0.149
Prob x Step [4] 0.03 0.02 1.36 0.175
Prob x Step [5] -0.13 0.03 -3.87 <0.001
Random Effects
σ2 0.03 0.03 0.03
τ00 0.02 ResponseId 0.02 ResponseId 0.02 ResponseId
τ11 0.01 ResponseId.Amt.c 0.01 ResponseId.Amt.c 0.01 ResponseId.Amt.c
0.01 ResponseId.Prob.c 0.01 ResponseId.Prob.c 0.01 ResponseId.Prob.c
ρ01 -0.36 -0.53 -0.44
-0.87 -0.94 -0.91
N 186 ResponseId 186 ResponseId 186 ResponseId
Observations 653 653 653
Marginal R2 / Conditional R2 0.980 / NA 0.980 / NA 0.980 / NA

Time Analysis - Prefence 2

The non-duplicate counting procedure removes 70 observations (9.4%). Cutting after step 5 removes 43 observations from 23 people.

Drag Distance

# First, create data frames to measure the impact of amount and probability on the 3F measures

# Join master_df to RankProcess_Prefer2 using ResponseId and item.f / bet_label
time_df_pref2 <- RankProcess_Prefer2 %>%
  left_join(
    master_df %>%
      select(ResponseId, bet_label, set2_amt, set2_prob, set2_order, set2_drag_distance.r),
    by = c("ResponseId", "item.f" = "bet_label")
  ) %>%
  rename(Amt = set2_amt, Prob = set2_prob, drag_order = set2_order, distance.r = set2_drag_distance.r) %>%
  filter(step != 0)

time_df_pref2$Amt.c <- scale(time_df_pref2$Amt)
time_df_pref2$Prob.c <- scale(time_df_pref2$Prob)

#sum(duplicated(time_df_pref2[, c("ResponseId", "item.f")])) #70
#sum(time_df_pref2$step > 5, na.rm = TRUE) #43 obs
#length(unique(time_df_pref2$ResponseId[time_df_pref2$step > 5])) #from 23 unique IDs


# Check for duplicates and remove them
duplicated_rows <- duplicated(time_df_pref2[, c("ResponseId", "item.f")])
time_df_pref2_unique <- time_df_pref2 %>%
  distinct(ResponseId, item.f, .keep_all = TRUE)

# regressions
time1 <- lmer(distance.r ~ Amt.c + Prob.c + factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref2_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time2 <- lmer(distance.r ~ Amt.c * factor(step) + Prob.c +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref2_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time3 <- lmer(distance.r ~ Amt.c + Prob.c * factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref2_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

tab_model(time1, time2, time3,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount (centered)",
              "Probability (centered)",
              "Step [2]", "Step [3]", "Step [4]", "Step [5]",
              "Amount x Step [2]", "Amount x Step [3]", "Amount x Step   [4]", "Amount x Step [5]",
              "Prob x Step [2]", "Prob x Step [3]", "Prob x Step   [4]", "Prob x Step [5]"),
          dv.labels = 
            c("DV = Distance<br>(Pref 2, reverse-coded)",
              "DV = Distance<br>(Pref 2, reverse-coded)",
              "DV = Distance<br>(Pref 2, reverse-coded)"))
  DV = Distance
(Pref 2, reverse-coded)
DV = Distance
(Pref 2, reverse-coded)
DV = Distance
(Pref 2, reverse-coded)
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 2.50 0.11 22.27 <0.001 2.49 0.11 21.88 <0.001 2.50 0.12 20.17 <0.001
Amount (centered) -0.33 0.10 -3.11 0.002 -0.30 0.14 -2.13 0.033 -0.33 0.11 -3.13 0.002
Probability (centered) 0.12 0.08 1.44 0.150 0.13 0.08 1.64 0.102 0.11 0.11 0.99 0.320
Step [2] -0.40 0.14 -2.91 0.004 -0.38 0.14 -2.62 0.009 -0.41 0.15 -2.70 0.007
Step [3] -0.73 0.16 -4.71 <0.001 -0.72 0.16 -4.43 <0.001 -0.73 0.16 -4.47 <0.001
Step [4] -1.04 0.18 -5.71 <0.001 -1.06 0.18 -5.76 <0.001 -1.07 0.21 -5.17 <0.001
Step [5] -1.14 0.23 -4.99 <0.001 -1.11 0.23 -4.81 <0.001 -1.19 0.24 -4.96 <0.001
Amount x Step [2] 0.08 0.17 0.46 0.646
Amount x Step [3] -0.00 0.20 -0.02 0.986
Amount x Step [4] -0.35 0.24 -1.44 0.150
Amount x Step [5] 0.23 0.29 0.81 0.420
Prob x Step [2] 0.03 0.14 0.18 0.858
Prob x Step [3] 0.04 0.17 0.25 0.801
Prob x Step [4] -0.05 0.23 -0.22 0.827
Prob x Step [5] -0.15 0.26 -0.60 0.552
Random Effects
σ2 1.43 1.43 1.45
τ00 0.36 ResponseId 0.36 ResponseId 0.36 ResponseId
τ11 0.50 ResponseId.Amt.c 0.50 ResponseId.Amt.c 0.51 ResponseId.Amt.c
0.02 ResponseId.Prob.c 0.02 ResponseId.Prob.c 0.02 ResponseId.Prob.c
ρ01 0.79 0.80 0.80
0.99 1.00 0.98
N 186 ResponseId 186 ResponseId 186 ResponseId
Observations 647 647 647
Marginal R2 / Conditional R2 0.202 / NA 0.211 / NA 0.202 / NA

Drag Order

# regressions
time1 <- lmer(drag_order ~ Amt.c + Prob.c + factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref2_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time2 <- lmer(drag_order ~ Amt.c * factor(step) + Prob.c +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref2_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time3 <- lmer(drag_order ~ Amt.c + Prob.c * factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref2_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

tab_model(time1, time2, time3,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount (centered)",
              "Probability (centered)",
              "Step [2]", "Step [3]", "Step [4]", "Step [5]",
              "Amount x Step [2]", "Amount x Step [3]", "Amount x Step   [4]", "Amount x Step [5]",
              "Prob x Step [2]", "Prob x Step [3]", "Prob x Step   [4]", "Prob x Step [5]"),
          dv.labels = 
            c("DV = Order<br>(Pref 2, reverse-coded)",
              "DV = Order<br>(Pref 2, reverse-coded)",
              "DV = Order<br>(Pref 2, reverse-coded)"))
  DV = Order
(Pref 2, reverse-coded)
DV = Order
(Pref 2, reverse-coded)
DV = Order
(Pref 2, reverse-coded)
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 0.99 0.02 52.77 <0.001 1.00 0.02 53.43 <0.001 0.99 0.02 50.58 <0.001
Amount (centered) 0.03 0.01 2.02 0.043 0.01 0.02 0.65 0.515 0.03 0.01 2.08 0.038
Probability (centered) 0.01 0.01 0.88 0.380 0.01 0.01 0.55 0.584 0.02 0.02 1.13 0.257
Step [2] 0.98 0.02 46.71 <0.001 0.98 0.02 46.93 <0.001 0.99 0.02 44.66 <0.001
Step [3] 1.92 0.02 82.44 <0.001 1.92 0.02 83.13 <0.001 1.92 0.02 80.69 <0.001
Step [4] 2.87 0.03 103.00 <0.001 2.86 0.03 102.61 <0.001 2.88 0.03 91.06 <0.001
Step [5] 3.60 0.03 103.89 <0.001 3.57 0.03 102.40 <0.001 3.58 0.04 99.58 <0.001
Amount x Step [2] -0.01 0.02 -0.34 0.736
Amount x Step [3] 0.00 0.02 0.01 0.996
Amount x Step [4] 0.04 0.03 1.54 0.125
Amount x Step [5] 0.15 0.03 4.56 <0.001
Prob x Step [2] 0.01 0.02 0.44 0.659
Prob x Step [3] -0.02 0.02 -0.80 0.427
Prob x Step [4] 0.01 0.03 0.31 0.757
Prob x Step [5] -0.10 0.04 -2.58 0.010
Random Effects
σ2 0.03 0.03 0.03
τ00 0.02 ResponseId 0.02 ResponseId 0.02 ResponseId
τ11 0.01 ResponseId.Amt.c 0.01 ResponseId.Amt.c 0.01 ResponseId.Amt.c
0.01 ResponseId.Prob.c 0.01 ResponseId.Prob.c 0.01 ResponseId.Prob.c
ρ01 -1.00 -1.00 -1.00
-1.00 -1.00 -1.00
N 186 ResponseId 186 ResponseId 186 ResponseId
Observations 647 647 647
Marginal R2 / Conditional R2 0.975 / NA 0.976 / NA 0.975 / NA

Rank Models - Prefence 1

lm_robust models; attempting to predict final rank using the DROPT measures.

lm_robust

master_df$set1_rank.r <- 7-master_df$set1_rank

choice1 <- lm_robust(set1_rank.r ~ set1_amt.c + set1_prob.c, data = master_df, clusters = ResponseId)
# choice2 <- lm_robust(set1_rank.r ~ set1_amt + set1_prob + set1_touch_count_binary, data = master_df, clusters = ResponseId)
# choice3 <- lm_robust(set1_rank.r ~ set1_amt + set1_prob + set1_order, data = master_df, clusters = ResponseId)
# choice4 <- lm_robust(set1_rank.r~ set1_amt + set1_prob + set1_drag_distance.r, data = master_df, clusters = ResponseId)
choice5 <- lm_robust(set1_rank.r ~ set1_amt.c + set1_prob.c + set1_touch_count_binary + set1_order + set1_drag_distance.r, data = master_df, clusters = ResponseId)


tab_model(
  choice1, choice5,
  dv.labels = c(
    "Rank (reverse-coded)",
    "Rank (reverse-coded)"
  ),
  pred.labels =
            c("Intercept",
              "Amount",
              "Probability",
              "Touch Count (binary)",
              "Touch Order",
              "Drag Distance"),
  show.se = T,
  show.ci = F,
  show.stat = T
)
  Rank (reverse-coded) Rank (reverse-coded)
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 3.50 0.00 1104.78 <0.001 4.60 0.13 35.48 <0.001
Amount -0.42 0.07 -6.00 <0.001 -0.36 0.06 -6.01 <0.001
Probability 0.30 0.08 3.67 <0.001 0.01 0.07 0.18 0.858
Touch Count (binary) -0.19 0.09 -2.28 0.023
Touch Order -0.47 0.04 -13.25 <0.001
Drag Distance 0.41 0.03 15.45 <0.001
Observations 1164 1116
R2 / R2 adjusted 0.152 / 0.150 0.509 / 0.506

Predict 2ND Lottery Preference

explore<-master_df %>%
  mutate(
    amount_rank = case_when(
      bet_label == "Pr6_Amt1" ~ 1,
      bet_label == "Pr5_Amt2" ~ 2,
      bet_label == "Pr4_Amt3" ~ 3,
      bet_label == "Pr3_Amt4" ~ 4,
      bet_label == "Pr2_Amt5" ~ 5,
      bet_label == "Pr1_Amt6" ~ 6
    )
  )%>%
  group_by(ResponseId) %>%
  summarise(
    Tau.amt_set1 = cor(set1_rank, amount_rank, method = "kendall"),
    Rho.amt_set1 = cor(set1_rank, amount_rank, method = "spearman"),
    .groups = "drop"
  )
# 
# 
# ggplot(explore, aes(x = Tau.amt_set1)) +
#   geom_histogram(binwidth = 0.1, fill = "#3182bd", color = "white") +
#   labs(title = "Kendall's Tau (Amount vs Set1 Rank)",
#        x = "Kendall's Tau", y = "Count") +
#   theme_minimal()
# 
# ggplot(explore, aes(x = Rho.amt_set1)) +
#   geom_histogram(binwidth = 0.1, fill = "#31a354", color = "white") +
#   labs(title = "Spearman's Rho (Amount vs Set1 Rank)",
#        x = "Spearman's Rho", y = "Count") +
#   theme_minimal()
  • A quick and dirty descriptive..
Ranking_per_Step_Pr1_Amt6 <- Distance_Prefer2
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
  left_join(explore %>% select(ResponseId, Tau.amt_set1), by = "ResponseId")

## Create new R columns
# Initialize R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

# Function to fill R columns
fill_R_columns <- function(order_string, target_value) {
  order_string <- gsub("[[:space:][:cntrl:]]", "", order_string)  # Clean spaces and newlines
  order_numbers <- strsplit(order_string, ",")[[1]]
  position_of_value <- which(order_numbers == as.character(target_value))
  
  if (length(position_of_value) == 0) {
    return(NA)
  } else if (position_of_value == 1) {
    return(1)
  } else {
    return(position_of_value)
  }
}

Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )


# Ensure at least 9 rows per ResponseId
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 10) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()



## Summary DFs
summary_ranks_step_split_by_tau <- Ranking_per_Step_Pr1_Amt6 %>%
  filter(step < 10) %>%
  group_by(step, Tau.amt_set1) %>%
  summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

## Long format
summary_ranks_step_split_by_tau_long <- summary_ranks_step_split_by_tau %>%
  pivot_longer(
    cols = -c(step, Tau.amt_set1),
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

## Add N and reorder tau levels
tau_counts <- Ranking_per_Step_Pr1_Amt6 %>%
  distinct(ResponseId, Tau.amt_set1) %>%
  count(Tau.amt_set1, name = "N")

summary_ranks_step_split_by_tau_long <- summary_ranks_step_split_by_tau_long %>%
  left_join(tau_counts, by = "Tau.amt_set1") %>%
  mutate(
    Tau.amt_set1 = factor(Tau.amt_set1, levels = sort(unique(Tau.amt_set1), decreasing = TRUE)),
    facet_label = paste0("τ = ", Tau.amt_set1, " (N = ", N, ")"),
    facet_label = factor(facet_label, levels = rev(unique(facet_label)))  # ✅ use rev()
  )

## Define shapes (you can define `bet_colors` separately)
bet_shapes <- c(
  "Pr6_Amt1" = 21, "Pr5_Amt2" = 22, "Pr4_Amt3" = 23,
  "Pr3_Amt4" = 24, "Pr2_Amt5" = 25, "Pr1_Amt6" = 11
)

## Plot
ggplot(summary_ranks_step_split_by_tau_long,
       aes(x = step, y = mean_rank)) +
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se),
                width = 0.2, color = "black") +
  facet_wrap(~ facet_label, labeller = label_wrap_gen(width = 20)) +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +
  labs(
    x = "Ranking Step",
    y = "Mean Rank",
    title = "Mean Rank Trajectories by Kendall's Tau Group (N per Panel)",
    color = "Label", shape = "Label", fill = "Label"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  )+
  scale_y_reverse(breaks = 1:6)

LMER

Distance

  • Standardization: Both probability and amount are standardized before being entered into the regression
  • Remove an item to address singularity warnings:
    • Dropping the item with the greatest variance (lottery Pr6_Amt1; SD = 2.17) still resulted in a singularity issue in the model (reflected by the NA conditional R2 in Model output.
    • The variance of participants’ mean ranks did increased from 0 to 0.43 after dropping this item.
    • I also experimented with artificially increasing per-respondent variance (see more below)
    • The following models retain all items.
  • Model Specifications:
    • DV: Reverse coded so that 6 indicates the highest rank and 1 the lowest.
    • M1 includes random effects for both probability and amount
    • M2 adds a fixed effect for distance
    • M3 adds both a fixed effect and a random effect for distance.
      • I included M2 for comparison with M3, as we might not expect substantial between-respondent variance in the slope for distance?
      • Indeed we see very little variation in random slope below
  • Other:
    • NA DROPT Data Xuwen to meet with Arian to review the code , as some unexpected missing ranks/orders were observed, involving 12 participants (6%).These participants were dropped from analysis.
    • Control for Item Random Intercept?
master_df<-master_df%>% # 194 when full
  mutate(set1_prob.z=scale(set1_prob),
         set1_amt.z=scale(set1_amt),
         set2_prob.z=scale(set2_prob),
         set2_amt.z=scale(set2_amt))
master_df <- master_df %>% #182
  group_by(ResponseId) %>%
  filter(!any(is.na(set1_drag_distance.r|set2_drag_distance.r))) %>% 
  ungroup()
 
# need to look into the dataset and removes any NAs or odd values or ites with missing stuff

# master_df%>%
#   group_by(bet_label)%>%
#   summarize(set1.sd=sd(set1_rank))

# master_df.test<-master_df%>%
#   filter(bet_label!="Pr6_Amt1")
# 
# master_df.test%>%
#   group_by(ResponseId)%>%
#   summarize(mean_set1=mean(set1_rank))%>%
#   summarize(sd(mean_set1))


set1.distance_M1 <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)
set1.distance_M2 <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_drag_distance.r+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)
set1.distance_M3 <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_drag_distance.r+
    (set1_prob.z + set1_amt.z + set1_drag_distance.r | ResponseId),
  data = master_df
)


tab_model(
  set1.distance_M1,
  set1.distance_M2,
  set1.distance_M3,
  dv.labels  = c("RE: Prob & Amt", "Adding Distance FE", "Adding Distance RE"),
  pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag distance"),
  show.re.var = TRUE,   # show random-effects variances
  show.icc    = TRUE,   # show ICC
  digits = 2
)
  RE: Prob & Amt Adding Distance FE Adding Distance RE
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 3.50 3.45 – 3.55 <0.001 3.18 3.10 – 3.25 <0.001 3.17 3.09 – 3.25 <0.001
Prob (z) 0.23 0.07 – 0.39 0.005 0.18 0.04 – 0.32 0.011 0.24 0.11 – 0.38 0.001
Amt (z) -0.47 -0.62 – -0.33 <0.001 -0.41 -0.54 – -0.28 <0.001 -0.45 -0.58 – -0.33 <0.001
Drag distance 0.27 0.23 – 0.31 <0.001 0.27 0.22 – 0.32 <0.001
Random Effects
σ2 0.77 0.72 0.71
τ00 0.00 ResponseId 0.00 ResponseId 0.04 ResponseId
τ11 0.95 ResponseId.set1_prob.z 0.67 ResponseId.set1_prob.z 0.65 ResponseId.set1_prob.z
0.72 ResponseId.set1_amt.z 0.51 ResponseId.set1_amt.z 0.48 ResponseId.set1_amt.z
    0.03 ResponseId.set1_drag_distance.r
ρ01 -0.29   0.72
0.99   -0.61
    -0.99
N 182 ResponseId 182 ResponseId 182 ResponseId
Observations 1092 1092 1092
Marginal R2 / Conditional R2 0.360 / NA 0.457 / NA 0.510 / NA
  • Addressing Singularity Experiment

  • I added rnorm(0, 0.5) to each participant to artificially create a random intercept and test whether this would resolve the singularity modeling issue.

    • This approach resolved the issue for the M1-M2, but for the distance term, random slope variation still appears to be lacking.
    • This suggests that, at least for M1-M2, A possible workaround is to artificially add small, participant-specific constants to the ranks; this should not change beta estimates but would allow the model to run?
# Add a model below just to showcase the adding a small value idea
master_df.examine <- master_df %>%
  group_by(ResponseId) %>%
  mutate(set1_rank.r = set1_rank.r + rnorm(1, mean = 0, sd = 0.5)) %>%
  ungroup()

# master_df.examine%>%
#   group_by(ResponseId)%>%
#   summarize(mean_set1=mean(set1_rank.r))%>%
#   summarize(sd(mean_set1))

set1.distance_M1.examine <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df.examine
)
set1.distance_M2.examine <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_drag_distance.r+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df.examine
)
set1.distance_M3.examine <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_drag_distance.r+
    (set1_prob.z + set1_amt.z + set1_drag_distance.r | ResponseId),
  data = master_df.examine
)


tab_model(
  set1.distance_M1.examine,
  set1.distance_M2.examine,
  set1.distance_M3.examine,
  dv.labels  = c("RE: Prob & Amt", "Adding Distance FE", "Adding Distance RE"),
  pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag distance"),
  show.re.var = TRUE,   # show random-effects variances
  show.icc    = TRUE,   # show ICC
  digits = 2
)
  RE: Prob & Amt Adding Distance FE Adding Distance RE
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 3.48 3.41 – 3.55 <0.001 3.10 3.00 – 3.20 <0.001 3.08 2.97 – 3.19 <0.001
Prob (z) 0.24 0.08 – 0.40 0.004 0.18 0.04 – 0.32 0.011 0.26 0.13 – 0.40 <0.001
Amt (z) -0.47 -0.61 – -0.32 <0.001 -0.40 -0.53 – -0.27 <0.001 -0.45 -0.58 – -0.33 <0.001
Drag distance 0.32 0.27 – 0.37 <0.001 0.32 0.26 – 0.38 <0.001
Random Effects
σ2 0.99 0.89 0.86
τ00 0.06 ResponseId 0.13 ResponseId 0.24 ResponseId
τ11 0.89 ResponseId.set1_prob.z 0.59 ResponseId.set1_prob.z 0.56 ResponseId.set1_prob.z
0.66 ResponseId.set1_amt.z 0.44 ResponseId.set1_amt.z 0.41 ResponseId.set1_amt.z
    0.05 ResponseId.set1_drag_distance.r
ρ01 -0.17 -0.11 0.34
0.05 0.01 -0.33
    -0.69
ICC 0.65 0.59  
N 182 ResponseId 182 ResponseId 182 ResponseId
Observations 1092 1092 1092
Marginal R2 / Conditional R2 0.134 / 0.694 0.237 / 0.689 0.499 / NA

Visualize Random Intercepts/Slopes

get_participant_effects_lmer <- function(model, id_var = "ResponseId") {
  # Random effects matrix -> data.frame, keep names like (Intercept)
  re_mat <- ranef(model)[[id_var]]
  re_df  <- data.frame(re_mat, check.names = FALSE) # check.name =F keep all the variable names intact
  re_df[[id_var]] <- rownames(re_mat)

  # Fixed effects (named vector)
  fe <- fixef(model)

  # Union of all coefficient names (fixed + random)
  all_terms <- union(names(fe), colnames(re_mat))

  # Ensure every term exists; add fixed beta (default 0) + random deviation (default 0)
  for (nm in all_terms) {
    if (!nm %in% names(re_df)) re_df[[nm]] <- 0     
    if (nm %in% names(fe))     re_df[[nm]] <- re_df[[nm]] + unname(fe[[nm]]) # ensure that all variables are there
  }

  re_df
}

re_M1 <- get_participant_effects_lmer(set1.distance_M1)
re_M2 <- get_participant_effects_lmer(set1.distance_M2)
re_M3 <- get_participant_effects_lmer(set1.distance_M3)


# Add a model label
re_M1$model <- "M1 - RE: Prob & Amt"
re_M2$model <- "M2 - Adding Distance FE"
re_M3$model <- "M3 - Adding Distance RE"

# Combine
re_all <- bind_rows(re_M1, re_M2, re_M3)

re_all_long <- re_all %>%
  pivot_longer(
    cols = -c(ResponseId, model),
    names_to = "parameter",
    values_to = "coefficient"
  )

re_all_long <- re_all_long %>%
  mutate(
    parameter = recode(parameter,
      `(Intercept)`          = "(Intercept)",
      `set1_prob.z`          = "Prob (z)",
      `set1_amt.z`           = "Amt (z)",
      `set1_drag_distance.r` = "Drag distance"
    ),
    parameter = factor(parameter,
      levels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag distance")
    ),
    model = factor(model, levels = c("M1 - RE: Prob & Amt","M2 - Adding Distance FE","M3 - Adding Distance RE"))
  )

# Summary stats for markers
summary_stats <- re_all_long %>%
  group_by(model, parameter) %>%
  summarise(
    mean_val   = mean(coefficient, na.rm = TRUE),
    median_val = median(coefficient, na.rm = TRUE),
    .groups = "drop"
  )

# Color map: Intercept grey, slopes steelblue
param_colors <- c(
  "(Intercept)" = "grey60",
  "Prob (z)"    = "steelblue",
  "Amt (z)"     = "steelblue",
  "Drag distance" = "steelblue"
)

ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
  geom_histogram(bins = 30, alpha = 0.85, color = "white") +
  facet_grid(parameter ~ model, scales = "free") +   # allow y to vary
  scale_fill_manual(values = param_colors, guide = "none") +
  theme_minimal() +
  labs(
    title = "Participant-specific coefficients",
    x = "Coefficient value",
    y = "Count"
  )+
  theme(
 panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    strip.text = element_text(face = "bold",size=12)  # bold facet labels
  ) 

Predict 2ND Lotter Ranking

  • For each participant, compute predicted scores for set2 items:

\[ \begin{aligned} \widehat{s}_{ij} \;=&\; (\beta_0 + b_{0i})_{\text{set1}} \\ &+ (\beta_{prob} + b_{pi})_{\text{set1}} \cdot \text{prob.z}_{ij,\text{set2}} \\ &+ (\beta_{amt} + b_{ai})_{\text{set1}} \cdot \text{amt.z}_{ij,\text{set2}} \\ &+ (\beta_{drag} + b_{di})_{\text{set1}} \cdot \text{drag_distance.r}_{ij,\text{set2}} \end{aligned} \]

  • We then convert the predicted scores to ranks (aligned in direction with the observed set 2 ranks), since the predictions are continuous values rather than integers (manually checked, good range 0-7). Tied items are assigned their average rank The code checks and produces a message when ties are observed.
predict_set2_by_id.Distance <- function(model, data, label) {
  # Random effects (per ID)
  re_mat <- ranef(model)$ResponseId
  re_df  <- as.data.frame(re_mat)
  re_df$ResponseId <- rownames(re_mat)

  # Rename things to b_intercept/prob/ etc.
  if ("(Intercept)"           %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_intercept = `(Intercept)`)
  if ("set1_prob.z"           %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_prob      = `set1_prob.z`)
  if ("set1_amt.z"            %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_amt       = `set1_amt.z`)
  if ("set1_drag_distance.r"  %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_drag      = `set1_drag_distance.r`)

  # Ensure all needed columns exist (fill missing with 0)
  for (nm in c("b_intercept","b_prob","b_amt","b_drag")) {
    if (!nm %in% names(re_df)) re_df[[nm]] <- 0
  }

  # Fixed effects (missing terms default to 0)
  fe <- fixef(model)
  fe_int  <- if ("(Intercept)"          %in% names(fe)) unname(fe["(Intercept)"])          else 0
  fe_prob <- if ("set1_prob.z"          %in% names(fe)) unname(fe["set1_prob.z"])          else 0
  fe_amt  <- if ("set1_amt.z"           %in% names(fe)) unname(fe["set1_amt.z"])           else 0
  fe_drag <- if ("set1_drag_distance.r" %in% names(fe)) unname(fe["set1_drag_distance.r"]) else 0

df_pred <- data %>%
    left_join(re_df, by = "ResponseId") %>%
    mutate(
      set2_rank_predicted =
        (fe_int + b_intercept) +
        (fe_prob + b_prob) * set2_prob.z +
        (fe_amt  + b_amt ) * set2_amt.z +
        (fe_drag + b_drag) * set2_drag_distance.r
    ) %>%
    group_by(ResponseId) %>%
    # Remove NAs within participant before tie check & ranking
    mutate(set2_rank_predicted = ifelse(is.finite(set2_rank_predicted), set2_rank_predicted, NA_real_)) %>%
    # Tie flag, then average rank for ties
    mutate(
      has_tie = n_distinct(set2_rank_predicted) < n(),
      set2_rank_pred_rank = rank(-set2_rank_predicted, ties.method = "average")
    ) %>%
    ungroup()

  # Message about ties
  tie_summary <- df_pred %>%
    filter(!is.na(set2_rank_predicted), !is.na(set2_rank)) %>%
    distinct(ResponseId, has_tie) %>%
    summarise(n_ids = n(), n_ties = sum(has_tie), .groups = "drop")
  cat(sprintf("Tie handling: %d of %d participants had tied predicted scores; average ranks were used.",
                  tie_summary$n_ties, tie_summary$n_ids))

  # Per-participant correlations
  df_pred %>%
    group_by(ResponseId) %>%
    summarise(
      r_pearson   = (cor(set2_rank_pred_rank, set2_rank, method = "pearson")),
      tau_kendall = (cor(set2_rank_pred_rank, set2_rank, method = "kendall")),
      .groups = "drop"
    ) %>%
    mutate(model = label)
}

# Run for all three models
cor_M1 <- predict_set2_by_id.Distance(set1.distance_M1, master_df, "RE: Prob & Amt")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M2 <- predict_set2_by_id.Distance(set1.distance_M2, master_df, "Adding Distance FE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M3 <- predict_set2_by_id.Distance(set1.distance_M3, master_df, "Adding Distance RE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.

Performances

  • Accuracy Calculation
    • For each participant, we compare the predicted rank with the true rank using both Pearson’s r and Kendall’s τ. This yields 182 data points per model, shown in the plot below.
  • black line represents median and red dot mean
cor_all <- bind_rows(cor_M1, cor_M2, cor_M3) %>%
  mutate(model = factor(model, levels = c("RE: Prob & Amt","Adding Distance FE","Adding Distance RE")))

# --- quick summaries (optional)
summary_by_model <- cor_all %>%
  group_by(model) %>%
  summarise(
    n = n(),
    mean_r   = mean(r_pearson, na.rm = TRUE),
    median_r = median(r_pearson, na.rm = TRUE),
    mean_tau = mean(tau_kendall, na.rm = TRUE),
    median_tau = median(tau_kendall, na.rm = TRUE),
    .groups = "drop"
  )

make_violin <- function(df, col, title) {
  # Calculate per-model means and medians
  stats <- df %>%
    group_by(model) %>%
    summarise(
      mean_val   = mean(.data[[col]], na.rm = TRUE),
      median_val = median(.data[[col]], na.rm = TRUE),
      .groups = "drop"
    )

  # Create caption text
  cap_text <- paste0(
    "Means: ",
    paste0(stats$model, " = ", sprintf("%.2f", stats$mean_val), collapse = " | "),
    "\nMedians: ",
    paste0(stats$model, " = ", sprintf("%.2f", stats$median_val), collapse = " | ")
  )

  ggplot(df, aes(x = model, y = .data[[col]], fill = model)) +
    geom_violin(alpha = 0.4, width = 0.9, color = "black") +
    geom_jitter(width = 0.12, alpha = 0.5, size = 1.6) +
    geom_point(data = stats, aes(x = model, y = mean_val),
               inherit.aes = FALSE, color = "red", size = 3.2) +
    geom_segment(data = stats,
                 aes(x = as.numeric(model) - 0.35, xend = as.numeric(model) + 0.35,
                     y = median_val, yend = median_val),
                 inherit.aes = FALSE, color = "black", linewidth = 1) +
    coord_cartesian(ylim = c(-1, 1)) +
    labs(x = NULL, y = "Correlation", title = title, caption = cap_text) +
    theme_minimal() +
  theme(
    legend.position = "none",
    plot.caption = element_text(
      size = 12,         # Increase font size
      face = "bold",     # Make bold
      margin = margin(t = 6)
    )
  )
}


p_r.Distance_lmer   <- make_violin(cor_all, "r_pearson",   "Per-participant Pearson r")
p_tau.Distance_lmer <- make_violin(cor_all, "tau_kendall", "Per-participant Kendall \u03C4")

p_r.Distance_lmer + p_tau.Distance_lmer + plot_layout(ncol = 2)

  • Significance Testing
    • We then test whether M2 (distance FE only) or M3 (adding distance RE) improves the correlation distribution compared to M1 (prob & amt RE only).
    • considered a Fisher transformation for r and Kendall’s τ , but many values equal 1, producing undefined (infinite) results.
    • Instead, for robustness, we applied three complementary tests: (1) paired t-test, (2) paired Wilcoxon signed-rank test, and (3) a permutation test (N = 10,000).
perm_paired_mean <- function(x, y, n_perm = 10000, seed = 123) {
  diffs <- y - x
  if (length(diffs) < 1) return(NA_real_)
  set.seed(seed)
  obs <- mean(diffs)
  # Fast vectorized sign flips
  signs <- matrix(sample(c(1,-1), length(diffs) * n_perm, replace = TRUE), nrow = n_perm)
  perm_means <- rowMeans(signs * rep(diffs, each = n_perm))
  mean(abs(perm_means) >= abs(obs))
}


# Compare M2 and M3 against M1 using a single metric column (by name)
compare_models_simple.Distance <- function(df, value_col,
                                  m1 = "RE: Prob & Amt",
                                  m2 = "Adding Distance FE",
                                  m3 = "Adding Distance RE",
                                  n_perm = 10000) {

  # Build wide table with explicit column selection (no NSE)
  wide <- df %>%
    select(ResponseId, model, value = all_of(value_col)) %>%
    pivot_wider(names_from = model, values_from = value)

  # Helper to run the three paired tests for a pair of columns
  run_tests <- function(a_name, b_name, label) {
    x <- wide[[a_name]]
    y <- wide[[b_name]]
    diffs <- y - x  # positive => b_name improves over a_name

    cat(sprintf("%s: pairs used = %d\n", label, length(diffs)))

    # Paired t-test
    t_res <- try(t.test(y, x, paired = TRUE), silent = TRUE)
    if (inherits(t_res, "try-error")) {
      t_p <- NA_real_; t_stat <- NA_real_; t_df <- NA_real_
    } else {
      t_p <- unname(t_res$p.value)
      t_stat <- unname(t_res$statistic)
      t_df <- unname(t_res$parameter)
    }

    # Wilcoxon signed-rank (paired)
    w_res <- try(wilcox.test(y, x, paired = TRUE, exact = FALSE), silent = TRUE)
    if (inherits(w_res, "try-error")) {
      w_p <- NA_real_; w_V <- NA_real_
    } else {
      w_p <- unname(w_res$p.value)
      w_V <- unname(w_res$statistic)
    }

    # Permutation (paired sign-flip) on mean diff
    p_perm <- perm_paired_mean(x, y, n_perm = n_perm)

    data.frame(
      Comparison    = label,
      N_pairs       = length(diffs),
      Mean_Diff     = if (length(diffs)) mean(diffs) else NA_real_,
      Median_Diff   = if (length(diffs)) median(diffs) else NA_real_,
      Prop_Better = if (length(diffs)) mean(diffs > 0) else NA_real_,
      Prop_Worse = if (length(diffs)) mean(diffs < 0) else NA_real_,
      Prop_Tie = if (length(diffs)) mean(diffs == 0) else NA_real_,
      t_test_p      = t_p,
      t_statistic   = t_stat,
      t_df          = t_df,
      wilcoxon_p    = w_p,
      wilcoxon_V    = w_V,
      permutation_p = p_perm,
      check.names   = FALSE
    )
  }

  rbind(
    run_tests(m1, m2, "Adding FE (M2 − M1)"),
    run_tests(m1, m3, "Adding RE (M3 − M1)")
  ) %>%
    mutate(
      Mean_Diff     = round(Mean_Diff, 3),
      Median_Diff   = round(Median_Diff, 3),
      Prop_Better = round(Prop_Better, 3),
      Prop_Worse = round(Prop_Worse, 3),
      Prop_Tie = round(Prop_Tie, 3),
      t_test_p      = round(t_test_p, 4),
      t_statistic   = round(t_statistic, 3),
      t_df          = round(t_df),
      wilcoxon_p    = round(wilcoxon_p, 4),
      permutation_p = round(permutation_p, 4)
    )
}

# ---- Build the two tables (Pearson r and Kendall tau) ----
# Expect cor_all to have: ResponseId, model, r_pearson, tau_kendall
table_r.distance_lmer   <- compare_models_simple.Distance(cor_all, "r_pearson")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
table_tau.distance_lmer <- compare_models_simple.Distance(cor_all, "tau_kendall")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
library(kableExtra)

kable(table_r.distance_lmer,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Pearson r (Distance)</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Pearson r (Distance)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.062 0 0.286 0.192 0.522 0 4.378 181 2e-04 2800.5 0
Adding RE (M3 − M1) 182 0.076 0 0.253 0.170 0.577 0 4.248 181 9e-04 2155.0 0
kable(table_tau.distance_lmer,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Kendall &tau; (Distance)</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Kendall τ (Distance)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.040 0 0.275 0.187 0.538 0.0013 3.276 181 0.0018 2472.5 5e-04
Adding RE (M3 − M1) 182 0.059 0 0.242 0.154 0.604 0.0004 3.616 181 0.0008 1909.5 3e-04

Order

  • More random slope variation?
set1.Order_M1 <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)

set1.Order_M2 <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_order+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)
set1.Order_M3 <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_order+
    (set1_prob.z + set1_amt.z + set1_order | ResponseId),
  data = master_df
)


tab_model(
  set1.Order_M1,
  set1.Order_M2,
  set1.Order_M3,
  dv.labels  = c("RE: Prob & Amt", "Adding order FE", "Adding order RE"),
  pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag order"),
  show.re.var = TRUE,   # show random-effects variances
  show.icc    = TRUE,   # show ICC
  digits = 2
)
  RE: Prob & Amt Adding order FE Adding order RE
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 3.50 3.45 – 3.55 <0.001 4.61 4.45 – 4.77 <0.001 4.57 4.36 – 4.78 <0.001
Prob (z) 0.23 0.07 – 0.39 0.005 0.11 -0.03 – 0.25 0.113 0.21 0.07 – 0.35 0.004
Amt (z) -0.47 -0.62 – -0.33 <0.001 -0.46 -0.60 – -0.33 <0.001 -0.48 -0.61 – -0.36 <0.001
Drag order -0.35 -0.40 – -0.31 <0.001 -0.34 -0.40 – -0.27 <0.001
Random Effects
σ2 0.77 0.70 0.59
τ00 0.00 ResponseId 0.00 ResponseId 0.92 ResponseId
τ11 0.95 ResponseId.set1_prob.z 0.61 ResponseId.set1_prob.z 0.67 ResponseId.set1_prob.z
0.72 ResponseId.set1_amt.z 0.54 ResponseId.set1_amt.z 0.47 ResponseId.set1_amt.z
    0.09 ResponseId.set1_order
ρ01 -0.29   -0.64
0.99   0.32
    -1.00
N 182 ResponseId 182 ResponseId 182 ResponseId
Observations 1092 1092 1092
Marginal R2 / Conditional R2 0.360 / NA 0.495 / NA 0.573 / NA
  • Addressing Singularity Experiment

  • add rnorm (0,0.5) to (participant-specific) and all singularity issue resolved

# Add a model below just to showcase the adding a small value idea

set1.order_M1.examine <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df.examine
)
set1.order_M2.examine <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_order+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df.examine
)
set1.order_M3.examine <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_order+
    (set1_prob.z + set1_amt.z + set1_order | ResponseId),
  data = master_df.examine
)


tab_model(
  set1.order_M1.examine,
  set1.order_M2.examine,
  set1.order_M3.examine,
  dv.labels  = c("RE: Prob & Amt", "Adding Order FE", "Adding Order RE"),
  pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Order"),
  show.re.var = TRUE,   # show random-effects variances
  show.icc    = TRUE,   # show ICC
  digits = 2
)
  RE: Prob & Amt Adding Order FE Adding Order RE
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 3.48 3.41 – 3.55 <0.001 4.72 4.53 – 4.90 <0.001 4.66 4.43 – 4.90 <0.001
Prob (z) 0.24 0.08 – 0.40 0.004 0.10 -0.03 – 0.24 0.140 0.21 0.07 – 0.35 0.003
Amt (z) -0.47 -0.61 – -0.32 <0.001 -0.46 -0.59 – -0.33 <0.001 -0.49 -0.61 – -0.36 <0.001
Drag Order -0.39 -0.45 – -0.34 <0.001 -0.38 -0.45 – -0.31 <0.001
Random Effects
σ2 0.99 0.87 0.76
τ00 0.06 ResponseId 0.12 ResponseId 1.10 ResponseId
τ11 0.89 ResponseId.set1_prob.z 0.53 ResponseId.set1_prob.z 0.60 ResponseId.set1_prob.z
0.66 ResponseId.set1_amt.z 0.49 ResponseId.set1_amt.z 0.43 ResponseId.set1_amt.z
    0.10 ResponseId.set1_order
ρ01 -0.17 -0.05 -0.71
0.05 0.04 0.43
    -0.94
ICC 0.65 0.59 0.63
N 182 ResponseId 182 ResponseId 182 ResponseId
Observations 1092 1092 1092
Marginal R2 / Conditional R2 0.134 / 0.694 0.260 / 0.697 0.305 / 0.742

Visualize Random Intercepts/Slopes

get_participant_effects_lmer.Order <- function(model, id_var = "ResponseId") {
  # Random effects matrix -> data.frame, keep names like (Intercept)
  re_mat <- ranef(model)[[id_var]]
  re_df  <- data.frame(re_mat, check.names = FALSE)
  re_df[[id_var]] <- rownames(re_mat)

  # Fixed effects (named vector)
  fe <- fixef(model)

  # Union of all coefficient names (fixed + random)
  all_terms <- union(names(fe), colnames(re_mat))

  # Ensure every term exists; add fixed beta (default 0) + random deviation (default 0)
  for (nm in all_terms) {
    if (!nm %in% names(re_df)) re_df[[nm]] <- 0      # add missing column
    if (nm %in% names(fe))     re_df[[nm]] <- re_df[[nm]] + unname(fe[[nm]])
  }
  
  re_df # output
}

re_M1 <- get_participant_effects_lmer(set1.Order_M1)
re_M2 <- get_participant_effects_lmer(set1.Order_M2)
re_M3 <- get_participant_effects_lmer(set1.Order_M3)


# Add a model label
re_M1$model <- "M1 - RE: Prob & Amt"
re_M2$model <- "M2 - Adding Order FE"
re_M3$model <- "M3 - Adding Order RE"

# Combine
re_all <- bind_rows(re_M1, re_M2, re_M3)

re_all_long <- re_all %>%
  pivot_longer(
    cols = -c(ResponseId, model),
    names_to = "parameter",
    values_to = "coefficient"
  )

re_all_long <- re_all_long %>%
  mutate(
    parameter = recode(parameter,
      `(Intercept)`          = "(Intercept)",
      `set1_prob.z`          = "Prob (z)",
      `set1_amt.z`           = "Amt (z)",
      `set1_order` = "Drag Order"
    ),
    parameter = factor(parameter,
      levels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Order")
    ),
    model = factor(model, levels = c("M1 - RE: Prob & Amt","M2 - Adding Order FE","M3 - Adding Order RE"))
  )

# Summary stats for markers
summary_stats <- re_all_long %>%
  group_by(model, parameter) %>%
  summarise(
    mean_val   = mean(coefficient, na.rm = TRUE),
    median_val = median(coefficient, na.rm = TRUE),
    .groups = "drop"
  )

# Color map: Intercept grey, slopes steelblue
param_colors <- c(
  "(Intercept)" = "grey60",
  "Prob (z)"    = "steelblue",
  "Amt (z)"     = "steelblue",
  "Drag Order" = "steelblue"
)

ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
  geom_histogram(bins = 30, alpha = 0.85, color = "white") +
  facet_grid(parameter ~ model, scales = "free") +   # allow y to vary
  scale_fill_manual(values = param_colors, guide = "none") +
  theme_minimal() +
  labs(
    title = "Participant-specific coefficients",
    x = "Coefficient value",
    y = "Count"
  )+
  theme(
 panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    strip.text = element_text(face = "bold",size=12)  # bold facet labels
  ) 

Predict 2ND Lotter Ranking

predict_set2_by_id.Order <- function(model, data, label) {
  # Random effects (per ID)
  re_mat <- ranef(model)$ResponseId
  re_df  <- as.data.frame(re_mat)
  re_df$ResponseId <- rownames(re_mat)

  # Rename to consistent names if present
  if ("(Intercept)"           %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_intercept = `(Intercept)`)
  if ("set1_prob.z"           %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_prob      = `set1_prob.z`)
  if ("set1_amt.z"            %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_amt       = `set1_amt.z`)
  if ("set1_order"  %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_drag      = `set1_order`)

  # Ensure all needed columns exist (fill missing with 0)
  for (nm in c("b_intercept","b_prob","b_amt","b_drag")) {
    if (!nm %in% names(re_df)) re_df[[nm]] <- 0
  }

  # Fixed effects (missing terms default to 0)
  fe <- fixef(model)
  fe_int  <- if ("(Intercept)"          %in% names(fe)) unname(fe["(Intercept)"])          else 0
  fe_prob <- if ("set1_prob.z"          %in% names(fe)) unname(fe["set1_prob.z"])          else 0
  fe_amt  <- if ("set1_amt.z"           %in% names(fe)) unname(fe["set1_amt.z"])           else 0
  fe_drag <- if ("set1_order" %in% names(fe)) unname(fe["set1_order"]) else 0
df_pred <- data %>%
    left_join(re_df, by = "ResponseId") %>%
    mutate(
      set2_rank_predicted =
        (fe_int + b_intercept) +
        (fe_prob + b_prob) * set2_prob.z +
        (fe_amt  + b_amt ) * set2_amt.z +
        (fe_drag + b_drag) * set2_order
    ) %>%
    group_by(ResponseId) %>%
    # Remove NAs within participant before tie check & ranking
    mutate(set2_rank_predicted = ifelse(is.finite(set2_rank_predicted), set2_rank_predicted, NA_real_)) %>%
    # Tie flag, then average rank for ties
    mutate(
      has_tie = n_distinct(set2_rank_predicted) < n(),
      set2_rank_pred_rank = rank(-set2_rank_predicted, ties.method = "average")
    ) %>%
    ungroup()

  # Message about ties
  tie_summary <- df_pred %>%
    filter(!is.na(set2_rank_predicted), !is.na(set2_rank)) %>%
    distinct(ResponseId, has_tie) %>%
    summarise(n_ids = n(), n_ties = sum(has_tie), .groups = "drop")
  cat(sprintf("Tie handling: %d of %d participants had tied predicted scores; average ranks were used.",
                  tie_summary$n_ties, tie_summary$n_ids))

  # Per-participant correlations
  df_pred %>%
    group_by(ResponseId) %>%
    summarise(
      r_pearson   = (cor(set2_rank_pred_rank, set2_rank, method = "pearson")),
      tau_kendall = (cor(set2_rank_pred_rank, set2_rank, method = "kendall")),
      .groups = "drop"
    ) %>%
    mutate(model = label)
}

# Run for all three models
cor_M1 <- predict_set2_by_id.Order(set1.Order_M1, master_df, "RE: Prob & Amt")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M2 <- predict_set2_by_id.Order(set1.Order_M2, master_df, "Adding Order FE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M3 <- predict_set2_by_id.Order(set1.Order_M3, master_df, "Adding Order RE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.

Performances

cor_all <- bind_rows(cor_M1, cor_M2, cor_M3) %>%
  mutate(model = factor(model, levels = c("RE: Prob & Amt","Adding Order FE","Adding Order RE")))

p_r.Order_lmer   <- make_violin(cor_all, "r_pearson",   "Per-participant Pearson r")
p_tau.Order_lmer <- make_violin(cor_all, "tau_kendall", "Per-participant Kendall \u03C4")

p_r.Order_lmer + p_tau.Order_lmer + plot_layout(ncol = 2)

  • Significance Testing
compare_models_simple.Order <- function(df, value_col,
                                  m1 = "RE: Prob & Amt",
                                  m2 = "Adding Order FE",
                                  m3 = "Adding Order RE",
                                  n_perm = 10000) {

  # Build wide table with explicit column selection (no NSE)
  wide <- df %>%
    select(ResponseId, model, value = all_of(value_col)) %>%
    pivot_wider(names_from = model, values_from = value)

  # Helper to run the three paired tests for a pair of columns
  run_tests <- function(a_name, b_name, label) {
    x <- wide[[a_name]]
    y <- wide[[b_name]]
    diffs <- y - x  # positive => b_name improves over a_name

    cat(sprintf("%s: pairs used = %d\n", label, length(diffs)))

    # Paired t-test
    t_res <- try(t.test(y, x, paired = TRUE), silent = TRUE)
    if (inherits(t_res, "try-error")) {
      t_p <- NA_real_; t_stat <- NA_real_; t_df <- NA_real_
    } else {
      t_p <- unname(t_res$p.value)
      t_stat <- unname(t_res$statistic)
      t_df <- unname(t_res$parameter)
    }

    # Wilcoxon signed-rank (paired)
    w_res <- try(wilcox.test(y, x, paired = TRUE, exact = FALSE), silent = TRUE)
    if (inherits(w_res, "try-error")) {
      w_p <- NA_real_; w_V <- NA_real_
    } else {
      w_p <- unname(w_res$p.value)
      w_V <- unname(w_res$statistic)
    }

    # Permutation (paired sign-flip) on mean diff
    p_perm <- perm_paired_mean(x, y, n_perm = n_perm)

    data.frame(
      Comparison    = label,
      N_pairs       = length(diffs),
      Mean_Diff     = if (length(diffs)) mean(diffs) else NA_real_,
      Median_Diff   = if (length(diffs)) median(diffs) else NA_real_,
      Prop_Better = if (length(diffs)) mean(diffs > 0) else NA_real_,
      Prop_Worse = if (length(diffs)) mean(diffs < 0) else NA_real_,
      Prop_Tie = if (length(diffs)) mean(diffs == 0) else NA_real_,
      t_test_p      = t_p,
      t_statistic   = t_stat,
      t_df          = t_df,
      wilcoxon_p    = w_p,
      wilcoxon_V    = w_V,
      permutation_p = p_perm,
      check.names   = FALSE
    )
  }

  rbind(
    run_tests(m1, m2, "Adding FE (M2 − M1)"),
    run_tests(m1, m3, "Adding RE (M3 − M1)")
  ) %>%
    mutate(
      Mean_Diff     = round(Mean_Diff, 3),
      Median_Diff   = round(Median_Diff, 3),
      Prop_Better = round(Prop_Better, 3),
      Prop_Worse = round(Prop_Worse, 3),
      Prop_Tie = round(Prop_Tie, 3),
      t_test_p      = round(t_test_p, 4),
      t_statistic   = round(t_statistic, 3),
      t_df          = round(t_df),
      wilcoxon_p    = round(wilcoxon_p, 4),
      permutation_p = round(permutation_p, 4)
    )
}


# ---- Build the two tables (Pearson r and Kendall tau) ----
# Expect cor_all to have: ResponseId, model, r_pearson, tau_kendall
table_r.order_lmer   <- compare_models_simple.Order(cor_all, "r_pearson")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
table_tau.order_lmer <- compare_models_simple.Order(cor_all, "tau_kendall")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
# Heterogenity in order but not in distance? very interesting
kable(table_r.order_lmer,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Pearson r (Order)</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Pearson r (Order)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.028 0 0.253 0.280 0.467 0.0776 1.775 181 0.1437 2780.5 0.0784
Adding RE (M3 − M1) 182 0.064 0 0.236 0.192 0.571 0.0136 2.492 181 0.0191 2010.5 0.0121
kable(table_tau.order_lmer,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Kendall &tau (Order);</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Kendall &tau (Order);
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.015 0 0.247 0.264 0.489 0.3384 0.960 181 0.3439 2429.0 0.3139
Adding RE (M3 − M1) 182 0.053 0 0.231 0.192 0.577 0.0224 2.302 181 0.0252 1940.5 0.0210

Count

  • Binary predictor variable is used
set1.Count_M1 <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)

set1.Count_M2 <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_touch_count_binary+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)
set1.Count_M3 <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_touch_count_binary+
    (set1_prob.z + set1_amt.z + set1_touch_count_binary | ResponseId),
  data = master_df
)


tab_model(
  set1.Count_M1,
  set1.Count_M2,
  set1.Count_M3,
  dv.labels  = c("RE: Prob & Amt", "Adding Count FE", "Adding Count RE"),
  pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Count (Binary)"),
  show.re.var = TRUE,   # show random-effects variances
  show.icc    = TRUE,   # show ICC
  digits = 2
)
  RE: Prob & Amt Adding Count FE Adding Count RE
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 3.50 3.45 – 3.55 <0.001 3.20 3.10 – 3.29 <0.001 3.19 3.09 – 3.29 <0.001
Prob (z) 0.23 0.07 – 0.39 0.005 0.24 0.08 – 0.39 0.002 0.26 0.11 – 0.41 0.001
Amt (z) -0.47 -0.62 – -0.33 <0.001 -0.43 -0.57 – -0.29 <0.001 -0.44 -0.58 – -0.30 <0.001
Drag Count (Binary) 0.49 0.36 – 0.62 <0.001 0.49 0.35 – 0.63 <0.001
Random Effects
σ2 0.77 0.75 0.73
τ00 0.00 ResponseId 0.00 ResponseId 0.06 ResponseId
τ11 0.95 ResponseId.set1_prob.z 0.83 ResponseId.set1_prob.z 0.81 ResponseId.set1_prob.z
0.72 ResponseId.set1_amt.z 0.63 ResponseId.set1_amt.z 0.64 ResponseId.set1_amt.z
    0.16 ResponseId.set1_touch_count_binary
ρ01 -0.29   0.38
0.99   -0.21
    -1.00
N 182 ResponseId 182 ResponseId 182 ResponseId
Observations 1092 1092 1092
Marginal R2 / Conditional R2 0.360 / NA 0.392 / NA 0.420 / NA
  • Addressing Singularity Experiment

  • add rnorm (0,0.5) to (participant-specific) and all singularity issue resolved

# Add a model below just to showcase the adding a small value idea

set1.Count_M1.examine <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df.examine
)
set1.Count_M2.examine <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_touch_count_binary+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df.examine
)
set1.Count_M3.examine <- lmer(
  set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_touch_count_binary+
    (set1_prob.z + set1_amt.z + set1_touch_count_binary | ResponseId),
  data = master_df.examine
)


tab_model(
  set1.Count_M1.examine,
  set1.Count_M2.examine,
  set1.Count_M3.examine,
  dv.labels  = c("RE: Prob & Amt", "Adding Count FE", "Adding Count RE"),
  pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Count"),
  show.re.var = TRUE,   # show random-effects variances
  show.icc    = TRUE,   # show ICC
  digits = 2
)
  RE: Prob & Amt Adding Count FE Adding Count RE
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 3.48 3.41 – 3.55 <0.001 3.13 3.01 – 3.25 <0.001 3.12 2.99 – 3.25 <0.001
Prob (z) 0.24 0.08 – 0.40 0.004 0.24 0.09 – 0.40 0.002 0.27 0.12 – 0.42 0.001
Amt (z) -0.47 -0.61 – -0.32 <0.001 -0.42 -0.56 – -0.27 <0.001 -0.43 -0.57 – -0.29 <0.001
Drag Count 0.56 0.41 – 0.71 <0.001 0.58 0.42 – 0.74 <0.001
Random Effects
σ2 0.99 0.96 0.93
τ00 0.06 ResponseId 0.08 ResponseId 0.21 ResponseId
τ11 0.89 ResponseId.set1_prob.z 0.76 ResponseId.set1_prob.z 0.74 ResponseId.set1_prob.z
0.66 ResponseId.set1_amt.z 0.58 ResponseId.set1_amt.z 0.58 ResponseId.set1_amt.z
    0.17 ResponseId.set1_touch_count_binary
ρ01 -0.17 -0.18 0.19
0.05 0.05 -0.17
    -0.79
ICC 0.65 0.62 0.63
N 182 ResponseId 182 ResponseId 182 ResponseId
Observations 1092 1092 1092
Marginal R2 / Conditional R2 0.134 / 0.694 0.165 / 0.685 0.182 / 0.696

Visualize Random Intercepts/Slopes

get_participant_effects_lmer.Count <- function(model, id_var = "ResponseId") {
  # Random effects matrix -> data.frame, keep names like (Intercept)
  re_mat <- ranef(model)[[id_var]]
  re_df  <- data.frame(re_mat, check.names = FALSE)
  re_df[[id_var]] <- rownames(re_mat)

  # Fixed effects (named vector)
  fe <- fixef(model)

  # Union of all coefficient names (fixed + random)
  all_terms <- union(names(fe), colnames(re_mat))

  # Ensure every term exists; add fixed beta (default 0) + random deviation (default 0)
  for (nm in all_terms) {
    if (!nm %in% names(re_df)) re_df[[nm]] <- 0      # add missing column
    if (nm %in% names(fe))     re_df[[nm]] <- re_df[[nm]] + unname(fe[[nm]])
  }
  
  re_df # output
}

re_M1 <- get_participant_effects_lmer(set1.Count_M1)
re_M2 <- get_participant_effects_lmer(set1.Count_M2)
re_M3 <- get_participant_effects_lmer(set1.Count_M3)


# Add a model label
re_M1$model <- "M1 - RE: Prob & Amt"
re_M2$model <- "M2 - Adding Count FE"
re_M3$model <- "M3 - Adding Count RE"

# Combine
re_all <- bind_rows(re_M1, re_M2, re_M3)

re_all_long <- re_all %>%
  pivot_longer(
    cols = -c(ResponseId, model),
    names_to = "parameter",
    values_to = "coefficient"
  )

re_all_long <- re_all_long %>%
  mutate(
    parameter = recode(parameter,
      `(Intercept)`          = "(Intercept)",
      `set1_prob.z`          = "Prob (z)",
      `set1_amt.z`           = "Amt (z)",
      `set1_touch_count_binary` = "Drag Count"
    ),
    parameter = factor(parameter,
      levels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Count")
    ),
    model = factor(model, levels = c("M1 - RE: Prob & Amt","M2 - Adding Count FE","M3 - Adding Count RE"))
  )

# Summary stats for markers
summary_stats <- re_all_long %>%
  group_by(model, parameter) %>%
  summarise(
    mean_val   = mean(coefficient, na.rm = TRUE),
    median_val = median(coefficient, na.rm = TRUE),
    .groups = "drop"
  )

# Color map: Intercept grey, slopes steelblue
param_colors <- c(
  "(Intercept)" = "grey60",
  "Prob (z)"    = "steelblue",
  "Amt (z)"     = "steelblue",
  "Drag Count" = "steelblue"
)

ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
  geom_histogram(bins = 30, alpha = 0.85, color = "white") +
  facet_grid(parameter ~ model, scales = "free") +   # allow y to vary
  scale_fill_manual(values = param_colors, guide = "none") +
  theme_minimal() +
  labs(
    title = "Participant-specific coefficients",
    x = "Coefficient value",
    y = "Count"
  )+
  theme(
 panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    strip.text = element_text(face = "bold",size=12)  # bold facet labels
  ) 

Predict 2ND Lotter Ranking

predict_set2_by_id.Count <- function(model, data, label) {
  # Random effects (per ID)
  re_mat <- ranef(model)$ResponseId
  re_df  <- as.data.frame(re_mat)
  re_df$ResponseId <- rownames(re_mat)

  # Rename to consistent names if present
  if ("(Intercept)"           %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_intercept = `(Intercept)`)
  if ("set1_prob.z"           %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_prob      = `set1_prob.z`)
  if ("set1_amt.z"            %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_amt       = `set1_amt.z`)
  if ("set1_touch_count_binary"  %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_drag      = `set1_touch_count_binary`)

  # Ensure all needed columns exist (fill missing with 0)
  for (nm in c("b_intercept","b_prob","b_amt","b_drag")) {
    if (!nm %in% names(re_df)) re_df[[nm]] <- 0
  }

  # Fixed effects (missing terms default to 0)
  fe <- fixef(model)
  fe_int  <- if ("(Intercept)"          %in% names(fe)) unname(fe["(Intercept)"])          else 0
  fe_prob <- if ("set1_prob.z"          %in% names(fe)) unname(fe["set1_prob.z"])          else 0
  fe_amt  <- if ("set1_amt.z"           %in% names(fe)) unname(fe["set1_amt.z"])           else 0
  fe_drag <- if ("set1_touch_count_binary" %in% names(fe)) unname(fe["set1_touch_count_binary"]) else 0
df_pred <- data %>%
    left_join(re_df, by = "ResponseId") %>%
    mutate(
      set2_rank_predicted =
        (fe_int + b_intercept) +
        (fe_prob + b_prob) * set2_prob.z +
        (fe_amt  + b_amt ) * set2_amt.z +
        (fe_drag + b_drag) * set2_touch_count_binary
    ) %>%
    group_by(ResponseId) %>%
    # Remove NAs within participant before tie check & ranking
    mutate(set2_rank_predicted = ifelse(is.finite(set2_rank_predicted), set2_rank_predicted, NA_real_)) %>%
    # Tie flag, then average rank for ties
    mutate(
      has_tie = n_distinct(set2_rank_predicted) < n(),
      set2_rank_pred_rank = rank(-set2_rank_predicted, ties.method = "average")
    ) %>%
    ungroup()

  # Message about ties
  tie_summary <- df_pred %>%
    filter(!is.na(set2_rank_predicted), !is.na(set2_rank)) %>%
    distinct(ResponseId, has_tie) %>%
    summarise(n_ids = n(), n_ties = sum(has_tie), .groups = "drop")
  cat(sprintf("Tie handling: %d of %d participants had tied predicted scores; average ranks were used.",
                  tie_summary$n_ties, tie_summary$n_ids))

  # Per-participant correlations
  df_pred %>%
    group_by(ResponseId) %>%
    summarise(
      r_pearson   = (cor(set2_rank_pred_rank, set2_rank, method = "pearson")),
      tau_kendall = (cor(set2_rank_pred_rank, set2_rank, method = "kendall")),
      .groups = "drop"
    ) %>%
    mutate(model = label)
}

# Run for all three models
cor_M1 <- predict_set2_by_id.Count(set1.Count_M1, master_df, "RE: Prob & Amt")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M2 <- predict_set2_by_id.Count(set1.Count_M2, master_df, "Adding Count FE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M3 <- predict_set2_by_id.Count(set1.Count_M3, master_df, "Adding Count RE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.

Performances

cor_all <- bind_rows(cor_M1, cor_M2, cor_M3) %>%
  mutate(model = factor(model, levels = c("RE: Prob & Amt","Adding Count FE","Adding Count RE")))

p_r.Count_lmer   <- make_violin(cor_all, "r_pearson",   "Per-participant Pearson r")
p_tau.Count_lmer <- make_violin(cor_all, "tau_kendall", "Per-participant Kendall \u03C4")

p_r.Count_lmer + p_tau.Count_lmer + plot_layout(ncol = 2)

  • Significance Testing
compare_models_simple.Count <- function(df, value_col,
                                  m1 = "RE: Prob & Amt",
                                  m2 = "Adding Count FE",
                                  m3 = "Adding Count RE",
                                  n_perm = 10000) {

  # Build wide table with explicit column selection (no NSE)
  wide <- df %>%
    select(ResponseId, model, value = all_of(value_col)) %>%
    pivot_wider(names_from = model, values_from = value)

  # Helper to run the three paired tests for a pair of columns
  run_tests <- function(a_name, b_name, label) {
    x <- wide[[a_name]]
    y <- wide[[b_name]]
    diffs <- y - x  # positive => b_name improves over a_name

    cat(sprintf("%s: pairs used = %d\n", label, length(diffs)))

    # Paired t-test
    t_res <- try(t.test(y, x, paired = TRUE), silent = TRUE)
    if (inherits(t_res, "try-error")) {
      t_p <- NA_real_; t_stat <- NA_real_; t_df <- NA_real_
    } else {
      t_p <- unname(t_res$p.value)
      t_stat <- unname(t_res$statistic)
      t_df <- unname(t_res$parameter)
    }

    # Wilcoxon signed-rank (paired)
    w_res <- try(wilcox.test(y, x, paired = TRUE, exact = FALSE), silent = TRUE)
    if (inherits(w_res, "try-error")) {
      w_p <- NA_real_; w_V <- NA_real_
    } else {
      w_p <- unname(w_res$p.value)
      w_V <- unname(w_res$statistic)
    }

    # Permutation (paired sign-flip) on mean diff
    p_perm <- perm_paired_mean(x, y, n_perm = n_perm)

    data.frame(
      Comparison    = label,
      N_pairs       = length(diffs),
      Mean_Diff     = if (length(diffs)) mean(diffs) else NA_real_,
      Median_Diff   = if (length(diffs)) median(diffs) else NA_real_,
      Prop_Better = if (length(diffs)) mean(diffs > 0) else NA_real_,
      Prop_Worse = if (length(diffs)) mean(diffs < 0) else NA_real_,
      Prop_Tie = if (length(diffs)) mean(diffs == 0) else NA_real_,
      t_test_p      = t_p,
      t_statistic   = t_stat,
      t_df          = t_df,
      wilcoxon_p    = w_p,
      wilcoxon_V    = w_V,
      permutation_p = p_perm,
      check.names   = FALSE
    )
  }

  rbind(
    run_tests(m1, m2, "Adding FE (M2 − M1)"),
    run_tests(m1, m3, "Adding RE (M3 − M1)")
  ) %>%
    mutate(
      Mean_Diff     = round(Mean_Diff, 3),
      Median_Diff   = round(Median_Diff, 3),
      Prop_Better = round(Prop_Better, 3),
      Prop_Worse = round(Prop_Worse, 3),
      Prop_Tie = round(Prop_Tie, 3),
      t_test_p      = round(t_test_p, 4),
      t_statistic   = round(t_statistic, 3),
      t_df          = round(t_df),
      wilcoxon_p    = round(wilcoxon_p, 4),
      permutation_p = round(permutation_p, 4)
    )
}


# ---- Build the two tables (Pearson r and Kendall tau) ----
# Expect cor_all to have: ResponseId, model, r_pearson, tau_kendall
table_r.Count_lmer   <- compare_models_simple.Count(cor_all, "r_pearson")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
table_tau.Count_lmer <- compare_models_simple.Count(cor_all, "tau_kendall")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
# Heterogenity in order but not in distance? very interesting
kable(table_r.Count_lmer,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Pearson r (Count)</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Pearson r (Count)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.011 0 0.126 0.104 0.769 0.1520 1.438 181 0.3571 525.5 0.1646
Adding RE (M3 − M1) 182 0.020 0 0.148 0.099 0.753 0.0521 1.955 181 0.1072 660.5 0.0547
kable(table_tau.Count_lmer,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Kendall &tau (Count);</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Kendall &tau (Count);
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.004 0 0.115 0.104 0.780 0.5331 0.624 181 0.2353 498 0.4723
Adding RE (M3 − M1) 182 0.011 0 0.137 0.099 0.764 0.2293 1.206 181 0.1024 608 0.2006

CLMM

Distance

  • No singularity issues
set1.distance_M1 <- clmm(
  as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)
set1.distance_M2 <- clmm(
  as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+ set1_drag_distance.r+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)
set1.distance_M3 <- clmm(
  as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+ set1_drag_distance.r+
    (set1_prob.z + set1_amt.z + set1_drag_distance.r | ResponseId),
  data = master_df
)


tab_model(
  set1.distance_M1,
  set1.distance_M2,
  set1.distance_M3,
  dv.labels  = c("RE: Prob & Amt", "Adding Distance FE", "Adding Distance RE"),
  pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag distance"),
  show.re.var = TRUE,   # show random-effects variances
  show.icc    = TRUE,   # show ICC
  digits = 2,
  transform= NULL
)
  RE: Prob & Amt Adding Distance FE Adding Distance RE
Predictors Log-Odds CI p Log-Odds CI p Log-Odds CI p
1|2 -4.15 -4.53 – -3.77 <0.001 -3.60 -3.99 – -3.21 <0.001 -3.65 -4.08 – -3.22 <0.001
2|3 -1.76 -2.01 – -1.50 <0.001 -1.15 -1.42 – -0.88 <0.001 -1.16 -1.46 – -0.86 <0.001
3|4 0.14 -0.08 – 0.36 0.220 0.84 0.58 – 1.10 <0.001 0.87 0.58 – 1.16 <0.001
4|5 1.95 1.69 – 2.21 <0.001 2.74 2.43 – 3.06 <0.001 2.81 2.45 – 3.17 <0.001
5|6 4.08 3.72 – 4.44 <0.001 4.95 4.52 – 5.38 <0.001 5.10 4.59 – 5.60 <0.001
set1_prob.z 0.60 0.16 – 1.04 0.008 0.49 0.07 – 0.90 0.021 0.61 0.19 – 1.04 0.005
set1_amt.z -1.34 -1.75 – -0.93 <0.001 -1.26 -1.66 – -0.86 <0.001 -1.39 -1.79 – -0.99 <0.001
set1_drag_distance.r 0.59 0.48 – 0.70 <0.001 0.60 0.47 – 0.73 <0.001
Random Effects
σ2 3.29 3.29 3.29
τ00 0.07 ResponseId 0.04 ResponseId 0.33 ResponseId
τ11 6.52 ResponseId.set1_prob.z 5.44 ResponseId.set1_prob.z 5.56 ResponseId.set1_prob.z
4.88 ResponseId.set1_amt.z 4.10 ResponseId.set1_amt.z 4.02 ResponseId.set1_amt.z
    0.16 ResponseId.set1_drag_distance.r
ρ01 0.69 0.74 0.83
0.56 0.55 -0.26
    -0.93
ICC 0.80 0.76 0.76
N 182 ResponseId 182 ResponseId 182 ResponseId
Observations 1092 1092 1092
Marginal R2 / Conditional R2 0.167 / 0.833 0.234 / 0.819 0.275 / 0.827

Visualize Random Intercepts/Slopes

get_participant_effects_clmm <- function(model, id_var = "ResponseId") {
  # Random effects
  re_list <- ranef(model)
  re <- as.data.frame(re_list[[id_var]])
  re[[id_var]] <- rownames(re_list[[id_var]])
  # Fixed effects: only betas, no thresholds
  fe_beta <- model$beta

  # Ensure each fixed-effect name exists in RE; fill missing with 0
  for (nm in names(fe_beta)) {
    if (!nm %in% names(re)) re[[nm]] <- 0
    re[[nm]] <- re[[nm]] + unname(fe_beta[[nm]])
  }

  re
}

# Usage:
re_M1 <- get_participant_effects_clmm(set1.distance_M1)
re_M2 <- get_participant_effects_clmm(set1.distance_M2)
re_M3 <- get_participant_effects_clmm(set1.distance_M3)

# Add a model label
re_M1$model <- "M1 - RE: Prob & Amt"
re_M2$model <- "M2 - Adding Distance FE"
re_M3$model <- "M3 - Adding Distance RE"

# Combine
re_all <- bind_rows(re_M1, re_M2, re_M3)

re_all_long <- re_all %>%
  pivot_longer(
    cols = -c(ResponseId, model),
    names_to = "parameter",
    values_to = "coefficient"
  )

re_all_long <- re_all_long %>%
  mutate(
    parameter = recode(parameter,
      `(Intercept)`          = "(Intercept)",
      `set1_prob.z`          = "Prob (z)",
      `set1_amt.z`           = "Amt (z)",
      `set1_drag_distance.r` = "Drag distance"
    ),
    parameter = factor(parameter,
      levels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag distance")
    ),
    model = factor(model, levels = c("M1 - RE: Prob & Amt","M2 - Adding Distance FE","M3 - Adding Distance RE"))
  )

# Summary stats for markers
summary_stats <- re_all_long %>%
  group_by(model, parameter) %>%
  summarise(
    mean_val   = mean(coefficient, na.rm = TRUE),
    median_val = median(coefficient, na.rm = TRUE),
    .groups = "drop"
  )

# Color map: Intercept grey, slopes steelblue
param_colors <- c(
  "(Intercept)" = "grey60",
  "Prob (z)"    = "steelblue",
  "Amt (z)"     = "steelblue",
  "Drag distance" = "steelblue"
)

ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
  geom_histogram(bins = 30, alpha = 0.85, color = "white") +
  facet_grid(parameter ~ model, scales = "free") +   # allow y to vary
  scale_fill_manual(values = param_colors, guide = "none") +
  theme_minimal() +
  labs(
    title = "Participant-specific coefficients",
    x = "Coefficient value",
    y = "Count"
  )+
  theme(
 panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    strip.text = element_text(face = "bold",size=12)  # bold facet labels
  ) 

Predict 2ND Lotter Ranking

predict_set2_by_id_clmm.Distace <- function(model, data, label) {
  # --- Random effects (usually only intercepts in clmm)
  re_list <- ranef(model)
  if (is.list(re_list) && !is.null(re_list$ResponseId)) {
    re_mat <- re_list$ResponseId
    re_df  <- data.frame(re_mat, check.names = FALSE)
    re_df$ResponseId <- rownames(re_mat)
  } else {
    # no RE present
    re_df <- data.frame(ResponseId = unique(data$ResponseId), check.names = FALSE)
  }

 
  if ("(Intercept)"           %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_intercept = `(Intercept)`)
  if ("set1_prob.z"           %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_prob      = `set1_prob.z`)
  if ("set1_amt.z"            %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_amt       = `set1_amt.z`)
  if ("set1_drag_distance.r"  %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_drag      = `set1_drag_distance.r`)

  # Ensure all needed columns exist (fill missing with 0)
  for (nm in c("b_intercept","b_prob","b_amt","b_drag")) {
    if (!nm %in% names(re_df)) re_df[[nm]] <- 0
  }

  # --- Fixed effects: use model$beta (no thresholds)
  fe <- model$beta
  fe_prob <- if ("set1_prob.z"          %in% names(fe)) unname(fe["set1_prob.z"])          else 0
  fe_amt  <- if ("set1_amt.z"           %in% names(fe)) unname(fe["set1_amt.z"])           else 0
  fe_drag <- if ("set1_drag_distance.r" %in% names(fe)) unname(fe["set1_drag_distance.r"]) else 0

  # NOTE: clmm has thresholds instead of a global fixed intercept; we do NOT add a fixed intercept.
  # We only use participant-specific random intercept shifts (b_intercept) on the latent scale.

  df_pred <- data %>%
    left_join(re_df, by = "ResponseId") %>%
    mutate(
      # latent score for ordering (no fixed intercept term)
      latent_score =
          b_intercept +
          (fe_prob+b_prob) * set2_prob.z +
          (fe_amt+b_amt)  * set2_amt.z  +
          (fe_drag+b_drag) * set2_drag_distance.r
    )  %>%
    group_by(ResponseId) %>%
    # 2) Handle ties via average ranks (and keep NAs out of ranking)
    mutate(
      # flag ties within participant (ignore NAs)
      has_tie = {
        x <- latent_score[is.finite(latent_score)]
        dplyr::n_distinct(x) < length(x)
      },
      # average ranks; higher score = better (rank 1)
      set2_rank_pred_rank = rank(-latent_score, ties.method = "average")
    ) %>%
    ungroup()

  # 3) Tie summary message
  tie_summary <- df_pred %>%
    distinct(ResponseId, has_tie) %>%
    summarise(n_ids = dplyr::n(), n_ties = sum(has_tie), .groups = "drop")
  cat(sprintf(
    "Tie handling: %d of %d participants had tied predicted scores; average ranks were used.\n",
    tie_summary$n_ties, tie_summary$n_ids
  ))

  # 4) Per-participant correlations (complete cases only)
  df_pred %>%
    group_by(ResponseId) %>%
    summarise(
      r_pearson   = cor(set2_rank_pred_rank, set2_rank, method = "pearson", use = "complete.obs"),
      tau_kendall = cor(set2_rank_pred_rank, set2_rank, method = "kendall", use = "complete.obs"),
      .groups = "drop"
    ) %>%
    mutate(model = label)
}




# Run for all three models
cor_M1 <- predict_set2_by_id_clmm.Distace(set1.distance_M1, master_df, "RE: Prob & Amt")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M2 <- predict_set2_by_id_clmm.Distace(set1.distance_M2, master_df, "Adding Distance FE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M3 <- predict_set2_by_id_clmm.Distace(set1.distance_M3, master_df, "Adding Distance RE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.

Performances

cor_all <- bind_rows(cor_M1, cor_M2, cor_M3) %>%
  mutate(model = factor(model, levels = c("RE: Prob & Amt","Adding Distance FE","Adding Distance RE")))

p_r.Distance_clmm   <- make_violin(cor_all, "r_pearson",   "Per-participant Pearson r")
p_tau.Distance_clmm <- make_violin(cor_all, "tau_kendall", "Per-participant Kendall \u03C4")

p_r.Distance_clmm + p_tau.Distance_clmm + plot_layout(ncol = 2)

  • Significance Testing
table_r.distance_CLMM   <- compare_models_simple.Distance(cor_all, "r_pearson")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
table_tau.distance_CLMM <- compare_models_simple.Distance(cor_all, "tau_kendall")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
kable(table_r.distance_CLMM,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Pearson r (Distance)</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Pearson r (Distance)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.052 0 0.253 0.154 0.593 1e-04 3.939 181 4e-04 2049.0 0
Adding RE (M3 − M1) 182 0.068 0 0.242 0.181 0.577 1e-04 4.023 181 5e-04 2184.5 0
kable(table_tau.distance_CLMM,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Kendall &tau; (Distance)</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Kendall τ (Distance)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.032 0 0.242 0.143 0.615 0.0065 2.752 181 0.0044 1723 0.0061
Adding RE (M3 − M1) 182 0.044 0 0.236 0.165 0.599 0.0033 2.976 181 0.0025 1897 0.0033

Order

  • No singularity issues
  • Across models, there appear more variance to order random slope
set1.order_M1 <- clmm(
  as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)
set1.order_M2 <- clmm(
  as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+ set1_order+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)
set1.order_M3 <- clmm(
  as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+ set1_order+
    (set1_prob.z + set1_amt.z + set1_order | ResponseId),
  data = master_df
)


tab_model(
  set1.order_M1,
  set1.order_M2,
  set1.order_M3,
  dv.labels  = c("RE: Prob & Amt", "Adding Order FE", "Adding Order RE"),
  pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Order"),
  show.re.var = TRUE,   # show random-effects variances
  show.icc    = TRUE,   # show ICC
  digits = 2,
  transform= NULL
)
  RE: Prob & Amt Adding Order FE Adding Order RE
Predictors Log-Odds CI p Log-Odds CI p Log-Odds CI p
1|2 -4.15 -4.53 – -3.77 <0.001 -6.65 -7.28 – -6.01 <0.001 -7.34 -8.20 – -6.48 <0.001
2|3 -1.76 -2.01 – -1.50 <0.001 -4.22 -4.75 – -3.69 <0.001 -4.67 -5.41 – -3.93 <0.001
3|4 0.14 -0.08 – 0.36 0.220 -2.22 -2.70 – -1.75 <0.001 -2.43 -3.11 – -1.76 <0.001
4|5 1.95 1.69 – 2.21 <0.001 -0.27 -0.72 – 0.18 0.239 -0.23 -0.87 – 0.41 0.483
5|6 4.08 3.72 – 4.44 <0.001 1.93 1.44 – 2.42 <0.001 2.32 1.63 – 3.00 <0.001
set1_prob.z 0.60 0.16 – 1.04 0.008 0.35 -0.05 – 0.75 0.082 0.74 0.27 – 1.21 0.002
set1_amt.z -1.34 -1.75 – -0.93 <0.001 -1.34 -1.74 – -0.94 <0.001 -1.54 -1.97 – -1.10 <0.001
set1_order -0.74 -0.87 – -0.61 <0.001 -0.79 -0.99 – -0.60 <0.001
Random Effects
σ2 3.29 3.29 3.29
τ00 0.07 ResponseId 0.05 ResponseId 7.22 ResponseId
τ11 6.52 ResponseId.set1_prob.z 4.79 ResponseId.set1_prob.z 6.67 ResponseId.set1_prob.z
4.88 ResponseId.set1_amt.z 4.30 ResponseId.set1_amt.z 4.97 ResponseId.set1_amt.z
    0.72 ResponseId.set1_order
ρ01 0.69 0.74 -0.64
0.56 0.55 0.45
    -0.99
ICC 0.80 0.76 0.80
N 182 ResponseId 182 ResponseId 182 ResponseId
Observations 1092 1092 1092
Marginal R2 / Conditional R2 0.167 / 0.833 0.250 / 0.817 0.301 / 0.858

Visualize Random Intercepts/Slopes

re_M1 <- get_participant_effects_clmm(set1.order_M1)
re_M2 <- get_participant_effects_clmm(set1.order_M2)
re_M3 <- get_participant_effects_clmm(set1.order_M3)

# Add a model label
re_M1$model <- "M1 - RE: Prob & Amt"
re_M2$model <- "M2 - Adding Order FE"
re_M3$model <- "M3 - Adding Order RE"

# Combine
re_all <- bind_rows(re_M1, re_M2, re_M3)

re_all_long <- re_all %>%
  pivot_longer(
    cols = -c(ResponseId, model),
    names_to = "parameter",
    values_to = "coefficient"
  )

re_all_long <- re_all_long %>%
  mutate(
    parameter = recode(parameter,
      `(Intercept)`          = "(Intercept)",
      `set1_prob.z`          = "Prob (z)",
      `set1_amt.z`           = "Amt (z)",
      `set1_order` = "Drag Order"
    ),
    parameter = factor(parameter,
      levels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Order")
    ),
    model = factor(model, levels = c("M1 - RE: Prob & Amt","M2 - Adding Order FE","M3 - Adding Order RE"))
  )

# Summary stats for markers
summary_stats <- re_all_long %>%
  group_by(model, parameter) %>%
  summarise(
    mean_val   = mean(coefficient, na.rm = TRUE),
    median_val = median(coefficient, na.rm = TRUE),
    .groups = "drop"
  )

# Color map: Intercept grey, slopes steelblue
param_colors <- c(
  "(Intercept)" = "grey60",
  "Prob (z)"    = "steelblue",
  "Amt (z)"     = "steelblue",
  "Drag Order" = "steelblue"
)

ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
  geom_histogram(bins = 30, alpha = 0.85, color = "white") +
  facet_grid(parameter ~ model, scales = "free") +   # allow y to vary
  scale_fill_manual(values = param_colors, guide = "none") +
  theme_minimal() +
  labs(
    title = "Participant-specific coefficients",
    x = "Coefficient value",
    y = "Count"
  )+
  theme(
 panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    strip.text = element_text(face = "bold",size=12)  # bold facet labels
  ) 

Predict 2ND Lotter Ranking

predict_set2_by_id_clmm.Order <- function(model, data, label) {
  # --- Random effects (usually only intercepts in clmm)
  re_list <- ranef(model)
  if (is.list(re_list) && !is.null(re_list$ResponseId)) {
    re_mat <- re_list$ResponseId
    re_df  <- data.frame(re_mat, check.names = FALSE)
    re_df$ResponseId <- rownames(re_mat)
  } else {
    # no RE present
    re_df <- data.frame(ResponseId = unique(data$ResponseId), check.names = FALSE)
  }

 
  if ("(Intercept)"           %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_intercept = `(Intercept)`)
  if ("set1_prob.z"           %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_prob      = `set1_prob.z`)
  if ("set1_amt.z"            %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_amt       = `set1_amt.z`)
  if ("set1_order"  %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_drag      = `set1_order`)

  # Ensure all needed columns exist (fill missing with 0)
  for (nm in c("b_intercept","b_prob","b_amt","b_drag")) {
    if (!nm %in% names(re_df)) re_df[[nm]] <- 0
  }
  
  # --- Fixed effects: use model$beta (no thresholds)
  fe <- model$beta
  fe_prob <- if ("set1_prob.z"          %in% names(fe)) unname(fe["set1_prob.z"])          else 0
  fe_amt  <- if ("set1_amt.z"           %in% names(fe)) unname(fe["set1_amt.z"])           else 0
  fe_drag <- if ("set1_order" %in% names(fe)) unname(fe["set1_order"]) else 0

  # NOTE: clmm has thresholds instead of a global fixed intercept; we do NOT add a fixed intercept.
  # We only use participant-specific random intercept shifts (b_intercept) on the latent scale.

  df_pred <- data %>%
    left_join(re_df, by = "ResponseId") %>%
    mutate(
      # latent score for ordering (no fixed intercept term)
      latent_score =
          b_intercept +
          (fe_prob+b_prob) * set2_prob.z +
          (fe_amt+b_amt)  * set2_amt.z  +
          (fe_drag+b_drag) * set2_order
    )  %>%
    group_by(ResponseId) %>%
    # 2) Handle ties via average ranks (and keep NAs out of ranking)
    mutate(
      # flag ties within participant (ignore NAs)
      has_tie = {
        x <- latent_score[is.finite(latent_score)]
        dplyr::n_distinct(x) < length(x)
      },
      # average ranks; higher score = better (rank 1)
      set2_rank_pred_rank = rank(-latent_score, ties.method = "average")
    ) %>%
    ungroup()

  # 3) Tie summary message
  tie_summary <- df_pred %>%
    distinct(ResponseId, has_tie) %>%
    summarise(n_ids = dplyr::n(), n_ties = sum(has_tie), .groups = "drop")
  cat(sprintf(
    "Tie handling: %d of %d participants had tied predicted scores; average ranks were used.\n",
    tie_summary$n_ties, tie_summary$n_ids
  ))

  # 4) Per-participant correlations (complete cases only)
  df_pred %>%
    group_by(ResponseId) %>%
    summarise(
      r_pearson   = cor(set2_rank_pred_rank, set2_rank, method = "pearson", use = "complete.obs"),
      tau_kendall = cor(set2_rank_pred_rank, set2_rank, method = "kendall", use = "complete.obs"),
      .groups = "drop"
    ) %>%
    mutate(model = label)
}




# Run for all three models
cor_M1 <- predict_set2_by_id_clmm.Order(set1.order_M1, master_df, "RE: Prob & Amt")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M2 <- predict_set2_by_id_clmm.Order(set1.order_M2, master_df, "Adding Order FE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M3 <- predict_set2_by_id_clmm.Order(set1.order_M3, master_df, "Adding Order RE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.

Performances

cor_all <- bind_rows(cor_M1, cor_M2, cor_M3) %>%
  mutate(model = factor(model, levels = c("RE: Prob & Amt","Adding Order FE","Adding Order RE")))

p_r.Order_clmm   <- make_violin(cor_all, "r_pearson",   "Per-participant Pearson r")
p_tau.Order_clmm <- make_violin(cor_all, "tau_kendall", "Per-participant Kendall \u03C4")

p_r.Order_clmm + p_tau.Order_clmm + plot_layout(ncol = 2)

  • Significance Testing
table_r.order_CLMM   <- compare_models_simple.Order(cor_all, "r_pearson")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
table_tau.order_CLMM <- compare_models_simple.Order(cor_all, "tau_kendall")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
kable(table_r.order_CLMM,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Pearson r (Order)</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Pearson r (Order)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.023 0 0.225 0.231 0.544 0.0954 1.676 181 0.0874 2116.5 0.0901
Adding RE (M3 − M1) 182 0.063 0 0.236 0.148 0.615 0.0078 2.692 181 0.0030 1749.5 0.0067
kable(table_tau.order_CLMM,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Kendall &tau; (Order)</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Kendall τ (Order)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.007 0 0.22 0.231 0.549 0.5799 0.554 181 0.2341 1954 0.5423
Adding RE (M3 − M1) 182 0.051 0 0.22 0.154 0.626 0.0149 2.457 181 0.0091 1599 0.0144

Count

  • No singularity issues
set1.Count_M1 <- clmm(
  as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)
set1.Count_M2 <- clmm(
  as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+ set1_touch_count_binary+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)
set1.Count_M3 <- clmm(
  as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+ set1_touch_count_binary+
    (set1_prob.z + set1_amt.z + set1_touch_count_binary | ResponseId),
  data = master_df
)


tab_model(
  set1.Count_M1,
  set1.Count_M2,
  set1.Count_M3,
  dv.labels  = c("RE: Prob & Amt", "Adding Count FE", "Adding Count RE"),
  pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Count"),
  show.re.var = TRUE,   # show random-effects variances
  show.icc    = TRUE,   # show ICC
  digits = 2,
  transform= NULL
)
  RE: Prob & Amt Adding Count FE Adding Count RE
Predictors Log-Odds CI p Log-Odds CI p Log-Odds CI p
1|2 -4.15 -4.53 – -3.77 <0.001 -3.61 -4.02 – -3.21 <0.001 -3.64 -4.07 – -3.21 <0.001
2|3 -1.76 -2.01 – -1.50 <0.001 -1.17 -1.48 – -0.86 <0.001 -1.16 -1.48 – -0.84 <0.001
3|4 0.14 -0.08 – 0.36 0.220 0.78 0.48 – 1.08 <0.001 0.81 0.48 – 1.15 <0.001
4|5 1.95 1.69 – 2.21 <0.001 2.62 2.28 – 2.96 <0.001 2.66 2.27 – 3.05 <0.001
5|6 4.08 3.72 – 4.44 <0.001 4.72 4.30 – 5.15 <0.001 4.78 4.28 – 5.27 <0.001
set1_prob.z 0.60 0.16 – 1.04 0.008 0.57 0.14 – 1.00 0.009 0.59 0.16 – 1.02 0.007
set1_amt.z -1.34 -1.75 – -0.93 <0.001 -1.30 -1.70 – -0.89 <0.001 -1.36 -1.78 – -0.94 <0.001
set1_touch_count_binary 0.97 0.67 – 1.27 <0.001 0.99 0.66 – 1.32 <0.001
Random Effects
σ2 3.29 3.29 3.29
τ00 0.07 ResponseId 0.06 ResponseId 0.23 ResponseId
τ11 6.52 ResponseId.set1_prob.z 6.14 ResponseId.set1_prob.z 6.14 ResponseId.set1_prob.z
4.88 ResponseId.set1_amt.z 4.59 ResponseId.set1_amt.z 4.67 ResponseId.set1_amt.z
    0.42 ResponseId.set1_touch_count_binary
ρ01 0.69 0.65 0.75
0.56 0.61 -0.13
    -0.86
ICC 0.80 0.79 0.79
N 182 ResponseId 182 ResponseId 182 ResponseId
Observations 1092 1092 1092
Marginal R2 / Conditional R2 0.167 / 0.833 0.186 / 0.828 0.199 / 0.830

Visualize Random Intercepts/Slopes

re_M1 <- get_participant_effects_clmm(set1.Count_M1)
re_M2 <- get_participant_effects_clmm(set1.Count_M2)
re_M3 <- get_participant_effects_clmm(set1.Count_M3)

# Add a model label
re_M1$model <- "M1 - RE: Prob & Amt"
re_M2$model <- "M2 - Adding Count FE"
re_M3$model <- "M3 - Adding Count RE"

# Combine
re_all <- bind_rows(re_M1, re_M2, re_M3)

re_all_long <- re_all %>%
  pivot_longer(
    cols = -c(ResponseId, model),
    names_to = "parameter",
    values_to = "coefficient"
  )

re_all_long <- re_all_long %>%
  mutate(
    parameter = recode(parameter,
      `(Intercept)`          = "(Intercept)",
      `set1_prob.z`          = "Prob (z)",
      `set1_amt.z`           = "Amt (z)",
      `set1_touch_count_binary` = "Drag Count"
    ),
    parameter = factor(parameter,
      levels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Count")
    ),
    model = factor(model, levels = c("M1 - RE: Prob & Amt","M2 - Adding Count FE","M3 - Adding Count RE"))
  )

# Summary stats for markers
summary_stats <- re_all_long %>%
  group_by(model, parameter) %>%
  summarise(
    mean_val   = mean(coefficient, na.rm = TRUE),
    median_val = median(coefficient, na.rm = TRUE),
    .groups = "drop"
  )

# Color map: Intercept grey, slopes steelblue
param_colors <- c(
  "(Intercept)" = "grey60",
  "Prob (z)"    = "steelblue",
  "Amt (z)"     = "steelblue",
  "Drag Count" = "steelblue"
)

ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
  geom_histogram(bins = 30, alpha = 0.85, color = "white") +
  facet_grid(parameter ~ model, scales = "free") +   # allow y to vary
  scale_fill_manual(values = param_colors, guide = "none") +
  theme_minimal() +
  labs(
    title = "Participant-specific coefficients",
    x = "Coefficient value",
    y = "Count"
  )+
  theme(
 panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    strip.text = element_text(face = "bold",size=12)  # bold facet labels
  ) 

Predict 2ND Lotter Ranking

predict_set2_by_id_clmm.Count <- function(model, data, label) {
  # --- Random effects (usually only intercepts in clmm)
  re_list <- ranef(model)
  if (is.list(re_list) && !is.null(re_list$ResponseId)) {
    re_mat <- re_list$ResponseId
    re_df  <- data.frame(re_mat, check.names = FALSE)
    re_df$ResponseId <- rownames(re_mat)
  } else {
    # no RE present
    re_df <- data.frame(ResponseId = unique(data$ResponseId), check.names = FALSE)
  }

 
  if ("(Intercept)"           %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_intercept = `(Intercept)`)
  if ("set1_prob.z"           %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_prob      = `set1_prob.z`)
  if ("set1_amt.z"            %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_amt       = `set1_amt.z`)
  if ("set1_touch_count_binary"  %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_drag      = `set1_touch_count_binary`)

  # Ensure all needed columns exist (fill missing with 0)
  for (nm in c("b_intercept","b_prob","b_amt","b_drag")) {
    if (!nm %in% names(re_df)) re_df[[nm]] <- 0
  }


  # --- Fixed effects: use model$beta (no thresholds)
  fe <- model$beta
  fe_prob <- if ("set1_prob.z"          %in% names(fe)) unname(fe["set1_prob.z"])          else 0
  fe_amt  <- if ("set1_amt.z"           %in% names(fe)) unname(fe["set1_amt.z"])           else 0
  fe_drag <- if ("set1_touch_count_binary" %in% names(fe)) unname(fe["set1_touch_count_binary"]) else 0

  # NOTE: clmm has thresholds instead of a global fixed intercept; we do NOT add a fixed intercept.
  # We only use participant-specific random intercept shifts (b_intercept) on the latent scale.

  df_pred <- data %>%
    left_join(re_df, by = "ResponseId") %>%
    mutate(
      # latent score for ordering (no fixed intercept term)
      latent_score =
          b_intercept +
          (fe_prob+b_prob) * set2_prob.z +
          (fe_amt+b_amt)  * set2_amt.z  +
          (fe_drag+b_drag) * set2_touch_count_binary
    )  %>%
    group_by(ResponseId) %>%
    # 2) Handle ties via average ranks (and keep NAs out of ranking)
    mutate(
      # flag ties within participant (ignore NAs)
      has_tie = {
        x <- latent_score[is.finite(latent_score)]
        dplyr::n_distinct(x) < length(x)
      },
      # average ranks; higher score = better (rank 1)
      set2_rank_pred_rank = rank(-latent_score, ties.method = "average")
    ) %>%
    ungroup()

  # 3) Tie summary message
  tie_summary <- df_pred %>%
    distinct(ResponseId, has_tie) %>%
    summarise(n_ids = dplyr::n(), n_ties = sum(has_tie), .groups = "drop")
  cat(sprintf(
    "Tie handling: %d of %d participants had tied predicted scores; average ranks were used.\n",
    tie_summary$n_ties, tie_summary$n_ids
  ))

  # 4) Per-participant correlations (complete cases only)
  df_pred %>%
    group_by(ResponseId) %>%
    summarise(
      r_pearson   = cor(set2_rank_pred_rank, set2_rank, method = "pearson", use = "complete.obs"),
      tau_kendall = cor(set2_rank_pred_rank, set2_rank, method = "kendall", use = "complete.obs"),
      .groups = "drop"
    ) %>%
    mutate(model = label)
}




# Run for all three models
cor_M1 <- predict_set2_by_id_clmm.Count(set1.Count_M1, master_df, "RE: Prob & Amt")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M2 <- predict_set2_by_id_clmm.Count(set1.Count_M2, master_df, "Adding Count FE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M3 <- predict_set2_by_id_clmm.Count(set1.Count_M3, master_df, "Adding Count RE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.

Performances

cor_all <- bind_rows(cor_M1, cor_M2, cor_M3) %>%
  mutate(model = factor(model, levels = c("RE: Prob & Amt","Adding Count FE","Adding Count RE")))

p_r.Count_clmm   <- make_violin(cor_all, "r_pearson",   "Per-participant Pearson r")
p_tau.Count_clmm <- make_violin(cor_all, "tau_kendall", "Per-participant Kendall \u03C4")

p_r.Count_clmm + p_tau.Count_clmm + plot_layout(ncol = 2)

  • Significance Testing
table_r.Count_CLMM   <- compare_models_simple.Count(cor_all, "r_pearson")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
table_tau.Count_CLMM <- compare_models_simple.Count(cor_all, "tau_kendall")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
kable(table_r.Count_CLMM,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Pearson r (Count)</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Pearson r (Count)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.011 0 0.132 0.088 0.780 0.0725 1.807 181 0.1318 522.5 0.0784
Adding RE (M3 − M1) 182 0.021 0 0.154 0.099 0.747 0.0097 2.614 181 0.0453 724.0 0.0060
kable(table_tau.Count_CLMM,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Kendall &tau; (Count)</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Kendall τ (Count)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.004 0 0.126 0.088 0.786 0.5487 0.601 181 0.3938 451 0.4744
Adding RE (M3 − M1) 182 0.010 0 0.148 0.099 0.753 0.1688 1.381 181 0.0572 685 0.1395

Performance Summary

Pearson R/Tau Plots

p_r.Distance_clmm  <- p_r.Distance_clmm  + labs(title = "Distance • r")
p_tau.Distance_clmm<- p_tau.Distance_clmm+ labs(title = "Distance • Kendall’s τ")
p_r.Order_clmm     <- p_r.Order_clmm     + labs(title = "Order • r")
p_tau.Order_clmm   <- p_tau.Order_clmm   + labs(title = "Order • Kendall’s τ")
p_r.Count_clmm     <- p_r.Count_clmm     + labs(title = "Count • r")
p_tau.Count_clmm   <- p_tau.Count_clmm   + labs(title = "Count • Kendall’s τ")

summary_plot.CLMM <-
  (p_r.Distance_clmm | p_tau.Distance_clmm) /
  (p_r.Order_clmm    | p_tau.Order_clmm)    /
  (p_r.Count_clmm    | p_tau.Count_clmm) +
  plot_layout(guides = "collect") +
  plot_annotation(
    title = "Summary for CLMM",
    theme = theme(
      legend.position = "bottom",
      plot.title = element_text(face = "bold", size = 14),
      plot.subtitle = element_text(size = 11),
      plot.caption = element_text(size = 10, face = "bold")
    )
  )

p_r.Distance_lmer  <- p_r.Distance_lmer  + labs(title = "Distance • r")
p_tau.Distance_lmer<- p_tau.Distance_lmer+ labs(title = "Distance • Kendall’s τ")
p_r.Order_lmer     <- p_r.Order_lmer     + labs(title = "Order • r")
p_tau.Order_lmer   <- p_tau.Order_lmer   + labs(title = "Order • Kendall’s τ")
p_r.Count_lmer     <- p_r.Count_lmer     + labs(title = "Count • r")
p_tau.Count_lmer   <- p_tau.Count_lmer   + labs(title = "Count • Kendall’s τ")

summary_plot.LMER <-
  (p_r.Distance_lmer | p_tau.Distance_lmer) /
  (p_r.Order_lmer    | p_tau.Order_lmer)    /
  (p_r.Count_lmer    | p_tau.Count_lmer) +
  plot_layout(guides = "collect") +
  plot_annotation(
    title = "Summary for LMER",
    theme = theme(
      legend.position = "bottom",
      plot.title = element_text(face = "bold", size = 14),
      plot.subtitle = element_text(size = 11),
      plot.caption = element_text(size = 10, face = "bold")
    )
  )
summary_plot.LMER

summary_plot.CLMM

# ggsave("summary_plot.LMER.svg", plot = summary_plot.LMER, width = 15, height = 10, units = "in")
# ggsave("summary_plot.CLMM.svg", plot = summary_plot.CLMM, width = 15, height = 10, units = "in")

Significance Testing

library(htmltools)
make_kable <- function(tbl, metric_label, stat_label) {
  kable(
    tbl,
    caption = sprintf(
      "<b><span style='font-size:18px;color:black;'>Model comparison using %s (%s)</span></b>",
      stat_label, metric_label
    ),
    digits = 4,
    align = "lrrrrrrrrrr",
    escape = FALSE
  ) |>
    kable_styling(full_width = FALSE)
}

# Create all six tables
k_r_distance   <- make_kable(table_r.distance_CLMM,   "Distance", "Pearson r")
k_tau_distance <- make_kable(table_tau.distance_CLMM, "Distance", "Kendall &tau;")
k_r_order      <- make_kable(table_r.order_CLMM,      "Order",    "Pearson r")
k_tau_order    <- make_kable(table_tau.order_CLMM,    "Order",    "Kendall &tau;")
k_r_count      <- make_kable(table_r.Count_CLMM,      "Count",    "Pearson r")
k_tau_count    <- make_kable(table_tau.Count_CLMM,    "Count",    "Kendall &tau;")

to_html <- function(k) HTML(as.character(k))

Sig.Test.CLMM <- tagList(
  tags$style(".grid2{display:grid;grid-template-columns:1fr 1fr;gap:22px;}
              .grid2 h2{font-weight:bold;font-size:24px;color:black;margin:0 0 12px 0;}"),
  tags$h2("CLMM Significance Tests"),
  tags$div(
    class = "grid2",
    to_html(k_r_distance),  to_html(k_tau_distance),
    to_html(k_r_order),     to_html(k_tau_order),
    to_html(k_r_count),     to_html(k_tau_count)
  )
)

Sig.Test.CLMM  

CLMM Significance Tests

Model comparison using Pearson r (Distance)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.052 0 0.253 0.154 0.593 1e-04 3.939 181 4e-04 2049.0 0
Adding RE (M3 − M1) 182 0.068 0 0.242 0.181 0.577 1e-04 4.023 181 5e-04 2184.5 0
Model comparison using Kendall τ (Distance)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.032 0 0.242 0.143 0.615 0.0065 2.752 181 0.0044 1723 0.0061
Adding RE (M3 − M1) 182 0.044 0 0.236 0.165 0.599 0.0033 2.976 181 0.0025 1897 0.0033
Model comparison using Pearson r (Order)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.023 0 0.225 0.231 0.544 0.0954 1.676 181 0.0874 2116.5 0.0901
Adding RE (M3 − M1) 182 0.063 0 0.236 0.148 0.615 0.0078 2.692 181 0.0030 1749.5 0.0067
Model comparison using Kendall τ (Order)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.007 0 0.22 0.231 0.549 0.5799 0.554 181 0.2341 1954 0.5423
Adding RE (M3 − M1) 182 0.051 0 0.22 0.154 0.626 0.0149 2.457 181 0.0091 1599 0.0144
Model comparison using Pearson r (Count)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.011 0 0.132 0.088 0.780 0.0725 1.807 181 0.1318 522.5 0.0784
Adding RE (M3 − M1) 182 0.021 0 0.154 0.099 0.747 0.0097 2.614 181 0.0453 724.0 0.0060
Model comparison using Kendall τ (Count)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.004 0 0.126 0.088 0.786 0.5487 0.601 181 0.3938 451 0.4744
Adding RE (M3 − M1) 182 0.010 0 0.148 0.099 0.753 0.1688 1.381 181 0.0572 685 0.1395
k_r_distance   <- make_kable(table_r.distance_lmer,   "Distance", "Pearson r")
k_tau_distance <- make_kable(table_tau.distance_lmer, "Distance", "Kendall &tau;")
k_r_order      <- make_kable(table_r.order_lmer,      "Order",    "Pearson r")
k_tau_order    <- make_kable(table_tau.order_lmer,    "Order",    "Kendall &tau;")
k_r_count      <- make_kable(table_r.Count_lmer,      "Count",    "Pearson r")
k_tau_count    <- make_kable(table_tau.Count_lmer,    "Count",    "Kendall &tau;")

Sig.Test.LMER <- tagList(
  tags$style(".grid2{display:grid;grid-template-columns:1fr 1fr;gap:22px;}
              .grid2 h2{font-weight:bold;font-size:24px;color:black;margin:0 0 12px 0;}"),
  tags$h2("LMER Significance Tests"),
  tags$div(
    class = "grid2",
    to_html(k_r_distance),  to_html(k_tau_distance),
    to_html(k_r_order),     to_html(k_tau_order),
    to_html(k_r_count),     to_html(k_tau_count)
  )
)

Sig.Test.LMER

LMER Significance Tests

Model comparison using Pearson r (Distance)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.062 0 0.286 0.192 0.522 0 4.378 181 2e-04 2800.5 0
Adding RE (M3 − M1) 182 0.076 0 0.253 0.170 0.577 0 4.248 181 9e-04 2155.0 0
Model comparison using Kendall τ (Distance)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.040 0 0.275 0.187 0.538 0.0013 3.276 181 0.0018 2472.5 5e-04
Adding RE (M3 − M1) 182 0.059 0 0.242 0.154 0.604 0.0004 3.616 181 0.0008 1909.5 3e-04
Model comparison using Pearson r (Order)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.028 0 0.253 0.280 0.467 0.0776 1.775 181 0.1437 2780.5 0.0784
Adding RE (M3 − M1) 182 0.064 0 0.236 0.192 0.571 0.0136 2.492 181 0.0191 2010.5 0.0121
Model comparison using Kendall τ (Order)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.015 0 0.247 0.264 0.489 0.3384 0.960 181 0.3439 2429.0 0.3139
Adding RE (M3 − M1) 182 0.053 0 0.231 0.192 0.577 0.0224 2.302 181 0.0252 1940.5 0.0210
Model comparison using Pearson r (Count)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.011 0 0.126 0.104 0.769 0.1520 1.438 181 0.3571 525.5 0.1646
Adding RE (M3 − M1) 182 0.020 0 0.148 0.099 0.753 0.0521 1.955 181 0.1072 660.5 0.0547
Model comparison using Kendall τ (Count)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding FE (M2 − M1) 182 0.004 0 0.115 0.104 0.780 0.5331 0.624 181 0.2353 498 0.4723
Adding RE (M3 − M1) 182 0.011 0 0.137 0.099 0.764 0.2293 1.206 181 0.1024 608 0.2006

Additional Thoughts

  • One could object to using clmm() , as it assumes independence of observations, but is violated by our data - e.g., once 5 ranked are assigned, the 6th rank has no degree of freedom. That said, I’m not too concerned that it solely explain the improvement from DROPT data — there may still be a main effect worth examining with other more appropriate methods
  • At first when I saw the mean and median differences were similar, I was concerned about the difference being non-significant (thankfully not the case). Still, one may object the degree of improvement with DROPT predictors.
    • one possible next step is to explore adding interaction terms with initial positions (to simply, treat ini pos as numeric). For distance we could do this with either simple additions or interactions.