Setup, Filter, General Descriptives

# 1. why Kendall's tau? Ties? spearson tho seems more granular?
# 2. cannot do t test on the thing? Need non-parametric? think about Olivier's transformation to z; probabily fine.
# 3. some how only estimate a weight for positive ranking upward? Only counts when using that to rank upward?
# 4. let's say that the x axis is tau, what the y axis? the 


# Load required package
library(gtools)  # for permutations

# Generate all permutations of 1:6
perms <- permutations(n = 6, r = 6)
ref <- 1:6
n <- length(ref)

# Initialize vectors
kendall_vals <- numeric(nrow(perms))
spearman_vals <- numeric(nrow(perms))
footrule_vals <- numeric(nrow(perms))

# Compute all metrics
for (i in 1:nrow(perms)) {
  p <- perms[i, ]
  
  # Kendall's tau
  kendall_vals[i] <- cor(p, ref, method = "kendall")
  
  # Spearman's rho
  spearman_vals[i] <- cor(p, ref, method = "spearman")
  
  # Footrule distance (normalized to range [–1, 1])
  raw_footrule <- sum(abs(p - ref))
  max_footrule <- sum(abs(rev(ref) - ref))  # = 18 for n = 6
  norm_footrule <- 1 - (2 * raw_footrule / max_footrule)
  footrule_vals[i] <- norm_footrule
}

# Count unique values
cat("Unique Kendall tau values:   ", length(unique(kendall_vals)), "\n")
## Unique Kendall tau values:    16
cat("Unique Spearman rho values:  ", length(unique(spearman_vals)), "\n")
## Unique Spearman rho values:   36
cat("Unique Footrule scores:      ", length(unique(footrule_vals)), "\n")
## Unique Footrule scores:       10
## Getting started ##

# Set WD
#setwd("C:/Users/Arian/Documents/GitHub/SAB-Lab/Dynamic Rank Order Processes Tracing (DROPT)/Shape and Lottery Experiments/Lottery Study_Preferences/Arians-Sandbox")

# Get both data files; pick and choose from each
data_num <- read.csv("DROPT_Lotteries_full_num.csv") %>%
  dplyr::slice(3:n())
data_text <- read.csv("DROPT_Lotteries_full_text.csv") %>%
  dplyr::slice(3:n())

# set numeric file as base
dat <- data_num

# Convert all columns that are not yet numeric but can be to numeric
dat[] <- lapply(dat, function(col) {
  if (is.character(col) || is.factor(col)) {
    suppressWarnings(num_col <- as.numeric(as.character(col)))
    # If most values are not NA after conversion, assume it's a valid numeric column
    if (sum(!is.na(num_col)) > 0.8 * length(num_col)) {
      return(num_col)
    } else {
      return(col)  # leave it as-is
    }
  } else {
    return(col)
  }
})
## Setup: Merge data frames - take some of the demographic information from the text df
dat$birthYear <- as.numeric(as.character(data_text$age))
dat$age <- 2025 - dat$birthYear
dat$gender <- factor(data_text$gender)
dat$gender_binary <- ifelse(dat$gender == "Male", "Male",
                             ifelse(dat$gender == "Female", "Female", NA))
dat$education <- data_text$education
dat$education_num <- as.numeric(data_num$education)
dat$income <- ifelse(dat$income != 10, as.numeric(dat$income), NA)


## necessary recoding
correct_answer <- "1,2,3"

dat <- dat %>%
  mutate(dose.coded = ifelse(Dose == correct_answer, "Correct", "Incorrect"))
dat$attn1.coded <- ifelse(dat$attn1 == 2, "Correct", "Incorrect")

dat$dose.coded <- as.factor(dat$dose.coded)
dat$attn1.coded <- as.factor(dat$attn1.coded)

dat$bonus_belief [dat$bonus_belief== 1] = 'Yes'
dat$bonus_belief [dat$bonus_belief== 0] = 'No'

dat$bonus_belief <- factor(dat$bonus_belief, levels = names(sort(table(dat$bonus_belief), decreasing = TRUE)))

dat <- dat %>%
  mutate(StartDate = as.Date(StartDate, format = "%Y-%m-%d")) %>% 
  rename(Bot_score = "Q_RecaptchaScore") %>% 
  rename(Fraud_score = "Q_RelevantIDFraudScore") %>%
  rename(Duration = "Duration..in.seconds.") 


## Apply filters

dat <- dat %>%
  filter(PROLIFIC_PID != "") %>%
  filter(DistributionChannel != "preview") %>% #
  filter(Bot_score >= 0.50) %>% #
  #filter(Fraud_score <= 30) %>%
  filter(attn1.coded == "Correct") #


# failed attention
# bots (0.5)
# fraudulent (score?)
# tau less than 0.5, but only exclude in payoff and prob

filter_summary <- data.frame(
  Filter_Name = c(
    'Test Runs (Preview)',
    'Bot_score =< 0.50',
    'Attention Check failed',
    'Sum'
  ),
  People_Filtered_Out = c(1,3,6,10)
)

filter_summary 
## Create long data frames
dat_long1 <- dat %>%
  pivot_longer(
    cols = matches("Set1_L[1-6]_(Prob|Amt)"),
    names_to = c("lottery", ".value"),
    names_pattern = "Set1_(L[1-6])_(Prob|Amt)"
  ) %>%
  mutate(item.f = case_when(
    lottery == "L1" ~ "Pr6_Amt1",
    lottery == "L2" ~ "Pr5_Amt2",
    lottery == "L3" ~ "Pr4_Amt3",
    lottery == "L4" ~ "Pr3_Amt4",
    lottery == "L5" ~ "Pr2_Amt5",
    lottery == "L6" ~ "Pr1_Amt6"
  ))

dat_long2 <- dat %>%
  pivot_longer(
    cols = matches("Set2_L[1-6]_(Prob|Amt)"),
    names_to = c("lottery", ".value"),
    names_pattern = "Set2_(L[1-6])_(Prob|Amt)"
  ) %>%
  mutate(item.f = case_when(
    lottery == "L1" ~ "Pr6_Amt1",
    lottery == "L2" ~ "Pr5_Amt2",
    lottery == "L3" ~ "Pr4_Amt3",
    lottery == "L4" ~ "Pr3_Amt4",
    lottery == "L5" ~ "Pr2_Amt5",
    lottery == "L6" ~ "Pr1_Amt6"
  ))


# write a function for dfs later
process_task <- function(data, rank_column, task_label) {
  # Extract the initial order
  initial_order <- sub("^\\{([^}]*)\\}.*", "\\1", data[[rank_column]]) # captures the content within the first {} in the string; we will apply this to the RankProcess column; Using double bracket to capture a vector

  initial_order <- gsub("0; ", "", initial_order) # remove the 0 timestamp
  initial_order_split <- strsplit(initial_order, ",") # separate the strings into list

  # Identify unique items
  unique_items <- unique(unlist(initial_order_split))

  # Create a data frame to store initial ranks
  initial_positions_df <- data.frame(matrix(ncol = length(unique_items), nrow = length(initial_order_split))) # nrow is the number of respondents
  names(initial_positions_df) <- paste0("initial.items_", unique_items)

  # Fill initial ranks
  for (i in seq_along(initial_order_split)) {
    initial_order <- initial_order_split[[i]] # for each respondent, extract the string of initial order
    for (j in seq_along(initial_order)) {
      item <- initial_order[j]
      initial_positions_df[i, paste0("initial.items_", item)] <- j
    }
  }

  data <- cbind(data, initial_positions_df)
  assign(paste0("initial.dat_", task_label), data, envir = .GlobalEnv)

  cor_results <- data.frame(
    item = paste0("rank_", task_label, "_", unique_items),
    initial_item = paste0("initial.items_", unique_items),
    correlation = NA_real_,
    p_value = NA_real_
  )

  cor_results <- cor_results %>% # it is important to do this step by Task, because IDs are only unique and consistent within each quiz.
    rowwise() %>%
    dplyr::mutate(
      correlation = cor.test(data[[item]], data[[initial_item]])$estimate,
      p_value = cor.test(data[[item]], data[[initial_item]])$p.value
    ) %>%
    dplyr::mutate(sig = p_value < 0.05)

  cor_results$task <- task_label
  cor_results
}

# List of datasets, rank columns, and task labels
tasks <- list(
  list(data = dat, rank_column = "RankProcess_Prob", task_label = "prob"),
  list(data = dat, rank_column = "RankProcess_Amount", task_label = "amount"),
  list(data = dat, rank_column = "RankProcess_Prefer1", task_label = "pref1"),
  list(data = dat, rank_column = "RankProcess_Prefer2", task_label = "pref2")
)


all_results <- bind_rows(lapply(tasks, function(t) {
  process_task(t$data, t$rank_column, t$task_label)
}))

New Prep

### Collection of Wrapper functions that can be applied to all dfs ###

## COUNT ##

get_DROPT_count <- function(dat, rank_col, rank_all_col, item_numbers, item_labels,
                            condition, initial_dat, dat_long) {
  # Load required packages
  library(dplyr)
  library(tidyr)
  library(rlang)
  library(stringr)
  
  # Validate inputs
  if (length(item_numbers) != length(item_labels)) {
    stop("item_numbers and item_labels must be the same length.")
  }
  
  item_map <- setNames(item_labels, item_numbers)
  
  # --- Helper: Create Rank Process ---
  create_RankProcess <- function(data, col_name) {
    col_sym <- sym(col_name)
    data %>%
      select(ResponseId, !!col_sym) %>%
      separate_rows(!!col_sym, sep = "}") %>%
      mutate(!!col_sym := gsub("[{}]", "", !!col_sym)) %>%
      filter(!!col_sym != "") %>%
      separate(!!col_sym, into = c("timing", "order"), sep = ";") %>%
      mutate(order = trimws(order)) %>%
      group_by(ResponseId) %>%
      mutate(step = row_number() - 1) %>%
      select(step, everything()) %>%
      ungroup()
  }
  
  # --- Helper: Create Rank Process All ---
  create_RankProcess_all <- function(data, col_name) {
    col_sym <- sym(col_name)
    data %>%
      select(ResponseId, !!col_sym) %>%
      separate_rows(!!col_sym, sep = "}") %>%
      mutate(!!col_sym := gsub("[{}]", "", !!col_sym)) %>%
      filter(!!col_sym != "") %>%
      separate(!!col_sym, into = c("timing", "order_all"), sep = ";")
  }
  
  # --- Helper: Process and Clean Data ---
  process_RankProcess_data <- function(rank_data, rank_all_data, item_map) {
    item_numbers <- names(item_map)
    
    processed <- rank_data %>%
      left_join(rank_all_data, by = c("ResponseId", "timing")) %>%
      mutate(order_all = trimws(order_all),
             item_moved = as.numeric(sub(",.*", "", order_all))) %>%
      ungroup() %>%
      mutate(item.f = as.factor(item_map[as.character(item_moved)]))
    
    na_subj <- processed %>%
      filter(is.na(item_moved)) %>%
      pull(ResponseId)
    
    is_valid_order <- grepl("^\\d+(,\\d+){5}$", processed$order_all)
    bug_respondents <- processed %>%
      filter(is_valid_order & timing != 0) %>%
      pull(ResponseId)
    
    cleaned <- processed %>%
      filter(!ResponseId %in% c(na_subj, bug_respondents))
    
    drag_counts <- cleaned %>%
      filter(step != 0) %>%
      group_by(ResponseId) %>%
      dplyr::summarize(
        !!!setNames(
          lapply(item_numbers, function(x) {
            rlang::expr(sum(item_moved == !!as.numeric(x)))
          }),
          paste0("item_", item_numbers, "_moved.N")
        ),
        .groups = "drop"
      )
    
    return(list(
      cleaned_data = cleaned,
      bug_respondent = unique(bug_respondents),
      na_subj = unique(na_subj),
      drag_and_drop_count = drag_counts
    ))
  }
  
  # --- Main workflow ---
  rank_data <- create_RankProcess(dat, rank_col)
  rank_all_data <- create_RankProcess_all(dat, rank_all_col)
  result <- process_RankProcess_data(rank_data, rank_all_data, item_map)
  
  # --- Create long-format data ---
  drag_and_drop_count_long <- result$drag_and_drop_count %>%
    pivot_longer(
      cols = starts_with("item_"),
      names_to = c("item_number", ".value"),
      names_sep = "_moved\\."
    ) %>%
    mutate(
      condition = condition,
      item_number = as.numeric(gsub("item_", "", item_number)),
      item.f = as.factor(item_map[as.character(item_number)])
    ) %>%
    mutate(
      rank.Amount = case_when(
        item.f == "Pr6_Amt1" ~ 1,
        item.f == "Pr5_Amt2" ~ 2,
        item.f == "Pr4_Amt3" ~ 3,
        item.f == "Pr3_Amt4" ~ 4,
        item.f == "Pr2_Amt5" ~ 5,
        item.f == "Pr1_Amt6" ~ 6
      ),
      rank.Prob = case_when(
        item.f == "Pr6_Amt1" ~ 6,
        item.f == "Pr5_Amt2" ~ 5,
        item.f == "Pr4_Amt3" ~ 4,
        item.f == "Pr3_Amt4" ~ 3,
        item.f == "Pr2_Amt5" ~ 2,
        item.f == "Pr1_Amt6" ~ 1
      )
    ) %>%
    left_join(initial_dat %>% select(ResponseId, starts_with("initial.items_")), by = "ResponseId") %>%
    mutate(
      initial.rank = case_when(
        item.f == "Pr6_Amt1" ~ initial.items_49,
        item.f == "Pr5_Amt2" ~ initial.items_50,
        item.f == "Pr4_Amt3" ~ initial.items_64,
        item.f == "Pr3_Amt4" ~ initial.items_65,
        item.f == "Pr2_Amt5" ~ initial.items_67,
        item.f == "Pr1_Amt6" ~ initial.items_68
      ),
      initial.rank.r = relevel(factor(7 - initial.rank), ref = 6),
      N_ind = ifelse(N == 0, 0, 1)
    ) %>%
    select(-c(starts_with("initial.items_"))) %>%
    left_join(dat_long %>% select(ResponseId, item.f, Prob, Amt), by = c("ResponseId", "item.f"))
  
  result$drag_and_drop_count_long <- drag_and_drop_count_long
  
  return(result)
}

## DISTANCE ##

get_DROPT_distance <- function(
    data,
    items,
    item_labels,
    order_col = "order",
    step_col = "step",
    timing_col = "timing",
    item_f_col = "item.f",
    response_id_col = "ResponseId",
    condition_label = "Condition"
) {
  # Step 1: Split 'order' into ranks
  data_processed <- data %>%
    group_by(.data[[response_id_col]]) %>%
    mutate(parts = str_split(.data[[order_col]], ",")) %>%
    mutate(
      Rank1 = sapply(parts, function(x) x[1]),
      Rank2 = sapply(parts, function(x) x[2]),
      Rank3 = sapply(parts, function(x) x[3]),
      Rank4 = sapply(parts, function(x) x[4]),
      Rank5 = sapply(parts, function(x) x[5]),
      Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA))
    ) %>%
    select(-parts) %>%
    ungroup()
  
  # Step 2: Add current_* columns
  for (item in items) {
    data_processed[[paste0("current_", item)]] <- NA_integer_
  }
  
  data_processed <- data_processed %>%
    rowwise() %>%
    mutate(across(
      starts_with("current_"),
      ~ {
        item_number <- str_remove(cur_column(), "current_")
        case_when(
          Rank1 == item_number ~ 1,
          Rank2 == item_number ~ 2,
          Rank3 == item_number ~ 3,
          Rank4 == item_number ~ 4,
          Rank5 == item_number ~ 5,
          Rank6 == item_number ~ 6,
          TRUE ~ 1
        )
      }
    )) %>%
    ungroup()
  
  # Step 3: Add lagged positions
  for (item in items) {
    data_processed[[paste0("last_", item)]] <- lag(data_processed[[paste0("current_", item)]])
  }
  
  # Step 4: Compute movement direction
  data_processed <- data_processed %>%
    group_by(.data[[response_id_col]]) %>%
    rowwise() %>%
    mutate(
      current_item_moved = get(paste0("current_", get("item_moved"))),
      last_item_moved = get(paste0("last_", get("item_moved"))),
      move_direction = case_when(
        is.na(last_item_moved) ~ "no_change",
        current_item_moved < last_item_moved ~ "up",
        current_item_moved > last_item_moved ~ "down",
        TRUE ~ "no_change"
      )
    ) %>%
    ungroup()
  
  # Step 5: Filter step != 0
  data_processed <- data_processed %>%
    filter(.data[[step_col]] != 0)
  
  # Step 6: Distance and time difference
  distance_data <- data_processed %>%
    separate(.data[[timing_col]], into = c("drag_time", "drop_time"), sep = ", ", convert = TRUE) %>%
    mutate(across(starts_with("current_"), as.numeric),
           across(starts_with("last_"), as.numeric),
           DD_diff = drop_time - drag_time,
           condition = condition_label)
  
  for (item in items) {
    distance_data[[paste0("distance_", item)]] <- distance_data[[paste0("current_", item)]] - distance_data[[paste0("last_", item)]]
  }
  
  distance_data <- distance_data %>%
    select(drag_time, DD_diff, starts_with("distance_"), all_of(c(order_col, item_f_col, step_col, response_id_col, "condition")))
  
  # Step 7: Keep only latest step per item.f
  distance_data <- distance_data %>%
    group_by(.data[[response_id_col]]) %>%
    arrange(.data[[step_col]]) %>%
    filter(!duplicated(.data[[item_f_col]])) %>%
    ungroup()
  
  # Step 8: Create full grid of items per respondent
  unique_ids <- distance_data %>%
    distinct(.data[[response_id_col]]) %>%
    pull()
  
  distance_grid <- expand.grid(
    ResponseId = unique_ids,
    item.f = item_labels
  )
  
  # Step 9: Merge and compute final distance columns
  full_df <- distance_grid %>%
    left_join(distance_data, by = c("ResponseId", "item.f")) %>%
    arrange(ResponseId) %>%
    mutate(distance = case_when(
      item.f == item_labels[1] ~ distance_49,
      item.f == item_labels[2] ~ distance_50,
      item.f == item_labels[3] ~ distance_64,
      item.f == item_labels[4] ~ distance_65,
      item.f == item_labels[5] ~ distance_67,
      item.f == item_labels[6] ~ distance_68,
      TRUE ~ 0
    ),
    distance = replace_na(distance, 0),
    distance.abs = abs(distance),
    distance.r = distance * -1)
  
  return(full_df)
}


## ORDER ##

get_DROPT_order <- function(
    data,
    dat_long,
    initial_data,
    item_labels,
    item_codes,
    condition_label = "condition",
    step_col = "step",
    response_id_col = "ResponseId",
    item_col = "item.f",
    item_moved_col = "item_moved",
    prefix_initial = "initial.items_",
    condition_value = "condition_name"  # example: "pref1"
) {
  # Step 1: Filter and order
  touch_order <- data %>%
    filter(.data[[step_col]] != 0) %>%
    group_by(.data[[response_id_col]]) %>%
    arrange(.data[[step_col]]) %>%
    filter(!duplicated(.data[[item_moved_col]])) %>%
    mutate(order = row_number()) %>%
    ungroup() %>%
    mutate(!!condition_label := condition_value)
  
  # Step 2: Expand to all items × respondent
  long_df <- expand_grid(
    !!response_id_col := unique(touch_order[[response_id_col]]),
    !!item_col := unique(touch_order[[item_col]])
  )
  
  # Step 3: Add in order and condition
  order_max <- long_df %>%
    left_join(touch_order %>% select(all_of(c(response_id_col, item_col, "order"))),
              by = c(response_id_col, item_col)) %>%
    left_join(touch_order %>%
                select(all_of(c(response_id_col, condition_label))) %>%
                filter(!duplicated(.data[[response_id_col]])),
              by = response_id_col) %>%
    group_by(.data[[response_id_col]]) %>%
    dplyr::summarize(max_order = max(order, na.rm = TRUE), .groups = "drop")
  
  long_df <- long_df %>%
    left_join(touch_order %>% select(all_of(c(response_id_col, item_col, "order"))),
              by = c(response_id_col, item_col)) %>%
    left_join(touch_order %>%
                select(all_of(c(response_id_col, condition_label))) %>%
                filter(!duplicated(.data[[response_id_col]])),
              by = response_id_col) %>%
    left_join(order_max, by = response_id_col) %>%
    mutate(order = if_else(!is.na(order), order, max_order + 1))
  
  # Step 4: Add rank.Amount and rank.Prob
  long_df <- long_df %>%
    mutate(
      rank.Amount = case_when(
        !!sym(item_col) == item_labels[1] ~ 1,
        !!sym(item_col) == item_labels[2] ~ 2,
        !!sym(item_col) == item_labels[3] ~ 3,
        !!sym(item_col) == item_labels[4] ~ 4,
        !!sym(item_col) == item_labels[5] ~ 5,
        !!sym(item_col) == item_labels[6] ~ 6
      ),
      rank.Prob = case_when(
        !!sym(item_col) == item_labels[1] ~ 6,
        !!sym(item_col) == item_labels[2] ~ 5,
        !!sym(item_col) == item_labels[3] ~ 4,
        !!sym(item_col) == item_labels[4] ~ 3,
        !!sym(item_col) == item_labels[5] ~ 2,
        !!sym(item_col) == item_labels[6] ~ 1
      )
    )
  
  # Step 5: Add initial rank
  long_df <- long_df %>%
    left_join(initial_data %>%
                select(all_of(c(response_id_col, paste0(prefix_initial, item_codes)))),
              by = response_id_col) %>%
    mutate(
      initial.rank = case_when(
        !!sym(item_col) == item_labels[1] ~ .data[[paste0(prefix_initial, item_codes[1])]],
        !!sym(item_col) == item_labels[2] ~ .data[[paste0(prefix_initial, item_codes[2])]],
        !!sym(item_col) == item_labels[3] ~ .data[[paste0(prefix_initial, item_codes[3])]],
        !!sym(item_col) == item_labels[4] ~ .data[[paste0(prefix_initial, item_codes[4])]],
        !!sym(item_col) == item_labels[5] ~ .data[[paste0(prefix_initial, item_codes[5])]],
        !!sym(item_col) == item_labels[6] ~ .data[[paste0(prefix_initial, item_codes[6])]]
      ),
      initial.rank.r = 7 - initial.rank,
      initial.rank.r = relevel(factor(initial.rank.r), ref = 6)
    ) %>%
    select(-starts_with(prefix_initial))
  
  # Step 6: Merge with dat_long
  long_df <- long_df %>%
    left_join(dat_long %>%
                select(all_of(c(response_id_col, item_col, "Prob", "Amt"))),
              by = c(response_id_col, item_col))
  
  return(long_df)
}
### AMOUNT ###

## COUNT ##

result_amount <- get_DROPT_count(
  dat = dat,
  rank_col = "RankProcess_Amount",
  rank_all_col = "RankProcess_all_Amount",
  item_numbers = c(49, 50, 64, 65, 67, 68),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition = "Amount",
  initial_dat = initial.dat_amount,
  dat_long = dat_long1
)

# Extract outputs as needed
RankProcess_Amount <- result_amount$cleaned_data
bug_respondent_Amount <- result_amount$bug_respondent
na_subj_Amount <- result_amount$na_subj
drag_and_drop_count_Amount <- result_amount$drag_and_drop_count
drag_and_drop_count_Amount_long <- result_amount$drag_and_drop_count_long

# Create summary df and filter for tau > 0.5

Summary_data_Amount<- expand_grid(
  ResponseId = unique(RankProcess_Amount$ResponseId),
  item.f = unique(RankProcess_Amount$item.f))

Summary_data_Amount<-Summary_data_Amount%>%
  mutate(rank.amount=
           case_when(
             item.f=="Pr6_Amt1" ~1,
             item.f=="Pr5_Amt2" ~ 2,
             item.f== "Pr4_Amt3" ~ 3,
             item.f== "Pr3_Amt4" ~ 4,
             item.f == "Pr2_Amt5" ~ 5,
             item.f == "Pr1_Amt6" ~6),
         rank.Prob=case_when(
           item.f=="Pr6_Amt1" ~6,
           item.f=="Pr5_Amt2" ~ 5,
           item.f== "Pr4_Amt3" ~ 4,
           item.f== "Pr3_Amt4" ~ 3,
           item.f == "Pr2_Amt5" ~ 2,
           item.f == "Pr1_Amt6" ~1
         ))%>%
  left_join(dat%>%select(ResponseId,starts_with("rank_amount_")),by="ResponseId")%>%
  mutate(Subj.rank=case_when(
    item.f=="Pr6_Amt1" ~ rank_amount_49,
    item.f=="Pr5_Amt2" ~ rank_amount_50,
    item.f== "Pr4_Amt3" ~ rank_amount_64,
    item.f== "Pr3_Amt4" ~ rank_amount_65,
    item.f == "Pr2_Amt5" ~ rank_amount_67,
    item.f == "Pr1_Amt6" ~ rank_amount_68))%>%
  select(-c(starts_with("rank_amount_")))%>%
  group_by(ResponseId) %>%
  mutate(Tau = -cor(Subj.rank, rank.amount, method = "kendall")) %>%
  ungroup()

# Check for tau less than 0.5 - used later
Amount_tau_less0.5 <- Summary_data_Amount %>%
  filter(Tau <= 0.5) %>%
  distinct(ResponseId) %>%
  pull(ResponseId)

## Count: fix long df ##

# Further recoding of the long df
drag_and_drop_count_Amount_long<-drag_and_drop_count_Amount_long%>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  mutate(rank.Amount=case_when(
    item.f=="Pr6_Amt1" ~1,
    item.f=="Pr5_Amt2" ~ 2,
    item.f== "Pr4_Amt3" ~ 3,
    item.f== "Pr3_Amt4" ~ 4,
    item.f == "Pr2_Amt5" ~ 5,
    item.f == "Pr1_Amt6" ~6
  ),
  rank.Prob=case_when(
    item.f=="Pr6_Amt1" ~6,
    item.f=="Pr5_Amt2" ~ 5,
    item.f== "Pr4_Amt3" ~ 4,
    item.f== "Pr3_Amt4" ~ 3,
    item.f == "Pr2_Amt5" ~ 2,
    item.f == "Pr1_Amt6" ~1))%>%
  left_join(initial.dat_amount%>%select(ResponseId,starts_with("initial.items_")),by="ResponseId")%>%
  mutate(initial.rank=case_when(
    item.f=="Pr6_Amt1" ~ initial.items_49,
    item.f=="Pr5_Amt2" ~ initial.items_50,
    item.f=="Pr4_Amt3" ~ initial.items_64,
    item.f=="Pr3_Amt4" ~ initial.items_65,
    item.f=="Pr2_Amt5" ~ initial.items_67,
    item.f=="Pr1_Amt6" ~ initial.items_68
  ),
  initial.rank.r=relevel(factor(7-initial.rank), ref = 6),
  N_ind=case_when(
    N==0~0,
    TRUE~1)
  )%>%
  select(-c(starts_with("initial.items_")))


## DISTANCE ##

Distance_Amount.cleanup.df <- get_DROPT_distance(
  data = RankProcess_Amount,
  items = c("49", "50", "64", "65", "67", "68"),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition_label = "Amount"
)

## ORDER ##

touch_order_analysis.long_Amount <- get_DROPT_order(
  data = RankProcess_Amount,
  dat_long = dat_long1,
  initial_data = initial.dat_amount,
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  item_codes = c("49", "50", "64", "65", "67", "68"),
  condition_value = "Amount"
)
### PROBABILITY ###

## COUNT ##

result_prob <- get_DROPT_count(
  dat = dat,
  rank_col = "RankProcess_Prob",
  rank_all_col = "RankProcess_all_Prob",
  item_numbers = c(49, 50, 64, 65, 67, 68),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition = "Prob",
  initial_dat = initial.dat_prob,
  dat_long = dat_long1
)

# Extract outputs as needed
RankProcess_Prob <- result_prob$cleaned_data
bug_respondent_Prob <- result_prob$bug_respondent
na_subj_Prob <- result_prob$na_subj
drag_and_drop_count_Prob <- result_prob$drag_and_drop_count
drag_and_drop_count_Prob_long <- result_prob$drag_and_drop_count_long

# Create summary df and filter for tau > 0.5
Summary_data_Prob<- expand_grid(
 ResponseId = unique(RankProcess_Prob$ResponseId),
 item.f = unique(RankProcess_Prob$item.f))

Summary_data_Prob<-Summary_data_Prob%>%
  mutate(rank.Amount=
           case_when(
    item.f=="Pr6_Amt1" ~1,
    item.f=="Pr5_Amt2" ~ 2,
    item.f== "Pr4_Amt3" ~ 3,
    item.f== "Pr3_Amt4" ~ 4,
    item.f == "Pr2_Amt5" ~ 5,
    item.f == "Pr1_Amt6" ~6),
         rank.prob=case_when(
    item.f=="Pr6_Amt1" ~6,
    item.f=="Pr5_Amt2" ~ 5,
    item.f== "Pr4_Amt3" ~ 4,
    item.f== "Pr3_Amt4" ~ 3,
    item.f == "Pr2_Amt5" ~ 2,
    item.f == "Pr1_Amt6" ~1
         ))%>%
  left_join(dat%>%select(ResponseId,starts_with("rank_prob_")),by="ResponseId")%>%
  mutate(Subj.rank=case_when(
    item.f=="Pr6_Amt1" ~ rank_prob_49,
    item.f=="Pr5_Amt2" ~ rank_prob_50,
    item.f== "Pr4_Amt3" ~ rank_prob_64,
    item.f== "Pr3_Amt4" ~ rank_prob_65,
    item.f == "Pr2_Amt5" ~ rank_prob_67,
    item.f == "Pr1_Amt6" ~ rank_prob_68))%>%
  select(-c(starts_with("rank_prob_")))%>%
  group_by(ResponseId) %>%
  mutate(Tau =- cor(Subj.rank, rank.prob, method = "kendall")) %>%
  ungroup()

# Check for tau less than 0.5 - used later
Prob_tau_less0.5 <- Summary_data_Prob %>%
  filter(Tau <= 0.5) %>%
  distinct(ResponseId) %>%
  pull(ResponseId)

## Count: fix long df ##

# Further recoding of the long df
drag_and_drop_count_Prob_long<-drag_and_drop_count_Prob_long%>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  mutate(rank.Prob=case_when(
    item.f=="Pr6_Amt1" ~1,
    item.f=="Pr5_Amt2" ~ 2,
    item.f== "Pr4_Amt3" ~ 3,
    item.f== "Pr3_Amt4" ~ 4,
    item.f == "Pr2_Amt5" ~ 5,
    item.f == "Pr1_Amt6" ~6
  ),
  rank.Prob=case_when(
    item.f=="Pr6_Amt1" ~6,
    item.f=="Pr5_Amt2" ~ 5,
    item.f== "Pr4_Amt3" ~ 4,
    item.f== "Pr3_Amt4" ~ 3,
    item.f == "Pr2_Amt5" ~ 2,
    item.f == "Pr1_Amt6" ~1))%>%
  left_join(initial.dat_prob%>%
  select(ResponseId,starts_with("initial.items_")),by="ResponseId")%>%
  mutate(initial.rank=case_when(
    item.f=="Pr6_Amt1" ~ initial.items_49,
    item.f=="Pr5_Amt2" ~ initial.items_50,
    item.f=="Pr4_Amt3" ~ initial.items_64,
    item.f=="Pr3_Amt4" ~ initial.items_65,
    item.f=="Pr2_Amt5" ~ initial.items_67,
    item.f=="Pr1_Amt6" ~ initial.items_68
  ),
  initial.rank.r=relevel(factor(7-initial.rank), ref = 6),
  N_ind=case_when(
    N==0~0,
    TRUE~1)
  )%>%
  select(-c(starts_with("initial.items_")))


## DISTANCE ##

Distance_Prob.cleanup.df <- get_DROPT_distance(
  data = RankProcess_Prob,
  items = c("49", "50", "64", "65", "67", "68"),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition_label = "Prob"
)

## ORDER ##

touch_order_analysis.long_Prob <- get_DROPT_order(
  data = RankProcess_Prob,
  dat_long = dat_long1,
  initial_data = initial.dat_prob,
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  item_codes = c("49", "50", "64", "65", "67", "68"),
  condition_value = "Prob"
)
### PREFERENCE 1 ###

## COUNT ##

result_prefer1 <- get_DROPT_count(
  dat = dat,
  rank_col = "RankProcess_Prefer1",
  rank_all_col = "RankProcess_all_Prefer1",
  item_numbers = c(49, 50, 64, 65, 67, 68),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition = "Prefer1",
  initial_dat = initial.dat_pref1,
  dat_long = dat_long1
)

# Extract outputs as needed
RankProcess_Prefer1 <- result_prefer1$cleaned_data
bug_respondent_Prefer1 <- result_prefer1$bug_respondent
na_subj_Prefer1 <- result_prefer1$na_subj
drag_and_drop_count_Prefer1 <- result_prefer1$drag_and_drop_count
drag_and_drop_count_Prefer1_long <- result_prefer1$drag_and_drop_count_long

## DISTANCE ##

Distance_pref1.cleanup.df <- get_DROPT_distance(
  data = RankProcess_Prefer1,
  items = c("49", "50", "64", "65", "67", "68"),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition_label = "Pref1"
)

## ORDER ##

touch_order_analysis.long_pref1 <- get_DROPT_order(
  data = RankProcess_Prefer1,
  dat_long = dat_long1,
  initial_data = initial.dat_pref1,
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  item_codes = c("49", "50", "64", "65", "67", "68"),
  condition_value = "Pref1"
)
### PREFERENCE 2 ###

result_prefer2 <- get_DROPT_count(
  dat = dat,
  rank_col = "RankProcess_Prefer2",
  rank_all_col = "RankProcess_all_Prefer2",
  item_numbers = c(49, 50, 64, 65, 67, 68),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition = "Prefer2",
  initial_dat = initial.dat_pref2,
  dat_long = dat_long2
)

# Extract outputs as needed
RankProcess_Prefer2 <- result_prefer2$cleaned_data
bug_respondent_Prefer2 <- result_prefer2$bug_respondent
na_subj_Prefer2 <- result_prefer2$na_subj
drag_and_drop_count_Prefer2 <- result_prefer2$drag_and_drop_count
drag_and_drop_count_Prefer2_long <- result_prefer2$drag_and_drop_count_long

## DISTANCE ##

Distance_pref2.cleanup.df <- get_DROPT_distance(
  data = RankProcess_Prefer2,
  items = c("49", "50", "64", "65", "67", "68"),
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  condition_label = "Pref2"
)

## ORDER ##

touch_order_analysis.long_pref2 <- get_DROPT_order(
  data = RankProcess_Prefer2,
  dat_long = dat_long2,
  initial_data = initial.dat_pref2,
  item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
  item_codes = c("49", "50", "64", "65", "67", "68"),
  condition_value = "Pref2"
)
### Combine Preference Rankings into a Master df, which is in the long format ###

# load function
make.rm <-
  function (constant, repeated, data, contrasts) 
  {
    if (!missing(constant) && is.vector(constant)) {
      if (!missing(repeated) && is.vector(repeated)) {
        if (!missing(data)) {
          dd <- dim(data)
          replen <- length(repeated)
          if (missing(contrasts)) 
            contrasts <- ordered(sapply(paste("T", 1:length(repeated), 
                                              sep = ""), rep, dd[1]))
          else contrasts <- matrix(sapply(contrasts, rep, 
                                          dd[1]), ncol = dim(contrasts)[2])
          if (length(constant) == 1) 
            cons.col <- rep(data[, constant], replen)
          else cons.col <- lapply(data[, constant], rep, 
                                  replen)
          new.df <- data.frame(cons.col, repdat = as.vector(data.matrix(data[, 
                                                                             repeated])), contrasts)
          return(new.df)
        }
      }
    }
    cat("Usage: make.rm(constant, repeated, data [, contrasts])\n")
    cat("\tWhere 'constant' is a vector of indices of non-repeated data and\n")
    cat("\t'repeated' is a vector of indices of the repeated measures data.\n")
  }


# Preselect columns
set1amt_cols <- dat %>%
  select(starts_with("Set1_L") & ends_with("_Amt")) %>%
  colnames()

set1prob_cols <- dat %>%
  select(starts_with("Set1_L") & ends_with("_Prob")) %>%
  colnames()

set1rank_cols <- dat %>%
  select(starts_with("rank_pref1")) %>%
  colnames()

set2amt_cols <- dat %>%
  select(starts_with("Set2_L") & ends_with("_Amt")) %>%
  colnames()

set2prob_cols <- dat %>%
  select(starts_with("Set2_L") & ends_with("_Prob")) %>%
  colnames()

set2rank_cols <- dat %>%
  select(starts_with("rank_pref2")) %>%
  colnames()

amtrank_cols <- dat %>%
  select(starts_with("rank_prob_")) %>%
  colnames()

probrank_cols <- dat %>%
  select(starts_with("rank_amount_")) %>%
  colnames()

# Now call the make.rm function and create the first df
master_df <- make.rm(
  constant = c("ResponseId", "age", "gender_binary", "education_num", "income"),
  repeated = set1amt_cols,
  data = dat
)

# rename repdat column
master_df <- master_df %>%
  rename(set1_amt = "repdat")

# since the order of the other columns is identical, we can just create a new set for all the other columns
set1prob.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = set1prob_cols,
  data = dat
)

set1rank.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = set1rank_cols,
  data = dat
)

set2amt.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = set2amt_cols,
  data = dat
)

set2prob.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = set2prob_cols,
  data = dat
)

set2rank.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = set2rank_cols,
  data = dat
)

amtranks.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = amtrank_cols,
  data = dat
)

probranks.rm <- make.rm(
  constant = c("ResponseId"),
  repeated = probrank_cols,
  data = dat
)

# Add the columns from the other repeated measures data frames

master_df$set1_prob <- set1prob.rm$repdat
master_df$set1_rank <- set1rank.rm$repdat
master_df$set2_amt <- set2amt.rm$repdat
master_df$set2_prob <- set2prob.rm$repdat
master_df$set2_rank <- set2rank.rm$repdat
master_df$amt_subj_rank <- amtranks.rm$repdat
master_df$prob_subj_rank <- probranks.rm$repdat

# Add labels for the bets
master_df$bet_label[master_df$contrasts == "T1"] <- "Pr6_Amt1"
master_df$bet_label[master_df$contrasts == "T2"] <- "Pr5_Amt2"
master_df$bet_label[master_df$contrasts == "T3"] <- "Pr4_Amt3"
master_df$bet_label[master_df$contrasts == "T4"] <- "Pr3_Amt4"
master_df$bet_label[master_df$contrasts == "T5"] <- "Pr2_Amt5"
master_df$bet_label[master_df$contrasts == "T6"] <- "Pr1_Amt6"

### Finally, add the 3F measures from the data frames created before
# Set 1: Touch count df to add touch count (numeric and binary)
master_df <- master_df %>%
  left_join(
    drag_and_drop_count_Prefer1_long %>%
      select(ResponseId, item.f, N, N_ind) %>%
      rename(
        bet_label = item.f,
        set1_touch_count = N,
        set1_touch_count_binary = N_ind
      ),
    by = c("ResponseId", "bet_label")
  )

# Set 1: Touch order (order and max order)
master_df <- master_df %>%
  left_join(
    touch_order_analysis.long_pref1 %>%
      select(ResponseId, item.f, order, max_order) %>%
      rename(
        bet_label = item.f,
        set1_order = order,
        set1_max_order = max_order
      ),
    by = c("ResponseId", "bet_label")
  )


# Set 1: Drag distance
master_df <- master_df %>%
  left_join(
    Distance_pref1.cleanup.df %>%
      select(ResponseId, item.f, distance.r) %>%
      rename(
        bet_label = item.f,
        set1_drag_distance.r = distance.r
      ),
    by = c("ResponseId", "bet_label")
  )

# Set 1: initial order
# first, create a tmp df.
dat_tmp1 <- dat %>%
  select(ResponseId, RankProcess_Prefer1)

dat_extracted1 <- dat_tmp1 %>%
  mutate(
    item_string = str_extract(RankProcess_Prefer1, "\\{[^}]*\\}"),  # get first curly bracket contents
    item_string = str_remove_all(item_string, "\\{|\\}"),           # remove braces
    item_string = str_split(item_string, ";") %>% 
                  sapply(function(x) x[2]),                          # get text after semicolon
    item_numbers = str_split(item_string, ",")                      # split into vector of numbers
  )

dat_tmp1_long <- dat_extracted1 %>%
  select(ResponseId, item_numbers) %>%
  unnest(item_numbers) %>%
  group_by(ResponseId) %>%
  mutate(
    item_number = as.integer(str_trim(item_numbers)),
    set1_initial_order = row_number(),
    bet_label = case_when(
      item_number == 49 ~ "Pr6_Amt1",
      item_number == 50 ~ "Pr5_Amt2",
      item_number == 64 ~ "Pr4_Amt3",
      item_number == 65 ~ "Pr3_Amt4",
      item_number == 67 ~ "Pr2_Amt5",
      item_number == 68 ~ "Pr1_Amt6",
      TRUE ~ NA_character_
    )
  ) %>%
  select(ResponseId, item_number, set1_initial_order, bet_label) %>%
  ungroup()

# merge with master_df
master_df <- master_df %>%
  left_join(
    dat_tmp1_long %>%
      select(ResponseId, bet_label, set1_initial_order),
    by = c("ResponseId", "bet_label")
  )

### Set 2 3F
# Set 2: Touch count df to add touch count (numeric and binary)
master_df <- master_df %>%
  left_join(
    drag_and_drop_count_Prefer2_long %>%
      select(ResponseId, item.f, N, N_ind) %>%
      rename(
        bet_label = item.f,
        set2_touch_count = N,
        set2_touch_count_binary = N_ind
      ),
    by = c("ResponseId", "bet_label")
  )

# Set 2: Touch order (order and max order)
master_df <- master_df %>%
  left_join(
    touch_order_analysis.long_pref2 %>%
      select(ResponseId, item.f, order, max_order) %>%
      rename(
        bet_label = item.f,
        set2_order = order,
        set2_max_order = max_order
      ),
    by = c("ResponseId", "bet_label")
  )


# Set 2: Drag distance
master_df <- master_df %>%
  left_join(
    Distance_pref2.cleanup.df %>%
      select(ResponseId, item.f, distance.r) %>%
      rename(
        bet_label = item.f,
        set2_drag_distance.r = distance.r
      ),
    by = c("ResponseId", "bet_label")
  )

# Set 2: initial order
# first, create a tmp df.
dat_tmp2 <- dat %>%
  select(ResponseId, RankProcess_Prefer2)

dat_extracted2 <- dat_tmp2 %>%
  mutate(
    item_string = str_extract(RankProcess_Prefer2, "\\{[^}]*\\}"),  # get first curly bracket contents
    item_string = str_remove_all(item_string, "\\{|\\}"),           # remove braces
    item_string = str_split(item_string, ";") %>% 
                  sapply(function(x) x[2]),                          # get text after semicolon
    item_numbers = str_split(item_string, ",")                      # split into vector of numbers
  )

dat_tmp2_long <- dat_extracted2 %>%
  select(ResponseId, item_numbers) %>%
  unnest(item_numbers) %>%
  group_by(ResponseId) %>%
  mutate(
    item_number = as.integer(str_trim(item_numbers)),
    set2_initial_order = row_number(),
    bet_label = case_when(
      item_number == 49 ~ "Pr6_Amt1",
      item_number == 50 ~ "Pr5_Amt2",
      item_number == 64 ~ "Pr4_Amt3",
      item_number == 65 ~ "Pr3_Amt4",
      item_number == 67 ~ "Pr2_Amt5",
      item_number == 68 ~ "Pr1_Amt6",
      TRUE ~ NA_character_
    )
  ) %>%
  select(ResponseId, item_number, set2_initial_order, bet_label) %>%
  ungroup()

# merge with master_df
master_df <- master_df %>%
  left_join(
    dat_tmp2_long %>%
      select(ResponseId, bet_label, set2_initial_order),
    by = c("ResponseId", "bet_label")
  )

# add centered values for amount and probability
master_df$set1_amt.c <- scale(master_df$set1_amt)
master_df$set2_amt.c <- scale(master_df$set2_amt)
master_df$set1_prob.c <- scale(master_df$set1_prob)
master_df$set2_prob.c <- scale(master_df$set2_prob)
### PREP: Binary Data ###

binary_cols <- dat %>%
  select(starts_with("Binary")) %>%
  colnames()

# Now call the make.rm function and create the first df
binary.rm <- make.rm(
  constant = c("ResponseId", "age", "gender_binary", "education_num", "income"),
  repeated = binary_cols,
  data = dat
)

# rename repdat column
binary.rm <- binary.rm %>%
  rename(binary_choice = "repdat")

binary.rm$bet_number[binary.rm$contrasts == "T1"] <- "Bet1"
binary.rm$bet_number[binary.rm$contrasts == "T2"] <- "Bet2"
binary.rm$bet_number[binary.rm$contrasts == "T3"] <- "Bet3"
binary.rm$bet_number[binary.rm$contrasts == "T4"] <- "Bet4"
binary.rm$bet_number[binary.rm$contrasts == "T5"] <- "Bet5"

binary.rm$Chose_P_Bet <- ifelse(binary.rm$binary_choice == 2, 1, 0)

Data Recording Check

Sometimes, recording errors occur using the DROP-T Method. The code below shows how many observations are “thrown out” and why.

  • Bug Respondents: Bug respondents have a recording error in the RankProcess_all column. For instance, they have a comma at the end of a string, which shouldn’t happen
  • NA Respondents: After coding, these respondents have errors in the RankProcess column, such as two commas after one another.
# Amount
n_bug_amount <- length(bug_respondent_Amount)
n_na_amount  <- length(na_subj_Amount)

# Probability
n_bug_prob <- length(bug_respondent_Prob)
n_na_prob  <- length(na_subj_Prob)

# Preference 1
n_bug_pref1 <- length(bug_respondent_Prefer1)
n_na_pref1  <- length(na_subj_Prefer1)

# Preference 2
n_bug_pref2 <- length(bug_respondent_Prefer2)
n_na_pref2  <- length(na_subj_Prefer2)

# Table
bug_table <- data.frame(
  Task = c("Amount", "Probability", "Preference 1", "Preference 2"),
  Bug = c(n_bug_amount, n_bug_prob, n_bug_pref1, n_bug_pref2),
  `NA` = c(n_na_amount,  n_na_prob,  n_na_pref1,  n_na_pref2)
)

print(bug_table)
##           Task Bug NA.
## 1       Amount   1   5
## 2  Probability   2   7
## 3 Preference 1   2   7
## 4 Preference 2   4   5

Descriptive Statistics of Respondent Effort in the Task

Drag-and-drop count

Definition: The number of drag-and-drop actions (recorded with the RankCount) - only counts when the a drag-and-drop action results in an order change

### plot ###

plot_quiz <- function(data, column, title, item_n) {
  n_obs <- nrow(data)  # Total number of rows
  mean_val <- mean(data[[column]], na.rm = TRUE)  # Calculate mean
  median_val <- median(data[[column]], na.rm = TRUE)  # Calculate median
  subtitle_text <- paste0("Item.N = ", item_n, "; Subj.N = ", n_obs,
                          ", Mean = ", round(mean_val, 2),
                          ", Median = ", round(median_val, 2))
  
  ggplot(data, aes(x = "", y = !!sym(column))) + 
    geom_violin(fill = "lightblue", alpha = 0.5) + 
    geom_boxplot(width = 0.1, color = "black", alpha = 0.8) + 
    geom_jitter(width = 0.1, size = 1.5, color = "black", alpha = 0.6)+ 
    theme_minimal() + 
    labs(
      title = title,
      subtitle = subtitle_text,
      x = "",
      y = "Drag count"
    ) + 
    theme(plot.title = element_text(hjust = 0.5, face="bold"), plot.subtitle = element_text(hjust = 0.5)) +
    scale_y_continuous(breaks = seq(0, 10, by = 1), limits = c(0, 10))
}

# Create plots for each quiz

Quiz_WarmUp <- plot_quiz(dat, "RankCount_WarmUp", "Warm Up", item_n = 5)
Quiz_Prob  <- plot_quiz(dat, "RankCount_Prob", "Probability", item_n = 6)
Quiz_Amt <- plot_quiz(dat, "RankCount_Amount", "Amount", item_n = 6)
Quiz_pref1  <- plot_quiz(dat, "RankCount_Prefer1", "Preference (1st)", item_n = 6)
Quiz_Pref2  <- plot_quiz(dat, "RankCount_Prefer2", "Preference (2nd)", item_n = 6)

# Combine all plots into one graph
combined_plot <- (Quiz_WarmUp | Quiz_Prob | Quiz_Amt | Quiz_pref1| Quiz_Pref2)
combined_plot

Correlation between initial and final rank

  • RankProcess data format: {0, initial rank order}{timestamp, new rank order}, etc.
  • low correlations between initial and final rank for all 6 items across quiz conditions.
summary_stats <- all_results %>%
  group_by(task) %>%
  summarise(
    Mean = round(mean(correlation, na.rm = TRUE), 2),
    Median = round(median(correlation, na.rm = TRUE), 2),
    N = n()
  )

# summary_stats

all_results$task <- as.factor(all_results$task)
all_results$task <- factor(all_results$task, levels = c("amount", "prob", "pref1", "pref2"))



combined_plot <- ggplot(all_results, aes(x = task, y = correlation)) +
  geom_violin(fill = "lightblue", alpha = 0.5) +
  geom_jitter(aes(color = sig), width = 0.1, size = 1.5, alpha = 0.6) +
  scale_color_manual(
    values = c("TRUE" = "red", "FALSE" = "black"),
    labels = c("ns.", "p<.05"),
    name = "p value"
  ) +
  theme_minimal() +
  labs(
    title = " Correlations for Initial and Final Ranks Across Tasks",
        subtitle = paste(
      " Color Task: Mean =", summary_stats$Mean[1], ", Median =", summary_stats$Median[1], ", N =", summary_stats$N[1], "\n",
      " Prob Task: Mean =", summary_stats$Mean[2], ", Median =", summary_stats$Median[2], ", N =", summary_stats$N[2], "\n"
    ),
    x = "Task",
    y = "Correlation"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

# Print the plot
print(combined_plot)

Rank Quiz Page Duration (seconds)

# each extract a dataset for each task and then do the psych mean thing

summarize_task <- function(data, column_name, task_name) {
  data %>%
    summarise(
      Task = task_name,
      Mean_t = mean(.data[[column_name]], na.rm = TRUE),
      Median_t = median(.data[[column_name]], na.rm = TRUE),
      SD = sd(.data[[column_name]], na.rm = TRUE),
      Min = min(.data[[column_name]], na.rm = TRUE),
      Max = max(.data[[column_name]], na.rm = TRUE),
      N = sum(!is.na(.data[[column_name]]))
    )
}

# Apply the function to each dataset


summary_warmup <- summarize_task(dat, "t_warmup_Page.Submit", "WarmUp")
summary_prob <- summarize_task(dat, "t_rank_prob_Page.Submit", "Probability")
summary_amt <- summarize_task(dat, "t_rank_amount_Page.Submit", "Amount")
summary_pref1 <- summarize_task(dat, "t_rank_pref1_Page.Submit", "Preference 1ST")
summary_pref2 <- summarize_task(dat, "t_rank_pref2_Page.Submit", "Preference 2ND")
# summary_Binary <- summarize_task(dat, "t_Prob_Page.Submit", "Prob")



# Combine all summaries into one table
all_summaries <- bind_rows(summary_warmup,summary_prob, summary_amt, summary_pref1,summary_pref2 )


# t.test(dat$t_Prob_Page.Submit,dat$rank_color_t_Page.Submit)
all_summaries

Preliminary Checks

Jitter Randomization Check

1ST Preference Ranking Task

# --- True lottery values and jitter ranges ---
prob <- c(5, 9, 17, 29, 54, 94)
amount <- c(56.7, 31.5, 17.5, 9.7, 5.4, 2.9)

bounded_jitter <- list(
  prob = c(2, 2, 6, 6, 18, 5),
  amt  = c(15.95, 9.25, 4.75, 3.05, 1.25, 1.25)
)

# --- Mapping from Set1_L1–L6 to correct lottery ---
lottery_mapping <- tibble(
  input_lottery = 1:6,
  lottery = factor(7 - input_lottery)
)

# --- Reshape wide-format jitter data to long format ---
jitter_check <- bind_rows(lapply(1:6, function(i) {
  tibble(
    ResponseId = dat$ResponseId,
    input_lottery = i,
    prob = dat[[paste0("Set1_L", i, "_Prob")]],
    amount = dat[[paste0("Set1_L", i, "_Amt")]]
  )
})) %>%
  left_join(lottery_mapping, by = "input_lottery") %>%
  select(ResponseId, lottery, prob, amount) %>%
  filter(!is.na(prob))

# --- Jitter bounds and true values per lottery ---
jitter_bounds <- tibble(
  lottery = factor(1:6),
  prob_min = prob - bounded_jitter$prob,
  prob_max = prob + bounded_jitter$prob,
  amt_min  = amount - bounded_jitter$amt,
  amt_max  = amount + bounded_jitter$amt,
  true_prob = prob,
  true_amount = amount
)

# --- Plot: Probability ---
ggplot(jitter_check, aes(x = lottery, y = prob)) +
  geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
  geom_point(data = jitter_bounds, aes(y = true_prob), color = "red", size = 3) +
  geom_linerange(data = jitter_bounds, aes(ymin = prob_min, ymax = prob_max), color = "red", size = 0.8) +
  theme_minimal(base_size = 13) +
  labs(
    title = "Probability of Win per Lottery",
    subtitle = "Black = actual shown values; Red line = intended jitter range",
    x = "Lottery ID", y = "Probability (%)"
  )

# --- Plot: Amount ---
ggplot(jitter_check, aes(x = lottery, y = amount)) +
  geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
  geom_point(data = jitter_bounds, aes(y = true_amount), color = "red", size = 3) +
  geom_linerange(data = jitter_bounds, aes(ymin = amt_min, ymax = amt_max), color = "red", size = 0.8) +
  theme_minimal(base_size = 13) +
  labs(
    title = "Amount to Win per Lottery",
    subtitle = "Black = actual shown values; Red line = intended jitter range",
    x = "Lottery ID", y = "Amount ($)"
  )

# --- Flag violations ---
jitter_check_flagged <- jitter_check %>%
  left_join(jitter_bounds, by = "lottery") %>%
  mutate(
    prob_out_of_range = prob < prob_min | prob > prob_max,
    amount_out_of_range = amount < amt_min | amount > amt_max
  ) %>%
  select(
    ResponseId, lottery,
    prob, prob_min, prob_max, prob_out_of_range,
    amount, amt_min, amt_max, amount_out_of_range,
    true_prob, true_amount
  )

jitter_check_flagged%>%
  filter(prob_out_of_range=="TRUE"|amount_out_of_range=="TRUE") #empty table - good!
# --- Compute EV and summary ---
jitter_check <- jitter_check %>%
  mutate(ev = (prob / 100) * amount)

ev_summary <- jitter_check %>%
  group_by(lottery) %>%
  summarise(ev_mean = mean(ev, na.rm = TRUE))

# --- Plot: Expected Value with reference line ---
ggplot(jitter_check, aes(x = lottery, y = ev)) +
  geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
  geom_point(data = ev_summary, aes(x = lottery, y = ev_mean), color = "red", size = 3) +
  geom_hline(yintercept = 2.835, linetype = "dashed", color = "blue", linewidth = 1) +
  theme_minimal(base_size = 13) +
  labs(
    title = "Expected Value per Lottery",
    subtitle = "Black = individual EVs; Red = mean EV; Blue dashed = intended EV = 2.835",
    x = "Lottery ID", y = "Expected Value ($)"
  )

ev_sd_by_response <- jitter_check %>%
  group_by(ResponseId) %>%
  summarize(ev_sd = sd(ev, na.rm = TRUE)) 
ggplot(ev_sd_by_response, aes(x = ev_sd)) +
  geom_histogram(binwidth = 0.2, fill = "steelblue", color = "white") +
  labs(
    title = "Distribution of SD of EV Within ResponseId",
    x = "Standard Deviation of EV",
    y = "Count of Participants"
  ) +
  theme_minimal()

2ND Preference Ranking Task

# --- Step 0: True values ---

prob <- c(3, 6, 15, 31, 63, 84)
amount <- c(93.4, 47.7, 18.7, 9.1, 4.4, 3.4)

# BOUNDED jitter ranges used for each lottery (for visual comparison)
bounded_jitter <- list(
  prob = c(2, 1, 8, 8, 11, 10),
  amt  = c(22, 23.7, 5.3, 4.3, 0.4, 0.4)
)

# --- Step 1: Reverse mapping: Set1_L1 = lottery 6, ..., Set1_L6 = lottery 1 ---
lottery_mapping <- tibble(
  input_lottery = 1:6,
  lottery = factor(7 - input_lottery)  # Reverse order
)

# --- Step 2: Reshape wide-format data into long-format jitter_check ---
jitter_check <- bind_rows(lapply(1:6, function(i) {
  tibble(
    ResponseId = dat$ResponseId,
    input_lottery = i,
    prob = dat[[paste0("Set2_L", i, "_Prob")]],
    amount = dat[[paste0("Set2_L", i, "_Amt")]]
  )
})) %>%
  left_join(lottery_mapping, by = "input_lottery") %>%
  select(ResponseId, lottery, prob, amount) %>%
  filter(!is.na(prob))

# --- Step 3: Jitter bounds and true value per lottery ---
jitter_bounds <- tibble(
  lottery = factor(1:6),
  prob = c(3, 6, 15, 31, 63, 84),
  prob_min = prob - bounded_jitter$prob,
  prob_max = prob + bounded_jitter$prob,
  amount = c(93.4, 47.7, 18.7, 9.1, 4.4, 3.4),
  amt_min = amount - bounded_jitter$amt,
  amt_max = amount + bounded_jitter$amt
)

# -------------------------------
# Plot 1: Probability (dots + jitter range + true center)
# -------------------------------
ggplot(jitter_check, aes(x = lottery, y = prob)) +
  geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +  # actual shown values
  geom_point(data = jitter_bounds, aes(y = prob), color = "red", size = 3) +  # true center
  geom_linerange(data = jitter_bounds, aes(ymin = prob_min, ymax = prob_max), color = "red", size = 0.8) +  # expected jitter range
  theme_minimal(base_size = 13) +
  labs(
    title = "Probability of Win per Lottery",
    subtitle = "Black = actual shown values;  Red line = intended jitter range",
    x = "Lottery ID", y = "Probability (%)"
  )

# -------------------------------
# Plot 2: Amount (dots + jitter range + true center)
# -------------------------------
ggplot(jitter_check, aes(x = lottery, y = amount)) +
  geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
  geom_point(data = jitter_bounds, aes(y = amount), color = "red", size = 3) +
  geom_linerange(data = jitter_bounds, aes(ymin = amt_min, ymax = amt_max), color = "red", size = 0.8) +
  theme_minimal(base_size = 13) +
  labs(
    title = "Amount to Win per Lottery",
    subtitle = "Black = actual shown values; RRed line = intended jitter range",
    x = "Lottery ID", y = "Amount ($)"
  )

jitter_bounds_renamed <- jitter_bounds %>%
  rename(
    true_prob = prob,
    true_amount = amount
  )

jitter_check_flagged<-jitter_check %>%
  left_join(jitter_bounds_renamed, by = "lottery") %>%
  mutate(
    prob_out_of_range = prob < prob_min | prob > prob_max,
    amount_out_of_range = amount < amt_min | amount > amt_max
  ) %>%
  select(
    ResponseId, lottery,
    prob, prob_min, prob_max, prob_out_of_range,
    amount, amt_min, amt_max, amount_out_of_range,
    true_prob, true_amount
  )
jitter_check_flagged%>%
  filter(prob_out_of_range=="TRUE"|amount_out_of_range=="TRUE")
jitter_check <- jitter_check %>%
  mutate(ev = (prob / 100) * amount)
cor(jitter_check$ev,jitter_check$prob)
## [1] 0.08175302
ev_summary <- jitter_check %>%
  group_by(lottery) %>%
  summarise(ev_mean = mean(ev, na.rm = TRUE))
ggplot(jitter_check, aes(x = lottery, y = ev)) +
  geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
  geom_point(data = ev_summary, aes(x = lottery, y = ev_mean), color = "red", size = 3) +
  geom_hline(yintercept = 2.835, linetype = "dashed", color = "blue", linewidth = 1) +
  theme_minimal(base_size = 13) +
  labs(
    title = "Expected Value per Lottery",
    subtitle = "Black = individual EVs; Red = mean EV; Blue dashed = intended EV = 2.835",
    x = "Lottery ID", y = "Expected Value ($)"
  )

ev_sd_by_response <- jitter_check %>%
  group_by(ResponseId) %>%
  summarize(ev_sd = sd(ev, na.rm = TRUE)) 
ggplot(ev_sd_by_response, aes(x = ev_sd)) +
  geom_histogram(binwidth = 0.2, fill = "steelblue", color = "white") +
  labs(
    title = "Distribution of SD of EV Within ResponseId",
    x = "Standard Deviation of EV",
    y = "Count of Participants"
  ) +
  theme_minimal()

Warm Up Task

  • The majority of participants passed the warm-up test on their first attempt.
  • Reminder: Each participant is given up to 3 attempts to pass.
ggplot(dat, aes(x = factor(WarmUpAttempt_N+1))) +
  geom_bar(fill = "steelblue", color = "black", alpha = 0.8) +
  scale_x_discrete(limits = as.character(1:3)) +
  labs(
    title = "Distribution of Warm-Up Attempts",
    x = "Number of Warm-Up Attempts",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 13)
  )

Attention and Dosage Question

  • ATTN: I work 28 hours a day (TRUE/FALSE/UNSURE)
  • DOSE: After participants completed ranking by probability, amount, and how much they like each lottery, we asked them to identify/recall the tasks they had previously ranked
dat_long2 <- dat %>%
  pivot_longer(cols = c(dose.coded, attn1.coded), names_to = "Question", values_to = "Response")

ggplot(dat_long2, aes(x = Response, fill = Question)) +
  geom_bar(position = "dodge") +  # Bar plot using counts
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5, size = 5) +  # Add count labels
  facet_wrap(~Question, scales = "free_x") +  # Separate plots for dose.coded and attn1.coded
  labs(x = "Response", y = "Count", title = "Count of Correct & Incorrect Responses") +
  theme_bw() +
  ylim(0, 200)  # Set y-axis limit

dose.wrong.subj<-dat%>%filter(dose.coded=="Incorrect")%>%pull(ResponseId)

# Display the actual things people select

Belief in Bonus

After the second ranking by “how much you like each lottery” question, we asked participants “Do you believe that you have the chance to win a bonus in the task you just completed?”

ggplot(dat%>%filter(!is.na(bonus_belief)), aes(x = bonus_belief)) +
  geom_bar() +  # Use counts instead of proportions
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5, size = 5) +  # Add count labels
  labs(x = "Belief in Getting Bonus", y = "Count", title = "Did you believe that your responses in the ranking task you\njust completed will affect your bonus payment?") +
  theme_bw() +
  theme(legend.position = "none") + # Tilt x-axis labels
ylim(c(0,200))

Device Used During the Ranking Quiz

Possible Answers: + Mouse (wired or wireless) + Trackpad (touchpad) + Touchscreen (finger or stylus) + Others

dat$device [dat$device== 1] = 'Mouse (Wired or Wireless)'
dat$device [dat$device== 2] = 'Trackpad (touchpad)'
dat$device [dat$device == 4] = 'Touchscreen (finger or stylus)'
dat$device [dat$device == 3] = 'Other'

dat$device <- factor(dat$device, levels = names(sort(table(dat$device), decreasing = TRUE)))

ggplot(dat%>%filter(!is.na(device)), aes(x = device)) +
  geom_bar() +  # Use counts instead of proportions
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5, size = 5) +  # Add count labels
  labs(x = "Device Type", y = "Count", title = "Distribution of Ranking Approaches") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") + # Tilt x-axis labels
ylim(c(0,150))

Technical Issues

“Did you run into any technical issue during the survey?”

tech_issues <- sum(dat$technical == 1)

In total, 2 people say they had technical issues.

They say:

  • I was about completing the study at first when the page refreshed and I had to start again.
  • I had a pop up clicked on it and went back again to my work

Study Purpose

“In a few sentences, please explain what you think the purpose of this study is. If you are not sure, please give your best guess.”

# install.packages("reactable")
library(reactable)
# Filter out missing or blank responses
purpose_table <- dat %>%
  filter(!is.na(purpose_open), purpose_open != "") %>%
  select(purpose_open)

# Create interactive table
reactable(purpose_table,
          searchable = TRUE,
          pagination = TRUE,
          defaultPageSize = 10,
          highlight = TRUE,
          bordered = TRUE,
          striped = TRUE,
          columns = list(
          purpose_open = colDef(name = "Open-Ended Responses: Purpose")
          ))

Accuracy in Probability and Payoff Ranking Tasks

Amount Task: 87% (169/194) Tau = 1
Probability condition: 90% (174/194) Tau = 1

# Check how many people per group have a tau of 1
# Summary_data %>%
#   filter(Tau == 1) %>%
#   group_by(Group) %>%
#   summarise(Count = n())

# Create a combined summary data frame for the both tasks. 
Summary_data <- data.frame(
  Tau = c(Summary_data_Prob$Tau, Summary_data_Amount$Tau),
  Group = rep(c("Prob", "Amount"), c(length(Summary_data_Prob$Tau), length(Summary_data_Amount$Tau))),
  ResponseId = c(Summary_data_Prob$ResponseId, Summary_data_Amount$ResponseId)  # Add ResponseId
) %>%
  filter(!duplicated(paste(ResponseId,Group)))

# now, compute mean values for the combined df
mean_values <- Summary_data %>%
  group_by(Group) %>%
  dplyr::summarize(mean_Tau = mean(Tau, na.rm = TRUE))

# Create violin plots for the combined df
ggplot(Summary_data, aes(x = Group, y = Tau, fill = Group)) +
  geom_violin(trim = FALSE, alpha = 0.5) +  # Violin plot with transparency
  geom_jitter(width = 0.1, alpha = 0.5, size = 1.5) +  # Add jitter points
  stat_summary(fun = mean, geom = "point", shape = 23, size = 4, fill = "white") +  # Show mean as point
  geom_text(data = mean_values, aes(x = Group, y = mean_Tau, label = sprintf("%.2f", mean_Tau)),
            hjust=2, fontface = "bold", size = 5, Amount = "black") +  # Add mean text labels
  scale_fill_manual(values = c("steelblue", "darkorange")) +  # Custom Amounts
  labs(
       x = "Condition",
       y = "Tau") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none",  # Remove redundant legend
        axis.title = element_text(face = "bold"),
        axis.text = element_text(face = "bold"))

It appears that in the Amount Condition, several people misunderstood the task (and ranked the items the other way around), dragging the average tau down more than in the Prob task.

Move Direction

# Amount
n_up_amount <- Distance_Amount.cleanup.df %>%
                        filter(distance.r>0) %>%
                        nrow()
n_down_amount  <- Distance_Amount.cleanup.df %>%
                        filter(distance.r<0) %>%
                        nrow()
n_none_amount  <- Distance_Amount.cleanup.df %>%
                        filter(distance.r==0) %>%
                        nrow()

# Probability
n_up_prob <- Distance_Prob.cleanup.df %>%
                        filter(distance.r>0) %>%
                        nrow()
n_down_prob  <- Distance_Prob.cleanup.df %>%
                        filter(distance.r<0) %>%
                        nrow()
n_none_prob  <- Distance_Prob.cleanup.df %>%
                        filter(distance.r==0) %>%
                        nrow()

# Preference 1
n_up_pref1 <- Distance_pref1.cleanup.df %>%
                        filter(distance.r>0) %>%
                        nrow()
n_down_pref1  <- Distance_pref1.cleanup.df %>%
                        filter(distance.r<0) %>%
                        nrow()
n_none_pref1  <- Distance_pref1.cleanup.df %>%
                        filter(distance.r==0) %>%
                        nrow()

# Preference 2
n_up_pref2 <- Distance_pref2.cleanup.df %>%
                        filter(distance.r>0) %>%
                        nrow()
n_down_pref2  <- Distance_pref2.cleanup.df %>%
                        filter(distance.r<0) %>%
                        nrow()
n_none_pref2  <- Distance_pref2.cleanup.df %>%
                        filter(distance.r==0) %>%
                        nrow()

# Table
moving_table <- data.frame(
  Task = c("Amount", "Probability", "Preference 1", "Preference 2"),
  Move_up = c(n_up_amount, n_up_prob, n_up_pref1, n_up_pref2),
  Move_down = c(n_down_amount, n_down_prob, n_down_pref1, n_down_pref2),
  Not_moved = c(n_none_amount, n_none_prob, n_none_pref1, n_none_pref2)
)

print(moving_table)
##           Task Move_up Move_down Not_moved
## 1       Amount     603        45       480
## 2  Probability     623        51       436
## 3 Preference 1     630        56       430
## 4 Preference 2     630        41       445

Ranking by Amount and Probability: 3F Analysis

In our 3F Hypothesis we argue that participants will rank items scoring higher on an attribute first, more frequently, and further up.

Note on rank coding: Throughout the note book, 6 refers to the highest rank (at the top) and 1 refers to the lowest (at the bottom)

DV1: Drag Count

Distribution of Drag Count by Item

The following set of histograms illustrates the number of times each item is touched.

## Amount: Count the number of times each item was touched
drag_drop_counts_Amount <- drag_and_drop_count_Amount_long %>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  count(item.f,N) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100,
         condition="Amount")%>%
  ungroup()

## Probability: Count the number of times each item was touched
drag_drop_counts_Prob <- drag_and_drop_count_Prob_long %>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  count(item.f,N) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100,
         condition="Prob")%>%
  ungroup()

# Combine both counts into one data frame
drag_drop_counts_combined<-rbind(drag_drop_counts_Amount,drag_drop_counts_Prob)

# Plot the combined data frame
ggplot(drag_drop_counts_combined, aes(x = factor(N), y = n)) +
  geom_bar(
    stat = "identity",
    color = "black"
  ) +
  geom_text(
    aes(
      label = paste0(n, " (", round(percentage, 1), "%)")),
    vjust = -0.5,
    size = 5,
    fontface="bold"
  ) +
  labs(
    title = "Drag Count by Item and Quiz Condition",
    x = "Drag Count",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),  # Increased size and bold text
    plot.title = element_text(hjust = 0.5),
    axis.title = element_text(size = 12),  # Adjust axis titles size if needed
    axis.text = element_text(size = 10)    # Adjust axis labels size if needed
  ) +
  facet_wrap(~ item.f * condition,ncol=2) +
  ylim(0, 200)

Model-free visualizations

The following plot illustrates the mean number (with standard errors) of drag counts for each item, grouped by condition. Note: this is based on indicator variables, i.e., whether an item was touched or not.

# Create a summary data frame for AMOUNT; this time, code as binary whether an item was touched or not
summary_data_Amount_ind<- drag_and_drop_count_Amount_long %>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  mutate(N=case_when(
    N==0~0,
    TRUE~1
  ))%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_drop_mean = mean(N, na.rm = TRUE),
            drag_drop_sd = sd(N, na.rm = TRUE),
            n = n(),
            se = drag_drop_sd / sqrt(n),
            .groups = "drop")

# Create a summary data frame for PROB; this time, code as binary whether an item was touched or not
summary_data_Prob_ind<- drag_and_drop_count_Prob_long %>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  mutate(N=case_when(
    N==0~0,
    TRUE~1
  ))%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_drop_mean = mean(N, na.rm = TRUE),
            drag_drop_sd = sd(N, na.rm = TRUE),
            n = n(),
            se = drag_drop_sd / sqrt(n),
            .groups = "drop")

# Combine the two data frames into one
summary_data_combined_ind <- bind_rows(summary_data_Amount_ind, summary_data_Prob_ind)

# Define colors for the six bets. Logic: Strong green is indicative of the $-Bet; the red color is complementary and signals the focus on probability. Use this color scheme throughout

bet_colors <- c(
  "Pr6_Amt1" = "#d73027",  # Strong red
  "Pr5_Amt2" = "#f46d43",  # Medium red
  "Pr4_Amt3" = "#fdae61",  # Light red (orange-ish)
  "Pr3_Amt4" = "#a6d96a",  # Light green
  "Pr2_Amt5" = "#66bd63",  # Medium green
  "Pr1_Amt6" = "#1a9850"   # Strong green
)

summary_data_combined_ind$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)


# Plot
ggplot(summary_data_combined_ind, aes(x = condition, y = drag_drop_mean, 
                                      group = item.f, color = item.f, shape = item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = drag_drop_mean - se,
      ymax = drag_drop_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Count",
    title = "Mean Drag Count by Condition"
  ) +
  scale_color_manual(values = bet_colors) +  
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )

# Calculate correlation - valid for both dfs
cor_result <- cor.test(dat_long1$Prob, dat_long1$Amt)
cor_estimate <- round(cor_result$estimate, 2)
# Create a summary df for amount for subsequent plotting
summary_data_Amount <- drag_and_drop_count_Amount_long%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_mean = mean(N_ind, na.rm = TRUE),
            drag_sd = sd(N_ind, na.rm = TRUE),
            n = n(),
            se = drag_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))

summary_data_Prob <- drag_and_drop_count_Prob_long%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_mean = mean(N_ind, na.rm = TRUE),
            drag_sd = sd(N_ind, na.rm = TRUE),
            n = n(),
            se = drag_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))
# Create plot with correlation in the caption
ggplot(summary_data_Amount, aes(x = Avg.Amount, y = Avg.Prob, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(hjust = 0, nudge_x = 1) +  # nudges text to the right
  xlim(min(summary_data_Amount$Avg.Amount), max(summary_data_Amount$Avg.Amount) + 10) +
  theme_minimal() +
  labs(
    title = "Attributes of Lotteries: Ranking by Amount",
    x = "Mean Amount",
    y = "Mean Probability",
    subtitle = paste("Correlation in dat.long: r =", cor_estimate)
  ) +
  theme(
    axis.title = element_text(face = "bold"), 
    plot.subtitle = element_text(hjust = 0.5), 
    plot.title = element_text(face = "bold", hjust = 0.5)
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")

Amount Task

# Create a graph showing the correlation between the amount and mean number of drags per item in the amount task
ggplot(summary_data_Amount, aes(x = Avg.Amount, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Amount Attribute", subtitle = "Amount Task", x = "Avg. Amt", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

# Create a graph showing the correlation between the probability and mean number of drags per item in the probability task
ggplot(summary_data_Amount, aes(x = Avg.Prob, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Prob Attribute", subtitle = "Prob Task", x = "Avg. Prob", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Prob Task

# Create a graph showing the correlation between the amount and mean number of drags per item in the amount task
ggplot(summary_data_Prob, aes(x = Avg.Amount, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Amount Attribute", subtitle = "Prob Task", x = "Avg. Amt", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

# Create a graph showing the correlation between the probability and mean number of drags per item in the probability task
ggplot(summary_data_Prob, aes(x = Avg.Prob, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Prob Attribute", subtitle = "Prob Task", x = "Avg. Prob", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Regressions

Nested Models

  • We created nested versions of the rank variables by coding them as follows:
    • When the condition matches the nested variable (e.g., condition == “Amount” for Amount/Prob rank [nested within Amount] ), we used mean centered value. This helps reduce multicollinearity with the condition variable, see Collinearity Check below.
    • When the condition does not match, the value was set to 0.
# Prep for nested model
drag_and_drop_count_long.combined<-rbind(drag_and_drop_count_Amount_long, drag_and_drop_count_Prob_long)%>%
  mutate(Prob.c=Prob-mean(Prob),
         Amount.c=Amt-mean(Amt))

drag_and_drop_count_long.combined<-drag_and_drop_count_long.combined%>%
  mutate(Amt.Nested_Amount=case_when(
    condition == "Amount" ~ Amt,
    condition == "Prob" ~ 0
  ),
  Amt.Nested_Prob=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~  Amt
  ),
  Prob.Nested_Amount=case_when(
    condition == "Amount" ~ Prob,
    condition == "Prob" ~ 0
  ),
  Prob.Nested_Prob=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Prob
  ),
  Amt.Nested_Amount.c=case_when(
    condition == "Amount" ~Amount.c,
    condition == "Prob" ~ 0
  ),
  Amt.Nested_Prob.c=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Amount.c
  ),
  Prob.Nested_Amount.c=case_when(
    condition == "Amount" ~Prob.c,
    condition == "Prob" ~ 0
  ),
  Prob.Nested_Prob.c=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Prob.c
  )
)
M1<-glmer(N_ind~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+(1|ResponseId),
          drag_and_drop_count_long.combined,
          family=binomial)

M2<-glmer(N_ind~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+initial.rank.r+(1|ResponseId),
          drag_and_drop_count_long.combined,
          family=binomial)

tab_model(M1,M2,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount [Nested in Amount]",
              "Amount [Nested in Probability]",
              "Probability [Nested in Amount]",
              "Probability [Nested in Probability]",
              "Condition [Probability]",
              "Initial Rank [6]","Initial Rank [5]","Initial Rank [4]","Initial Rank [3]","Initial Rank [2]"),
          dv.labels = 
            c("Drag and Drop Count<br>(coded as binary DV; only Subject RE)",
              "Drag and Drop Count<br>(coded as binary DV; Subject RE and initial Position)"))
  Drag and Drop Count
(coded as binary DV; only Subject RE)
Drag and Drop Count
(coded as binary DV; Subject RE and initial Position)
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
Intercept 1.38 0.10 4.55 <0.001 0.07 0.01 -13.95 <0.001
Amount [Nested in Amount] 1.01 0.01 1.63 0.103 1.02 0.01 2.83 0.005
Amount [Nested in Probability] 0.96 0.01 -7.21 <0.001 0.95 0.01 -7.82 <0.001
Probability [Nested in Amount] 0.97 0.00 -8.11 <0.001 0.96 0.00 -9.36 <0.001
Probability [Nested in Probability] 1.01 0.00 2.04 0.041 1.02 0.00 3.60 <0.001
Condition Probability 1.18 0.12 1.63 0.103 1.19 0.15 1.36 0.172
Initial Rank [6] 115.59 31.03 17.70 <0.001
Initial Rank [5] 94.67 24.62 17.50 <0.001
Initial Rank [4] 49.66 12.17 15.94 <0.001
Initial Rank [3] 30.26 7.10 14.54 <0.001
Initial Rank [2] 10.10 2.23 10.49 <0.001
Random Effects
σ2 3.29 3.29
τ00 0.00 ResponseId 0.14 ResponseId
ICC   0.04
N 189 ResponseId 189 ResponseId
Observations 2118 2118
Marginal R2 / Conditional R2 0.215 / NA 0.572 / 0.590

The first model has singularity issues; the second is fine.

Collinearity Checks

Model 1

car::vif(M1)
##  Amt.Nested_Amount.c    Amt.Nested_Prob.c Prob.Nested_Amount.c 
##             1.939544             1.972481             1.928123 
##   Prob.Nested_Prob.c            condition 
##             1.994129             1.032145

Model 2

car::vif(M2)
##                          GVIF Df GVIF^(1/(2*Df))
## Amt.Nested_Amount.c  2.074821  1        1.440424
## Amt.Nested_Prob.c    2.021856  1        1.421920
## Prob.Nested_Amount.c 2.021591  1        1.421827
## Prob.Nested_Prob.c   2.095049  1        1.447429
## condition            1.065344  1        1.032155
## initial.rank.r       1.407624  5        1.034782

DV2: Drag Order

Drag order is the sequence in which items are dragged and dropped. Items that are not dragged are assigned a value of (1 + the total number of dragged items). For example, if a participant moves item A three times, item B twice, and item C once, while items D, E, and F remain untouched, the drag order is: * A = 1, B = 2, C = 3, D = E = F = 4. * This coding approach simplifies cases where an item is touched multiple times (as in the example). However, as seen, it is relatively rare that participants drag the same item repeatedly, justifying this simplification.

touch_order_analysis_Amount<-RankProcess_Amount%>%
  filter(step!=0)%>%
  group_by(ResponseId)%>%
  arrange(step)%>%
  filter(!duplicated(item_moved))%>% # retains only the first instance
  mutate(order=row_number())%>%
  ungroup()%>%
  mutate(condition="Amount")


touch_order_analysis.long_Amount <- expand_grid(
  ResponseId = unique(touch_order_analysis_Amount$ResponseId),
  item.f = unique(touch_order_analysis_Amount$item.f)
)

order_max.SUBJ_Amount<-touch_order_analysis.long_Amount%>%
  left_join(touch_order_analysis_Amount%>%select(ResponseId,item.f,order),by=c("ResponseId","item.f"))%>%
  left_join(touch_order_analysis_Amount%>%select(ResponseId,condition)%>%filter(!duplicated(ResponseId)),by=c("ResponseId"))%>%
  group_by(ResponseId)%>%
  dplyr::summarize(max_order=max(order,na.rm = T))

touch_order_analysis.long_Amount<-touch_order_analysis.long_Amount%>%
  left_join(touch_order_analysis_Amount%>%select(ResponseId,item.f,order),by=c("ResponseId","item.f"))%>%
  left_join(touch_order_analysis_Amount%>%select(ResponseId,condition)%>%filter(!duplicated(ResponseId)),by=c("ResponseId"))%>%left_join(order_max.SUBJ_Amount,by="ResponseId")%>%
  mutate(order = case_when(!is.na(order)~order,
                           TRUE~max_order+1))

touch_order_analysis_Prob<-RankProcess_Prob%>%
  filter(step!=0)%>%
  group_by(ResponseId)%>%
  arrange(step)%>%
  filter(!duplicated(item_moved))%>%
  mutate(order=row_number())%>%
  ungroup()%>%
  mutate(condition="Prob")



touch_order_analysis.long_Prob <- expand_grid(
  ResponseId = unique(touch_order_analysis_Prob$ResponseId),
  item.f = unique(touch_order_analysis_Prob$item.f)
)


order_max.SUBJ_Prob<-touch_order_analysis.long_Prob%>%
  left_join(touch_order_analysis_Prob%>%select(ResponseId,item.f,order),by=c("ResponseId","item.f"))%>%
  left_join(touch_order_analysis_Prob%>%select(ResponseId,condition)%>%filter(!duplicated(ResponseId)),by=c("ResponseId"))%>%
  group_by(ResponseId)%>%
  dplyr::summarize(max_order=max(order,na.rm = T))

touch_order_analysis.long_Prob<-touch_order_analysis.long_Prob%>%
  left_join(touch_order_analysis_Prob%>%select(ResponseId,item.f,order),by=c("ResponseId","item.f"))%>%
  left_join(touch_order_analysis_Prob%>%select(ResponseId,condition)%>%filter(!duplicated(ResponseId)),by=c("ResponseId"))%>%left_join(order_max.SUBJ_Prob,by="ResponseId")%>%
  mutate(order = case_when(!is.na(order)~order,
                           TRUE~max_order+1))



# length(unique(touch_order_analysis.long_A$ResponseId)) #142, good.
# touch_order_analysis.long_A%>%
#   group_by(ResponseId)%>%
#   summarize(n_6=n_distinct(item.f))%>%
#    filter(n_6!=6) # none, good
# psych::describe(drag_order_analysis.long$order) # between 1 and 6, good.

Distribution of Drag Order by Item

touch_order_Amount <- touch_order_analysis.long_Amount %>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  count(item.f,order,condition) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100)%>%
  ungroup()

touch_order_Prob <- touch_order_analysis.long_Prob %>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  count(item.f,order,condition) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100)%>%
  ungroup()
touch_order_combined<-rbind(touch_order_Prob,touch_order_Amount)


ggplot(touch_order_combined, aes(x = factor(order), y = n)) +
  geom_bar(
    stat = "identity",
    color = "black"
  ) +
  geom_text(
    aes(
      label = paste0(n, " (", round(percentage, 1), "%)")
    ),
    vjust = -0.5,
    size = 5,
    fontface="bold"
  ) +
  labs(
    title = "Drag Order by item and Condition",
    x = "Drag Order",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),  # Facet label adjustments
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  ) +
  facet_wrap(~ item.f * condition,ncol = 2) +
  ylim(0, 150)

Distribution of Mean Drag Order

mean_order.subj_Prob <- touch_order_analysis.long_Prob %>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  group_by(ResponseId)%>%
  mutate(mean_order = mean(order),
         condition="Prob")%>%
  ungroup()
mean_order.subj_Amount<- touch_order_analysis.long_Amount %>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  group_by(ResponseId)%>%
  mutate(mean_order = mean(order),
         condition="Amount")%>%
  ungroup()


mean_order.subj_combined<-rbind(mean_order.subj_Amount,mean_order.subj_Prob)

ggplot(mean_order.subj_combined, aes(x = mean_order)) +
  geom_density(fill = "lightblue", color = "black", alpha = 0.5) +
  geom_rug(sides = "b", color = "blue") +  # Rug plot along the bottom (x-axis) for individual data points
  labs(
    title = "Density Plot of Mean Drag Order",
    x = "Mean Drag Order",
    y = "Density"
  ) +
  facet_grid(~condition)

Model-free Visualization

summary_data_Amount<- touch_order_analysis.long_Amount%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop")


summary_data_Prob <- touch_order_analysis.long_Prob%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop")


summary_data_combined <- bind_rows(summary_data_Amount, summary_data_Prob)

summary_data_combined_ind$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)



ggplot(summary_data_combined, aes(x = condition, y = order_mean, 
                                      group = item.f, color = item.f, shape = item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = order_mean - se,
      ymax = order_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Order",
    title = "Mean Drag Order by Condition"
  ) +
  scale_color_manual(values = bet_colors) +  
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )

Amount Task

summary_data_Amount <- touch_order_analysis.long_Amount%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))


ggplot(summary_data_Amount, aes(x = Avg.Amount, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Amount Attribute", subtitle = "Amount Task", x = "Avg. Amt", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")

ggplot(summary_data_Amount, aes(x = Avg.Prob, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Prob Attribute", subtitle = "Amount Task", x = "Avg. Prob", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Prob Task

summary_data_Prob <- touch_order_analysis.long_Prob%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))


ggplot(summary_data_Prob, aes(x = Avg.Amount, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Amount Attribute", subtitle = "Prob Task", x = "Avg. Amt", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")

ggplot(summary_data_Prob, aes(x = Avg.Prob, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Prob Attribute", subtitle = "Prob Task", x  = "Avg. Prob", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Regressions

Amount and Prob Attribute values are centered before being entered into the model.

touch_order_analysis.long_Amount$condition<-"Amount"
touch_order_analysis.long_Prob$condition<-"Prob"

touch_order_analysis.long.combined<-rbind(touch_order_analysis.long_Prob, touch_order_analysis.long_Amount)%>%
  mutate(Prob.c=Prob-mean(Prob),
         Amount.c=Amt-mean(Amt))

Nested Models

##### Nested Model
touch_order_analysis.long.combined<-touch_order_analysis.long.combined%>%
  mutate(Amt.Nested_Amount=case_when(
    condition == "Amount" ~Amt,
    condition == "Prob" ~ 0
  ),
  Amt.Nested_Prob=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Amt
  ),
  Prob.Nested_Amount=case_when(
    condition == "Amount" ~Prob,
    condition == "Prob" ~ 0
  ),
  Prob.Nested_Prob=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Prob
  ),
  Amt.Nested_Amount.c=case_when(
    condition == "Amount" ~Amount.c,
    condition == "Prob" ~ 0
  ),
  Amt.Nested_Prob.c=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Amount.c
  ),
  Prob.Nested_Amount.c=case_when(
    condition == "Amount" ~Prob.c,
    condition == "Prob" ~ 0
  ),
  Prob.Nested_Prob.c=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Prob.c
  ))

M1<-clmm(factor(order)~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition + (1|ResponseId),touch_order_analysis.long.combined)

M2<-clmm(factor(order)~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+initial.rank.r+(1|ResponseId),touch_order_analysis.long.combined)

tab_model(M1,M2,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("1|2","2|3", "3|4", "4|5", "5|6",
              "Amount [Nested in Amount]",
              "Amount [Nested in Probability]",
              "Probability [Nested in Amount]",
              "Probability [Nested in Probability]",
              "Condition [Probability]",
              "Initial Rank [6]","Initial Rank [5]","Initial Rank [4]","Initial Rank [3]","Initial Rank [2]"),
          dv.labels = 
            c("Drag and Drop Order<br>(coded as binary DV; only Subject RE)",
              "Drag and Drop Order<br>(coded as binary DV; Subject RE and initial Position)"))
  Drag and Drop Order
(coded as binary DV; only Subject RE)
Drag and Drop Order
(coded as binary DV; Subject RE and initial Position)
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
1|2 0.12 0.01 -26.06 <0.001 0.01 0.00 -31.83 <0.001
2|3 0.52 0.03 -10.24 <0.001 0.03 0.00 -24.48 <0.001
3|4 1.82 0.11 9.59 <0.001 0.16 0.02 -14.92 <0.001
4|5 7.49 0.55 27.35 <0.001 0.91 0.11 -0.77 0.439
5|6 66.39 9.27 30.06 <0.001 14.23 2.26 16.68 <0.001
Amount [Nested in Amount] 0.95 0.00 -11.69 <0.001 0.94 0.00 -12.70 <0.001
Amount [Nested in Probability] 1.01 0.00 3.44 0.001 1.02 0.00 5.12 <0.001
Probability [Nested in Amount] 1.01 0.00 3.86 <0.001 1.02 0.00 6.42 <0.001
Probability [Nested in Probability] 0.96 0.00 -14.50 <0.001 0.95 0.00 -16.12 <0.001
Condition Probability 1.03 0.08 0.33 0.738 1.06 0.08 0.74 0.461
Initial Rank [6] 0.04 0.01 -20.73 <0.001
Initial Rank [5] 0.04 0.01 -21.08 <0.001
Initial Rank [4] 0.04 0.01 -20.08 <0.001
Initial Rank [3] 0.05 0.01 -19.44 <0.001
Initial Rank [2] 0.10 0.01 -15.74 <0.001
Random Effects
σ2 3.29 3.29
τ00 0.00 ResponseId 0.04 ResponseId
ICC   0.01
N 193 ResponseId 193 ResponseId
Observations 2238 2238
Marginal R2 / Conditional R2 0.366 / NA 0.552 / 0.558

Collinearity Check

This is not possible for the clmm() family; have to find a different way

# car::vif(M1)
# car::vif(M2)

DV3: Drag Distance

Coding of Distance

After each dragging item, the distance is calculated as Last Position - Current Position of the dragged item. Positive values indicate that the item was ranked up (e.g., moved from 2nd place to the 1st place), and positive values indicate that the item is ranked down (e.g., moved from 1st place to the second place). items that are not dragged for each respondent are assigned a distance of 0

Distribution of Drag Distance

summary_stats <- Distance_Amount.cleanup.df %>%
  group_by(item.f) %>%
  dplyr::summarize(
    mean_distance = mean(distance, na.rm = TRUE),
    median_distance = median(distance, na.rm = TRUE)
  )

Distance_Amount.cleanup.df$item.f<- factor(Distance_Amount.cleanup.df$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)




ggplot(Distance_Amount.cleanup.df ,
       aes(x = -distance, fill = item.f)) +
  geom_histogram(binwidth = 1, alpha = 0.3, position = "identity") +
  labs(
    title = "Distribution of Drag Distance - Amount Task",
    x = "Distance",
    y = "Count",
    fill = "item"
  ) +
  theme_minimal()+
  facet_grid(~item.f)+
  xlim(6,-6)+
    scale_fill_manual(values = bet_colors)

summary_stats <- Distance_Prob.cleanup.df %>%
  group_by(item.f) %>%
  dplyr::summarize(
    mean_distance = mean(distance, na.rm = TRUE),
    median_distance = median(distance, na.rm = TRUE)
  )


 Distance_Prob.cleanup.df$item.f<- factor(Distance_Prob.cleanup.df$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)

ggplot(Distance_Prob.cleanup.df ,
       aes(x = -distance, fill = item.f)) +
  geom_histogram(binwidth = 1, alpha = 0.3, position = "identity") +
  labs(
    title = "Distribution of Drag Distance - Prob Task",
    x = "Distance",
    y = "Count",
    fill = "item"
  ) +
  theme_minimal()+
  facet_grid(~item.f)+
    scale_fill_manual(values = bet_colors)+
  xlim(6,-6)

Model-Free Visualization

Distance_Amount_cleanup.df.test<-Distance_Amount.cleanup.df%>%
  select(ResponseId, item.f,distance,distance.abs)%>%
  mutate(condition="Amount")

Distance_Prob_cleanup.df.test<-Distance_Prob.cleanup.df%>%
  select(ResponseId, item.f,distance,distance.abs)%>%
  mutate(condition="Prob")

Distance_cleanup.df.combined<-rbind(Distance_Amount_cleanup.df.test,Distance_Prob_cleanup.df.test)

summary_distance_data <- Distance_cleanup.df.combined %>%
  mutate(condition=as.factor(condition),
         distance.abs=(distance))%>%
  group_by(condition, item.f) %>%
  dplyr::summarize(
    distance_mean = -mean(distance, na.rm = TRUE),
    distance_sd = sd(distance, na.rm = TRUE),
    n = n(),
    se = distance_sd / sqrt(n),
    .groups = "drop"
  )


summary_data_combined_ind$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)


ggplot(summary_distance_data, aes(x = condition, y = distance_mean, group = item.f, color = item.f,shape=item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = distance_mean - se,
      ymax = distance_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Distance",
    title = "Mean Drag Distance by Condition"
  ) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )+
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
    scale_color_manual(values = bet_colors)

If we plot the ABSOLUTE VALUE of Distance:

summary_distance_data <- Distance_cleanup.df.combined %>%
  mutate(condition=as.factor(condition),
         distance.abs=abs(distance))%>%
  group_by(condition, item.f) %>%
  dplyr::summarize(
    distance_mean = mean(distance.abs, na.rm = TRUE),
    distance_sd = sd(distance.abs, na.rm = TRUE),
    n = n(),
    se = distance_sd / sqrt(n),
    .groups = "drop"
  )

summary_data_combined_ind$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)

ggplot(summary_distance_data, aes(x = condition, y = distance_mean, group = item.f, color = item.f,shape=item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = distance_mean - se,
      ymax = distance_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Order",
    title = "Mean Drag Order by Condition"
  ) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )+
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
    scale_color_manual(values = bet_colors)

Amount Task

Distance_Amount.cleanup.df<-Distance_Amount.cleanup.df%>%
    filter(ResponseId%notin%Amount_tau_less0.5)%>%
  mutate(rank.Amount=case_when(
    item.f=="Pr6_Amt1" ~1,
    item.f=="Pr5_Amt2" ~ 2,
    item.f== "Pr4_Amt3" ~ 3,
    item.f== "Pr3_Amt4" ~ 4,
    item.f == "Pr2_Amt5" ~ 5,
    item.f == "Pr1_Amt6" ~6
  ),
  rank.Prob=case_when(
    item.f=="Pr6_Amt1" ~6,
    item.f=="Pr5_Amt2" ~ 5,
    item.f== "Pr4_Amt3" ~ 4,
    item.f== "Pr3_Amt4" ~ 3,
    item.f == "Pr2_Amt5" ~ 2,
    item.f == "Pr1_Amt6" ~1))%>%
  left_join(initial.dat_amount%>%select(ResponseId,starts_with("initial.items_")),by="ResponseId")%>%
  mutate(initial.rank=case_when(
    item.f=="Pr6_Amt1" ~ initial.items_49,
    item.f=="Pr5_Amt2" ~ initial.items_50,
    item.f=="Pr4_Amt3" ~ initial.items_64,
    item.f=="Pr3_Amt4" ~ initial.items_65,
    item.f=="Pr2_Amt5" ~ initial.items_67,
    item.f=="Pr1_Amt6" ~ initial.items_68
  ),
  initial.rank.r=7-initial.rank,
  initial.rank.r = relevel(factor(initial.rank.r), ref = 6)
  )%>%
  select(-c(starts_with("initial.items_")))%>%
  left_join(dat_long1%>%select(ResponseId,item.f,Prob,Amt),by=c("ResponseId","item.f"))


summary_data_Amount <- Distance_Amount.cleanup.df%>%
  dplyr::group_by(item.f) %>%
  dplyr::summarize(distance_mean = mean(distance, na.rm = TRUE),
            distance_sd = sd(distance, na.rm = TRUE),
            n = n(),
            se = distance_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))
  • Model Specification: Drag Count predicted by Amount and Prob attribute rank
  • Note: A negative sign was added to the distance DV. So a positive coefficient indicates that a higher value of the predictor contributes to the item being ranked further up
ggplot(summary_data_Amount, aes(x = Avg.Amount, y = -distance_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Distance and Amount Attribute", subtitle = "Amount Task", x = "Avg. Amt", y = "Avg. Drag Distance") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

ggplot(summary_data_Amount, aes(x = Avg.Prob, y = -distance_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Distance and Prob Attribute", subtitle = "Amount Task", x = "Avg. Prob", y = "Avg. Drag Distance") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Prob Task

Distance_Prob.cleanup.df<-Distance_Prob.cleanup.df%>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  mutate(rank.Amount=case_when(
    item.f=="Pr6_Amt1" ~1,
    item.f=="Pr5_Amt2" ~ 2,
    item.f== "Pr4_Amt3" ~ 3,
    item.f== "Pr3_Amt4" ~ 4,
    item.f == "Pr2_Amt5" ~ 5,
    item.f == "Pr1_Amt6" ~6
  ),
  rank.Prob=case_when(
    item.f=="Pr6_Amt1" ~6,
    item.f=="Pr5_Amt2" ~ 5,
    item.f== "Pr4_Amt3" ~ 4,
    item.f== "Pr3_Amt4" ~ 3,
    item.f == "Pr2_Amt5" ~ 2,
    item.f == "Pr1_Amt6" ~1))%>%
  left_join(initial.dat_prob%>%select(ResponseId,starts_with("initial.items_")),by="ResponseId")%>%
  mutate(initial.rank=case_when(
    item.f=="Pr6_Amt1" ~ initial.items_49,
    item.f=="Pr5_Amt2" ~ initial.items_50,
    item.f=="Pr4_Amt3" ~ initial.items_64,
    item.f=="Pr3_Amt4" ~ initial.items_65,
    item.f=="Pr2_Amt5" ~ initial.items_67,
    item.f=="Pr1_Amt6" ~ initial.items_68
  ),
  initial.rank.r=7-initial.rank,
  initial.rank.r = relevel(factor(initial.rank.r), ref = 6)
  )%>%
  select(-c(starts_with("initial.items_")))%>%
  left_join(dat_long1%>%select(ResponseId,item.f,Prob,Amt),by=c("ResponseId","item.f"))



summary_data_Prob <- Distance_Prob.cleanup.df%>%
  dplyr::group_by(item.f) %>%
  dplyr::summarize(distance_mean = mean(distance, na.rm = TRUE),
            distance_sd = sd(distance, na.rm = TRUE),
            n = n(),
            se = distance_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))
ggplot(summary_data_Prob, aes(x = Avg.Amount, y = -distance_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Distance and Amount Attribute", subtitle = "Prob Task", x = "Avg. Amt", y = "Avg. Drag Distance") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

ggplot(summary_data_Prob, aes(x = Avg.Prob, y = -distance_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Distance and Prob Attribute", subtitle = "Prob Task", x = "Avg. Prob", y = "Avg. Drag Distance") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Regressions

Amount and Prob Attribute Ranks are centered before being entered into the model.

Distance_Amount.cleanup.df$condition<-"Amount"
Distance_Prob.cleanup.df$condition<-"Prob"
Distance.cleanup.combined<-rbind(Distance_Amount.cleanup.df,Distance_Prob.cleanup.df)%>%
  mutate(Prob.c=Prob-mean(Prob),
         Amount.c=Amt-mean(Amt))

Nested Models

##### Nested Model

Distance.cleanup.combined<-Distance.cleanup.combined%>%
  mutate(Amt.Nested_Amount=case_when(
    condition == "Amount" ~Amt,
    condition == "Prob" ~ 0
  ),
  Amt.Nested_Prob=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Amt
  ),
  Prob.Nested_Amount=case_when(
    condition == "Amount" ~Prob,
    condition == "Prob" ~ 0
  ),
  Prob.Nested_Prob=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Prob
  ),
  Amt.Nested_Amount.c=case_when(
    condition == "Amount" ~Amount.c,
    condition == "Prob" ~ 0
  ),
  Amt.Nested_Prob.c=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Amount.c
  ),
  Prob.Nested_Amount.c=case_when(
    condition == "Amount" ~ Prob.c,
    condition == "Prob" ~ 0
  ),
  Prob.Nested_Prob.c=case_when(
    condition == "Amount" ~ 0,
    condition == "Prob" ~ Prob.c
  ))

M1<-lmer(distance.r~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+(1|ResponseId),Distance.cleanup.combined)
M2<-lmer(distance.r~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+initial.rank.r+(1|ResponseId),Distance.cleanup.combined)

tab_model(
  M1, M2,
  dv.labels = c(
    "Drag and Drop Distance<br>(Only Subject RE)",
    "Drag and Drop Distance<br>(Subject RE + Added Initial Position)"
  ),
  pred.labels =
            c("Intercept",
              "Amount [Nested in Amount]",
              "Amount [Nested in Probability]",
              "Probability [Nested in Amount]",
              "Probability [Nested in Probability]",
              "Condition [Probability]",
              "Initial Rank [6]","Initial Rank [5]","Initial Rank [4]","Initial Rank [3]","Initial Rank [2]"),
  show.se = T,
  show.ci = F,
  show.stat = T
)
  Drag and Drop Distance
(Only Subject RE)
Drag and Drop Distance
(Subject RE + Added Initial Position)
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 1.11 0.05 23.68 <0.001 -0.24 0.06 -4.23 <0.001
Amount [Nested in Amount] 0.02 0.00 8.95 <0.001 0.02 0.00 14.10 <0.001
Amount [Nested in Probability] -0.02 0.00 -8.71 <0.001 -0.02 0.00 -12.16 <0.001
Probability [Nested in Amount] -0.02 0.00 -9.61 <0.001 -0.02 0.00 -15.99 <0.001
Probability [Nested in Probability] 0.01 0.00 8.96 <0.001 0.02 0.00 16.37 <0.001
Condition Probability 0.01 0.05 0.15 0.881 0.00 0.03 0.12 0.903
Initial Rank [6] 2.69 0.06 48.01 <0.001
Initial Rank [5] 2.12 0.06 37.89 <0.001
Initial Rank [4] 1.57 0.06 28.01 <0.001
Initial Rank [3] 1.14 0.06 20.39 <0.001
Initial Rank [2] 0.57 0.06 10.10 <0.001
Random Effects
σ2 1.46 0.55
τ00 0.14 ResponseId 0.23 ResponseId
ICC 0.09 0.29
N 189 ResponseId 189 ResponseId
Observations 2118 2118
Marginal R2 / Conditional R2 0.330 / 0.389 0.673 / 0.768

Collinearity Check

car::vif(M1)
##  Amt.Nested_Amount.c    Amt.Nested_Prob.c Prob.Nested_Amount.c 
##             2.032084             2.036722             2.032065 
##   Prob.Nested_Prob.c            condition 
##             2.036704             1.000001
car::vif(M2)
##                          GVIF Df GVIF^(1/(2*Df))
## Amt.Nested_Amount.c  2.045435  1        1.430187
## Amt.Nested_Prob.c    2.050920  1        1.432103
## Prob.Nested_Amount.c 2.049421  1        1.431580
## Prob.Nested_Prob.c   2.049804  1        1.431714
## condition            1.000004  1        1.000002
## initial.rank.r       1.017056  5        1.001693

Ranking Over Time

Distance_Amount<-RankProcess_Amount %>%
  group_by(ResponseId)%>%
  mutate(
    parts = str_split(order, ",")
  ) %>%
  mutate(
    Rank1 = sapply(parts, function(x) x[1]),  # Extract before 1st comma
    Rank2 = sapply(parts, function(x) x[2]),  # Extract before 2nd comma
    Rank3 = sapply(parts, function(x) x[3]),  # Extract before 3rd comma
    Rank4 = sapply(parts, function(x) x[4]),  # Extract before 4th comma
    Rank5 = sapply(parts, function(x) x[5]),  # Extract before 5th comma
    Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA))  # Extract after 5th comma
  ) %>%
  select(-parts)

items_Amount <- c("49", "50", "64", "65", "67", "68")
for (item in items_Amount) {
  Distance_Amount[[paste0("current_", item)]] <- NA_integer_
}


Distance_Amount <- Distance_Amount %>%
  rowwise() %>%
  mutate(
    across(
      starts_with("current_"),
      ~ {
        item_number <- str_remove(cur_column(), "current_")  # Extract the item number
        case_when(
          Rank1 == item_number ~ 1,
          Rank2 == item_number ~ 2,
          Rank3 == item_number ~ 3,
          Rank4 == item_number ~ 4,
          Rank5 == item_number ~ 5,
          Rank6 == item_number ~ 6,
          TRUE ~ 1
        )
      }
    )
  ) %>%
  ungroup()


for (item in items_Amount) {
  Distance_Amount[[paste0("last_", item)]] <- lag(Distance_Amount[[paste0("current_", item)]])
}


Distance_Amount<-Distance_Amount%>%
  group_by(ResponseId)%>%
  rowwise() %>%
  mutate(
    current_item_moved = get(paste0("current_", item_moved)),  
    last_item_moved = get(paste0("last_", item_moved)),        
    move_direction = case_when(
      is.na(last_item_moved) ~ "no_change",  
      current_item_moved < last_item_moved ~ "up",
      current_item_moved > last_item_moved ~ "down",
      TRUE ~ "no_change"
    )
  ) %>%
  ungroup()

Distance_Amount <- Distance_Amount %>%
  group_by(ResponseId)%>%
  filter(step!=0)

TimeAnalysis.Amount<-Distance_Amount%>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  separate(timing, into = c("drag_time", "drop_time"), sep = ", ", convert = TRUE)%>%
  mutate(DD_diff=drop_time-drag_time,
         condition="Amount")%>%
  select(step,ResponseId,condition,item.f,drag_time,drop_time,DD_diff,current_49:current_68)

duplicated.n<-nrow(TimeAnalysis.Amount)
item<-c("Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")


TimeAnalysis.Amount <- TimeAnalysis.Amount %>%
  uncount(weights = 6) 

TimeAnalysis.Amount$item.f<- rep(item, times = duplicated.n)
TimeAnalysis.Amount<-TimeAnalysis.Amount%>%
  mutate(current_rank=case_when(
    item.f=="Pr6_Amt1" ~ current_49,
    item.f=="Pr5_Amt2" ~ current_50,
    item.f=="Pr4_Amt3" ~ current_64,
    item.f=="Pr3_Amt4" ~ current_65,
    item.f=="Pr2_Amt5" ~ current_67,
    item.f=="Pr1_Amt6" ~ current_68
  ))%>%
  select(-c(current_49:current_68))

item_Amounts <- c(
  "Pr6_Amt1" = "#d73027",  # Strong red
  "Pr5_Amt2" = "#f46d43",  # Medium red
  "Pr4_Amt3" = "#fdae61",  # Light red (orange-ish)
  "Pr3_Amt4" = "#a6d96a",  # Light green
  "Pr2_Amt5" = "#66bd63",  # Medium green
  "Pr1_Amt6" = "#1a9850"   # Strong green
)

item_shapes <- c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                 "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                 "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)
# TimeAnalysis.Amount #one more step here to expand the dataset

TimeAnalysis.Amount.expand <- expand_grid(
  ResponseId = unique(TimeAnalysis.Amount$ResponseId),
  step = unique(TimeAnalysis.Amount$step),
  item.f = unique(TimeAnalysis.Amount$item.f)
) %>%
  left_join(
    TimeAnalysis.Amount %>% select(ResponseId, step, item.f, current_rank),
    by = c("ResponseId", "step", "item.f")
  ) %>%
  arrange(ResponseId, item.f, step) %>%
  group_by(ResponseId, item.f) %>%
  fill(current_rank, .direction = "down")%>%
  ungroup()


Summary.Amount <- TimeAnalysis.Amount.expand %>%
  group_by(step, item.f) %>%
  dplyr::summarize(mean.current_rank = mean(current_rank),
            sd.current_rank = sd(current_rank),
            n = n(),  
            se = sd.current_rank / sqrt(n),  
            .groups = "drop")



initial.rank<-touch_order_analysis.long_Amount%>%
  filter(ResponseId%notin%Amount_tau_less0.5)%>%
  group_by(item.f)%>%
  mutate(initial.rank=as.numeric(initial.rank))%>%
  dplyr::summarize(mean.current_rank = 7-mean(initial.rank),
            sd.current_rank = sd(initial.rank),
            n = n(),  
            se = sd.current_rank / sqrt(n),  
            .groups = "drop")%>%
  mutate(step=0)

Summary.Amount<-rbind(Summary.Amount,
                     initial.rank)

ggplot(Summary.Amount, aes(x = step, y = mean.current_rank, 
                          color = item.f,  shape = item.f)) +
  geom_line(size = 1) +  
  geom_point(size = 6, fill = "white") +  
  geom_errorbar(aes(ymin = mean.current_rank - se, ymax = mean.current_rank + se), 
                width = 0.3, size = 1.2, alpha = 0.8) + 
  scale_color_manual(values = item_Amounts) +  
  scale_shape_manual(values = item_shapes) +  
  labs(title = "Mean Rank by Step (Amount Task)",
       x = "Step",
       y = "Mean Rank",
       color = "Item",
       linetype = "Item",
       shape = "Item") +
  theme_minimal() +  # Clean theme
  theme(legend.position = "right",
    axis.title.x = element_text(face = "bold", size = 14),  # Bold x-axis label
    axis.title.y = element_text(face = "bold", size = 14),  # Bold y-axis label
    axis.text.x = element_text(face = "bold", size = 12),   # Bold x-axis text
    axis.text.y = element_text(face = "bold", size = 12)    # Bold y-axis text
  )+
  scale_y_continuous(breaks = 6:1) +
  scale_x_continuous(breaks = 0:8) 

Distance_Prob<-RankProcess_Prob %>%
  group_by(ResponseId)%>%
  mutate(
    # Split the string into parts based on commas
    parts = str_split(order, ",")
  ) %>%
  mutate(
    Rank1 = sapply(parts, function(x) x[1]),  # Extract before 1st comma
    Rank2 = sapply(parts, function(x) x[2]),  # Extract before 2nd comma
    Rank3 = sapply(parts, function(x) x[3]),  # Extract before 3rd comma
    Rank4 = sapply(parts, function(x) x[4]),  # Extract before 4th comma
    Rank5 = sapply(parts, function(x) x[5]),  # Extract before 5th comma
    Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA))  # Extract after 5th comma
  ) %>%
  select(-parts)

items_Prob <- c("49", "50", "64", "65", "67", "68")
for (item in items_Prob) {
  Distance_Prob[[paste0("current_", item)]] <- NA_integer_
}


Distance_Prob <- Distance_Prob %>%
  rowwise() %>%
  mutate(
    across(
      starts_with("current_"),
      ~ {
        item_number <- str_remove(cur_column(), "current_")  # Extract the item number
        case_when(
          Rank1 == item_number ~ 1,
          Rank2 == item_number ~ 2,
          Rank3 == item_number ~ 3,
          Rank4 == item_number ~ 4,
          Rank5 == item_number ~ 5,
          Rank6 == item_number ~ 6,
          TRUE ~ 1 # Distance_A %>% mutate(NA_count = rowSums(is.na(select(., starts_with("current_"))))); this code somehow results in the first item always gets an NA, so manually fix this error
        )
      }
    )
  ) %>%
  ungroup()


for (item in items_Prob) {
  Distance_Prob[[paste0("last_", item)]] <- lag(Distance_Prob[[paste0("current_", item)]])
}


Distance_Prob<-Distance_Prob%>%
  group_by(ResponseId)%>%
  rowwise() %>%
  mutate(
    current_item_moved = get(paste0("current_", item_moved)),  # Get the rank of the moved item from current columns
    last_item_moved = get(paste0("last_", item_moved)),        # Get the rank of the moved item from last columns
    # Determine the movement direction; we should not see any "no_change"
    move_direction = case_when(
      is.na(last_item_moved) ~ "no_change",  
      current_item_moved < last_item_moved ~ "up",
      current_item_moved > last_item_moved ~ "down",
      TRUE ~ "no_change"
    )
  ) %>%
  ungroup()


Distance_Prob <- Distance_Prob %>%
  group_by(ResponseId)%>%
  filter(step!=0) # need to retain step 0 for steps that come before


TimeAnalysis.Prob<-Distance_Prob%>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  separate(timing, into = c("drag_time", "drop_time"), sep = ", ", convert = TRUE)%>%
  mutate(DD_diff=drop_time-drag_time,
         condition="Prob")%>%
  select(step,ResponseId,condition,item.f,drag_time,drop_time,DD_diff,current_49:current_68)

duplicated.n<-nrow(TimeAnalysis.Prob)
item<-c("Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")

TimeAnalysis.Prob <- TimeAnalysis.Prob %>%
  uncount(weights = 6) 
TimeAnalysis.Prob$item.f<- rep(item, times = duplicated.n)

TimeAnalysis.Prob<-TimeAnalysis.Prob%>%
  mutate(current_rank=case_when(
    item.f=="Pr6_Amt1" ~ current_49,
    item.f=="Pr5_Amt2" ~ current_50,
    item.f=="Pr4_Amt3" ~ current_64,
    item.f=="Pr3_Amt4" ~ current_65,
    item.f=="Pr2_Amt5" ~ current_67,
    item.f=="Pr1_Amt6" ~ current_68
  ))%>%
  select(-c(current_49:current_68))

item_Amounts <- c(
  "Pr6_Amt1" = "#d73027",  # Strong red
  "Pr5_Amt2" = "#f46d43",  # Medium red
  "Pr4_Amt3" = "#fdae61",  # Light red (orange-ish)
  "Pr3_Amt4" = "#a6d96a",  # Light green
  "Pr2_Amt5" = "#66bd63",  # Medium green
  "Pr1_Amt6" = "#1a9850"   # Strong green
)

item_shapes <- c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                 "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                 "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)

TimeAnalysis.Prob.expand <- expand_grid(
  ResponseId = unique(TimeAnalysis.Prob$ResponseId),
  step = unique(TimeAnalysis.Prob$step),
  item.f = unique(TimeAnalysis.Prob$item.f)
) %>%
  left_join(
    TimeAnalysis.Prob %>% select(ResponseId, step, item.f, current_rank),
    by = c("ResponseId", "step", "item.f")
  ) %>%
  arrange(ResponseId, item.f, step) %>%
  group_by(ResponseId, item.f) %>%
  fill(current_rank, .direction = "down")%>%
  ungroup()


Summary.Prob <- TimeAnalysis.Prob.expand %>%
  group_by(step, item.f) %>%
  dplyr::summarize(mean.current_rank = mean(current_rank),
            sd.current_rank = sd(current_rank),
            n = n(),  
            se = sd.current_rank / sqrt(n),  
            .groups = "drop")


initial.rank<-touch_order_analysis.long_Prob%>%
  filter(ResponseId%notin%Prob_tau_less0.5)%>%
  group_by(item.f)%>%
  mutate(initial.rank=as.numeric(initial.rank))%>%
  dplyr::summarize(mean.current_rank = 7- mean(initial.rank),
            sd.current_rank = sd(initial.rank),
            n = n(),  
            se = sd.current_rank / sqrt(n),  
            .groups = "drop")%>%
  mutate(step=0)

Summary.Prob<-rbind(Summary.Prob,
                     initial.rank)

ggplot(Summary.Prob, aes(x = step, y = mean.current_rank, 
                          color = item.f,  shape = item.f)) +
  geom_line(size = 1) +  
  geom_point(size = 6, fill = "white") +  
  geom_errorbar(aes(ymin = mean.current_rank - se, ymax = mean.current_rank + se), 
                width = 0.3, size = 1.2, alpha = 0.8) + 
  scale_color_manual(values = item_Amounts) +  
  scale_shape_manual(values = item_shapes) +  
  labs(title = "Mean Rank by Step (Prob Task)",
       x = "Step",
       y = "Mean Rank",
       color = "Item",
       linetype = "Item",
       shape = "Item") +
  theme_minimal() +  # Clean theme
  theme(legend.position = "right",
    axis.title.x = element_text(face = "bold", size = 14),  # Bold x-axis label
    axis.title.y = element_text(face = "bold", size = 14),  # Bold y-axis label
    axis.text.x = element_text(face = "bold", size = 12),   # Bold x-axis text
    axis.text.y = element_text(face = "bold", size = 12)    # Bold y-axis text
  )+
  scale_y_continuous(breaks = 6:1) +
  scale_x_continuous(breaks = 0:10) 

Correlation between 3F Measures

This only concerns the ranking-by-preference tasks

Preference 1 Ranking

# Display correlation matrix
print(round(cor(master_df[, c("set1_touch_count", "set1_order", "set1_drag_distance.r")], 
                use = "pairwise.complete.obs"), 2))
##                      set1_touch_count set1_order set1_drag_distance.r
## set1_touch_count                 1.00      -0.48                 0.51
## set1_order                      -0.48       1.00                -0.47
## set1_drag_distance.r             0.51      -0.47                 1.00
# Create SPLOM
ggpairs(master_df, 
        columns = c("set1_touch_count", "set1_order", "set1_drag_distance.r"),
        lower = list(continuous = "smooth"), 
        diag = list(continuous = "densityDiag"), 
        upper = list(continuous = "cor")) +
  theme_minimal()

Preference 2 Ranking

# Display correlation matrix
print(round(cor(master_df[, c("set2_touch_count", "set2_order", "set2_drag_distance.r")], 
                use = "pairwise.complete.obs"), 2))
##                      set2_touch_count set2_order set2_drag_distance.r
## set2_touch_count                 1.00      -0.49                 0.54
## set2_order                      -0.49       1.00                -0.51
## set2_drag_distance.r             0.54      -0.51                 1.00
# Create SPLOM
ggpairs(master_df, 
        columns = c("set2_touch_count", "set2_order", "set2_drag_distance.r"),
        lower = list(continuous = "smooth"), 
        diag = list(continuous = "densityDiag"), 
        upper = list(continuous = "cor")) +
  theme_minimal()

DV: Subjective Rank

Regress the subjective ranks on the amount and probability tasks.

Amount

amt_rob <- lm_robust(amt_subj_rank ~ set1_amt.c + set1_prob.c, data = master_df, cluster = ResponseId, se_type = "stata")

amt_lmer <- lmer(amt_subj_rank ~ set1_amt.c + set1_prob.c + (1 | ResponseId), data = master_df)

amt_clmm <- clmm(factor(amt_subj_rank) ~ set1_amt.c + set1_prob.c + (1 | ResponseId), data = master_df)

tab_model(amt_rob, amt_lmer, amt_clmm,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount (centered)",
              "Probability (centered)",
              "1|2","2|3", "3|4", "4|5", "5|6"),
          dv.labels = 
            c("DV = Amount (subj. Rank)<br>lm_robust",
              "DV = Amount (subj. Rank)<br>linear, repeated",
              "DV = Amount (subj. Rank)<br>ordinal, repeated"))
  DV = Amount (subj. Rank)
lm_robust
DV = Amount (subj. Rank)
linear, repeated
DV = Amount (subj. Rank)
ordinal, repeated
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Odds Ratios std. Error Statistic p
Intercept 3.50 0.01 502.70 <0.001 3.50 0.01 255.13 <0.001
Amount (centered) 0.79 0.03 31.40 <0.001 0.79 0.02 40.52 <0.001 55.33 NaN NaN NaN
Probability (centered) -0.98 0.02 -47.78 <0.001 -0.98 0.02 -50.24 <0.001 0.00 NaN NaN NaN
1|2 0.00 NaN NaN NaN
2|3 0.03 NaN NaN NaN
3|4 5.34 NaN NaN NaN
4|5 895.14 NaN NaN NaN
5|6 204562.15 NaN NaN NaN
Random Effects
σ2   0.22 3.29
τ00   0.00 ResponseId 0.00 ResponseId
N   194 ResponseId 194 ResponseId
Observations 1164 1164 1164
R2 / R2 adjusted 0.925 / 0.925 0.925 / NA 0.978 / NA

Probability

prob_rob <- lm_robust(prob_subj_rank ~ set1_amt.c + set1_prob.c, data = master_df, cluster = ResponseId, se_type = "stata")

prob_lmer <- lmer(prob_subj_rank ~ set1_amt.c + set1_prob.c + (1 | ResponseId), data = master_df)

prob_clmm <- clmm(factor(prob_subj_rank) ~ set1_amt.c + set1_prob.c + (1 | ResponseId), data = master_df)

tab_model(prob_rob, prob_lmer, prob_clmm,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount (centered)",
              "Probability (centered)",
              "1|2","2|3", "3|4", "4|5", "5|6"),
          dv.labels = 
            c("DV = Prob (subj. Rank)<br>lm_robust",
              "DV = Prob (subj. Rank)<br>linear, repeated",
              "DV = Prob (subj. Rank)<br>ordinal, repeated"))
  DV = Prob (subj. Rank)
lm_robust
DV = Prob (subj. Rank)
linear, repeated
DV = Prob (subj. Rank)
ordinal, repeated
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Odds Ratios std. Error Statistic p
Intercept 3.50 0.01 568.20 <0.001 3.50 0.03 122.35 <0.001
Amount (centered) -0.75 0.04 -20.34 <0.001 -0.75 0.04 -18.34 <0.001 0.08 NaN NaN NaN
Probability (centered) 0.77 0.05 15.27 <0.001 0.77 0.04 18.85 <0.001 10.31 NaN NaN NaN
1|2 0.01 NaN NaN NaN
2|3 0.13 NaN NaN NaN
3|4 1.07 NaN NaN NaN
4|5 8.71 NaN NaN NaN
5|6 152.85 NaN NaN NaN
Random Effects
σ2   0.95 3.29
τ00   0.00 ResponseId 0.00 ResponseId
N   194 ResponseId 194 ResponseId
Observations 1164 1164 1164
R2 / R2 adjusted 0.674 / 0.674 0.674 / NA 0.859 / NA

Ranking by Preference Analysis

Descriptives - 1ST Preference Task

rank_data <- dat %>%
  select(
    `49` = rank_pref1_49,
    `50` = rank_pref1_50,
    `64` = rank_pref1_64,
    `65` = rank_pref1_65,
    `67` = rank_pref1_67,
    `68` = rank_pref1_68
  ) %>%
  pivot_longer(
    everything(),
    names_to = "item_moved",
    values_to = "Subj_rank"
  ) %>%
  mutate(
    item_moved = as.integer(item_moved),
    item_label = case_when(
      item_moved == 49 ~ "Pr6_Amt1",
      item_moved == 50 ~ "Pr5_Amt2",
      item_moved == 64 ~ "Pr4_Amt3",
      item_moved == 65 ~ "Pr3_Amt4",
      item_moved == 67 ~ "Pr2_Amt5",
      item_moved == 68 ~ "Pr1_Amt6"
    ),
    item_label = factor(item_label, levels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"))
  )


ggplot(rank_data, aes(x = item_label, y = Subj_rank)) +  # reverse coded so 6 = top rank
  geom_violin(trim = FALSE, fill = "lightblue", color = "darkblue", alpha = 0.5) +
  geom_jitter(width = 0.15, height = 0, alpha = 0.6, size = 1.5, color = "black") +
  stat_summary(fun = mean, geom = "point", shape = 21, size = 3, fill = "red", color = "red") +
  labs(
    title = "Distribution of pref1 Ranks by Item",
    x = "Item",
    y = "Rank"
  ) +
  scale_y_continuous(breaks = 1:6, limits = c(1, 6)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

What’s the distribution of final ranks?

# Step 1: Define the mapping
id_to_label <- c(
  "49" = "Pr6_Amt1",
  "50" = "Pr5_Amt2",
  "64" = "Pr4_Amt3",
  "65" = "Pr3_Amt4",
  "67" = "Pr2_Amt5",
  "68" = "Pr1_Amt6"
)

# Step 2: Select relevant columns and count number of 1s
rank_counts1 <- dat %>%
  select(starts_with("rank_pref1_")) %>%
  summarise(across(everything(), ~ sum(. == 1, na.rm = TRUE))) %>%
  pivot_longer(cols = everything(), names_to = "column", values_to = "n_order_1") %>%
  mutate(
    id = sub("rank_pref1_", "", column),
    item.f = id_to_label[id]
  ) %>%
  filter(!is.na(item.f))  # Only keep mapped items


# Step 3: Plot
ggplot(rank_counts1, aes(x = item.f, y = n_order_1, fill = item.f)) +
  geom_col() +
  scale_fill_manual(values = bet_colors) +
  labs(
    title = "Count of Participants ranking the\nItems on first Position (Preference Task 1)",
    x = "Item",
    y = "Count (Rank = 1)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "none"
  )

How many people touched which lottery first?

order1_counts <- touch_order_analysis.long_pref1 %>%
  filter(order == 1) %>%            # Keep only rows where order == 1
  count(item.f, name = "n_order_1") # Count how many times each item.f appears

# Plot
ggplot(order1_counts, aes(x = item.f, y = n_order_1, fill = item.f)) +
  geom_col() +
  scale_fill_manual(values = bet_colors) +
  labs(
    title = "Count of Participants touching\neach Item first (Preference Task 1)",
    x = "Item",
    y = "Count (Order = 1)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "none"
  )

How do the ranking Profiles for these people look like?

# Only consider participants in the same "category", i.e., who arrived at the same final ranking. Let's look at the ones who ranked the highest amount/probability highest

# Only consider participants who ranked the highest Amount first
selected_ids_Pr1_Amt6 <- dat %>%
  filter(rank_pref1_68 == 1) %>%
  pull(ResponseId)

# Only consider participants who ranked the highest Probability first
selected_ids_Pr6_Amt1 <- dat %>%
  filter(rank_pref1_49 == 1) %>%
  pull(ResponseId)

# Remaining IDs
excluded_ids <- union(selected_ids_Pr1_Amt6, selected_ids_Pr6_Amt1)
remaining_ids <- dat %>%
  filter(!ResponseId %in% excluded_ids) %>%
  pull(ResponseId)


# Create Distance_Prefer2
Distance_Prefer1<-RankProcess_Prefer1 %>%
  group_by(ResponseId)%>%
  mutate(
    # Split the string into parts based on commas
    parts = str_split(order, ",")
  ) %>%
  mutate(
    Rank1 = sapply(parts, function(x) x[1]),  # Extract before 1st comma
    Rank2 = sapply(parts, function(x) x[2]),  # Extract before 2nd comma
    Rank3 = sapply(parts, function(x) x[3]),  # Extract before 3rd comma
    Rank4 = sapply(parts, function(x) x[4]),  # Extract before 4th comma
    Rank5 = sapply(parts, function(x) x[5]),  # Extract before 5th comma
    Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA))  # Extract after 5th comma
  ) %>%
  select(-parts)

items_Prefer1 <- c("49", "50", "64", "65", "67", "68")
for (item in items_Prefer1) {
  Distance_Prefer1[[paste0("current_", item)]] <- NA_integer_
}

Distance_Prefer1 <- Distance_Prefer1 %>%
  rowwise() %>%
  mutate(
    across(
      starts_with("current_"),
      ~ {
        item_number <- str_remove(cur_column(), "current_")  # Extract the item number
        case_when(
          Rank1 == item_number ~ 1,
          Rank2 == item_number ~ 2,
          Rank3 == item_number ~ 3,
          Rank4 == item_number ~ 4,
          Rank5 == item_number ~ 5,
          Rank6 == item_number ~ 6,
          TRUE ~ 1 
        )
      }
    )
  ) %>%
  ungroup()

for (item in items_Prefer1) {
  Distance_Prefer1[[paste0("last_", item)]] <- lag(Distance_Prefer1[[paste0("current_", item)]])
}

Distance_Prefer1<-Distance_Prefer1%>%
  group_by(ResponseId)%>%
  rowwise() %>%
  mutate(
    current_item_moved = get(paste0("current_", item_moved)),  # Get the rank of the moved item from current columns
    last_item_moved = get(paste0("last_", item_moved)),        # Get the rank of the moved item from last columns
    # Determine the movement direction; we should not see any "no_change"
    move_direction = case_when(
      is.na(last_item_moved) ~ "no_change",  
      current_item_moved < last_item_moved ~ "up",
      current_item_moved > last_item_moved ~ "down",
      TRUE ~ "no_change"
    )
  ) %>%
  ungroup()


# Filter Distance_Prefer2 for those ResponseIds
Ranking_per_Step_Pr1_Amt6 <- Distance_Prefer1 %>%
  filter(ResponseId %in% selected_ids_Pr1_Amt6)

Ranking_per_Step_Pr6_Amt1 <- Distance_Prefer1 %>%
  filter(ResponseId %in% selected_ids_Pr6_Amt1)

Ranking_per_Step_remaining <- Distance_Prefer1 %>%
  filter(ResponseId %in% remaining_ids)


## Create new R columns
# Initialize R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

# Function to fill R columns
fill_R_columns <- function(order_string, target_value) {
  order_string <- gsub("[[:space:][:cntrl:]]", "", order_string)  # Clean spaces and newlines
  order_numbers <- strsplit(order_string, ",")[[1]]
  position_of_value <- which(order_numbers == as.character(target_value))
  
  if (length(position_of_value) == 0) {
    return(NA)
  } else if (position_of_value == 1) {
    return(1)
  } else {
    return(position_of_value)
  }
}

# Apply function to fill R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )

Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )

Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )


# Ensure at least 9 rows per ResponseId
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 10) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()

Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 10) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()

Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 8) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()


## Summary DFs
summary_ranks_step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
  filter(step < 10) %>%
  group_by(step) %>%
  dplyr::summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),

    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

summary_ranks_step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>%
  filter(step < 10) %>%
  group_by(step) %>%
  dplyr::summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),

    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

summary_ranks_step_remaining <- Ranking_per_Step_remaining %>%
  filter(step < 8) %>%
  group_by(step) %>%
  dplyr::summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),

    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

## Long DFs
summary_ranks_step_Pr1_Amt6_long <- summary_ranks_step_Pr1_Amt6 %>%
  pivot_longer(
    cols = -step,
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

summary_ranks_step_Pr6_Amt1_long <- summary_ranks_step_Pr6_Amt1 %>%
  pivot_longer(
    cols = -step,
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

summary_ranks_step_remaining_long <- summary_ranks_step_remaining %>%
  pivot_longer(
    cols = -step,
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

# Shapes
bet_shapes <-  c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)

# Create plot
S_Bet_Profile1 <- ggplot(summary_ranks_step_Pr1_Amt6_long, 
                        aes(x = step, y = mean_rank)) +   
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se), 
                width = 0.2, color = "black") +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +  
  labs(x = "Ranking Step", y = "Mean Rank", 
       title = "Mean of Ranks by Step - $-Bet Profile\n(Preference Task 1)", 
       color = "Label", shape = "Label", fill = "Label") +
  theme_minimal() +
  theme(legend.position = "bottom", 
        plot.title = element_text(hjust = 0.5))


P_Bet_Profile1 <- ggplot(summary_ranks_step_Pr6_Amt1_long, 
                        aes(x = step, y = mean_rank)) +   
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se), 
                width = 0.2, color = "black") +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +  
  labs(x = "Ranking Step", y = "Mean Rank", 
       title = "Mean of Ranks by Step - P-Bet Profile\n(Preference Task 1)", 
       color = "Label", shape = "Label", fill = "Label") +
  theme_minimal() +
  theme(legend.position = "bottom", 
        plot.title = element_text(hjust = 0.5))


remaining_Profile1 <- ggplot(summary_ranks_step_remaining_long, 
                        aes(x = step, y = mean_rank)) +   
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se), 
                width = 0.2, color = "black") +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +
  labs(x = "Ranking Step", y = "Mean Rank", 
       title = "Mean of Ranks by Step - Unclear Profile\n(Preference Task 1)", 
       color = "Label", shape = "Label", fill = "Label") +
  theme_minimal() +
  theme(legend.position = "bottom", 
        plot.title = element_text(hjust = 0.5))


# Suppress individual legends
S_Bet_Profile1 <- S_Bet_Profile1 + theme(legend.position = "bottom")
P_Bet_Profile1 <- P_Bet_Profile1 + theme(legend.position = "bottom")

# Combine the plots with shared legend
combined_plot1 <- S_Bet_Profile1 + P_Bet_Profile1 +
  plot_layout(guides = "collect") & 
  theme(legend.position = "bottom")

# Display combined plot
combined_plot1

remaining_Profile1

Distribution of Drag Count by item

The following set of histograms illustrates the number of times each item is touched.

drag_drop_counts_Prefer1 <- drag_and_drop_count_Prefer1_long %>%
  count(item.f,N) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100,
         condition="Amount")%>%
  ungroup()


ggplot(drag_drop_counts_Prefer1, aes(x = factor(N), y = n)) +
  geom_bar(
    stat = "identity",
    color = "black"
  ) +
  geom_text(
    aes(
      label = paste0(n, " (", round(percentage, 1), "%)")
    ),
    vjust = -0.5,
    size = 5,
    fontface="bold") +
  labs(
    title = "Drag Count by item and Quiz Condition",
    x = "Drag Count",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),  # Increased size and bold text
    plot.title = element_text(hjust = 0.5),
    axis.title = element_text(size = 12),  # Adjust axis titles size if needed
    axis.text = element_text(size = 10)    # Adjust axis labels size if needed
  ) +
  facet_wrap(~ item.f,ncol=2) +
  ylim(0, 150)

Correlation with Attribute Values

Note on attribute rank coding: Across Amount and Prob Tasks, greater value indicates higher rank (i.e., 1=item in the bottom and 6=item at the top.)

summary_data_Prefer1 <- drag_and_drop_count_Prefer1_long%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_mean = mean(N_ind, na.rm = TRUE),
            drag_sd = sd(N_ind, na.rm = TRUE),
            n = n(),
            se = drag_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))

ggplot(summary_data_Prefer1, aes(x = Avg.Amount, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Amount Attribute", subtitle = "1ST Preference Task", x = "Avg. Amt", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

ggplot(summary_data_Prefer1, aes(x = Avg.Prob, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Prob Attribute", subtitle = "1ST Preference Task", x = "Avg. Prob", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Distribution of Drag Order by item

touch_order_pref1 <- touch_order_analysis.long_pref1 %>%
  count(item.f,order,condition) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100)%>%
  ungroup()

ggplot(touch_order_pref1, aes(x = factor(order), y = n)) +
  geom_bar(
    stat = "identity",
    color = "black"
  ) +
  geom_text(
    aes(
      label = paste0(n, " (", round(percentage, 1), "%)")
    ),
    vjust = -0.5,
    size = 5,
    fontface="bold"
  ) +
  labs(
    title = "Drag Order by item and Condition",
    x = "Drag Order",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),  # Facet label adjustments
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  ) +
  facet_wrap(~ item.f ,ncol = 2) +
  ylim(0, 100)

Correlation with Attribute Values

summary_data_pref1 <- touch_order_analysis.long_pref1%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))

ggplot(summary_data_pref1, aes(x = Avg.Amount, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Amount Attribute", subtitle = "1ST Preference Task", x = "Avg. Amt", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")

ggplot(summary_data_pref1, aes(x = Avg.Prob, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Prob Attribute", subtitle = "1ST Preference Task", x = "Avg. Prob", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Distribution of Drag Distance

summary_stats <- Distance_pref1.cleanup.df %>%
  group_by(item.f) %>%
  dplyr::summarize(
    mean_distance = mean(distance, na.rm = TRUE),
    median_distance = median(distance, na.rm = TRUE)
  )

Distance_pref1.cleanup.df$item.f<- factor(Distance_pref2.cleanup.df$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)



ggplot(Distance_pref1.cleanup.df ,
       aes(x = -distance, fill = item.f)) +
  geom_histogram(binwidth = 1, alpha = 0.3, position = "identity") +
  labs(
    title = "Distribution of Drag Distance - pref1 Task",
    x = "Distance",
    y = "Count",
    fill = "item"
  ) +
  theme_minimal()+
  facet_grid(~item.f)+
  xlim(6,-6)+
    scale_fill_manual(values = bet_colors)

Descriptives - 2ND Preference Task

rank_data2 <- dat %>%
  select(
    `49` = rank_pref2_49,
    `50` = rank_pref2_50,
    `64` = rank_pref2_64,
    `65` = rank_pref2_65,
    `67` = rank_pref2_67,
    `68` = rank_pref2_68
  ) %>%
  pivot_longer(
    everything(),
    names_to = "item_moved",
    values_to = "Subj_rank"
  ) %>%
  mutate(
    item_moved = as.integer(item_moved),
    item_label = case_when(
      item_moved == 49 ~ "Pr6_Amt1",
      item_moved == 50 ~ "Pr5_Amt2",
      item_moved == 64 ~ "Pr4_Amt3",
      item_moved == 65 ~ "Pr3_Amt4",
      item_moved == 67 ~ "Pr2_Amt5",
      item_moved == 68 ~ "Pr1_Amt6"
    ),
    item_label = factor(item_label, levels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"))
  )


ggplot(rank_data2, aes(x = item_label, y = Subj_rank)) +  # reverse coded so 6 = top rank
  geom_violin(trim = FALSE, fill = "lightblue", color = "darkblue", alpha = 0.5) +
  geom_jitter(width = 0.15, height = 0, alpha = 0.6, size = 1.5, color = "black") +
  stat_summary(fun = mean, geom = "point", shape = 21, size = 3, fill = "red", color = "red") +
  labs(
    title = "Distribution of pref2 Ranks by Item",
    x = "Item",
    y = "Rank"
  ) +
  scale_y_continuous(breaks = 1:6, limits = c(1, 6)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

What’s the distribution of final ranks?

# Step 1: Define the mapping
id_to_label <- c(
  "49" = "Pr6_Amt1",
  "50" = "Pr5_Amt2",
  "64" = "Pr4_Amt3",
  "65" = "Pr3_Amt4",
  "67" = "Pr2_Amt5",
  "68" = "Pr1_Amt6"
)

# Step 2: Select relevant columns and count number of 1s
rank_counts <- dat %>%
  select(starts_with("rank_pref2_")) %>%
  summarise(across(everything(), ~ sum(. == 1, na.rm = TRUE))) %>%
  pivot_longer(cols = everything(), names_to = "column", values_to = "n_order_1") %>%
  mutate(
    id = sub("rank_pref2_", "", column),
    item.f = id_to_label[id]
  ) %>%
  filter(!is.na(item.f))  # Only keep mapped items


# Step 3: Plot
ggplot(rank_counts, aes(x = item.f, y = n_order_1, fill = item.f)) +
  geom_col() +
  scale_fill_manual(values = bet_colors) +
  labs(
    title = "Count of Participants ranking the\nItems on first Position",
    x = "Item",
    y = "Count (Rank = 1)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "none"
  )

How many people touched which lottery first?

order1_counts <- touch_order_analysis.long_pref2 %>%
  filter(order == 1) %>%            # Keep only rows where order == 1
  count(item.f, name = "n_order_1") # Count how many times each item.f appears

# Plot
ggplot(order1_counts, aes(x = item.f, y = n_order_1, fill = item.f)) +
  geom_col() +
  scale_fill_manual(values = bet_colors) +
  labs(
    title = "Count of Participants touching\neach Item first",
    x = "Item",
    y = "Count (Order = 1)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "none"
  )

How do the ranking Profiles for these people look like?

# Only consider participants in the same "category", i.e., who arrived at the same final ranking. Let's look at the ones who ranked the highest amount/probability highest

# Only consider participants who ranked the highest Amount first
selected_ids_Pr1_Amt6 <- dat %>%
  filter(rank_pref2_68 == 1) %>%
  pull(ResponseId)

# Only consider participants who ranked the highest Probability first
selected_ids_Pr6_Amt1 <- dat %>%
  filter(rank_pref2_49 == 1) %>%
  pull(ResponseId)

# Remaining IDs
excluded_ids <- union(selected_ids_Pr1_Amt6, selected_ids_Pr6_Amt1)
remaining_ids <- dat %>%
  filter(!ResponseId %in% excluded_ids) %>%
  pull(ResponseId)


# Create Distance_Prefer2
Distance_Prefer2<-RankProcess_Prefer2 %>%
  group_by(ResponseId)%>%
  mutate(
    # Split the string into parts based on commas
    parts = str_split(order, ",")
  ) %>%
  mutate(
    Rank1 = sapply(parts, function(x) x[1]),  # Extract before 1st comma
    Rank2 = sapply(parts, function(x) x[2]),  # Extract before 2nd comma
    Rank3 = sapply(parts, function(x) x[3]),  # Extract before 3rd comma
    Rank4 = sapply(parts, function(x) x[4]),  # Extract before 4th comma
    Rank5 = sapply(parts, function(x) x[5]),  # Extract before 5th comma
    Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA))  # Extract after 5th comma
  ) %>%
  select(-parts)

items_Prefer2 <- c("49", "50", "64", "65", "67", "68")
for (item in items_Prefer2) {
  Distance_Prefer2[[paste0("current_", item)]] <- NA_integer_
}

Distance_Prefer2 <- Distance_Prefer2 %>%
  rowwise() %>%
  mutate(
    across(
      starts_with("current_"),
      ~ {
        item_number <- str_remove(cur_column(), "current_")  # Extract the item number
        case_when(
          Rank1 == item_number ~ 1,
          Rank2 == item_number ~ 2,
          Rank3 == item_number ~ 3,
          Rank4 == item_number ~ 4,
          Rank5 == item_number ~ 5,
          Rank6 == item_number ~ 6,
          TRUE ~ 1 
        )
      }
    )
  ) %>%
  ungroup()

for (item in items_Prefer2) {
  Distance_Prefer2[[paste0("last_", item)]] <- lag(Distance_Prefer2[[paste0("current_", item)]])
}

Distance_Prefer2<-Distance_Prefer2%>%
  group_by(ResponseId)%>%
  rowwise() %>%
  mutate(
    current_item_moved = get(paste0("current_", item_moved)),  # Get the rank of the moved item from current columns
    last_item_moved = get(paste0("last_", item_moved)),        # Get the rank of the moved item from last columns
    # Determine the movement direction; we should not see any "no_change"
    move_direction = case_when(
      is.na(last_item_moved) ~ "no_change",  
      current_item_moved < last_item_moved ~ "up",
      current_item_moved > last_item_moved ~ "down",
      TRUE ~ "no_change"
    )
  ) %>%
  ungroup()


# Filter Distance_Prefer2 for those ResponseIds
Ranking_per_Step_Pr1_Amt6 <- Distance_Prefer2 %>%
  filter(ResponseId %in% selected_ids_Pr1_Amt6)

Ranking_per_Step_Pr6_Amt1 <- Distance_Prefer2 %>%
  filter(ResponseId %in% selected_ids_Pr6_Amt1)

Ranking_per_Step_remaining <- Distance_Prefer2 %>%
  filter(ResponseId %in% remaining_ids)


## Create new R columns
# Initialize R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

# Function to fill R columns
fill_R_columns <- function(order_string, target_value) {
  order_string <- gsub("[[:space:][:cntrl:]]", "", order_string)  # Clean spaces and newlines
  order_numbers <- strsplit(order_string, ",")[[1]]
  position_of_value <- which(order_numbers == as.character(target_value))
  
  if (length(position_of_value) == 0) {
    return(NA)
  } else if (position_of_value == 1) {
    return(1)
  } else {
    return(position_of_value)
  }
}

# Apply function to fill R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )

Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )

Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )




# Ensure at least 9 rows per ResponseId
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 10) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()

Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 10) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()

Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 8) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()


## Summary DFs
summary_ranks_step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
  filter(step < 10) %>%
  group_by(step) %>%
  dplyr::summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),

    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

summary_ranks_step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>%
  filter(step < 10) %>%
  group_by(step) %>%
  dplyr::summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),

    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

summary_ranks_step_remaining <- Ranking_per_Step_remaining %>%
  filter(step < 8) %>%
  group_by(step) %>%
  dplyr::summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),

    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

## Long DFs
summary_ranks_step_Pr1_Amt6_long <- summary_ranks_step_Pr1_Amt6 %>%
  pivot_longer(
    cols = -step,
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

summary_ranks_step_Pr6_Amt1_long <- summary_ranks_step_Pr6_Amt1 %>%
  pivot_longer(
    cols = -step,
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

summary_ranks_step_remaining_long <- summary_ranks_step_remaining %>%
  pivot_longer(
    cols = -step,
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

# Shapes
bet_shapes <-  c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)

# Create plot
S_Bet_Profile <- ggplot(summary_ranks_step_Pr1_Amt6_long, 
                        aes(x = step, y = mean_rank)) +   
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se), 
                width = 0.2, color = "black") +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +  # 👈 Fixed y-axis
  labs(x = "Ranking Step", y = "Mean Rank", 
       title = "Mean of Ranks by Step - $-Bet Profile", 
       color = "Label", shape = "Label", fill = "Label") +
  theme_minimal() +
  theme(legend.position = "bottom", 
        plot.title = element_text(hjust = 0.5))


P_Bet_Profile <- ggplot(summary_ranks_step_Pr6_Amt1_long, 
                        aes(x = step, y = mean_rank)) +   
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se), 
                width = 0.2, color = "black") +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +  # Fixed y-axis
  labs(x = "Ranking Step", y = "Mean Rank", 
       title = "Mean of Ranks by Step - P-Bet Profile", 
       color = "Label", shape = "Label", fill = "Label") +
  theme_minimal() +
  theme(legend.position = "bottom", 
        plot.title = element_text(hjust = 0.5))


remaining_Profile <- ggplot(summary_ranks_step_remaining_long, 
                        aes(x = step, y = mean_rank)) +   
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se), 
                width = 0.2, color = "black") +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +  # 👈 Fixed y-axis
  labs(x = "Ranking Step", y = "Mean Rank", 
       title = "Mean of Ranks by Step - Unclear Profile", 
       color = "Label", shape = "Label", fill = "Label") +
  theme_minimal() +
  theme(legend.position = "bottom", 
        plot.title = element_text(hjust = 0.5))


# Suppress individual legends
S_Bet_Profile <- S_Bet_Profile + theme(legend.position = "bottom")
P_Bet_Profile <- P_Bet_Profile + theme(legend.position = "bottom")

# Combine the plots with shared legend
combined_plot <- S_Bet_Profile + P_Bet_Profile +
  plot_layout(guides = "collect") & 
  theme(legend.position = "bottom")

# Display combined plot
combined_plot

remaining_Profile

Distribution of Drag Count by item

The following set of histograms illustrates the number of times each item is touched.

drag_drop_counts_Prefer2 <- drag_and_drop_count_Prefer2_long %>%
  count(item.f,N) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100,
         condition="Amount")%>%
  ungroup()


ggplot(drag_drop_counts_Prefer2, aes(x = factor(N), y = n)) +
  geom_bar(
    stat = "identity",
    color = "black"
  ) +
  geom_text(
    aes(
      label = paste0(n, " (", round(percentage, 1), "%)")
    ),
    vjust = -0.5,
    size = 5,
    fontface="bold") +
  labs(
    title = "Drag Count by item and Quiz Condition",
    x = "Drag Count",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),  # Increased size and bold text
    plot.title = element_text(hjust = 0.5),
    axis.title = element_text(size = 12),  # Adjust axis titles size if needed
    axis.text = element_text(size = 10)    # Adjust axis labels size if needed
  ) +
  facet_wrap(~ item.f,ncol=2) +
  ylim(0, 150)

Correlation with Attribute Values

Note on attribute rank coding: Across Amount and Prob Tasks, greater value indicates higher rank (i.e., 1=item in the bottom and 6=item at the top.)

summary_data_Prefer2 <- drag_and_drop_count_Prefer2_long%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_mean = mean(N_ind, na.rm = TRUE),
            drag_sd = sd(N_ind, na.rm = TRUE),
            n = n(),
            se = drag_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))

ggplot(summary_data_Prefer2, aes(x = Avg.Amount, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Amount Attribute", subtitle = "2ND Preference Task", x = "Avg. Amt", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

ggplot(summary_data_Prefer2, aes(x = Avg.Prob, y = drag_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Count and Prob Attribute", subtitle = "2ND Preference Task", x = "Avg. Prob", y = "Avg. Drag Count Indicator") +
  theme(axis.title = element_text(face = "bold"), 
        plot.subtitle = element_text(hjust = 0.5), 
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Distribution of Drag Order by item

touch_order_pref2 <- touch_order_analysis.long_pref2 %>%
  count(item.f,order,condition) %>%
  group_by(item.f)%>%
  mutate(percentage = n / sum(n) * 100)%>%
  ungroup()

ggplot(touch_order_pref2, aes(x = factor(order), y = n)) +
  geom_bar(
    stat = "identity",
    color = "black"
  ) +
  geom_text(
    aes(
      label = paste0(n, " (", round(percentage, 1), "%)")
    ),
    vjust = -0.5,
    size = 5,
    fontface="bold"
  ) +
  labs(
    title = "Drag Order by item and Condition",
    x = "Drag Order",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),  # Facet label adjustments
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  ) +
  facet_wrap(~ item.f ,ncol = 2) +
  ylim(0, 100)

Correlation with Attribute Values

summary_data_pref2 <- touch_order_analysis.long_pref2%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop",
            Avg.Amount=mean(Amt),
            Avg.Prob=mean(Prob))
ggplot(summary_data_pref2, aes(x = Avg.Amount, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Amount Attribute", subtitle = "2ND Preference Task", x = "Avg. Amt", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")

ggplot(summary_data_pref2, aes(x = Avg.Prob, y = order_mean, label = item.f)) +
  geom_point(size = 3, color = "black") +
  geom_text(vjust = -1, hjust = 1) +
  theme_minimal() +
  labs(title = "Drag Order and Prob Attribute", subtitle = "2ND Preference Task", x = "Avg. Prob", y = "Avg. Drag Order") +
  theme(axis.title = element_text(face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(face = "bold", hjust = 0.5))+
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") 

Distribution of Drag Distance

summary_stats <- Distance_pref2.cleanup.df %>%
  group_by(item.f) %>%
  dplyr::summarize(
    mean_distance = mean(distance, na.rm = TRUE),
    median_distance = median(distance, na.rm = TRUE)
  )

Distance_pref2.cleanup.df$item.f<- factor(Distance_pref2.cleanup.df$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)



ggplot(Distance_pref2.cleanup.df ,
       aes(x = -distance, fill = item.f)) +
  geom_histogram(binwidth = 1, alpha = 0.3, position = "identity") +
  labs(
    title = "Distribution of Drag Distance - pref2 Task",
    x = "Distance",
    y = "Count",
    fill = "item"
  ) +
  theme_minimal()+
  facet_grid(~item.f)+
  xlim(6,-6)+
    scale_fill_manual(values = bet_colors)

Shared Visualizations

DV 1: Drag Count

summary_data_Prefer1_ind<- drag_and_drop_count_Prefer1_long %>%
  mutate(N=case_when(
    N==0~0,
    TRUE~1
  ))%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_drop_mean = mean(N, na.rm = TRUE),
            drag_drop_sd = sd(N, na.rm = TRUE),
            n = n(),
            se = drag_drop_sd / sqrt(n),
            .groups = "drop")

summary_data_Prefer2_ind<- drag_and_drop_count_Prefer2_long %>%
  mutate(N=case_when(
    N==0~0,
    TRUE~1
  ))%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(drag_drop_mean = mean(N, na.rm = TRUE),
            drag_drop_sd = sd(N, na.rm = TRUE),
            n = n(),
            se = drag_drop_sd / sqrt(n),
            .groups = "drop")

# Combine the two data frames into one
summary_data_pref_combined <- bind_rows(summary_data_Prefer1_ind, summary_data_Prefer2_ind)

# 
summary_data_pref_combined$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)


# Plot
ggplot(summary_data_pref_combined, aes(x = condition, y = drag_drop_mean, 
                                      group = item.f, color = item.f, shape = item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = drag_drop_mean - se,
      ymax = drag_drop_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Count",
    title = "Mean Drag Count by Condition"
  ) +
  scale_color_manual(values = bet_colors) +  
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )

DV 2: Drag Order

summary_data_Prefer1<- touch_order_analysis.long_pref1%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop")


summary_data_Prefer2 <- touch_order_analysis.long_pref2%>%
  dplyr::group_by(condition, item.f) %>%
  dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
            order_sd = sd(order, na.rm = TRUE),
            n = n(),
            se = order_sd / sqrt(n),  # Standard error
            .groups = "drop")


summary_data_pref_combined <- bind_rows(summary_data_Prefer1, summary_data_Prefer2)

summary_data_pref_combined$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)



ggplot(summary_data_pref_combined, aes(x = condition, y = order_mean, 
                                      group = item.f, color = item.f, shape = item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = order_mean - se,
      ymax = order_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Order",
    title = "Mean Drag Order by Condition"
  ) +
  scale_color_manual(values = bet_colors) +  
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )

DV 3: Drag Distance

Distance_pref1_cleanup.df.test<-Distance_pref1.cleanup.df%>%
  select(ResponseId, item.f,distance,distance.abs)%>%
  mutate(condition="Pref1")

Distance_pref2_cleanup.df.test<-Distance_pref2.cleanup.df%>%
  select(ResponseId, item.f,distance,distance.abs)%>%
  mutate(condition="Pref2")

Distance_cleanup.df.combined<-rbind(Distance_pref1_cleanup.df.test,Distance_pref2_cleanup.df.test)

summary_distance_data_pref <- Distance_cleanup.df.combined %>%
  mutate(condition=as.factor(condition),
         distance.abs=(distance))%>%
  group_by(condition, item.f) %>%
  dplyr::summarize(
    distance_mean = -mean(distance, na.rm = TRUE),
    distance_sd = sd(distance, na.rm = TRUE),
    n = n(),
    se = distance_sd / sqrt(n),
    .groups = "drop"
  )


summary_distance_data_pref$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c(  "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)


ggplot(summary_distance_data_pref, aes(x = condition, y = distance_mean, group = item.f, color = item.f,shape=item.f)) +
  geom_line(linewidth = 1, position = position_dodge(0.3)) +
  geom_point(size = 6, position = position_dodge(0.3)) +
  geom_errorbar(
    aes(
      ymin = distance_mean - se,
      ymax = distance_mean + se
    ),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  labs(
    x = "Condition",
    y = "Mean ± SE Drag Distance",
    title = "Mean Drag Distance by Condition"
  ) +
  theme_minimal() +
  theme(
    legend.position = "top", # Place legend at the top
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )+
  scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, 
                                "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, 
                                "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
    scale_color_manual(values = bet_colors)

Shared Regressions

### Prep: Add scaled amount and probability ###

# Count
drag_and_drop_count_Prefer1_long$Prob.c <- scale(drag_and_drop_count_Prefer1_long$Prob)
drag_and_drop_count_Prefer1_long$Amt.c <- scale(drag_and_drop_count_Prefer1_long$Amt)
drag_and_drop_count_Prefer2_long$Prob.c <- scale(drag_and_drop_count_Prefer1_long$Prob)
drag_and_drop_count_Prefer2_long$Amt.c <- scale(drag_and_drop_count_Prefer1_long$Amt)

# Order
touch_order_analysis.long_pref1$Prob.c <- scale(touch_order_analysis.long_pref1$Prob)
touch_order_analysis.long_pref1$Amt.c <- scale(touch_order_analysis.long_pref1$Amt)
touch_order_analysis.long_pref2$Prob.c <- scale(touch_order_analysis.long_pref1$Prob)
touch_order_analysis.long_pref2$Amt.c <- scale(touch_order_analysis.long_pref1$Amt)

# Distance
distance1_df <- master_df %>%
  select(c(1:6, 8:9, 13:21))

distance1_df$prob <- distance1_df$set1_prob 
distance1_df$amt <- distance1_df$set1_amt
distance1_df$initial_order <- distance1_df$set1_initial_order

distance2_df <- master_df %>%
  select(c(1:5, 10:12, 15, 22:27))

distance2_df$prob <- distance2_df$set2_prob 
distance2_df$amt <- distance2_df$set2_amt
distance2_df$initial_order <- distance2_df$set2_initial_order

distance1_df$amt.c <- scale(distance1_df$amt)
distance1_df$prob.c <- scale(distance1_df$prob)
distance2_df$amt.c <- scale(distance2_df$amt)
distance2_df$prob.c <- scale(distance2_df$prob)

DV 1: Drag Count

M1 <- glmer(
  N_ind ~ Prob.c + Amt.c + initial.rank.r + (1 | ResponseId),
  data = drag_and_drop_count_Prefer1_long,
  #subset = item.f != "Pr6_Amt1",
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
)

M2<-glmer(
  N_ind ~ Prob.c + Amt.c + initial.rank.r + (1 | ResponseId),
  data = drag_and_drop_count_Prefer2_long,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
)


tab_model(
  M1, M2,
  dv.labels = c(
    "Drag and Drop Count<br>Preference Task 1",
    "Drag and Drop Count<br>Preference Task 2"
  ),
  pred.labels = c(
    "Intercept",
    "Probability",
    "Amount",
    "Initial Rank [6]",
    "Initial Rank [5]",
    "Initial Rank [4]",
    "Initial Rank [3]",
    "Initial Rank [2]"
  ),
  show.se = T, 
  show.ci = F,
  show.stat = T
)
  Drag and Drop Count
Preference Task 1
Drag and Drop Count
Preference Task 2
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
Intercept 0.27 0.05 -6.95 <0.001 0.15 0.03 -8.58 <0.001
Probability 0.97 0.10 -0.35 0.726 1.13 0.12 1.15 0.249
Amount 0.66 0.06 -4.26 <0.001 0.77 0.08 -2.62 0.009
Initial Rank [6] 14.76 3.93 10.10 <0.001 26.85 7.94 11.13 <0.001
Initial Rank [5] 11.01 2.85 9.27 <0.001 24.70 7.24 10.94 <0.001
Initial Rank [4] 12.73 3.32 9.74 <0.001 20.96 6.02 10.59 <0.001
Initial Rank [3] 7.61 1.88 8.20 <0.001 14.74 4.12 9.61 <0.001
Initial Rank [2] 3.86 0.92 5.66 <0.001 7.01 1.89 7.23 <0.001
Random Effects
σ2 3.29 3.29
τ00 0.25 ResponseId 0.18 ResponseId
ICC 0.07 0.05
N 186 ResponseId 186 ResponseId
Observations 1116 1116
Marginal R2 / Conditional R2 0.225 / 0.280 0.298 / 0.335

DV 2: Drag Order

Ordinal Models

  • ordinal model might be most suitable given the nature of the data
  • however, these models have issues (see below)
M1<-clmm(factor(order)~Amt+Prob+initial.rank.r+(1|ResponseId),touch_order_analysis.long_pref1)
M2<-clmm(factor(order)~Amt+Prob+initial.rank.r+(1|ResponseId),touch_order_analysis.long_pref2)

tab_model(M1,M2,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("1|2","2|3", "3|4", "4|5", "5|6",
              "Amount",
              "Probability",
              "Initial Rank [6]","Initial Rank [5]","Initial Rank [4]","Initial Rank [3]","Initial Rank [2]"),
          dv.labels = 
            c("Drag and Drop Order<br>Preference Task 1",
              "Drag and Drop Order<br>Preference Task 2"))
  Drag and Drop Order
Preference Task 1
Drag and Drop Order
Preference Task 2
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
1|2 0.02 NaN NaN NaN 0.01 NaN NaN NaN
2|3 0.07 NaN NaN NaN 0.03 NaN NaN NaN
3|4 0.22 NaN NaN NaN 0.10 NaN NaN NaN
4|5 0.71 NaN NaN NaN 0.38 NaN NaN NaN
5|6 3.95 NaN NaN NaN 2.67 NaN NaN NaN
Amount 1.00 NaN NaN NaN 0.99 NaN NaN NaN
Probability 0.98 NaN NaN NaN 0.98 NaN NaN NaN
Initial Rank [6] 0.15 NaN NaN NaN 0.10 NaN NaN NaN
Initial Rank [5] 0.15 NaN NaN NaN 0.10 NaN NaN NaN
Initial Rank [4] 0.14 NaN NaN NaN 0.09 NaN NaN NaN
Initial Rank [3] 0.20 NaN NaN NaN 0.12 NaN NaN NaN
Initial Rank [2] 0.26 NaN NaN NaN 0.21 NaN NaN NaN
Random Effects
σ2 3.29 3.29
τ00 0.00 ResponseId 0.00 ResponseId
N 186 ResponseId 186 ResponseId
Observations 1116 1116
Marginal R2 / Conditional R2 0.195 / NA 0.255 / NA



lm_robust

Maybe not an ideal model, however, it will return results:

M1<-lm_robust(order ~ Amt+Prob+initial.rank.r, data = touch_order_analysis.long_pref1, cluster = ResponseId,se_type = "stata")

M2<-lm_robust(order ~ Amt+Prob+initial.rank.r, data = touch_order_analysis.long_pref2, cluster = ResponseId,se_type = "stata")

tab_model(M1,M2,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount",
              "Probability",
              "Initial Rank [6]","Initial Rank [5]","Initial Rank [4]","Initial Rank [3]","Initial Rank [2]"),
          dv.labels = c("Drag and Drop Order<br>Preference Task 1", "Drag and Drop Order<br>Preference Task 2"))
  Drag and Drop Order
Preference Task 1
Drag and Drop Order
Preference Task 2
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 4.47 0.15 30.06 <0.001 5.00 0.13 38.81 <0.001
Amount 0.00 0.00 0.89 0.372 -0.00 0.00 -2.87 0.004
Probability -0.01 0.00 -4.91 <0.001 -0.02 0.00 -8.19 <0.001
Initial Rank [6] -1.39 0.13 -10.94 <0.001 -1.54 0.12 -12.75 <0.001
Initial Rank [5] -1.35 0.14 -9.68 <0.001 -1.59 0.11 -14.19 <0.001
Initial Rank [4] -1.48 0.13 -11.50 <0.001 -1.61 0.12 -12.92 <0.001
Initial Rank [3] -1.16 0.14 -8.50 <0.001 -1.47 0.12 -12.15 <0.001
Initial Rank [2] -0.95 0.14 -6.75 <0.001 -1.06 0.12 -8.90 <0.001
Observations 1116 1116
R2 / R2 adjusted 0.184 / 0.179 0.248 / 0.243

DV 3: Drag Distance

distance_set1 <- lmer(set1_drag_distance.r ~ prob + amt + factor(initial_order) +
                      (1 | ResponseId),
                      data = distance1_df)

distance_set2 <- lmer(set2_drag_distance.r ~ prob + amt + factor(initial_order) +
                      (1 | ResponseId),
                      data = distance2_df)

tab_model(
  distance_set1, distance_set2,
  dv.labels = c(
    "Drag and Drop Distance<br>Preference Task 1",
    "Drag and Drop Distance<br>Preference Task 2"
  ),
  pred.labels =
            c("Intercept",
              "Probability",
              "Amount",
              "Initial Rank [2]","Initial Rank [3]","Initial Rank [4]","Initial Rank [5]","Initial Rank [6]"),
  show.se = T,
  show.ci = F,
  show.stat = T
)
  Drag and Drop Distance
Preference Task 1
Drag and Drop Distance
Preference Task 2
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept -0.17 0.14 -1.26 0.207 -0.31 0.13 -2.47 0.014
Probability 0.01 0.00 3.49 <0.001 0.01 0.00 6.65 <0.001
Amount -0.01 0.00 -3.73 <0.001 -0.00 0.00 -2.44 0.015
Initial Rank [2] 0.65 0.12 5.28 <0.001 0.49 0.12 4.16 <0.001
Initial Rank [3] 1.06 0.12 8.59 <0.001 1.03 0.12 8.80 <0.001
Initial Rank [4] 1.63 0.12 13.18 <0.001 1.49 0.12 12.76 <0.001
Initial Rank [5] 2.01 0.12 16.24 <0.001 2.05 0.12 17.59 <0.001
Initial Rank [6] 2.75 0.12 22.25 <0.001 2.57 0.12 22.10 <0.001
Random Effects
σ2 1.42 1.26
τ00 0.18 ResponseId 0.24 ResponseId
ICC 0.11 0.16
N 186 ResponseId 186 ResponseId
Observations 1116 1116
Marginal R2 / Conditional R2 0.370 / 0.440 0.388 / 0.485

Time Analysis - Prefence 1

We want to check whether probability and amount affect the 3F patterns differently over time. For this analysis, we might want to consider redoing the coding of the data frame. The reason for this is that we currently only consider the first time an item is touched: In the prior analyses, that item has as “order” the first time it was touch (not the second), and the same for drag distance: only the first distance is considered. For the “over-time-analysis”, we could consider several orders of being touched and dragged. For now, going with the old (and pre-registered) analysis scheme, I will drop duplicate observations (i.e., only the first “order” and “distance”) are considered.

We are removing 83 observations from such “duplicates” (i.e., items touched more than once), which corresponds to ~11%. Additionally, we cut after step 5 (cuts 59 obs from 32 people).

Drag Distance

Comment Previously, I ran models without the random slope. Then, the interactions were stronger for amount and step.

# First, create data frames to measure the impact of amount and probability on the 3F measures

# Join master_df to RankProcess_Prefer1 using ResponseId and item.f / bet_label
time_df_pref1 <- RankProcess_Prefer1 %>%
  left_join(
    master_df %>%
      select(ResponseId, bet_label, set1_amt, set1_prob, set1_order, set1_drag_distance.r),
    by = c("ResponseId", "item.f" = "bet_label")
  ) %>%
  rename(Amt = set1_amt, Prob = set1_prob, drag_order = set1_order, distance.r = set1_drag_distance.r) %>%
  filter(step != 0)

time_df_pref1$Amt.c <- scale(time_df_pref1$Amt)
time_df_pref1$Prob.c <- scale(time_df_pref1$Prob)

#sum(duplicated(time_df_pref1[, c("ResponseId", "item.f")])) #83
#sum(time_df_pref1$step > 5, na.rm = TRUE) #59 obs
#length(unique(time_df_pref1$ResponseId[time_df_pref1$step > 5])) #from 32 unique IDs


# Check for duplicates and remove them
duplicated_rows <- duplicated(time_df_pref1[, c("ResponseId", "item.f")])
time_df_pref1_unique <- time_df_pref1 %>%
  distinct(ResponseId, item.f, .keep_all = TRUE)

# regressions
time1 <- lmer(distance.r ~ Amt.c + Prob.c + factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref1_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time2 <- lmer(distance.r ~ Amt.c * factor(step) + Prob.c +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref1_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time3 <- lmer(distance.r ~ Amt.c + Prob.c * factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref1_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

tab_model(time1, time2, time3,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount (centered)",
              "Probability (centered)",
              "Step [2]", "Step [3]", "Step [4]", "Step [5]",
              "Amount x Step [2]", "Amount x Step [3]", "Amount x Step   [4]", "Amount x Step [5]",
              "Prob x Step [2]", "Prob x Step [3]", "Prob x Step   [4]", "Prob x Step [5]"),
          dv.labels = 
            c("DV = Distance<br>(Pref 1, reverse-coded)",
              "DV = Distance<br>(Pref 1, reverse-coded)",
              "DV = Distance<br>(Pref 1, reverse-coded)"))
  DV = Distance
(Pref 1, reverse-coded)
DV = Distance
(Pref 1, reverse-coded)
DV = Distance
(Pref 1, reverse-coded)
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 2.35 0.12 19.43 <0.001 2.38 0.12 19.62 <0.001 2.31 0.13 18.44 <0.001
Amount (centered) -0.20 0.10 -1.94 0.053 0.00 0.15 0.02 0.983 -0.22 0.11 -2.03 0.042
Probability (centered) 0.12 0.10 1.23 0.220 0.16 0.10 1.62 0.106 0.18 0.12 1.50 0.134
Step [2] -0.36 0.16 -2.27 0.023 -0.36 0.16 -2.26 0.024 -0.33 0.16 -2.05 0.041
Step [3] -0.67 0.18 -3.79 <0.001 -0.74 0.18 -4.16 <0.001 -0.56 0.18 -3.06 0.002
Step [4] -0.88 0.19 -4.53 <0.001 -0.91 0.19 -4.69 <0.001 -0.94 0.20 -4.66 <0.001
Step [5] -0.95 0.24 -3.93 <0.001 -0.99 0.24 -4.13 <0.001 -1.00 0.25 -3.98 <0.001
Amount x Step [2] -0.08 0.18 -0.45 0.656
Amount x Step [3] -0.55 0.21 -2.59 0.010
Amount x Step [4] -0.28 0.22 -1.27 0.205
Amount x Step [5] -0.16 0.27 -0.59 0.556
Prob x Step [2] -0.05 0.15 -0.35 0.726
Prob x Step [3] 0.18 0.22 0.82 0.410
Prob x Step [4] -0.36 0.21 -1.67 0.095
Prob x Step [5] -0.41 0.28 -1.44 0.152
Random Effects
σ2 1.76 1.76 1.72
τ00 0.23 ResponseId 0.25 ResponseId 0.24 ResponseId
τ11 0.38 ResponseId.Amt.c 0.29 ResponseId.Amt.c 0.42 ResponseId.Amt.c
0.32 ResponseId.Prob.c 0.34 ResponseId.Prob.c 0.35 ResponseId.Prob.c
ρ01 0.97 1.00 0.89
0.39 0.36 0.32
ICC 0.30   0.32
N 186 ResponseId 186 ResponseId 186 ResponseId
Observations 653 653 653
Marginal R2 / Conditional R2 0.091 / 0.362 0.145 / NA 0.098 / 0.383

Drag Order

Comment I am not sure in how far these models with the current coding make sense. Naturally, with larger numbers of “step”, the “order” will increase (this is just the nature of coding). It might make more sense, if we allow several “orders” to be assigned to an item, for instance, if it is touched first and fifth, it should have two values for order. However, since, on average most items are only touched once, this analysis might not work well in terms of modelling.

# regressions
time1 <- lmer(drag_order ~ Amt.c + Prob.c + factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref1_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time2 <- lmer(drag_order ~ Amt.c * factor(step) + Prob.c +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref1_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time3 <- lmer(drag_order ~ Amt.c + Prob.c * factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref1_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

tab_model(time1, time2, time3,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount (centered)",
              "Probability (centered)",
              "Step [2]", "Step [3]", "Step [4]", "Step [5]",
              "Amount x Step [2]", "Amount x Step [3]", "Amount x Step   [4]", "Amount x Step [5]",
              "Prob x Step [2]", "Prob x Step [3]", "Prob x Step   [4]", "Prob x Step [5]"),
          dv.labels = 
            c("DV = Order<br>(Pref 1, reverse-coded)",
              "DV = Order<br>(Pref 1, reverse-coded)",
              "DV = Order<br>(Pref 1, reverse-coded)"))
  DV = Order
(Pref 1, reverse-coded)
DV = Order
(Pref 1, reverse-coded)
DV = Order
(Pref 1, reverse-coded)
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 1.01 0.02 55.08 <0.001 1.01 0.02 55.71 <0.001 1.00 0.02 52.27 <0.001
Amount (centered) -0.00 0.01 -0.20 0.839 -0.01 0.02 -0.71 0.478 -0.01 0.01 -0.49 0.627
Probability (centered) -0.01 0.01 -0.84 0.400 -0.01 0.01 -1.13 0.259 -0.00 0.02 -0.28 0.783
Step [2] 0.97 0.02 49.04 <0.001 0.97 0.02 48.78 <0.001 0.98 0.02 46.61 <0.001
Step [3] 1.91 0.02 84.46 <0.001 1.91 0.02 84.92 <0.001 1.91 0.02 81.52 <0.001
Step [4] 2.81 0.02 113.53 <0.001 2.81 0.02 114.79 <0.001 2.83 0.03 111.68 <0.001
Step [5] 3.64 0.03 116.50 <0.001 3.63 0.03 117.28 <0.001 3.62 0.03 113.57 <0.001
Amount x Step [2] 0.01 0.02 0.51 0.613
Amount x Step [3] 0.03 0.03 1.32 0.189
Amount x Step [4] -0.07 0.03 -2.80 0.005
Amount x Step [5] 0.10 0.03 3.26 0.001
Prob x Step [2] 0.00 0.02 0.06 0.951
Prob x Step [3] -0.04 0.03 -1.44 0.149
Prob x Step [4] 0.03 0.02 1.36 0.175
Prob x Step [5] -0.13 0.03 -3.87 <0.001
Random Effects
σ2 0.03 0.03 0.03
τ00 0.02 ResponseId 0.02 ResponseId 0.02 ResponseId
τ11 0.01 ResponseId.Amt.c 0.01 ResponseId.Amt.c 0.01 ResponseId.Amt.c
0.01 ResponseId.Prob.c 0.01 ResponseId.Prob.c 0.01 ResponseId.Prob.c
ρ01 -0.36 -0.53 -0.44
-0.87 -0.94 -0.91
N 186 ResponseId 186 ResponseId 186 ResponseId
Observations 653 653 653
Marginal R2 / Conditional R2 0.980 / NA 0.980 / NA 0.980 / NA

Time Analysis - Prefence 2

The non-duplicate counting procedure removes 70 observations (9.4%). Cutting after step 5 removes 43 observations from 23 people.

Drag Distance

# First, create data frames to measure the impact of amount and probability on the 3F measures

# Join master_df to RankProcess_Prefer2 using ResponseId and item.f / bet_label
time_df_pref2 <- RankProcess_Prefer2 %>%
  left_join(
    master_df %>%
      select(ResponseId, bet_label, set2_amt, set2_prob, set2_order, set2_drag_distance.r),
    by = c("ResponseId", "item.f" = "bet_label")
  ) %>%
  rename(Amt = set2_amt, Prob = set2_prob, drag_order = set2_order, distance.r = set2_drag_distance.r) %>%
  filter(step != 0)

time_df_pref2$Amt.c <- scale(time_df_pref2$Amt)
time_df_pref2$Prob.c <- scale(time_df_pref2$Prob)

#sum(duplicated(time_df_pref2[, c("ResponseId", "item.f")])) #70
#sum(time_df_pref2$step > 5, na.rm = TRUE) #43 obs
#length(unique(time_df_pref2$ResponseId[time_df_pref2$step > 5])) #from 23 unique IDs


# Check for duplicates and remove them
duplicated_rows <- duplicated(time_df_pref2[, c("ResponseId", "item.f")])
time_df_pref2_unique <- time_df_pref2 %>%
  distinct(ResponseId, item.f, .keep_all = TRUE)

# regressions
time1 <- lmer(distance.r ~ Amt.c + Prob.c + factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref2_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time2 <- lmer(distance.r ~ Amt.c * factor(step) + Prob.c +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref2_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time3 <- lmer(distance.r ~ Amt.c + Prob.c * factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref2_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

tab_model(time1, time2, time3,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount (centered)",
              "Probability (centered)",
              "Step [2]", "Step [3]", "Step [4]", "Step [5]",
              "Amount x Step [2]", "Amount x Step [3]", "Amount x Step   [4]", "Amount x Step [5]",
              "Prob x Step [2]", "Prob x Step [3]", "Prob x Step   [4]", "Prob x Step [5]"),
          dv.labels = 
            c("DV = Distance<br>(Pref 2, reverse-coded)",
              "DV = Distance<br>(Pref 2, reverse-coded)",
              "DV = Distance<br>(Pref 2, reverse-coded)"))
  DV = Distance
(Pref 2, reverse-coded)
DV = Distance
(Pref 2, reverse-coded)
DV = Distance
(Pref 2, reverse-coded)
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 2.50 0.11 22.27 <0.001 2.49 0.11 21.88 <0.001 2.50 0.12 20.17 <0.001
Amount (centered) -0.33 0.10 -3.11 0.002 -0.30 0.14 -2.13 0.033 -0.33 0.11 -3.13 0.002
Probability (centered) 0.12 0.08 1.44 0.150 0.13 0.08 1.64 0.102 0.11 0.11 0.99 0.320
Step [2] -0.40 0.14 -2.91 0.004 -0.38 0.14 -2.62 0.009 -0.41 0.15 -2.70 0.007
Step [3] -0.73 0.16 -4.71 <0.001 -0.72 0.16 -4.43 <0.001 -0.73 0.16 -4.47 <0.001
Step [4] -1.04 0.18 -5.71 <0.001 -1.06 0.18 -5.76 <0.001 -1.07 0.21 -5.17 <0.001
Step [5] -1.14 0.23 -4.99 <0.001 -1.11 0.23 -4.81 <0.001 -1.19 0.24 -4.96 <0.001
Amount x Step [2] 0.08 0.17 0.46 0.646
Amount x Step [3] -0.00 0.20 -0.02 0.986
Amount x Step [4] -0.35 0.24 -1.44 0.150
Amount x Step [5] 0.23 0.29 0.81 0.420
Prob x Step [2] 0.03 0.14 0.18 0.858
Prob x Step [3] 0.04 0.17 0.25 0.801
Prob x Step [4] -0.05 0.23 -0.22 0.827
Prob x Step [5] -0.15 0.26 -0.60 0.552
Random Effects
σ2 1.43 1.43 1.45
τ00 0.36 ResponseId 0.36 ResponseId 0.36 ResponseId
τ11 0.50 ResponseId.Amt.c 0.50 ResponseId.Amt.c 0.51 ResponseId.Amt.c
0.02 ResponseId.Prob.c 0.02 ResponseId.Prob.c 0.02 ResponseId.Prob.c
ρ01 0.79 0.80 0.80
0.99 1.00 0.98
N 186 ResponseId 186 ResponseId 186 ResponseId
Observations 647 647 647
Marginal R2 / Conditional R2 0.202 / NA 0.211 / NA 0.202 / NA

Drag Order

# regressions
time1 <- lmer(drag_order ~ Amt.c + Prob.c + factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref2_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time2 <- lmer(drag_order ~ Amt.c * factor(step) + Prob.c +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref2_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

time3 <- lmer(drag_order ~ Amt.c + Prob.c * factor(step) +
                  (1 + Amt.c + Prob.c | ResponseId),
                  data = time_df_pref2_unique,
                  subset = step < 6,
                  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
                )

tab_model(time1, time2, time3,
          show.se = T,
          show.ci = F,
          show.stat = T,
          pred.labels =
            c("Intercept",
              "Amount (centered)",
              "Probability (centered)",
              "Step [2]", "Step [3]", "Step [4]", "Step [5]",
              "Amount x Step [2]", "Amount x Step [3]", "Amount x Step   [4]", "Amount x Step [5]",
              "Prob x Step [2]", "Prob x Step [3]", "Prob x Step   [4]", "Prob x Step [5]"),
          dv.labels = 
            c("DV = Order<br>(Pref 2, reverse-coded)",
              "DV = Order<br>(Pref 2, reverse-coded)",
              "DV = Order<br>(Pref 2, reverse-coded)"))
  DV = Order
(Pref 2, reverse-coded)
DV = Order
(Pref 2, reverse-coded)
DV = Order
(Pref 2, reverse-coded)
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 0.99 0.02 52.77 <0.001 1.00 0.02 53.43 <0.001 0.99 0.02 50.58 <0.001
Amount (centered) 0.03 0.01 2.02 0.043 0.01 0.02 0.65 0.515 0.03 0.01 2.08 0.038
Probability (centered) 0.01 0.01 0.88 0.380 0.01 0.01 0.55 0.584 0.02 0.02 1.13 0.257
Step [2] 0.98 0.02 46.71 <0.001 0.98 0.02 46.93 <0.001 0.99 0.02 44.66 <0.001
Step [3] 1.92 0.02 82.44 <0.001 1.92 0.02 83.13 <0.001 1.92 0.02 80.69 <0.001
Step [4] 2.87 0.03 103.00 <0.001 2.86 0.03 102.61 <0.001 2.88 0.03 91.06 <0.001
Step [5] 3.60 0.03 103.89 <0.001 3.57 0.03 102.40 <0.001 3.58 0.04 99.58 <0.001
Amount x Step [2] -0.01 0.02 -0.34 0.736
Amount x Step [3] 0.00 0.02 0.01 0.996
Amount x Step [4] 0.04 0.03 1.54 0.125
Amount x Step [5] 0.15 0.03 4.56 <0.001
Prob x Step [2] 0.01 0.02 0.44 0.659
Prob x Step [3] -0.02 0.02 -0.80 0.427
Prob x Step [4] 0.01 0.03 0.31 0.757
Prob x Step [5] -0.10 0.04 -2.58 0.010
Random Effects
σ2 0.03 0.03 0.03
τ00 0.02 ResponseId 0.02 ResponseId 0.02 ResponseId
τ11 0.01 ResponseId.Amt.c 0.01 ResponseId.Amt.c 0.01 ResponseId.Amt.c
0.01 ResponseId.Prob.c 0.01 ResponseId.Prob.c 0.01 ResponseId.Prob.c
ρ01 -1.00 -1.00 -1.00
-1.00 -1.00 -1.00
N 186 ResponseId 186 ResponseId 186 ResponseId
Observations 647 647 647
Marginal R2 / Conditional R2 0.975 / NA 0.976 / NA 0.975 / NA

Rank Models - Prefence 1

lm_robust models; attempting to predict final rank using the DROPT measures.

lm_robust

master_df$set1_rank.r <- 7-master_df$set1_rank

choice1 <- lm_robust(set1_rank.r ~ set1_amt.c + set1_prob.c, data = master_df, clusters = ResponseId)
# choice2 <- lm_robust(set1_rank.r ~ set1_amt + set1_prob + set1_touch_count_binary, data = master_df, clusters = ResponseId)
# choice3 <- lm_robust(set1_rank.r ~ set1_amt + set1_prob + set1_order, data = master_df, clusters = ResponseId)
# choice4 <- lm_robust(set1_rank.r~ set1_amt + set1_prob + set1_drag_distance.r, data = master_df, clusters = ResponseId)
choice5 <- lm_robust(set1_rank.r ~ set1_amt.c + set1_prob.c + set1_touch_count_binary + set1_order + set1_drag_distance.r, data = master_df, clusters = ResponseId)


tab_model(
  choice1, choice5,
  dv.labels = c(
    "Rank (reverse-coded)",
    "Rank (reverse-coded)"
  ),
  pred.labels =
            c("Intercept",
              "Amount",
              "Probability",
              "Touch Count (binary)",
              "Touch Order",
              "Drag Distance"),
  show.se = T,
  show.ci = F,
  show.stat = T
)
  Rank (reverse-coded) Rank (reverse-coded)
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p
Intercept 3.50 0.00 1104.78 <0.001 4.60 0.13 35.48 <0.001
Amount -0.42 0.07 -6.00 <0.001 -0.36 0.06 -6.01 <0.001
Probability 0.30 0.08 3.67 <0.001 0.01 0.07 0.18 0.858
Touch Count (binary) -0.19 0.09 -2.28 0.023
Touch Order -0.47 0.04 -13.25 <0.001
Drag Distance 0.41 0.03 15.45 <0.001
Observations 1164 1116
R2 / R2 adjusted 0.152 / 0.150 0.509 / 0.506

Predict 2ND Lottery Preference (without using DRORT data in the 2ND Task)

explore<-master_df %>%
  mutate(
    amount_rank = case_when(
      bet_label == "Pr6_Amt1" ~ 1,
      bet_label == "Pr5_Amt2" ~ 2,
      bet_label == "Pr4_Amt3" ~ 3,
      bet_label == "Pr3_Amt4" ~ 4,
      bet_label == "Pr2_Amt5" ~ 5,
      bet_label == "Pr1_Amt6" ~ 6
    )
  )%>%
  group_by(ResponseId) %>%
  summarise(
    Tau.amt_set1 = cor(set1_rank, amount_rank, method = "kendall"),
    Rho.amt_set1 = cor(set1_rank, amount_rank, method = "spearman"),
    .groups = "drop"
  )

Extract Individual Weights for DROPT DVs (1ST Task data)

  • A quick and dirty descriptive of the 1ST Task
    • participants have heterogenous preferences
Ranking_per_Step_Pr1_Amt6 <- Distance_Prefer1
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
  left_join(explore %>% select(ResponseId, Tau.amt_set1), by = "ResponseId")

## Create new R columns
# Initialize R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

# Function to fill R columns
fill_R_columns <- function(order_string, target_value) {
  order_string <- gsub("[[:space:][:cntrl:]]", "", order_string)  # Clean spaces and newlines
  order_numbers <- strsplit(order_string, ",")[[1]]
  position_of_value <- which(order_numbers == as.character(target_value))
  
  if (length(position_of_value) == 0) {
    return(NA)
  } else if (position_of_value == 1) {
    return(1)
  } else {
    return(position_of_value)
  }
}

Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )


# Ensure at least 9 rows per ResponseId
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 10) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()



## Summary DFs
summary_ranks_step_split_by_tau <- Ranking_per_Step_Pr1_Amt6 %>%
  filter(step < 10) %>%
  group_by(step, Tau.amt_set1) %>%
  summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

## Long format
summary_ranks_step_split_by_tau_long <- summary_ranks_step_split_by_tau %>%
  pivot_longer(
    cols = -c(step, Tau.amt_set1),
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

## Add N and reorder tau levels
tau_counts <- Ranking_per_Step_Pr1_Amt6 %>%
  distinct(ResponseId, Tau.amt_set1) %>%
  count(Tau.amt_set1, name = "N")

summary_ranks_step_split_by_tau_long <- summary_ranks_step_split_by_tau_long %>%
  left_join(tau_counts, by = "Tau.amt_set1") %>%
  mutate(
    Tau.amt_set1 = factor(Tau.amt_set1, levels = sort(unique(Tau.amt_set1), decreasing = TRUE)),
    facet_label = paste0("τ = ", Tau.amt_set1, " (N = ", N, ")"),
    facet_label = factor(facet_label, levels = rev(unique(facet_label)))  # ✅ use rev()
  )

## Define shapes (you can define `bet_colors` separately)
bet_shapes <- c(
  "Pr6_Amt1" = 21, "Pr5_Amt2" = 22, "Pr4_Amt3" = 23,
  "Pr3_Amt4" = 24, "Pr2_Amt5" = 25, "Pr1_Amt6" = 11
)

## Plot

ggplot(summary_ranks_step_split_by_tau_long%>%filter(!is.na(Tau.amt_set1)),
       aes(x = step, y = mean_rank)) +
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se),
                width = 0.2, color = "black") +
  facet_wrap(~ facet_label, labeller = label_wrap_gen(width = 20)) +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +
  labs(
    x = "Ranking Step",
    y = "Mean Rank",
    title = "Mean Rank Trajectories by Kendall's Tau Group (N per Panel)",
    color = "Label", shape = "Label", fill = "Label"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  )+
  scale_y_reverse(breaks = 1:6)

mixed_participants<-explore%>%
  filter(Tau.amt_set1<0.86 & Tau.amt_set1>-0.86)%>%
  pull(ResponseId)
  • Model Specifications:
    • Formula: Distance/Order/Count ~ set1_prob.z + set1_amt.z+ as.factor(set1_initial_order) + (set1_prob.z + set1_amt.z | ResponseId) + (1|bet_label)
      • Order variable was reverse-scored by multiplying by -1, so that higher values indicate being moved earlier.
      • initial order is omitted from output below
  • Standardization: Both the prob and amt attributes were standardized
  • Singularity: For order, singularity remained unresolved both when switching to clmm and when excluding the highest variance item (Seems fine and proceed - this appears to stem from limited variation in participants’ mean)
    • final rank model employs an clmm() model (log-odds displayed; can be interpreted as beta)
master_df<-master_df%>% # 194 when full
  mutate(set1_prob.z=scale(set1_prob),
         set1_amt.z=scale(set1_amt),
         set2_prob.z=scale(set2_prob),
         set2_amt.z=scale(set2_amt),
         set1_order.r=-set1_order,
         set1_ev = set1_prob.z*set1_amt.z,
         set2_ev = set2_prob.z*set2_amt.z
         )
master_df <- master_df %>% #182
  group_by(ResponseId) %>%
  filter(!any(is.na(set1_drag_distance.r|set2_drag_distance.r))) %>% 
  ungroup()
 
# need to look into the dataset and removes any NAs or odd values or ites with missing stuff

# master_df%>%
#   group_by(bet_label)%>%
#   summarize(set1.sd=sd(set1_order.r))

# master_df.test<-master_df%>%
#   filter(bet_label!="Pr6_Amt1")
# 
# master_df.test%>%
#   group_by(ResponseId)%>%
#   summarize(mean_set1=mean(set1_rank))%>%
#   summarize(sd(mean_set1))


set1.Rank.RE <- clmm(
  as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+
    (set1_prob.z + set1_amt.z | ResponseId) + (1|bet_label),
  data = master_df
)


set1.distance.RE <- lmer(
  set1_drag_distance.r ~ set1_prob.z + set1_amt.z + (1|bet_label)+as.factor(set1_initial_order)+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)
set1.order.RE <- lmer(
  set1_order.r ~ set1_prob.z + set1_amt.z + (1|bet_label)+as.factor(set1_initial_order)+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)
set1.count.RE <- glmer(
  set1_touch_count_binary ~ set1_prob.z + set1_amt.z + (1|bet_label)+as.factor(set1_initial_order)+
    (set1_prob.z + set1_amt.z | ResponseId),
  data = master_df
)

tab_model(
  set1.Rank.RE,
  set1.distance.RE,
  set1.order.RE,
  set1.count.RE,
  dv.labels  = c("DV: Final Rank", "Distance", "Order", "Count"),
  pred.labels = c("Prob (z)", "Amt (z)", "(Intercept)"), 
  show.re.var = TRUE,
  show.icc    = TRUE,
  digits = 2,
  drop = c("as.factor\\(set1_initial_order\\)", "\\|"),
  transform=NULL
)
  DV: Final Rank Distance Order Count
Predictors Log-Odds CI p Estimates CI p Estimates CI p Estimates CI p
Prob (z) 1.80 1.00 – 2.60 <0.001 0.46 0.21 – 0.72 <0.001 0.36 0.20 – 0.52 <0.001 0.04 -0.03 – 0.11 0.310
Amt (z) 0.63 -0.22 – 1.47 0.146 0.15 -0.08 – 0.38 0.198 -0.01 -0.16 – 0.14 0.907 -0.03 -0.10 – 0.04 0.348
(Intercept) -0.16 -0.48 – 0.16 0.319 -4.04 -4.22 – -3.85 <0.001 0.23 0.15 – 0.31 <0.001
Random Effects
σ2 3.29 0.65 1.13 0.15
τ00 0.05 ResponseId 0.30 ResponseId 0.00 ResponseId 0.01 ResponseId
2.19 bet_label 0.12 bet_label 0.01 bet_label 0.00 bet_label
τ11 6.68 ResponseId.set1_prob.z 0.35 ResponseId.set1_prob.z 0.35 ResponseId.set1_prob.z 0.01 ResponseId.set1_prob.z
5.15 ResponseId.set1_amt.z 0.24 ResponseId.set1_amt.z 0.18 ResponseId.set1_amt.z 0.02 ResponseId.set1_amt.z
ρ01 0.56 ResponseId.set1_prob.z 0.01 ResponseId.set1_prob.z -1.00 ResponseId.set1_prob.z -0.69 ResponseId.set1_prob.z
0.65 ResponseId.set1_amt.z 0.25 ResponseId.set1_amt.z 0.19 ResponseId.set1_amt.z -0.32 ResponseId.set1_amt.z
ICC 0.83 0.61 0.35 0.25
N 182 ResponseId 6 bet_label 6 bet_label 6 bet_label
6 bet_label 182 ResponseId 182 ResponseId 182 ResponseId
Observations 1092 1092 1092 1092
Marginal R2 / Conditional R2 0.094 / 0.847 0.366 / 0.756 0.159 / 0.451 0.184 / 0.391
# test$set1_amt.z
# test<-as.data.frame(ranef(set1.count.RE)$ResponseId)
# VarCorr(set1.count.RE)$ResponseId["set1_amt.z","set1_amt.z"]

Visualize Random Intercepts/Slopes

  • x and y scales DIFFER across plots
get_participant_effects_lmer <- function(model, id_var = "ResponseId") {
  # Random effects matrix -> data.frame, keep names like (Intercept)
  re_mat <- ranef(model)[[id_var]]
  re_df  <- data.frame(re_mat, check.names = FALSE) # check.name =F keep all the variable names intact
  re_df[[id_var]] <- rownames(re_mat)

  # Fixed effects (named vector)
  fe <- fixef(model)

  # Union of all coefficient names (fixed + random)
  all_terms <- union(names(fe), colnames(re_mat))

  # Ensure every term exists; add fixed beta (default 0) + random deviation (default 0)
  for (nm in all_terms) {
    if (!nm %in% names(re_df)) re_df[[nm]] <- 0     
    if (nm %in% names(fe))     re_df[[nm]] <- re_df[[nm]] + unname(fe[[nm]]) # ensure that all variables are there
  }

  re_df
}
get_participant_effects_clmm <- function(model, id_var = "ResponseId") {
  # Random effects
  re_list <- ranef(model)
  re <- as.data.frame(re_list[[id_var]])
  re[[id_var]] <- rownames(re_list[[id_var]])
  # Fixed effects: only betas, no thresholds
  fe_beta <- model$beta

  # Ensure each fixed-effect name exists in RE; fill missing with 0
  for (nm in names(fe_beta)) {
    if (!nm %in% names(re)) re[[nm]] <- 0
    re[[nm]] <- re[[nm]] + unname(fe_beta[[nm]])
  }

  re
}

re_rank     <- get_participant_effects_clmm(set1.Rank.RE)
re_distance <- get_participant_effects_lmer(set1.distance.RE)
re_order    <- get_participant_effects_lmer(set1.order.RE)
re_count    <- get_participant_effects_lmer(set1.count.RE)

# Add labels
re_rank$model     <- "DV: Final Rank"
re_distance$model <- "DV: Distance"
re_order$model    <- "DV: Order"
re_count$model    <- "DV: Count"

# Combine
re_all <- bind_rows(re_rank, re_distance, re_order, re_count) %>%
  dplyr::select(-dplyr::matches("as\\.factor\\(set1_initial_order\\)"))

re_all_long <- re_all %>%
  pivot_longer(
    cols = -c(ResponseId, model),
    names_to = "parameter",
    values_to = "coefficient"
  ) %>%
  mutate(
    parameter = recode(parameter,
      `(Intercept)`   = "(Intercept)",
      `set1_prob.z`   = "Prob (z)",
      `set1_amt.z`    = "Amt (z)"
    ),
    parameter = factor(parameter,
      levels = c("(Intercept)", "Prob (z)", "Amt (z)")
    ),
    model = factor(model,
      levels = c("DV: Final Rank", "DV: Distance", "DV: Order", "DV: Count"))
  )

param_colors <- c(
  "(Intercept)" = "grey60",
  "Prob (z)"    = "steelblue",
  "Amt (z)"     = "steelblue"
)

ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
  geom_histogram(bins = 30, alpha = 0.85, color = "white") +
  facet_grid(parameter ~ model, scales = "free") +
  scale_fill_manual(values = param_colors, guide = "none") +
  theme_minimal() +
  labs(
    title = "Participant-specific coefficients across models",
    x = "Coefficient value",
    y = "Count"
  ) +
  theme(
    panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    strip.text = element_text(face = "bold", size = 12)
  )

Factor Score and Correlation Matrix

  • Factor Score
    • the three DROPT DVs could be indicators of a common attention factor, so we try a FA with 2 factors (although parallel analysis suggested 3, and outputs below suggest a poor fit with n.factor=2). Both oblimin- and Varimax (orthogonal)-rotated loadings indicated that all three prob and amt participant-specific slopes loaded on the same factor with loadings ≥ .3.
re_wide <- re_all %>%
  pivot_longer(
    cols = -c(ResponseId, model),
    names_to = "parameter",
    values_to = "coefficient"
  ) %>%
  mutate(                           # keep your simple labels if you want
    parameter = recode(parameter,
      `(Intercept)` = "intercept",
      `set1_prob.z` = "prob_z",
      `set1_amt.z`  = "amt_z"
    )
  ) %>%
  unite(var, parameter, model, sep = "__") %>%   # e.g., prob_z__DV: Distance
  pivot_wider(names_from = var, values_from = coefficient)

slope_mat <- re_wide %>%
  select(matches("^(prob_z|amt_z)__DV: (Distance|Order|Count)$")) %>%
  mutate(across(
    everything(),
    ~ as.numeric(scale(.)),
    .names = "{.col}.z"
  ))
# colSums(is.na(slope_mat)) # none, good
slope_z <- slope_mat %>% select(ends_with(".z"))
slope_z <- slope_z %>%
  select(
    matches("^amt"),   # all amt columns first
    matches("^prob")   # then all prob columns
  )

# fa.parallel(slope_z, fa = "fa", fm = "pa", n.iter = 100, show.legend = TRUE, main = "Parallel Analysis") # suggests 3 factors

slope_z <- slope_z %>%
  rename_with(~ .x %>%
    str_replace("_z__DV: ", "_") %>%   # clean out weird middle chunk
    str_to_lower() %>%                 # lower case
    str_replace_all(" ", "_") %>%      # replace spaces
    str_remove("\\.z$") %>%            # drop trailing .z
    str_replace("^amt_(.*)",  "slope_\\1.amt") %>%
    str_replace("^prob_(.*)", "slope_\\1.prob")
  )
fa_oblim <- psych::fa(slope_z, nfactors = 2, rotate = "oblimin",
                      fm = "pa", scores = "regression")
cat("## **Oblimin Rotation:**\n")
## ## **Oblimin Rotation:**
summary(fa_oblim)
## 
## Factor analysis with Call: psych::fa(r = slope_z, nfactors = 2, rotate = "oblimin", scores = "regression", 
##     fm = "pa")
## 
## Test of the hypothesis that 2 factors are sufficient.
## The degrees of freedom for the model is 4  and the objective function was  0.21 
## The number of observations was  182  with Chi Square =  36.85  with prob <  1.9e-07 
## 
## The root mean square of the residuals (RMSA) is  0.05 
## The df corrected root mean square of the residuals is  0.1 
## 
## Tucker Lewis Index of factoring reliability =  0.728
## RMSEA index =  0.212  and the 90 % confidence intervals are  0.153 0.278
## BIC =  16.04
##  With factor correlations of 
##       PA2   PA1
## PA2  1.00 -0.55
## PA1 -0.55  1.00
print(fa_oblim$loadings, cutoff = 0.3)
## 
## Loadings:
##                     PA2    PA1   
## slope_distance.amt  -0.424  0.326
## slope_order.amt             0.744
## slope_count.amt             0.871
## slope_distance.prob  0.713       
## slope_order.prob     0.611 -0.311
## slope_count.prob     0.749       
## 
##                  PA2   PA1
## SS loadings    1.655 1.550
## Proportion Var 0.276 0.258
## Cumulative Var 0.276 0.534
fa_varim <- psych::fa(slope_z, nfactors = 2, rotate = "varimax",
                      fm = "pa", scores = "regression")
cat("## **Varimax (orthogonal) Rotation:**\n")
## ## **Varimax (orthogonal) Rotation:**
summary(fa_varim)
## 
## Factor analysis with Call: psych::fa(r = slope_z, nfactors = 2, rotate = "varimax", scores = "regression", 
##     fm = "pa")
## 
## Test of the hypothesis that 2 factors are sufficient.
## The degrees of freedom for the model is 4  and the objective function was  0.21 
## The number of observations was  182  with Chi Square =  36.85  with prob <  1.9e-07 
## 
## The root mean square of the residuals (RMSA) is  0.05 
## The df corrected root mean square of the residuals is  0.1 
## 
## Tucker Lewis Index of factoring reliability =  0.728
## RMSEA index =  0.212  and the 90 % confidence intervals are  0.153 0.278
## BIC =  16.04
print(fa_varim$loadings, cutoff = 0.3)
## 
## Loadings:
##                     PA1    PA2   
## slope_distance.amt   0.458 -0.477
## slope_order.amt      0.777 -0.331
## slope_count.amt      0.823       
## slope_distance.prob -0.305  0.690
## slope_order.prob    -0.505  0.650
## slope_count.prob            0.666
## 
##                  PA1   PA2
## SS loadings    1.845 1.698
## Proportion Var 0.307 0.283
## Cumulative Var 0.307 0.591
### Fit CFA
library(lavaan)

amt_vars  <- names(slope_z)[grepl("\\.amt$",  names(slope_z))]
prob_vars <- names(slope_z)[grepl("\\.prob$", names(slope_z))]

cfa_model <- paste0(
  "Prob =~ ", paste(prob_vars, collapse = " + "), "\n",
  "Amt  =~ ", paste(amt_vars,  collapse = " + ")
)

fit <- cfa(
  cfa_model,
  data      = slope_z,
  std.lv    = TRUE,      # factors on SD=1 scale (helps interpret loadings)
  estimator = "MLR",
  missing   = "fiml"
)
summary(fit)
## lavaan 0.6-19 ended normally after 17 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        19
## 
##   Number of observations                           182
##   Number of missing patterns                         1
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                                45.213      36.404
##   Degrees of freedom                                 8           8
##   P-value (Chi-square)                           0.000       0.000
##   Scaling correction factor                                  1.242
##     Yuan-Bentler correction (Mplus variant)                       
## 
## Parameter Estimates:
## 
##   Standard errors                             Sandwich
##   Information bread                           Observed
##   Observed information based on                Hessian
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   Prob =~                                             
##     slop_dstnc.prb    0.727    0.061   11.975    0.000
##     slope_ordr.prb    0.939    0.052   17.935    0.000
##     slope_cont.prb    0.486    0.070    6.915    0.000
##   Amt =~                                              
##     slope_dstnc.mt    0.632    0.079    7.960    0.000
##     slope_order.mt    0.887    0.059   15.140    0.000
##     slope_count.mt    0.741    0.058   12.849    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   Prob ~~                                             
##     Amt              -0.736    0.058  -12.624    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .slop_dstnc.prb   -0.000    0.074   -0.000    1.000
##    .slope_ordr.prb    0.000    0.074    0.000    1.000
##    .slope_cont.prb    0.000    0.074    0.000    1.000
##    .slope_dstnc.mt   -0.000    0.074   -0.000    1.000
##    .slope_order.mt   -0.000    0.074   -0.000    1.000
##    .slope_count.mt    0.000    0.074    0.000    1.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .slop_dstnc.prb    0.467    0.080    5.848    0.000
##    .slope_ordr.prb    0.113    0.069    1.630    0.103
##    .slope_cont.prb    0.758    0.088    8.643    0.000
##    .slope_dstnc.mt    0.595    0.084    7.065    0.000
##    .slope_order.mt    0.208    0.063    3.285    0.001
##    .slope_count.mt    0.445    0.055    8.157    0.000
##     Prob              1.000                           
##     Amt               1.000
fscores <- lavPredict(fit, method = "regression")
re_wide <- re_wide %>%
  mutate(
    FA_prob.DROPT = as.numeric(fscores[, "Prob"]),
    FA_amt.DROPT  = as.numeric(fscores[, "Amt"])
  )
re_wide <- re_wide %>%
  select(
    matches("^amt"),   # all amt columns first
    matches("^prob"),  # then all prob columns
    everything()       # then everything else
  )
  • Correlation Matrix

    • Driven by one attribute first then the other an this pattern hold for (when subsetting) for those with in between profiles
re_wide.renamed <- re_wide %>%
  rename_with(~ .x %>%
    # strip "_z__DV: " prefix
    gsub("_z__DV: ", "_", .) %>%
    # lower-case
    tolower() %>%
    # replace spaces with underscores
    gsub(" ", "_", .) %>%
    # add slope_ prefix
    sub("^amt", "slope_amt", .) %>%
    sub("^prob", "slope_prob", .) %>%
    # handle FA columns
    sub("^fa_prob.*", "DROPT_FA_slope_prob", .) %>%
    sub("^fa_amt.*",  "DROPT_FA_slope_amt", .)
  )%>%
  rename_with(~ .x %>%
    # handle slope_* variables: move amt/prob to suffix
    sub("^slope_amt_(.*)$", "slope_\\1.amt", .) %>%
    sub("^slope_prob_(.*)$", "slope_\\1.prob", .) %>%
    # handle factor score vars
    sub("^DROPT_FA_slope_amt$",  "DROPT_FA.amt", .) %>%
    sub("^DROPT_FA_slope_prob$", "DROPT_FA.prob", .)
  )
ggpairs(
  re_wide.renamed %>%
    select(-starts_with("intercept"), -responseid)
)

Participants with mixed preferences (N=67):

  • This is an attempt to say that the pattern holds also for the subset of those with mixed preferences: participants leaning toward probability tend to rely more on probability when ranking, while those leaning toward amount tend to rely more on amount.
    • this analysis could definitely be improved (e.g., using interaction models - tho a challenge is that correlations can’t be calculated at the individual level)
    • again, we may try say something about participants first using one attribute then the other (if we really want to hone in on this, we may need a study 3 that has more lotteries/attributes)
ggpairs(
  re_wide.renamed %>% 
    filter(responseid%in%mixed_participants)%>%
    select(-starts_with("intercept"), -responseid)
)

Comparison with Task 2

  • If the drag distance slope or DROPT slope FA score captures an individual’s tendency to attend more to [addm], or think first about [QT], prob vs. amt — and thereby improves prediction accuracy — then it should be a stable trait, showing high correlation across tasks. To see if this is the case, we repeat the same exercise with Task 1 and also examine the correlation between the DROPT slopes across tasks.
explore.Set2<-master_df %>%
  mutate(
    amount_rank = case_when(
      bet_label == "Pr6_Amt1" ~ 1,
      bet_label == "Pr5_Amt2" ~ 2,
      bet_label == "Pr4_Amt3" ~ 3,
      bet_label == "Pr3_Amt4" ~ 4,
      bet_label == "Pr2_Amt5" ~ 5,
      bet_label == "Pr1_Amt6" ~ 6
    )
  )%>%
  group_by(ResponseId) %>%
  summarise(
    Tau.amt_set2 = cor(set2_rank, amount_rank, method = "kendall"),
    Rho.amt_set2 = cor(set2_rank, amount_rank, method = "spearman"),
    .groups = "drop"
  )
  • A quick and dirty descriptive of the 2ND Task
    • participants have heterogenous preferences
Ranking_per_Step_Pr1_Amt6 <- Distance_Prefer2
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
  left_join(explore.Set2 %>% select(ResponseId, Tau.amt_set2), by = "ResponseId")

## Create new R columns
# Initialize R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)

# Function to fill R columns
fill_R_columns <- function(order_string, target_value) {
  order_string <- gsub("[[:space:][:cntrl:]]", "", order_string)  # Clean spaces and newlines
  order_numbers <- strsplit(order_string, ",")[[1]]
  position_of_value <- which(order_numbers == as.character(target_value))
  
  if (length(position_of_value) == 0) {
    return(NA)
  } else if (position_of_value == 1) {
    return(1)
  } else {
    return(position_of_value)
  }
}

Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% 
  mutate(
    Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
    Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
    Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
    Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
    Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
    Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
  )


# Ensure at least 9 rows per ResponseId
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% group_by(ResponseId) %>% 
  group_modify(~ {
    while (nrow(.x) < 10) {
      max_step_row <- .x %>% filter(step == max(step))
      .x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
    }
    return(.x)
  }) %>% ungroup()



## Summary DFs
summary_ranks_step_split_by_tau <- Ranking_per_Step_Pr1_Amt6 %>%
  filter(step < 10) %>%
  group_by(step, Tau.amt_set2) %>%
  summarise(
    mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
    se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
    se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
    se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
    se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
    se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
    
    mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
    se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
    
    .groups = "drop"
  )

## Long format
summary_ranks_step_split_by_tau_long <- summary_ranks_step_split_by_tau %>%
  pivot_longer(
    cols = -c(step, Tau.amt_set2),
    names_to = c(".value", "label"),
    names_pattern = "^(mean|se)_(.*)$"
  ) %>%
  rename(
    mean_rank = mean,
    mean_se = se
  )

## Add N and reorder tau levels
tau_counts <- Ranking_per_Step_Pr1_Amt6 %>%
  distinct(ResponseId, Tau.amt_set2) %>%
  count(Tau.amt_set2, name = "N")

summary_ranks_step_split_by_tau_long <- summary_ranks_step_split_by_tau_long %>%
  left_join(tau_counts, by = "Tau.amt_set2") %>%
  mutate(
    Tau.amt_set1 = factor(Tau.amt_set2, levels = sort(unique(Tau.amt_set2), decreasing = TRUE)),
    facet_label = paste0("τ = ", Tau.amt_set2, " (N = ", N, ")"),
    facet_label = factor(facet_label, levels = rev(unique(facet_label)))  # ✅ use rev()
  )

## Define shapes (you can define `bet_colors` separately)
bet_shapes <- c(
  "Pr6_Amt1" = 21, "Pr5_Amt2" = 22, "Pr4_Amt3" = 23,
  "Pr3_Amt4" = 24, "Pr2_Amt5" = 25, "Pr1_Amt6" = 11
)

## Plot

ggplot(summary_ranks_step_split_by_tau_long%>%filter(!is.na(Tau.amt_set2)),
       aes(x = step, y = mean_rank)) +
  geom_line(aes(color = label), size = 1) +
  geom_point(aes(fill = label, shape = label), size = 3) +
  geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se),
                width = 0.2, color = "black") +
  facet_wrap(~ facet_label, labeller = label_wrap_gen(width = 20)) +
  scale_color_manual(values = bet_colors) +
  scale_fill_manual(values = bet_colors) +
  scale_shape_manual(values = bet_shapes) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(limits = c(1, 6), breaks = 1:6) +
  labs(
    x = "Ranking Step",
    y = "Mean Rank",
    title = "Mean Rank Trajectories by Kendall's Tau Group (N per Panel)",
    color = "Label", shape = "Label", fill = "Label"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  )+
  scale_y_reverse(breaks = 1:6)

mixed_participants.set2<-explore.Set2%>%
  filter(Tau.amt_set2<0.86 & Tau.amt_set2>-0.86)%>%
  pull(ResponseId)
  • Correlation between Set 1 and Set 2 Tau (r=0.72)
master_df %>%
  mutate(
    amount_rank = case_when(
      bet_label == "Pr6_Amt1" ~ 1,
      bet_label == "Pr5_Amt2" ~ 2,
      bet_label == "Pr4_Amt3" ~ 3,
      bet_label == "Pr3_Amt4" ~ 4,
      bet_label == "Pr2_Amt5" ~ 5,
      bet_label == "Pr1_Amt6" ~ 6
    )
  ) %>%
  group_by(ResponseId) %>%
  summarise(
    Tau.amt_set2 = cor(set2_rank, amount_rank, method = "kendall"),
    Tau.amt_set1 = cor(set1_rank, amount_rank, method = "kendall"),
    .groups = "drop"
  ) %>%
  summarise(correlation = cor(Tau.amt_set1, Tau.amt_set2, use = "complete.obs"))
master_df<-master_df%>% # 194 when full
  mutate(set2_prob.z=scale(set2_prob),
         set2_amt.z=scale(set2_amt),
         set2_prob.z=scale(set2_prob),
         set2_amt.z=scale(set2_amt),
         set2_order.r=-set2_order,
         )
master_df <- master_df %>% #182
  group_by(ResponseId) %>%
  filter(!any(is.na(set1_drag_distance.r|set2_drag_distance.r))) %>% 
  ungroup()
 
# need to look into the dataset and removes any NAs or odd values or ites with missing stuff

# master_df%>%
#   group_by(bet_label)%>%
#   summarize(set1.sd=sd(set1_order.r))

# master_df.test<-master_df%>%
#   filter(bet_label!="Pr6_Amt1")
# 
# master_df.test%>%
#   group_by(ResponseId)%>%
#   summarize(mean_set1=mean(set1_rank))%>%
#   summarize(sd(mean_set1))
master_df$set2_rank.r <- 7-master_df$set2_rank

set2.Rank.RE <- clmm(
  as.factor(set2_rank.r) ~ set2_prob.z + set2_amt.z+
    (set2_prob.z + set2_amt.z | ResponseId) + (1|bet_label),
  data = master_df
)
set2.distance.RE <- lmer(
  set2_drag_distance.r ~ set2_prob.z + set2_amt.z + (1|bet_label)+as.factor(set2_initial_order)+
    (set2_prob.z + set2_amt.z | ResponseId),
  data = master_df
)
set2.order.RE <- lmer(
  set2_order.r ~ set2_prob.z + set2_amt.z + (1|bet_label)+as.factor(set2_initial_order)+
    (set2_prob.z + set2_amt.z | ResponseId),
  data = master_df
)
set2.count.RE <- glmer(
  set2_touch_count_binary ~ set2_prob.z + set2_amt.z + (1|bet_label)+as.factor(set2_initial_order)+
    (set2_prob.z + set2_amt.z | ResponseId),
  data = master_df
)

tab_model(
  set2.Rank.RE,
  set2.distance.RE,
  set2.order.RE,
  set2.count.RE,
  dv.labels  = c("DV: Final Rank", "Distance", "Order", "Count"),
  pred.labels = c("Prob (z)", "Amt (z)", "(Intercept)"), 
  show.re.var = TRUE,
  show.icc    = TRUE,
  digits = 2,
  drop = c("as.factor\\(set2_initial_order\\)", "\\|"),
  transform=NULL
)
  DV: Final Rank Distance Order Count
Predictors Log-Odds CI p Estimates CI p Estimates CI p Estimates CI p
Prob (z) 1.81 1.05 – 2.57 <0.001 0.30 0.19 – 0.40 <0.001 0.48 0.36 – 0.60 <0.001 0.04 -0.01 – 0.08 0.085
Amt (z) 0.74 -0.09 – 1.57 0.080 -0.14 -0.24 – -0.03 0.015 0.15 0.04 – 0.25 0.007 -0.03 -0.08 – 0.01 0.142
(Intercept) -0.02 -0.18 – 0.14 0.816 -4.15 -4.31 – -4.00 <0.001 0.14 0.08 – 0.20 <0.001
Random Effects
σ2 3.29 0.66 0.93 0.13
τ00 0.03 ResponseId 0.33 ResponseId 0.01 ResponseId 0.02 ResponseId
1.12 bet_label 0.00 bet_label 0.00 bet_label 0.00 bet_label
τ11 4.98 ResponseId.set2_prob.z 0.25 ResponseId.set2_prob.z 0.36 ResponseId.set2_prob.z 0.02 ResponseId.set2_prob.z
5.00 ResponseId.set2_amt.z 0.28 ResponseId.set2_amt.z 0.19 ResponseId.set2_amt.z 0.03 ResponseId.set2_amt.z
ρ01 -0.19 ResponseId.set2_prob.z 0.07 ResponseId.set2_prob.z -0.74 ResponseId.set2_prob.z -0.43 ResponseId.set2_prob.z
1.00 ResponseId.set2_amt.z 0.25 ResponseId.set2_amt.z -0.17 ResponseId.set2_amt.z -0.23 ResponseId.set2_amt.z
ICC 0.79 0.57 0.40 0.28
N 182 ResponseId 6 bet_label 6 bet_label 6 bet_label
6 bet_label 182 ResponseId 182 ResponseId 182 ResponseId
Observations 1092 1092 1092 1092
Marginal R2 / Conditional R2 0.112 / 0.812 0.389 / 0.735 0.209 / 0.526 0.232 / 0.450
# test$set2_amt.z
# test<-as.data.frame(ranef(set2.count.RE)$ResponseId)
# VarCorr(set2.count.RE)$ResponseId["set2_amt.z","set2_amt.z"]
Visualize Random Intercepts/Slopes
  • We don’t see the nice bimodal distribution with intercepts extracted from DROPT data
    • x and y scales DIFFER across plots
re_rank.set2     <- get_participant_effects_clmm(set2.Rank.RE)
re_distance.set2 <- get_participant_effects_lmer(set2.distance.RE)
re_order.set2   <- get_participant_effects_lmer(set2.order.RE)
re_count.set2    <- get_participant_effects_lmer(set2.count.RE)

# Add labels
re_rank.set2$model     <- "DV: Final Rank"
re_distance.set2$model <- "DV: Distance"
re_order.set2$model    <- "DV: Order"
re_count.set2$model    <- "DV: Count"

# Combine
re_all.set2 <- bind_rows(re_rank.set2, re_distance.set2, re_order.set2, re_count.set2) %>%
  dplyr::select(-dplyr::matches("as\\.factor\\(set2_initial_order\\)"))

re_all_long.set2 <- re_all.set2 %>%
  pivot_longer(
    cols = -c(ResponseId, model),
    names_to = "parameter",
    values_to = "coefficient"
  ) %>%
  mutate(
    parameter = recode(parameter,
      `(Intercept)`   = "(Intercept)",
      `set2_prob.z`   = "Prob (z)",
      `set2_amt.z`    = "Amt (z)"
    ),
    parameter = factor(parameter,
      levels = c("(Intercept)", "Prob (z)", "Amt (z)")
    ),
    model = factor(model,
      levels = c("DV: Final Rank", "DV: Distance", "DV: Order", "DV: Count"))
  )

param_colors <- c(
  "(Intercept)" = "grey60",
  "Prob (z)"    = "steelblue",
  "Amt (z)"     = "steelblue"
)

ggplot(re_all_long.set2, aes(x = coefficient, fill = parameter)) +
  geom_histogram(bins = 30, alpha = 0.85, color = "white") +
  facet_grid(parameter ~ model, scales = "free") +
  scale_fill_manual(values = param_colors, guide = "none") +
  theme_minimal() +
  labs(
    title = "Participant-specific coefficients across models",
    x = "Coefficient value",
    y = "Count"
  ) +
  theme(
    panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    strip.text = element_text(face = "bold", size = 12)
  )

Factor Score and Correlation Matrix
  • Factor Score
    • the three DROPT DVs may be indicators of a common attention construct, so we try a FA with 2 factors (and parallel analysis suggested 2, though we have poor efa fit). Both oblimin- and Varimax (orthogonal)-rotated loadings indicated that all three prob and amt participant-specific slopes loaded on the same factor with loadings ≥ .3.
re_wide.set2 <- re_all.set2 %>%
  pivot_longer(
    cols = -c(ResponseId, model),
    names_to = "parameter",
    values_to = "coefficient"
  ) %>%
  mutate(                           # keep your simple labels if you want
    parameter = recode(parameter,
      `(Intercept)` = "intercept",
      `set2_prob.z` = "prob_z",
      `set2_amt.z`  = "amt_z"
    )
  ) %>%
  unite(var, parameter, model, sep = "__") %>%   # e.g., prob_z__DV: Distance
  pivot_wider(names_from = var, values_from = coefficient)

slope_mat.set2 <- re_wide.set2 %>%
  select(matches("^(prob_z|amt_z)__DV: (Distance|Order|Count)$")) %>%
  mutate(across(
    everything(),
    ~ as.numeric(scale(.)),
    .names = "{.col}.z"
  ))
# colSums(is.na(slope_mat)) # none, good
slope_z.set2 <- slope_mat.set2 %>% select(ends_with(".z"))
slope_z.set2 <- slope_z.set2 %>%
  select(
    matches("^amt"),   # all amt columns first
    matches("^prob")   # then all prob columns
  )
slope_z.set2 <- slope_z.set2 %>%
  rename_with(~ .x %>%
    str_replace("_z__DV: ", "_") %>%   # clean out weird middle chunk
    str_to_lower() %>%                 # lower case
    str_replace_all(" ", "_") %>%      # replace spaces
    str_remove("\\.z$") %>%            # drop trailing .z
    str_replace("^amt_(.*)",  "slope_\\1.amt") %>%
    str_replace("^prob_(.*)", "slope_\\1.prob")
  )


fa_oblim <- psych::fa(slope_z.set2, nfactors = 2, rotate = "oblimin",
                      fm = "pa", scores = "regression")
cat("## **Oblimin Rotation:**\n")
## ## **Oblimin Rotation:**
summary(fa_oblim)
## 
## Factor analysis with Call: psych::fa(r = slope_z.set2, nfactors = 2, rotate = "oblimin", 
##     scores = "regression", fm = "pa")
## 
## Test of the hypothesis that 2 factors are sufficient.
## The degrees of freedom for the model is 4  and the objective function was  0.2 
## The number of observations was  182  with Chi Square =  34.91  with prob <  4.8e-07 
## 
## The root mean square of the residuals (RMSA) is  0.04 
## The df corrected root mean square of the residuals is  0.08 
## 
## Tucker Lewis Index of factoring reliability =  0.764
## RMSEA index =  0.206  and the 90 % confidence intervals are  0.147 0.272
## BIC =  14.1
##  With factor correlations of 
##       PA1   PA2
## PA1  1.00 -0.24
## PA2 -0.24  1.00
print(fa_oblim$loadings, cutoff = 0.3)
## 
## Loadings:
##                     PA1    PA2   
## slope_distance.amt  -0.344  0.366
## slope_order.amt     -0.316  0.706
## slope_count.amt             0.944
## slope_distance.prob  0.848       
## slope_order.prob     0.739       
## slope_count.prob     0.759       
## 
##                  PA1   PA2
## SS loadings    2.070 1.667
## Proportion Var 0.345 0.278
## Cumulative Var 0.345 0.623
fa_varim <- psych::fa(slope_z, nfactors = 2, rotate = "varimax",
                      fm = "pa", scores = "regression")
cat("## **Varimax (orthogonal) Rotation:**\n")
## ## **Varimax (orthogonal) Rotation:**
summary(fa_varim)
## 
## Factor analysis with Call: psych::fa(r = slope_z, nfactors = 2, rotate = "varimax", scores = "regression", 
##     fm = "pa")
## 
## Test of the hypothesis that 2 factors are sufficient.
## The degrees of freedom for the model is 4  and the objective function was  0.21 
## The number of observations was  182  with Chi Square =  36.85  with prob <  1.9e-07 
## 
## The root mean square of the residuals (RMSA) is  0.05 
## The df corrected root mean square of the residuals is  0.1 
## 
## Tucker Lewis Index of factoring reliability =  0.728
## RMSEA index =  0.212  and the 90 % confidence intervals are  0.153 0.278
## BIC =  16.04
print(fa_varim$loadings, cutoff = 0.3)
## 
## Loadings:
##                     PA1    PA2   
## slope_distance.amt   0.458 -0.477
## slope_order.amt      0.777 -0.331
## slope_count.amt      0.823       
## slope_distance.prob -0.305  0.690
## slope_order.prob    -0.505  0.650
## slope_count.prob            0.666
## 
##                  PA1   PA2
## SS loadings    1.845 1.698
## Proportion Var 0.307 0.283
## Cumulative Var 0.307 0.591
amt_vars  <- names(slope_z.set2)[grepl("\\.amt$",  names(slope_z.set2))]
prob_vars <- names(slope_z.set2)[grepl("\\.prob$", names(slope_z.set2))]
cfa_model <- paste0(
  "Prob =~ ", paste(prob_vars, collapse = " + "), "\n",
  "Amt  =~ ", paste(amt_vars,  collapse = " + ")
)

fit.set2 <- cfa(
  cfa_model,
  data      = slope_z.set2,
  std.lv    = TRUE,      # factors on SD=1 scale (helps interpret loadings)
  estimator = "MLR",
  missing   = "fiml"
)
summary(fit.set2)
## lavaan 0.6-19 ended normally after 19 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        19
## 
##   Number of observations                           182
##   Number of missing patterns                         1
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                                92.617     112.584
##   Degrees of freedom                                 8           8
##   P-value (Chi-square)                           0.000       0.000
##   Scaling correction factor                                  0.823
##     Yuan-Bentler correction (Mplus variant)                       
## 
## Parameter Estimates:
## 
##   Standard errors                             Sandwich
##   Information bread                           Observed
##   Observed information based on                Hessian
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   Prob =~                                             
##     slop_dstnc.prb    0.860    0.111    7.769    0.000
##     slope_ordr.prb    0.875    0.105    8.370    0.000
##     slope_cont.prb    0.562    0.120    4.693    0.000
##   Amt =~                                              
##     slope_dstnc.mt    0.502    0.094    5.340    0.000
##     slope_order.mt    0.993    0.067   14.829    0.000
##     slope_count.mt    0.676    0.057   11.866    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   Prob ~~                                             
##     Amt              -0.539    0.120   -4.497    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .slop_dstnc.prb   -0.000    0.074   -0.000    1.000
##    .slope_ordr.prb   -0.000    0.074   -0.000    1.000
##    .slope_cont.prb    0.000    0.074    0.000    1.000
##    .slope_dstnc.mt    0.000    0.074    0.000    1.000
##    .slope_order.mt    0.000    0.074    0.000    1.000
##    .slope_count.mt    0.000    0.074    0.000    1.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .slop_dstnc.prb    0.256    0.163    1.564    0.118
##    .slope_ordr.prb    0.229    0.157    1.456    0.145
##    .slope_cont.prb    0.679    0.106    6.392    0.000
##    .slope_dstnc.mt    0.743    0.115    6.447    0.000
##    .slope_order.mt    0.009    0.108    0.085    0.932
##    .slope_count.mt    0.537    0.076    7.071    0.000
##     Prob              1.000                           
##     Amt               1.000
fscores.set2 <- lavPredict(fit.set2, method = "regression")
re_wide.set2 <- re_wide.set2 %>%
  mutate(
    FA_prob.DROPT = as.numeric(fscores.set2[, "Prob"]),
    FA_amt.DROPT  = as.numeric(fscores.set2[, "Amt"])
  )
re_wide.set2 <- re_wide.set2 %>%
  select(
    matches("^amt"),   # all amt columns first
    matches("^prob"),  # then all prob columns
    everything()       # then everything else
  )
  • Correlation Matrix
re_wide.renamed.set2 <- re_wide.set2 %>%
  rename_with(~ .x %>%
    # strip "_z__DV: " prefix
    gsub("_z__DV: ", "_", .) %>%
    # lower-case
    tolower() %>%
    # replace spaces with underscores
    gsub(" ", "_", .) %>%
    # add slope_ prefix
    sub("^amt", "slope_amt", .) %>%
    sub("^prob", "slope_prob", .) %>%
    # handle FA columns
    sub("^fa_prob.*", "DROPT_FA_slope_prob", .) %>%
    sub("^fa_amt.*",  "DROPT_FA_slope_amt", .)
  )%>%
  rename_with(~ .x %>%
    # handle slope_* variables: move amt/prob to suffix
    sub("^slope_amt_(.*)$", "slope_\\1.amt", .) %>%
    sub("^slope_prob_(.*)$", "slope_\\1.prob", .) %>%
    # handle factor score vars
    sub("^DROPT_FA_slope_amt$",  "DROPT_FA.amt", .) %>%
    sub("^DROPT_FA_slope_prob$", "DROPT_FA.prob", .)
  )
ggpairs(
  re_wide.renamed.set2 %>%
    select(-starts_with("intercept"), -responseid)
)

Participants with mixed preferences (N=73):

ggpairs(
  re_wide.renamed.set2 %>% 
    filter(responseid%in%mixed_participants.set2)%>%
    select(-starts_with("intercept"), -responseid)
)

Correlation between Task1 and Task2 (across all participants)
re_wide.renamed.combined <- re_wide.renamed %>%
  left_join(
    re_wide.renamed.set2 %>% 
      rename_with(~ paste0(.x, ".Task2"), -responseid),
    by = "responseid"
  )

print_cor <- function(df, x, y, label, method = "pearson") {
  r <- (cor(df[[x]], df[[y]], use = "complete.obs", method = method))
  cat(sprintf("\nKey correlation (%s): %s vs %s = %.3f\n",
              label, x, y, r))
}

print_cor(re_wide.renamed.combined,
          "slope_distance.amt", "slope_distance.amt.Task2",
          "distance slope (amt) across Tasks")
## 
## Key correlation (distance slope (amt) across Tasks): slope_distance.amt vs slope_distance.amt.Task2 = 0.413
print_cor(re_wide.renamed.combined,
          "slope_distance.prob", "slope_distance.prob.Task2",
          "distance slope (prob) across Tasks")
## 
## Key correlation (distance slope (prob) across Tasks): slope_distance.prob vs slope_distance.prob.Task2 = 0.595
print_cor(re_wide.renamed.combined,
          "DROPT_FA.amt", "DROPT_FA.amt.Task2",
          "FA (amt) across Tasks")
## 
## Key correlation (FA (amt) across Tasks): DROPT_FA.amt vs DROPT_FA.amt.Task2 = 0.463
print_cor(re_wide.renamed.combined,
          "DROPT_FA.prob", "DROPT_FA.prob.Task2",
          "FA (prob) across Tasks")
## 
## Key correlation (FA (prob) across Tasks): DROPT_FA.prob vs DROPT_FA.prob.Task2 = 0.641
ggpairs(
  re_wide.renamed.combined %>% 
    select(
      slope_distance.amt,        slope_distance.amt.Task2,
      DROPT_FA.amt,              DROPT_FA.amt.Task2,
      slope_distance.prob,       slope_distance.prob.Task2,
      DROPT_FA.prob,             DROPT_FA.prob.Task2
    )
) +
  theme(
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    strip.text  = element_text(size = 14)
  )

Predict 2ND Lotter Ranking

First pass: do DROPT Task-1 signals help explain Task-2 choices?

  • I start by modeling Task-2 ranks from Task2_prob.z and Task2_amt.z (M1), then add interactions between Task-1 final rankings DROPT slopes (M2) and Task 2 attributes (M3-M4). The goal is to test whether Task-1 DROPT measures provide predictive value above and beyond the prob and amt slopes/weights.
  • I test them as interactions with Task-2 attributes (seems to be how this is done in discrete-choice/quant work). One could do this via precomputed products - though results remain consistent
master_df.predict<-master_df%>%
  left_join(re_wide.renamed%>%select(ResponseId=responseid,slope_final_rank.amt,slope_final_rank.prob,slope_distance.amt,slope_distance.prob,DROPT_FA.prob,DROPT_FA.amt),by="ResponseId")%>%
  mutate(slope_final_rank.amt_amt.z=set2_amt.z*slope_final_rank.amt,
         slope_final_rank.prob_prob.z=set2_prob.z*slope_final_rank.prob,
         slope_distance.amt_amt.z=set2_amt.z*slope_distance.amt,
         slope_distance.prob_prob.z=set2_prob.z*slope_distance.prob,
         DROPT_FA.amt_amt.z=set2_amt.z*DROPT_FA.amt,
         DROPT_FA.prob_prob.z=set2_prob.z*DROPT_FA.prob)
m1<-clmm(
  as.factor(set2_rank.r) ~ set2_prob.z + set2_amt.z+
    (set2_prob.z + set2_amt.z | ResponseId) + (1|bet_label),
  data = master_df.predict
)
m2<-clmm(
  as.factor(set2_rank.r) ~ set2_prob.z + set2_amt.z+set2_amt.z*slope_final_rank.amt+set2_prob.z*slope_final_rank.prob+
    (set2_prob.z + set2_amt.z | ResponseId) + (1|bet_label),
  data = master_df.predict
)
m4<-clmm(
  as.factor(set2_rank.r) ~ set2_prob.z + set2_amt.z+DROPT_FA.amt*set2_amt.z+DROPT_FA.prob*set2_prob.z+set2_amt.z*slope_final_rank.amt+set2_prob.z*slope_final_rank.prob+
    (set2_prob.z + set2_amt.z | ResponseId) + (1|bet_label),
  data = master_df.predict
)
m3<-clmm(
  as.factor(set2_rank.r) ~ set2_prob.z + set2_amt.z+set2_amt.z*slope_final_rank.amt+set2_prob.z*slope_final_rank.prob+slope_distance.prob*set2_prob.z+slope_distance.amt * set2_amt.z+
    (set2_prob.z + set2_amt.z | ResponseId) + (1|bet_label),
  data = master_df.predict
)

tab_model(m1,m2,m3,m4,drop = c("as.factor\\(set1_initial_order\\)", "\\|"),
  transform=NULL, dv.labels  = c("M1","M2","M3","M4"),pred.labels = c("Prob (z). Set2","Amt (z). Set2", "Amt Weight Set 1", "Prob Weight Set 1", "Amt Weight Set 1 x Amt (z). Set2", "Prob Weight Set 1 x Prob (z). Set2","Prob Distance Weight Set 1","Amt Distance Weight Set 1", "Prob Distance Weight Set 1 x Prob (z). Set2","Amt Distance Weight Set 1 x Amt (z). Set2", "Amt FA Set 1", "Prob FA Set 1", "Amt FA Set 1 x Amt (z). Set2", "Prob FA Set 1 x Prob (z). Set2")
)
  M1 M2 M3 M4
Predictors Log-Odds CI p Log-Odds CI p Log-Odds CI p Log-Odds CI p
Prob (z). Set2 1.81 1.05 – 2.57 <0.001 0.50 -0.20 – 1.20 0.161 0.34 -0.38 – 1.05 0.355 0.86 0.08 – 1.64 0.030
Amt (z). Set2 0.74 -0.09 – 1.57 0.080 0.14 -0.71 – 0.98 0.753 0.17 -0.68 – 1.02 0.693 0.17 -0.67 – 1.01 0.694
Amt Weight Set 1 0.07 -0.01 – 0.14 0.094 0.07 -0.05 – 0.18 0.261 0.07 -0.03 – 0.17 0.176
Prob Weight Set 1 0.01 -0.05 – 0.07 0.804 0.01 -0.10 – 0.11 0.906 -0.01 -0.12 – 0.09 0.777
Amt Weight Set 1 x Amt (z). Set2 0.67 0.48 – 0.85 <0.001 0.70 0.41 – 0.99 <0.001 0.65 0.40 – 0.90 <0.001
Prob Weight Set 1 x Prob (z). Set2 0.72 0.58 – 0.85 <0.001 0.49 0.25 – 0.72 <0.001 0.52 0.30 – 0.74 <0.001
Prob Distance Weight Set 1 0.01 -0.48 – 0.50 0.961
Amt Distance Weight Set 1 -0.01 -0.59 – 0.57 0.978
Prob Distance Weight Set 1 x Prob (z). Set2 1.23 0.17 – 2.29 0.023
Amt Distance Weight Set 1 x Amt (z). Set2 -0.24 -1.68 – 1.19 0.738
Amt FA Set 1 0.02 -0.26 – 0.30 0.905
Prob FA Set 1 0.08 -0.23 – 0.40 0.605
Amt FA Set 1 x Amt (z). Set2 0.01 -0.49 – 0.50 0.974
Prob FA Set 1 x Prob (z). Set2 0.54 0.03 – 1.06 0.038
Random Effects
σ2 3.29 3.29 3.29 3.29
τ00 0.03 ResponseId 0.03 ResponseId 0.03 ResponseId 0.03 ResponseId
1.12 bet_label 0.82 bet_label 0.84 bet_label 0.84 bet_label
τ11 4.98 ResponseId.set2_prob.z 2.34 ResponseId.set2_prob.z 2.21 ResponseId.set2_prob.z 2.25 ResponseId.set2_prob.z
5.00 ResponseId.set2_amt.z 3.50 ResponseId.set2_amt.z 3.50 ResponseId.set2_amt.z 3.49 ResponseId.set2_amt.z
ρ01 -0.19 ResponseId.set2_prob.z -0.05 ResponseId.set2_prob.z -0.08 ResponseId.set2_prob.z -0.08 ResponseId.set2_prob.z
1.00 ResponseId.set2_amt.z 1.00 ResponseId.set2_amt.z 0.99 ResponseId.set2_amt.z 0.99 ResponseId.set2_amt.z
ICC 0.79 0.66 0.66 0.66
N 182 ResponseId 182 ResponseId 182 ResponseId 182 ResponseId
6 bet_label 6 bet_label 6 bet_label 6 bet_label
Observations 1092 1092 1092 1092
Marginal R2 / Conditional R2 0.112 / 0.812 0.427 / 0.808 0.430 / 0.808 0.434 / 0.809
# why does the weight not capture this information?????
# why 
anova(m2, m3)
anova(m2, m4)
AIC(m1, m2, m3, m4)
BIC(m1, m2, m3, m4)
  • Check vifs

  • M1

car::vif(m1)
## set2_prob.z  set2_amt.z 
##    20.45466    21.92134
  • M2
car::vif(m2)
##                       set2_prob.z                        set2_amt.z 
##                         16.619290                         17.271313 
##              slope_final_rank.amt             slope_final_rank.prob 
##                         15.880761                          8.344836 
##   set2_amt.z:slope_final_rank.amt set2_prob.z:slope_final_rank.prob 
##                          1.253032                          2.290673
  • M3
car::vif(m3)
##                       set2_prob.z                        set2_amt.z 
##                         17.354552                         18.086626 
##              slope_final_rank.amt             slope_final_rank.prob 
##                         16.609759                          8.696683 
##               slope_distance.prob                slope_distance.amt 
##                          1.294828                          2.286446 
##   set2_amt.z:slope_final_rank.amt set2_prob.z:slope_final_rank.prob 
##                          3.161290                          3.818514 
##   set2_prob.z:slope_distance.prob     set2_amt.z:slope_distance.amt 
##                          3.948903                          3.127880
  • M4
car::vif(m4)
##                       set2_prob.z                        set2_amt.z 
##                         17.495154                         18.208543 
##                      DROPT_FA.amt                     DROPT_FA.prob 
##                         16.712594                          8.722881 
##              slope_final_rank.amt             slope_final_rank.prob 
##                          1.531494                          2.253158 
##           set2_amt.z:DROPT_FA.amt         set2_prob.z:DROPT_FA.prob 
##                          4.668527                          6.169909 
##   set2_amt.z:slope_final_rank.amt set2_prob.z:slope_final_rank.prob 
##                          2.421806                          3.524372

Prediction

  • Here I try to use weights derived from Task 1 DROPT data ( distance slopes and FA scores) to improve prediction of Task 2 rankings, above and beyond amount and probability weights
    • I first train the following models (clmm; all models including prob and amt random slopes and bet random effect)
      • M1: set1.rank ~ set1_prob.z + set1_amt.z
      • M2: set1.rank ~ set1_prob.z + set1_amt.z+set1_prob.z x slope_distance.prob+set1_amt.z x slope_distance.amt
      • M3: set1.rank ~ set1_prob.z + set1_amt.z+set1_prob.z x DROPT_FA.prob +set1_amt.z x DROPT_FA.amt
        • One caveat here is potential leakage: the FA and distance weights are themselves correlated with the final probability/amount weights, so the model might “re-use” outcome information rather than providing an independent signal.
    • Then each participant, we compute predicted scores for set2 items, and see if predictions based on M2 and/or M3 outperform those from M1.
set1.Rank<- clmm(
  as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+
    (set1_prob.z + set1_amt.z | ResponseId) + (1|bet_label),
  data = master_df.predict
)
set1.Rank_FA<-clmm(
  as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+set1_prob.z*DROPT_FA.prob+set1_amt.z*DROPT_FA.amt+
    (set1_prob.z + set1_amt.z | ResponseId) + (1|bet_label),
  data = master_df.predict
)
set1.Rank_Distance<-clmm(
  as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+set1_prob.z*slope_distance.prob+set1_amt.z*slope_distance.amt+
    (set1_prob.z + set1_amt.z | ResponseId) + (1|bet_label),
  data = master_df.predict
)

tab_model(set1.Rank,set1.Rank_Distance,set1.Rank_FA,  drop = c("as.factor\\(set1_initial_order\\)", "\\|"),
  transform=NULL
)
  as.factor(set1_rank.r) as.factor(set1_rank.r) as.factor(set1_rank.r)
Predictors Log-Odds CI p Log-Odds CI p Log-Odds CI p
set1 prob z 1.80 1.00 – 2.60 <0.001 -0.63 -1.33 – 0.08 0.083 1.67 0.97 – 2.37 <0.001
set1 amt z 0.63 -0.22 – 1.47 0.146 -0.35 -1.10 – 0.40 0.361 0.57 -0.21 – 1.35 0.150
slope distance prob 0.28 -0.02 – 0.58 0.066
slope distance amt 0.36 -0.03 – 0.74 0.073
set1 prob z × slope
distance prob
4.78 4.19 – 5.37 <0.001
set1 amt z × slope
distance amt
4.98 4.22 – 5.75 <0.001
DROPT FA prob 0.04 -0.19 – 0.27 0.733
DROPT FA amt 0.02 -0.21 – 0.26 0.839
set1 prob z × DROPT FA
prob
2.22 1.90 – 2.54 <0.001
set1 amt z × DROPT FA amt 1.62 1.28 – 1.95 <0.001
Random Effects
σ2 3.29 3.29 3.29
τ00 0.05 ResponseId 0.02 ResponseId 0.04 ResponseId
2.19 bet_label 1.79 bet_label 2.01 bet_label
τ11 6.68 ResponseId.set1_prob.z 0.87 ResponseId.set1_prob.z 1.41 ResponseId.set1_prob.z
5.15 ResponseId.set1_amt.z 1.17 ResponseId.set1_amt.z 2.18 ResponseId.set1_amt.z
ρ01 0.56 ResponseId.set1_prob.z 0.65 ResponseId.set1_prob.z 0.57 ResponseId.set1_prob.z
0.65 ResponseId.set1_amt.z 0.72 ResponseId.set1_amt.z 0.88 ResponseId.set1_amt.z
ICC 0.83 0.54 0.62
N 182 ResponseId 182 ResponseId 182 ResponseId
6 bet_label 6 bet_label 6 bet_label
Observations 1092 1092 1092
Marginal R2 / Conditional R2 0.094 / 0.847 0.646 / 0.839 0.582 / 0.840
# model<-set1.Rank_Distance
# data<-master_df.predict
# label<-"CLMM: base"
#                                     id_col = "ResponseId"
#                                     x_prob = "set2_prob.z"
#                                     x_amt  = "set2_amt.z"
#                                     fa_prob_col = "DROPT_FA.prob"
#                                     fa_amt_col  = "DROPT_FA.amt"
#                                     dist_prob_col = "slope_distance.prob"   
#                                     dist_amt_col  = "slope_distance.amt"    
#  
#   re_id <- tryCatch(ranef(model)[[id_col]], error = function(e) NULL)
#   re_df <- if (!is.null(re_id)) {
#     re_id %>%
#       as.data.frame(check.names = FALSE) %>%
#       rownames_to_column(id_col) %>%
#       rename(
#         b_prob = `set1_prob.z`,
#         b_amt  = `set1_amt.z`
#       ) %>%
#       select(any_of(c(id_col, "b_prob", "b_amt")))
#   } else {
#     tibble(!!id_col := unique(data[[id_col]]),
#            b_prob = 0, b_amt = 0)
#   } %>%
#     replace_na(list(b_prob = 0, b_amt = 0))
# 
#   # --- fixed-effect coefficients (location part, not thresholds)
#   beta <- tryCatch(ordinal::coef(model, type = "beta"),
#                    error = function(e) model$beta)
#   getb  <- function(pat) {
#     hit <- names(beta)[grepl(pat, names(beta), fixed = FALSE)]
#     if (length(hit)) unname(beta[[hit[1]]]) else 0
#   }
# 
#   # main effects
#   bfe_prob <- getb("^set1_prob\\.z$")    # base term
#   bfe_amt  <- getb("^set1_amt\\.z$")
# 
#   # interactions (auto-detect; names like set1_prob.z:DROPT_FA.prob, etc.)
#   bfe_prob_FA  <- getb("^set1_prob\\.z:DROPT_FA\\.prob$")
#   bfe_amt_FA   <- getb("^set1_amt\\.z:DROPT_FA\\.amt$")
#   bfe_prob_dist<- getb("^set1_prob\\.z:slope_distance\\.prob$")
#   bfe_amt_dist <- getb("^set1_amt\\.z:slope_distance\\.amt$")
#   bfe_prob_FA
#   bfe_amt_FA
# bfe_prob_dist
# bfe_amt_dist
#   # ensure optional columns exist in data; if not, treat as 0
#   safe_pull <- function(df, col) if (col %in% names(df)) df[[col]] else 0
# 
#   data %>%
#     left_join(re_df, by = id_col) %>%
#     mutate(
#       x_p = .data[[x_prob]],
#       x_a = .data[[x_amt]],
#       fa_p = safe_pull(cur_data_all(), fa_prob_col),
#       fa_a = safe_pull(cur_data_all(), fa_amt_col),
#       d_p  = safe_pull(cur_data_all(), dist_prob_col),
#       d_a  = safe_pull(cur_data_all(), dist_amt_col),
# 
#       # latent score used only for *ranking* within each person
#       latent_score =
#         (bfe_prob + b_prob) * x_p +
#         (bfe_amt  + b_amt)  * x_a +
#         bfe_prob_FA  * x_p * fa_p +
#         bfe_amt_FA   * x_a * fa_a +
#         bfe_prob_dist* x_p * d_p  +
#         bfe_amt_dist * x_a * d_a
#     )
# Generic predictor for set2 ranking accuracy from any of the 3 CLMMs
predict_set2_by_id_clmm <- function(model, data, label,
                                    id_col = "ResponseId",
                                    x_prob = "set2_prob.z",
                                    x_amt  = "set2_amt.z",
                                    fa_prob_col = "DROPT_FA.prob",
                                    fa_amt_col  = "DROPT_FA.amt",
                                    dist_prob_col = "slope_distance.prob",      # or "...prob.set2" if that's what you joined
                                    dist_amt_col  = "slope_distance.amt") {     # or "...amt.set2"
  # --- random slopes by participant (if present)
  re_id <- tryCatch(ranef(model)[[id_col]], error = function(e) NULL)
  re_df <- if (!is.null(re_id)) {
    re_id %>%
      as.data.frame(check.names = FALSE) %>%
      rownames_to_column(id_col) %>%
      rename(
        b_prob = `set1_prob.z`,
        b_amt  = `set1_amt.z`
      ) %>%
      select(any_of(c(id_col, "b_prob", "b_amt")))
  } else {
    tibble(!!id_col := unique(data[[id_col]]),
           b_prob = 0, b_amt = 0)
  } %>%
    replace_na(list(b_prob = 0, b_amt = 0))

  # --- fixed-effect coefficients (location part, not thresholds)
  beta <- tryCatch(ordinal::coef(model, type = "beta"),
                   error = function(e) model$beta)
  getb  <- function(pat) {
    hit <- names(beta)[grepl(pat, names(beta), fixed = FALSE)]
    if (length(hit)) unname(beta[[hit[1]]]) else 0
  }

  # main effects
  bfe_prob <- getb("^set1_prob\\.z$")    # base term
  bfe_amt  <- getb("^set1_amt\\.z$")

  # interactions (auto-detect; names like set1_prob.z:DROPT_FA.prob, etc.)
  bfe_prob_FA  <- getb("^set1_prob\\.z:DROPT_FA\\.prob$")
  bfe_amt_FA   <- getb("^set1_amt\\.z:DROPT_FA\\.amt$")
  bfe_prob_dist<- getb("^set1_prob\\.z:slope_distance\\.prob$")
  bfe_amt_dist <- getb("^set1_amt\\.z:slope_distance\\.amt$")

  # ensure optional columns exist in data; if not, treat as 0
  safe_pull <- function(df, col) if (col %in% names(df)) df[[col]] else 0

  data %>%
    left_join(re_df, by = id_col) %>%
    mutate(
      x_p = .data[[x_prob]],
      x_a = .data[[x_amt]],
      fa_p = safe_pull(cur_data_all(), fa_prob_col),
      fa_a = safe_pull(cur_data_all(), fa_amt_col),
      d_p  = safe_pull(cur_data_all(), dist_prob_col),
      d_a  = safe_pull(cur_data_all(), dist_amt_col),

      # latent score used only for *ranking* within each person
      latent_score =
        (bfe_prob + b_prob) * x_p +
        (bfe_amt  + b_amt)  * x_a +
        bfe_prob_FA  * x_p * fa_p +
        bfe_amt_FA   * x_a * fa_a +
        bfe_prob_dist* x_p * d_p  +
        bfe_amt_dist * x_a * d_a
    ) %>%
    group_by(.data[[id_col]]) %>%
    mutate(set2_rank_pred_rank = rank(-latent_score, ties.method = "average")) %>%
    ungroup() %>%
    group_by(.data[[id_col]]) %>%
    summarise(
      r_pearson   = cor(set2_rank_pred_rank, set2_rank,   method = "pearson", use = "pairwise.complete.obs"),
      tau_kendall = cor(set2_rank_pred_rank, set2_rank,   method = "kendall", use = "pairwise.complete.obs"),
      .groups = "drop"
    ) %>%
    mutate(model = label)
}

# --- Run it on each fitted model ---
acc_base      <- predict_set2_by_id_clmm(set1.Rank,           master_df.predict, "CLMM: base")
acc_distance  <- predict_set2_by_id_clmm(set1.Rank_Distance,  master_df.predict, "CLMM: + Distance")
acc_fa        <- predict_set2_by_id_clmm(set1.Rank_FA,        master_df.predict, "CLMM: + FA")

# Combine results
cor_all<-bind_rows(acc_base, acc_distance, acc_fa)
# model<-set1.Rank_Distance
# data<-master_df.predict
# label<-"CLMM: + Distance"
# id_col = "ResponseId"
# x_prob = "set2_prob.z"
# x_amt  = "set2_amt.z"
# fa_prob_col = "DROPT_FA.prob"
# fa_amt_col  = "DROPT_FA.amt"
# id_col = "ResponseId"
# x_prob = "set2_prob.z"
# x_amt  = "set2_amt.z"
# fa_prob_col = "DROPT_FA.prob"
# fa_amt_col  = "DROPT_FA.amt"
# dist_prob_col = "slope_distance.prob"
# dist_amt_col  = "slope_distance.amt"
# re_id <- tryCatch(ranef(model)[[id_col]], error = function(e) NULL)
# re_df <- if (!is.null(re_id)) {
#     re_id %>%
#       as.data.frame(check.names = FALSE) %>%
#       rownames_to_column(id_col) %>%
#       rename(
#         b_prob = `set1_prob.z`,
#         b_amt  = `set1_amt.z`
#       ) %>%
#       select(any_of(c(id_col, "b_prob", "b_amt")))
#   } else {
#     tibble(!!id_col := unique(data[[id_col]]),
#            b_prob = 0, b_amt = 0)
#   } %>%
#     replace_na(list(b_prob = 0, b_amt = 0))
# 
#   # --- fixed-effect coefficients (location part, not thresholds)
#   beta <- tryCatch(ordinal::coef(model, type = "beta"),
#                    error = function(e) model$beta)
#   getb  <- function(pat) {
#     hit <- names(beta)[grepl(pat, names(beta), fixed = FALSE)]
#     if (length(hit)) unname(beta[[hit[1]]]) else 0
#   }
# 
#   # main effects
#   bfe_prob <- getb("^set1_prob\\.z$")    # base term
#   bfe_amt  <- getb("^set1_amt\\.z$")
# 
#   # interactions (auto-detect; names like set1_prob.z:DROPT_FA.prob, etc.)
#   bfe_prob_FA  <- getb("^set1_prob\\.z:DROPT_FA\\.prob$")
#   bfe_amt_FA   <- getb("^set1_amt\\.z:DROPT_FA\\.amt$")
#   bfe_prob_dist<- getb("^set1_prob\\.z:slope_distance\\.prob$")
#   bfe_amt_dist <- getb("^set1_amt\\.z:slope_distance\\.amt$")
# 
#   # ensure optional columns exist in data; if not, treat as 0
#   safe_pull <- function(df, col) if (col %in% names(df)) df[[col]] else 0
# 
# data1<-data %>%
#     left_join(re_df, by = id_col) %>%
#     mutate(
#       x_p = .data[[x_prob]],
#       x_a = .data[[x_amt]],
#       fa_p = safe_pull(cur_data_all(), fa_prob_col),
#       fa_a = safe_pull(cur_data_all(), fa_amt_col),
#       d_p  = safe_pull(cur_data_all(), dist_prob_col),
#       d_a  = safe_pull(cur_data_all(), dist_amt_col),
# 
#       # latent score used only for *ranking* within each person
#       latent_score =
#         (bfe_prob + b_prob) * x_p +
#         (bfe_amt  + b_amt)  * x_a +
#         bfe_prob_FA  * x_p * fa_p +
#         bfe_amt_FA   * x_a * fa_a +
#         bfe_prob_dist* x_p * d_p  +
#         bfe_amt_dist * x_a * d_a
#     ) %>%
#     group_by(.data[[id_col]]) %>%
#     mutate(set2_rank_pred_rank = rank(-latent_score, ties.method = "average"))%>%
#     arrange(ResponseId)
# data2<-data %>%
#     left_join(re_df, by = id_col) %>%
#     mutate(
#       x_p = .data[[x_prob]],
#       x_a = .data[[x_amt]],
#       fa_p = safe_pull(cur_data_all(), fa_prob_col),
#       fa_a = safe_pull(cur_data_all(), fa_amt_col),
#       d_p  = safe_pull(cur_data_all(), dist_prob_col),
#       d_a  = safe_pull(cur_data_all(), dist_amt_col),
# 
#       # latent score used only for *ranking* within each person
#       latent_score =
#         (bfe_prob + b_prob) * x_p +
#         (bfe_amt  + b_amt)  * x_a 
#     ) %>%
#     group_by(.data[[id_col]]) %>%
#     mutate(set2_rank_pred_rank = rank(-latent_score, ties.method = "average"))%>%
#     arrange(ResponseId)
#   
#   
#   %>%
#     group_by(.data[[id_col]]) %>%
#     mutate(set2_rank_pred_rank = rank(-latent_score, ties.method = "average")) %>%
#     ungroup() %>%
#     group_by(.data[[id_col]]) %>%
#     summarise(
#       r_pearson   = cor(set2_rank_pred_rank, set2_rank,   method = "pearson", use = "pairwise.complete.obs"),
#       tau_kendall = cor(set2_rank_pred_rank, set2_rank,   method = "kendall", use = "pairwise.complete.obs"),
#       .groups = "drop"
#     ) %>%
#     mutate(model = label)
# }

Performances

  • Accuracy Calculation
    • For each participant, we compare the predicted rank with the true rank using both Pearson’s r and Kendall’s τ. This yields 182 data points per model, shown in the plot below.
  • black line represents median and red dot mean
# --- quick summaries (optional)
summary_by_model <- cor_all %>%
  group_by(model) %>%
  summarise(
    n = n(),
    mean_r   = mean(r_pearson, na.rm = TRUE),
    median_r = median(r_pearson, na.rm = TRUE),
    mean_tau = mean(tau_kendall, na.rm = TRUE),
    median_tau = median(tau_kendall, na.rm = TRUE),
    .groups = "drop"
  )

make_violin <- function(df, col, title) {
  # Calculate per-model means and medians
  stats <- df %>%
    group_by(model) %>%
    summarise(
      mean_val   = mean(.data[[col]], na.rm = TRUE),
      median_val = median(.data[[col]], na.rm = TRUE),
      .groups = "drop"
    )

  # Create caption text
  cap_text <- paste0(
    "Means: ",
    paste0(stats$model, " = ", sprintf("%.2f", stats$mean_val), collapse = " | "),
    "\nMedians: ",
    paste0(stats$model, " = ", sprintf("%.2f", stats$median_val), collapse = " | ")
  )

  ggplot(df, aes(x = model, y = .data[[col]], fill = model)) +
    geom_violin(alpha = 0.4, width = 0.9, color = "black") +
    geom_jitter(width = 0.12, alpha = 0.5, size = 1.6) +
    geom_point(data = stats, aes(x = model, y = mean_val),
               inherit.aes = FALSE, color = "red", size = 3.2) +
    geom_segment(data = stats,
                 aes(x = as.numeric(model) - 0.35, xend = as.numeric(model) + 0.35,
                     y = median_val, yend = median_val),
                 inherit.aes = FALSE, color = "black", linewidth = 1) +
    coord_cartesian(ylim = c(-1, 1)) +
    labs(x = NULL, y = "Correlation", title = title, caption = cap_text) +
    theme_minimal() +
  theme(
    legend.position = "none",
    plot.caption = element_text(
      size = 12,         # Increase font size
      face = "bold",     # Make bold
      margin = margin(t = 6)
    )
  )
}

cor_all<-cor_all%>%
  mutate(model = factor(model, levels = c("CLMM: base","CLMM: + Distance","CLMM: + FA")))
p_r.Distance_lmer   <- make_violin(cor_all, "r_pearson",   "Per-participant Pearson r")
p_tau.Distance_lmer <- make_violin(cor_all, "tau_kendall", "Per-participant Kendall \u03C4")


p_r.Distance_lmer + p_tau.Distance_lmer + plot_layout(ncol = 2)

  • Significance Testing
perm_paired_mean <- function(x, y, n_perm = 10000, seed = 123) {
  diffs <- y - x
  if (length(diffs) < 1) return(NA_real_)
  set.seed(seed)
  obs <- mean(diffs)
  # Fast vectorized sign flips
  signs <- matrix(sample(c(1,-1), length(diffs) * n_perm, replace = TRUE), nrow = n_perm)
  perm_means <- rowMeans(signs * rep(diffs, each = n_perm))
  mean(abs(perm_means) >= abs(obs))
}


# Compare M2 and M3 against M1 using a single metric column (by name)
compare_models_simple.Distance <- function(df, value_col,
                                  m1 = "CLMM: base",
                                  m2 = "CLMM: + Distance",
                                  m3 = "CLMM: + FA",
                                  n_perm = 10000) {

  # Build wide table with explicit column selection (no NSE)
  wide <- df %>%
    select(ResponseId, model, value = all_of(value_col)) %>%
    pivot_wider(names_from = model, values_from = value)

  # Helper to run the three paired tests for a pair of columns
  run_tests <- function(a_name, b_name, label) {
    x <- wide[[a_name]]
    y <- wide[[b_name]]
    diffs <- y - x  # positive => b_name improves over a_name

    cat(sprintf("%s: pairs used = %d\n", label, length(diffs)))

    # Paired t-test
    t_res <- try(t.test(y, x, paired = TRUE), silent = TRUE)
    if (inherits(t_res, "try-error")) {
      t_p <- NA_real_; t_stat <- NA_real_; t_df <- NA_real_
    } else {
      t_p <- unname(t_res$p.value)
      t_stat <- unname(t_res$statistic)
      t_df <- unname(t_res$parameter)
    }

    # Wilcoxon signed-rank (paired)
    w_res <- try(wilcox.test(y, x, paired = TRUE, exact = FALSE), silent = TRUE)
    if (inherits(w_res, "try-error")) {
      w_p <- NA_real_; w_V <- NA_real_
    } else {
      w_p <- unname(w_res$p.value)
      w_V <- unname(w_res$statistic)
    }

    # Permutation (paired sign-flip) on mean diff
    p_perm <- perm_paired_mean(x, y, n_perm = n_perm)

    data.frame(
      Comparison    = label,
      N_pairs       = length(diffs),
      Mean_Diff     = if (length(diffs)) mean(diffs) else NA_real_,
      Median_Diff   = if (length(diffs)) median(diffs) else NA_real_,
      Prop_Better = if (length(diffs)) mean(diffs > 0) else NA_real_,
      Prop_Worse = if (length(diffs)) mean(diffs < 0) else NA_real_,
      Prop_Tie = if (length(diffs)) mean(diffs == 0) else NA_real_,
      t_test_p      = t_p,
      t_statistic   = t_stat,
      t_df          = t_df,
      wilcoxon_p    = w_p,
      wilcoxon_V    = w_V,
      permutation_p = p_perm,
      check.names   = FALSE
    )
  }

  rbind(
    run_tests(m1, m2, "Adding Distance Slope"),
    run_tests(m1, m3, "Adding FA Score")
  ) %>%
    mutate(
      Mean_Diff     = round(Mean_Diff, 3),
      Median_Diff   = round(Median_Diff, 3),
      Prop_Better = round(Prop_Better, 3),
      Prop_Worse = round(Prop_Worse, 3),
      Prop_Tie = round(Prop_Tie, 3),
      t_test_p      = round(t_test_p, 4),
      t_statistic   = round(t_statistic, 3),
      t_df          = round(t_df),
      wilcoxon_p    = round(wilcoxon_p, 4),
      permutation_p = round(permutation_p, 4)
    )
}

# ---- Build the two tables (Pearson r and Kendall tau) ----
# Expect cor_all to have: ResponseId, model, r_pearson, tau_kendall
table_r.distance_lmer   <- compare_models_simple.Distance(cor_all, "r_pearson")
## Adding Distance Slope: pairs used = 182
## Adding FA Score: pairs used = 182
table_tau.distance_lmer <- compare_models_simple.Distance(cor_all, "tau_kendall")
## Adding Distance Slope: pairs used = 182
## Adding FA Score: pairs used = 182
library(kableExtra)

kable(table_r.distance_lmer,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Pearson r (Distance)</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Pearson r (Distance)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding Distance Slope 182 0.017 0 0.104 0.071 0.824 0.1072 1.619 181 0.0971 353.0 0.1169
Adding FA Score 182 0.007 0 0.066 0.077 0.857 0.4427 0.769 181 0.7700 187.5 0.4794
kable(table_tau.distance_lmer,
     caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Kendall &tau; (Distance)</span></b>",
     digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
  kable_styling(full_width = FALSE)
Model comparison using Kendall τ (Distance)
Comparison N_pairs Mean_Diff Median_Diff Prop_Better Prop_Worse Prop_Tie t_test_p t_statistic t_df wilcoxon_p wilcoxon_V permutation_p
Adding Distance Slope 182 0.015 0 0.099 0.066 0.835 0.1164 1.578 181 0.0783 317.5 0.0985
Adding FA Score 182 0.001 0 0.055 0.077 0.868 0.8480 0.192 181 0.8400 142.5 0.7827

Final Thoughts

  • Interpretations: How should we interpret these results? Are they evidence of a causal psychological process (e.g., attention shaping choice), or that people pay more attention to the options they already value more? Or are the findings less theoretically interesting, reflecting a mechanical artifact — items dragged further up the list will, naturally, end up ranked higher (much like a car driving toward location A naturally ends up closer to A/ or a slider bar dragged farther toward one side will, naturally, be positioned closer to that option.)
  • Binary Prediction?

–>

–>

–> –> –> –> –> –>

–>

–> –> –>

–> –> –>

–> –> –>

–> –> –> –> –>

–>