# Open analysis questions:
# 1. Why Kendall's tau? How should ties be handled? Spearman's rho seems more granular (see the granularity check below).
# 2. Can we run a t-test on tau directly, or do we need a non-parametric test? Consider Olivier's suggestion of transforming to z; probably fine (see the sketch below).
# 3. Can we somehow estimate a weight only for positive (upward) ranking, i.e., only count a move when it ranks an item upward?
# 4. If the x-axis is tau, what should the y-axis be?
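# Re: question 2 -- a minimal sketch (not part of the pipeline) of Fisher z-transforming
# per-subject correlations before a parametric test. fisher_z() and the example vectors
# below are illustrative assumptions only; note that atanh(1) is infinite, so taus of
# exactly 1 would need to be clipped first.
fisher_z <- function(r) atanh(r)  # z = 0.5 * log((1 + r) / (1 - r))
# example_tau_A <- c(0.60, 0.73, 0.87, 1.00)  # hypothetical per-subject taus, condition A
# example_tau_B <- c(0.47, 0.60, 0.87, 0.73)  # hypothetical per-subject taus, condition B
# t.test(fisher_z(pmin(example_tau_A, 0.999)), fisher_z(pmin(example_tau_B, 0.999)))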
# Load required packages
library(gtools)     # for permutations()
library(tidyverse)  # dplyr, tidyr, stringr, ggplot2, used throughout
library(patchwork)  # for combining ggplots with `|`
`%notin%` <- Negate(`%in%`)  # helper operator used for the exclusion filters below
# Generate all permutations of 1:6
perms <- permutations(n = 6, r = 6)
ref <- 1:6
n <- length(ref)
# Initialize vectors
kendall_vals <- numeric(nrow(perms))
spearman_vals <- numeric(nrow(perms))
footrule_vals <- numeric(nrow(perms))
# Compute all metrics
for (i in 1:nrow(perms)) {
p <- perms[i, ]
# Kendall's tau
kendall_vals[i] <- cor(p, ref, method = "kendall")
# Spearman's rho
spearman_vals[i] <- cor(p, ref, method = "spearman")
# Footrule distance (normalized to range [–1, 1])
raw_footrule <- sum(abs(p - ref))
max_footrule <- sum(abs(rev(ref) - ref)) # = 18 for n = 6
norm_footrule <- 1 - (2 * raw_footrule / max_footrule)
footrule_vals[i] <- norm_footrule
}
# Count unique values
cat("Unique Kendall tau values: ", length(unique(kendall_vals)), "\n")
## Unique Kendall tau values: 16
cat("Unique Spearman rho values: ", length(unique(spearman_vals)), "\n")
## Unique Spearman rho values: 36
cat("Unique Footrule scores: ", length(unique(footrule_vals)), "\n")
## Unique Footrule scores: 10
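# A quick follow-up sketch for question 1: how closely do the three metrics agree
# across all 720 permutations? (Uses only the vectors computed in the loop above.)
cat("cor(tau, rho):     ", round(cor(kendall_vals, spearman_vals), 3), "\n")
cat("cor(tau, footrule):", round(cor(kendall_vals, footrule_vals), 3), "\n")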
## Getting started ##
# Set WD
#setwd("C:/Users/Arian/Documents/GitHub/SAB-Lab/Dynamic Rank Order Processes Tracing (DROPT)/Shape and Lottery Experiments/Lottery Study_Preferences/Arians-Sandbox")
# Get both data files; pick and choose from each
data_num <- read.csv("DROPT_Lotteries_full_num.csv") %>%
dplyr::slice(3:n())
data_text <- read.csv("DROPT_Lotteries_full_text.csv") %>%
dplyr::slice(3:n())
# set numeric file as base
dat <- data_num
# Convert all columns that are not yet numeric but can be to numeric
dat[] <- lapply(dat, function(col) {
if (is.character(col) || is.factor(col)) {
suppressWarnings(num_col <- as.numeric(as.character(col)))
# If most values are not NA after conversion, assume it's a valid numeric column
if (sum(!is.na(num_col)) > 0.8 * length(num_col)) {
return(num_col)
} else {
return(col) # leave it as-is
}
} else {
return(col)
}
})
## Setup: Merge data frames - take some of the demographic information from the text df
dat$birthYear <- as.numeric(as.character(data_text$age))
dat$age <- 2025 - dat$birthYear
dat$gender <- factor(data_text$gender)
dat$gender_binary <- ifelse(dat$gender == "Male", "Male",
ifelse(dat$gender == "Female", "Female", NA))
dat$education <- data_text$education
dat$education_num <- as.numeric(data_num$education)
dat$income <- ifelse(dat$income != 10, as.numeric(dat$income), NA)
## necessary recoding
correct_answer <- "1,2,3"
dat <- dat %>%
mutate(dose.coded = ifelse(Dose == correct_answer, "Correct", "Incorrect"))
dat$attn1.coded <- ifelse(dat$attn1 == 2, "Correct", "Incorrect")
dat$dose.coded <- as.factor(dat$dose.coded)
dat$attn1.coded <- as.factor(dat$attn1.coded)
dat$bonus_belief[dat$bonus_belief == 1] <- 'Yes'
dat$bonus_belief[dat$bonus_belief == 0] <- 'No'
dat$bonus_belief <- factor(dat$bonus_belief, levels = names(sort(table(dat$bonus_belief), decreasing = TRUE)))
dat <- dat %>%
mutate(StartDate = as.Date(StartDate, format = "%Y-%m-%d")) %>%
rename(Bot_score = "Q_RecaptchaScore") %>%
rename(Fraud_score = "Q_RelevantIDFraudScore") %>%
rename(Duration = "Duration..in.seconds.")
## Apply filters
dat <- dat %>%
  filter(PROLIFIC_PID != "") %>%                 # missing Prolific IDs
  filter(DistributionChannel != "preview") %>%   # test runs (preview)
  filter(Bot_score >= 0.50) %>%                  # likely bots (reCAPTCHA score below 0.5)
  #filter(Fraud_score <= 30) %>%                 # fraudulent (score cutoff still open)
  filter(attn1.coded == "Correct")               # failed attention check
# Respondents with tau <= 0.5 are excluded later, but only in the payoff (Amount) and Prob tasks.
filter_summary <- data.frame(
Filter_Name = c(
'Test Runs (Preview)',
'Bot_score < 0.50',
'Attention Check failed',
'Sum'
),
People_Filtered_Out = c(1,3,6,10)
)
filter_summary
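# A sketch of deriving these counts instead of hardcoding them. It assumes a copy of the
# data frame taken before the filters above (dat_raw is a hypothetical name, not defined here):
# n0 <- nrow(dat_raw)
# n1 <- nrow(dat_raw %>% filter(DistributionChannel != "preview"))
# n2 <- nrow(dat_raw %>% filter(DistributionChannel != "preview", Bot_score >= 0.50))
# n3 <- nrow(dat_raw %>% filter(DistributionChannel != "preview", Bot_score >= 0.50,
#                               attn1.coded == "Correct"))
# c(Preview = n0 - n1, Bot = n1 - n2, Attention = n2 - n3, Sum = n0 - n3)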
## Create long data frames
dat_long1 <- dat %>%
pivot_longer(
cols = matches("Set1_L[1-6]_(Prob|Amt)"),
names_to = c("lottery", ".value"),
names_pattern = "Set1_(L[1-6])_(Prob|Amt)"
) %>%
mutate(item.f = case_when(
lottery == "L1" ~ "Pr6_Amt1",
lottery == "L2" ~ "Pr5_Amt2",
lottery == "L3" ~ "Pr4_Amt3",
lottery == "L4" ~ "Pr3_Amt4",
lottery == "L5" ~ "Pr2_Amt5",
lottery == "L6" ~ "Pr1_Amt6"
))
dat_long2 <- dat %>%
pivot_longer(
cols = matches("Set2_L[1-6]_(Prob|Amt)"),
names_to = c("lottery", ".value"),
names_pattern = "Set2_(L[1-6])_(Prob|Amt)"
) %>%
mutate(item.f = case_when(
lottery == "L1" ~ "Pr6_Amt1",
lottery == "L2" ~ "Pr5_Amt2",
lottery == "L3" ~ "Pr4_Amt3",
lottery == "L4" ~ "Pr3_Amt4",
lottery == "L5" ~ "Pr2_Amt5",
lottery == "L6" ~ "Pr1_Amt6"
))
# write a function for dfs later
process_task <- function(data, rank_column, task_label) {
# Extract the initial order
initial_order <- sub("^\\{([^}]*)\\}.*", "\\1", data[[rank_column]]) # captures the content within the first {} in the string; we will apply this to the RankProcess column; Using double bracket to capture a vector
initial_order <- gsub("0; ", "", initial_order) # remove the 0 timestamp
initial_order_split <- strsplit(initial_order, ",") # separate the strings into list
# Identify unique items
unique_items <- unique(unlist(initial_order_split))
# Create a data frame to store initial ranks
initial_positions_df <- data.frame(matrix(ncol = length(unique_items), nrow = length(initial_order_split))) # nrow is the number of respondents
names(initial_positions_df) <- paste0("initial.items_", unique_items)
# Fill initial ranks
for (i in seq_along(initial_order_split)) {
initial_order <- initial_order_split[[i]] # for each respondent, extract the string of initial order
for (j in seq_along(initial_order)) {
item <- initial_order[j]
initial_positions_df[i, paste0("initial.items_", item)] <- j
}
}
data <- cbind(data, initial_positions_df)
assign(paste0("initial.dat_", task_label), data, envir = .GlobalEnv)
cor_results <- data.frame(
item = paste0("rank_", task_label, "_", unique_items),
initial_item = paste0("initial.items_", unique_items),
correlation = NA_real_,
p_value = NA_real_
)
cor_results <- cor_results %>% # it is important to do this step by Task, because IDs are only unique and consistent within each quiz.
rowwise() %>%
dplyr::mutate(
correlation = cor.test(data[[item]], data[[initial_item]])$estimate,
p_value = cor.test(data[[item]], data[[initial_item]])$p.value
) %>%
dplyr::mutate(sig = p_value < 0.05)
cor_results$task <- task_label
cor_results
}
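# Toy illustration of the RankProcess string format implied by the parsing above
# (the string itself is hypothetical, not taken from the data):
toy <- "{0; 49,50,64,65,67,68}{1201, 1450; 50,49,64,65,67,68}"
initial_block <- sub("^\\{([^}]*)\\}.*", "\\1", toy)    # "0; 49,50,64,65,67,68" - the block at timestamp 0
strsplit(gsub("0; ", "", initial_block), ",")[[1]]      # items in their initial on-screen order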
# List of datasets, rank columns, and task labels
tasks <- list(
list(data = dat, rank_column = "RankProcess_Prob", task_label = "prob"),
list(data = dat, rank_column = "RankProcess_Amount", task_label = "amount"),
list(data = dat, rank_column = "RankProcess_Prefer1", task_label = "pref1"),
list(data = dat, rank_column = "RankProcess_Prefer2", task_label = "pref2")
)
all_results <- bind_rows(lapply(tasks, function(t) {
process_task(t$data, t$rank_column, t$task_label)
}))
### Collection of Wrapper functions that can be applied to all dfs ###
## COUNT ##
get_DROPT_count <- function(dat, rank_col, rank_all_col, item_numbers, item_labels,
condition, initial_dat, dat_long) {
# Load required packages
library(dplyr)
library(tidyr)
library(rlang)
library(stringr)
# Validate inputs
if (length(item_numbers) != length(item_labels)) {
stop("item_numbers and item_labels must be the same length.")
}
item_map <- setNames(item_labels, item_numbers)
# --- Helper: Create Rank Process ---
create_RankProcess <- function(data, col_name) {
col_sym <- sym(col_name)
data %>%
select(ResponseId, !!col_sym) %>%
separate_rows(!!col_sym, sep = "}") %>%
mutate(!!col_sym := gsub("[{}]", "", !!col_sym)) %>%
filter(!!col_sym != "") %>%
separate(!!col_sym, into = c("timing", "order"), sep = ";") %>%
mutate(order = trimws(order)) %>%
group_by(ResponseId) %>%
mutate(step = row_number() - 1) %>%
select(step, everything()) %>%
ungroup()
}
# --- Helper: Create Rank Process All ---
create_RankProcess_all <- function(data, col_name) {
col_sym <- sym(col_name)
data %>%
select(ResponseId, !!col_sym) %>%
separate_rows(!!col_sym, sep = "}") %>%
mutate(!!col_sym := gsub("[{}]", "", !!col_sym)) %>%
filter(!!col_sym != "") %>%
separate(!!col_sym, into = c("timing", "order_all"), sep = ";")
}
# --- Helper: Process and Clean Data ---
process_RankProcess_data <- function(rank_data, rank_all_data, item_map) {
item_numbers <- names(item_map)
processed <- rank_data %>%
left_join(rank_all_data, by = c("ResponseId", "timing")) %>%
mutate(order_all = trimws(order_all),
item_moved = as.numeric(sub(",.*", "", order_all))) %>%
ungroup() %>%
mutate(item.f = as.factor(item_map[as.character(item_moved)]))
na_subj <- processed %>%
filter(is.na(item_moved)) %>%
pull(ResponseId)
is_valid_order <- grepl("^\\d+(,\\d+){5}$", processed$order_all)
bug_respondents <- processed %>%
filter(is_valid_order & timing != 0) %>%
pull(ResponseId)
cleaned <- processed %>%
filter(!ResponseId %in% c(na_subj, bug_respondents))
drag_counts <- cleaned %>%
filter(step != 0) %>%
group_by(ResponseId) %>%
dplyr::summarize(
!!!setNames(
lapply(item_numbers, function(x) {
rlang::expr(sum(item_moved == !!as.numeric(x)))
}),
paste0("item_", item_numbers, "_moved.N")
),
.groups = "drop"
)
return(list(
cleaned_data = cleaned,
bug_respondent = unique(bug_respondents),
na_subj = unique(na_subj),
drag_and_drop_count = drag_counts
))
}
# --- Main workflow ---
rank_data <- create_RankProcess(dat, rank_col)
rank_all_data <- create_RankProcess_all(dat, rank_all_col)
result <- process_RankProcess_data(rank_data, rank_all_data, item_map)
# --- Create long-format data ---
drag_and_drop_count_long <- result$drag_and_drop_count %>%
pivot_longer(
cols = starts_with("item_"),
names_to = c("item_number", ".value"),
names_sep = "_moved\\."
) %>%
mutate(
condition = condition,
item_number = as.numeric(gsub("item_", "", item_number)),
item.f = as.factor(item_map[as.character(item_number)])
) %>%
mutate(
rank.Amount = case_when(
item.f == "Pr6_Amt1" ~ 1,
item.f == "Pr5_Amt2" ~ 2,
item.f == "Pr4_Amt3" ~ 3,
item.f == "Pr3_Amt4" ~ 4,
item.f == "Pr2_Amt5" ~ 5,
item.f == "Pr1_Amt6" ~ 6
),
rank.Prob = case_when(
item.f == "Pr6_Amt1" ~ 6,
item.f == "Pr5_Amt2" ~ 5,
item.f == "Pr4_Amt3" ~ 4,
item.f == "Pr3_Amt4" ~ 3,
item.f == "Pr2_Amt5" ~ 2,
item.f == "Pr1_Amt6" ~ 1
)
) %>%
left_join(initial_dat %>% select(ResponseId, starts_with("initial.items_")), by = "ResponseId") %>%
mutate(
initial.rank = case_when(
item.f == "Pr6_Amt1" ~ initial.items_49,
item.f == "Pr5_Amt2" ~ initial.items_50,
item.f == "Pr4_Amt3" ~ initial.items_64,
item.f == "Pr3_Amt4" ~ initial.items_65,
item.f == "Pr2_Amt5" ~ initial.items_67,
item.f == "Pr1_Amt6" ~ initial.items_68
),
initial.rank.r = relevel(factor(7 - initial.rank), ref = 6),
N_ind = ifelse(N == 0, 0, 1)
) %>%
select(-c(starts_with("initial.items_"))) %>%
left_join(dat_long %>% select(ResponseId, item.f, Prob, Amt), by = c("ResponseId", "item.f"))
result$drag_and_drop_count_long <- drag_and_drop_count_long
return(result)
}
## DISTANCE ##
get_DROPT_distance <- function(
data,
items,
item_labels,
order_col = "order",
step_col = "step",
timing_col = "timing",
item_f_col = "item.f",
response_id_col = "ResponseId",
condition_label = "Condition"
) {
# Step 1: Split 'order' into ranks
data_processed <- data %>%
group_by(.data[[response_id_col]]) %>%
mutate(parts = str_split(.data[[order_col]], ",")) %>%
mutate(
Rank1 = sapply(parts, function(x) x[1]),
Rank2 = sapply(parts, function(x) x[2]),
Rank3 = sapply(parts, function(x) x[3]),
Rank4 = sapply(parts, function(x) x[4]),
Rank5 = sapply(parts, function(x) x[5]),
Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA))
) %>%
select(-parts) %>%
ungroup()
# Step 2: Add current_* columns
for (item in items) {
data_processed[[paste0("current_", item)]] <- NA_integer_
}
data_processed <- data_processed %>%
rowwise() %>%
mutate(across(
starts_with("current_"),
~ {
item_number <- str_remove(cur_column(), "current_")
case_when(
Rank1 == item_number ~ 1,
Rank2 == item_number ~ 2,
Rank3 == item_number ~ 3,
Rank4 == item_number ~ 4,
Rank5 == item_number ~ 5,
Rank6 == item_number ~ 6,
TRUE ~ 1
)
}
)) %>%
ungroup()
# Step 3: Add lagged positions
for (item in items) {
data_processed[[paste0("last_", item)]] <- lag(data_processed[[paste0("current_", item)]])
}
# Step 4: Compute movement direction
data_processed <- data_processed %>%
group_by(.data[[response_id_col]]) %>%
rowwise() %>%
mutate(
current_item_moved = get(paste0("current_", get("item_moved"))),
last_item_moved = get(paste0("last_", get("item_moved"))),
move_direction = case_when(
is.na(last_item_moved) ~ "no_change",
current_item_moved < last_item_moved ~ "up",
current_item_moved > last_item_moved ~ "down",
TRUE ~ "no_change"
)
) %>%
ungroup()
# Step 5: Filter step != 0
data_processed <- data_processed %>%
filter(.data[[step_col]] != 0)
# Step 6: Distance and time difference
distance_data <- data_processed %>%
separate(.data[[timing_col]], into = c("drag_time", "drop_time"), sep = ", ", convert = TRUE) %>%
mutate(across(starts_with("current_"), as.numeric),
across(starts_with("last_"), as.numeric),
DD_diff = drop_time - drag_time,
condition = condition_label)
for (item in items) {
distance_data[[paste0("distance_", item)]] <- distance_data[[paste0("current_", item)]] - distance_data[[paste0("last_", item)]]
}
distance_data <- distance_data %>%
select(drag_time, DD_diff, starts_with("distance_"), all_of(c(order_col, item_f_col, step_col, response_id_col, "condition")))
# Step 7: Keep only latest step per item.f
distance_data <- distance_data %>%
group_by(.data[[response_id_col]]) %>%
arrange(.data[[step_col]]) %>%
filter(!duplicated(.data[[item_f_col]])) %>%
ungroup()
# Step 8: Create full grid of items per respondent
unique_ids <- distance_data %>%
distinct(.data[[response_id_col]]) %>%
pull()
distance_grid <- expand.grid(
ResponseId = unique_ids,
item.f = item_labels
)
# Step 9: Merge and compute final distance columns
full_df <- distance_grid %>%
left_join(distance_data, by = c("ResponseId", "item.f")) %>%
arrange(ResponseId) %>%
mutate(distance = case_when(
item.f == item_labels[1] ~ distance_49,
item.f == item_labels[2] ~ distance_50,
item.f == item_labels[3] ~ distance_64,
item.f == item_labels[4] ~ distance_65,
item.f == item_labels[5] ~ distance_67,
item.f == item_labels[6] ~ distance_68,
TRUE ~ 0
),
distance = replace_na(distance, 0),
distance.abs = abs(distance),
distance.r = distance * -1)
return(full_df)
}
## ORDER ##
get_DROPT_order <- function(
data,
dat_long,
initial_data,
item_labels,
item_codes,
condition_label = "condition",
step_col = "step",
response_id_col = "ResponseId",
item_col = "item.f",
item_moved_col = "item_moved",
prefix_initial = "initial.items_",
condition_value = "condition_name" # example: "pref1"
) {
# Step 1: Filter and order
touch_order <- data %>%
filter(.data[[step_col]] != 0) %>%
group_by(.data[[response_id_col]]) %>%
arrange(.data[[step_col]]) %>%
filter(!duplicated(.data[[item_moved_col]])) %>%
mutate(order = row_number()) %>%
ungroup() %>%
mutate(!!condition_label := condition_value)
# Step 2: Expand to all items × respondent
long_df <- expand_grid(
!!response_id_col := unique(touch_order[[response_id_col]]),
!!item_col := unique(touch_order[[item_col]])
)
# Step 3: Add in order and condition
order_max <- long_df %>%
left_join(touch_order %>% select(all_of(c(response_id_col, item_col, "order"))),
by = c(response_id_col, item_col)) %>%
left_join(touch_order %>%
select(all_of(c(response_id_col, condition_label))) %>%
filter(!duplicated(.data[[response_id_col]])),
by = response_id_col) %>%
group_by(.data[[response_id_col]]) %>%
dplyr::summarize(max_order = max(order, na.rm = TRUE), .groups = "drop")
long_df <- long_df %>%
left_join(touch_order %>% select(all_of(c(response_id_col, item_col, "order"))),
by = c(response_id_col, item_col)) %>%
left_join(touch_order %>%
select(all_of(c(response_id_col, condition_label))) %>%
filter(!duplicated(.data[[response_id_col]])),
by = response_id_col) %>%
left_join(order_max, by = response_id_col) %>%
mutate(order = if_else(!is.na(order), order, max_order + 1))
# Step 4: Add rank.Amount and rank.Prob
long_df <- long_df %>%
mutate(
rank.Amount = case_when(
!!sym(item_col) == item_labels[1] ~ 1,
!!sym(item_col) == item_labels[2] ~ 2,
!!sym(item_col) == item_labels[3] ~ 3,
!!sym(item_col) == item_labels[4] ~ 4,
!!sym(item_col) == item_labels[5] ~ 5,
!!sym(item_col) == item_labels[6] ~ 6
),
rank.Prob = case_when(
!!sym(item_col) == item_labels[1] ~ 6,
!!sym(item_col) == item_labels[2] ~ 5,
!!sym(item_col) == item_labels[3] ~ 4,
!!sym(item_col) == item_labels[4] ~ 3,
!!sym(item_col) == item_labels[5] ~ 2,
!!sym(item_col) == item_labels[6] ~ 1
)
)
# Step 5: Add initial rank
long_df <- long_df %>%
left_join(initial_data %>%
select(all_of(c(response_id_col, paste0(prefix_initial, item_codes)))),
by = response_id_col) %>%
mutate(
initial.rank = case_when(
!!sym(item_col) == item_labels[1] ~ .data[[paste0(prefix_initial, item_codes[1])]],
!!sym(item_col) == item_labels[2] ~ .data[[paste0(prefix_initial, item_codes[2])]],
!!sym(item_col) == item_labels[3] ~ .data[[paste0(prefix_initial, item_codes[3])]],
!!sym(item_col) == item_labels[4] ~ .data[[paste0(prefix_initial, item_codes[4])]],
!!sym(item_col) == item_labels[5] ~ .data[[paste0(prefix_initial, item_codes[5])]],
!!sym(item_col) == item_labels[6] ~ .data[[paste0(prefix_initial, item_codes[6])]]
),
initial.rank.r = 7 - initial.rank,
initial.rank.r = relevel(factor(initial.rank.r), ref = 6)
) %>%
select(-starts_with(prefix_initial))
# Step 6: Merge with dat_long
long_df <- long_df %>%
left_join(dat_long %>%
select(all_of(c(response_id_col, item_col, "Prob", "Amt"))),
by = c(response_id_col, item_col))
return(long_df)
}
### AMOUNT ###
## COUNT ##
result_amount <- get_DROPT_count(
dat = dat,
rank_col = "RankProcess_Amount",
rank_all_col = "RankProcess_all_Amount",
item_numbers = c(49, 50, 64, 65, 67, 68),
item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
condition = "Amount",
initial_dat = initial.dat_amount,
dat_long = dat_long1
)
# Extract outputs as needed
RankProcess_Amount <- result_amount$cleaned_data
bug_respondent_Amount <- result_amount$bug_respondent
na_subj_Amount <- result_amount$na_subj
drag_and_drop_count_Amount <- result_amount$drag_and_drop_count
drag_and_drop_count_Amount_long <- result_amount$drag_and_drop_count_long
# Create summary df and compute each respondent's tau (respondents with tau <= 0.5 are excluded later)
Summary_data_Amount<- expand_grid(
ResponseId = unique(RankProcess_Amount$ResponseId),
item.f = unique(RankProcess_Amount$item.f))
Summary_data_Amount<-Summary_data_Amount%>%
mutate(rank.amount=
case_when(
item.f=="Pr6_Amt1" ~1,
item.f=="Pr5_Amt2" ~ 2,
item.f== "Pr4_Amt3" ~ 3,
item.f== "Pr3_Amt4" ~ 4,
item.f == "Pr2_Amt5" ~ 5,
item.f == "Pr1_Amt6" ~6),
rank.Prob=case_when(
item.f=="Pr6_Amt1" ~6,
item.f=="Pr5_Amt2" ~ 5,
item.f== "Pr4_Amt3" ~ 4,
item.f== "Pr3_Amt4" ~ 3,
item.f == "Pr2_Amt5" ~ 2,
item.f == "Pr1_Amt6" ~1
))%>%
left_join(dat%>%select(ResponseId,starts_with("rank_amount_")),by="ResponseId")%>%
mutate(Subj.rank=case_when(
item.f=="Pr6_Amt1" ~ rank_amount_49,
item.f=="Pr5_Amt2" ~ rank_amount_50,
item.f== "Pr4_Amt3" ~ rank_amount_64,
item.f== "Pr3_Amt4" ~ rank_amount_65,
item.f == "Pr2_Amt5" ~ rank_amount_67,
item.f == "Pr1_Amt6" ~ rank_amount_68))%>%
select(-c(starts_with("rank_amount_")))%>%
group_by(ResponseId) %>%
mutate(Tau = -cor(Subj.rank, rank.amount, method = "kendall")) %>% # sign flipped: Subj.rank and rank.amount run in opposite directions, so the expected ordering yields tau = +1
ungroup()
# Identify respondents with tau <= 0.5 - excluded later
Amount_tau_less0.5 <- Summary_data_Amount %>%
filter(Tau <= 0.5) %>%
distinct(ResponseId) %>%
pull(ResponseId)
## Count: fix long df ##
# Further recoding of the long df
drag_and_drop_count_Amount_long<-drag_and_drop_count_Amount_long%>%
filter(ResponseId%notin%Amount_tau_less0.5)%>%
mutate(rank.Amount=case_when(
item.f=="Pr6_Amt1" ~1,
item.f=="Pr5_Amt2" ~ 2,
item.f== "Pr4_Amt3" ~ 3,
item.f== "Pr3_Amt4" ~ 4,
item.f == "Pr2_Amt5" ~ 5,
item.f == "Pr1_Amt6" ~6
),
rank.Prob=case_when(
item.f=="Pr6_Amt1" ~6,
item.f=="Pr5_Amt2" ~ 5,
item.f== "Pr4_Amt3" ~ 4,
item.f== "Pr3_Amt4" ~ 3,
item.f == "Pr2_Amt5" ~ 2,
item.f == "Pr1_Amt6" ~1))%>%
left_join(initial.dat_amount%>%select(ResponseId,starts_with("initial.items_")),by="ResponseId")%>%
mutate(initial.rank=case_when(
item.f=="Pr6_Amt1" ~ initial.items_49,
item.f=="Pr5_Amt2" ~ initial.items_50,
item.f=="Pr4_Amt3" ~ initial.items_64,
item.f=="Pr3_Amt4" ~ initial.items_65,
item.f=="Pr2_Amt5" ~ initial.items_67,
item.f=="Pr1_Amt6" ~ initial.items_68
),
initial.rank.r=relevel(factor(7-initial.rank), ref = 6),
N_ind=case_when(
N==0~0,
TRUE~1)
)%>%
select(-c(starts_with("initial.items_")))
## DISTANCE ##
Distance_Amount.cleanup.df <- get_DROPT_distance(
data = RankProcess_Amount,
items = c("49", "50", "64", "65", "67", "68"),
item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
condition_label = "Amount"
)
## ORDER ##
touch_order_analysis.long_Amount <- get_DROPT_order(
data = RankProcess_Amount,
dat_long = dat_long1,
initial_data = initial.dat_amount,
item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
item_codes = c("49", "50", "64", "65", "67", "68"),
condition_value = "Amount"
)
### PROBABILITY ###
## COUNT ##
result_prob <- get_DROPT_count(
dat = dat,
rank_col = "RankProcess_Prob",
rank_all_col = "RankProcess_all_Prob",
item_numbers = c(49, 50, 64, 65, 67, 68),
item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
condition = "Prob",
initial_dat = initial.dat_prob,
dat_long = dat_long1
)
# Extract outputs as needed
RankProcess_Prob <- result_prob$cleaned_data
bug_respondent_Prob <- result_prob$bug_respondent
na_subj_Prob <- result_prob$na_subj
drag_and_drop_count_Prob <- result_prob$drag_and_drop_count
drag_and_drop_count_Prob_long <- result_prob$drag_and_drop_count_long
# Create summary df and compute each respondent's tau (respondents with tau <= 0.5 are excluded later)
Summary_data_Prob<- expand_grid(
ResponseId = unique(RankProcess_Prob$ResponseId),
item.f = unique(RankProcess_Prob$item.f))
Summary_data_Prob<-Summary_data_Prob%>%
mutate(rank.Amount=
case_when(
item.f=="Pr6_Amt1" ~1,
item.f=="Pr5_Amt2" ~ 2,
item.f== "Pr4_Amt3" ~ 3,
item.f== "Pr3_Amt4" ~ 4,
item.f == "Pr2_Amt5" ~ 5,
item.f == "Pr1_Amt6" ~6),
rank.prob=case_when(
item.f=="Pr6_Amt1" ~6,
item.f=="Pr5_Amt2" ~ 5,
item.f== "Pr4_Amt3" ~ 4,
item.f== "Pr3_Amt4" ~ 3,
item.f == "Pr2_Amt5" ~ 2,
item.f == "Pr1_Amt6" ~1
))%>%
left_join(dat%>%select(ResponseId,starts_with("rank_prob_")),by="ResponseId")%>%
mutate(Subj.rank=case_when(
item.f=="Pr6_Amt1" ~ rank_prob_49,
item.f=="Pr5_Amt2" ~ rank_prob_50,
item.f== "Pr4_Amt3" ~ rank_prob_64,
item.f== "Pr3_Amt4" ~ rank_prob_65,
item.f == "Pr2_Amt5" ~ rank_prob_67,
item.f == "Pr1_Amt6" ~ rank_prob_68))%>%
select(-c(starts_with("rank_prob_")))%>%
group_by(ResponseId) %>%
mutate(Tau = -cor(Subj.rank, rank.prob, method = "kendall")) %>% # sign flipped: Subj.rank and rank.prob run in opposite directions, so the expected ordering yields tau = +1
ungroup()
# Identify respondents with tau <= 0.5 - excluded later
Prob_tau_less0.5 <- Summary_data_Prob %>%
filter(Tau <= 0.5) %>%
distinct(ResponseId) %>%
pull(ResponseId)
## Count: fix long df ##
# Further recoding of the long df
drag_and_drop_count_Prob_long<-drag_and_drop_count_Prob_long%>%
filter(ResponseId%notin%Prob_tau_less0.5)%>%
mutate(rank.Amount=case_when(
item.f=="Pr6_Amt1" ~1,
item.f=="Pr5_Amt2" ~ 2,
item.f== "Pr4_Amt3" ~ 3,
item.f== "Pr3_Amt4" ~ 4,
item.f == "Pr2_Amt5" ~ 5,
item.f == "Pr1_Amt6" ~6
),
rank.Prob=case_when(
item.f=="Pr6_Amt1" ~6,
item.f=="Pr5_Amt2" ~ 5,
item.f== "Pr4_Amt3" ~ 4,
item.f== "Pr3_Amt4" ~ 3,
item.f == "Pr2_Amt5" ~ 2,
item.f == "Pr1_Amt6" ~1))%>%
left_join(initial.dat_prob%>%
select(ResponseId,starts_with("initial.items_")),by="ResponseId")%>%
mutate(initial.rank=case_when(
item.f=="Pr6_Amt1" ~ initial.items_49,
item.f=="Pr5_Amt2" ~ initial.items_50,
item.f=="Pr4_Amt3" ~ initial.items_64,
item.f=="Pr3_Amt4" ~ initial.items_65,
item.f=="Pr2_Amt5" ~ initial.items_67,
item.f=="Pr1_Amt6" ~ initial.items_68
),
initial.rank.r=relevel(factor(7-initial.rank), ref = 6),
N_ind=case_when(
N==0~0,
TRUE~1)
)%>%
select(-c(starts_with("initial.items_")))
## DISTANCE ##
Distance_Prob.cleanup.df <- get_DROPT_distance(
data = RankProcess_Prob,
items = c("49", "50", "64", "65", "67", "68"),
item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
condition_label = "Prob"
)
## ORDER ##
touch_order_analysis.long_Prob <- get_DROPT_order(
data = RankProcess_Prob,
dat_long = dat_long1,
initial_data = initial.dat_prob,
item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
item_codes = c("49", "50", "64", "65", "67", "68"),
condition_value = "Prob"
)
### PREFERENCE 1 ###
## COUNT ##
result_prefer1 <- get_DROPT_count(
dat = dat,
rank_col = "RankProcess_Prefer1",
rank_all_col = "RankProcess_all_Prefer1",
item_numbers = c(49, 50, 64, 65, 67, 68),
item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
condition = "Prefer1",
initial_dat = initial.dat_pref1,
dat_long = dat_long1
)
# Extract outputs as needed
RankProcess_Prefer1 <- result_prefer1$cleaned_data
bug_respondent_Prefer1 <- result_prefer1$bug_respondent
na_subj_Prefer1 <- result_prefer1$na_subj
drag_and_drop_count_Prefer1 <- result_prefer1$drag_and_drop_count
drag_and_drop_count_Prefer1_long <- result_prefer1$drag_and_drop_count_long
## DISTANCE ##
Distance_pref1.cleanup.df <- get_DROPT_distance(
data = RankProcess_Prefer1,
items = c("49", "50", "64", "65", "67", "68"),
item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
condition_label = "Pref1"
)
## ORDER ##
touch_order_analysis.long_pref1 <- get_DROPT_order(
data = RankProcess_Prefer1,
dat_long = dat_long1,
initial_data = initial.dat_pref1,
item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
item_codes = c("49", "50", "64", "65", "67", "68"),
condition_value = "Pref1"
)
### PREFERENCE 2 ###
result_prefer2 <- get_DROPT_count(
dat = dat,
rank_col = "RankProcess_Prefer2",
rank_all_col = "RankProcess_all_Prefer2",
item_numbers = c(49, 50, 64, 65, 67, 68),
item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
condition = "Prefer2",
initial_dat = initial.dat_pref2,
dat_long = dat_long2
)
# Extract outputs as needed
RankProcess_Prefer2 <- result_prefer2$cleaned_data
bug_respondent_Prefer2 <- result_prefer2$bug_respondent
na_subj_Prefer2 <- result_prefer2$na_subj
drag_and_drop_count_Prefer2 <- result_prefer2$drag_and_drop_count
drag_and_drop_count_Prefer2_long <- result_prefer2$drag_and_drop_count_long
## DISTANCE ##
Distance_pref2.cleanup.df <- get_DROPT_distance(
data = RankProcess_Prefer2,
items = c("49", "50", "64", "65", "67", "68"),
item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
condition_label = "Pref2"
)
## ORDER ##
touch_order_analysis.long_pref2 <- get_DROPT_order(
data = RankProcess_Prefer2,
dat_long = dat_long2,
initial_data = initial.dat_pref2,
item_labels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"),
item_codes = c("49", "50", "64", "65", "67", "68"),
condition_value = "Pref2"
)
### Combine Preference Rankings into a Master df, which is in the long format ###
# load function
make.rm <-
function (constant, repeated, data, contrasts)
{
if (!missing(constant) && is.vector(constant)) {
if (!missing(repeated) && is.vector(repeated)) {
if (!missing(data)) {
dd <- dim(data)
replen <- length(repeated)
if (missing(contrasts))
contrasts <- ordered(sapply(paste("T", 1:length(repeated),
sep = ""), rep, dd[1]))
else contrasts <- matrix(sapply(contrasts, rep,
dd[1]), ncol = dim(contrasts)[2])
if (length(constant) == 1)
cons.col <- rep(data[, constant], replen)
else cons.col <- lapply(data[, constant], rep,
replen)
new.df <- data.frame(cons.col, repdat = as.vector(data.matrix(data[,
repeated])), contrasts)
return(new.df)
}
}
}
cat("Usage: make.rm(constant, repeated, data [, contrasts])\n")
cat("\tWhere 'constant' is a vector of indices of non-repeated data and\n")
cat("\t'repeated' is a vector of indices of the repeated measures data.\n")
}
# Preselect columns
set1amt_cols <- dat %>%
select(starts_with("Set1_L") & ends_with("_Amt")) %>%
colnames()
set1prob_cols <- dat %>%
select(starts_with("Set1_L") & ends_with("_Prob")) %>%
colnames()
set1rank_cols <- dat %>%
select(starts_with("rank_pref1")) %>%
colnames()
set2amt_cols <- dat %>%
select(starts_with("Set2_L") & ends_with("_Amt")) %>%
colnames()
set2prob_cols <- dat %>%
select(starts_with("Set2_L") & ends_with("_Prob")) %>%
colnames()
set2rank_cols <- dat %>%
select(starts_with("rank_pref2")) %>%
colnames()
amtrank_cols <- dat %>%
select(starts_with("rank_amount_")) %>%
colnames()
probrank_cols <- dat %>%
select(starts_with("rank_prob_")) %>%
colnames()
# Now call the make.rm function and create the first df
master_df <- make.rm(
constant = c("ResponseId", "age", "gender_binary", "education_num", "income"),
repeated = set1amt_cols,
data = dat
)
# rename repdat column
master_df <- master_df %>%
rename(set1_amt = "repdat")
# since the order of the other columns is identical, we can just create a new set for all the other columns
set1prob.rm <- make.rm(
constant = c("ResponseId"),
repeated = set1prob_cols,
data = dat
)
set1rank.rm <- make.rm(
constant = c("ResponseId"),
repeated = set1rank_cols,
data = dat
)
set2amt.rm <- make.rm(
constant = c("ResponseId"),
repeated = set2amt_cols,
data = dat
)
set2prob.rm <- make.rm(
constant = c("ResponseId"),
repeated = set2prob_cols,
data = dat
)
set2rank.rm <- make.rm(
constant = c("ResponseId"),
repeated = set2rank_cols,
data = dat
)
amtranks.rm <- make.rm(
constant = c("ResponseId"),
repeated = amtrank_cols,
data = dat
)
probranks.rm <- make.rm(
constant = c("ResponseId"),
repeated = probrank_cols,
data = dat
)
# Add the columns from the other repeated measures data frames
master_df$set1_prob <- set1prob.rm$repdat
master_df$set1_rank <- set1rank.rm$repdat
master_df$set2_amt <- set2amt.rm$repdat
master_df$set2_prob <- set2prob.rm$repdat
master_df$set2_rank <- set2rank.rm$repdat
master_df$amt_subj_rank <- amtranks.rm$repdat
master_df$prob_subj_rank <- probranks.rm$repdat
# Add labels for the bets
master_df$bet_label[master_df$contrasts == "T1"] <- "Pr6_Amt1"
master_df$bet_label[master_df$contrasts == "T2"] <- "Pr5_Amt2"
master_df$bet_label[master_df$contrasts == "T3"] <- "Pr4_Amt3"
master_df$bet_label[master_df$contrasts == "T4"] <- "Pr3_Amt4"
master_df$bet_label[master_df$contrasts == "T5"] <- "Pr2_Amt5"
master_df$bet_label[master_df$contrasts == "T6"] <- "Pr1_Amt6"
### Finally, add the 3F measures from the data frames created before
# Set 1: Touch count df to add touch count (numeric and binary)
master_df <- master_df %>%
left_join(
drag_and_drop_count_Prefer1_long %>%
select(ResponseId, item.f, N, N_ind) %>%
rename(
bet_label = item.f,
set1_touch_count = N,
set1_touch_count_binary = N_ind
),
by = c("ResponseId", "bet_label")
)
# Set 1: Touch order (order and max order)
master_df <- master_df %>%
left_join(
touch_order_analysis.long_pref1 %>%
select(ResponseId, item.f, order, max_order) %>%
rename(
bet_label = item.f,
set1_order = order,
set1_max_order = max_order
),
by = c("ResponseId", "bet_label")
)
# Set 1: Drag distance
master_df <- master_df %>%
left_join(
Distance_pref1.cleanup.df %>%
select(ResponseId, item.f, distance.r) %>%
rename(
bet_label = item.f,
set1_drag_distance.r = distance.r
),
by = c("ResponseId", "bet_label")
)
# Set 1: initial order
# first, create a tmp df.
dat_tmp1 <- dat %>%
select(ResponseId, RankProcess_Prefer1)
dat_extracted1 <- dat_tmp1 %>%
mutate(
item_string = str_extract(RankProcess_Prefer1, "\\{[^}]*\\}"), # get first curly bracket contents
item_string = str_remove_all(item_string, "\\{|\\}"), # remove braces
item_string = str_split(item_string, ";") %>%
sapply(function(x) x[2]), # get text after semicolon
item_numbers = str_split(item_string, ",") # split into vector of numbers
)
dat_tmp1_long <- dat_extracted1 %>%
select(ResponseId, item_numbers) %>%
unnest(item_numbers) %>%
group_by(ResponseId) %>%
mutate(
item_number = as.integer(str_trim(item_numbers)),
set1_initial_order = row_number(),
bet_label = case_when(
item_number == 49 ~ "Pr6_Amt1",
item_number == 50 ~ "Pr5_Amt2",
item_number == 64 ~ "Pr4_Amt3",
item_number == 65 ~ "Pr3_Amt4",
item_number == 67 ~ "Pr2_Amt5",
item_number == 68 ~ "Pr1_Amt6",
TRUE ~ NA_character_
)
) %>%
select(ResponseId, item_number, set1_initial_order, bet_label) %>%
ungroup()
# merge with master_df
master_df <- master_df %>%
left_join(
dat_tmp1_long %>%
select(ResponseId, bet_label, set1_initial_order),
by = c("ResponseId", "bet_label")
)
### Set 2 3F
# Set 2: Touch count df to add touch count (numeric and binary)
master_df <- master_df %>%
left_join(
drag_and_drop_count_Prefer2_long %>%
select(ResponseId, item.f, N, N_ind) %>%
rename(
bet_label = item.f,
set2_touch_count = N,
set2_touch_count_binary = N_ind
),
by = c("ResponseId", "bet_label")
)
# Set 2: Touch order (order and max order)
master_df <- master_df %>%
left_join(
touch_order_analysis.long_pref2 %>%
select(ResponseId, item.f, order, max_order) %>%
rename(
bet_label = item.f,
set2_order = order,
set2_max_order = max_order
),
by = c("ResponseId", "bet_label")
)
# Set 2: Drag distance
master_df <- master_df %>%
left_join(
Distance_pref2.cleanup.df %>%
select(ResponseId, item.f, distance.r) %>%
rename(
bet_label = item.f,
set2_drag_distance.r = distance.r
),
by = c("ResponseId", "bet_label")
)
# Set 2: initial order
# first, create a tmp df.
dat_tmp2 <- dat %>%
select(ResponseId, RankProcess_Prefer2)
dat_extracted2 <- dat_tmp2 %>%
mutate(
item_string = str_extract(RankProcess_Prefer2, "\\{[^}]*\\}"), # get first curly bracket contents
item_string = str_remove_all(item_string, "\\{|\\}"), # remove braces
item_string = str_split(item_string, ";") %>%
sapply(function(x) x[2]), # get text after semicolon
item_numbers = str_split(item_string, ",") # split into vector of numbers
)
dat_tmp2_long <- dat_extracted2 %>%
select(ResponseId, item_numbers) %>%
unnest(item_numbers) %>%
group_by(ResponseId) %>%
mutate(
item_number = as.integer(str_trim(item_numbers)),
set2_initial_order = row_number(),
bet_label = case_when(
item_number == 49 ~ "Pr6_Amt1",
item_number == 50 ~ "Pr5_Amt2",
item_number == 64 ~ "Pr4_Amt3",
item_number == 65 ~ "Pr3_Amt4",
item_number == 67 ~ "Pr2_Amt5",
item_number == 68 ~ "Pr1_Amt6",
TRUE ~ NA_character_
)
) %>%
select(ResponseId, item_number, set2_initial_order, bet_label) %>%
ungroup()
# merge with master_df
master_df <- master_df %>%
left_join(
dat_tmp2_long %>%
select(ResponseId, bet_label, set2_initial_order),
by = c("ResponseId", "bet_label")
)
# add centered values for amount and probability
master_df$set1_amt.c <- scale(master_df$set1_amt)
master_df$set2_amt.c <- scale(master_df$set2_amt)
master_df$set1_prob.c <- scale(master_df$set1_prob)
master_df$set2_prob.c <- scale(master_df$set2_prob)
### PREP: Binary Data ###
binary_cols <- dat %>%
select(starts_with("Binary")) %>%
colnames()
# Now call the make.rm function and create the first df
binary.rm <- make.rm(
constant = c("ResponseId", "age", "gender_binary", "education_num", "income"),
repeated = binary_cols,
data = dat
)
# rename repdat column
binary.rm <- binary.rm %>%
rename(binary_choice = "repdat")
binary.rm$bet_number[binary.rm$contrasts == "T1"] <- "Bet1"
binary.rm$bet_number[binary.rm$contrasts == "T2"] <- "Bet2"
binary.rm$bet_number[binary.rm$contrasts == "T3"] <- "Bet3"
binary.rm$bet_number[binary.rm$contrasts == "T4"] <- "Bet4"
binary.rm$bet_number[binary.rm$contrasts == "T5"] <- "Bet5"
binary.rm$Chose_P_Bet <- ifelse(binary.rm$binary_choice == 2, 1, 0)
Sometimes, recording errors occur when using the DROPT method. The code below shows how many observations are “thrown out” and why.
# Amount
n_bug_amount <- length(bug_respondent_Amount)
n_na_amount <- length(na_subj_Amount)
# Probability
n_bug_prob <- length(bug_respondent_Prob)
n_na_prob <- length(na_subj_Prob)
# Preference 1
n_bug_pref1 <- length(bug_respondent_Prefer1)
n_na_pref1 <- length(na_subj_Prefer1)
# Preference 2
n_bug_pref2 <- length(bug_respondent_Prefer2)
n_na_pref2 <- length(na_subj_Prefer2)
# Table
bug_table <- data.frame(
Task = c("Amount", "Probability", "Preference 1", "Preference 2"),
Bug = c(n_bug_amount, n_bug_prob, n_bug_pref1, n_bug_pref2),
`NA` = c(n_na_amount, n_na_prob, n_na_pref1, n_na_pref2)
)
print(bug_table)
## Task Bug NA.
## 1 Amount 1 5
## 2 Probability 2 7
## 3 Preference 1 2 7
## 4 Preference 2 4 5
Definition: the number of drag-and-drop actions (recorded with RankCount); an action is only counted when it results in an order change.
### plot ###
plot_quiz <- function(data, column, title, item_n) {
n_obs <- nrow(data) # Total number of rows
mean_val <- mean(data[[column]], na.rm = TRUE) # Calculate mean
median_val <- median(data[[column]], na.rm = TRUE) # Calculate median
subtitle_text <- paste0("Item.N = ", item_n, "; Subj.N = ", n_obs,
", Mean = ", round(mean_val, 2),
", Median = ", round(median_val, 2))
ggplot(data, aes(x = "", y = !!sym(column))) +
geom_violin(fill = "lightblue", alpha = 0.5) +
geom_boxplot(width = 0.1, color = "black", alpha = 0.8) +
geom_jitter(width = 0.1, size = 1.5, color = "black", alpha = 0.6)+
theme_minimal() +
labs(
title = title,
subtitle = subtitle_text,
x = "",
y = "Drag count"
) +
theme(plot.title = element_text(hjust = 0.5, face="bold"), plot.subtitle = element_text(hjust = 0.5)) +
scale_y_continuous(breaks = seq(0, 10, by = 1), limits = c(0, 10))
}
# Create plots for each quiz
Quiz_WarmUp <- plot_quiz(dat, "RankCount_WarmUp", "Warm Up", item_n = 5)
Quiz_Prob <- plot_quiz(dat, "RankCount_Prob", "Probability", item_n = 6)
Quiz_Amt <- plot_quiz(dat, "RankCount_Amount", "Amount", item_n = 6)
Quiz_pref1 <- plot_quiz(dat, "RankCount_Prefer1", "Preference (1st)", item_n = 6)
Quiz_Pref2 <- plot_quiz(dat, "RankCount_Prefer2", "Preference (2nd)", item_n = 6)
# Combine all plots into one graph
combined_plot <- (Quiz_WarmUp | Quiz_Prob | Quiz_Amt | Quiz_pref1| Quiz_Pref2)
combined_plot
summary_stats <- all_results %>%
group_by(task) %>%
summarise(
Mean = round(mean(correlation, na.rm = TRUE), 2),
Median = round(median(correlation, na.rm = TRUE), 2),
N = n()
)
# summary_stats
all_results$task <- as.factor(all_results$task)
all_results$task <- factor(all_results$task, levels = c("amount", "prob", "pref1", "pref2"))
combined_plot <- ggplot(all_results, aes(x = task, y = correlation)) +
geom_violin(fill = "lightblue", alpha = 0.5) +
geom_jitter(aes(color = sig), width = 0.1, size = 1.5, alpha = 0.6) +
scale_color_manual(
values = c("TRUE" = "red", "FALSE" = "black"),
labels = c("ns.", "p<.05"),
name = "p value"
) +
theme_minimal() +
labs(
title = " Correlations for Initial and Final Ranks Across Tasks",
subtitle = paste(
" Color Task: Mean =", summary_stats$Mean[1], ", Median =", summary_stats$Median[1], ", N =", summary_stats$N[1], "\n",
" Prob Task: Mean =", summary_stats$Mean[2], ", Median =", summary_stats$Median[2], ", N =", summary_stats$N[2], "\n"
),
x = "Task",
y = "Correlation"
) +
theme(
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)
)
# Print the plot
print(combined_plot)
# Extract the page-submit time for each task and compute summary statistics
summarize_task <- function(data, column_name, task_name) {
data %>%
summarise(
Task = task_name,
Mean_t = mean(.data[[column_name]], na.rm = TRUE),
Median_t = median(.data[[column_name]], na.rm = TRUE),
SD = sd(.data[[column_name]], na.rm = TRUE),
Min = min(.data[[column_name]], na.rm = TRUE),
Max = max(.data[[column_name]], na.rm = TRUE),
N = sum(!is.na(.data[[column_name]]))
)
}
# Apply the function to each dataset
summary_warmup <- summarize_task(dat, "t_warmup_Page.Submit", "WarmUp")
summary_prob <- summarize_task(dat, "t_rank_prob_Page.Submit", "Probability")
summary_amt <- summarize_task(dat, "t_rank_amount_Page.Submit", "Amount")
summary_pref1 <- summarize_task(dat, "t_rank_pref1_Page.Submit", "Preference 1ST")
summary_pref2 <- summarize_task(dat, "t_rank_pref2_Page.Submit", "Preference 2ND")
# summary_Binary <- summarize_task(dat, "t_Prob_Page.Submit", "Prob")
# Combine all summaries into one table
all_summaries <- bind_rows(summary_warmup,summary_prob, summary_amt, summary_pref1,summary_pref2 )
# t.test(dat$t_Prob_Page.Submit,dat$rank_color_t_Page.Submit)
all_summaries
1ST Preference Ranking Task
# --- True lottery values and jitter ranges ---
prob <- c(5, 9, 17, 29, 54, 94)
amount <- c(56.7, 31.5, 17.5, 9.7, 5.4, 2.9)
bounded_jitter <- list(
prob = c(2, 2, 6, 6, 18, 5),
amt = c(15.95, 9.25, 4.75, 3.05, 1.25, 1.25)
)
# --- Mapping from Set1_L1–L6 to correct lottery ---
lottery_mapping <- tibble(
input_lottery = 1:6,
lottery = factor(7 - input_lottery)
)
# --- Reshape wide-format jitter data to long format ---
jitter_check <- bind_rows(lapply(1:6, function(i) {
tibble(
ResponseId = dat$ResponseId,
input_lottery = i,
prob = dat[[paste0("Set1_L", i, "_Prob")]],
amount = dat[[paste0("Set1_L", i, "_Amt")]]
)
})) %>%
left_join(lottery_mapping, by = "input_lottery") %>%
select(ResponseId, lottery, prob, amount) %>%
filter(!is.na(prob))
# --- Jitter bounds and true values per lottery ---
jitter_bounds <- tibble(
lottery = factor(1:6),
prob_min = prob - bounded_jitter$prob,
prob_max = prob + bounded_jitter$prob,
amt_min = amount - bounded_jitter$amt,
amt_max = amount + bounded_jitter$amt,
true_prob = prob,
true_amount = amount
)
# --- Plot: Probability ---
ggplot(jitter_check, aes(x = lottery, y = prob)) +
geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
geom_point(data = jitter_bounds, aes(y = true_prob), color = "red", size = 3) +
geom_linerange(data = jitter_bounds, aes(ymin = prob_min, ymax = prob_max), color = "red", size = 0.8) +
theme_minimal(base_size = 13) +
labs(
title = "Probability of Win per Lottery",
subtitle = "Black = actual shown values; Red line = intended jitter range",
x = "Lottery ID", y = "Probability (%)"
)
# --- Plot: Amount ---
ggplot(jitter_check, aes(x = lottery, y = amount)) +
geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
geom_point(data = jitter_bounds, aes(y = true_amount), color = "red", size = 3) +
geom_linerange(data = jitter_bounds, aes(ymin = amt_min, ymax = amt_max), color = "red", size = 0.8) +
theme_minimal(base_size = 13) +
labs(
title = "Amount to Win per Lottery",
subtitle = "Black = actual shown values; Red line = intended jitter range",
x = "Lottery ID", y = "Amount ($)"
)
# --- Flag violations ---
jitter_check_flagged <- jitter_check %>%
left_join(jitter_bounds, by = "lottery") %>%
mutate(
prob_out_of_range = prob < prob_min | prob > prob_max,
amount_out_of_range = amount < amt_min | amount > amt_max
) %>%
select(
ResponseId, lottery,
prob, prob_min, prob_max, prob_out_of_range,
amount, amt_min, amt_max, amount_out_of_range,
true_prob, true_amount
)
jitter_check_flagged%>%
filter(prob_out_of_range=="TRUE"|amount_out_of_range=="TRUE") #empty table - good!
# --- Compute EV and summary ---
jitter_check <- jitter_check %>%
mutate(ev = (prob / 100) * amount)
ev_summary <- jitter_check %>%
group_by(lottery) %>%
summarise(ev_mean = mean(ev, na.rm = TRUE))
# --- Plot: Expected Value with reference line ---
ggplot(jitter_check, aes(x = lottery, y = ev)) +
geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
geom_point(data = ev_summary, aes(x = lottery, y = ev_mean), color = "red", size = 3) +
geom_hline(yintercept = 2.835, linetype = "dashed", color = "blue", linewidth = 1) +
theme_minimal(base_size = 13) +
labs(
title = "Expected Value per Lottery",
subtitle = "Black = individual EVs; Red = mean EV; Blue dashed = intended EV = 2.835",
x = "Lottery ID", y = "Expected Value ($)"
)
2ND Preference Ranking Task
# --- Step 0: True values ---
prob <- c(3, 6, 15, 31, 63, 84)
amount <- c(93.4, 47.7, 18.7, 9.1, 4.4, 3.4)
# BOUNDED jitter ranges used for each lottery (for visual comparison)
bounded_jitter <- list(
prob = c(2, 1, 8, 8, 11, 10),
amt = c(22, 23.7, 5.3, 4.3, 0.4, 0.4)
)
# --- Step 1: Reverse mapping: Set1_L1 = lottery 6, ..., Set1_L6 = lottery 1 ---
lottery_mapping <- tibble(
input_lottery = 1:6,
lottery = factor(7 - input_lottery) # Reverse order
)
# --- Step 2: Reshape wide-format data into long-format jitter_check ---
jitter_check <- bind_rows(lapply(1:6, function(i) {
tibble(
ResponseId = dat$ResponseId,
input_lottery = i,
prob = dat[[paste0("Set2_L", i, "_Prob")]],
amount = dat[[paste0("Set2_L", i, "_Amt")]]
)
})) %>%
left_join(lottery_mapping, by = "input_lottery") %>%
select(ResponseId, lottery, prob, amount) %>%
filter(!is.na(prob))
# --- Step 3: Jitter bounds and true value per lottery ---
jitter_bounds <- tibble(
lottery = factor(1:6),
prob = c(3, 6, 15, 31, 63, 84),
prob_min = prob - bounded_jitter$prob,
prob_max = prob + bounded_jitter$prob,
amount = c(93.4, 47.7, 18.7, 9.1, 4.4, 3.4),
amt_min = amount - bounded_jitter$amt,
amt_max = amount + bounded_jitter$amt
)
# -------------------------------
# Plot 1: Probability (dots + jitter range + true center)
# -------------------------------
ggplot(jitter_check, aes(x = lottery, y = prob)) +
geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") + # actual shown values
geom_point(data = jitter_bounds, aes(y = prob), color = "red", size = 3) + # true center
geom_linerange(data = jitter_bounds, aes(ymin = prob_min, ymax = prob_max), color = "red", size = 0.8) + # expected jitter range
theme_minimal(base_size = 13) +
labs(
title = "Probability of Win per Lottery",
subtitle = "Black = actual shown values; Red line = intended jitter range",
x = "Lottery ID", y = "Probability (%)"
)
# -------------------------------
# Plot 2: Amount (dots + jitter range + true center)
# -------------------------------
ggplot(jitter_check, aes(x = lottery, y = amount)) +
geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
geom_point(data = jitter_bounds, aes(y = amount), color = "red", size = 3) +
geom_linerange(data = jitter_bounds, aes(ymin = amt_min, ymax = amt_max), color = "red", size = 0.8) +
theme_minimal(base_size = 13) +
labs(
title = "Amount to Win per Lottery",
subtitle = "Black = actual shown values; RRed line = intended jitter range",
x = "Lottery ID", y = "Amount ($)"
)
jitter_bounds_renamed <- jitter_bounds %>%
rename(
true_prob = prob,
true_amount = amount
)
jitter_check_flagged<-jitter_check %>%
left_join(jitter_bounds_renamed, by = "lottery") %>%
mutate(
prob_out_of_range = prob < prob_min | prob > prob_max,
amount_out_of_range = amount < amt_min | amount > amt_max
) %>%
select(
ResponseId, lottery,
prob, prob_min, prob_max, prob_out_of_range,
amount, amt_min, amt_max, amount_out_of_range,
true_prob, true_amount
)
jitter_check_flagged%>%
filter(prob_out_of_range=="TRUE"|amount_out_of_range=="TRUE")
jitter_check <- jitter_check %>%
mutate(ev = (prob / 100) * amount)
ev_summary <- jitter_check %>%
group_by(lottery) %>%
summarise(ev_mean = mean(ev, na.rm = TRUE))
ggplot(jitter_check, aes(x = lottery, y = ev)) +
geom_jitter(width = 0.1, alpha = 0.4, size = 1, color = "black") +
geom_point(data = ev_summary, aes(x = lottery, y = ev_mean), color = "red", size = 3) +
geom_hline(yintercept = 2.835, linetype = "dashed", color = "blue", linewidth = 1) +
theme_minimal(base_size = 13) +
labs(
title = "Expected Value per Lottery",
subtitle = "Black = individual EVs; Red = mean EV; Blue dashed = intended EV = 2.835",
x = "Lottery ID", y = "Expected Value ($)"
)
ggplot(dat, aes(x = factor(WarmUpAttempt_N+1))) +
geom_bar(fill = "steelblue", color = "black", alpha = 0.8) +
scale_x_discrete(limits = as.character(1:3)) +
labs(
title = "Distribution of Warm-Up Attempts",
x = "Number of Warm-Up Attempts",
y = "Count"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 13)
)
dat_checks_long <- dat %>%
pivot_longer(cols = c(dose.coded, attn1.coded), names_to = "Question", values_to = "Response")
ggplot(dat_checks_long, aes(x = Response, fill = Question)) +
geom_bar(position = "dodge") + # Bar plot using counts
geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5, size = 5) + # Add count labels
facet_wrap(~Question, scales = "free_x") + # Separate plots for dose.coded and attn1.coded
labs(x = "Response", y = "Count", title = "Count of Correct & Incorrect Responses") +
theme_bw() +
ylim(0, 200) # Set y-axis limit
dose.wrong.subj<-dat%>%filter(dose.coded=="Incorrect")%>%pull(ResponseId)
# Display the actual things people select
After the second ranking question (“how much you like each lottery”), we asked participants: “Do you believe that you have the chance to win a bonus in the task you just completed?”
ggplot(dat%>%filter(!is.na(bonus_belief)), aes(x = bonus_belief)) +
geom_bar() + # Use counts instead of proportions
geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5, size = 5) + # Add count labels
labs(x = "Belief in Getting Bonus", y = "Count", title = "Did you believe that your responses in the ranking task you\njust completed will affect your bonus payment?") +
theme_bw() +
theme(legend.position = "none") + # Tilt x-axis labels
ylim(c(0,200))
Possible Answers:
+ Mouse (wired or wireless)
+ Trackpad (touchpad)
+ Touchscreen (finger or stylus)
+ Others
dat$device[dat$device == 1] <- 'Mouse (Wired or Wireless)'
dat$device[dat$device == 2] <- 'Trackpad (touchpad)'
dat$device[dat$device == 4] <- 'Touchscreen (finger or stylus)'
dat$device[dat$device == 3] <- 'Other'
dat$device <- factor(dat$device, levels = names(sort(table(dat$device), decreasing = TRUE)))
ggplot(dat%>%filter(!is.na(device)), aes(x = device)) +
geom_bar() + # Use counts instead of proportions
geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5, size = 5) + # Add count labels
labs(x = "Device Type", y = "Count", title = "Distribution of Ranking Approaches") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") + # Tilt x-axis labels
ylim(c(0,150))
“Did you run into any technical issue during the survey?”
tech_issues <- sum(dat$technical == 1)
In total, 2 people said they had technical issues.
They said:
“In a few sentences, please explain what you think the purpose of this study is. If you are not sure, please give your best guess.”
# install.packages("reactable")
library(reactable)
# Filter out missing or blank responses
purpose_table <- dat %>%
filter(!is.na(purpose_open), purpose_open != "") %>%
select(purpose_open)
# Create interactive table
reactable(purpose_table,
searchable = TRUE,
pagination = TRUE,
defaultPageSize = 10,
highlight = TRUE,
bordered = TRUE,
striped = TRUE,
columns = list(
purpose_open = colDef(name = "Open-Ended Responses: Purpose")
))
Amount Task: 87% (169/194) have Tau = 1.
Probability condition: 90% (174/194) have Tau = 1.
# Check how many people per group have a tau of 1
# Summary_data %>%
# filter(Tau == 1) %>%
# group_by(Group) %>%
# summarise(Count = n())
# Create a combined summary data frame for the both tasks.
Summary_data <- data.frame(
Tau = c(Summary_data_Prob$Tau, Summary_data_Amount$Tau),
Group = rep(c("Prob", "Amount"), c(length(Summary_data_Prob$Tau), length(Summary_data_Amount$Tau))),
ResponseId = c(Summary_data_Prob$ResponseId, Summary_data_Amount$ResponseId) # Add ResponseId
) %>%
filter(!duplicated(paste(ResponseId,Group)))
# now, compute mean values for the combined df
mean_values <- Summary_data %>%
group_by(Group) %>%
dplyr::summarize(mean_Tau = mean(Tau, na.rm = TRUE))
# Create violin plots for the combined df
ggplot(Summary_data, aes(x = Group, y = Tau, fill = Group)) +
geom_violin(trim = FALSE, alpha = 0.5) + # Violin plot with transparency
geom_jitter(width = 0.1, alpha = 0.5, size = 1.5) + # Add jitter points
stat_summary(fun = mean, geom = "point", shape = 23, size = 4, fill = "white") + # Show mean as point
geom_text(data = mean_values, aes(x = Group, y = mean_Tau, label = sprintf("%.2f", mean_Tau)),
hjust = 2, fontface = "bold", size = 5, color = "black") + # Add mean text labels
scale_fill_manual(values = c("steelblue", "darkorange")) + # Custom fill colors
labs(
x = "Condition",
y = "Tau") +
theme_minimal(base_size = 14) +
theme(legend.position = "none", # Remove redundant legend
axis.title = element_text(face = "bold"),
axis.text = element_text(face = "bold"))
It appears that in the Amount Condition, several people misunderstood the task (and ranked the items the other way around), dragging the average tau down more than in the Prob task.
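As a quick sanity check on this reading (a sketch, not part of the main pipeline): under the tau coding used above, a completely reversed ranking yields tau = -1, so even a handful of reversed rankers pulls the condition mean down noticeably.
cor(1:6, 6:1, method = "kendall")  # a fully reversed ranking gives tau = -1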
# Amount
n_up_amount <- Distance_Amount.cleanup.df %>%
filter(distance.r>0) %>%
nrow()
n_down_amount <- Distance_Amount.cleanup.df %>%
filter(distance.r<0) %>%
nrow()
n_none_amount <- Distance_Amount.cleanup.df %>%
filter(distance.r==0) %>%
nrow()
# Probability
n_up_prob <- Distance_Prob.cleanup.df %>%
filter(distance.r>0) %>%
nrow()
n_down_prob <- Distance_Prob.cleanup.df %>%
filter(distance.r<0) %>%
nrow()
n_none_prob <- Distance_Prob.cleanup.df %>%
filter(distance.r==0) %>%
nrow()
# Preference 1
n_up_pref1 <- Distance_pref1.cleanup.df %>%
filter(distance.r>0) %>%
nrow()
n_down_pref1 <- Distance_pref1.cleanup.df %>%
filter(distance.r<0) %>%
nrow()
n_none_pref1 <- Distance_pref1.cleanup.df %>%
filter(distance.r==0) %>%
nrow()
# Preference 2
n_up_pref2 <- Distance_pref2.cleanup.df %>%
filter(distance.r>0) %>%
nrow()
n_down_pref2 <- Distance_pref2.cleanup.df %>%
filter(distance.r<0) %>%
nrow()
n_none_pref2 <- Distance_pref2.cleanup.df %>%
filter(distance.r==0) %>%
nrow()
# Table
moving_table <- data.frame(
Task = c("Amount", "Probability", "Preference 1", "Preference 2"),
Move_up = c(n_up_amount, n_up_prob, n_up_pref1, n_up_pref2),
Move_down = c(n_down_amount, n_down_prob, n_down_pref1, n_down_pref2),
Not_moved = c(n_none_amount, n_none_prob, n_none_pref1, n_none_pref2)
)
print(moving_table)
## Task Move_up Move_down Not_moved
## 1 Amount 603 45 480
## 2 Probability 623 51 436
## 3 Preference 1 630 56 430
## 4 Preference 2 630 41 445
In our 3F Hypothesis we argue that participants will rank items scoring higher on an attribute first, more frequently, and further up.
Note on rank coding: throughout the notebook, 6 refers to the highest rank (at the top) and 1 to the lowest (at the bottom).
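For example, the recode initial.rank.r = 7 - initial.rank used in the data preparation maps an item that starts in the top row (initial position 1) to 6 under this coding, and an item that starts at the bottom (position 6) to 1.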
The following set of histograms illustrates the number of times each item is touched.
## Amount: Count the number of times each item was touched
drag_drop_counts_Amount <- drag_and_drop_count_Amount_long %>%
filter(ResponseId%notin%Amount_tau_less0.5)%>%
count(item.f,N) %>%
group_by(item.f)%>%
mutate(percentage = n / sum(n) * 100,
condition="Amount")%>%
ungroup()
## Probability: Count the number of times each item was touched
drag_drop_counts_Prob <- drag_and_drop_count_Prob_long %>%
filter(ResponseId%notin%Prob_tau_less0.5)%>%
count(item.f,N) %>%
group_by(item.f)%>%
mutate(percentage = n / sum(n) * 100,
condition="Prob")%>%
ungroup()
# Combine both counts into one data frame
drag_drop_counts_combined<-rbind(drag_drop_counts_Amount,drag_drop_counts_Prob)
# Plot the combined data frame
ggplot(drag_drop_counts_combined, aes(x = factor(N), y = n)) +
geom_bar(
stat = "identity",
color = "black"
) +
geom_text(
aes(
label = paste0(n, " (", round(percentage, 1), "%)")),
vjust = -0.5,
size = 5,
fontface="bold"
) +
labs(
title = "Drag Count by Item and Quiz Condition",
x = "Drag Count",
y = "Frequency"
) +
theme_minimal() +
theme(
strip.text = element_text(size = 12, face = "bold"), # Increased size and bold text
plot.title = element_text(hjust = 0.5),
axis.title = element_text(size = 12), # Adjust axis titles size if needed
axis.text = element_text(size = 10) # Adjust axis labels size if needed
) +
facet_wrap(~ item.f * condition,ncol=2) +
ylim(0, 200)
The following plot shows, for each item and condition, the mean (with standard error) of a touched/not-touched indicator, i.e., the share of respondents who moved the item at least once.
# Create a summary data frame for AMOUNT; this time, code as binary whether an item was touched or not
summary_data_Amount_ind<- drag_and_drop_count_Amount_long %>%
filter(ResponseId%notin%Amount_tau_less0.5)%>%
mutate(N=case_when(
N==0~0,
TRUE~1
))%>%
dplyr::group_by(condition, item.f) %>%
dplyr::summarize(drag_drop_mean = mean(N, na.rm = TRUE),
drag_drop_sd = sd(N, na.rm = TRUE),
n = n(),
se = drag_drop_sd / sqrt(n),
.groups = "drop")
# Create a summary data frame for PROB; this time, code as binary whether an item was touched or not
summary_data_Prob_ind<- drag_and_drop_count_Prob_long %>%
filter(ResponseId%notin%Prob_tau_less0.5)%>%
mutate(N=case_when(
N==0~0,
TRUE~1
))%>%
dplyr::group_by(condition, item.f) %>%
dplyr::summarize(drag_drop_mean = mean(N, na.rm = TRUE),
drag_drop_sd = sd(N, na.rm = TRUE),
n = n(),
se = drag_drop_sd / sqrt(n),
.groups = "drop")
# Combine the two data frames into one
summary_data_combined_ind <- bind_rows(summary_data_Amount_ind, summary_data_Prob_ind)
# Define colors for the six bets. Logic: Strong green is indicative of the $-Bet; the red color is complementary and signals the focus on probability. Use this color scheme throughout
bet_colors <- c(
"Pr6_Amt1" = "#d73027", # Strong red
"Pr5_Amt2" = "#f46d43", # Medium red
"Pr4_Amt3" = "#fdae61", # Light red (orange-ish)
"Pr3_Amt4" = "#a6d96a", # Light green
"Pr2_Amt5" = "#66bd63", # Medium green
"Pr1_Amt6" = "#1a9850" # Strong green
)
summary_data_combined_ind$item.f = factor(summary_data_combined_ind$item.f, levels = rev(c( "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)
# Plot
ggplot(summary_data_combined_ind, aes(x = condition, y = drag_drop_mean,
group = item.f, color = item.f, shape = item.f)) +
geom_line(linewidth = 1, position = position_dodge(0.3)) +
geom_point(size = 6, position = position_dodge(0.3)) +
geom_errorbar(
aes(
ymin = drag_drop_mean - se,
ymax = drag_drop_mean + se
),
width = 0.2,
position = position_dodge(0.3)
) +
labs(
x = "Condition",
y = "Mean ± SE Drag Count",
title = "Mean Drag Count by Condition"
) +
scale_color_manual(values = bet_colors) +
scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22,
"Pr4_Amt3" = 23, "Pr3_Amt4" = 24,
"Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
theme_minimal() +
theme(
legend.position = "top", # Place legend at the top
legend.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5)
)
# Calculate correlation - valid for both dfs
cor_result <- cor.test(dat_long1$Prob, dat_long1$Amt)
cor_estimate <- round(cor_result$estimate, 2)
# Create a summary df for amount for subsequent plotting
summary_data_Amount <- drag_and_drop_count_Amount_long%>%
dplyr::group_by(condition, item.f) %>%
dplyr::summarize(drag_mean = mean(N_ind, na.rm = TRUE),
drag_sd = sd(N_ind, na.rm = TRUE),
n = n(),
se = drag_sd / sqrt(n), # Standard error
.groups = "drop",
Avg.Amount=mean(Amt),
Avg.Prob=mean(Prob))
summary_data_Prob <- drag_and_drop_count_Prob_long%>%
dplyr::group_by(condition, item.f) %>%
dplyr::summarize(drag_mean = mean(N_ind, na.rm = TRUE),
drag_sd = sd(N_ind, na.rm = TRUE),
n = n(),
se = drag_sd / sqrt(n), # Standard error
.groups = "drop",
Avg.Amount=mean(Amt),
Avg.Prob=mean(Prob))
# Create plot with correlation in the caption
ggplot(summary_data_Amount, aes(x = Avg.Amount, y = Avg.Prob, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(hjust = 0, nudge_x = 1) + # nudges text to the right
xlim(min(summary_data_Amount$Avg.Amount), max(summary_data_Amount$Avg.Amount) + 10) +
theme_minimal() +
labs(
title = "Attributes of Lotteries: Ranking by Amount",
x = "Mean Amount",
y = "Mean Probability",
subtitle = paste("Correlation in dat.long: r =", cor_estimate)
) +
theme(
axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5)
) +
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
Amount Task
# Create a graph showing the correlation between the amount and mean number of drags per item in the amount task
ggplot(summary_data_Amount, aes(x = Avg.Amount, y = drag_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Count and Amount Attribute", subtitle = "Amount Task", x = "Avg. Amt", y = "Avg. Drag Count Indicator") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
# Create a graph showing the correlation between the probability attribute and the mean number of drags per item in the amount task
ggplot(summary_data_Amount, aes(x = Avg.Prob, y = drag_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Count and Prob Attribute", subtitle = "Prob Task", x = "Avg. Prob", y = "Avg. Drag Count Indicator") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
Prob Task
# Create a graph showing the correlation between the amount attribute and the mean number of drags per item in the probability task
ggplot(summary_data_Prob, aes(x = Avg.Amount, y = drag_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Count and Amount Attribute", subtitle = "Prob Task", x = "Avg. Amt", y = "Avg. Drag Count Indicator") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
# Create a graph showing the correlation between the probability and mean number of drags per item in the probability task
ggplot(summary_data_Prob, aes(x = Avg.Prob, y = drag_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Count and Prob Attribute", subtitle = "Prob Task", x = "Avg. Prob", y = "Avg. Drag Count Indicator") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
Nested Models
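The idea behind the nested predictors built below: each attribute gets its own slope within each task by keeping the (centered) attribute value in rows from that task and setting it to 0 in rows from the other task. A minimal sketch of the coding rule on toy rows (if_else() is just shorthand for the case_when() used in the chunk below):
library(dplyr)
# Toy rows: the same item once in the Amount task and once in the Prob task
toy <- tibble(
condition = c("Amount", "Prob"),
Amount.c  = c(12, 12),
Prob.c    = c(-5, -5)
)
toy %>%
mutate(
Amt.Nested_Amount.c  = if_else(condition == "Amount", Amount.c, 0), # Amount slope, Amount task only
Amt.Nested_Prob.c    = if_else(condition == "Prob",   Amount.c, 0), # Amount slope, Prob task only
Prob.Nested_Amount.c = if_else(condition == "Amount", Prob.c,   0),
Prob.Nested_Prob.c   = if_else(condition == "Prob",   Prob.c,   0)
)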
# Prep for nested model
drag_and_drop_count_long.combined<-rbind(drag_and_drop_count_Amount_long, drag_and_drop_count_Prob_long)%>%
mutate(Prob.c=Prob-mean(Prob),
Amount.c=Amt-mean(Amt))
drag_and_drop_count_long.combined<-drag_and_drop_count_long.combined%>%
mutate(Amt.Nested_Amount=case_when(
condition == "Amount" ~ Amt,
condition == "Prob" ~ 0
),
Amt.Nested_Prob=case_when(
condition == "Amount" ~ 0,
condition == "Prob" ~ Amt
),
Prob.Nested_Amount=case_when(
condition == "Amount" ~ Prob,
condition == "Prob" ~ 0
),
Prob.Nested_Prob=case_when(
condition == "Amount" ~ 0,
condition == "Prob" ~ Prob
),
Amt.Nested_Amount.c=case_when(
condition == "Amount" ~Amount.c,
condition == "Prob" ~ 0
),
Amt.Nested_Prob.c=case_when(
condition == "Amount" ~ 0,
condition == "Prob" ~ Amount.c
),
Prob.Nested_Amount.c=case_when(
condition == "Amount" ~Prob.c,
condition == "Prob" ~ 0
),
Prob.Nested_Prob.c=case_when(
condition == "Amount" ~ 0,
condition == "Prob" ~ Prob.c
)
)
M1<-glmer(N_ind~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+(1|ResponseId),
drag_and_drop_count_long.combined,
family=binomial)
M2<-glmer(N_ind~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+initial.rank.r+(1|ResponseId),
drag_and_drop_count_long.combined,
family=binomial)
tab_model(M1,M2,
show.se = T,
show.ci = F,
show.stat = T,
pred.labels =
c("Intercept",
"Amount [Nested in Amount]",
"Amount [Nested in Probability]",
"Probability [Nested in Amount]",
"Probability [Nested in Probability]",
"Condition [Probability]",
"Initial Rank [6]","Initial Rank [5]","Initial Rank [4]","Initial Rank [3]","Initial Rank [2]"),
dv.labels =
c("Drag and Drop Count<br>(coded as binary DV; only Subject RE)",
"Drag and Drop Count<br>(coded as binary DV; Subject RE and initial Position)"))
Model 1: Drag and Drop Count (coded as binary DV; only Subject RE). Model 2: Drag and Drop Count (coded as binary DV; Subject RE and initial Position).

| Predictors | Odds Ratios (M1) | std. Error (M1) | Statistic (M1) | p (M1) | Odds Ratios (M2) | std. Error (M2) | Statistic (M2) | p (M2) |
|---|---|---|---|---|---|---|---|---|
| Intercept | 1.38 | 0.10 | 4.55 | <0.001 | 0.07 | 0.01 | -13.95 | <0.001 |
| Amount [Nested in Amount] | 1.01 | 0.01 | 1.63 | 0.103 | 1.02 | 0.01 | 2.83 | 0.005 |
| Amount [Nested in Probability] | 0.96 | 0.01 | -7.21 | <0.001 | 0.95 | 0.01 | -7.82 | <0.001 |
| Probability [Nested in Amount] | 0.97 | 0.00 | -8.11 | <0.001 | 0.96 | 0.00 | -9.36 | <0.001 |
| Probability [Nested in Probability] | 1.01 | 0.00 | 2.04 | 0.041 | 1.02 | 0.00 | 3.60 | <0.001 |
| Condition Probability | 1.18 | 0.12 | 1.63 | 0.103 | 1.19 | 0.15 | 1.36 | 0.172 |
| Initial Rank [6] | | | | | 115.59 | 31.03 | 17.70 | <0.001 |
| Initial Rank [5] | | | | | 94.67 | 24.62 | 17.50 | <0.001 |
| Initial Rank [4] | | | | | 49.66 | 12.17 | 15.94 | <0.001 |
| Initial Rank [3] | | | | | 30.26 | 7.10 | 14.54 | <0.001 |
| Initial Rank [2] | | | | | 10.10 | 2.23 | 10.49 | <0.001 |
| Random Effects | | | | | | | | |
| σ2 | 3.29 | | | | 3.29 | | | |
| τ00 (ResponseId) | 0.00 | | | | 0.14 | | | |
| ICC | | | | | 0.04 | | | |
| N (ResponseId) | 189 | | | | 189 | | | |
| Observations | 2118 | | | | 2118 | | | |
| Marginal R2 / Conditional R2 | 0.215 / NA | | | | 0.572 / 0.590 | | | |
The first model has singularity issues; the second is fine.
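A quick way to confirm this (a sketch, assuming lme4 is loaded and M1/M2 are the models fit above):
# Check the singular fit: M1's ResponseId intercept variance is essentially 0
isSingular(M1)   # expected TRUE, matching the tau00 = 0.00 reported above
isSingular(M2)   # expected FALSE
VarCorr(M1)      # shows the near-zero random-intercept variance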
Collinearity Checks
Model 1
car::vif(M1)
## Amt.Nested_Amount.c Amt.Nested_Prob.c Prob.Nested_Amount.c
## 1.939544 1.972481 1.928123
## Prob.Nested_Prob.c condition
## 1.994129 1.032145
Model 2
car::vif(M2)
## GVIF Df GVIF^(1/(2*Df))
## Amt.Nested_Amount.c 2.074821 1 1.440424
## Amt.Nested_Prob.c 2.021856 1 1.421920
## Prob.Nested_Amount.c 2.021591 1 1.421827
## Prob.Nested_Prob.c 2.095049 1 1.447429
## condition 1.065344 1 1.032155
## initial.rank.r 1.407624 5 1.034782
Drag order is the sequence in which items are first dragged and dropped. Items that are never dragged are assigned a value of 1 + the number of distinct items dragged. For example, if a participant moves item A three times, item B twice, and item C once, while items D, E, and F remain untouched, the drag order is A = 1, B = 2, C = 3, and D = E = F = 4. This coding simplifies cases where an item is touched multiple times (as in the example); however, as seen above, participants rarely drag the same item repeatedly, which justifies the simplification.
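A minimal worked example of this coding rule on a toy move log (the item names A to F and the log itself are illustrative):
library(dplyr)
# One respondent's toy move log: A dragged three times, B twice, C once
toy_log <- tibble(
step       = 1:6,
item_moved = c("A", "B", "A", "C", "B", "A")
)
touched <- toy_log %>%
arrange(step) %>%
filter(!duplicated(item_moved)) %>%  # keep only the first touch of each item
mutate(order = row_number())         # A = 1, B = 2, C = 3
# Untouched items get 1 + the number of distinct items touched (here 4)
tibble(item_moved = c("A", "B", "C", "D", "E", "F")) %>%
left_join(touched %>% select(item_moved, order), by = "item_moved") %>%
mutate(order = coalesce(order, max(order, na.rm = TRUE) + 1))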
touch_order_analysis_Amount<-RankProcess_Amount%>%
filter(step!=0)%>%
group_by(ResponseId)%>%
arrange(step)%>%
filter(!duplicated(item_moved))%>% # retains only the first instance
mutate(order=row_number())%>%
ungroup()%>%
mutate(condition="Amount")
touch_order_analysis.long_Amount <- expand_grid(
ResponseId = unique(touch_order_analysis_Amount$ResponseId),
item.f = unique(touch_order_analysis_Amount$item.f)
)
order_max.SUBJ_Amount<-touch_order_analysis.long_Amount%>%
left_join(touch_order_analysis_Amount%>%select(ResponseId,item.f,order),by=c("ResponseId","item.f"))%>%
left_join(touch_order_analysis_Amount%>%select(ResponseId,condition)%>%filter(!duplicated(ResponseId)),by=c("ResponseId"))%>%
group_by(ResponseId)%>%
dplyr::summarize(max_order=max(order,na.rm = T))
touch_order_analysis.long_Amount<-touch_order_analysis.long_Amount%>%
left_join(touch_order_analysis_Amount%>%select(ResponseId,item.f,order),by=c("ResponseId","item.f"))%>%
left_join(touch_order_analysis_Amount%>%select(ResponseId,condition)%>%filter(!duplicated(ResponseId)),by=c("ResponseId"))%>%left_join(order_max.SUBJ_Amount,by="ResponseId")%>%
mutate(order = case_when(!is.na(order)~order,
TRUE~max_order+1))
touch_order_analysis_Prob<-RankProcess_Prob%>%
filter(step!=0)%>%
group_by(ResponseId)%>%
arrange(step)%>%
filter(!duplicated(item_moved))%>%
mutate(order=row_number())%>%
ungroup()%>%
mutate(condition="Prob")
touch_order_analysis.long_Prob <- expand_grid(
ResponseId = unique(touch_order_analysis_Prob$ResponseId),
item.f = unique(touch_order_analysis_Prob$item.f)
)
order_max.SUBJ_Prob<-touch_order_analysis.long_Prob%>%
left_join(touch_order_analysis_Prob%>%select(ResponseId,item.f,order),by=c("ResponseId","item.f"))%>%
left_join(touch_order_analysis_Prob%>%select(ResponseId,condition)%>%filter(!duplicated(ResponseId)),by=c("ResponseId"))%>%
group_by(ResponseId)%>%
dplyr::summarize(max_order=max(order,na.rm = T))
touch_order_analysis.long_Prob<-touch_order_analysis.long_Prob%>%
left_join(touch_order_analysis_Prob%>%select(ResponseId,item.f,order),by=c("ResponseId","item.f"))%>%
left_join(touch_order_analysis_Prob%>%select(ResponseId,condition)%>%filter(!duplicated(ResponseId)),by=c("ResponseId"))%>%left_join(order_max.SUBJ_Prob,by="ResponseId")%>%
mutate(order = case_when(!is.na(order)~order,
TRUE~max_order+1))
# length(unique(touch_order_analysis.long_A$ResponseId)) #142, good.
# touch_order_analysis.long_A%>%
# group_by(ResponseId)%>%
# summarize(n_6=n_distinct(item.f))%>%
# filter(n_6!=6) # none, good
# psych::describe(drag_order_analysis.long$order) # between 1 and 6, good.
touch_order_Amount <- touch_order_analysis.long_Amount %>%
filter(ResponseId%notin%Amount_tau_less0.5)%>%
count(item.f,order,condition) %>%
group_by(item.f)%>%
mutate(percentage = n / sum(n) * 100)%>%
ungroup()
touch_order_Prob <- touch_order_analysis.long_Prob %>%
filter(ResponseId%notin%Prob_tau_less0.5)%>%
count(item.f,order,condition) %>%
group_by(item.f)%>%
mutate(percentage = n / sum(n) * 100)%>%
ungroup()
touch_order_combined<-rbind(touch_order_Prob,touch_order_Amount)
ggplot(touch_order_combined, aes(x = factor(order), y = n)) +
geom_bar(
stat = "identity",
color = "black"
) +
geom_text(
aes(
label = paste0(n, " (", round(percentage, 1), "%)")
),
vjust = -0.5,
size = 5,
fontface="bold"
) +
labs(
title = "Drag Order by item and Condition",
x = "Drag Order",
y = "Frequency"
) +
theme_minimal() +
theme(
strip.text = element_text(size = 12, face = "bold"), # Facet label adjustments
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10)
) +
facet_wrap(~ item.f * condition,ncol = 2) +
ylim(0, 150)
Distribution of Mean Drag Order
mean_order.subj_Prob <- touch_order_analysis.long_Prob %>%
filter(ResponseId%notin%Prob_tau_less0.5)%>%
group_by(ResponseId)%>%
mutate(mean_order = mean(order),
condition="Prob")%>%
ungroup()
mean_order.subj_Amount<- touch_order_analysis.long_Amount %>%
filter(ResponseId%notin%Amount_tau_less0.5)%>%
group_by(ResponseId)%>%
mutate(mean_order = mean(order),
condition="Amount")%>%
ungroup()
mean_order.subj_combined<-rbind(mean_order.subj_Amount,mean_order.subj_Prob)
ggplot(mean_order.subj_combined, aes(x = mean_order)) +
geom_density(fill = "lightblue", color = "black", alpha = 0.5) +
geom_rug(sides = "b", color = "blue") + # Rug plot along the bottom (x-axis) for individual data points
labs(
title = "Density Plot of Mean Drag Order",
x = "Mean Drag Order",
y = "Density"
) +
facet_grid(~condition)
summary_data_Amount<- touch_order_analysis.long_Amount%>%
dplyr::group_by(condition, item.f) %>%
dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
order_sd = sd(order, na.rm = TRUE),
n = n(),
se = order_sd / sqrt(n), # Standard error
.groups = "drop")
summary_data_Prob <- touch_order_analysis.long_Prob%>%
dplyr::group_by(condition, item.f) %>%
dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
order_sd = sd(order, na.rm = TRUE),
n = n(),
se = order_sd / sqrt(n), # Standard error
.groups = "drop")
summary_data_combined <- bind_rows(summary_data_Amount, summary_data_Prob)
summary_data_combined$item.f <- factor(summary_data_combined$item.f, levels = rev(c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6")), ordered = TRUE)
ggplot(summary_data_combined, aes(x = condition, y = order_mean,
group = item.f, color = item.f, shape = item.f)) +
geom_line(linewidth = 1, position = position_dodge(0.3)) +
geom_point(size = 6, position = position_dodge(0.3)) +
geom_errorbar(
aes(
ymin = order_mean - se,
ymax = order_mean + se
),
width = 0.2,
position = position_dodge(0.3)
) +
labs(
x = "Condition",
y = "Mean ± SE Drag Order",
title = "Mean Drag Order by Condition"
) +
scale_color_manual(values = bet_colors) +
scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22,
"Pr4_Amt3" = 23, "Pr3_Amt4" = 24,
"Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
theme_minimal() +
theme(
legend.position = "top", # Place legend at the top
legend.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5)
)
Amount Task
summary_data_Amount <- touch_order_analysis.long_Amount%>%
dplyr::group_by(condition, item.f) %>%
dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
order_sd = sd(order, na.rm = TRUE),
n = n(),
se = order_sd / sqrt(n), # Standard error
.groups = "drop",
Avg.Amount=mean(Amt),
Avg.Prob=mean(Prob))
ggplot(summary_data_Amount, aes(x = Avg.Amount, y = order_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Order and Amount Attribute", subtitle = "Amount Task", x = "Avg. Amt", y = "Avg. Drag Order") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
ggplot(summary_data_Amount, aes(x = Avg.Prob, y = order_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Order and Prob Attribute", subtitle = "Amount Task", x = "Avg. Prob", y = "Avg. Drag Order") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
Prob Task
summary_data_Prob <- touch_order_analysis.long_Prob%>%
dplyr::group_by(condition, item.f) %>%
dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
order_sd = sd(order, na.rm = TRUE),
n = n(),
se = order_sd / sqrt(n), # Standard error
.groups = "drop",
Avg.Amount=mean(Amt),
Avg.Prob=mean(Prob))
ggplot(summary_data_Prob, aes(x = Avg.Amount, y = order_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Order and Amount Attribute", subtitle = "Prob Task", x = "Avg. Amt", y = "Avg. Drag Order") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
ggplot(summary_data_Prob, aes(x = Avg.Prob, y = order_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Order and Prob Attribute", subtitle = "Prob Task", x = "Avg. Prob", y = "Avg. Drag Order") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
Amount and Prob Attribute values are centered before being entered into the model.
touch_order_analysis.long_Amount$condition<-"Amount"
touch_order_analysis.long_Prob$condition<-"Prob"
touch_order_analysis.long.combined<-rbind(touch_order_analysis.long_Prob, touch_order_analysis.long_Amount)%>%
mutate(Prob.c=Prob-mean(Prob),
Amount.c=Amt-mean(Amt))
Nested Models
##### Nested Model
touch_order_analysis.long.combined<-touch_order_analysis.long.combined%>%
mutate(Amt.Nested_Amount=case_when(
condition == "Amount" ~Amt,
condition == "Prob" ~ 0
),
Amt.Nested_Prob=case_when(
condition == "Amount" ~ 0,
condition == "Prob" ~ Amt
),
Prob.Nested_Amount=case_when(
condition == "Amount" ~Prob,
condition == "Prob" ~ 0
),
Prob.Nested_Prob=case_when(
condition == "Amount" ~ 0,
condition == "Prob" ~ Prob
),
Amt.Nested_Amount.c=case_when(
condition == "Amount" ~Amount.c,
condition == "Prob" ~ 0
),
Amt.Nested_Prob.c=case_when(
condition == "Amount" ~ 0,
condition == "Prob" ~ Amount.c
),
Prob.Nested_Amount.c=case_when(
condition == "Amount" ~Prob.c,
condition == "Prob" ~ 0
),
Prob.Nested_Prob.c=case_when(
condition == "Amount" ~ 0,
condition == "Prob" ~ Prob.c
))
M1<-clmm(factor(order)~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition + (1|ResponseId),touch_order_analysis.long.combined)
M2<-clmm(factor(order)~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+initial.rank.r+(1|ResponseId),touch_order_analysis.long.combined)
tab_model(M1,M2,
show.se = T,
show.ci = F,
show.stat = T,
pred.labels =
c("1|2","2|3", "3|4", "4|5", "5|6",
"Amount [Nested in Amount]",
"Amount [Nested in Probability]",
"Probability [Nested in Amount]",
"Probability [Nested in Probability]",
"Condition [Probability]",
"Initial Rank [6]","Initial Rank [5]","Initial Rank [4]","Initial Rank [3]","Initial Rank [2]"),
dv.labels =
c("Drag and Drop Order<br>(ordinal DV; only Subject RE)",
"Drag and Drop Order<br>(ordinal DV; Subject RE and initial Position)"))
Model 1: Drag and Drop Order (ordinal DV; only Subject RE). Model 2: Drag and Drop Order (ordinal DV; Subject RE and initial Position).

| Predictors | Odds Ratios (M1) | std. Error (M1) | Statistic (M1) | p (M1) | Odds Ratios (M2) | std. Error (M2) | Statistic (M2) | p (M2) |
|---|---|---|---|---|---|---|---|---|
| 1\|2 | 0.12 | 0.01 | -26.06 | <0.001 | 0.01 | 0.00 | -31.83 | <0.001 |
| 2\|3 | 0.52 | 0.03 | -10.24 | <0.001 | 0.03 | 0.00 | -24.48 | <0.001 |
| 3\|4 | 1.82 | 0.11 | 9.59 | <0.001 | 0.16 | 0.02 | -14.92 | <0.001 |
| 4\|5 | 7.49 | 0.55 | 27.35 | <0.001 | 0.91 | 0.11 | -0.77 | 0.439 |
| 5\|6 | 66.39 | 9.27 | 30.06 | <0.001 | 14.23 | 2.26 | 16.68 | <0.001 |
| Amount [Nested in Amount] | 0.95 | 0.00 | -11.69 | <0.001 | 0.94 | 0.00 | -12.70 | <0.001 |
| Amount [Nested in Probability] | 1.01 | 0.00 | 3.44 | 0.001 | 1.02 | 0.00 | 5.12 | <0.001 |
| Probability [Nested in Amount] | 1.01 | 0.00 | 3.86 | <0.001 | 1.02 | 0.00 | 6.42 | <0.001 |
| Probability [Nested in Probability] | 0.96 | 0.00 | -14.50 | <0.001 | 0.95 | 0.00 | -16.12 | <0.001 |
| Condition Probability | 1.03 | 0.08 | 0.33 | 0.738 | 1.06 | 0.08 | 0.74 | 0.461 |
| Initial Rank [6] | | | | | 0.04 | 0.01 | -20.73 | <0.001 |
| Initial Rank [5] | | | | | 0.04 | 0.01 | -21.08 | <0.001 |
| Initial Rank [4] | | | | | 0.04 | 0.01 | -20.08 | <0.001 |
| Initial Rank [3] | | | | | 0.05 | 0.01 | -19.44 | <0.001 |
| Initial Rank [2] | | | | | 0.10 | 0.01 | -15.74 | <0.001 |
| Random Effects | | | | | | | | |
| σ2 | 3.29 | | | | 3.29 | | | |
| τ00 (ResponseId) | 0.00 | | | | 0.04 | | | |
| ICC | | | | | 0.01 | | | |
| N (ResponseId) | 193 | | | | 193 | | | |
| Observations | 2238 | | | | 2238 | | | |
| Marginal R2 / Conditional R2 | 0.366 / NA | | | | 0.552 / 0.558 | | | |
Collinearity Check
car::vif() does not support clmm objects, so a different collinearity check is needed.
# car::vif(M1)
# car::vif(M2)
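One possible workaround (a sketch, not run in this notebook): compute VIFs for the fixed effects directly from the design matrix, ignoring the thresholds and the random intercept.
# Sketch: approximate VIFs from the fixed-effects design matrix.
# For each column j, regress it on the remaining columns; VIF_j = 1 / (1 - R^2_j).
X <- model.matrix(~ Amt.Nested_Amount.c + Amt.Nested_Prob.c +
Prob.Nested_Amount.c + Prob.Nested_Prob.c + condition,
data = touch_order_analysis.long.combined)[, -1]  # drop the intercept
vifs <- sapply(seq_len(ncol(X)), function(j) {
r2 <- summary(lm(X[, j] ~ X[, -j]))$r.squared
1 / (1 - r2)
})
names(vifs) <- colnames(X)
round(vifs, 2)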
Coding of Distance
After each drag, the distance is calculated as Last Position - Current Position of the dragged item. Positive values indicate that the item was ranked up (e.g., moved from 2nd place to 1st place), and negative values indicate that the item was ranked down (e.g., moved from 1st place to 2nd place). Items a respondent never drags are assigned a distance of 0.
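A minimal numeric illustration of this coding (the positions are made up):
# distance = last position - current position
# moving from 2nd to 1st place:  2 - 1 = +1 (ranked up)
# moving from 1st to 3rd place:  1 - 3 = -2 (ranked down)
last_position    <- c(2, 1)
current_position <- c(1, 3)
last_position - current_position   # +1, -2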
summary_stats <- Distance_Amount.cleanup.df %>%
group_by(item.f) %>%
dplyr::summarize(
mean_distance = mean(distance, na.rm = TRUE),
median_distance = median(distance, na.rm = TRUE)
)
Distance_Amount.cleanup.df$item.f<- factor(Distance_Amount.cleanup.df$item.f, levels = rev(c( "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)
ggplot(Distance_Amount.cleanup.df ,
aes(x = -distance, fill = item.f)) +
geom_histogram(binwidth = 1, alpha = 0.3, position = "identity") +
labs(
title = "Distribution of Drag Distance - Amount Task",
x = "Distance",
y = "Count",
fill = "item"
) +
theme_minimal()+
facet_grid(~item.f)+
xlim(6,-6)+
scale_fill_manual(values = bet_colors)
summary_stats <- Distance_Prob.cleanup.df %>%
group_by(item.f) %>%
dplyr::summarize(
mean_distance = mean(distance, na.rm = TRUE),
median_distance = median(distance, na.rm = TRUE)
)
Distance_Prob.cleanup.df$item.f<- factor(Distance_Prob.cleanup.df$item.f, levels = rev(c( "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)
ggplot(Distance_Prob.cleanup.df ,
aes(x = -distance, fill = item.f)) +
geom_histogram(binwidth = 1, alpha = 0.3, position = "identity") +
labs(
title = "Distribution of Drag Distance - Prob Task",
x = "Distance",
y = "Count",
fill = "item"
) +
theme_minimal()+
facet_grid(~item.f)+
scale_fill_manual(values = bet_colors)+
xlim(6,-6)
Distance_Amount_cleanup.df.test<-Distance_Amount.cleanup.df%>%
select(ResponseId, item.f,distance,distance.abs)%>%
mutate(condition="Amount")
Distance_Prob_cleanup.df.test<-Distance_Prob.cleanup.df%>%
select(ResponseId, item.f,distance,distance.abs)%>%
mutate(condition="Prob")
Distance_cleanup.df.combined<-rbind(Distance_Amount_cleanup.df.test,Distance_Prob_cleanup.df.test)
summary_distance_data <- Distance_cleanup.df.combined %>%
mutate(condition=as.factor(condition),
distance.abs=(distance))%>%
group_by(condition, item.f) %>%
dplyr::summarize(
distance_mean = -mean(distance, na.rm = TRUE),
distance_sd = sd(distance, na.rm = TRUE),
n = n(),
se = distance_sd / sqrt(n),
.groups = "drop"
)
summary_distance_data$item.f <- factor(summary_distance_data$item.f, levels = rev(c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6")), ordered = TRUE)
ggplot(summary_distance_data, aes(x = condition, y = distance_mean, group = item.f, color = item.f,shape=item.f)) +
geom_line(linewidth = 1, position = position_dodge(0.3)) +
geom_point(size = 6, position = position_dodge(0.3)) +
geom_errorbar(
aes(
ymin = distance_mean - se,
ymax = distance_mean + se
),
width = 0.2,
position = position_dodge(0.3)
) +
labs(
x = "Condition",
y = "Mean ± SE Drag Distance",
title = "Mean Drag Distance by Condition"
) +
theme_minimal() +
theme(
legend.position = "top", # Place legend at the top
legend.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5)
)+
scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22,
"Pr4_Amt3" = 23, "Pr3_Amt4" = 24,
"Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
scale_color_manual(values = bet_colors)
If we plot the ABSOLUTE VALUE of Distance:
summary_distance_data <- Distance_cleanup.df.combined %>%
mutate(condition=as.factor(condition),
distance.abs=abs(distance))%>%
group_by(condition, item.f) %>%
dplyr::summarize(
distance_mean = mean(distance.abs, na.rm = TRUE),
distance_sd = sd(distance.abs, na.rm = TRUE),
n = n(),
se = distance_sd / sqrt(n),
.groups = "drop"
)
summary_distance_data$item.f <- factor(summary_distance_data$item.f, levels = rev(c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6")), ordered = TRUE)
ggplot(summary_distance_data, aes(x = condition, y = distance_mean, group = item.f, color = item.f,shape=item.f)) +
geom_line(linewidth = 1, position = position_dodge(0.3)) +
geom_point(size = 6, position = position_dodge(0.3)) +
geom_errorbar(
aes(
ymin = distance_mean - se,
ymax = distance_mean + se
),
width = 0.2,
position = position_dodge(0.3)
) +
labs(
x = "Condition",
y = "Mean ± SE Drag Order",
title = "Mean Drag Order by Condition"
) +
theme_minimal() +
theme(
legend.position = "top", # Place legend at the top
legend.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5)
)+
scale_shape_manual(values = c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22,
"Pr4_Amt3" = 23, "Pr3_Amt4" = 24,
"Pr2_Amt5" = 25, "Pr1_Amt6" = 11)) +
scale_color_manual(values = bet_colors)
Amount Task
Distance_Amount.cleanup.df<-Distance_Amount.cleanup.df%>%
filter(ResponseId%notin%Amount_tau_less0.5)%>%
mutate(rank.Amount=case_when(
item.f=="Pr6_Amt1" ~1,
item.f=="Pr5_Amt2" ~ 2,
item.f== "Pr4_Amt3" ~ 3,
item.f== "Pr3_Amt4" ~ 4,
item.f == "Pr2_Amt5" ~ 5,
item.f == "Pr1_Amt6" ~6
),
rank.Prob=case_when(
item.f=="Pr6_Amt1" ~6,
item.f=="Pr5_Amt2" ~ 5,
item.f== "Pr4_Amt3" ~ 4,
item.f== "Pr3_Amt4" ~ 3,
item.f == "Pr2_Amt5" ~ 2,
item.f == "Pr1_Amt6" ~1))%>%
left_join(initial.dat_amount%>%select(ResponseId,starts_with("initial.items_")),by="ResponseId")%>%
mutate(initial.rank=case_when(
item.f=="Pr6_Amt1" ~ initial.items_49,
item.f=="Pr5_Amt2" ~ initial.items_50,
item.f=="Pr4_Amt3" ~ initial.items_64,
item.f=="Pr3_Amt4" ~ initial.items_65,
item.f=="Pr2_Amt5" ~ initial.items_67,
item.f=="Pr1_Amt6" ~ initial.items_68
),
initial.rank.r=7-initial.rank,
initial.rank.r = relevel(factor(initial.rank.r), ref = 6)
)%>%
select(-c(starts_with("initial.items_")))%>%
left_join(dat_long1%>%select(ResponseId,item.f,Prob,Amt),by=c("ResponseId","item.f"))
summary_data_Amount <- Distance_Amount.cleanup.df%>%
dplyr::group_by(item.f) %>%
dplyr::summarize(distance_mean = mean(distance, na.rm = TRUE),
distance_sd = sd(distance, na.rm = TRUE),
n = n(),
se = distance_sd / sqrt(n), # Standard error
.groups = "drop",
Avg.Amount=mean(Amt),
Avg.Prob=mean(Prob))
ggplot(summary_data_Amount, aes(x = Avg.Amount, y = -distance_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Distance and Amount Attribute", subtitle = "Amount Task", x = "Avg. Amt", y = "Avg. Drag Distance") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
ggplot(summary_data_Amount, aes(x = Avg.Prob, y = -distance_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Distance and Prob Attribute", subtitle = "Amount Task", x = "Avg. Prob", y = "Avg. Drag Distance") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
Prob Task
Distance_Prob.cleanup.df<-Distance_Prob.cleanup.df%>%
filter(ResponseId%notin%Prob_tau_less0.5)%>%
mutate(rank.Amount=case_when(
item.f=="Pr6_Amt1" ~1,
item.f=="Pr5_Amt2" ~ 2,
item.f== "Pr4_Amt3" ~ 3,
item.f== "Pr3_Amt4" ~ 4,
item.f == "Pr2_Amt5" ~ 5,
item.f == "Pr1_Amt6" ~6
),
rank.Prob=case_when(
item.f=="Pr6_Amt1" ~6,
item.f=="Pr5_Amt2" ~ 5,
item.f== "Pr4_Amt3" ~ 4,
item.f== "Pr3_Amt4" ~ 3,
item.f == "Pr2_Amt5" ~ 2,
item.f == "Pr1_Amt6" ~1))%>%
left_join(initial.dat_prob%>%select(ResponseId,starts_with("initial.items_")),by="ResponseId")%>%
mutate(initial.rank=case_when(
item.f=="Pr6_Amt1" ~ initial.items_49,
item.f=="Pr5_Amt2" ~ initial.items_50,
item.f=="Pr4_Amt3" ~ initial.items_64,
item.f=="Pr3_Amt4" ~ initial.items_65,
item.f=="Pr2_Amt5" ~ initial.items_67,
item.f=="Pr1_Amt6" ~ initial.items_68
),
initial.rank.r=7-initial.rank,
initial.rank.r = relevel(factor(initial.rank.r), ref = 6)
)%>%
select(-c(starts_with("initial.items_")))%>%
left_join(dat_long1%>%select(ResponseId,item.f,Prob,Amt),by=c("ResponseId","item.f"))
summary_data_Prob <- Distance_Prob.cleanup.df%>%
dplyr::group_by(item.f) %>%
dplyr::summarize(distance_mean = mean(distance, na.rm = TRUE),
distance_sd = sd(distance, na.rm = TRUE),
n = n(),
se = distance_sd / sqrt(n), # Standard error
.groups = "drop",
Avg.Amount=mean(Amt),
Avg.Prob=mean(Prob))
ggplot(summary_data_Prob, aes(x = Avg.Amount, y = -distance_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Distance and Amount Attribute", subtitle = "Prob Task", x = "Avg. Amt", y = "Avg. Drag Distance") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
ggplot(summary_data_Prob, aes(x = Avg.Prob, y = -distance_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Distance and Prob Attribute", subtitle = "Prob Task", x = "Avg. Prob", y = "Avg. Drag Distance") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
Amount and Prob attribute values are centered before being entered into the model.
Distance_Amount.cleanup.df$condition<-"Amount"
Distance_Prob.cleanup.df$condition<-"Prob"
Distance.cleanup.combined<-rbind(Distance_Amount.cleanup.df,Distance_Prob.cleanup.df)%>%
mutate(Prob.c=Prob-mean(Prob),
Amount.c=Amt-mean(Amt))
Nested Models
##### Nested Model
Distance.cleanup.combined<-Distance.cleanup.combined%>%
mutate(Amt.Nested_Amount=case_when(
condition == "Amount" ~Amt,
condition == "Prob" ~ 0
),
Amt.Nested_Prob=case_when(
condition == "Amount" ~ 0,
condition == "Prob" ~ Amt
),
Prob.Nested_Amount=case_when(
condition == "Amount" ~Prob,
condition == "Prob" ~ 0
),
Prob.Nested_Prob=case_when(
condition == "Amount" ~ 0,
condition == "Prob" ~ Prob
),
Amt.Nested_Amount.c=case_when(
condition == "Amount" ~Amount.c,
condition == "Prob" ~ 0
),
Amt.Nested_Prob.c=case_when(
condition == "Amount" ~ 0,
condition == "Prob" ~ Amount.c
),
Prob.Nested_Amount.c=case_when(
condition == "Amount" ~ Prob.c,
condition == "Prob" ~ 0
),
Prob.Nested_Prob.c=case_when(
condition == "Amount" ~ 0,
condition == "Prob" ~ Prob.c
))
M1<-lmer(distance.r~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+(1|ResponseId),Distance.cleanup.combined)
M2<-lmer(distance.r~Amt.Nested_Amount.c+Amt.Nested_Prob.c+Prob.Nested_Amount.c+Prob.Nested_Prob.c+condition+initial.rank.r+(1|ResponseId),Distance.cleanup.combined)
tab_model(
M1, M2,
dv.labels = c(
"Drag and Drop Distance<br>(Only Subject RE)",
"Drag and Drop Distance<br>(Subject RE + Added Initial Position)"
),
pred.labels =
c("Intercept",
"Amount [Nested in Amount]",
"Amount [Nested in Probability]",
"Probability [Nested in Amount]",
"Probability [Nested in Probability]",
"Condition [Probability]",
"Initial Rank [6]","Initial Rank [5]","Initial Rank [4]","Initial Rank [3]","Initial Rank [2]"),
show.se = T,
show.ci = F,
show.stat = T
)
Model 1: Drag and Drop Distance (Only Subject RE). Model 2: Drag and Drop Distance (Subject RE + Added Initial Position).

| Predictors | Estimates (M1) | std. Error (M1) | Statistic (M1) | p (M1) | Estimates (M2) | std. Error (M2) | Statistic (M2) | p (M2) |
|---|---|---|---|---|---|---|---|---|
| Intercept | 1.11 | 0.05 | 23.68 | <0.001 | -0.24 | 0.06 | -4.23 | <0.001 |
| Amount [Nested in Amount] | 0.02 | 0.00 | 8.95 | <0.001 | 0.02 | 0.00 | 14.10 | <0.001 |
| Amount [Nested in Probability] | -0.02 | 0.00 | -8.71 | <0.001 | -0.02 | 0.00 | -12.16 | <0.001 |
| Probability [Nested in Amount] | -0.02 | 0.00 | -9.61 | <0.001 | -0.02 | 0.00 | -15.99 | <0.001 |
| Probability [Nested in Probability] | 0.01 | 0.00 | 8.96 | <0.001 | 0.02 | 0.00 | 16.37 | <0.001 |
| Condition Probability | 0.01 | 0.05 | 0.15 | 0.881 | 0.00 | 0.03 | 0.12 | 0.903 |
| Initial Rank [6] | | | | | 2.69 | 0.06 | 48.01 | <0.001 |
| Initial Rank [5] | | | | | 2.12 | 0.06 | 37.89 | <0.001 |
| Initial Rank [4] | | | | | 1.57 | 0.06 | 28.01 | <0.001 |
| Initial Rank [3] | | | | | 1.14 | 0.06 | 20.39 | <0.001 |
| Initial Rank [2] | | | | | 0.57 | 0.06 | 10.10 | <0.001 |
| Random Effects | | | | | | | | |
| σ2 | 1.46 | | | | 0.55 | | | |
| τ00 (ResponseId) | 0.14 | | | | 0.23 | | | |
| ICC | 0.09 | | | | 0.29 | | | |
| N (ResponseId) | 189 | | | | 189 | | | |
| Observations | 2118 | | | | 2118 | | | |
| Marginal R2 / Conditional R2 | 0.330 / 0.389 | | | | 0.673 / 0.768 | | | |
Collinearity Check
car::vif(M1)
## Amt.Nested_Amount.c Amt.Nested_Prob.c Prob.Nested_Amount.c
## 2.032084 2.036722 2.032065
## Prob.Nested_Prob.c condition
## 2.036704 1.000001
car::vif(M2)
## GVIF Df GVIF^(1/(2*Df))
## Amt.Nested_Amount.c 2.045435 1 1.430187
## Amt.Nested_Prob.c 2.050920 1 1.432103
## Prob.Nested_Amount.c 2.049421 1 1.431580
## Prob.Nested_Prob.c 2.049804 1 1.431714
## condition 1.000004 1 1.000002
## initial.rank.r 1.017056 5 1.001693
Distance_Amount<-RankProcess_Amount %>%
group_by(ResponseId)%>%
mutate(
parts = str_split(order, ",")
) %>%
mutate(
Rank1 = sapply(parts, function(x) x[1]), # Extract before 1st comma
Rank2 = sapply(parts, function(x) x[2]), # Extract before 2nd comma
Rank3 = sapply(parts, function(x) x[3]), # Extract before 3rd comma
Rank4 = sapply(parts, function(x) x[4]), # Extract before 4th comma
Rank5 = sapply(parts, function(x) x[5]), # Extract before 5th comma
Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA)) # Extract after 5th comma
) %>%
select(-parts)
items_Amount <- c("49", "50", "64", "65", "67", "68")
for (item in items_Amount) {
Distance_Amount[[paste0("current_", item)]] <- NA_integer_
}
Distance_Amount <- Distance_Amount %>%
rowwise() %>%
mutate(
across(
starts_with("current_"),
~ {
item_number <- str_remove(cur_column(), "current_") # Extract the item number
case_when(
Rank1 == item_number ~ 1,
Rank2 == item_number ~ 2,
Rank3 == item_number ~ 3,
Rank4 == item_number ~ 4,
Rank5 == item_number ~ 5,
Rank6 == item_number ~ 6,
TRUE ~ 1
)
}
)
) %>%
ungroup()
for (item in items_Amount) {
Distance_Amount[[paste0("last_", item)]] <- lag(Distance_Amount[[paste0("current_", item)]])
}
Distance_Amount<-Distance_Amount%>%
group_by(ResponseId)%>%
rowwise() %>%
mutate(
current_item_moved = get(paste0("current_", item_moved)),
last_item_moved = get(paste0("last_", item_moved)),
move_direction = case_when(
is.na(last_item_moved) ~ "no_change",
current_item_moved < last_item_moved ~ "up",
current_item_moved > last_item_moved ~ "down",
TRUE ~ "no_change"
)
) %>%
ungroup()
Distance_Amount <- Distance_Amount %>%
group_by(ResponseId)%>%
filter(step!=0)
TimeAnalysis.Amount<-Distance_Amount%>%
filter(ResponseId%notin%Amount_tau_less0.5)%>%
separate(timing, into = c("drag_time", "drop_time"), sep = ", ", convert = TRUE)%>%
mutate(DD_diff=drop_time-drag_time,
condition="Amount")%>%
select(step,ResponseId,condition,item.f,drag_time,drop_time,DD_diff,current_49:current_68)
duplicated.n<-nrow(TimeAnalysis.Amount)
item<-c("Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")
TimeAnalysis.Amount <- TimeAnalysis.Amount %>%
uncount(weights = 6)
TimeAnalysis.Amount$item.f<- rep(item, times = duplicated.n)
TimeAnalysis.Amount<-TimeAnalysis.Amount%>%
mutate(current_rank=case_when(
item.f=="Pr6_Amt1" ~ current_49,
item.f=="Pr5_Amt2" ~ current_50,
item.f=="Pr4_Amt3" ~ current_64,
item.f=="Pr3_Amt4" ~ current_65,
item.f=="Pr2_Amt5" ~ current_67,
item.f=="Pr1_Amt6" ~ current_68
))%>%
select(-c(current_49:current_68))
item_Amounts <- c(
"Pr6_Amt1" = "#d73027", # Strong red
"Pr5_Amt2" = "#f46d43", # Medium red
"Pr4_Amt3" = "#fdae61", # Light red (orange-ish)
"Pr3_Amt4" = "#a6d96a", # Light green
"Pr2_Amt5" = "#66bd63", # Medium green
"Pr1_Amt6" = "#1a9850" # Strong green
)
item_shapes <- c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22,
"Pr4_Amt3" = 23, "Pr3_Amt4" = 24,
"Pr2_Amt5" = 25, "Pr1_Amt6" = 11)
# One more step: expand TimeAnalysis.Amount so every item appears at every step (ranks are carried forward below)
TimeAnalysis.Amount.expand <- expand_grid(
ResponseId = unique(TimeAnalysis.Amount$ResponseId),
step = unique(TimeAnalysis.Amount$step),
item.f = unique(TimeAnalysis.Amount$item.f)
) %>%
left_join(
TimeAnalysis.Amount %>% select(ResponseId, step, item.f, current_rank),
by = c("ResponseId", "step", "item.f")
) %>%
arrange(ResponseId, item.f, step) %>%
group_by(ResponseId, item.f) %>%
fill(current_rank, .direction = "down")%>%
ungroup()
Summary.Amount <- TimeAnalysis.Amount.expand %>%
group_by(step, item.f) %>%
dplyr::summarize(mean.current_rank = mean(current_rank),
sd.current_rank = sd(current_rank),
n = n(),
se = sd.current_rank / sqrt(n),
.groups = "drop")
initial.rank<-touch_order_analysis.long_Amount%>%
filter(ResponseId%notin%Amount_tau_less0.5)%>%
group_by(item.f)%>%
mutate(initial.rank=as.numeric(initial.rank))%>%
dplyr::summarize(mean.current_rank = 7-mean(initial.rank),
sd.current_rank = sd(initial.rank),
n = n(),
se = sd.current_rank / sqrt(n),
.groups = "drop")%>%
mutate(step=0)
Summary.Amount<-rbind(Summary.Amount,
initial.rank)
ggplot(Summary.Amount, aes(x = step, y = mean.current_rank,
color = item.f, shape = item.f)) +
geom_line(size = 1) +
geom_point(size = 6, fill = "white") +
geom_errorbar(aes(ymin = mean.current_rank - se, ymax = mean.current_rank + se),
width = 0.3, size = 1.2, alpha = 0.8) +
scale_color_manual(values = item_Amounts) +
scale_shape_manual(values = item_shapes) +
labs(title = "Mean Rank by Step (Amount Task)",
x = "Step",
y = "Mean Rank",
color = "Item",
linetype = "Item",
shape = "Item") +
theme_minimal() + # Clean theme
theme(legend.position = "right",
axis.title.x = element_text(face = "bold", size = 14), # Bold x-axis label
axis.title.y = element_text(face = "bold", size = 14), # Bold y-axis label
axis.text.x = element_text(face = "bold", size = 12), # Bold x-axis text
axis.text.y = element_text(face = "bold", size = 12) # Bold y-axis text
)+
scale_y_continuous(breaks = 6:1) +
scale_x_continuous(breaks = 0:8)
Distance_Prob<-RankProcess_Prob %>%
group_by(ResponseId)%>%
mutate(
# Split the string into parts based on commas
parts = str_split(order, ",")
) %>%
mutate(
Rank1 = sapply(parts, function(x) x[1]), # Extract before 1st comma
Rank2 = sapply(parts, function(x) x[2]), # Extract before 2nd comma
Rank3 = sapply(parts, function(x) x[3]), # Extract before 3rd comma
Rank4 = sapply(parts, function(x) x[4]), # Extract before 4th comma
Rank5 = sapply(parts, function(x) x[5]), # Extract before 5th comma
Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA)) # Extract after 5th comma
) %>%
select(-parts)
items_Prob <- c("49", "50", "64", "65", "67", "68")
for (item in items_Prob) {
Distance_Prob[[paste0("current_", item)]] <- NA_integer_
}
Distance_Prob <- Distance_Prob %>%
rowwise() %>%
mutate(
across(
starts_with("current_"),
~ {
item_number <- str_remove(cur_column(), "current_") # Extract the item number
case_when(
Rank1 == item_number ~ 1,
Rank2 == item_number ~ 2,
Rank3 == item_number ~ 3,
Rank4 == item_number ~ 4,
Rank5 == item_number ~ 5,
Rank6 == item_number ~ 6,
TRUE ~ 1 # Distance_A %>% mutate(NA_count = rowSums(is.na(select(., starts_with("current_"))))); this code somehow results in the first item always getting an NA, so we default unmatched items to 1 to fix this
)
}
)
) %>%
ungroup()
for (item in items_Prob) {
Distance_Prob[[paste0("last_", item)]] <- lag(Distance_Prob[[paste0("current_", item)]])
}
Distance_Prob<-Distance_Prob%>%
group_by(ResponseId)%>%
rowwise() %>%
mutate(
current_item_moved = get(paste0("current_", item_moved)), # Get the rank of the moved item from current columns
last_item_moved = get(paste0("last_", item_moved)), # Get the rank of the moved item from last columns
# Determine the movement direction; we should not see any "no_change"
move_direction = case_when(
is.na(last_item_moved) ~ "no_change",
current_item_moved < last_item_moved ~ "up",
current_item_moved > last_item_moved ~ "down",
TRUE ~ "no_change"
)
) %>%
ungroup()
Distance_Prob <- Distance_Prob %>%
group_by(ResponseId)%>%
filter(step!=0) # step 0 was only needed above to compute the lagged positions; drop it now
TimeAnalysis.Prob<-Distance_Prob%>%
filter(ResponseId%notin%Prob_tau_less0.5)%>%
separate(timing, into = c("drag_time", "drop_time"), sep = ", ", convert = TRUE)%>%
mutate(DD_diff=drop_time-drag_time,
condition="Prob")%>%
select(step,ResponseId,condition,item.f,drag_time,drop_time,DD_diff,current_49:current_68)
duplicated.n<-nrow(TimeAnalysis.Prob)
item<-c("Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")
TimeAnalysis.Prob <- TimeAnalysis.Prob %>%
uncount(weights = 6)
TimeAnalysis.Prob$item.f<- rep(item, times = duplicated.n)
TimeAnalysis.Prob<-TimeAnalysis.Prob%>%
mutate(current_rank=case_when(
item.f=="Pr6_Amt1" ~ current_49,
item.f=="Pr5_Amt2" ~ current_50,
item.f=="Pr4_Amt3" ~ current_64,
item.f=="Pr3_Amt4" ~ current_65,
item.f=="Pr2_Amt5" ~ current_67,
item.f=="Pr1_Amt6" ~ current_68
))%>%
select(-c(current_49:current_68))
item_Amounts <- c(
"Pr6_Amt1" = "#d73027", # Strong red
"Pr5_Amt2" = "#f46d43", # Medium red
"Pr4_Amt3" = "#fdae61", # Light red (orange-ish)
"Pr3_Amt4" = "#a6d96a", # Light green
"Pr2_Amt5" = "#66bd63", # Medium green
"Pr1_Amt6" = "#1a9850" # Strong green
)
item_shapes <- c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22,
"Pr4_Amt3" = 23, "Pr3_Amt4" = 24,
"Pr2_Amt5" = 25, "Pr1_Amt6" = 11)
TimeAnalysis.Prob.expand <- expand_grid(
ResponseId = unique(TimeAnalysis.Prob$ResponseId),
step = unique(TimeAnalysis.Prob$step),
item.f = unique(TimeAnalysis.Prob$item.f)
) %>%
left_join(
TimeAnalysis.Prob %>% select(ResponseId, step, item.f, current_rank),
by = c("ResponseId", "step", "item.f")
) %>%
arrange(ResponseId, item.f, step) %>%
group_by(ResponseId, item.f) %>%
fill(current_rank, .direction = "down")%>%
ungroup()
Summary.Prob <- TimeAnalysis.Prob.expand %>%
group_by(step, item.f) %>%
dplyr::summarize(mean.current_rank = mean(current_rank),
sd.current_rank = sd(current_rank),
n = n(),
se = sd.current_rank / sqrt(n),
.groups = "drop")
initial.rank<-touch_order_analysis.long_Prob%>%
filter(ResponseId%notin%Prob_tau_less0.5)%>%
group_by(item.f)%>%
mutate(initial.rank=as.numeric(initial.rank))%>%
dplyr::summarize(mean.current_rank = 7- mean(initial.rank),
sd.current_rank = sd(initial.rank),
n = n(),
se = sd.current_rank / sqrt(n),
.groups = "drop")%>%
mutate(step=0)
Summary.Prob<-rbind(Summary.Prob,
initial.rank)
ggplot(Summary.Prob, aes(x = step, y = mean.current_rank,
color = item.f, shape = item.f)) +
geom_line(size = 1) +
geom_point(size = 6, fill = "white") +
geom_errorbar(aes(ymin = mean.current_rank - se, ymax = mean.current_rank + se),
width = 0.3, size = 1.2, alpha = 0.8) +
scale_color_manual(values = item_Amounts) +
scale_shape_manual(values = item_shapes) +
labs(title = "Mean Rank by Step (Prob Task)",
x = "Step",
y = "Mean Rank",
color = "Item",
linetype = "Item",
shape = "Item") +
theme_minimal() + # Clean theme
theme(legend.position = "right",
axis.title.x = element_text(face = "bold", size = 14), # Bold x-axis label
axis.title.y = element_text(face = "bold", size = 14), # Bold y-axis label
axis.text.x = element_text(face = "bold", size = 12), # Bold x-axis text
axis.text.y = element_text(face = "bold", size = 12) # Bold y-axis text
)+
scale_y_continuous(breaks = 6:1) +
scale_x_continuous(breaks = 0:10)
This only concerns the ranking-by-preference tasks.
# Display correlation matrix
print(round(cor(master_df[, c("set1_touch_count", "set1_order", "set1_drag_distance.r")],
use = "pairwise.complete.obs"), 2))
## set1_touch_count set1_order set1_drag_distance.r
## set1_touch_count 1.00 -0.48 0.51
## set1_order -0.48 1.00 -0.47
## set1_drag_distance.r 0.51 -0.47 1.00
# Create SPLOM
ggpairs(master_df,
columns = c("set1_touch_count", "set1_order", "set1_drag_distance.r"),
lower = list(continuous = "smooth"),
diag = list(continuous = "densityDiag"),
upper = list(continuous = "cor")) +
theme_minimal()
# Display correlation matrix
print(round(cor(master_df[, c("set2_touch_count", "set2_order", "set2_drag_distance.r")],
use = "pairwise.complete.obs"), 2))
## set2_touch_count set2_order set2_drag_distance.r
## set2_touch_count 1.00 -0.49 0.54
## set2_order -0.49 1.00 -0.51
## set2_drag_distance.r 0.54 -0.51 1.00
# Create SPLOM
ggpairs(master_df,
columns = c("set2_touch_count", "set2_order", "set2_drag_distance.r"),
lower = list(continuous = "smooth"),
diag = list(continuous = "densityDiag"),
upper = list(continuous = "cor")) +
theme_minimal()
Regress the subjective ranks from the amount and probability tasks on the centered amount and probability attributes.
amt_rob <- lm_robust(amt_subj_rank ~ set1_amt.c + set1_prob.c, data = master_df, cluster = ResponseId, se_type = "stata")
amt_lmer <- lmer(amt_subj_rank ~ set1_amt.c + set1_prob.c + (1 | ResponseId), data = master_df)
amt_clmm <- clmm(factor(amt_subj_rank) ~ set1_amt.c + set1_prob.c + (1 | ResponseId), data = master_df)
tab_model(amt_rob, amt_lmer, amt_clmm,
show.se = T,
show.ci = F,
show.stat = T,
pred.labels =
c("Intercept",
"Amount (centered)",
"Probability (centered)",
"1|2","2|3", "3|4", "4|5", "5|6"),
dv.labels =
c("DV = Amount (subj. Rank)<br>lm_robust",
"DV = Amount (subj. Rank)<br>linear, repeated",
"DV = Amount (subj. Rank)<br>ordinal, repeated"))
Model 1: DV = Amount (subj. Rank), lm_robust. Model 2: DV = Amount (subj. Rank), linear, repeated. Model 3: DV = Amount (subj. Rank), ordinal, repeated.

| Predictors | Estimates (M1) | std. Error (M1) | Statistic (M1) | p (M1) | Estimates (M2) | std. Error (M2) | Statistic (M2) | p (M2) | Odds Ratios (M3) | std. Error (M3) | Statistic (M3) | p (M3) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Intercept | 3.50 | 0.01 | 502.70 | <0.001 | 3.50 | 0.01 | 255.13 | <0.001 | | | | |
| Amount (centered) | 0.79 | 0.03 | 31.40 | <0.001 | 0.79 | 0.02 | 40.52 | <0.001 | 55.33 | NaN | NaN | NaN |
| Probability (centered) | -0.98 | 0.02 | -47.78 | <0.001 | -0.98 | 0.02 | -50.24 | <0.001 | 0.00 | NaN | NaN | NaN |
| 1\|2 | | | | | | | | | 0.00 | NaN | NaN | NaN |
| 2\|3 | | | | | | | | | 0.03 | NaN | NaN | NaN |
| 3\|4 | | | | | | | | | 5.34 | NaN | NaN | NaN |
| 4\|5 | | | | | | | | | 895.14 | NaN | NaN | NaN |
| 5\|6 | | | | | | | | | 204562.15 | NaN | NaN | NaN |
| Random Effects | | | | | | | | | | | | |
| σ2 | | | | | 0.22 | | | | 3.29 | | | |
| τ00 (ResponseId) | | | | | 0.00 | | | | 0.00 | | | |
| N (ResponseId) | | | | | 194 | | | | 194 | | | |
| Observations | 1164 | | | | 1164 | | | | 1164 | | | |
| R2 / R2 adjusted | 0.925 / 0.925 | | | | 0.925 / NA | | | | 0.978 / NA | | | |
prob_rob <- lm_robust(prob_subj_rank ~ set1_amt.c + set1_prob.c, data = master_df, cluster = ResponseId, se_type = "stata")
prob_lmer <- lmer(prob_subj_rank ~ set1_amt.c + set1_prob.c + (1 | ResponseId), data = master_df)
prob_clmm <- clmm(factor(prob_subj_rank) ~ set1_amt.c + set1_prob.c + (1 | ResponseId), data = master_df)
tab_model(prob_rob, prob_lmer, prob_clmm,
show.se = T,
show.ci = F,
show.stat = T,
pred.labels =
c("Intercept",
"Amount (centered)",
"Probability (centered)",
"1|2","2|3", "3|4", "4|5", "5|6"),
dv.labels =
c("DV = Prob (subj. Rank)<br>lm_robust",
"DV = Prob (subj. Rank)<br>linear, repeated",
"DV = Prob (subj. Rank)<br>ordinal, repeated"))
Model 1: DV = Prob (subj. Rank), lm_robust. Model 2: DV = Prob (subj. Rank), linear, repeated. Model 3: DV = Prob (subj. Rank), ordinal, repeated.

| Predictors | Estimates (M1) | std. Error (M1) | Statistic (M1) | p (M1) | Estimates (M2) | std. Error (M2) | Statistic (M2) | p (M2) | Odds Ratios (M3) | std. Error (M3) | Statistic (M3) | p (M3) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Intercept | 3.50 | 0.01 | 568.20 | <0.001 | 3.50 | 0.03 | 122.35 | <0.001 | | | | |
| Amount (centered) | -0.75 | 0.04 | -20.34 | <0.001 | -0.75 | 0.04 | -18.34 | <0.001 | 0.08 | NaN | NaN | NaN |
| Probability (centered) | 0.77 | 0.05 | 15.27 | <0.001 | 0.77 | 0.04 | 18.85 | <0.001 | 10.31 | NaN | NaN | NaN |
| 1\|2 | | | | | | | | | 0.01 | NaN | NaN | NaN |
| 2\|3 | | | | | | | | | 0.13 | NaN | NaN | NaN |
| 3\|4 | | | | | | | | | 1.07 | NaN | NaN | NaN |
| 4\|5 | | | | | | | | | 8.71 | NaN | NaN | NaN |
| 5\|6 | | | | | | | | | 152.85 | NaN | NaN | NaN |
| Random Effects | | | | | | | | | | | | |
| σ2 | | | | | 0.95 | | | | 3.29 | | | |
| τ00 (ResponseId) | | | | | 0.00 | | | | 0.00 | | | |
| N (ResponseId) | | | | | 194 | | | | 194 | | | |
| Observations | 1164 | | | | 1164 | | | | 1164 | | | |
| R2 / R2 adjusted | 0.674 / 0.674 | | | | 0.674 / NA | | | | 0.859 / NA | | | |
rank_data <- dat %>%
select(
`49` = rank_pref1_49,
`50` = rank_pref1_50,
`64` = rank_pref1_64,
`65` = rank_pref1_65,
`67` = rank_pref1_67,
`68` = rank_pref1_68
) %>%
pivot_longer(
everything(),
names_to = "item_moved",
values_to = "Subj_rank"
) %>%
mutate(
item_moved = as.integer(item_moved),
item_label = case_when(
item_moved == 49 ~ "Pr6_Amt1",
item_moved == 50 ~ "Pr5_Amt2",
item_moved == 64 ~ "Pr4_Amt3",
item_moved == 65 ~ "Pr3_Amt4",
item_moved == 67 ~ "Pr2_Amt5",
item_moved == 68 ~ "Pr1_Amt6"
),
item_label = factor(item_label, levels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"))
)
ggplot(rank_data, aes(x = item_label, y = Subj_rank)) + # reverse coded so 6 = top rank
geom_violin(trim = FALSE, fill = "lightblue", color = "darkblue", alpha = 0.5) +
geom_jitter(width = 0.15, height = 0, alpha = 0.6, size = 1.5, color = "black") +
stat_summary(fun = mean, geom = "point", shape = 21, size = 3, fill = "red", color = "red") +
labs(
title = "Distribution of pref1 Ranks by Item",
x = "Item",
y = "Rank"
) +
scale_y_continuous(breaks = 1:6, limits = c(1, 6)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
What’s the distribution of final ranks?
# Step 1: Define the mapping
id_to_label <- c(
"49" = "Pr6_Amt1",
"50" = "Pr5_Amt2",
"64" = "Pr4_Amt3",
"65" = "Pr3_Amt4",
"67" = "Pr2_Amt5",
"68" = "Pr1_Amt6"
)
# Step 2: Select relevant columns and count number of 1s
rank_counts1 <- dat %>%
select(starts_with("rank_pref1_")) %>%
summarise(across(everything(), ~ sum(. == 1, na.rm = TRUE))) %>%
pivot_longer(cols = everything(), names_to = "column", values_to = "n_order_1") %>%
mutate(
id = sub("rank_pref1_", "", column),
item.f = id_to_label[id]
) %>%
filter(!is.na(item.f)) # Only keep mapped items
# Step 3: Plot
ggplot(rank_counts1, aes(x = item.f, y = n_order_1, fill = item.f)) +
geom_col() +
scale_fill_manual(values = bet_colors) +
labs(
title = "Count of Participants ranking the\nItems on first Position (Preference Task 1)",
x = "Item",
y = "Count (Rank = 1)"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(face = "bold", hjust = 0.5),
legend.position = "none"
)
How many people touched which lottery first?
order1_counts <- touch_order_analysis.long_pref1 %>%
filter(order == 1) %>% # Keep only rows where order == 1
count(item.f, name = "n_order_1") # Count how many times each item.f appears
# Plot
ggplot(order1_counts, aes(x = item.f, y = n_order_1, fill = item.f)) +
geom_col() +
scale_fill_manual(values = bet_colors) +
labs(
title = "Count of Participants touching\neach Item first (Preference Task 1)",
x = "Item",
y = "Count (Order = 1)"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(face = "bold", hjust = 0.5),
legend.position = "none"
)
What do the ranking profiles of these people look like?
# Only consider participants in the same "category", i.e., who arrived at the same final ranking. Let's look at the ones who ranked the highest amount/probability highest
# Only consider participants who ranked the highest Amount first
selected_ids_Pr1_Amt6 <- dat %>%
filter(rank_pref1_68 == 1) %>%
pull(ResponseId)
# Only consider participants who ranked the highest Probability first
selected_ids_Pr6_Amt1 <- dat %>%
filter(rank_pref1_49 == 1) %>%
pull(ResponseId)
# Remaining IDs
excluded_ids <- union(selected_ids_Pr1_Amt6, selected_ids_Pr6_Amt1)
remaining_ids <- dat %>%
filter(!ResponseId %in% excluded_ids) %>%
pull(ResponseId)
# Create Distance_Prefer1
Distance_Prefer1<-RankProcess_Prefer1 %>%
group_by(ResponseId)%>%
mutate(
# Split the string into parts based on commas
parts = str_split(order, ",")
) %>%
mutate(
Rank1 = sapply(parts, function(x) x[1]), # Extract before 1st comma
Rank2 = sapply(parts, function(x) x[2]), # Extract before 2nd comma
Rank3 = sapply(parts, function(x) x[3]), # Extract before 3rd comma
Rank4 = sapply(parts, function(x) x[4]), # Extract before 4th comma
Rank5 = sapply(parts, function(x) x[5]), # Extract before 5th comma
Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA)) # Extract after 5th comma
) %>%
select(-parts)
items_Prefer1 <- c("49", "50", "64", "65", "67", "68")
for (item in items_Prefer1) {
Distance_Prefer1[[paste0("current_", item)]] <- NA_integer_
}
Distance_Prefer1 <- Distance_Prefer1 %>%
rowwise() %>%
mutate(
across(
starts_with("current_"),
~ {
item_number <- str_remove(cur_column(), "current_") # Extract the item number
case_when(
Rank1 == item_number ~ 1,
Rank2 == item_number ~ 2,
Rank3 == item_number ~ 3,
Rank4 == item_number ~ 4,
Rank5 == item_number ~ 5,
Rank6 == item_number ~ 6,
TRUE ~ 1
)
}
)
) %>%
ungroup()
for (item in items_Prefer1) {
Distance_Prefer1[[paste0("last_", item)]] <- lag(Distance_Prefer1[[paste0("current_", item)]])
}
Distance_Prefer1<-Distance_Prefer1%>%
group_by(ResponseId)%>%
rowwise() %>%
mutate(
current_item_moved = get(paste0("current_", item_moved)), # Get the rank of the moved item from current columns
last_item_moved = get(paste0("last_", item_moved)), # Get the rank of the moved item from last columns
# Determine the movement direction; we should not see any "no_change"
move_direction = case_when(
is.na(last_item_moved) ~ "no_change",
current_item_moved < last_item_moved ~ "up",
current_item_moved > last_item_moved ~ "down",
TRUE ~ "no_change"
)
) %>%
ungroup()
# Filter Distance_Prefer1 for those ResponseIds
Ranking_per_Step_Pr1_Amt6 <- Distance_Prefer1 %>%
filter(ResponseId %in% selected_ids_Pr1_Amt6)
Ranking_per_Step_Pr6_Amt1 <- Distance_Prefer1 %>%
filter(ResponseId %in% selected_ids_Pr6_Amt1)
Ranking_per_Step_remaining <- Distance_Prefer1 %>%
filter(ResponseId %in% remaining_ids)
## Create new R columns
# Initialize R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)
Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>%
mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)
Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>%
mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)
# Function to fill R columns
fill_R_columns <- function(order_string, target_value) {
order_string <- gsub("[[:space:][:cntrl:]]", "", order_string) # Clean spaces and newlines
order_numbers <- strsplit(order_string, ",")[[1]]
position_of_value <- which(order_numbers == as.character(target_value))
if (length(position_of_value) == 0) {
return(NA)
} else if (position_of_value == 1) {
return(1)
} else {
return(position_of_value)
}
}
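# Quick sanity check on an illustrative order string (not from the data):
fill_R_columns("49,50,64,65,67,68", target_value = 64) # returns 3 (item 64 sits in slot 3); a missing item returns NA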
# Apply function to fill R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
mutate(
Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
)
Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>%
mutate(
Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
)
Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>%
mutate(
Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
)
# Pad each ResponseId to at least 10 rows (8 for the remaining group) by duplicating the final step
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% group_by(ResponseId) %>%
group_modify(~ {
while (nrow(.x) < 10) {
max_step_row <- .x %>% filter(step == max(step))
.x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
}
return(.x)
}) %>% ungroup()
Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% group_by(ResponseId) %>%
group_modify(~ {
while (nrow(.x) < 10) {
max_step_row <- .x %>% filter(step == max(step))
.x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
}
return(.x)
}) %>% ungroup()
Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% group_by(ResponseId) %>%
group_modify(~ {
while (nrow(.x) < 8) {
max_step_row <- .x %>% filter(step == max(step))
.x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
}
return(.x)
}) %>% ungroup()
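# Note: the padding repeats each participant's final ranking at later steps, so (assuming steps are
# numbered consecutively from 0) every participant contributes an observation to each step kept in
# the per-step summaries below.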
## Summary DFs
summary_ranks_step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
filter(step < 10) %>%
group_by(step) %>%
dplyr::summarise(
mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),
mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
summary_ranks_step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>%
filter(step < 10) %>%
group_by(step) %>%
dplyr::summarise(
mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),
mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
summary_ranks_step_remaining <- Ranking_per_Step_remaining %>%
filter(step < 8) %>%
group_by(step) %>%
dplyr::summarise(
mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),
mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
## Long DFs
summary_ranks_step_Pr1_Amt6_long <- summary_ranks_step_Pr1_Amt6 %>%
pivot_longer(
cols = -step,
names_to = c(".value", "label"),
names_pattern = "^(mean|se)_(.*)$"
) %>%
rename(
mean_rank = mean,
mean_se = se
)
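# names_pattern splits a column name such as "mean_Pr6_Amt1" into .value = "mean" and label = "Pr6_Amt1",
# so each long-format row holds one item's mean and SE per step (renamed to mean_rank / mean_se).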
summary_ranks_step_Pr6_Amt1_long <- summary_ranks_step_Pr6_Amt1 %>%
pivot_longer(
cols = -step,
names_to = c(".value", "label"),
names_pattern = "^(mean|se)_(.*)$"
) %>%
rename(
mean_rank = mean,
mean_se = se
)
summary_ranks_step_remaining_long <- summary_ranks_step_remaining %>%
pivot_longer(
cols = -step,
names_to = c(".value", "label"),
names_pattern = "^(mean|se)_(.*)$"
) %>%
rename(
mean_rank = mean,
mean_se = se
)
# Shapes
bet_shapes <- c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)
# Create plot
S_Bet_Profile1 <- ggplot(summary_ranks_step_Pr1_Amt6_long,
aes(x = step, y = mean_rank)) +
geom_line(aes(color = label), size = 1) +
geom_point(aes(fill = label, shape = label), size = 3) +
geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se),
width = 0.2, color = "black") +
scale_color_manual(values = bet_colors) +
scale_fill_manual(values = bet_colors) +
scale_shape_manual(values = bet_shapes) +
scale_x_continuous(breaks = 0:9) +
scale_y_continuous(limits = c(1, 6), breaks = 1:6) +
labs(x = "Ranking Step", y = "Mean Rank",
title = "Mean of Ranks by Step - $-Bet Profile\n(Preference Task 1)",
color = "Label", shape = "Label", fill = "Label") +
theme_minimal() +
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.5))
P_Bet_Profile1 <- ggplot(summary_ranks_step_Pr6_Amt1_long,
aes(x = step, y = mean_rank)) +
geom_line(aes(color = label), size = 1) +
geom_point(aes(fill = label, shape = label), size = 3) +
geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se),
width = 0.2, color = "black") +
scale_color_manual(values = bet_colors) +
scale_fill_manual(values = bet_colors) +
scale_shape_manual(values = bet_shapes) +
scale_x_continuous(breaks = 0:9) +
scale_y_continuous(limits = c(1, 6), breaks = 1:6) +
labs(x = "Ranking Step", y = "Mean Rank",
title = "Mean of Ranks by Step - P-Bet Profile\n(Preference Task 1)",
color = "Label", shape = "Label", fill = "Label") +
theme_minimal() +
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.5))
remaining_Profile1 <- ggplot(summary_ranks_step_remaining_long,
aes(x = step, y = mean_rank)) +
geom_line(aes(color = label), size = 1) +
geom_point(aes(fill = label, shape = label), size = 3) +
geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se),
width = 0.2, color = "black") +
scale_color_manual(values = bet_colors) +
scale_fill_manual(values = bet_colors) +
scale_shape_manual(values = bet_shapes) +
scale_x_continuous(breaks = 0:9) +
scale_y_continuous(limits = c(1, 6), breaks = 1:6) +
labs(x = "Ranking Step", y = "Mean Rank",
title = "Mean of Ranks by Step - Unclear Profile\n(Preference Task 1)",
color = "Label", shape = "Label", fill = "Label") +
theme_minimal() +
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.5))
# Keep each legend at the bottom; plot_layout(guides = "collect") below merges them into one shared legend
S_Bet_Profile1 <- S_Bet_Profile1 + theme(legend.position = "bottom")
P_Bet_Profile1 <- P_Bet_Profile1 + theme(legend.position = "bottom")
# Combine the plots with shared legend
combined_plot1 <- S_Bet_Profile1 + P_Bet_Profile1 +
plot_layout(guides = "collect") &
theme(legend.position = "bottom")
# Display combined plot
combined_plot1
remaining_Profile1
The following set of histograms illustrates the number of times each item is touched.
drag_drop_counts_Prefer1 <- drag_and_drop_count_Prefer1_long %>%
count(item.f,N) %>%
group_by(item.f)%>%
mutate(percentage = n / sum(n) * 100,
condition="Amount")%>%
ungroup()
ggplot(drag_drop_counts_Prefer1, aes(x = factor(N), y = n)) +
geom_bar(
stat = "identity",
color = "black"
) +
geom_text(
aes(
label = paste0(n, " (", round(percentage, 1), "%)")
),
vjust = -0.5,
size = 5,
fontface="bold") +
labs(
title = "Drag Count by item and Quiz Condition",
x = "Drag Count",
y = "Frequency"
) +
theme_minimal() +
theme(
strip.text = element_text(size = 12, face = "bold"), # Increased size and bold text
plot.title = element_text(hjust = 0.5),
axis.title = element_text(size = 12), # Adjust axis titles size if needed
axis.text = element_text(size = 10) # Adjust axis labels size if needed
) +
facet_wrap(~ item.f,ncol=2) +
ylim(0, 150)
Note on attribute rank coding: Across the Amount and Prob tasks, a greater value indicates a higher rank (i.e., 1 = item at the bottom and 6 = item at the top).
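For example, with six items a raw top-to-bottom position p (1 = top slot) maps onto this convention as 7 - p, so the top item receives 6; the snippet below only illustrates the convention and is not necessarily how the recoded variables were constructed.
raw_position <- c(1, 4, 6) # illustrative vector, not a pipeline variable; 1 = top slot
7 - raw_position # recodes to c(6, 3, 1): 6 = top, 1 = bottom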
summary_data_Prefer1 <- drag_and_drop_count_Prefer1_long%>%
dplyr::group_by(condition, item.f) %>%
dplyr::summarize(drag_mean = mean(N_ind, na.rm = TRUE),
drag_sd = sd(N_ind, na.rm = TRUE),
n = n(),
se = drag_sd / sqrt(n), # Standard error
.groups = "drop",
Avg.Amount=mean(Amt),
Avg.Prob=mean(Prob))
ggplot(summary_data_Prefer1, aes(x = Avg.Amount, y = drag_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Count and Amount Attribute", subtitle = "1ST Preference Task", x = "Avg. Amt", y = "Avg. Drag Count Indicator") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
ggplot(summary_data_Prefer1, aes(x = Avg.Prob, y = drag_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Count and Prob Attribute", subtitle = "1ST Preference Task", x = "Avg. Prob", y = "Avg. Drag Count Indicator") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
touch_order_pref1 <- touch_order_analysis.long_pref1 %>%
count(item.f,order,condition) %>%
group_by(item.f)%>%
mutate(percentage = n / sum(n) * 100)%>%
ungroup()
ggplot(touch_order_pref1, aes(x = factor(order), y = n)) +
geom_bar(
stat = "identity",
color = "black"
) +
geom_text(
aes(
label = paste0(n, " (", round(percentage, 1), "%)")
),
vjust = -0.5,
size = 5,
fontface="bold"
) +
labs(
title = "Drag Order by item and Condition",
x = "Drag Order",
y = "Frequency"
) +
theme_minimal() +
theme(
strip.text = element_text(size = 12, face = "bold"), # Facet label adjustments
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10)
) +
facet_wrap(~ item.f ,ncol = 2) +
ylim(0, 100)
summary_data_pref1 <- touch_order_analysis.long_pref1%>%
dplyr::group_by(condition, item.f) %>%
dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
order_sd = sd(order, na.rm = TRUE),
n = n(),
se = order_sd / sqrt(n), # Standard error
.groups = "drop",
Avg.Amount=mean(Amt),
Avg.Prob=mean(Prob))
ggplot(summary_data_pref1, aes(x = Avg.Amount, y = order_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Order and Amount Attribute", subtitle = "1ST Preference Task", x = "Avg. Amt", y = "Avg. Drag Order") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
ggplot(summary_data_pref1, aes(x = Avg.Prob, y = order_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Order and Prob Attribute", subtitle = "1ST Preference Task", x = "Avg. Prob", y = "Avg. Drag Order") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
summary_stats <- Distance_pref1.cleanup.df %>%
group_by(item.f) %>%
dplyr::summarize(
mean_distance = mean(distance, na.rm = TRUE),
median_distance = median(distance, na.rm = TRUE)
)
Distance_pref1.cleanup.df$item.f <- factor(Distance_pref1.cleanup.df$item.f, levels = rev(c( "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)
ggplot(Distance_pref1.cleanup.df ,
aes(x = -distance, fill = item.f)) +
geom_histogram(binwidth = 1, alpha = 0.3, position = "identity") +
labs(
title = "Distribution of Drag Distance - pref1 Task",
x = "Distance",
y = "Count",
fill = "item"
) +
theme_minimal()+
facet_grid(~item.f)+
xlim(6,-6)+
scale_fill_manual(values = bet_colors)
rank_data2 <- dat %>%
select(
`49` = rank_pref2_49,
`50` = rank_pref2_50,
`64` = rank_pref2_64,
`65` = rank_pref2_65,
`67` = rank_pref2_67,
`68` = rank_pref2_68
) %>%
pivot_longer(
everything(),
names_to = "item_moved",
values_to = "Subj_rank"
) %>%
mutate(
item_moved = as.integer(item_moved),
item_label = case_when(
item_moved == 49 ~ "Pr6_Amt1",
item_moved == 50 ~ "Pr5_Amt2",
item_moved == 64 ~ "Pr4_Amt3",
item_moved == 65 ~ "Pr3_Amt4",
item_moved == 67 ~ "Pr2_Amt5",
item_moved == 68 ~ "Pr1_Amt6"
),
item_label = factor(item_label, levels = c("Pr6_Amt1", "Pr5_Amt2", "Pr4_Amt3", "Pr3_Amt4", "Pr2_Amt5", "Pr1_Amt6"))
)
ggplot(rank_data2, aes(x = item_label, y = Subj_rank)) + # reverse coded so 6 = top rank
geom_violin(trim = FALSE, fill = "lightblue", color = "darkblue", alpha = 0.5) +
geom_jitter(width = 0.15, height = 0, alpha = 0.6, size = 1.5, color = "black") +
stat_summary(fun = mean, geom = "point", shape = 21, size = 3, fill = "red", color = "red") +
labs(
title = "Distribution of pref2 Ranks by Item",
x = "Item",
y = "Rank"
) +
scale_y_continuous(breaks = 1:6, limits = c(1, 6)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
What’s the distribution of final ranks?
# Step 1: Define the mapping
id_to_label <- c(
"49" = "Pr6_Amt1",
"50" = "Pr5_Amt2",
"64" = "Pr4_Amt3",
"65" = "Pr3_Amt4",
"67" = "Pr2_Amt5",
"68" = "Pr1_Amt6"
)
# Step 2: Select relevant columns and count number of 1s
rank_counts <- dat %>%
select(starts_with("rank_pref2_")) %>%
summarise(across(everything(), ~ sum(. == 1, na.rm = TRUE))) %>%
pivot_longer(cols = everything(), names_to = "column", values_to = "n_order_1") %>%
mutate(
id = sub("rank_pref2_", "", column),
item.f = id_to_label[id]
) %>%
filter(!is.na(item.f)) # Only keep mapped items
# Step 3: Plot
ggplot(rank_counts, aes(x = item.f, y = n_order_1, fill = item.f)) +
geom_col() +
scale_fill_manual(values = bet_colors) +
labs(
title = "Count of Participants ranking the\nItems on first Position",
x = "Item",
y = "Count (Rank = 1)"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(face = "bold", hjust = 0.5),
legend.position = "none"
)
How many people touched which lottery first?
order1_counts <- touch_order_analysis.long_pref2 %>%
filter(order == 1) %>% # Keep only rows where order == 1
count(item.f, name = "n_order_1") # Count how many times each item.f appears
# Plot
ggplot(order1_counts, aes(x = item.f, y = n_order_1, fill = item.f)) +
geom_col() +
scale_fill_manual(values = bet_colors) +
labs(
title = "Count of Participants touching\neach Item first",
x = "Item",
y = "Count (Order = 1)"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(face = "bold", hjust = 0.5),
legend.position = "none"
)
What do the ranking profiles of these participants look like?
# Only consider participants in the same "category", i.e., who arrived at the same final ranking. Let's look at those who ranked the highest-amount or the highest-probability lottery first
# Only consider participants who ranked the highest Amount first
selected_ids_Pr1_Amt6 <- dat %>%
filter(rank_pref2_68 == 1) %>%
pull(ResponseId)
# Only consider participants who ranked the highest Probability first
selected_ids_Pr6_Amt1 <- dat %>%
filter(rank_pref2_49 == 1) %>%
pull(ResponseId)
# Remaining IDs
excluded_ids <- union(selected_ids_Pr1_Amt6, selected_ids_Pr6_Amt1)
remaining_ids <- dat %>%
filter(!ResponseId %in% excluded_ids) %>%
pull(ResponseId)
# Create Distance_Prefer2
Distance_Prefer2<-RankProcess_Prefer2 %>%
group_by(ResponseId)%>%
mutate(
# Split the string into parts based on commas
parts = str_split(order, ",")
) %>%
mutate(
Rank1 = sapply(parts, function(x) x[1]), # 1st item in the order string
Rank2 = sapply(parts, function(x) x[2]), # 2nd item
Rank3 = sapply(parts, function(x) x[3]), # 3rd item
Rank4 = sapply(parts, function(x) x[4]), # 4th item
Rank5 = sapply(parts, function(x) x[5]), # 5th item
Rank6 = sapply(parts, function(x) ifelse(length(x) > 5, x[6], NA)) # 6th item, if present
) %>%
select(-parts)
items_Prefer2 <- c("49", "50", "64", "65", "67", "68")
for (item in items_Prefer2) {
Distance_Prefer2[[paste0("current_", item)]] <- NA_integer_
}
Distance_Prefer2 <- Distance_Prefer2 %>%
rowwise() %>%
mutate(
across(
starts_with("current_"),
~ {
item_number <- str_remove(cur_column(), "current_") # Extract the item number
case_when(
Rank1 == item_number ~ 1,
Rank2 == item_number ~ 2,
Rank3 == item_number ~ 3,
Rank4 == item_number ~ 4,
Rank5 == item_number ~ 5,
Rank6 == item_number ~ 6,
TRUE ~ 1
)
}
)
) %>%
ungroup()
for (item in items_Prefer2) {
Distance_Prefer2[[paste0("last_", item)]] <- lag(Distance_Prefer2[[paste0("current_", item)]])
}
Distance_Prefer2<-Distance_Prefer2%>%
group_by(ResponseId)%>%
rowwise() %>%
mutate(
current_item_moved = get(paste0("current_", item_moved)), # Get the rank of the moved item from current columns
last_item_moved = get(paste0("last_", item_moved)), # Get the rank of the moved item from last columns
# Determine the movement direction; we should not see any "no_change"
move_direction = case_when(
is.na(last_item_moved) ~ "no_change",
current_item_moved < last_item_moved ~ "up",
current_item_moved > last_item_moved ~ "down",
TRUE ~ "no_change"
)
) %>%
ungroup()
# Filter Distance_Prefer2 for those ResponseIds
Ranking_per_Step_Pr1_Amt6 <- Distance_Prefer2 %>%
filter(ResponseId %in% selected_ids_Pr1_Amt6)
Ranking_per_Step_Pr6_Amt1 <- Distance_Prefer2 %>%
filter(ResponseId %in% selected_ids_Pr6_Amt1)
Ranking_per_Step_remaining <- Distance_Prefer2 %>%
filter(ResponseId %in% remaining_ids)
## Create new R columns
# Initialize R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)
Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>%
mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)
Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>%
mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)
# Function to fill R columns
fill_R_columns <- function(order_string, target_value) {
order_string <- gsub("[[:space:][:cntrl:]]", "", order_string) # Clean spaces and newlines
order_numbers <- strsplit(order_string, ",")[[1]]
position_of_value <- which(order_numbers == as.character(target_value))
if (length(position_of_value) == 0) {
return(NA)
} else if (position_of_value == 1) {
return(1)
} else {
return(position_of_value)
}
}
# Apply function to fill R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
mutate(
Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
)
Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>%
mutate(
Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
)
Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>%
mutate(
Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
)
# Pad each ResponseId to at least 10 rows (8 for the remaining group) by duplicating the final step
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% group_by(ResponseId) %>%
group_modify(~ {
while (nrow(.x) < 10) {
max_step_row <- .x %>% filter(step == max(step))
.x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
}
return(.x)
}) %>% ungroup()
Ranking_per_Step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>% group_by(ResponseId) %>%
group_modify(~ {
while (nrow(.x) < 10) {
max_step_row <- .x %>% filter(step == max(step))
.x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
}
return(.x)
}) %>% ungroup()
Ranking_per_Step_remaining <- Ranking_per_Step_remaining %>% group_by(ResponseId) %>%
group_modify(~ {
while (nrow(.x) < 8) {
max_step_row <- .x %>% filter(step == max(step))
.x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
}
return(.x)
}) %>% ungroup()
## Summary DFs
summary_ranks_step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
filter(step < 10) %>%
group_by(step) %>%
dplyr::summarise(
mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),
mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
summary_ranks_step_Pr6_Amt1 <- Ranking_per_Step_Pr6_Amt1 %>%
filter(step < 10) %>%
group_by(step) %>%
dplyr::summarise(
mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),
mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
summary_ranks_step_remaining <- Ranking_per_Step_remaining %>%
filter(step < 8) %>%
group_by(step) %>%
dplyr::summarise(
mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),
mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
## Long DFs
summary_ranks_step_Pr1_Amt6_long <- summary_ranks_step_Pr1_Amt6 %>%
pivot_longer(
cols = -step,
names_to = c(".value", "label"),
names_pattern = "^(mean|se)_(.*)$"
) %>%
rename(
mean_rank = mean,
mean_se = se
)
summary_ranks_step_Pr6_Amt1_long <- summary_ranks_step_Pr6_Amt1 %>%
pivot_longer(
cols = -step,
names_to = c(".value", "label"),
names_pattern = "^(mean|se)_(.*)$"
) %>%
rename(
mean_rank = mean,
mean_se = se
)
summary_ranks_step_remaining_long <- summary_ranks_step_remaining %>%
pivot_longer(
cols = -step,
names_to = c(".value", "label"),
names_pattern = "^(mean|se)_(.*)$"
) %>%
rename(
mean_rank = mean,
mean_se = se
)
# Shapes
bet_shapes <- c("Pr6_Amt1" = 21, "Pr5_Amt2" = 22, "Pr4_Amt3" = 23, "Pr3_Amt4" = 24, "Pr2_Amt5" = 25, "Pr1_Amt6" = 11)
# Create plot
S_Bet_Profile <- ggplot(summary_ranks_step_Pr1_Amt6_long,
aes(x = step, y = mean_rank)) +
geom_line(aes(color = label), size = 1) +
geom_point(aes(fill = label, shape = label), size = 3) +
geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se),
width = 0.2, color = "black") +
scale_color_manual(values = bet_colors) +
scale_fill_manual(values = bet_colors) +
scale_shape_manual(values = bet_shapes) +
scale_x_continuous(breaks = 0:9) +
scale_y_continuous(limits = c(1, 6), breaks = 1:6) + # 👈 Fixed y-axis
labs(x = "Ranking Step", y = "Mean Rank",
title = "Mean of Ranks by Step - $-Bet Profile",
color = "Label", shape = "Label", fill = "Label") +
theme_minimal() +
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.5))
P_Bet_Profile <- ggplot(summary_ranks_step_Pr6_Amt1_long,
aes(x = step, y = mean_rank)) +
geom_line(aes(color = label), size = 1) +
geom_point(aes(fill = label, shape = label), size = 3) +
geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se),
width = 0.2, color = "black") +
scale_color_manual(values = bet_colors) +
scale_fill_manual(values = bet_colors) +
scale_shape_manual(values = bet_shapes) +
scale_x_continuous(breaks = 0:9) +
scale_y_continuous(limits = c(1, 6), breaks = 1:6) + # Fixed y-axis
labs(x = "Ranking Step", y = "Mean Rank",
title = "Mean of Ranks by Step - P-Bet Profile",
color = "Label", shape = "Label", fill = "Label") +
theme_minimal() +
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.5))
remaining_Profile <- ggplot(summary_ranks_step_remaining_long,
aes(x = step, y = mean_rank)) +
geom_line(aes(color = label), size = 1) +
geom_point(aes(fill = label, shape = label), size = 3) +
geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se),
width = 0.2, color = "black") +
scale_color_manual(values = bet_colors) +
scale_fill_manual(values = bet_colors) +
scale_shape_manual(values = bet_shapes) +
scale_x_continuous(breaks = 0:9) +
scale_y_continuous(limits = c(1, 6), breaks = 1:6) + # 👈 Fixed y-axis
labs(x = "Ranking Step", y = "Mean Rank",
title = "Mean of Ranks by Step - Unclear Profile",
color = "Label", shape = "Label", fill = "Label") +
theme_minimal() +
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.5))
# Keep each legend at the bottom; plot_layout(guides = "collect") below merges them into one shared legend
S_Bet_Profile <- S_Bet_Profile + theme(legend.position = "bottom")
P_Bet_Profile <- P_Bet_Profile + theme(legend.position = "bottom")
# Combine the plots with shared legend
combined_plot <- S_Bet_Profile + P_Bet_Profile +
plot_layout(guides = "collect") &
theme(legend.position = "bottom")
# Display combined plot
combined_plot
remaining_Profile
The following set of histograms illustrates the number of times each item is touched.
drag_drop_counts_Prefer2 <- drag_and_drop_count_Prefer2_long %>%
count(item.f,N) %>%
group_by(item.f)%>%
mutate(percentage = n / sum(n) * 100,
condition="Amount")%>%
ungroup()
ggplot(drag_drop_counts_Prefer2, aes(x = factor(N), y = n)) +
geom_bar(
stat = "identity",
color = "black"
) +
geom_text(
aes(
label = paste0(n, " (", round(percentage, 1), "%)")
),
vjust = -0.5,
size = 5,
fontface="bold") +
labs(
title = "Drag Count by item and Quiz Condition",
x = "Drag Count",
y = "Frequency"
) +
theme_minimal() +
theme(
strip.text = element_text(size = 12, face = "bold"), # Increased size and bold text
plot.title = element_text(hjust = 0.5),
axis.title = element_text(size = 12), # Adjust axis titles size if needed
axis.text = element_text(size = 10) # Adjust axis labels size if needed
) +
facet_wrap(~ item.f,ncol=2) +
ylim(0, 150)
Note on attribute rank coding: Across the Amount and Prob tasks, a greater value indicates a higher rank (i.e., 1 = item at the bottom and 6 = item at the top).
summary_data_Prefer2 <- drag_and_drop_count_Prefer2_long%>%
dplyr::group_by(condition, item.f) %>%
dplyr::summarize(drag_mean = mean(N_ind, na.rm = TRUE),
drag_sd = sd(N_ind, na.rm = TRUE),
n = n(),
se = drag_sd / sqrt(n), # Standard error
.groups = "drop",
Avg.Amount=mean(Amt),
Avg.Prob=mean(Prob))
ggplot(summary_data_Prefer2, aes(x = Avg.Amount, y = drag_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Count and Amount Attribute", subtitle = "2ND Preference Task", x = "Avg. Amt", y = "Avg. Drag Count Indicator") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
ggplot(summary_data_Prefer2, aes(x = Avg.Prob, y = drag_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Count and Prob Attribute", subtitle = "2ND Preference Task", x = "Avg. Prob", y = "Avg. Drag Count Indicator") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
touch_order_pref2 <- touch_order_analysis.long_pref2 %>%
count(item.f,order,condition) %>%
group_by(item.f)%>%
mutate(percentage = n / sum(n) * 100)%>%
ungroup()
ggplot(touch_order_pref2, aes(x = factor(order), y = n)) +
geom_bar(
stat = "identity",
color = "black"
) +
geom_text(
aes(
label = paste0(n, " (", round(percentage, 1), "%)")
),
vjust = -0.5,
size = 5,
fontface="bold"
) +
labs(
title = "Drag Order by item and Condition",
x = "Drag Order",
y = "Frequency"
) +
theme_minimal() +
theme(
strip.text = element_text(size = 12, face = "bold"), # Facet label adjustments
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10)
) +
facet_wrap(~ item.f ,ncol = 2) +
ylim(0, 100)
summary_data_pref2 <- touch_order_analysis.long_pref2%>%
dplyr::group_by(condition, item.f) %>%
dplyr::summarize(order_mean = mean(order, na.rm = TRUE),
order_sd = sd(order, na.rm = TRUE),
n = n(),
se = order_sd / sqrt(n), # Standard error
.groups = "drop",
Avg.Amount=mean(Amt),
Avg.Prob=mean(Prob))
ggplot(summary_data_pref2, aes(x = Avg.Amount, y = order_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Order and Amount Attribute", subtitle = "2ND Preference Task", x = "Avg. Amt", y = "Avg. Drag Order") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
ggplot(summary_data_pref2, aes(x = Avg.Prob, y = order_mean, label = item.f)) +
geom_point(size = 3, color = "black") +
geom_text(vjust = -1, hjust = 1) +
theme_minimal() +
labs(title = "Drag Order and Prob Attribute", subtitle = "2ND Preference Task", x = "Avg. Prob", y = "Avg. Drag Order") +
theme(axis.title = element_text(face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.title = element_text(face = "bold", hjust = 0.5))+
geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed")
summary_stats <- Distance_pref2.cleanup.df %>%
group_by(item.f) %>%
dplyr::summarize(
mean_distance = mean(distance, na.rm = TRUE),
median_distance = median(distance, na.rm = TRUE)
)
Distance_pref2.cleanup.df$item.f<- factor(Distance_pref2.cleanup.df$item.f, levels = rev(c( "Pr6_Amt1","Pr5_Amt2", "Pr4_Amt3","Pr3_Amt4", "Pr2_Amt5","Pr1_Amt6")), ordered = TRUE)
ggplot(Distance_pref2.cleanup.df ,
aes(x = -distance, fill = item.f)) +
geom_histogram(binwidth = 1, alpha = 0.3, position = "identity") +
labs(
title = "Distribution of Drag Distance - pref2 Task",
x = "Distance",
y = "Count",
fill = "item"
) +
theme_minimal()+
facet_grid(~item.f)+
xlim(6,-6)+
scale_fill_manual(values = bet_colors)
explore<-master_df %>%
mutate(
amount_rank = case_when(
bet_label == "Pr6_Amt1" ~ 1,
bet_label == "Pr5_Amt2" ~ 2,
bet_label == "Pr4_Amt3" ~ 3,
bet_label == "Pr3_Amt4" ~ 4,
bet_label == "Pr2_Amt5" ~ 5,
bet_label == "Pr1_Amt6" ~ 6
)
)%>%
group_by(ResponseId) %>%
summarise(
Tau.amt_set1 = cor(set1_rank, amount_rank, method = "kendall"),
Rho.amt_set1 = cor(set1_rank, amount_rank, method = "spearman"),
.groups = "drop"
)
#
#
# ggplot(explore, aes(x = Tau.amt_set1)) +
# geom_histogram(binwidth = 0.1, fill = "#3182bd", color = "white") +
# labs(title = "Kendall's Tau (Amount vs Set1 Rank)",
# x = "Kendall's Tau", y = "Count") +
# theme_minimal()
#
# ggplot(explore, aes(x = Rho.amt_set1)) +
# geom_histogram(binwidth = 0.1, fill = "#31a354", color = "white") +
# labs(title = "Spearman's Rho (Amount vs Set1 Rank)",
# x = "Spearman's Rho", y = "Count") +
# theme_minimal()
Ranking_per_Step_Pr1_Amt6 <- Distance_Prefer2
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
left_join(explore %>% select(ResponseId, Tau.amt_set1), by = "ResponseId")
## Create new R columns
# Initialize R columns
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
mutate(Pr1_Amt6 = NA, Pr2_Amt5 = NA, Pr3_Amt4 = NA, Pr4_Amt3 = NA, Pr5_Amt2 = NA, Pr6_Amt1 = NA)
# Function to fill R columns
fill_R_columns <- function(order_string, target_value) {
order_string <- gsub("[[:space:][:cntrl:]]", "", order_string) # Clean spaces and newlines
order_numbers <- strsplit(order_string, ",")[[1]]
position_of_value <- which(order_numbers == as.character(target_value))
if (length(position_of_value) == 0) {
return(NA)
} else if (position_of_value == 1) {
return(1)
} else {
return(position_of_value)
}
}
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>%
mutate(
Pr6_Amt1 = sapply(order, fill_R_columns, target_value = 49),
Pr5_Amt2 = sapply(order, fill_R_columns, target_value = 50),
Pr4_Amt3 = sapply(order, fill_R_columns, target_value = 64),
Pr3_Amt4 = sapply(order, fill_R_columns, target_value = 65),
Pr2_Amt5 = sapply(order, fill_R_columns, target_value = 67),
Pr1_Amt6 = sapply(order, fill_R_columns, target_value = 68),
)
# Pad each ResponseId to at least 10 rows by duplicating the final step
Ranking_per_Step_Pr1_Amt6 <- Ranking_per_Step_Pr1_Amt6 %>% group_by(ResponseId) %>%
group_modify(~ {
while (nrow(.x) < 10) {
max_step_row <- .x %>% filter(step == max(step))
.x <- bind_rows(.x, max_step_row %>% mutate(step = max(step) + 1))
}
return(.x)
}) %>% ungroup()
## Summary DFs
summary_ranks_step_split_by_tau <- Ranking_per_Step_Pr1_Amt6 %>%
filter(step < 10) %>%
group_by(step, Tau.amt_set1) %>%
summarise(
mean_Pr6_Amt1 = mean(Pr6_Amt1, na.rm = TRUE),
se_Pr6_Amt1 = sd(Pr6_Amt1, na.rm = TRUE) / sqrt(n()),
mean_Pr5_Amt2 = mean(Pr5_Amt2, na.rm = TRUE),
se_Pr5_Amt2 = sd(Pr5_Amt2, na.rm = TRUE) / sqrt(n()),
mean_Pr4_Amt3 = mean(Pr4_Amt3, na.rm = TRUE),
se_Pr4_Amt3 = sd(Pr4_Amt3, na.rm = TRUE) / sqrt(n()),
mean_Pr3_Amt4 = mean(Pr3_Amt4, na.rm = TRUE),
se_Pr3_Amt4 = sd(Pr3_Amt4, na.rm = TRUE) / sqrt(n()),
mean_Pr2_Amt5 = mean(Pr2_Amt5, na.rm = TRUE),
se_Pr2_Amt5 = sd(Pr2_Amt5, na.rm = TRUE) / sqrt(n()),
mean_Pr1_Amt6 = mean(Pr1_Amt6, na.rm = TRUE),
se_Pr1_Amt6 = sd(Pr1_Amt6, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
## Long format
summary_ranks_step_split_by_tau_long <- summary_ranks_step_split_by_tau %>%
pivot_longer(
cols = -c(step, Tau.amt_set1),
names_to = c(".value", "label"),
names_pattern = "^(mean|se)_(.*)$"
) %>%
rename(
mean_rank = mean,
mean_se = se
)
## Add N and reorder tau levels
tau_counts <- Ranking_per_Step_Pr1_Amt6 %>%
distinct(ResponseId, Tau.amt_set1) %>%
count(Tau.amt_set1, name = "N")
summary_ranks_step_split_by_tau_long <- summary_ranks_step_split_by_tau_long %>%
left_join(tau_counts, by = "Tau.amt_set1") %>%
mutate(
Tau.amt_set1 = factor(Tau.amt_set1, levels = sort(unique(Tau.amt_set1), decreasing = TRUE)),
facet_label = paste0("τ = ", Tau.amt_set1, " (N = ", N, ")"),
facet_label = factor(facet_label, levels = rev(unique(facet_label))) # ✅ use rev()
)
## Define shapes (you can define `bet_colors` separately)
bet_shapes <- c(
"Pr6_Amt1" = 21, "Pr5_Amt2" = 22, "Pr4_Amt3" = 23,
"Pr3_Amt4" = 24, "Pr2_Amt5" = 25, "Pr1_Amt6" = 11
)
## Plot
ggplot(summary_ranks_step_split_by_tau_long,
aes(x = step, y = mean_rank)) +
geom_line(aes(color = label), size = 1) +
geom_point(aes(fill = label, shape = label), size = 3) +
geom_errorbar(aes(ymin = mean_rank - mean_se, ymax = mean_rank + mean_se),
width = 0.2, color = "black") +
facet_wrap(~ facet_label, labeller = label_wrap_gen(width = 20)) +
scale_color_manual(values = bet_colors) +
scale_fill_manual(values = bet_colors) +
scale_shape_manual(values = bet_shapes) +
scale_x_continuous(breaks = 0:9) +
labs(
x = "Ranking Step",
y = "Mean Rank",
title = "Mean Rank Trajectories by Kendall's Tau Group (N per Panel)",
color = "Label", shape = "Label", fill = "Label"
) +
theme_minimal() +
theme(
legend.position = "bottom",
plot.title = element_text(hjust = 0.5)
)+
scale_y_reverse(breaks = 1:6)
master_df<-master_df%>% # 194 when full
mutate(set1_prob.z=scale(set1_prob),
set1_amt.z=scale(set1_amt),
set2_prob.z=scale(set2_prob),
set2_amt.z=scale(set2_amt))
master_df <- master_df %>% #182
group_by(ResponseId) %>%
filter(!any(is.na(set1_drag_distance.r) | is.na(set2_drag_distance.r))) %>%
ungroup()
# Need to look into the dataset and remove any NAs, odd values, or items with missing data
# master_df%>%
# group_by(bet_label)%>%
# summarize(set1.sd=sd(set1_rank))
# master_df.test<-master_df%>%
# filter(bet_label!="Pr6_Amt1")
#
# master_df.test%>%
# group_by(ResponseId)%>%
# summarize(mean_set1=mean(set1_rank))%>%
# summarize(sd(mean_set1))
set1.distance_M1 <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df
)
set1.distance_M2 <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_drag_distance.r+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df
)
set1.distance_M3 <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_drag_distance.r+
(set1_prob.z + set1_amt.z + set1_drag_distance.r | ResponseId),
data = master_df
)
tab_model(
set1.distance_M1,
set1.distance_M2,
set1.distance_M3,
dv.labels = c("RE: Prob & Amt", "Adding Distance FE", "Adding Distance RE"),
pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag distance"),
show.re.var = TRUE, # show random-effects variances
show.icc = TRUE, # show ICC
digits = 2
)
Predictors | Estimates (RE: Prob & Amt) | CI | p | Estimates (Adding Distance FE) | CI | p | Estimates (Adding Distance RE) | CI | p |
---|---|---|---|---|---|---|---|---|---|
(Intercept) | 3.50 | 3.45 – 3.55 | <0.001 | 3.18 | 3.10 – 3.25 | <0.001 | 3.17 | 3.09 – 3.25 | <0.001 |
Prob (z) | 0.23 | 0.07 – 0.39 | 0.005 | 0.18 | 0.04 – 0.32 | 0.011 | 0.24 | 0.11 – 0.38 | 0.001 |
Amt (z) | -0.47 | -0.62 – -0.33 | <0.001 | -0.41 | -0.54 – -0.28 | <0.001 | -0.45 | -0.58 – -0.33 | <0.001 |
Drag distance | | | | 0.27 | 0.23 – 0.31 | <0.001 | 0.27 | 0.22 – 0.32 | <0.001 |
Random Effects | |||||||||
σ2 | 0.77 | 0.72 | 0.71 | ||||||
τ00 | 0.00 ResponseId | 0.00 ResponseId | 0.04 ResponseId | ||||||
τ11 | 0.95 ResponseId.set1_prob.z | 0.67 ResponseId.set1_prob.z | 0.65 ResponseId.set1_prob.z | ||||||
0.72 ResponseId.set1_amt.z | 0.51 ResponseId.set1_amt.z | 0.48 ResponseId.set1_amt.z | |||||||
0.03 ResponseId.set1_drag_distance.r | |||||||||
ρ01 | -0.29 | 0.72 | |||||||
0.99 | -0.61 | ||||||||
-0.99 | |||||||||
N | 182 ResponseId | 182 ResponseId | 182 ResponseId | ||||||
Observations | 1092 | 1092 | 1092 | ||||||
Marginal R2 / Conditional R2 | 0.360 / NA | 0.457 / NA | 0.510 / NA |
Addressing Singularity Experiment
I added a single draw from rnorm(1, mean = 0, sd = 0.5) to each participant's outcome to artificially create between-participant intercept variance and test whether this resolves the singular-fit issue.
# Add a model below just to showcase the adding a small value idea
master_df.examine <- master_df %>%
group_by(ResponseId) %>%
mutate(set1_rank.r = set1_rank.r + rnorm(1, mean = 0, sd = 0.5)) %>%
ungroup()
# master_df.examine%>%
# group_by(ResponseId)%>%
# summarize(mean_set1=mean(set1_rank.r))%>%
# summarize(sd(mean_set1))
set1.distance_M1.examine <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df.examine
)
set1.distance_M2.examine <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_drag_distance.r+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df.examine
)
set1.distance_M3.examine <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_drag_distance.r+
(set1_prob.z + set1_amt.z + set1_drag_distance.r | ResponseId),
data = master_df.examine
)
tab_model(
set1.distance_M1.examine,
set1.distance_M2.examine,
set1.distance_M3.examine,
dv.labels = c("RE: Prob & Amt", "Adding Distance FE", "Adding Distance RE"),
pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag distance"),
show.re.var = TRUE, # show random-effects variances
show.icc = TRUE, # show ICC
digits = 2
)
Predictors | Estimates (RE: Prob & Amt) | CI | p | Estimates (Adding Distance FE) | CI | p | Estimates (Adding Distance RE) | CI | p |
---|---|---|---|---|---|---|---|---|---|
(Intercept) | 3.48 | 3.41 – 3.55 | <0.001 | 3.10 | 3.00 – 3.20 | <0.001 | 3.08 | 2.97 – 3.19 | <0.001 |
Prob (z) | 0.24 | 0.08 – 0.40 | 0.004 | 0.18 | 0.04 – 0.32 | 0.011 | 0.26 | 0.13 – 0.40 | <0.001 |
Amt (z) | -0.47 | -0.61 – -0.32 | <0.001 | -0.40 | -0.53 – -0.27 | <0.001 | -0.45 | -0.58 – -0.33 | <0.001 |
Drag distance | | | | 0.32 | 0.27 – 0.37 | <0.001 | 0.32 | 0.26 – 0.38 | <0.001 |
Random Effects | |||||||||
σ2 | 0.99 | 0.89 | 0.86 | ||||||
τ00 | 0.06 ResponseId | 0.13 ResponseId | 0.24 ResponseId | ||||||
τ11 | 0.89 ResponseId.set1_prob.z | 0.59 ResponseId.set1_prob.z | 0.56 ResponseId.set1_prob.z | ||||||
0.66 ResponseId.set1_amt.z | 0.44 ResponseId.set1_amt.z | 0.41 ResponseId.set1_amt.z | |||||||
0.05 ResponseId.set1_drag_distance.r | |||||||||
ρ01 | -0.17 | -0.11 | 0.34 | ||||||
0.05 | 0.01 | -0.33 | |||||||
-0.69 | |||||||||
ICC | 0.65 | 0.59 | |||||||
N | 182 ResponseId | 182 ResponseId | 182 ResponseId | ||||||
Observations | 1092 | 1092 | 1092 | ||||||
Marginal R2 / Conditional R2 | 0.134 / 0.694 | 0.237 / 0.689 | 0.499 / NA |
get_participant_effects_lmer <- function(model, id_var = "ResponseId") {
# Random effects matrix -> data.frame, keep names like (Intercept)
re_mat <- ranef(model)[[id_var]]
re_df <- data.frame(re_mat, check.names = FALSE) # check.names = FALSE keeps names like (Intercept) intact
re_df[[id_var]] <- rownames(re_mat)
# Fixed effects (named vector)
fe <- fixef(model)
# Union of all coefficient names (fixed + random)
all_terms <- union(names(fe), colnames(re_mat))
# Ensure every term exists; add fixed beta (default 0) + random deviation (default 0)
for (nm in all_terms) {
if (!nm %in% names(re_df)) re_df[[nm]] <- 0
if (nm %in% names(fe)) re_df[[nm]] <- re_df[[nm]] + unname(fe[[nm]]) # ensure that all variables are there
}
re_df
}
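# The returned data frame has one row per ResponseId; each coefficient column holds that participant's
# total effect (fixed estimate plus the participant's random deviation), and a term with no random
# counterpart carries the fixed estimate alone.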
re_M1 <- get_participant_effects_lmer(set1.distance_M1)
re_M2 <- get_participant_effects_lmer(set1.distance_M2)
re_M3 <- get_participant_effects_lmer(set1.distance_M3)
# Add a model label
re_M1$model <- "M1 - RE: Prob & Amt"
re_M2$model <- "M2 - Adding Distance FE"
re_M3$model <- "M3 - Adding Distance RE"
# Combine
re_all <- bind_rows(re_M1, re_M2, re_M3)
re_all_long <- re_all %>%
pivot_longer(
cols = -c(ResponseId, model),
names_to = "parameter",
values_to = "coefficient"
)
re_all_long <- re_all_long %>%
mutate(
parameter = recode(parameter,
`(Intercept)` = "(Intercept)",
`set1_prob.z` = "Prob (z)",
`set1_amt.z` = "Amt (z)",
`set1_drag_distance.r` = "Drag distance"
),
parameter = factor(parameter,
levels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag distance")
),
model = factor(model, levels = c("M1 - RE: Prob & Amt","M2 - Adding Distance FE","M3 - Adding Distance RE"))
)
# Summary stats for markers
summary_stats <- re_all_long %>%
group_by(model, parameter) %>%
summarise(
mean_val = mean(coefficient, na.rm = TRUE),
median_val = median(coefficient, na.rm = TRUE),
.groups = "drop"
)
# Color map: Intercept grey, slopes steelblue
param_colors <- c(
"(Intercept)" = "grey60",
"Prob (z)" = "steelblue",
"Amt (z)" = "steelblue",
"Drag distance" = "steelblue"
)
ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
geom_histogram(bins = 30, alpha = 0.85, color = "white") +
facet_grid(parameter ~ model, scales = "free") + # allow y to vary
scale_fill_manual(values = param_colors, guide = "none") +
theme_minimal() +
labs(
title = "Participant-specific coefficients",
x = "Coefficient value",
y = "Count"
)+
theme(
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
strip.text = element_text(face = "bold",size=12) # bold facet labels
)
Predicted Set 2 scores are built from each participant's Set 1 coefficients (fixed effect plus that participant's random deviation) applied to the Set 2 predictors:
\[ \begin{aligned} \widehat{s}_{ij} \;=&\; (\beta_0 + b_{0i})_{\text{set1}} \\ &+ (\beta_{prob} + b_{pi})_{\text{set1}} \cdot \text{prob.z}_{ij,\text{set2}} \\ &+ (\beta_{amt} + b_{ai})_{\text{set1}} \cdot \text{amt.z}_{ij,\text{set2}} \\ &+ (\beta_{drag} + b_{di})_{\text{set1}} \cdot \text{drag\_distance.r}_{ij,\text{set2}} \end{aligned} \]
predict_set2_by_id.Distance <- function(model, data, label) {
# Random effects (per ID)
re_mat <- ranef(model)$ResponseId
re_df <- as.data.frame(re_mat)
re_df$ResponseId <- rownames(re_mat)
# Rename things to b_intercept/prob/ etc.
if ("(Intercept)" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_intercept = `(Intercept)`)
if ("set1_prob.z" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_prob = `set1_prob.z`)
if ("set1_amt.z" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_amt = `set1_amt.z`)
if ("set1_drag_distance.r" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_drag = `set1_drag_distance.r`)
# Ensure all needed columns exist (fill missing with 0)
for (nm in c("b_intercept","b_prob","b_amt","b_drag")) {
if (!nm %in% names(re_df)) re_df[[nm]] <- 0
}
# Fixed effects (missing terms default to 0)
fe <- fixef(model)
fe_int <- if ("(Intercept)" %in% names(fe)) unname(fe["(Intercept)"]) else 0
fe_prob <- if ("set1_prob.z" %in% names(fe)) unname(fe["set1_prob.z"]) else 0
fe_amt <- if ("set1_amt.z" %in% names(fe)) unname(fe["set1_amt.z"]) else 0
fe_drag <- if ("set1_drag_distance.r" %in% names(fe)) unname(fe["set1_drag_distance.r"]) else 0
df_pred <- data %>%
left_join(re_df, by = "ResponseId") %>%
mutate(
set2_rank_predicted =
(fe_int + b_intercept) +
(fe_prob + b_prob) * set2_prob.z +
(fe_amt + b_amt ) * set2_amt.z +
(fe_drag + b_drag) * set2_drag_distance.r
) %>%
group_by(ResponseId) %>%
# Convert non-finite predictions to NA within participant before the tie check and ranking
mutate(set2_rank_predicted = ifelse(is.finite(set2_rank_predicted), set2_rank_predicted, NA_real_)) %>%
# Tie flag, then average rank for ties
mutate(
has_tie = n_distinct(set2_rank_predicted) < n(),
set2_rank_pred_rank = rank(-set2_rank_predicted, ties.method = "average")
) %>%
ungroup()
# Message about ties
tie_summary <- df_pred %>%
filter(!is.na(set2_rank_predicted), !is.na(set2_rank)) %>%
distinct(ResponseId, has_tie) %>%
summarise(n_ids = n(), n_ties = sum(has_tie), .groups = "drop")
cat(sprintf("Tie handling: %d of %d participants had tied predicted scores; average ranks were used.",
tie_summary$n_ties, tie_summary$n_ids))
# Per-participant correlations
df_pred %>%
group_by(ResponseId) %>%
summarise(
r_pearson = (cor(set2_rank_pred_rank, set2_rank, method = "pearson")),
tau_kendall = (cor(set2_rank_pred_rank, set2_rank, method = "kendall")),
.groups = "drop"
) %>%
mutate(model = label)
}
# Run for all three models
cor_M1 <- predict_set2_by_id.Distance(set1.distance_M1, master_df, "RE: Prob & Amt")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M2 <- predict_set2_by_id.Distance(set1.distance_M2, master_df, "Adding Distance FE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M3 <- predict_set2_by_id.Distance(set1.distance_M3, master_df, "Adding Distance RE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_all <- bind_rows(cor_M1, cor_M2, cor_M3) %>%
mutate(model = factor(model, levels = c("RE: Prob & Amt","Adding Distance FE","Adding Distance RE")))
# --- quick summaries (optional)
summary_by_model <- cor_all %>%
group_by(model) %>%
summarise(
n = n(),
mean_r = mean(r_pearson, na.rm = TRUE),
median_r = median(r_pearson, na.rm = TRUE),
mean_tau = mean(tau_kendall, na.rm = TRUE),
median_tau = median(tau_kendall, na.rm = TRUE),
.groups = "drop"
)
make_violin <- function(df, col, title) {
# Calculate per-model means and medians
stats <- df %>%
group_by(model) %>%
summarise(
mean_val = mean(.data[[col]], na.rm = TRUE),
median_val = median(.data[[col]], na.rm = TRUE),
.groups = "drop"
)
# Create caption text
cap_text <- paste0(
"Means: ",
paste0(stats$model, " = ", sprintf("%.2f", stats$mean_val), collapse = " | "),
"\nMedians: ",
paste0(stats$model, " = ", sprintf("%.2f", stats$median_val), collapse = " | ")
)
ggplot(df, aes(x = model, y = .data[[col]], fill = model)) +
geom_violin(alpha = 0.4, width = 0.9, color = "black") +
geom_jitter(width = 0.12, alpha = 0.5, size = 1.6) +
geom_point(data = stats, aes(x = model, y = mean_val),
inherit.aes = FALSE, color = "red", size = 3.2) +
geom_segment(data = stats,
aes(x = as.numeric(model) - 0.35, xend = as.numeric(model) + 0.35,
y = median_val, yend = median_val),
inherit.aes = FALSE, color = "black", linewidth = 1) +
coord_cartesian(ylim = c(-1, 1)) +
labs(x = NULL, y = "Correlation", title = title, caption = cap_text) +
theme_minimal() +
theme(
legend.position = "none",
plot.caption = element_text(
size = 12, # Increase font size
face = "bold", # Make bold
margin = margin(t = 6)
)
)
}
p_r.Distance_lmer <- make_violin(cor_all, "r_pearson", "Per-participant Pearson r")
p_tau.Distance_lmer <- make_violin(cor_all, "tau_kendall", "Per-participant Kendall \u03C4")
p_r.Distance_lmer + p_tau.Distance_lmer + plot_layout(ncol = 2)
perm_paired_mean <- function(x, y, n_perm = 10000, seed = 123) {
diffs <- y - x
if (length(diffs) < 1) return(NA_real_)
set.seed(seed)
obs <- mean(diffs)
# Fast vectorized sign flips
signs <- matrix(sample(c(1,-1), length(diffs) * n_perm, replace = TRUE), nrow = n_perm)
perm_means <- rowMeans(signs * rep(diffs, each = n_perm))
mean(abs(perm_means) >= abs(obs))
}
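# Toy check on made-up vectors (not study data): two-sided sign-flip p-value for mean(y - x)
perm_paired_mean(x = c(0.10, 0.20, 0.30), y = c(0.15, 0.25, 0.50))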
# Compare M2 and M3 against M1 using a single metric column (by name)
compare_models_simple.Distance <- function(df, value_col,
m1 = "RE: Prob & Amt",
m2 = "Adding Distance FE",
m3 = "Adding Distance RE",
n_perm = 10000) {
# Build wide table with explicit column selection (no NSE)
wide <- df %>%
select(ResponseId, model, value = all_of(value_col)) %>%
pivot_wider(names_from = model, values_from = value)
# Helper to run the three paired tests for a pair of columns
run_tests <- function(a_name, b_name, label) {
x <- wide[[a_name]]
y <- wide[[b_name]]
diffs <- y - x # positive => b_name improves over a_name
cat(sprintf("%s: pairs used = %d\n", label, length(diffs)))
# Paired t-test
t_res <- try(t.test(y, x, paired = TRUE), silent = TRUE)
if (inherits(t_res, "try-error")) {
t_p <- NA_real_; t_stat <- NA_real_; t_df <- NA_real_
} else {
t_p <- unname(t_res$p.value)
t_stat <- unname(t_res$statistic)
t_df <- unname(t_res$parameter)
}
# Wilcoxon signed-rank (paired)
w_res <- try(wilcox.test(y, x, paired = TRUE, exact = FALSE), silent = TRUE)
if (inherits(w_res, "try-error")) {
w_p <- NA_real_; w_V <- NA_real_
} else {
w_p <- unname(w_res$p.value)
w_V <- unname(w_res$statistic)
}
# Permutation (paired sign-flip) on mean diff
p_perm <- perm_paired_mean(x, y, n_perm = n_perm)
data.frame(
Comparison = label,
N_pairs = length(diffs),
Mean_Diff = if (length(diffs)) mean(diffs) else NA_real_,
Median_Diff = if (length(diffs)) median(diffs) else NA_real_,
Prop_Better = if (length(diffs)) mean(diffs > 0) else NA_real_,
Prop_Worse = if (length(diffs)) mean(diffs < 0) else NA_real_,
Prop_Tie = if (length(diffs)) mean(diffs == 0) else NA_real_,
t_test_p = t_p,
t_statistic = t_stat,
t_df = t_df,
wilcoxon_p = w_p,
wilcoxon_V = w_V,
permutation_p = p_perm,
check.names = FALSE
)
}
rbind(
run_tests(m1, m2, "Adding FE (M2 − M1)"),
run_tests(m1, m3, "Adding RE (M3 − M1)")
) %>%
mutate(
Mean_Diff = round(Mean_Diff, 3),
Median_Diff = round(Median_Diff, 3),
Prop_Better = round(Prop_Better, 3),
Prop_Worse = round(Prop_Worse, 3),
Prop_Tie = round(Prop_Tie, 3),
t_test_p = round(t_test_p, 4),
t_statistic = round(t_statistic, 3),
t_df = round(t_df),
wilcoxon_p = round(wilcoxon_p, 4),
permutation_p = round(permutation_p, 4)
)
}
# ---- Build the two tables (Pearson r and Kendall tau) ----
# Expect cor_all to have: ResponseId, model, r_pearson, tau_kendall
table_r.distance_lmer <- compare_models_simple.Distance(cor_all, "r_pearson")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
table_tau.distance_lmer <- compare_models_simple.Distance(cor_all, "tau_kendall")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
library(kableExtra)
kable(table_r.distance_lmer,
caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Pearson r (Distance)</span></b>",
digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
kable_styling(full_width = FALSE)
Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.062 | 0 | 0.286 | 0.192 | 0.522 | 0 | 4.378 | 181 | 2e-04 | 2800.5 | 0 |
Adding RE (M3 − M1) | 182 | 0.076 | 0 | 0.253 | 0.170 | 0.577 | 0 | 4.248 | 181 | 9e-04 | 2155.0 | 0 |
kable(table_tau.distance_lmer,
caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Kendall τ (Distance)</span></b>",
digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
kable_styling(full_width = FALSE)
Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.040 | 0 | 0.275 | 0.187 | 0.538 | 0.0013 | 3.276 | 181 | 0.0018 | 2472.5 | 5e-04 |
Adding RE (M3 − M1) | 182 | 0.059 | 0 | 0.242 | 0.154 | 0.604 | 0.0004 | 3.616 | 181 | 0.0008 | 1909.5 | 3e-04 |
set1.Order_M1 <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df
)
set1.Order_M2 <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_order+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df
)
set1.Order_M3 <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_order+
(set1_prob.z + set1_amt.z + set1_order | ResponseId),
data = master_df
)
tab_model(
set1.Order_M1,
set1.Order_M2,
set1.Order_M3,
dv.labels = c("RE: Prob & Amt", "Adding order FE", "Adding order RE"),
pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag order"),
show.re.var = TRUE, # show random-effects variances
show.icc = TRUE, # show ICC
digits = 2
)
Predictors | Estimates (RE: Prob & Amt) | CI | p | Estimates (Adding order FE) | CI | p | Estimates (Adding order RE) | CI | p |
---|---|---|---|---|---|---|---|---|---|
(Intercept) | 3.50 | 3.45 – 3.55 | <0.001 | 4.61 | 4.45 – 4.77 | <0.001 | 4.57 | 4.36 – 4.78 | <0.001 |
Prob (z) | 0.23 | 0.07 – 0.39 | 0.005 | 0.11 | -0.03 – 0.25 | 0.113 | 0.21 | 0.07 – 0.35 | 0.004 |
Amt (z) | -0.47 | -0.62 – -0.33 | <0.001 | -0.46 | -0.60 – -0.33 | <0.001 | -0.48 | -0.61 – -0.36 | <0.001 |
Drag order | -0.35 | -0.40 – -0.31 | <0.001 | -0.34 | -0.40 – -0.27 | <0.001 | |||
Random Effects | |||||||||
σ2 | 0.77 | 0.70 | 0.59 | ||||||
τ00 | 0.00 ResponseId | 0.00 ResponseId | 0.92 ResponseId | ||||||
τ11 | 0.95 ResponseId.set1_prob.z | 0.61 ResponseId.set1_prob.z | 0.67 ResponseId.set1_prob.z | ||||||
0.72 ResponseId.set1_amt.z | 0.54 ResponseId.set1_amt.z | 0.47 ResponseId.set1_amt.z | |||||||
0.09 ResponseId.set1_order | |||||||||
ρ01 | -0.29 | -0.64 | |||||||
0.99 | 0.32 | ||||||||
-1.00 | |||||||||
N | 182 ResponseId | 182 ResponseId | 182 ResponseId | ||||||
Observations | 1092 | 1092 | 1092 | ||||||
Marginal R2 / Conditional R2 | 0.360 / NA | 0.495 / NA | 0.573 / NA |
Addressing Singularity: Experiment
Adding rnorm(0, 0.5) noise to the (participant-specific) values resolves all of the singularity issues.
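The fits above are singular (conditional R² is reported as NA and several random-effect correlations sit at ±0.99 to ±1.00), which is what the experiment below addresses. master_df.examine itself is built earlier in the script; the sketch below only illustrates the idea, assuming the noise is added to the set1 rank outcome (the exact column and seed are assumptions, not the original recipe).
# Check which of the fits above are singular
sapply(list(M1 = set1.Order_M1, M2 = set1.Order_M2, M3 = set1.Order_M3), lme4::isSingular)
# Sketch of the jitter idea: add rnorm(0, 0.5) noise to the rank outcome
set.seed(1)
master_df.examine_sketch <- master_df %>%
mutate(set1_rank.r = set1_rank.r + rnorm(n(), mean = 0, sd = 0.5))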
# Add models below just to showcase the idea of adding a small random value
set1.order_M1.examine <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df.examine
)
set1.order_M2.examine <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_order+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df.examine
)
set1.order_M3.examine <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_order+
(set1_prob.z + set1_amt.z + set1_order | ResponseId),
data = master_df.examine
)
tab_model(
set1.order_M1.examine,
set1.order_M2.examine,
set1.order_M3.examine,
dv.labels = c("RE: Prob & Amt", "Adding Order FE", "Adding Order RE"),
pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Order"),
show.re.var = TRUE, # show random-effects variances
show.icc = TRUE, # show ICC
digits = 2
)
RE: Prob & Amt | Adding Order FE | Adding Order RE | |||||||
---|---|---|---|---|---|---|---|---|---|
Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
(Intercept) | 3.48 | 3.41 – 3.55 | <0.001 | 4.72 | 4.53 – 4.90 | <0.001 | 4.66 | 4.43 – 4.90 | <0.001 |
Prob (z) | 0.24 | 0.08 – 0.40 | 0.004 | 0.10 | -0.03 – 0.24 | 0.140 | 0.21 | 0.07 – 0.35 | 0.003 |
Amt (z) | -0.47 | -0.61 – -0.32 | <0.001 | -0.46 | -0.59 – -0.33 | <0.001 | -0.49 | -0.61 – -0.36 | <0.001 |
Drag Order | -0.39 | -0.45 – -0.34 | <0.001 | -0.38 | -0.45 – -0.31 | <0.001 | |||
Random Effects | |||||||||
σ2 | 0.99 | 0.87 | 0.76 | ||||||
τ00 | 0.06 ResponseId | 0.12 ResponseId | 1.10 ResponseId | ||||||
τ11 | 0.89 ResponseId.set1_prob.z | 0.53 ResponseId.set1_prob.z | 0.60 ResponseId.set1_prob.z | ||||||
0.66 ResponseId.set1_amt.z | 0.49 ResponseId.set1_amt.z | 0.43 ResponseId.set1_amt.z | |||||||
0.10 ResponseId.set1_order | |||||||||
ρ01 | -0.17 | -0.05 | -0.71 | ||||||
0.05 | 0.04 | 0.43 | |||||||
-0.94 | |||||||||
ICC | 0.65 | 0.59 | 0.63 | ||||||
N | 182 ResponseId | 182 ResponseId | 182 ResponseId | ||||||
Observations | 1092 | 1092 | 1092 | ||||||
Marginal R2 / Conditional R2 | 0.134 / 0.694 | 0.260 / 0.697 | 0.305 / 0.742 |
get_participant_effects_lmer.Order <- function(model, id_var = "ResponseId") {
# Random effects matrix -> data.frame, keep names like (Intercept)
re_mat <- ranef(model)[[id_var]]
re_df <- data.frame(re_mat, check.names = FALSE)
re_df[[id_var]] <- rownames(re_mat)
# Fixed effects (named vector)
fe <- fixef(model)
# Union of all coefficient names (fixed + random)
all_terms <- union(names(fe), colnames(re_mat))
# Ensure every term exists; add fixed beta (default 0) + random deviation (default 0)
for (nm in all_terms) {
if (!nm %in% names(re_df)) re_df[[nm]] <- 0 # add missing column
if (nm %in% names(fe)) re_df[[nm]] <- re_df[[nm]] + unname(fe[[nm]])
}
re_df # output
}
# Use the Order-specific helper defined above
re_M1 <- get_participant_effects_lmer.Order(set1.Order_M1)
re_M2 <- get_participant_effects_lmer.Order(set1.Order_M2)
re_M3 <- get_participant_effects_lmer.Order(set1.Order_M3)
# Add a model label
re_M1$model <- "M1 - RE: Prob & Amt"
re_M2$model <- "M2 - Adding Order FE"
re_M3$model <- "M3 - Adding Order RE"
# Combine
re_all <- bind_rows(re_M1, re_M2, re_M3)
re_all_long <- re_all %>%
pivot_longer(
cols = -c(ResponseId, model),
names_to = "parameter",
values_to = "coefficient"
)
re_all_long <- re_all_long %>%
mutate(
parameter = recode(parameter,
`(Intercept)` = "(Intercept)",
`set1_prob.z` = "Prob (z)",
`set1_amt.z` = "Amt (z)",
`set1_order` = "Drag Order"
),
parameter = factor(parameter,
levels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Order")
),
model = factor(model, levels = c("M1 - RE: Prob & Amt","M2 - Adding Order FE","M3 - Adding Order RE"))
)
# Summary stats for markers
summary_stats <- re_all_long %>%
group_by(model, parameter) %>%
summarise(
mean_val = mean(coefficient, na.rm = TRUE),
median_val = median(coefficient, na.rm = TRUE),
.groups = "drop"
)
# Color map: Intercept grey, slopes steelblue
param_colors <- c(
"(Intercept)" = "grey60",
"Prob (z)" = "steelblue",
"Amt (z)" = "steelblue",
"Drag Order" = "steelblue"
)
ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
geom_histogram(bins = 30, alpha = 0.85, color = "white") +
facet_grid(parameter ~ model, scales = "free") + # allow y to vary
scale_fill_manual(values = param_colors, guide = "none") +
theme_minimal() +
labs(
title = "Participant-specific coefficients",
x = "Coefficient value",
y = "Count"
)+
theme(
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
strip.text = element_text(face = "bold",size=12) # bold facet labels
)
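summary_stats is computed above but not drawn; if per-facet mean and median markers are wanted, they can be layered onto the histograms as in the sketch below (an optional addition, not part of the original figure).
# Optional sketch: overlay per-facet mean (solid) and median (dashed) markers
ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
geom_histogram(bins = 30, alpha = 0.85, color = "white") +
geom_vline(data = summary_stats, aes(xintercept = mean_val), inherit.aes = FALSE, linewidth = 0.4) +
geom_vline(data = summary_stats, aes(xintercept = median_val), inherit.aes = FALSE, linetype = "dashed", linewidth = 0.4) +
facet_grid(parameter ~ model, scales = "free") +
scale_fill_manual(values = param_colors, guide = "none") +
theme_minimal() +
labs(title = "Participant-specific coefficients (with mean/median markers)", x = "Coefficient value", y = "Count")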
predict_set2_by_id.Order <- function(model, data, label) {
# Random effects (per ID)
re_mat <- ranef(model)$ResponseId
re_df <- as.data.frame(re_mat)
re_df$ResponseId <- rownames(re_mat)
# Rename to consistent names if present
if ("(Intercept)" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_intercept = `(Intercept)`)
if ("set1_prob.z" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_prob = `set1_prob.z`)
if ("set1_amt.z" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_amt = `set1_amt.z`)
if ("set1_order" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_drag = `set1_order`)
# Ensure all needed columns exist (fill missing with 0)
for (nm in c("b_intercept","b_prob","b_amt","b_drag")) {
if (!nm %in% names(re_df)) re_df[[nm]] <- 0
}
# Fixed effects (missing terms default to 0)
fe <- fixef(model)
fe_int <- if ("(Intercept)" %in% names(fe)) unname(fe["(Intercept)"]) else 0
fe_prob <- if ("set1_prob.z" %in% names(fe)) unname(fe["set1_prob.z"]) else 0
fe_amt <- if ("set1_amt.z" %in% names(fe)) unname(fe["set1_amt.z"]) else 0
fe_drag <- if ("set1_order" %in% names(fe)) unname(fe["set1_order"]) else 0
df_pred <- data %>%
left_join(re_df, by = "ResponseId") %>%
mutate(
set2_rank_predicted =
(fe_int + b_intercept) +
(fe_prob + b_prob) * set2_prob.z +
(fe_amt + b_amt ) * set2_amt.z +
(fe_drag + b_drag) * set2_order
) %>%
group_by(ResponseId) %>%
# Remove NAs within participant before tie check & ranking
mutate(set2_rank_predicted = ifelse(is.finite(set2_rank_predicted), set2_rank_predicted, NA_real_)) %>%
# Tie flag, then average rank for ties
mutate(
has_tie = n_distinct(set2_rank_predicted) < n(),
set2_rank_pred_rank = rank(-set2_rank_predicted, ties.method = "average")
) %>%
ungroup()
# Message about ties
tie_summary <- df_pred %>%
filter(!is.na(set2_rank_predicted), !is.na(set2_rank)) %>%
distinct(ResponseId, has_tie) %>%
summarise(n_ids = n(), n_ties = sum(has_tie), .groups = "drop")
cat(sprintf("Tie handling: %d of %d participants had tied predicted scores; average ranks were used.",
tie_summary$n_ties, tie_summary$n_ids))
# Per-participant correlations
df_pred %>%
group_by(ResponseId) %>%
summarise(
r_pearson = (cor(set2_rank_pred_rank, set2_rank, method = "pearson")),
tau_kendall = (cor(set2_rank_pred_rank, set2_rank, method = "kendall")),
.groups = "drop"
) %>%
mutate(model = label)
}
# Run for all three models
cor_M1 <- predict_set2_by_id.Order(set1.Order_M1, master_df, "RE: Prob & Amt")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M2 <- predict_set2_by_id.Order(set1.Order_M2, master_df, "Adding Order FE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M3 <- predict_set2_by_id.Order(set1.Order_M3, master_df, "Adding Order RE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_all <- bind_rows(cor_M1, cor_M2, cor_M3) %>%
mutate(model = factor(model, levels = c("RE: Prob & Amt","Adding Order FE","Adding Order RE")))
p_r.Order_lmer <- make_violin(cor_all, "r_pearson", "Per-participant Pearson r")
p_tau.Order_lmer <- make_violin(cor_all, "tau_kendall", "Per-participant Kendall \u03C4")
p_r.Order_lmer + p_tau.Order_lmer + plot_layout(ncol = 2)
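make_violin() is defined earlier in this script; as a reference for readers starting here, a minimal sketch of a helper with the same call signature is shown below (the geoms and styling are assumptions, not the original definition).
# Sketch of a violin-plot helper matching make_violin(df, value_col, title)
make_violin_sketch <- function(df, value_col, title) {
ggplot(df, aes(x = model, y = .data[[value_col]], fill = model)) +
geom_violin(trim = FALSE, alpha = 0.6) +
geom_boxplot(width = 0.1, outlier.shape = NA, fill = "white") +
theme_minimal() +
theme(legend.position = "none") +
labs(title = title, x = NULL, y = value_col)
}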
compare_models_simple.Order <- function(df, value_col,
m1 = "RE: Prob & Amt",
m2 = "Adding Order FE",
m3 = "Adding Order RE",
n_perm = 10000) {
# Build wide table with explicit column selection (no NSE)
wide <- df %>%
select(ResponseId, model, value = all_of(value_col)) %>%
pivot_wider(names_from = model, values_from = value)
# Helper to run the three paired tests for a pair of columns
run_tests <- function(a_name, b_name, label) {
x <- wide[[a_name]]
y <- wide[[b_name]]
diffs <- y - x # positive => b_name improves over a_name
cat(sprintf("%s: pairs used = %d\n", label, length(diffs)))
# Paired t-test
t_res <- try(t.test(y, x, paired = TRUE), silent = TRUE)
if (inherits(t_res, "try-error")) {
t_p <- NA_real_; t_stat <- NA_real_; t_df <- NA_real_
} else {
t_p <- unname(t_res$p.value)
t_stat <- unname(t_res$statistic)
t_df <- unname(t_res$parameter)
}
# Wilcoxon signed-rank (paired)
w_res <- try(wilcox.test(y, x, paired = TRUE, exact = FALSE), silent = TRUE)
if (inherits(w_res, "try-error")) {
w_p <- NA_real_; w_V <- NA_real_
} else {
w_p <- unname(w_res$p.value)
w_V <- unname(w_res$statistic)
}
# Permutation (paired sign-flip) on mean diff
p_perm <- perm_paired_mean(x, y, n_perm = n_perm)
data.frame(
Comparison = label,
N_pairs = length(diffs),
Mean_Diff = if (length(diffs)) mean(diffs) else NA_real_,
Median_Diff = if (length(diffs)) median(diffs) else NA_real_,
Prop_Better = if (length(diffs)) mean(diffs > 0) else NA_real_,
Prop_Worse = if (length(diffs)) mean(diffs < 0) else NA_real_,
Prop_Tie = if (length(diffs)) mean(diffs == 0) else NA_real_,
t_test_p = t_p,
t_statistic = t_stat,
t_df = t_df,
wilcoxon_p = w_p,
wilcoxon_V = w_V,
permutation_p = p_perm,
check.names = FALSE
)
}
rbind(
run_tests(m1, m2, "Adding FE (M2 − M1)"),
run_tests(m1, m3, "Adding RE (M3 − M1)")
) %>%
mutate(
Mean_Diff = round(Mean_Diff, 3),
Median_Diff = round(Median_Diff, 3),
Prop_Better = round(Prop_Better, 3),
Prop_Worse = round(Prop_Worse, 3),
Prop_Tie = round(Prop_Tie, 3),
t_test_p = round(t_test_p, 4),
t_statistic = round(t_statistic, 3),
t_df = round(t_df),
wilcoxon_p = round(wilcoxon_p, 4),
permutation_p = round(permutation_p, 4)
)
}
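perm_paired_mean() is defined earlier in this script; the sketch below shows one way a paired sign-flip permutation test on the mean difference could be written, matching the call perm_paired_mean(x, y, n_perm) (the implementation details are assumptions).
# Sketch: paired sign-flip permutation test on the mean difference
# Under H0 the paired differences are symmetric around 0, so their signs can be flipped at random
perm_paired_mean_sketch <- function(x, y, n_perm = 10000) {
d <- y - x
d <- d[is.finite(d)] # drop incomplete pairs
obs <- abs(mean(d))
perm <- replicate(n_perm, abs(mean(sample(c(-1, 1), length(d), replace = TRUE) * d)))
mean(perm >= obs) # two-sided p-value
}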
# ---- Build the two tables (Pearson r and Kendall tau) ----
# Expect cor_all to have: ResponseId, model, r_pearson, tau_kendall
table_r.order_lmer <- compare_models_simple.Order(cor_all, "r_pearson")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
table_tau.order_lmer <- compare_models_simple.Order(cor_all, "tau_kendall")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
# Heterogeneity in order but not in distance? Very interesting.
kable(table_r.order_lmer,
caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Pearson r (Order)</span></b>",
digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
kable_styling(full_width = FALSE)
Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.028 | 0 | 0.253 | 0.280 | 0.467 | 0.0776 | 1.775 | 181 | 0.1437 | 2780.5 | 0.0784 |
Adding RE (M3 − M1) | 182 | 0.064 | 0 | 0.236 | 0.192 | 0.571 | 0.0136 | 2.492 | 181 | 0.0191 | 2010.5 | 0.0121 |
kable(table_tau.order_lmer,
caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Kendall τ (Order)</span></b>",
digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
kable_styling(full_width = FALSE)
Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.015 | 0 | 0.247 | 0.264 | 0.489 | 0.3384 | 0.960 | 181 | 0.3439 | 2429.0 | 0.3139 |
Adding RE (M3 − M1) | 182 | 0.053 | 0 | 0.231 | 0.192 | 0.577 | 0.0224 | 2.302 | 181 | 0.0252 | 1940.5 | 0.0210 |
set1.Count_M1 <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df
)
set1.Count_M2 <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_touch_count_binary+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df
)
set1.Count_M3 <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_touch_count_binary+
(set1_prob.z + set1_amt.z + set1_touch_count_binary | ResponseId),
data = master_df
)
tab_model(
set1.Count_M1,
set1.Count_M2,
set1.Count_M3,
dv.labels = c("RE: Prob & Amt", "Adding Count FE", "Adding Count RE"),
pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Count (Binary)"),
show.re.var = TRUE, # show random-effects variances
show.icc = TRUE, # show ICC
digits = 2
)
RE: Prob & Amt | Adding Count FE | Adding Count RE | |||||||
---|---|---|---|---|---|---|---|---|---|
Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
(Intercept) | 3.50 | 3.45 – 3.55 | <0.001 | 3.20 | 3.10 – 3.29 | <0.001 | 3.19 | 3.09 – 3.29 | <0.001 |
Prob (z) | 0.23 | 0.07 – 0.39 | 0.005 | 0.24 | 0.08 – 0.39 | 0.002 | 0.26 | 0.11 – 0.41 | 0.001 |
Amt (z) | -0.47 | -0.62 – -0.33 | <0.001 | -0.43 | -0.57 – -0.29 | <0.001 | -0.44 | -0.58 – -0.30 | <0.001 |
Drag Count (Binary) | 0.49 | 0.36 – 0.62 | <0.001 | 0.49 | 0.35 – 0.63 | <0.001 | |||
Random Effects | |||||||||
σ2 | 0.77 | 0.75 | 0.73 | ||||||
τ00 | 0.00 ResponseId | 0.00 ResponseId | 0.06 ResponseId | ||||||
τ11 | 0.95 ResponseId.set1_prob.z | 0.83 ResponseId.set1_prob.z | 0.81 ResponseId.set1_prob.z | ||||||
0.72 ResponseId.set1_amt.z | 0.63 ResponseId.set1_amt.z | 0.64 ResponseId.set1_amt.z | |||||||
0.16 ResponseId.set1_touch_count_binary | |||||||||
ρ01 | -0.29 | 0.38 | |||||||
0.99 | -0.21 | ||||||||
-1.00 | |||||||||
N | 182 ResponseId | 182 ResponseId | 182 ResponseId | ||||||
Observations | 1092 | 1092 | 1092 | ||||||
Marginal R2 / Conditional R2 | 0.360 / NA | 0.392 / NA | 0.420 / NA |
Addressing Singularity: Experiment
As above, adding rnorm(0, 0.5) noise to the (participant-specific) values resolves all of the singularity issues.
# Add models below just to showcase the idea of adding a small random value
set1.Count_M1.examine <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df.examine
)
set1.Count_M2.examine <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_touch_count_binary+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df.examine
)
set1.Count_M3.examine <- lmer(
set1_rank.r ~ set1_prob.z + set1_amt.z+ set1_touch_count_binary+
(set1_prob.z + set1_amt.z + set1_touch_count_binary | ResponseId),
data = master_df.examine
)
tab_model(
set1.Count_M1.examine,
set1.Count_M2.examine,
set1.Count_M3.examine,
dv.labels = c("RE: Prob & Amt", "Adding Count FE", "Adding Count RE"),
pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Count"),
show.re.var = TRUE, # show random-effects variances
show.icc = TRUE, # show ICC
digits = 2
)
RE: Prob & Amt | Adding Count FE | Adding Count RE | |||||||
---|---|---|---|---|---|---|---|---|---|
Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
(Intercept) | 3.48 | 3.41 – 3.55 | <0.001 | 3.13 | 3.01 – 3.25 | <0.001 | 3.12 | 2.99 – 3.25 | <0.001 |
Prob (z) | 0.24 | 0.08 – 0.40 | 0.004 | 0.24 | 0.09 – 0.40 | 0.002 | 0.27 | 0.12 – 0.42 | 0.001 |
Amt (z) | -0.47 | -0.61 – -0.32 | <0.001 | -0.42 | -0.56 – -0.27 | <0.001 | -0.43 | -0.57 – -0.29 | <0.001 |
Drag Count | 0.56 | 0.41 – 0.71 | <0.001 | 0.58 | 0.42 – 0.74 | <0.001 | |||
Random Effects | |||||||||
σ2 | 0.99 | 0.96 | 0.93 | ||||||
τ00 | 0.06 ResponseId | 0.08 ResponseId | 0.21 ResponseId | ||||||
τ11 | 0.89 ResponseId.set1_prob.z | 0.76 ResponseId.set1_prob.z | 0.74 ResponseId.set1_prob.z | ||||||
0.66 ResponseId.set1_amt.z | 0.58 ResponseId.set1_amt.z | 0.58 ResponseId.set1_amt.z | |||||||
0.17 ResponseId.set1_touch_count_binary | |||||||||
ρ01 | -0.17 | -0.18 | 0.19 | ||||||
0.05 | 0.05 | -0.17 | |||||||
-0.79 | |||||||||
ICC | 0.65 | 0.62 | 0.63 | ||||||
N | 182 ResponseId | 182 ResponseId | 182 ResponseId | ||||||
Observations | 1092 | 1092 | 1092 | ||||||
Marginal R2 / Conditional R2 | 0.134 / 0.694 | 0.165 / 0.685 | 0.182 / 0.696 |
get_participant_effects_lmer.Count <- function(model, id_var = "ResponseId") {
# Random effects matrix -> data.frame, keep names like (Intercept)
re_mat <- ranef(model)[[id_var]]
re_df <- data.frame(re_mat, check.names = FALSE)
re_df[[id_var]] <- rownames(re_mat)
# Fixed effects (named vector)
fe <- fixef(model)
# Union of all coefficient names (fixed + random)
all_terms <- union(names(fe), colnames(re_mat))
# Ensure every term exists; add fixed beta (default 0) + random deviation (default 0)
for (nm in all_terms) {
if (!nm %in% names(re_df)) re_df[[nm]] <- 0 # add missing column
if (nm %in% names(fe)) re_df[[nm]] <- re_df[[nm]] + unname(fe[[nm]])
}
re_df # output
}
# Use the Count-specific helper defined above
re_M1 <- get_participant_effects_lmer.Count(set1.Count_M1)
re_M2 <- get_participant_effects_lmer.Count(set1.Count_M2)
re_M3 <- get_participant_effects_lmer.Count(set1.Count_M3)
# Add a model label
re_M1$model <- "M1 - RE: Prob & Amt"
re_M2$model <- "M2 - Adding Count FE"
re_M3$model <- "M3 - Adding Count RE"
# Combine
re_all <- bind_rows(re_M1, re_M2, re_M3)
re_all_long <- re_all %>%
pivot_longer(
cols = -c(ResponseId, model),
names_to = "parameter",
values_to = "coefficient"
)
re_all_long <- re_all_long %>%
mutate(
parameter = recode(parameter,
`(Intercept)` = "(Intercept)",
`set1_prob.z` = "Prob (z)",
`set1_amt.z` = "Amt (z)",
`set1_touch_count_binary` = "Drag Count"
),
parameter = factor(parameter,
levels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Count")
),
model = factor(model, levels = c("M1 - RE: Prob & Amt","M2 - Adding Count FE","M3 - Adding Count RE"))
)
# Summary stats for markers
summary_stats <- re_all_long %>%
group_by(model, parameter) %>%
summarise(
mean_val = mean(coefficient, na.rm = TRUE),
median_val = median(coefficient, na.rm = TRUE),
.groups = "drop"
)
# Color map: Intercept grey, slopes steelblue
param_colors <- c(
"(Intercept)" = "grey60",
"Prob (z)" = "steelblue",
"Amt (z)" = "steelblue",
"Drag Count" = "steelblue"
)
ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
geom_histogram(bins = 30, alpha = 0.85, color = "white") +
facet_grid(parameter ~ model, scales = "free") + # allow y to vary
scale_fill_manual(values = param_colors, guide = "none") +
theme_minimal() +
labs(
title = "Participant-specific coefficients",
x = "Coefficient value",
y = "Count"
)+
theme(
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
strip.text = element_text(face = "bold",size=12) # bold facet labels
)
predict_set2_by_id.Count <- function(model, data, label) {
# Random effects (per ID)
re_mat <- ranef(model)$ResponseId
re_df <- as.data.frame(re_mat)
re_df$ResponseId <- rownames(re_mat)
# Rename to consistent names if present
if ("(Intercept)" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_intercept = `(Intercept)`)
if ("set1_prob.z" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_prob = `set1_prob.z`)
if ("set1_amt.z" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_amt = `set1_amt.z`)
if ("set1_touch_count_binary" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_drag = `set1_touch_count_binary`)
# Ensure all needed columns exist (fill missing with 0)
for (nm in c("b_intercept","b_prob","b_amt","b_drag")) {
if (!nm %in% names(re_df)) re_df[[nm]] <- 0
}
# Fixed effects (missing terms default to 0)
fe <- fixef(model)
fe_int <- if ("(Intercept)" %in% names(fe)) unname(fe["(Intercept)"]) else 0
fe_prob <- if ("set1_prob.z" %in% names(fe)) unname(fe["set1_prob.z"]) else 0
fe_amt <- if ("set1_amt.z" %in% names(fe)) unname(fe["set1_amt.z"]) else 0
fe_drag <- if ("set1_touch_count_binary" %in% names(fe)) unname(fe["set1_touch_count_binary"]) else 0
df_pred <- data %>%
left_join(re_df, by = "ResponseId") %>%
mutate(
set2_rank_predicted =
(fe_int + b_intercept) +
(fe_prob + b_prob) * set2_prob.z +
(fe_amt + b_amt ) * set2_amt.z +
(fe_drag + b_drag) * set2_touch_count_binary
) %>%
group_by(ResponseId) %>%
# Remove NAs within participant before tie check & ranking
mutate(set2_rank_predicted = ifelse(is.finite(set2_rank_predicted), set2_rank_predicted, NA_real_)) %>%
# Tie flag, then average rank for ties
mutate(
has_tie = n_distinct(set2_rank_predicted) < n(),
set2_rank_pred_rank = rank(-set2_rank_predicted, ties.method = "average")
) %>%
ungroup()
# Message about ties
tie_summary <- df_pred %>%
filter(!is.na(set2_rank_predicted), !is.na(set2_rank)) %>%
distinct(ResponseId, has_tie) %>%
summarise(n_ids = n(), n_ties = sum(has_tie), .groups = "drop")
cat(sprintf("Tie handling: %d of %d participants had tied predicted scores; average ranks were used.",
tie_summary$n_ties, tie_summary$n_ids))
# Per-participant correlations
df_pred %>%
group_by(ResponseId) %>%
summarise(
r_pearson = (cor(set2_rank_pred_rank, set2_rank, method = "pearson")),
tau_kendall = (cor(set2_rank_pred_rank, set2_rank, method = "kendall")),
.groups = "drop"
) %>%
mutate(model = label)
}
# Run for all three models
cor_M1 <- predict_set2_by_id.Count(set1.Count_M1, master_df, "RE: Prob & Amt")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M2 <- predict_set2_by_id.Count(set1.Count_M2, master_df, "Adding Count FE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M3 <- predict_set2_by_id.Count(set1.Count_M3, master_df, "Adding Count RE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_all <- bind_rows(cor_M1, cor_M2, cor_M3) %>%
mutate(model = factor(model, levels = c("RE: Prob & Amt","Adding Count FE","Adding Count RE")))
p_r.Count_lmer <- make_violin(cor_all, "r_pearson", "Per-participant Pearson r")
p_tau.Count_lmer <- make_violin(cor_all, "tau_kendall", "Per-participant Kendall \u03C4")
p_r.Count_lmer + p_tau.Count_lmer + plot_layout(ncol = 2)
compare_models_simple.Count <- function(df, value_col,
m1 = "RE: Prob & Amt",
m2 = "Adding Count FE",
m3 = "Adding Count RE",
n_perm = 10000) {
# Build wide table with explicit column selection (no NSE)
wide <- df %>%
select(ResponseId, model, value = all_of(value_col)) %>%
pivot_wider(names_from = model, values_from = value)
# Helper to run the three paired tests for a pair of columns
run_tests <- function(a_name, b_name, label) {
x <- wide[[a_name]]
y <- wide[[b_name]]
diffs <- y - x # positive => b_name improves over a_name
cat(sprintf("%s: pairs used = %d\n", label, length(diffs)))
# Paired t-test
t_res <- try(t.test(y, x, paired = TRUE), silent = TRUE)
if (inherits(t_res, "try-error")) {
t_p <- NA_real_; t_stat <- NA_real_; t_df <- NA_real_
} else {
t_p <- unname(t_res$p.value)
t_stat <- unname(t_res$statistic)
t_df <- unname(t_res$parameter)
}
# Wilcoxon signed-rank (paired)
w_res <- try(wilcox.test(y, x, paired = TRUE, exact = FALSE), silent = TRUE)
if (inherits(w_res, "try-error")) {
w_p <- NA_real_; w_V <- NA_real_
} else {
w_p <- unname(w_res$p.value)
w_V <- unname(w_res$statistic)
}
# Permutation (paired sign-flip) on mean diff
p_perm <- perm_paired_mean(x, y, n_perm = n_perm)
data.frame(
Comparison = label,
N_pairs = length(diffs),
Mean_Diff = if (length(diffs)) mean(diffs) else NA_real_,
Median_Diff = if (length(diffs)) median(diffs) else NA_real_,
Prop_Better = if (length(diffs)) mean(diffs > 0) else NA_real_,
Prop_Worse = if (length(diffs)) mean(diffs < 0) else NA_real_,
Prop_Tie = if (length(diffs)) mean(diffs == 0) else NA_real_,
t_test_p = t_p,
t_statistic = t_stat,
t_df = t_df,
wilcoxon_p = w_p,
wilcoxon_V = w_V,
permutation_p = p_perm,
check.names = FALSE
)
}
rbind(
run_tests(m1, m2, "Adding FE (M2 − M1)"),
run_tests(m1, m3, "Adding RE (M3 − M1)")
) %>%
mutate(
Mean_Diff = round(Mean_Diff, 3),
Median_Diff = round(Median_Diff, 3),
Prop_Better = round(Prop_Better, 3),
Prop_Worse = round(Prop_Worse, 3),
Prop_Tie = round(Prop_Tie, 3),
t_test_p = round(t_test_p, 4),
t_statistic = round(t_statistic, 3),
t_df = round(t_df),
wilcoxon_p = round(wilcoxon_p, 4),
permutation_p = round(permutation_p, 4)
)
}
# ---- Build the two tables (Pearson r and Kendall tau) ----
# Expect cor_all to have: ResponseId, model, r_pearson, tau_kendall
table_r.Count_lmer <- compare_models_simple.Count(cor_all, "r_pearson")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
table_tau.Count_lmer <- compare_models_simple.Count(cor_all, "tau_kendall")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
# Heterogeneity in order but not in distance? Very interesting.
kable(table_r.Count_lmer,
caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Pearson r (Count)</span></b>",
digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
kable_styling(full_width = FALSE)
Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.011 | 0 | 0.126 | 0.104 | 0.769 | 0.1520 | 1.438 | 181 | 0.3571 | 525.5 | 0.1646 |
Adding RE (M3 − M1) | 182 | 0.020 | 0 | 0.148 | 0.099 | 0.753 | 0.0521 | 1.955 | 181 | 0.1072 | 660.5 | 0.0547 |
kable(table_tau.Count_lmer,
caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Kendall τ (Count)</span></b>",
digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
kable_styling(full_width = FALSE)
Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.004 | 0 | 0.115 | 0.104 | 0.780 | 0.5331 | 0.624 | 181 | 0.2353 | 498 | 0.4723 |
Adding RE (M3 − M1) | 182 | 0.011 | 0 | 0.137 | 0.099 | 0.764 | 0.2293 | 1.206 | 181 | 0.1024 | 608 | 0.2006 |
set1.distance_M1 <- clmm(
as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df
)
set1.distance_M2 <- clmm(
as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+ set1_drag_distance.r+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df
)
set1.distance_M3 <- clmm(
as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+ set1_drag_distance.r+
(set1_prob.z + set1_amt.z + set1_drag_distance.r | ResponseId),
data = master_df
)
tab_model(
set1.distance_M1,
set1.distance_M2,
set1.distance_M3,
dv.labels = c("RE: Prob & Amt", "Adding Distance FE", "Adding Distance RE"),
pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag distance"),
show.re.var = TRUE, # show random-effects variances
show.icc = TRUE, # show ICC
digits = 2,
transform= NULL
)
RE: Prob & Amt | Adding Distance FE | Adding Distance RE | |||||||
---|---|---|---|---|---|---|---|---|---|
Predictors | Log-Odds | CI | p | Log-Odds | CI | p | Log-Odds | CI | p |
1|2 | -4.15 | -4.53 – -3.77 | <0.001 | -3.60 | -3.99 – -3.21 | <0.001 | -3.65 | -4.08 – -3.22 | <0.001 |
2|3 | -1.76 | -2.01 – -1.50 | <0.001 | -1.15 | -1.42 – -0.88 | <0.001 | -1.16 | -1.46 – -0.86 | <0.001 |
3|4 | 0.14 | -0.08 – 0.36 | 0.220 | 0.84 | 0.58 – 1.10 | <0.001 | 0.87 | 0.58 – 1.16 | <0.001 |
4|5 | 1.95 | 1.69 – 2.21 | <0.001 | 2.74 | 2.43 – 3.06 | <0.001 | 2.81 | 2.45 – 3.17 | <0.001 |
5|6 | 4.08 | 3.72 – 4.44 | <0.001 | 4.95 | 4.52 – 5.38 | <0.001 | 5.10 | 4.59 – 5.60 | <0.001 |
set1_prob.z | 0.60 | 0.16 – 1.04 | 0.008 | 0.49 | 0.07 – 0.90 | 0.021 | 0.61 | 0.19 – 1.04 | 0.005 |
set1_amt.z | -1.34 | -1.75 – -0.93 | <0.001 | -1.26 | -1.66 – -0.86 | <0.001 | -1.39 | -1.79 – -0.99 | <0.001 |
set1_drag_distance.r | 0.59 | 0.48 – 0.70 | <0.001 | 0.60 | 0.47 – 0.73 | <0.001 | |||
Random Effects | |||||||||
σ2 | 3.29 | 3.29 | 3.29 | ||||||
τ00 | 0.07 ResponseId | 0.04 ResponseId | 0.33 ResponseId | ||||||
τ11 | 6.52 ResponseId.set1_prob.z | 5.44 ResponseId.set1_prob.z | 5.56 ResponseId.set1_prob.z | ||||||
4.88 ResponseId.set1_amt.z | 4.10 ResponseId.set1_amt.z | 4.02 ResponseId.set1_amt.z | |||||||
0.16 ResponseId.set1_drag_distance.r | |||||||||
ρ01 | 0.69 | 0.74 | 0.83 | ||||||
0.56 | 0.55 | -0.26 | |||||||
-0.93 | |||||||||
ICC | 0.80 | 0.76 | 0.76 | ||||||
N | 182 ResponseId | 182 ResponseId | 182 ResponseId | ||||||
Observations | 1092 | 1092 | 1092 | ||||||
Marginal R2 / Conditional R2 | 0.167 / 0.833 | 0.234 / 0.819 | 0.275 / 0.827 |
get_participant_effects_clmm <- function(model, id_var = "ResponseId") {
# Random effects
re_list <- ranef(model)
re <- as.data.frame(re_list[[id_var]])
re[[id_var]] <- rownames(re_list[[id_var]])
# Fixed effects: only betas, no thresholds
fe_beta <- model$beta
# Ensure each fixed-effect name exists in RE; fill missing with 0
for (nm in names(fe_beta)) {
if (!nm %in% names(re)) re[[nm]] <- 0
re[[nm]] <- re[[nm]] + unname(fe_beta[[nm]])
}
re
}
# Usage:
re_M1 <- get_participant_effects_clmm(set1.distance_M1)
re_M2 <- get_participant_effects_clmm(set1.distance_M2)
re_M3 <- get_participant_effects_clmm(set1.distance_M3)
# Add a model label
re_M1$model <- "M1 - RE: Prob & Amt"
re_M2$model <- "M2 - Adding Distance FE"
re_M3$model <- "M3 - Adding Distance RE"
# Combine
re_all <- bind_rows(re_M1, re_M2, re_M3)
re_all_long <- re_all %>%
pivot_longer(
cols = -c(ResponseId, model),
names_to = "parameter",
values_to = "coefficient"
)
re_all_long <- re_all_long %>%
mutate(
parameter = recode(parameter,
`(Intercept)` = "(Intercept)",
`set1_prob.z` = "Prob (z)",
`set1_amt.z` = "Amt (z)",
`set1_drag_distance.r` = "Drag distance"
),
parameter = factor(parameter,
levels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag distance")
),
model = factor(model, levels = c("M1 - RE: Prob & Amt","M2 - Adding Distance FE","M3 - Adding Distance RE"))
)
# Summary stats for markers
summary_stats <- re_all_long %>%
group_by(model, parameter) %>%
summarise(
mean_val = mean(coefficient, na.rm = TRUE),
median_val = median(coefficient, na.rm = TRUE),
.groups = "drop"
)
# Color map: Intercept grey, slopes steelblue
param_colors <- c(
"(Intercept)" = "grey60",
"Prob (z)" = "steelblue",
"Amt (z)" = "steelblue",
"Drag distance" = "steelblue"
)
ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
geom_histogram(bins = 30, alpha = 0.85, color = "white") +
facet_grid(parameter ~ model, scales = "free") + # allow y to vary
scale_fill_manual(values = param_colors, guide = "none") +
theme_minimal() +
labs(
title = "Participant-specific coefficients",
x = "Coefficient value",
y = "Count"
)+
theme(
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
strip.text = element_text(face = "bold",size=12) # bold facet labels
)
predict_set2_by_id_clmm.Distance <- function(model, data, label) {
# --- Random effects (usually only intercepts in clmm)
re_list <- ranef(model)
if (is.list(re_list) && !is.null(re_list$ResponseId)) {
re_mat <- re_list$ResponseId
re_df <- data.frame(re_mat, check.names = FALSE)
re_df$ResponseId <- rownames(re_mat)
} else {
# no RE present; keep re_mat empty so the rename checks below are safely skipped
re_mat <- NULL
re_df <- data.frame(ResponseId = unique(data$ResponseId), check.names = FALSE)
}
if ("(Intercept)" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_intercept = `(Intercept)`)
if ("set1_prob.z" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_prob = `set1_prob.z`)
if ("set1_amt.z" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_amt = `set1_amt.z`)
if ("set1_drag_distance.r" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_drag = `set1_drag_distance.r`)
# Ensure all needed columns exist (fill missing with 0)
for (nm in c("b_intercept","b_prob","b_amt","b_drag")) {
if (!nm %in% names(re_df)) re_df[[nm]] <- 0
}
# --- Fixed effects: use model$beta (no thresholds)
fe <- model$beta
fe_prob <- if ("set1_prob.z" %in% names(fe)) unname(fe["set1_prob.z"]) else 0
fe_amt <- if ("set1_amt.z" %in% names(fe)) unname(fe["set1_amt.z"]) else 0
fe_drag <- if ("set1_drag_distance.r" %in% names(fe)) unname(fe["set1_drag_distance.r"]) else 0
# NOTE: clmm has thresholds instead of a global fixed intercept; we do NOT add a fixed intercept.
# We only use participant-specific random intercept shifts (b_intercept) on the latent scale.
df_pred <- data %>%
left_join(re_df, by = "ResponseId") %>%
mutate(
# latent score for ordering (no fixed intercept term)
latent_score =
b_intercept +
(fe_prob+b_prob) * set2_prob.z +
(fe_amt+b_amt) * set2_amt.z +
(fe_drag+b_drag) * set2_drag_distance.r
) %>%
group_by(ResponseId) %>%
# 2) Handle ties via average ranks (and keep NAs out of ranking)
mutate(
# flag ties within participant (ignore NAs)
has_tie = {
x <- latent_score[is.finite(latent_score)]
dplyr::n_distinct(x) < length(x)
},
# average ranks; higher score = better (rank 1)
set2_rank_pred_rank = rank(-latent_score, ties.method = "average")
) %>%
ungroup()
# 3) Tie summary message
tie_summary <- df_pred %>%
distinct(ResponseId, has_tie) %>%
summarise(n_ids = dplyr::n(), n_ties = sum(has_tie), .groups = "drop")
cat(sprintf(
"Tie handling: %d of %d participants had tied predicted scores; average ranks were used.\n",
tie_summary$n_ties, tie_summary$n_ids
))
# 4) Per-participant correlations (complete cases only)
df_pred %>%
group_by(ResponseId) %>%
summarise(
r_pearson = cor(set2_rank_pred_rank, set2_rank, method = "pearson", use = "complete.obs"),
tau_kendall = cor(set2_rank_pred_rank, set2_rank, method = "kendall", use = "complete.obs"),
.groups = "drop"
) %>%
mutate(model = label)
}
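The latent score above omits the clmm thresholds, so it is only meaningful for ordering alternatives within a participant. If category probabilities were wanted instead, they could be recovered from the thresholds (stored in model$alpha for ordinal::clmm fits) via the cumulative logit, as in the sketch below (an illustration only, not part of the prediction pipeline).
# Sketch: category probabilities for one latent score eta
# P(Y <= k) = plogis(theta_k - eta); category probabilities are successive differences
clmm_category_probs <- function(model, eta) {
theta <- model$alpha # ordered threshold parameters
cum_p <- c(plogis(theta - eta), 1) # P(Y <= k) for k = 1..K
diff(c(0, cum_p)) # P(Y = k)
}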
# Run for all three models
cor_M1 <- predict_set2_by_id_clmm.Distance(set1.distance_M1, master_df, "RE: Prob & Amt")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M2 <- predict_set2_by_id_clmm.Distance(set1.distance_M2, master_df, "Adding Distance FE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M3 <- predict_set2_by_id_clmm.Distance(set1.distance_M3, master_df, "Adding Distance RE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_all <- bind_rows(cor_M1, cor_M2, cor_M3) %>%
mutate(model = factor(model, levels = c("RE: Prob & Amt","Adding Distance FE","Adding Distance RE")))
p_r.Distance_clmm <- make_violin(cor_all, "r_pearson", "Per-participant Pearson r")
p_tau.Distance_clmm <- make_violin(cor_all, "tau_kendall", "Per-participant Kendall \u03C4")
p_r.Distance_clmm + p_tau.Distance_clmm + plot_layout(ncol = 2)
table_r.distance_CLMM <- compare_models_simple.Distance(cor_all, "r_pearson")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
table_tau.distance_CLMM <- compare_models_simple.Distance(cor_all, "tau_kendall")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
kable(table_r.distance_CLMM,
caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Pearson r (Distance)</span></b>",
digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
kable_styling(full_width = FALSE)
Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.052 | 0 | 0.253 | 0.154 | 0.593 | 1e-04 | 3.939 | 181 | 4e-04 | 2049.0 | 0 |
Adding RE (M3 − M1) | 182 | 0.068 | 0 | 0.242 | 0.181 | 0.577 | 1e-04 | 4.023 | 181 | 5e-04 | 2184.5 | 0 |
kable(table_tau.distance_CLMM,
caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Kendall τ (Distance)</span></b>",
digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
kable_styling(full_width = FALSE)
Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.032 | 0 | 0.242 | 0.143 | 0.615 | 0.0065 | 2.752 | 181 | 0.0044 | 1723 | 0.0061 |
Adding RE (M3 − M1) | 182 | 0.044 | 0 | 0.236 | 0.165 | 0.599 | 0.0033 | 2.976 | 181 | 0.0025 | 1897 | 0.0033 |
set1.order_M1 <- clmm(
as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df
)
set1.order_M2 <- clmm(
as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+ set1_order+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df
)
set1.order_M3 <- clmm(
as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+ set1_order+
(set1_prob.z + set1_amt.z + set1_order | ResponseId),
data = master_df
)
tab_model(
set1.order_M1,
set1.order_M2,
set1.order_M3,
dv.labels = c("RE: Prob & Amt", "Adding Order FE", "Adding Order RE"),
pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Order"),
show.re.var = TRUE, # show random-effects variances
show.icc = TRUE, # show ICC
digits = 2,
transform= NULL
)
RE: Prob & Amt | Adding Order FE | Adding Order RE | |||||||
---|---|---|---|---|---|---|---|---|---|
Predictors | Log-Odds | CI | p | Log-Odds | CI | p | Log-Odds | CI | p |
1|2 | -4.15 | -4.53 – -3.77 | <0.001 | -6.65 | -7.28 – -6.01 | <0.001 | -7.34 | -8.20 – -6.48 | <0.001 |
2|3 | -1.76 | -2.01 – -1.50 | <0.001 | -4.22 | -4.75 – -3.69 | <0.001 | -4.67 | -5.41 – -3.93 | <0.001 |
3|4 | 0.14 | -0.08 – 0.36 | 0.220 | -2.22 | -2.70 – -1.75 | <0.001 | -2.43 | -3.11 – -1.76 | <0.001 |
4|5 | 1.95 | 1.69 – 2.21 | <0.001 | -0.27 | -0.72 – 0.18 | 0.239 | -0.23 | -0.87 – 0.41 | 0.483 |
5|6 | 4.08 | 3.72 – 4.44 | <0.001 | 1.93 | 1.44 – 2.42 | <0.001 | 2.32 | 1.63 – 3.00 | <0.001 |
set1_prob.z | 0.60 | 0.16 – 1.04 | 0.008 | 0.35 | -0.05 – 0.75 | 0.082 | 0.74 | 0.27 – 1.21 | 0.002 |
set1_amt.z | -1.34 | -1.75 – -0.93 | <0.001 | -1.34 | -1.74 – -0.94 | <0.001 | -1.54 | -1.97 – -1.10 | <0.001 |
set1_order | -0.74 | -0.87 – -0.61 | <0.001 | -0.79 | -0.99 – -0.60 | <0.001 | |||
Random Effects | |||||||||
σ2 | 3.29 | 3.29 | 3.29 | ||||||
τ00 | 0.07 ResponseId | 0.05 ResponseId | 7.22 ResponseId | ||||||
τ11 | 6.52 ResponseId.set1_prob.z | 4.79 ResponseId.set1_prob.z | 6.67 ResponseId.set1_prob.z | ||||||
4.88 ResponseId.set1_amt.z | 4.30 ResponseId.set1_amt.z | 4.97 ResponseId.set1_amt.z | |||||||
0.72 ResponseId.set1_order | |||||||||
ρ01 | 0.69 | 0.74 | -0.64 | ||||||
0.56 | 0.55 | 0.45 | |||||||
-0.99 | |||||||||
ICC | 0.80 | 0.76 | 0.80 | ||||||
N | 182 ResponseId | 182 ResponseId | 182 ResponseId | ||||||
Observations | 1092 | 1092 | 1092 | ||||||
Marginal R2 / Conditional R2 | 0.167 / 0.833 | 0.250 / 0.817 | 0.301 / 0.858 |
re_M1 <- get_participant_effects_clmm(set1.order_M1)
re_M2 <- get_participant_effects_clmm(set1.order_M2)
re_M3 <- get_participant_effects_clmm(set1.order_M3)
# Add a model label
re_M1$model <- "M1 - RE: Prob & Amt"
re_M2$model <- "M2 - Adding Order FE"
re_M3$model <- "M3 - Adding Order RE"
# Combine
re_all <- bind_rows(re_M1, re_M2, re_M3)
re_all_long <- re_all %>%
pivot_longer(
cols = -c(ResponseId, model),
names_to = "parameter",
values_to = "coefficient"
)
re_all_long <- re_all_long %>%
mutate(
parameter = recode(parameter,
`(Intercept)` = "(Intercept)",
`set1_prob.z` = "Prob (z)",
`set1_amt.z` = "Amt (z)",
`set1_order` = "Drag Order"
),
parameter = factor(parameter,
levels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Order")
),
model = factor(model, levels = c("M1 - RE: Prob & Amt","M2 - Adding Order FE","M3 - Adding Order RE"))
)
# Summary stats for markers
summary_stats <- re_all_long %>%
group_by(model, parameter) %>%
summarise(
mean_val = mean(coefficient, na.rm = TRUE),
median_val = median(coefficient, na.rm = TRUE),
.groups = "drop"
)
# Color map: Intercept grey, slopes steelblue
param_colors <- c(
"(Intercept)" = "grey60",
"Prob (z)" = "steelblue",
"Amt (z)" = "steelblue",
"Drag Order" = "steelblue"
)
ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
geom_histogram(bins = 30, alpha = 0.85, color = "white") +
facet_grid(parameter ~ model, scales = "free") + # allow y to vary
scale_fill_manual(values = param_colors, guide = "none") +
theme_minimal() +
labs(
title = "Participant-specific coefficients",
x = "Coefficient value",
y = "Count"
)+
theme(
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
strip.text = element_text(face = "bold",size=12) # bold facet labels
)
predict_set2_by_id_clmm.Order <- function(model, data, label) {
# --- Random effects (usually only intercepts in clmm)
re_list <- ranef(model)
if (is.list(re_list) && !is.null(re_list$ResponseId)) {
re_mat <- re_list$ResponseId
re_df <- data.frame(re_mat, check.names = FALSE)
re_df$ResponseId <- rownames(re_mat)
} else {
# no RE present; keep re_mat empty so the rename checks below are safely skipped
re_mat <- NULL
re_df <- data.frame(ResponseId = unique(data$ResponseId), check.names = FALSE)
}
if ("(Intercept)" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_intercept = `(Intercept)`)
if ("set1_prob.z" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_prob = `set1_prob.z`)
if ("set1_amt.z" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_amt = `set1_amt.z`)
if ("set1_order" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_drag = `set1_order`)
# Ensure all needed columns exist (fill missing with 0)
for (nm in c("b_intercept","b_prob","b_amt","b_drag")) {
if (!nm %in% names(re_df)) re_df[[nm]] <- 0
}
# --- Fixed effects: use model$beta (no thresholds)
fe <- model$beta
fe_prob <- if ("set1_prob.z" %in% names(fe)) unname(fe["set1_prob.z"]) else 0
fe_amt <- if ("set1_amt.z" %in% names(fe)) unname(fe["set1_amt.z"]) else 0
fe_drag <- if ("set1_order" %in% names(fe)) unname(fe["set1_order"]) else 0
# NOTE: clmm has thresholds instead of a global fixed intercept; we do NOT add a fixed intercept.
# We only use participant-specific random intercept shifts (b_intercept) on the latent scale.
df_pred <- data %>%
left_join(re_df, by = "ResponseId") %>%
mutate(
# latent score for ordering (no fixed intercept term)
latent_score =
b_intercept +
(fe_prob+b_prob) * set2_prob.z +
(fe_amt+b_amt) * set2_amt.z +
(fe_drag+b_drag) * set2_order
) %>%
group_by(ResponseId) %>%
# 2) Handle ties via average ranks (and keep NAs out of ranking)
mutate(
# flag ties within participant (ignore NAs)
has_tie = {
x <- latent_score[is.finite(latent_score)]
dplyr::n_distinct(x) < length(x)
},
# average ranks; higher score = better (rank 1)
set2_rank_pred_rank = rank(-latent_score, ties.method = "average")
) %>%
ungroup()
# 3) Tie summary message
tie_summary <- df_pred %>%
distinct(ResponseId, has_tie) %>%
summarise(n_ids = dplyr::n(), n_ties = sum(has_tie), .groups = "drop")
cat(sprintf(
"Tie handling: %d of %d participants had tied predicted scores; average ranks were used.\n",
tie_summary$n_ties, tie_summary$n_ids
))
# 4) Per-participant correlations (complete cases only)
df_pred %>%
group_by(ResponseId) %>%
summarise(
r_pearson = cor(set2_rank_pred_rank, set2_rank, method = "pearson", use = "complete.obs"),
tau_kendall = cor(set2_rank_pred_rank, set2_rank, method = "kendall", use = "complete.obs"),
.groups = "drop"
) %>%
mutate(model = label)
}
# Run for all three models
cor_M1 <- predict_set2_by_id_clmm.Order(set1.order_M1, master_df, "RE: Prob & Amt")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M2 <- predict_set2_by_id_clmm.Order(set1.order_M2, master_df, "Adding Order FE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M3 <- predict_set2_by_id_clmm.Order(set1.order_M3, master_df, "Adding Order RE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_all <- bind_rows(cor_M1, cor_M2, cor_M3) %>%
mutate(model = factor(model, levels = c("RE: Prob & Amt","Adding Order FE","Adding Order RE")))
p_r.Order_clmm <- make_violin(cor_all, "r_pearson", "Per-participant Pearson r")
p_tau.Order_clmm <- make_violin(cor_all, "tau_kendall", "Per-participant Kendall \u03C4")
p_r.Order_clmm + p_tau.Order_clmm + plot_layout(ncol = 2)
table_r.order_CLMM <- compare_models_simple.Order(cor_all, "r_pearson")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
table_tau.order_CLMM <- compare_models_simple.Order(cor_all, "tau_kendall")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
kable(table_r.order_CLMM,
caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Pearson r (Order)</span></b>",
digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
kable_styling(full_width = FALSE)
Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.023 | 0 | 0.225 | 0.231 | 0.544 | 0.0954 | 1.676 | 181 | 0.0874 | 2116.5 | 0.0901 |
Adding RE (M3 − M1) | 182 | 0.063 | 0 | 0.236 | 0.148 | 0.615 | 0.0078 | 2.692 | 181 | 0.0030 | 1749.5 | 0.0067 |
kable(table_tau.order_CLMM,
caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Kendall τ (Order)</span></b>",
digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
kable_styling(full_width = FALSE)
Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.007 | 0 | 0.22 | 0.231 | 0.549 | 0.5799 | 0.554 | 181 | 0.2341 | 1954 | 0.5423 |
Adding RE (M3 − M1) | 182 | 0.051 | 0 | 0.22 | 0.154 | 0.626 | 0.0149 | 2.457 | 181 | 0.0091 | 1599 | 0.0144 |
set1.Count_M1 <- clmm(
as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df
)
set1.Count_M2 <- clmm(
as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+ set1_touch_count_binary+
(set1_prob.z + set1_amt.z | ResponseId),
data = master_df
)
set1.Count_M3 <- clmm(
as.factor(set1_rank.r) ~ set1_prob.z + set1_amt.z+ set1_touch_count_binary+
(set1_prob.z + set1_amt.z + set1_touch_count_binary | ResponseId),
data = master_df
)
tab_model(
set1.Count_M1,
set1.Count_M2,
set1.Count_M3,
dv.labels = c("RE: Prob & Amt", "Adding Count FE", "Adding Count RE"),
pred.labels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Count"),
show.re.var = TRUE, # show random-effects variances
show.icc = TRUE, # show ICC
digits = 2,
transform= NULL
)
RE: Prob & Amt | Adding Count FE | Adding Count RE | |||||||
---|---|---|---|---|---|---|---|---|---|
Predictors | Log-Odds | CI | p | Log-Odds | CI | p | Log-Odds | CI | p |
1|2 | -4.15 | -4.53 – -3.77 | <0.001 | -3.61 | -4.02 – -3.21 | <0.001 | -3.64 | -4.07 – -3.21 | <0.001 |
2|3 | -1.76 | -2.01 – -1.50 | <0.001 | -1.17 | -1.48 – -0.86 | <0.001 | -1.16 | -1.48 – -0.84 | <0.001 |
3|4 | 0.14 | -0.08 – 0.36 | 0.220 | 0.78 | 0.48 – 1.08 | <0.001 | 0.81 | 0.48 – 1.15 | <0.001 |
4|5 | 1.95 | 1.69 – 2.21 | <0.001 | 2.62 | 2.28 – 2.96 | <0.001 | 2.66 | 2.27 – 3.05 | <0.001 |
5|6 | 4.08 | 3.72 – 4.44 | <0.001 | 4.72 | 4.30 – 5.15 | <0.001 | 4.78 | 4.28 – 5.27 | <0.001 |
set1_prob.z | 0.60 | 0.16 – 1.04 | 0.008 | 0.57 | 0.14 – 1.00 | 0.009 | 0.59 | 0.16 – 1.02 | 0.007 |
set1_amt.z | -1.34 | -1.75 – -0.93 | <0.001 | -1.30 | -1.70 – -0.89 | <0.001 | -1.36 | -1.78 – -0.94 | <0.001 |
set1_touch_count_binary | 0.97 | 0.67 – 1.27 | <0.001 | 0.99 | 0.66 – 1.32 | <0.001 | |||
Random Effects | |||||||||
σ2 | 3.29 | 3.29 | 3.29 | ||||||
τ00 | 0.07 ResponseId | 0.06 ResponseId | 0.23 ResponseId | ||||||
τ11 | 6.52 ResponseId.set1_prob.z | 6.14 ResponseId.set1_prob.z | 6.14 ResponseId.set1_prob.z | ||||||
4.88 ResponseId.set1_amt.z | 4.59 ResponseId.set1_amt.z | 4.67 ResponseId.set1_amt.z | |||||||
0.42 ResponseId.set1_touch_count_binary | |||||||||
ρ01 | 0.69 | 0.65 | 0.75 | ||||||
0.56 | 0.61 | -0.13 | |||||||
-0.86 | |||||||||
ICC | 0.80 | 0.79 | 0.79 | ||||||
N | 182 ResponseId | 182 ResponseId | 182 ResponseId | ||||||
Observations | 1092 | 1092 | 1092 | ||||||
Marginal R2 / Conditional R2 | 0.167 / 0.833 | 0.186 / 0.828 | 0.199 / 0.830 |
re_M1 <- get_participant_effects_clmm(set1.Count_M1)
re_M2 <- get_participant_effects_clmm(set1.Count_M2)
re_M3 <- get_participant_effects_clmm(set1.Count_M3)
# Add a model label
re_M1$model <- "M1 - RE: Prob & Amt"
re_M2$model <- "M2 - Adding Count FE"
re_M3$model <- "M3 - Adding Count RE"
# Combine
re_all <- bind_rows(re_M1, re_M2, re_M3)
re_all_long <- re_all %>%
pivot_longer(
cols = -c(ResponseId, model),
names_to = "parameter",
values_to = "coefficient"
)
re_all_long <- re_all_long %>%
mutate(
parameter = recode(parameter,
`(Intercept)` = "(Intercept)",
`set1_prob.z` = "Prob (z)",
`set1_amt.z` = "Amt (z)",
`set1_touch_count_binary` = "Drag Count"
),
parameter = factor(parameter,
levels = c("(Intercept)", "Prob (z)", "Amt (z)", "Drag Count")
),
model = factor(model, levels = c("M1 - RE: Prob & Amt","M2 - Adding Count FE","M3 - Adding Count RE"))
)
# Summary stats for markers
summary_stats <- re_all_long %>%
group_by(model, parameter) %>%
summarise(
mean_val = mean(coefficient, na.rm = TRUE),
median_val = median(coefficient, na.rm = TRUE),
.groups = "drop"
)
# Color map: Intercept grey, slopes steelblue
param_colors <- c(
"(Intercept)" = "grey60",
"Prob (z)" = "steelblue",
"Amt (z)" = "steelblue",
"Drag Count" = "steelblue"
)
ggplot(re_all_long, aes(x = coefficient, fill = parameter)) +
geom_histogram(bins = 30, alpha = 0.85, color = "white") +
facet_grid(parameter ~ model, scales = "free") + # allow y to vary
scale_fill_manual(values = param_colors, guide = "none") +
theme_minimal() +
labs(
title = "Participant-specific coefficients",
x = "Coefficient value",
y = "Count"
)+
theme(
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
strip.text = element_text(face = "bold",size=12) # bold facet labels
)
predict_set2_by_id_clmm.Count <- function(model, data, label) {
# --- Random effects (usually only intercepts in clmm)
re_list <- ranef(model)
if (is.list(re_list) && !is.null(re_list$ResponseId)) {
re_mat <- re_list$ResponseId
re_df <- data.frame(re_mat, check.names = FALSE)
re_df$ResponseId <- rownames(re_mat)
} else {
# no RE present; keep re_mat empty so the rename checks below are safely skipped
re_mat <- NULL
re_df <- data.frame(ResponseId = unique(data$ResponseId), check.names = FALSE)
}
if ("(Intercept)" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_intercept = `(Intercept)`)
if ("set1_prob.z" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_prob = `set1_prob.z`)
if ("set1_amt.z" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_amt = `set1_amt.z`)
if ("set1_touch_count_binary" %in% colnames(re_mat)) re_df <- dplyr::rename(re_df, b_drag = `set1_touch_count_binary`)
# Ensure all needed columns exist (fill missing with 0)
for (nm in c("b_intercept","b_prob","b_amt","b_drag")) {
if (!nm %in% names(re_df)) re_df[[nm]] <- 0
}
# --- Fixed effects: use model$beta (no thresholds)
fe <- model$beta
fe_prob <- if ("set1_prob.z" %in% names(fe)) unname(fe["set1_prob.z"]) else 0
fe_amt <- if ("set1_amt.z" %in% names(fe)) unname(fe["set1_amt.z"]) else 0
fe_drag <- if ("set1_touch_count_binary" %in% names(fe)) unname(fe["set1_touch_count_binary"]) else 0
# NOTE: clmm has thresholds instead of a global fixed intercept; we do NOT add a fixed intercept.
# We only use participant-specific random intercept shifts (b_intercept) on the latent scale.
df_pred <- data %>%
left_join(re_df, by = "ResponseId") %>%
mutate(
# latent score for ordering (no fixed intercept term)
latent_score =
b_intercept +
(fe_prob+b_prob) * set2_prob.z +
(fe_amt+b_amt) * set2_amt.z +
(fe_drag+b_drag) * set2_touch_count_binary
) %>%
group_by(ResponseId) %>%
# 2) Handle ties via average ranks (and keep NAs out of ranking)
mutate(
# flag ties within participant (ignore NAs)
has_tie = {
x <- latent_score[is.finite(latent_score)]
dplyr::n_distinct(x) < length(x)
},
# average ranks; higher score = better (rank 1)
set2_rank_pred_rank = rank(-latent_score, ties.method = "average")
) %>%
ungroup()
# 3) Tie summary message
tie_summary <- df_pred %>%
distinct(ResponseId, has_tie) %>%
summarise(n_ids = dplyr::n(), n_ties = sum(has_tie), .groups = "drop")
cat(sprintf(
"Tie handling: %d of %d participants had tied predicted scores; average ranks were used.\n",
tie_summary$n_ties, tie_summary$n_ids
))
# 4) Per-participant correlations (complete cases only)
df_pred %>%
group_by(ResponseId) %>%
summarise(
r_pearson = cor(set2_rank_pred_rank, set2_rank, method = "pearson", use = "complete.obs"),
tau_kendall = cor(set2_rank_pred_rank, set2_rank, method = "kendall", use = "complete.obs"),
.groups = "drop"
) %>%
mutate(model = label)
}
# Run for all three models
cor_M1 <- predict_set2_by_id_clmm.Count(set1.Count_M1, master_df, "RE: Prob & Amt")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M2 <- predict_set2_by_id_clmm.Count(set1.Count_M2, master_df, "Adding Count FE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_M3 <- predict_set2_by_id_clmm.Count(set1.Count_M3, master_df, "Adding Count RE")
## Tie handling: 0 of 182 participants had tied predicted scores; average ranks were used.
cor_all <- bind_rows(cor_M1, cor_M2, cor_M3) %>%
mutate(model = factor(model, levels = c("RE: Prob & Amt","Adding Count FE","Adding Count RE")))
p_r.Count_clmm <- make_violin(cor_all, "r_pearson", "Per-participant Pearson r")
p_tau.Count_clmm <- make_violin(cor_all, "tau_kendall", "Per-participant Kendall \u03C4")
p_r.Count_clmm + p_tau.Count_clmm + plot_layout(ncol = 2)
table_r.Count_CLMM <- compare_models_simple.Count(cor_all, "r_pearson")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
table_tau.Count_CLMM <- compare_models_simple.Count(cor_all, "tau_kendall")
## Adding FE (M2 − M1): pairs used = 182
## Adding RE (M3 − M1): pairs used = 182
kable(table_r.Count_CLMM,
caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Pearson r (Count)</span></b>",
digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
kable_styling(full_width = FALSE)
Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.011 | 0 | 0.132 | 0.088 | 0.780 | 0.0725 | 1.807 | 181 | 0.1318 | 522.5 | 0.0784 |
Adding RE (M3 − M1) | 182 | 0.021 | 0 | 0.154 | 0.099 | 0.747 | 0.0097 | 2.614 | 181 | 0.0453 | 724.0 | 0.0060 |
kable(table_tau.Count_CLMM,
caption = "<b><span style='font-size: 18px;color:black;'>Model comparison using Kendall τ (Count)</span></b>",
digits = 4, align = "lrrrrrrrrrr", escape = FALSE) %>%
kable_styling(full_width = FALSE)
Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.004 | 0 | 0.126 | 0.088 | 0.786 | 0.5487 | 0.601 | 181 | 0.3938 | 451 | 0.4744 |
Adding RE (M3 − M1) | 182 | 0.010 | 0 | 0.148 | 0.099 | 0.753 | 0.1688 | 1.381 | 181 | 0.0572 | 685 | 0.1395 |
p_r.Distance_clmm <- p_r.Distance_clmm + labs(title = "Distance • r")
p_tau.Distance_clmm<- p_tau.Distance_clmm+ labs(title = "Distance • Kendall’s τ")
p_r.Order_clmm <- p_r.Order_clmm + labs(title = "Order • r")
p_tau.Order_clmm <- p_tau.Order_clmm + labs(title = "Order • Kendall’s τ")
p_r.Count_clmm <- p_r.Count_clmm + labs(title = "Count • r")
p_tau.Count_clmm <- p_tau.Count_clmm + labs(title = "Count • Kendall’s τ")
summary_plot.CLMM <-
(p_r.Distance_clmm | p_tau.Distance_clmm) /
(p_r.Order_clmm | p_tau.Order_clmm) /
(p_r.Count_clmm | p_tau.Count_clmm) +
plot_layout(guides = "collect") +
plot_annotation(
title = "Summary for CLMM",
theme = theme(
legend.position = "bottom",
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 11),
plot.caption = element_text(size = 10, face = "bold")
)
)
p_r.Distance_lmer <- p_r.Distance_lmer + labs(title = "Distance • r")
p_tau.Distance_lmer<- p_tau.Distance_lmer+ labs(title = "Distance • Kendall’s τ")
p_r.Order_lmer <- p_r.Order_lmer + labs(title = "Order • r")
p_tau.Order_lmer <- p_tau.Order_lmer + labs(title = "Order • Kendall’s τ")
p_r.Count_lmer <- p_r.Count_lmer + labs(title = "Count • r")
p_tau.Count_lmer <- p_tau.Count_lmer + labs(title = "Count • Kendall’s τ")
summary_plot.LMER <-
(p_r.Distance_lmer | p_tau.Distance_lmer) /
(p_r.Order_lmer | p_tau.Order_lmer) /
(p_r.Count_lmer | p_tau.Count_lmer) +
plot_layout(guides = "collect") +
plot_annotation(
title = "Summary for LMER",
theme = theme(
legend.position = "bottom",
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 11),
plot.caption = element_text(size = 10, face = "bold")
)
)
summary_plot.LMER
summary_plot.CLMM
# ggsave("summary_plot.LMER.svg", plot = summary_plot.LMER, width = 15, height = 10, units = "in")
# ggsave("summary_plot.CLMM.svg", plot = summary_plot.CLMM, width = 15, height = 10, units = "in")
library(htmltools)
make_kable <- function(tbl, metric_label, stat_label) {
kable(
tbl,
caption = sprintf(
"<b><span style='font-size:18px;color:black;'>Model comparison using %s (%s)</span></b>",
stat_label, metric_label
),
digits = 4,
align = "lrrrrrrrrrr",
escape = FALSE
) |>
kable_styling(full_width = FALSE)
}
# Create all six tables
k_r_distance <- make_kable(table_r.distance_CLMM, "Distance", "Pearson r")
k_tau_distance <- make_kable(table_tau.distance_CLMM, "Distance", "Kendall τ")
k_r_order <- make_kable(table_r.order_CLMM, "Order", "Pearson r")
k_tau_order <- make_kable(table_tau.order_CLMM, "Order", "Kendall τ")
k_r_count <- make_kable(table_r.Count_CLMM, "Count", "Pearson r")
k_tau_count <- make_kable(table_tau.Count_CLMM, "Count", "Kendall τ")
to_html <- function(k) HTML(as.character(k)) # convert a kable/kableExtra object to raw HTML so tagList embeds it verbatim
Sig.Test.CLMM <- tagList(
tags$style(".grid2{display:grid;grid-template-columns:1fr 1fr;gap:22px;}
.grid2 h2{font-weight:bold;font-size:24px;color:black;margin:0 0 12px 0;}"),
tags$h2("CLMM Significance Tests"),
tags$div(
class = "grid2",
to_html(k_r_distance), to_html(k_tau_distance),
to_html(k_r_order), to_html(k_tau_order),
to_html(k_r_count), to_html(k_tau_count)
)
)
Sig.Test.CLMM
CLMM Significance Tests

Model comparison using Pearson r (Distance)

Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.052 | 0 | 0.253 | 0.154 | 0.593 | 1e-04 | 3.939 | 181 | 4e-04 | 2049.0 | 0 |
Adding RE (M3 − M1) | 182 | 0.068 | 0 | 0.242 | 0.181 | 0.577 | 1e-04 | 4.023 | 181 | 5e-04 | 2184.5 | 0 |

Model comparison using Kendall τ (Distance)

Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.032 | 0 | 0.242 | 0.143 | 0.615 | 0.0065 | 2.752 | 181 | 0.0044 | 1723 | 0.0061 |
Adding RE (M3 − M1) | 182 | 0.044 | 0 | 0.236 | 0.165 | 0.599 | 0.0033 | 2.976 | 181 | 0.0025 | 1897 | 0.0033 |

Model comparison using Pearson r (Order)

Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.023 | 0 | 0.225 | 0.231 | 0.544 | 0.0954 | 1.676 | 181 | 0.0874 | 2116.5 | 0.0901 |
Adding RE (M3 − M1) | 182 | 0.063 | 0 | 0.236 | 0.148 | 0.615 | 0.0078 | 2.692 | 181 | 0.0030 | 1749.5 | 0.0067 |

Model comparison using Kendall τ (Order)

Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.007 | 0 | 0.22 | 0.231 | 0.549 | 0.5799 | 0.554 | 181 | 0.2341 | 1954 | 0.5423 |
Adding RE (M3 − M1) | 182 | 0.051 | 0 | 0.22 | 0.154 | 0.626 | 0.0149 | 2.457 | 181 | 0.0091 | 1599 | 0.0144 |

Model comparison using Pearson r (Count)

Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.011 | 0 | 0.132 | 0.088 | 0.780 | 0.0725 | 1.807 | 181 | 0.1318 | 522.5 | 0.0784 |
Adding RE (M3 − M1) | 182 | 0.021 | 0 | 0.154 | 0.099 | 0.747 | 0.0097 | 2.614 | 181 | 0.0453 | 724.0 | 0.0060 |

Model comparison using Kendall τ (Count)

Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.004 | 0 | 0.126 | 0.088 | 0.786 | 0.5487 | 0.601 | 181 | 0.3938 | 451 | 0.4744 |
Adding RE (M3 − M1) | 182 | 0.010 | 0 | 0.148 | 0.099 | 0.753 | 0.1688 | 1.381 | 181 | 0.0572 | 685 | 0.1395 |
k_r_distance <- make_kable(table_r.distance_lmer, "Distance", "Pearson r")
k_tau_distance <- make_kable(table_tau.distance_lmer, "Distance", "Kendall τ")
k_r_order <- make_kable(table_r.order_lmer, "Order", "Pearson r")
k_tau_order <- make_kable(table_tau.order_lmer, "Order", "Kendall τ")
k_r_count <- make_kable(table_r.Count_lmer, "Count", "Pearson r")
k_tau_count <- make_kable(table_tau.Count_lmer, "Count", "Kendall τ")
Sig.Test.LMER <- tagList(
tags$style(".grid2{display:grid;grid-template-columns:1fr 1fr;gap:22px;}
.grid2 h2{font-weight:bold;font-size:24px;color:black;margin:0 0 12px 0;}"),
tags$h2("LMER Significance Tests"),
tags$div(
class = "grid2",
to_html(k_r_distance), to_html(k_tau_distance),
to_html(k_r_order), to_html(k_tau_order),
to_html(k_r_count), to_html(k_tau_count)
)
)
Sig.Test.LMER
LMER Significance Tests

Model comparison using Pearson r (Distance)

Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.062 | 0 | 0.286 | 0.192 | 0.522 | 0 | 4.378 | 181 | 2e-04 | 2800.5 | 0 |
Adding RE (M3 − M1) | 182 | 0.076 | 0 | 0.253 | 0.170 | 0.577 | 0 | 4.248 | 181 | 9e-04 | 2155.0 | 0 |

Model comparison using Kendall τ (Distance)

Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.040 | 0 | 0.275 | 0.187 | 0.538 | 0.0013 | 3.276 | 181 | 0.0018 | 2472.5 | 5e-04 |
Adding RE (M3 − M1) | 182 | 0.059 | 0 | 0.242 | 0.154 | 0.604 | 0.0004 | 3.616 | 181 | 0.0008 | 1909.5 | 3e-04 |

Model comparison using Pearson r (Order)

Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.028 | 0 | 0.253 | 0.280 | 0.467 | 0.0776 | 1.775 | 181 | 0.1437 | 2780.5 | 0.0784 |
Adding RE (M3 − M1) | 182 | 0.064 | 0 | 0.236 | 0.192 | 0.571 | 0.0136 | 2.492 | 181 | 0.0191 | 2010.5 | 0.0121 |

Model comparison using Kendall τ (Order)

Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.015 | 0 | 0.247 | 0.264 | 0.489 | 0.3384 | 0.960 | 181 | 0.3439 | 2429.0 | 0.3139 |
Adding RE (M3 − M1) | 182 | 0.053 | 0 | 0.231 | 0.192 | 0.577 | 0.0224 | 2.302 | 181 | 0.0252 | 1940.5 | 0.0210 |

Model comparison using Pearson r (Count)

Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.011 | 0 | 0.126 | 0.104 | 0.769 | 0.1520 | 1.438 | 181 | 0.3571 | 525.5 | 0.1646 |
Adding RE (M3 − M1) | 182 | 0.020 | 0 | 0.148 | 0.099 | 0.753 | 0.0521 | 1.955 | 181 | 0.1072 | 660.5 | 0.0547 |

Model comparison using Kendall τ (Count)

Comparison | N_pairs | Mean_Diff | Median_Diff | Prop_Better | Prop_Worse | Prop_Tie | t_test_p | t_statistic | t_df | wilcoxon_p | wilcoxon_V | permutation_p |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Adding FE (M2 − M1) | 182 | 0.004 | 0 | 0.115 | 0.104 | 0.780 | 0.5331 | 0.624 | 181 | 0.2353 | 498 | 0.4723 |
Adding RE (M3 − M1) | 182 | 0.011 | 0 | 0.137 | 0.099 | 0.764 | 0.2293 | 1.206 | 181 | 0.1024 | 608 | 0.2006 |
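# Optional: the combined HTML blocks can also be written to standalone files via
# htmltools::save_html(), mirroring the commented-out ggsave() calls above
# (file names here are illustrative).
# htmltools::save_html(Sig.Test.CLMM, file = "Sig_Test_CLMM.html")
# htmltools::save_html(Sig.Test.LMER, file = "Sig_Test_LMER.html")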