createRawFiles <- TRUE
data_root <- "data"
output_root <- "outputs"
raw_output_dir <- file.path(output_root, "raw")
questionnaire_output_dir <- file.path(output_root, "questionnaire")
paradata_output_dir <- file.path(output_root, "paradata")Data preperation
1 Notes
This script creates a simplified participant-level dataset using the raw study.rds file and produces:
- A clean wide table with core variables + associations (wide) + keyStats
- Paradata events in long format (clipboard, mouse, scroll, keyIntervals)
- Defocus events in long format
2 global variables
3 functions
last_non_empty <- function(x) {
if (length(x) == 0) return(NA)
for (i in seq_along(x)[length(x):1]) {
val <- x[[i]]
if (is.null(val)) next
if (is.list(val) && length(val) == 0) next
if (!is.list(val) && length(val) == 1 && is.na(val)) next
if (is.character(val) && length(val) == 1 && isTRUE(val == "")) next
return(val)
}
return(NA)
}
`%||%` <- function(x, y) {
if (is.null(x)) y else x
}
parse_duration_seconds <- function(x) {
if (is.na(x) || x == "") return(NA_real_)
clean <- stringr::str_remove(x, "\\s*\\(.*\\)")
if (is.na(clean) || clean == "") return(NA_real_)
as.numeric(hms::as_hms(clean))
}
null_to_empty <- function(x) {
if (is.null(x)) list() else x
}
normalize_cell <- function(val) {
if (is.null(val)) return(NA)
if (is.data.frame(val)) return(list(val))
if (is.list(val)) {
if (length(val) == 0) return(NA)
return(list(val))
}
if (length(val) != 1) return(list(val))
val
}
collapse_list_to_string <- function(x, sep = " // ") {
if (is.null(x)) return(NA_character_)
if (is.list(x)) {
if (length(x) == 0) return(NA_character_)
flat <- unlist(x, recursive = TRUE, use.names = FALSE)
if (length(flat) == 0) return(NA_character_)
return(paste0(flat, collapse = sep))
}
if (length(x) > 1) return(paste0(x, collapse = sep))
as.character(x)
}
sanitize_paradata_name <- function(name) {
cleaned <- gsub("[^A-Za-z0-9]+", "_", name)
cleaned <- gsub("^_+|_+$", "", cleaned)
paste0("para_", cleaned)
}
init_paradata_result <- function() {
list(
wide_vals = list(),
wide_cols = character(),
wide_cols_output = character(),
long_rows = list(),
long_cols = character()
)
}
append_paradata_result <- function(target, source) {
target$wide_vals <- c(target$wide_vals, source$wide_vals)
target$wide_cols <- c(target$wide_cols, source$wide_cols)
target$wide_cols_output <- c(target$wide_cols_output, source$wide_cols_output)
target$long_rows <- c(target$long_rows, source$long_rows)
target$long_cols <- c(target$long_cols, source$long_cols)
target
}
add_paradata_wide <- function(res, field_label, value) {
out_col <- sanitize_paradata_name(field_label)
res$wide_vals[[out_col]] <- value
res$wide_cols <- c(res$wide_cols, field_label)
res$wide_cols_output <- c(res$wide_cols_output, out_col)
res
}
add_paradata_long_row <- function(res, pid, result_id, field_label, value, row_index, timestamp) {
res$long_rows[[length(res$long_rows) + 1]] <- tibble::tibble(
PROLIFIC_PID = pid,
result_id = result_id,
origin = "paradata_general",
event_type = "paradata_column",
event_index = row_index,
timestamp = timestamp %||% NA_character_,
component = NA_character_,
field = field_label,
value = value,
mouse_x = NA_real_,
mouse_y = NA_real_,
scroll_x = NA_real_,
scroll_y = NA_real_,
key_interval_ms = NA_real_,
clipboard_type = NA_character_,
clipboard_text = NA_character_,
clipboard_length = NA_real_,
target_tag = NA_character_,
target_type = NA_character_,
target_id = NA_character_,
target_name = NA_character_,
target_className = NA_character_
)
res$long_cols <- c(res$long_cols, field_label)
res
}
get_event_value <- function(ev, name) {
if (is.null(ev)) return(NA)
if (is.data.frame(ev)) return(ev[[name]] %||% NA)
if (is.list(ev)) return(ev[[name]] %||% NA)
NA
}
get_target_value <- function(ev, field) {
if (is.null(ev)) return(NA)
if (is.data.frame(ev)) {
col <- paste0("target.", field)
return(ev[[col]] %||% NA)
}
if (is.list(ev) && !is.null(ev$target)) return(ev$target[[field]] %||% NA)
NA
}
is_non_empty_element <- function(x) {
if (is.null(x)) return(FALSE)
if (is.data.frame(x)) return(nrow(x) > 0)
if (is.list(x)) {
if (length(x) == 0) return(FALSE)
return(!all(purrr::map_lgl(x, ~ is.null(.x) || (is.list(.x) && length(.x) == 0))))
}
length(x) > 0
}
find_last_non_empty_element <- function(col_vals) {
if (is.null(col_vals)) return(list(value = NULL, index = NA_integer_))
if (!is.list(col_vals)) {
if (length(col_vals) == 0) return(list(value = NULL, index = NA_integer_))
return(list(value = col_vals, index = length(col_vals)))
}
for (i in seq_along(col_vals)[length(col_vals):1]) {
v <- col_vals[[i]]
if (is_non_empty_element(v)) return(list(value = v, index = i))
}
list(value = NULL, index = NA_integer_)
}
process_group_df <- function(df, prefix_label, timestamp_vec, pid, result_id) {
res <- init_paradata_result()
if (is.null(df) || !is.data.frame(df)) return(res)
for (col in names(df)) {
col_vals <- df[[col]]
field_label <- paste0(prefix_label, ".", col)
if (is.data.frame(col_vals)) {
nested_res <- process_group_df(col_vals, field_label, timestamp_vec, pid, result_id)
res <- append_paradata_result(res, nested_res)
next
}
if (is.list(col_vals)) {
vals_json <- vapply(col_vals, function(v) {
if (is.null(v) || (is.list(v) && length(v) == 0)) return(NA_character_)
jsonlite::toJSON(v, auto_unbox = TRUE)
}, character(1))
non_na <- vals_json[!is.na(vals_json)]
if (length(non_na) == 0) next
uniq <- unique(non_na)
if (length(uniq) == 1) {
res <- add_paradata_wide(res, field_label, uniq[[1]])
} else {
for (r in seq_along(vals_json)) {
v <- vals_json[[r]]
if (is.na(v)) next
ts <- if (!is.null(timestamp_vec) && length(timestamp_vec) >= r) timestamp_vec[[r]] else NA_character_
res <- add_paradata_long_row(res, pid, result_id, field_label, v, r, ts)
}
}
next
}
non_na <- col_vals[!is.na(col_vals)]
if (length(non_na) == 0) next
uniq <- unique(non_na)
if (length(uniq) == 1) {
res <- add_paradata_wide(res, field_label, uniq[[1]])
} else {
for (r in seq_along(col_vals)) {
v <- col_vals[[r]]
if (is.na(v)) next
ts <- if (!is.null(timestamp_vec) && length(timestamp_vec) >= r) timestamp_vec[[r]] else NA_character_
res <- add_paradata_long_row(res, pid, result_id, field_label, as.character(v), r, ts)
}
}
}
res
}
flatten_with_prefix <- function(x, prefix) {
if (is.null(x)) return(list())
flat <- unlist(x, recursive = TRUE, use.names = TRUE)
if (length(flat) == 0) return(list())
names(flat) <- paste0(prefix, names(flat))
as.list(flat)
}
assoc_to_wide <- function(assoc_list) {
if (is.null(assoc_list) || length(assoc_list) == 0) {
return(tibble::tibble(
assoc1_response = NA_character_, assoc1_valence = NA_character_,
assoc2_response = NA_character_, assoc2_valence = NA_character_,
assoc3_response = NA_character_, assoc3_valence = NA_character_,
assoc4_response = NA_character_, assoc4_valence = NA_character_,
assoc5_response = NA_character_, assoc5_valence = NA_character_
))
}
assoc_df <- dplyr::as_tibble(assoc_list)
assoc_df <- assoc_df[seq_len(min(5, nrow(assoc_df))), ]
get_val <- function(df, col, idx) {
if (!col %in% names(df)) return(NA_character_)
if (nrow(df) < idx) return(NA_character_)
df[[col]][idx]
}
tibble::tibble(
assoc1_response = get_val(assoc_df, "response", 1),
assoc1_valence = get_val(assoc_df, "valence", 1),
assoc2_response = get_val(assoc_df, "response", 2),
assoc2_valence = get_val(assoc_df, "valence", 2),
assoc3_response = get_val(assoc_df, "response", 3),
assoc3_valence = get_val(assoc_df, "valence", 3),
assoc4_response = get_val(assoc_df, "response", 4),
assoc4_valence = get_val(assoc_df, "valence", 4),
assoc5_response = get_val(assoc_df, "response", 5),
assoc5_valence = get_val(assoc_df, "valence", 5)
)
}
build_paradata_events <- function(pid, result_id, paradata) {
if (is.null(paradata) || is.null(paradata$behavior)) return(tibble::tibble())
events <- list()
behavior <- paradata$behavior
clipboard_events <- null_to_empty(behavior$clipboardEvents)
if (length(clipboard_events)) {
events[[length(events) + 1]] <- purrr::map_dfr(seq_along(clipboard_events), function(i) {
ev <- clipboard_events[[i]]
target <- ev$target
tibble::tibble(
PROLIFIC_PID = pid,
result_id = result_id,
origin = "paradata_general",
event_type = "clipboard",
event_index = i,
timestamp = ev$timestamp %||% NA_character_,
component = ev$component %||% NA_character_,
field = NA_character_,
value = NA_character_,
mouse_x = NA_real_,
mouse_y = NA_real_,
scroll_x = NA_real_,
scroll_y = NA_real_,
key_interval_ms = NA_real_,
clipboard_type = ev$type %||% NA_character_,
clipboard_text = ev$text %||% NA_character_,
clipboard_length = ev$length %||% NA_real_,
target_tag = target$tag %||% NA_character_,
target_type = target$type %||% NA_character_,
target_id = target$id %||% NA_character_,
target_name = target$name %||% NA_character_,
target_className = target$className %||% NA_character_
)
})
}
mouse_events <- null_to_empty(behavior$mouseEvents)
if (length(mouse_events)) {
events[[length(events) + 1]] <- purrr::map_dfr(seq_along(mouse_events), function(i) {
ev <- mouse_events[[i]]
tibble::tibble(
PROLIFIC_PID = pid,
result_id = result_id,
origin = "paradata_general",
event_type = "mouse",
event_index = i,
timestamp = ev$timestamp %||% NA_character_,
component = ev$component %||% NA_character_,
field = NA_character_,
value = NA_character_,
mouse_x = ev$x %||% NA_real_,
mouse_y = ev$y %||% NA_real_,
scroll_x = NA_real_,
scroll_y = NA_real_,
key_interval_ms = NA_real_,
clipboard_type = NA_character_,
clipboard_text = NA_character_,
clipboard_length = NA_real_,
target_tag = NA_character_,
target_type = NA_character_,
target_id = NA_character_,
target_name = NA_character_,
target_className = NA_character_
)
})
}
scroll_events <- null_to_empty(behavior$scrollEvents)
if (length(scroll_events)) {
events[[length(events) + 1]] <- purrr::map_dfr(seq_along(scroll_events), function(i) {
ev <- scroll_events[[i]]
tibble::tibble(
PROLIFIC_PID = pid,
result_id = result_id,
origin = "paradata_general",
event_type = "scroll",
event_index = i,
timestamp = ev$timestamp %||% NA_character_,
component = ev$component %||% NA_character_,
field = NA_character_,
value = NA_character_,
mouse_x = NA_real_,
mouse_y = NA_real_,
scroll_x = ev$scrollX %||% NA_real_,
scroll_y = ev$scrollY %||% NA_real_,
key_interval_ms = NA_real_,
clipboard_type = NA_character_,
clipboard_text = NA_character_,
clipboard_length = NA_real_,
target_tag = NA_character_,
target_type = NA_character_,
target_id = NA_character_,
target_name = NA_character_,
target_className = NA_character_
)
})
}
if (length(events) == 0) return(tibble::tibble())
dplyr::bind_rows(events)
}
process_paradata_general <- function(pid, result_id, paradata) {
empty <- list(
wide = tibble::tibble(),
long = tibble::tibble(),
wide_cols = character(),
wide_cols_output = character(),
long_cols = character(),
key_stats = tibble::tibble(
keyStats_count = NA_real_,
keyStats_mean = NA_real_,
keyStats_median = NA_real_,
keyStats_min = NA_real_,
keyStats_max = NA_real_
)
)
if (is.null(paradata)) return(empty)
if (is.data.frame(paradata) && all(c("static", "ip", "behavior", "meta") %in% names(paradata))) {
static_df <- paradata$static
ip_df <- paradata$ip
behavior_df <- paradata$behavior
meta_df <- paradata$meta
ip_error <- paradata$ipError
timestamps <- NULL
if (is.data.frame(static_df) && "timestamp" %in% names(static_df)) {
timestamps <- static_df$timestamp
} else if (is.data.frame(meta_df) && "updatedAt" %in% names(meta_df)) {
timestamps <- meta_df$updatedAt
}
res <- init_paradata_result()
res <- append_paradata_result(res, process_group_df(static_df, "static", timestamps, pid, result_id))
res <- append_paradata_result(res, process_group_df(ip_df, "ip", timestamps, pid, result_id))
res <- append_paradata_result(res, process_group_df(meta_df, "meta", timestamps, pid, result_id))
if (!is.null(ip_error)) {
non_na <- ip_error[!is.na(ip_error)]
if (length(non_na) == 1) {
res <- add_paradata_wide(res, "ipError", non_na[[1]])
} else if (length(non_na) > 1) {
for (r in seq_along(ip_error)) {
v <- ip_error[[r]]
if (is.na(v)) next
ts <- if (!is.null(timestamps) && length(timestamps) >= r) timestamps[[r]] else NA_character_
res <- add_paradata_long_row(res, pid, result_id, "ipError", as.character(v), r, ts)
}
}
}
key_stats <- empty$key_stats
if (is.data.frame(behavior_df) && "keyStats" %in% names(behavior_df)) {
key_df <- behavior_df$keyStats
if (is.data.frame(key_df)) {
key_stats <- tibble::tibble(
keyStats_count = if ("count" %in% names(key_df)) last_non_empty(key_df$count) else NA_real_,
keyStats_mean = if ("mean" %in% names(key_df)) last_non_empty(key_df$mean) else NA_real_,
keyStats_median = if ("median" %in% names(key_df)) last_non_empty(key_df$median) else NA_real_,
keyStats_min = if ("min" %in% names(key_df)) last_non_empty(key_df$min) else NA_real_,
keyStats_max = if ("max" %in% names(key_df)) last_non_empty(key_df$max) else NA_real_
)
}
}
long_rows <- list()
if (is.data.frame(behavior_df)) {
if ("clipboardEvents" %in% names(behavior_df)) {
last_clip <- find_last_non_empty_element(behavior_df$clipboardEvents)
ev_list <- last_clip$value
if (!is.null(ev_list)) {
if (is.data.frame(ev_list)) {
for (j in seq_len(nrow(ev_list))) {
ev <- ev_list[j, ]
long_rows[[length(long_rows) + 1]] <- tibble::tibble(
PROLIFIC_PID = pid,
result_id = result_id,
origin = "paradata_general",
event_type = "clipboard",
event_index = j,
timestamp = get_event_value(ev, "timestamp") %||% (if (!is.null(timestamps) && !is.na(last_clip$index) && length(timestamps) >= last_clip$index) timestamps[[last_clip$index]] else NA_character_),
component = get_event_value(ev, "component"),
field = NA_character_,
value = NA_character_,
mouse_x = NA_real_,
mouse_y = NA_real_,
scroll_x = NA_real_,
scroll_y = NA_real_,
key_interval_ms = NA_real_,
clipboard_type = get_event_value(ev, "type"),
clipboard_text = get_event_value(ev, "text"),
clipboard_length = get_event_value(ev, "length"),
target_tag = get_target_value(ev, "tag"),
target_type = get_target_value(ev, "type"),
target_id = get_target_value(ev, "id"),
target_name = get_target_value(ev, "name"),
target_className = get_target_value(ev, "className")
)
}
res$long_cols <- c(res$long_cols, "behavior.clipboardEvents")
} else if (is.list(ev_list)) {
ev_list <- null_to_empty(ev_list)
for (j in seq_along(ev_list)) {
ev <- ev_list[[j]]
long_rows[[length(long_rows) + 1]] <- tibble::tibble(
PROLIFIC_PID = pid,
result_id = result_id,
origin = "paradata_general",
event_type = "clipboard",
event_index = j,
timestamp = get_event_value(ev, "timestamp") %||% (if (!is.null(timestamps) && !is.na(last_clip$index) && length(timestamps) >= last_clip$index) timestamps[[last_clip$index]] else NA_character_),
component = get_event_value(ev, "component"),
field = NA_character_,
value = NA_character_,
mouse_x = NA_real_,
mouse_y = NA_real_,
scroll_x = NA_real_,
scroll_y = NA_real_,
key_interval_ms = NA_real_,
clipboard_type = get_event_value(ev, "type"),
clipboard_text = get_event_value(ev, "text"),
clipboard_length = get_event_value(ev, "length"),
target_tag = get_target_value(ev, "tag"),
target_type = get_target_value(ev, "type"),
target_id = get_target_value(ev, "id"),
target_name = get_target_value(ev, "name"),
target_className = get_target_value(ev, "className")
)
}
res$long_cols <- c(res$long_cols, "behavior.clipboardEvents")
}
}
}
if ("mouseEvents" %in% names(behavior_df)) {
last_mouse <- find_last_non_empty_element(behavior_df$mouseEvents)
ev_df <- last_mouse$value
if (!is.null(ev_df)) {
if (is.data.frame(ev_df)) {
for (j in seq_len(nrow(ev_df))) {
ev <- ev_df[j, ]
long_rows[[length(long_rows) + 1]] <- tibble::tibble(
PROLIFIC_PID = pid,
result_id = result_id,
origin = "paradata_general",
event_type = "mouse",
event_index = j,
timestamp = ev$timestamp %||% (if (!is.null(timestamps) && !is.na(last_mouse$index) && length(timestamps) >= last_mouse$index) timestamps[[last_mouse$index]] else NA_character_),
component = ev$component %||% NA_character_,
field = NA_character_,
value = NA_character_,
mouse_x = ev$x %||% NA_real_,
mouse_y = ev$y %||% NA_real_,
scroll_x = NA_real_,
scroll_y = NA_real_,
key_interval_ms = NA_real_,
clipboard_type = NA_character_,
clipboard_text = NA_character_,
clipboard_length = NA_real_,
target_tag = NA_character_,
target_type = NA_character_,
target_id = NA_character_,
target_name = NA_character_,
target_className = NA_character_
)
}
res$long_cols <- c(res$long_cols, "behavior.mouseEvents")
} else if (is.list(ev_df)) {
ev_df <- null_to_empty(ev_df)
for (j in seq_along(ev_df)) {
ev <- ev_df[[j]]
long_rows[[length(long_rows) + 1]] <- tibble::tibble(
PROLIFIC_PID = pid,
result_id = result_id,
origin = "paradata_general",
event_type = "mouse",
event_index = j,
timestamp = get_event_value(ev, "timestamp") %||% (if (!is.null(timestamps) && !is.na(last_mouse$index) && length(timestamps) >= last_mouse$index) timestamps[[last_mouse$index]] else NA_character_),
component = get_event_value(ev, "component"),
field = NA_character_,
value = NA_character_,
mouse_x = get_event_value(ev, "x"),
mouse_y = get_event_value(ev, "y"),
scroll_x = NA_real_,
scroll_y = NA_real_,
key_interval_ms = NA_real_,
clipboard_type = NA_character_,
clipboard_text = NA_character_,
clipboard_length = NA_real_,
target_tag = NA_character_,
target_type = NA_character_,
target_id = NA_character_,
target_name = NA_character_,
target_className = NA_character_
)
}
res$long_cols <- c(res$long_cols, "behavior.mouseEvents")
}
}
}
if ("scrollEvents" %in% names(behavior_df)) {
last_scroll <- find_last_non_empty_element(behavior_df$scrollEvents)
ev_df <- last_scroll$value
if (!is.null(ev_df)) {
if (is.data.frame(ev_df)) {
for (j in seq_len(nrow(ev_df))) {
ev <- ev_df[j, ]
long_rows[[length(long_rows) + 1]] <- tibble::tibble(
PROLIFIC_PID = pid,
result_id = result_id,
origin = "paradata_general",
event_type = "scroll",
event_index = j,
timestamp = ev$timestamp %||% (if (!is.null(timestamps) && !is.na(last_scroll$index) && length(timestamps) >= last_scroll$index) timestamps[[last_scroll$index]] else NA_character_),
component = ev$component %||% NA_character_,
field = NA_character_,
value = NA_character_,
mouse_x = NA_real_,
mouse_y = NA_real_,
scroll_x = ev$scrollX %||% NA_real_,
scroll_y = ev$scrollY %||% NA_real_,
key_interval_ms = NA_real_,
clipboard_type = NA_character_,
clipboard_text = NA_character_,
clipboard_length = NA_real_,
target_tag = NA_character_,
target_type = NA_character_,
target_id = NA_character_,
target_name = NA_character_,
target_className = NA_character_
)
}
res$long_cols <- c(res$long_cols, "behavior.scrollEvents")
} else if (is.list(ev_df)) {
ev_df <- null_to_empty(ev_df)
for (j in seq_along(ev_df)) {
ev <- ev_df[[j]]
long_rows[[length(long_rows) + 1]] <- tibble::tibble(
PROLIFIC_PID = pid,
result_id = result_id,
origin = "paradata_general",
event_type = "scroll",
event_index = j,
timestamp = get_event_value(ev, "timestamp") %||% (if (!is.null(timestamps) && !is.na(last_scroll$index) && length(timestamps) >= last_scroll$index) timestamps[[last_scroll$index]] else NA_character_),
component = get_event_value(ev, "component"),
field = NA_character_,
value = NA_character_,
mouse_x = NA_real_,
mouse_y = NA_real_,
scroll_x = get_event_value(ev, "scrollX"),
scroll_y = get_event_value(ev, "scrollY"),
key_interval_ms = NA_real_,
clipboard_type = NA_character_,
clipboard_text = NA_character_,
clipboard_length = NA_real_,
target_tag = NA_character_,
target_type = NA_character_,
target_id = NA_character_,
target_name = NA_character_,
target_className = NA_character_
)
}
res$long_cols <- c(res$long_cols, "behavior.scrollEvents")
}
}
}
}
if (length(long_rows)) {
res$long_rows <- c(res$long_rows, long_rows)
}
return(list(
wide = if (length(res$wide_vals)) tibble::as_tibble(res$wide_vals) else tibble::tibble(),
long = if (length(res$long_rows)) dplyr::bind_rows(res$long_rows) else tibble::tibble(),
wide_cols = res$wide_cols,
wide_cols_output = res$wide_cols_output,
long_cols = res$long_cols,
key_stats = key_stats
))
}
if (is.list(paradata)) {
wide_vals <- c(
flatten_with_prefix(paradata$static, "para_static_"),
flatten_with_prefix(paradata$ip, "para_ip_"),
flatten_with_prefix(paradata$meta, "para_meta_")
)
key_stats <- tibble::tibble(
keyStats_count = paradata$behavior$keyStats$count %||% NA_real_,
keyStats_mean = paradata$behavior$keyStats$mean %||% NA_real_,
keyStats_median = paradata$behavior$keyStats$median %||% NA_real_,
keyStats_min = paradata$behavior$keyStats$min %||% NA_real_,
keyStats_max = paradata$behavior$keyStats$max %||% NA_real_
)
return(list(
wide = if (length(wide_vals)) tibble::as_tibble(wide_vals) else tibble::tibble(),
long = build_paradata_events(pid, result_id, paradata),
wide_cols = names(wide_vals),
wide_cols_output = names(wide_vals),
long_cols = character(),
key_stats = key_stats
))
}
empty
}
build_defocus_events <- function(pid, result_id, defocus_list) {
if (is.null(defocus_list)) return(tibble::tibble())
if (is.data.frame(defocus_list)) {
if (nrow(defocus_list) == 0) return(tibble::tibble())
return(dplyr::mutate(
tibble::as_tibble(defocus_list),
PROLIFIC_PID = pid,
result_id = result_id,
origin = "paradata_focus",
event_index = dplyr::row_number()
) %>%
dplyr::select(PROLIFIC_PID, result_id, origin, event_index, durationblur, senderblur))
}
if (is.list(defocus_list)) {
if (length(defocus_list) == 0) return(tibble::tibble())
return(purrr::map_dfr(seq_along(defocus_list), function(i) {
ev <- defocus_list[[i]]
tibble::tibble(
PROLIFIC_PID = pid,
result_id = result_id,
origin = "paradata_focus",
event_index = i,
durationblur = ev$durationblur %||% NA_real_,
senderblur = ev$senderblur %||% NA_character_
)
}))
}
tibble::tibble()
}
is_missing_value <- function(x) {
if (is.null(x)) return(TRUE)
if (is.list(x)) {
return(length(x) == 0 || all(is.na(unlist(x))))
}
if (length(x) > 1) {
if (is.character(x)) return(all(is.na(x) | x == ""))
return(all(is.na(x)))
}
if (is.character(x)) return(is.na(x) | x == "")
is.na(x)
}4 load packages
require(pacman)
p_load(
"tidyverse",
"jsonlite",
"hms",
"psych",
"afex",
"writexl",
"readxl",
"haven"
)5 create raw data files
if (createRawFiles) {
dir.create(raw_output_dir, recursive = TRUE, showWarnings = FALSE)
data_files <- list.files(
data_root,
pattern = "data.txt$",
recursive = TRUE,
full.names = TRUE
)
results <- list()
for (file in data_files) {
result_folder <- basename(dirname(dirname(file)))
result_id <- stringr::str_remove(result_folder, "^study_result_")
tmp <- jsonlite::fromJSON(file)
tmp[, colnames(tmp) == ""] <- NULL
tmp <- dplyr::as_tibble(tmp)
tmp$result_id <- as.character(result_id)
results[[length(results) + 1]] <- tmp
}
saveRDS(results, file = file.path(raw_output_dir, "study.rds"))
json_lines <- purrr::map_chr(results, ~ jsonlite::toJSON(.x, auto_unbox = TRUE))
writeLines(json_lines, con = file.path(raw_output_dir, "study.jsonl"))
print("Raw files have been created in this run!")
} else {
print("Raw files have been created in previous run!")
}[1] "Raw files have been created in this run!"
6 load JATOS metadata
metadata_files <- list.files(
data_root,
pattern = "jatos_results_metadata_.*\\.csv$",
full.names = TRUE
)
metadata_file <- metadata_files[which.max(file.info(metadata_files)$mtime)]
metadata <- readr::read_csv(metadata_file, show_col_types = FALSE)
names(metadata) <- make.names(names(metadata))
metadata <- metadata %>%
mutate(
result_id = as.character(Result.ID),
duration_seconds = purrr::map_dbl(Duration, parse_duration_seconds)
)
metadata_small <- metadata %>%
select(result_id, State, Duration, duration_seconds)
table(metadata_small$State)
FINISHED PRE
68 20
7 load Prolific demographics
prolific_demographic_file <- file.path(
data_root,
"prolific_demographic_export_69d4ced16907d423ae787668.csv"
)
prolific_demographic_raw <- readr::read_csv(prolific_demographic_file, show_col_types = FALSE)
names(prolific_demographic_raw) <- make.names(names(prolific_demographic_raw))
prolific_demographic <- prolific_demographic_raw %>%
rename(PROLIFIC_PID = Participant.id) %>%
mutate(PROLIFIC_PID = as.character(PROLIFIC_PID)) %>%
distinct(PROLIFIC_PID, .keep_all = TRUE) %>%
rename_with(~ paste0("socidem_", .x), -PROLIFIC_PID)8 build participant-level data
study <- readRDS(file.path(raw_output_dir, "study.rds"))
core_vars <- c(
"dummy_informedconsent",
"hp_exclusionCriteria",
"commCheck",
"counterVisualTraps",
"visualTrapResponse",
"visualTrap_Planets_response",
"visualTrap_Planets_responseLabel",
"visualTrap_Planets_correct",
"visualTrap_Planets_rtMs",
"visualTrap_Collision_response",
"visualTrap_Collision_responseLabel",
"visualTrap_Collision_correct",
"visualTrap_Collision_rtMs",
"visualTrap_CafeWall_response",
"visualTrap_CafeWall_responseLabel",
"visualTrap_CafeWall_correct",
"visualTrap_CafeWall_rtMs",
"visualTrap_Circles_response",
"visualTrap_Circles_responseLabel",
"visualTrap_Circles_correct",
"visualTrap_Circles_rtMs",
"visualTrap_Lines_response",
"visualTrap_Lines_responseLabel",
"visualTrap_Lines_correct",
"visualTrap_Lines_rtMs",
"visualTrap_Robot_response",
"visualTrap_Robot_responseLabel",
"visualTrap_Robot_correct",
"visualTrap_Robot_rtMs",
"visualTrap_order",
"condition_FutureSociety",
"cue",
"cue_coding",
"R1",
"R2",
"R3",
"R4",
"R5",
"para_countclicks",
"AR-2",
"AR-5",
"AR-3",
"AR-1",
"AR-4",
"undUtopiaGeneral",
"undUtopiaGoal",
"undUtopiaHow",
"clearUtopia",
"clearUtopiatext",
"biasUtopia",
"biasUtopiatext",
"attributesFutureSociety-5",
"attributesFutureSociety-6",
"attributesFutureSociety-2",
"attributesFutureSociety-3",
"attributesFutureSociety-4",
"attributesFutureSociety-7",
"attributesFutureSociety-1",
"attributesFutureSociety-8",
"utopiaPrototypeAssignment",
"protoConfidenceRadio",
"protoReasonText",
"feedback_critic"
)
core_rows <- list()
paradata_events_rows <- list()
paradata_defocus_rows <- list()
paradata_wide_cols_rows <- list()
paradata_long_cols_rows <- list()
for (i in seq_along(study)) {
print(i)
df <- study[[i]]
pid <- if ("PROLIFIC_PID" %in% names(df)) last_non_empty(df$PROLIFIC_PID) else NA
result_id <- if ("result_id" %in% names(df)) last_non_empty(df$result_id) else NA
core_values <- lapply(core_vars, function(v) {
val <- if (v %in% names(df)) {
last_non_empty(df[[v]])
} else if (v == "attributesFutureSociety-8" && "attributesFutureSociety-undefined" %in% names(df)) {
last_non_empty(df[["attributesFutureSociety-undefined"]])
} else {
NA
}
normalize_cell(val)
})
names(core_values) <- core_vars
core_row <- tibble::as_tibble(core_values)
if ("visualTrap_order" %in% names(core_row)) {
core_row$visualTrap_order <- collapse_list_to_string(core_row$visualTrap_order)
}
assoc_list <- if ("sucsessfulAssociations" %in% names(df)) last_non_empty(df$sucsessfulAssociations) else NULL
assoc_wide <- assoc_to_wide(assoc_list)
paradata <- if ("paradata_general" %in% names(df)) df$paradata_general else NULL
paradata_summary <- process_paradata_general(pid, result_id, paradata)
paradata_wide <- paradata_summary$wide
key_stats_row <- paradata_summary$key_stats
core_row <- dplyr::bind_cols(
tibble::tibble(result_id = result_id, PROLIFIC_PID = pid),
core_row,
assoc_wide,
paradata_wide,
key_stats_row
)
core_rows[[length(core_rows) + 1]] <- core_row
paradata_events <- paradata_summary$long
if (nrow(paradata_events) > 0) {
paradata_events_rows[[length(paradata_events_rows) + 1]] <- paradata_events
}
if (length(paradata_summary$wide_cols) > 0) {
paradata_wide_cols_rows[[length(paradata_wide_cols_rows) + 1]] <- tibble::tibble(
PROLIFIC_PID = pid,
result_id = result_id,
field = paradata_summary$wide_cols,
output_field = paradata_summary$wide_cols_output,
destination = "wide"
)
}
if (length(paradata_summary$long_cols) > 0) {
paradata_long_cols_rows[[length(paradata_long_cols_rows) + 1]] <- tibble::tibble(
PROLIFIC_PID = pid,
result_id = result_id,
field = paradata_summary$long_cols,
output_field = NA_character_,
destination = "long"
)
}
defocus_list <- if ("para_defocuscount" %in% names(df)) last_non_empty(df$para_defocuscount) else NULL
defocus_events <- build_defocus_events(pid, result_id, defocus_list)
if (nrow(defocus_events) > 0) {
paradata_defocus_rows[[length(paradata_defocus_rows) + 1]] <- defocus_events
}
}[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 38
[1] 39
[1] 40
[1] 41
[1] 42
[1] 43
[1] 44
[1] 45
[1] 46
[1] 47
[1] 48
[1] 49
[1] 50
[1] 51
[1] 52
[1] 53
[1] 54
[1] 55
[1] 56
[1] 57
[1] 58
[1] 59
[1] 60
[1] 61
[1] 62
[1] 63
[1] 64
[1] 65
[1] 66
[1] 67
[1] 68
[1] 69
[1] 70
[1] 71
[1] 72
[1] 73
[1] 74
[1] 75
[1] 76
[1] 77
[1] 78
[1] 79
[1] 80
[1] 81
[1] 82
[1] 83
[1] 84
[1] 85
[1] 86
[1] 87
[1] 88
core_df <- dplyr::bind_rows(core_rows)
paradata_events_long <- dplyr::bind_rows(paradata_events_rows)
paradata_defocus_long <- dplyr::bind_rows(paradata_defocus_rows)
paradata_column_map <- dplyr::bind_rows(paradata_wide_cols_rows, paradata_long_cols_rows)
if (nrow(paradata_events_long) == 0) {
paradata_events_long <- tibble::tibble(
PROLIFIC_PID = character(),
result_id = character(),
origin = character(),
event_type = character(),
event_index = integer(),
timestamp = character(),
component = character(),
field = character(),
value = character(),
mouse_x = double(),
mouse_y = double(),
scroll_x = double(),
scroll_y = double(),
key_interval_ms = double(),
clipboard_type = character(),
clipboard_text = character(),
clipboard_length = double(),
target_tag = character(),
target_type = character(),
target_id = character(),
target_name = character(),
target_className = character()
)
}
if (nrow(paradata_defocus_long) == 0) {
paradata_defocus_long <- tibble::tibble(
PROLIFIC_PID = character(),
result_id = character(),
origin = character(),
event_index = integer(),
durationblur = double(),
senderblur = character()
)
}
if (nrow(paradata_column_map) == 0) {
paradata_column_map <- tibble::tibble(
PROLIFIC_PID = character(),
result_id = character(),
field = character(),
output_field = character(),
destination = character()
)
}
core_df <- core_df %>%
left_join(metadata_small, by = "result_id") %>%
left_join(prolific_demographic, by = "PROLIFIC_PID")9 filter to complete datasets
optional_text <- c("clearUtopiatext", "biasUtopiatext", "protoReasonText", "feedback_critic")
required_cols <- setdiff(
c("PROLIFIC_PID",
core_vars,
"assoc1_response", "assoc2_response", "assoc3_response", "assoc4_response", "assoc5_response",
"assoc1_valence", "assoc2_valence", "assoc3_valence", "assoc4_valence", "assoc5_valence"
),
optional_text
)
table(core_df$State)
FINISHED PRE
69 19
# clean_complete <- core_df
clean_complete <- core_df %>%
filter(State == "FINISHED")
dim(clean_complete)[1] 69 182
table(clean_complete$condition_FutureSociety)
aicentered anarchic futurist lawBased moderngreen primitivist
6 14 16 7 10 7
religious
8
to_numeric <- function(x) {
if (is.numeric(x)) return(x)
suppressWarnings(as.numeric(as.character(x)))
}
to_logical <- function(x) {
if (is.logical(x)) return(x)
if (is.factor(x)) x <- as.character(x)
as.logical(x)
}
num_cols <- c(
"counterVisualTraps",
"para_countclicks",
"visualTrap_Planets_rtMs",
"visualTrap_Collision_rtMs",
"visualTrap_CafeWall_rtMs",
"visualTrap_Circles_rtMs",
"visualTrap_Lines_rtMs",
"visualTrap_Robot_rtMs",
"AR-1",
"AR-2",
"AR-3",
"AR-4",
"AR-5",
"clearUtopia",
"biasUtopia",
"attributesFutureSociety-1",
"attributesFutureSociety-2",
"attributesFutureSociety-3",
"attributesFutureSociety-4",
"attributesFutureSociety-5",
"attributesFutureSociety-6",
"attributesFutureSociety-7",
"attributesFutureSociety-8",
"protoConfidenceRadio",
"assoc1_valence",
"assoc2_valence",
"assoc3_valence",
"assoc4_valence",
"assoc5_valence",
"keyStats_count",
"keyStats_mean",
"keyStats_median",
"keyStats_min",
"keyStats_max",
"duration_seconds",
"socidem_Age"
)
bool_cols <- c(
"hp_exclusionCriteria",
"visualTrap_Planets_correct",
"visualTrap_Collision_correct",
"visualTrap_CafeWall_correct",
"visualTrap_Circles_correct",
"visualTrap_Lines_correct",
"visualTrap_Robot_correct"
)
factor_cols <- c(
"dummy_informedconsent",
"commCheck",
"condition_FutureSociety",
"cue_coding",
"utopiaPrototypeAssignment",
"visualTrapResponse",
"visualTrap_Planets_response",
"visualTrap_Collision_response",
"visualTrap_CafeWall_response",
"visualTrap_Circles_response",
"visualTrap_Lines_response",
"visualTrap_Robot_response",
"socidem_Sex",
"socidem_Ethnicity.simplified",
"socidem_Student.status",
"socidem_Employment.status"
)
clean_complete <- clean_complete %>%
mutate(
across(any_of(bool_cols), ~ to_logical(.)),
across(any_of(num_cols), ~ to_numeric(.)),
across(any_of(factor_cols), ~ as.factor(.))
)10 save outputs
dir.create(questionnaire_output_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(paradata_output_dir, recursive = TRUE, showWarnings = FALSE)
saveRDS(clean_complete, file = file.path(questionnaire_output_dir, "clean_complete_wide.rds"))
writexl::write_xlsx(x = clean_complete, path = file.path(questionnaire_output_dir, "clean_complete_wide.xlsx"))
write.csv(clean_complete, file = file.path(questionnaire_output_dir, "clean_complete_wide.csv"), row.names = FALSE)
saveRDS(paradata_events_long, file = file.path(paradata_output_dir, "paradata_events_long.rds"))
writexl::write_xlsx(x = paradata_events_long, path = file.path(paradata_output_dir, "paradata_events_long.xlsx"))
write.csv(paradata_events_long, file = file.path(paradata_output_dir, "paradata_events_long.csv"), row.names = FALSE)
saveRDS(paradata_defocus_long, file = file.path(paradata_output_dir, "paradata_defocus_long.rds"))
writexl::write_xlsx(x = paradata_defocus_long, path = file.path(paradata_output_dir, "paradata_defocus_long.xlsx"))
write.csv(paradata_defocus_long, file = file.path(paradata_output_dir, "paradata_defocus_long.csv"), row.names = FALSE)
writexl::write_xlsx(x = clean_complete[, c("PROLIFIC_PID", "condition_FutureSociety","clearUtopia", "clearUtopiatext", "biasUtopia", "biasUtopiatext", "protoConfidenceRadio", "protoReasonText")], path = file.path(questionnaire_output_dir, "clean_complete_wide_text.xlsx"))11 Summary tables
paradata_column_summary <- paradata_column_map %>%
group_by(destination, field, output_field) %>%
summarise(n_participants = dplyr::n_distinct(PROLIFIC_PID), .groups = "drop") %>%
arrange(destination, field)
paradata_participant_summary <- core_df %>%
select(PROLIFIC_PID, result_id, State, duration_seconds) %>%
distinct() %>%
left_join(paradata_events_long %>% count(PROLIFIC_PID, name = "paradata_event_rows"), by = "PROLIFIC_PID") %>%
left_join(paradata_defocus_long %>% count(PROLIFIC_PID, name = "defocus_event_rows"), by = "PROLIFIC_PID") %>%
left_join(paradata_column_map %>% filter(destination == "wide") %>% count(PROLIFIC_PID, name = "paradata_wide_fields"), by = "PROLIFIC_PID") %>%
left_join(paradata_column_map %>% filter(destination == "long") %>% count(PROLIFIC_PID, name = "paradata_long_fields"), by = "PROLIFIC_PID") %>%
mutate(
paradata_event_rows = tidyr::replace_na(paradata_event_rows, 0),
defocus_event_rows = tidyr::replace_na(defocus_event_rows, 0),
paradata_wide_fields = tidyr::replace_na(paradata_wide_fields, 0),
paradata_long_fields = tidyr::replace_na(paradata_long_fields, 0)
)
paradata_column_summary# A tibble: 93 × 4
destination field output_field n_participants
<chr> <chr> <chr> <int>
1 long behavior.clipboardEvents <NA> 10
2 long behavior.mouseEvents <NA> 84
3 long behavior.scrollEvents <NA> 84
4 long meta.updatedAt <NA> 84
5 long static.network.downlink <NA> 9
6 long static.network.effectiveType <NA> 3
7 long static.network.rtt <NA> 19
8 long static.timestamp <NA> 84
9 long static.viewport.devicePixelRatio <NA> 2
10 long static.viewport.innerHeight <NA> 12
# ℹ 83 more rows
paradata_participant_summary# A tibble: 84 × 8
PROLIFIC_PID result_id State duration_seconds paradata_event_rows
<chr> <chr> <chr> <dbl> <int>
1 6980f140677b5e0cbec2561d 15245 FINI… 538 429
2 69d46a112c7ba62da0198176 15246 FINI… 579 724
3 5f888f1504ea37030833e9ec 15247 FINI… 1561 667
4 62d43cee3d60ac98c1dcacc8 15248 FINI… 1062 1018
5 583ef6f5ad2f4300014b358d 15249 FINI… 871 504
6 69ced52e0c7bfe72777b1f89 15250 FINI… 607 684
7 66ec53e096df305784c5aa6f 15251 FINI… 1324 308
8 69b86e94de9c503e9a58411d 15252 FINI… 504 443
9 697ca395ee14230291313ff7 15253 FINI… 587 803
10 60f657824394dac9902c821f 15255 PRE 243 246
# ℹ 74 more rows
# ℹ 3 more variables: defocus_event_rows <int>, paradata_wide_fields <int>,
# paradata_long_fields <int>
12 Summary statistics
summary_stats_df <- clean_complete %>%
select(
PROLIFIC_PID,
duration_seconds,
keyStats_count,
keyStats_mean,
keyStats_median,
keyStats_min,
keyStats_max
) %>%
left_join(
paradata_participant_summary %>%
select(PROLIFIC_PID, paradata_event_rows, defocus_event_rows, paradata_wide_fields, paradata_long_fields),
by = "PROLIFIC_PID"
) %>%
mutate(
paradata_event_rows = tidyr::replace_na(paradata_event_rows, 0),
defocus_event_rows = tidyr::replace_na(defocus_event_rows, 0),
paradata_wide_fields = tidyr::replace_na(paradata_wide_fields, 0),
paradata_long_fields = tidyr::replace_na(paradata_long_fields, 0)
)
psych::describe(summary_stats_df %>% select(where(is.numeric))) vars n mean sd median trimmed mad
duration_seconds 1 69 924.58 492.08 819.00 877.49 455.16
keyStats_count 2 69 514.55 328.10 395.00 470.81 201.63
keyStats_mean 3 69 963.88 671.07 773.74 870.03 547.06
keyStats_median 4 69 208.15 95.48 183.95 196.63 66.05
keyStats_min 5 69 22.40 34.76 6.60 15.28 9.49
keyStats_max 6 69 120610.58 141136.65 65536.60 93808.39 61007.95
paradata_event_rows 7 69 738.38 396.62 667.00 677.18 244.63
defocus_event_rows 8 69 1.06 1.82 0.00 0.68 0.00
paradata_wide_fields 9 69 78.59 12.96 78.00 76.74 1.48
paradata_long_fields 10 69 63.77 30.24 46.00 58.58 2.97
min max range skew kurtosis se
duration_seconds 194.00 2149.00 1955.0 0.82 -0.14 59.24
keyStats_count 3.00 1714.00 1711.0 1.47 2.18 39.50
keyStats_mean 227.77 3031.07 2803.3 1.23 0.84 80.79
keyStats_median 40.20 523.80 483.6 1.30 1.85 11.49
keyStats_min 0.00 179.70 179.7 2.30 5.72 4.18
keyStats_max 1197.70 738032.00 736834.3 2.32 5.89 16990.86
paradata_event_rows 253.00 2267.00 2014.0 1.79 3.61 47.75
defocus_event_rows 0.00 9.00 9.0 2.04 4.23 0.22
paradata_wide_fields 69.00 152.00 83.0 5.24 26.82 1.56
paradata_long_fields 42.00 150.00 108.0 1.36 0.46 3.64
12.1 descriptive sample
socidem_df <- clean_complete %>%
select(starts_with("socidem_"))
psych::describe(socidem_df %>% select(where(is.numeric))) vars n mean sd median trimmed mad min
socidem_Time.taken 1 69 959.57 512.36 832 905.11 443.30 200
socidem_Total.approvals 2 69 2064.29 2744.69 976 1557.72 1365.47 1
socidem_Age 3 69 29.29 4.39 30 29.60 5.93 20
max range skew kurtosis se
socidem_Time.taken 2276 2076 0.88 -0.06 61.68
socidem_Total.approvals 12802 12801 1.91 3.76 330.42
socidem_Age 35 15 -0.44 -0.94 0.53
table(socidem_df$socidem_Sex)
Female Male
28 41
table(socidem_df$socidem_Employment.status)
DATA_EXPIRED
12
Due to start a new job within the next month
2
Full-Time
27
Not in paid work (e.g. homemaker', 'retired or disabled)
6
Other
1
Part-Time
15
Unemployed (and job seeking)
6
12.2 descriptive mean differences
assoc_cols <- c("assoc1_valence", "assoc2_valence", "assoc3_valence", "assoc4_valence", "assoc5_valence")
attr_cols <- paste0("attributesFutureSociety-", 1:8)
clean_complete <- clean_complete %>%
mutate(
assoc_valence_mean = rowMeans(select(., all_of(assoc_cols)), na.rm = TRUE),
attributes_mean = rowMeans(select(., all_of(attr_cols)), na.rm = TRUE)
)
analysis_df <- clean_complete %>%
select(PROLIFIC_PID, condition_FutureSociety, assoc_valence_mean, attributes_mean, all_of(attr_cols)) %>%
drop_na(assoc_valence_mean, attributes_mean, condition_FutureSociety)
mean_summary <- analysis_df %>%
pivot_longer(
cols = c(assoc_valence_mean, attributes_mean),
names_to = "measure",
values_to = "value"
) %>%
group_by(condition_FutureSociety, measure) %>%
summarise(
mean = mean(value, na.rm = TRUE),
sd = sd(value, na.rm = TRUE),
n = dplyr::n(),
se = sd / sqrt(n),
.groups = "drop"
) %>%
mutate(
measure = recode(
measure,
assoc_valence_mean = "Association Valence Mean",
attributes_mean = "Attributes Mean"
)
)
ggplot(mean_summary, aes(x = condition_FutureSociety, y = mean, fill = measure)) +
geom_col(position = position_dodge(width = 0.8), width = 0.7) +
geom_errorbar(
aes(ymin = mean - se, ymax = mean + se),
position = position_dodge(width = 0.8),
width = 0.2
) +
labs(x = "Condition", y = "Mean", fill = "Measure") +
theme_minimal()anova_assoc <- afex::aov_ez(
id = "PROLIFIC_PID",
dv = "assoc_valence_mean",
between = "condition_FutureSociety",
data = analysis_df
)Contrasts set to contr.sum for the following variables: condition_FutureSociety
anova_attr <- afex::aov_ez(
id = "PROLIFIC_PID",
dv = "attributes_mean",
between = "condition_FutureSociety",
data = analysis_df
)Contrasts set to contr.sum for the following variables: condition_FutureSociety
summary(anova_assoc)Anova Table (Type 3 tests)
Response: assoc_valence_mean
num Df den Df MSE F ges Pr(>F)
condition_FutureSociety 6 61 2.7046 2.6343 0.20579 0.02451 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(anova_attr)Anova Table (Type 3 tests)
Response: attributes_mean
num Df den Df MSE F ges Pr(>F)
condition_FutureSociety 6 61 1.4635 3.1599 0.23711 0.009164 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
attr_labels <- c(
"1" = "Utopian",
"2" = "Desirable",
"3" = "Ideal",
"4" = "Beneficial for the greater good",
"5" = "Imaginative",
"6" = "Innovative",
"7" = "Creative",
"8" = "Possible"
)
attr_long <- analysis_df %>%
pivot_longer(
cols = all_of(attr_cols),
names_to = "item",
values_to = "value"
) %>%
mutate(
item_num = stringr::str_extract(item, "[0-9]+"),
item_label = recode(item_num, !!!attr_labels)
)
attr_effects <- attr_long %>%
filter(!is.na(value)) %>%
group_by(item_label) %>%
summarise(
eta_sq = {
fit <- aov(value ~ condition_FutureSociety)
ss <- summary(fit)[[1]]
ss_between <- ss["condition_FutureSociety", "Sum Sq"]
ss_total <- sum(ss[, "Sum Sq"])
if (is.na(ss_between) || is.na(ss_total) || ss_total == 0) NA_real_ else ss_between / ss_total
},
.groups = "drop"
) %>%
mutate(
eta_label = ifelse(is.na(eta_sq), "eta^2=NA", paste0("eta^2=", round(eta_sq, 2)))
)
attr_effects_table <- attr_effects %>%
arrange(desc(eta_sq))
attr_effects_table# A tibble: 8 × 3
item_label eta_sq eta_label
<chr> <dbl> <chr>
1 Innovative 0.251 eta^2=0.25
2 Desirable 0.227 eta^2=0.23
3 Ideal 0.220 eta^2=0.22
4 Utopian 0.208 eta^2=0.21
5 Beneficial for the greater good 0.176 eta^2=0.18
6 Creative 0.169 eta^2=0.17
7 Imaginative 0.159 eta^2=0.16
8 Possible 0.0144 eta^2=0.01
item_order <- attr_effects %>%
mutate(eta_order = ifelse(is.na(eta_sq), -Inf, eta_sq)) %>%
arrange(desc(eta_order)) %>%
pull(item_label)
label_order <- attr_effects %>%
mutate(eta_order = ifelse(is.na(eta_sq), -Inf, eta_sq)) %>%
arrange(desc(eta_order)) %>%
mutate(facet_label = paste0(item_label, "\n", eta_label)) %>%
pull(facet_label)
attr_summary <- attr_long %>%
group_by(condition_FutureSociety, item_label) %>%
summarise(
mean = mean(value, na.rm = TRUE),
sd = sd(value, na.rm = TRUE),
n = dplyr::n(),
se = sd / sqrt(n),
.groups = "drop"
) %>%
left_join(attr_effects %>% select(item_label, eta_label), by = "item_label") %>%
mutate(
item_label = factor(item_label, levels = item_order),
facet_label = factor(paste0(item_label, "\n", eta_label), levels = label_order)
)
condition_order <- analysis_df %>%
distinct(condition_FutureSociety) %>%
arrange(condition_FutureSociety) %>%
pull(condition_FutureSociety)
attr_summary <- attr_summary %>%
mutate(condition_FutureSociety = factor(condition_FutureSociety, levels = condition_order))
ggplot(attr_summary, aes(x = condition_FutureSociety, y = mean, fill = condition_FutureSociety)) +
geom_col(width = 0.7) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2) +
geom_text(aes(label = round(mean, 2)), vjust = -0.3, size = 3) +
facet_wrap(~ facet_label, ncol = 4) +
labs(x = "Condition", y = "Mean", fill = "Condition") +
theme_minimal() +
theme(legend.position = "bottom", axis.text.x = element_text(angle = 30, hjust = 1))tmp <- clean_complete[, str_subset(string = colnames(clean_complete), pattern = "attri.*[1-9]")]
psych::corPlot(r = cor(tmp, use = "pairwise"))hist(clean_complete$`attributesFutureSociety-8`)hist(clean_complete$`attributesFutureSociety-4`)tmp_efa <- psych::fa.parallel(x = tmp)Parallel analysis suggests that the number of factors = 2 and the number of components = 1
tmp_fa <- psych::fa(r = cor(tmp, use = "pairwise"), nfactors = 2, n.obs = nrow(tmp))Loading required namespace: GPArotation
tmp_faFactor Analysis using method = minres
Call: psych::fa(r = cor(tmp, use = "pairwise"), nfactors = 2, n.obs = nrow(tmp))
Standardized loadings (pattern matrix) based upon correlation matrix
MR1 MR2 h2 u2 com
attributesFutureSociety-5 -0.10 0.95 0.793 0.207 1.0
attributesFutureSociety-6 0.25 0.57 0.576 0.424 1.4
attributesFutureSociety-2 0.97 -0.03 0.911 0.089 1.0
attributesFutureSociety-3 0.96 -0.01 0.917 0.083 1.0
attributesFutureSociety-4 0.68 0.18 0.661 0.339 1.1
attributesFutureSociety-7 0.07 0.91 0.914 0.086 1.0
attributesFutureSociety-1 0.27 0.43 0.412 0.588 1.7
attributesFutureSociety-8 0.23 -0.01 0.052 0.948 1.0
MR1 MR2
SS loadings 2.75 2.49
Proportion Var 0.34 0.31
Cumulative Var 0.34 0.65
Proportion Explained 0.52 0.48
Cumulative Proportion 0.52 1.00
With factor correlations of
MR1 MR2
MR1 1.00 0.65
MR2 0.65 1.00
Mean item complexity = 1.1
Test of the hypothesis that 2 factors are sufficient.
df null model = 28 with the objective function = 6.38 with Chi Square = 411.24
df of the model are 13 and the objective function was 0.41
The root mean square of the residuals (RMSR) is 0.03
The df corrected root mean square of the residuals is 0.04
The harmonic n.obs is 69 with the empirical chi square 3.16 with prob < 1
The total n.obs was 69 with Likelihood Chi Square = 25.81 with prob < 0.018
Tucker Lewis Index of factoring reliability = 0.926
RMSEA index = 0.119 and the 90 % confidence intervals are 0.048 0.188
BIC = -29.23
Fit based upon off diagonal values = 1
Measures of factor score adequacy
MR1 MR2
Correlation of (regression) scores with factors 0.94 0.92
Multiple R square of scores with factors 0.87 0.84
Minimum correlation of possible factor scores 0.75 0.68