createRawFiles <- FALSEData preperation all studies
1 Notes
2 global variables
Define your global variables (e.g., to reduce run time):
3 functions
########################################
# from JATOS to table
########################################
### argss:
# dataset = dat_secondPostCAM
# listvars = vec_ques
# notNumeric = vec_notNumeric
# verbose = TRUE
questionnairetype <- function(dataset,
listvars = ques_mixed,
notNumeric = vec_notNumeric,
verbose=FALSE){
datasetques <- data.frame(ID = unique(dataset$ID))
for(c in 1:length(listvars)){
if(verbose){
print(c)
}
if(any(colnames(dataset) == listvars[c])){
if(verbose){
print(listvars[c])
}
## tmp IDs
tmpid <- dataset$ID[!is.na(dataset[, listvars[c]])]
## tmp value variable
tmpvalue <- dataset[, listvars[c]][!is.na(dataset[, listvars[c]])]
datasetques[listvars[c]] <- NA
if(listvars[c] %in% notNumeric){
datasetques[datasetques$ID %in% tmpid, listvars[c]] <- tmpvalue
}else if(is.list(tmpvalue)){
tmpvalue_tmp <- unique(tmpvalue)
tmpvalue <- c()
counter = 1
for(i in 1:length(tmpvalue_tmp)){
if(!is.null(tmpvalue_tmp[[i]])){
tmpvalue[counter] <- paste0(tmpvalue_tmp[[i]], collapse = " - ")
counter = counter + 1
}
}
datasetques[datasetques$ID %in% tmpid, listvars[c]] <- tmpvalue
}else{
datasetques[datasetques$ID %in% tmpid, listvars[c]] <- as.numeric(tmpvalue)
}
}
}
return(datasetques)
}4 load packages
### load packages
require(pacman)
p_load('tidyverse', 'jsonlite',
'stargazer', 'DT', 'psych',
'writexl')
# devtools::install_github("samuelae/associatoR")5 create raw data files
if(createRawFiles){
# Define root paths
source_root <- "../../data"
output_dir <- "outputs/raw"
# Define studies with paths
studies <- list(
study1 = file.path(source_root, "study_1"),
study2_BMI_high = file.path(source_root, "study_2", "BMI_high"),
study2_BMI_mixed = file.path(source_root, "study_2", "BMI_mixed")
)
# Loop over each study
for (study_name in names(studies)) {
study_path <- studies[[study_name]]
# Get folders matching pattern "study_result*"
tmp_folders <- list.files(study_path, pattern = "^study_result.*", full.names = TRUE)
results <- list()
for (folder in tmp_folders) {
# Look for a single subdirectory inside the folder
inner_folders <- list.files(folder, full.names = TRUE)
if (length(inner_folders) == 1) {
data_file <- file.path(inner_folders[1], "data.txt")
if (file.exists(data_file)) {
# Load and store the JSON data
tmp <- fromJSON(data_file)
results[[length(results) + 1]] <- tmp
}
}
}
# Save as RDS
saveRDS(results, file = file.path(output_dir, paste0(study_name, ".rds")))
# Optionally also save as JSON Lines (.jsonl)
json_lines <- map_chr(results, ~ toJSON(.x, auto_unbox = TRUE))
writeLines(json_lines, con = file.path(output_dir, paste0(study_name, ".jsonl")))
}
}else{
print("Raw files have been created in previous run!")
}[1] "Raw files have been created in previous run!"
6 prepare data questionnaires
6.1 study 1
load data:
output_dir <- "outputs/raw"
# Load the RDS files into a named list
study1 <- readRDS(file.path(output_dir, "study1.rds"))
# Combine into single data.frame
suppressMessages({
dat_study1 <- bind_rows(study1)
})
rm(study1)counter variable:
dat_study1$ID <- cumsum(dat_study1$sender == "Greetings" & !is.na(dat_study1$sender))
table(dat_study1$ID)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
5 2291 2 2291 5 2230 2291 2291 2291 2291 2291 2291 2291 2291 5 2291
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
2291 2291 2291 2291 2291 2291 2291 2230 5 2291 5 2291 2291 5 2291 2291
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
2291 2291 2291 2291 2230 2291 2291 2291 2291 2261 2 2291 2291 2291 2291 2291
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2 2291 2291 2291
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
2291 2291 2291 2 5 2291 2291 2291 2291 5 2291 2 2291 2291 2291 2291
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
2291 2291 2 2291 2291 2291 2291 2291 2291 2291 2291 5 2291 2291 2291 2291
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
2291 5 2291 2291 2291 2291 2291 2291 2291 2291 2 2291 5 2291 5 2291
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
2291 2291 2 2291 2291 2291 2291 5 5 2291 2291 5 5 2291 2291 2291
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
2291 2291 2291 2291 2291 2 2291 2291 5 2 2291 2291 2291 2291 2291 2291
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
2 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
5 2291 2 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
2291 2291 2291 2 2291 2291 5 2291 2291 2291 2291 2291 2291 2 2291 2291
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
2291 5 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291
225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
2291 2291 2291 2291 5 5 2291 2291 2291 5 2291 2291 2291 2291 5 2291
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
2291 5 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 5 2291 2291 2291
257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
2291 2291 2291 2291 5 2291 2291 2291 2291 2291 5 2291 5 2 2291 2291
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291
289
5
filter:
sum(table(dat_study1$ID) != max(table(dat_study1$ID)))[1] 49
sum(table(dat_study1$ID) == max(table(dat_study1$ID)))[1] 240
length(unique(dat_study1$ID))[1] 289
dat_study1 <-
dat_study1[dat_study1$ID %in% names(table(dat_study1$ID))[table(dat_study1$ID) == max(table(dat_study1$ID))], ]
length(unique(dat_study1$ID))[1] 240
questionnaire:
some elements are lists:
colnames(dat_study1)[sapply(dat_study1, is.list)][1] "url" "meta" "key_assignment"
[4] "para_defocuscount"
add meta manually and remove it:
meta_df <- dat_study1$meta[, c("language", "screen_width", "screen_height", "userAgent")]
dat_study1 <- bind_cols(dat_study1, meta_df)New names:
• `...7` -> `...157`
rm(meta_df)
dat_study1$meta <- NULLcreate questionnaire:
vec_notNumeric <- c("PROLIFIC_PID",
"sociodemo_residency", "sociodemo_gender", "sociodemo_sexualOrientation",
"sociodemo_weight_type", "sociodemo_height_type",
"feedback_critic",
"language", "screen_width", "screen_height", "userAgent",
str_subset(string = colnames(dat_study1), pattern = "^normalweight_R"),
str_subset(string = colnames(dat_study1), pattern = "^underweight_R"),
str_subset(string = colnames(dat_study1), pattern = "^obesity_R"),
str_subset(string = colnames(dat_study1), pattern = "^overweight_R"))
### get survey
vec_ques <- c("PROLIFIC_PID",
"dummy_informedconsent",
"commCheck", "attCheck", "feedback_conscientiousCompletion",
sort(str_subset(string = colnames(dat_study1), pattern = "^pair")),
sort(str_subset(string = colnames(dat_study1), pattern = "^PathogenDisgus")),
sort(str_subset(string = colnames(dat_study1), pattern = "^GermAversion")),
sort(str_subset(string = colnames(dat_study1), pattern = "^BeliefsAboutObesePersons")),
sort(str_subset(string = colnames(dat_study1), pattern = "^AttitudeTowardsObesePeople")),
sort(str_subset(string = colnames(dat_study1), pattern = "^PerceivedCausesofObesity")),
sort(str_subset(string = colnames(dat_study1), pattern = "^sociodemo")),
sort(str_subset(string = colnames(dat_study1), pattern = "^normalweight")),
sort(str_subset(string = colnames(dat_study1), pattern = "^underweigh")),
sort(str_subset(string = colnames(dat_study1), pattern = "^obesity")),
sort(str_subset(string = colnames(dat_study1), pattern = "^overweight")),
vec_notNumeric)
ques_study1 <- questionnairetype(
dataset = dat_study1,
listvars = vec_ques,
notNumeric = vec_notNumeric,
verbose = FALSE
)
dim(ques_study1)[1] 240 130
setwd("outputs/questionnaire")
saveRDS(ques_study1, file = paste0("ques_study1", ".rds"))6.2 study 2 - BMI high
load data:
output_dir <- "outputs/raw"
# Load the RDS files into a named list
study2_BMI_high <- readRDS(file.path(output_dir, "study2_BMI_high.rds"))
# Combine into single data.frame
suppressMessages({
dat_study2_BMI_high <- bind_rows(study2_BMI_high)
})
rm(study2_BMI_high)counter variable:
dat_study2_BMI_high$ID <- cumsum(dat_study2_BMI_high$sender == "Greetings" & !is.na(dat_study2_BMI_high$sender))
table(dat_study2_BMI_high$ID)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
2922 2922 2922 2922 2922 2922 2922 2922 2922 2922 2922 4 2922 2922 2922 2922
17 18 19 20 21 22 23 24 25 26 27
2922 4 4 2922 2922 2922 2922 2922 2922 2922 2920
filter:
sum(table(dat_study2_BMI_high$ID) != max(table(dat_study2_BMI_high$ID)))[1] 4
sum(table(dat_study2_BMI_high$ID) == max(table(dat_study2_BMI_high$ID)))[1] 23
length(unique(dat_study2_BMI_high$ID))[1] 27
dat_study2_BMI_high <-
dat_study2_BMI_high[dat_study2_BMI_high$ID %in% names(table(dat_study2_BMI_high$ID))[table(dat_study2_BMI_high$ID) == max(table(dat_study2_BMI_high$ID))], ]
length(unique(dat_study2_BMI_high$ID))[1] 23
questionnaire:
some elements are lists:
colnames(dat_study2_BMI_high)[sapply(dat_study2_BMI_high, is.list)][1] "url" "meta"
[3] "para_defocuscount" "key_assignment"
[5] "unsucsessfulAssociations" "sucsessfulAssociations"
add meta manually and remove it:
meta_df <- dat_study2_BMI_high$meta[, c("language", "screen_width", "screen_height", "userAgent")]
dat_study2_BMI_high <- bind_cols(dat_study2_BMI_high, meta_df)New names:
• `...6` -> `...131`
rm(meta_df)
dat_study2_BMI_high$meta <- NULLcreate questionnaire:
vec_notNumeric <- c("PROLIFIC_PID", "study_condition",
"sociodemo_residency", "sociodemo_gender",
"sociodemo_weight_type", "sociodemo_height_type",
"feedback_critic",
"language", "screen_width", "screen_height", "userAgent")
### get survey
vec_ques <- c("PROLIFIC_PID", "study_condition",
"dummy_informedconsent",
"commCheck", "attCheck", "feedback_conscientiousCompletion",
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^pair")),
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^PathogenDisgus")),
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^GermAversion")),
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^BeliefsAboutObesePersons")),
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^AttitudeTowardsObesePeople")),
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^PerceivedCausesofObesity")),
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^sociodemo")),
"weight_avatar_choice1", "weight_avatar_choice2",
vec_notNumeric)
ques_study2_BMI_high <- questionnairetype(
dataset = dat_study2_BMI_high,
listvars = vec_ques,
notNumeric = vec_notNumeric,
verbose = FALSE
)
dim(ques_study2_BMI_high)[1] 23 95
setwd("outputs/questionnaire")
saveRDS(ques_study2_BMI_high, file = paste0("ques_study2_BMI_high", ".rds"))6.3 study 2 - BMI mixed
load data:
output_dir <- "outputs/raw"
# Load the RDS files into a named list
study2_BMI_mixed <- readRDS(file.path(output_dir, "study2_BMI_mixed.rds"))
# Combine into single data.frame
suppressMessages({
dat_study2_BMI_mixed <- bind_rows(study2_BMI_mixed)
})
rm(study2_BMI_mixed)counter variable:
dat_study2_BMI_mixed$ID <- cumsum(dat_study2_BMI_mixed$sender == "Greetings" & !is.na(dat_study2_BMI_mixed$sender))
table(dat_study2_BMI_mixed$ID)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
2920 2920 2920 2920 2920 2920 2 2920 2920 2 2 2920 2920 2920 2920 2920
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
2920 2920 2920 2920 2920 2920 2920 2920 2920 2920 2920 2920 2920 2 2920 2920
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
2920 2 2920 2920 2920 2920 2 2920 2 2920 2920 2920 2 2920 2 2
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
2920 2920 2920 2920 2920 2920 2 2 2920 2920 2920 2920 2920 2920 2920 2920
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
2920 2920 2920 2920 2920 2 2920 2920 2920 2920 2 2920 2920 2 2920 2920
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
2920 2920 2920 2920 2920 2920 2 2 2920 2 2920 2920 2920 2920 2920 2
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
2920 2920 2920 2 2 2 2920 2 2920 2920 2920 2 2920 2920 2 2920
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
2 2913 2 2 2920 2 2920 2920 2920 2920 2920 2920 2920 2 2920 2920
129 130 131 132 133 134
2 2 2 2920 2920 2920
filter:
sum(table(dat_study2_BMI_mixed$ID) != max(table(dat_study2_BMI_mixed$ID)))[1] 34
sum(table(dat_study2_BMI_mixed$ID) == max(table(dat_study2_BMI_mixed$ID)))[1] 100
length(unique(dat_study2_BMI_mixed$ID))[1] 134
dat_study2_BMI_mixed <-
dat_study2_BMI_mixed[dat_study2_BMI_mixed$ID %in% names(table(dat_study2_BMI_mixed$ID))[table(dat_study2_BMI_mixed$ID) == max(table(dat_study2_BMI_mixed$ID))], ]
length(unique(dat_study2_BMI_mixed$ID))[1] 100
questionnaire:
some elements are lists:
colnames(dat_study2_BMI_mixed)[sapply(dat_study2_BMI_mixed, is.list)][1] "url" "meta"
[3] "key_assignment" "para_defocuscount"
[5] "unsucsessfulAssociations" "sucsessfulAssociations"
add meta manually and remove it:
meta_df <- dat_study2_BMI_mixed$meta[, c("language", "screen_width", "screen_height", "userAgent")]
dat_study2_BMI_mixed <- bind_cols(dat_study2_BMI_mixed, meta_df)New names:
• `...7` -> `...130`
rm(meta_df)
dat_study2_BMI_mixed$meta <- NULLcreate questionnaire:
vec_notNumeric <- c("PROLIFIC_PID", "study_condition",
"sociodemo_residency", "sociodemo_gender",
"sociodemo_weight_type", "sociodemo_height_type",
"feedback_critic",
"language", "screen_width", "screen_height", "userAgent")
### get survey
vec_ques <- c("PROLIFIC_PID", "study_condition",
"dummy_informedconsent",
"commCheck", "attCheck", "feedback_conscientiousCompletion",
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^pair")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^PathogenDisgus")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^GermAversion")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^BeliefsAboutObesePersons")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^AttitudeTowardsObesePeople")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^PerceivedCausesofObesity")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^sociodemo")),
"weight_avatar_choice1", "weight_avatar_choice2",
vec_notNumeric)
ques_study2_BMI_mixed <- questionnairetype(
dataset = dat_study2_BMI_mixed,
listvars = vec_ques,
notNumeric = vec_notNumeric,
verbose = FALSE
)
dim(ques_study2_BMI_mixed)[1] 100 94
add BMI:
functions:
# Define the conversion functions (same as your second block)
convert_weight_to_kg <- function(weight, weight_type) {
if (weight_type == "pounds") {
return(weight * 0.45359237)
} else if (weight_type == "kilograms") {
return(weight)
} else {
return(NA_real_)
}
}
convert_height_to_cm <- function(height_cm, height_ft, height_ftin, height_type) {
if (height_type == "centimeters") {
return(height_cm)
} else if (height_type == "feet-inches") {
feet_in_inches <- height_ft
inches <- height_ftin
return(feet_in_inches * 30.48 + inches * 2.54)
} else {
return(NA_real_)
}
}
# Apply the computation using mutate + mapply
ques_study2_BMI_mixed <- ques_study2_BMI_mixed %>%
mutate(
weight_kg = mapply(convert_weight_to_kg,
weight = sociodemo_weight,
weight_type = sociodemo_weight_type),
height_cm_val = mapply(convert_height_to_cm,
height_cm = sociodemo_height_cm,
height_ft = sociodemo_height_ft,
height_ftin = sociodemo_height_ftin,
height_type = sociodemo_height_type),
sociodemo_BMI = ifelse(
!is.na(weight_kg) & !is.na(height_cm_val),
weight_kg / ((height_cm_val / 100)^2),
NA_real_
)
)add BMI - ques_study2_BMI_mixed:
# Apply the computation using mutate + mapply
ques_study2_BMI_mixed <- ques_study2_BMI_mixed %>%
mutate(
weight_kg = mapply(convert_weight_to_kg,
weight = sociodemo_weight,
weight_type = sociodemo_weight_type),
height_cm_val = mapply(convert_height_to_cm,
height_cm = sociodemo_height_cm,
height_ft = sociodemo_height_ft,
height_ftin = sociodemo_height_ftin,
height_type = sociodemo_height_type),
sociodemo_BMI = ifelse(
!is.na(weight_kg) & !is.na(height_cm_val),
weight_kg / ((height_cm_val / 100)^2),
NA_real_
)
)
ques_study2_BMI_mixed$sociodemo_BMI[ques_study2_BMI_mixed$weight_kg < 40 | ques_study2_BMI_mixed$height_cm_val < 140 | ques_study2_BMI_mixed$height_cm_val > 230] <- NA
# ques_study2_BMI_mixed$weight_kg[ques_study2_BMI_mixed$sociodemo_BMI < 10]
# ques_study2_BMI_mixed$height_cm_val[ques_study2_BMI_mixed$sociodemo_BMI < 10]add BMI - ques_study1:
ques_study1$sociodemo_height_ft <- NA
ques_study1$sociodemo_height_ftin <- NA
ques_study1 <- ques_study1 %>%
mutate(
weight_kg = mapply(convert_weight_to_kg,
weight = sociodemo_weight,
weight_type = sociodemo_weight_type),
height_cm_val = mapply(convert_height_to_cm,
height_cm = sociodemo_height,
height_ft = sociodemo_height_ft,
height_ftin = sociodemo_height_ftin,
height_type = sociodemo_height_type),
sociodemo_BMI = ifelse(
!is.na(weight_kg) & !is.na(height_cm_val),
weight_kg / ((height_cm_val / 100)^2),
NA_real_
)
)
ques_study1$sociodemo_BMI[ques_study1$weight_kg < 40 | ques_study1$height_cm_val < 140 | ques_study1$height_cm_val > 230] <- NA
# ques_study1$weight_kg[ques_study1$sociodemo_BMI < 10]
# ques_study1$height_cm_val[ques_study1$sociodemo_BMI < 10]summary(ques_study2_BMI_high$sociodemo_BMI) Min. 1st Qu. Median Mean 3rd Qu. Max.
30.04 32.04 37.30 36.88 38.90 61.50
summary(ques_study2_BMI_mixed$sociodemo_BMI) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
16.04 22.34 24.45 26.56 28.14 53.37 4
summary(ques_study1$sociodemo_BMI) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
14.04 22.22 24.57 25.93 28.58 48.21 91
setwd("outputs/questionnaire")
saveRDS(ques_study2_BMI_mixed, file = paste0("ques_study2_BMI_mixed", ".rds"))6.4 combine questionnaires
tmp_surveysVars <- c(sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^pair")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^PathogenDisgus")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^GermAversion")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^BeliefsAboutObesePersons")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^AttitudeTowardsObesePeople")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^PerceivedCausesofObesity")))
ques_study1$weight_avatar_choice2 <- NA
ques_study1$source <- "study1"
ques_study2_BMI_high$source <- "study2_BMI_high"
ques_study2_BMI_mixed$source <- "study2_BMI_mixed"
ques_combined <- rbind(ques_study1[, c("PROLIFIC_PID", "source", "sociodemo_age", "sociodemo_gender", "sociodemo_BMI", "weight_avatar_choice2", tmp_surveysVars)],
ques_study2_BMI_high[, c("PROLIFIC_PID", "source", "sociodemo_age", "sociodemo_gender", "sociodemo_BMI","weight_avatar_choice2", tmp_surveysVars)],
ques_study2_BMI_mixed[, c("PROLIFIC_PID", "source", "sociodemo_age", "sociodemo_gender", "sociodemo_BMI", "weight_avatar_choice2", tmp_surveysVars)])
dim(ques_combined)[1] 363 77
names(table(ques_combined$PROLIFIC_PID))[table(ques_combined$PROLIFIC_PID) >= 2][1] "601129f77e0c21000b0c408a"
6.4.1 reverse code items
Fat Phobia Scale:
psych::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^pair")]))# --- 1) Item metadata (from your list) ---
items <- tibble::tribble(
~scale, ~left, ~right,
"pair01", "lazy", "industrious",
"pair02", "no will power", "has will power",
"pair03", "attractive", "unattractive",
"pair04", "good self-control", "poor self-control",
"pair05", "fast", "slow",
"pair06", "having endurance", "having no endurance",
"pair07", "active", "inactive",
"pair08", "weak", "strong",
"pair09", "self-indulgent", "self-sacrificing",
"pair10", "dislikes food", "likes food",
"pair11", "shapeless", "shapely",
"pair12", "undereats", "overeats",
"pair13", "insecure", "secure",
"pair14", "low self-esteem", "high self-esteem"
)
# --- 2) Define which *right* words are "positive" ---
# Edit this list as needed. Only items whose RIGHT word is in this list will be reversed.
positive_right_terms <- c(
"industrious",
"has will power",
"strong",
"self-sacrificing",
"shapely",
"secure",
"high self-esteem"
# NOTE: "likes food" and "overeats" are context-dependent; add/remove if needed.
)
# Items to reverse: those whose RIGHT term is positive
reverse_items <- items$scale[items$right %in% positive_right_terms]
# --- 3) Reverse-code those columns in your data frame df ---
min_val <- 1
max_val <- 5
# Safety check: only reverse variables that actually exist in df
reverse_items_in_df <- intersect(reverse_items, names(ques_combined))
ques_combined[reverse_items_in_df] <- lapply(ques_combined[reverse_items_in_df], function(x) {
(max_val + min_val) - x
})
# --- 4) (Optional) Inspect what was reversed ---
list(
reversed_by_right_positive = reverse_items_in_df,
not_found_in_df = setdiff(reverse_items, reverse_items_in_df)
)$reversed_by_right_positive
[1] "pair01" "pair02" "pair08" "pair09" "pair11" "pair13" "pair14"
$not_found_in_df
character(0)
psych::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^pair")]))Subscale Germ Aversion:
psych::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^GermAversion")]))# with names like GermAversion1, GermAversion5r, GermAversion7r, etc.
min_val <- 1
max_val <- 7
# 1. Identify all GermAversion columns
germ_cols <- str_subset(colnames(ques_combined), pattern = "^GermAversion")
# 2. Find which of these end with "r"
germ_cols_rev <- str_subset(germ_cols, pattern = "r$")
# 3. Reverse code the ones ending in "r"
ques_combined[germ_cols_rev] <- lapply(ques_combined[germ_cols_rev], function(x) {
(max_val + min_val) - x
})psych::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^GermAversion")]))Beliefs About Obese Persons Scale:
psych::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^BeliefsAboutObesePersons")]))min_val <- 1
max_val <- 6
# 1. Identify all BeliefsAboutObesePersons columns
beliefsAboutObe_cols <- str_subset(colnames(ques_combined), pattern = "^BeliefsAboutObesePersons")
# 2. Find which of these end with "r"
beliefsAboutObe_cols_rev <- str_subset(beliefsAboutObe_cols, pattern = "r$")
# 3. Reverse code the ones ending in "r"
ques_combined[beliefsAboutObe_cols_rev] <- lapply(ques_combined[beliefsAboutObe_cols_rev], function(x) {
(max_val + min_val) - x
})psych::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^BeliefsAboutObesePersons")]))Attitude Towards Obese People Scale:
psych::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^AttitudeTowardsObesePeople")]))min_val <- 1
max_val <- 6
# 1. Identify all AttitudeTowardsObesePeople columns
attitudeTowObe_cols <- str_subset(colnames(ques_combined), pattern = "^AttitudeTowardsObesePeople")
# 2. Find which of these end with "r"
attitudeTowObe_cols_rev <- str_subset(attitudeTowObe_cols, pattern = "r$")
# 3. Reverse code the ones ending in "r"
ques_combined[attitudeTowObe_cols_rev] <- lapply(ques_combined[attitudeTowObe_cols_rev], function(x) {
(max_val + min_val) - x
})psych::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^AttitudeTowardsObesePeople")]))6.4.2 save questionnaire
setwd("outputs/questionnaire")
# Save RDS
saveRDS(ques_combined, file = "ques_combined.rds")
# Save CSV
write.csv(ques_combined, file = "ques_combined.csv", row.names = FALSE)
# Save XLSX using writexl
writexl::write_xlsx(ques_combined, path = "ques_combined.xlsx")7 prepare data associations
colnames(dat_study2_BMI_high)[sapply(dat_study2_BMI_high, is.list)][1] "url" "para_defocuscount"
[3] "key_assignment" "unsucsessfulAssociations"
[5] "sucsessfulAssociations"
colnames(dat_study2_BMI_mixed)[sapply(dat_study2_BMI_mixed, is.list)][1] "url" "key_assignment"
[3] "para_defocuscount" "unsucsessfulAssociations"
[5] "sucsessfulAssociations"
Function:
# association_list = sucsessfulAssociations_study2_BMI_mixed
# metadata_df = ques_study2_BMI_mixed
process_association_data <- function(association_list, metadata_df) {
# Initialize output data frame
output_df <- data.frame(
participant_id = character(),
gender = character(),
age = integer(),
BMI = numeric(),
cue = character(),
response = character(),
response_position = integer(),
response_level = integer(),
timestamp = character(),
time_diff_sec = numeric(),
stringsAsFactors = FALSE
)
# Loop through all participants
for (i in seq_along(association_list)) {
# Get associations
assoc <- association_list[[i]]
assoc_L1 <- assoc[1:5, ]
assoc_L2 <- assoc[6:nrow(assoc), ]
# Get participant metadata
meta <- metadata_df[i, ]
pid <- meta$PROLIFIC_PID
gender <- meta$sociodemo_gender
age <- meta$sociodemo_age
BMI <- meta$sociodemo_BMI
avatar <- meta$weight_avatar_choice2
if(nrow(assoc_L1) != 5){
cat("assoc_L1 not 5 for:", pid, "\n")
}
if(nrow(assoc_L2) != 25){
cat("assoc_L2 not 25 but", nrow(assoc_L2), "for:", pid, "\n")
}
# --- Level 1: responses ---
df_level1 <- data.frame(
participant_id = pid,
gender = gender,
age = age,
BMI = BMI,
avatar = avatar,
cue = assoc_L1$cue,
response = assoc_L1$response,
response_position = seq_along(assoc_L1$cue),
response_level = 1,
timestamp = assoc_L1$timestamp,
stringsAsFactors = FALSE
)
# --- Level 2: responses ---
df_level2 <- data.frame(
participant_id = pid,
gender = gender,
age = age,
BMI = BMI,
avatar = avatar,
cue = assoc_L2$cue,
response = assoc_L2$response,
response_position = as.numeric(ave(assoc_L2$cue, assoc_L2$cue, FUN = seq_along)), #rep(1:5, times = 5)
response_level = 2,
timestamp = assoc_L2$timestamp,
stringsAsFactors = FALSE
)
# Combine both levels
combined <- rbind(df_level1, df_level2)
# Convert timestamp and calculate time difference from first response
combined$timestamp <- as.POSIXct(combined$timestamp, format = "%Y-%m-%dT%H:%M:%OSZ", tz = "UTC")
combined$time_diff_sec <- as.numeric(difftime(combined$timestamp, combined$timestamp[1], units = "secs"))
# Append to final output
output_df <- rbind(output_df, combined)
}
return(output_df)
}7.1 study 2 - BMI high
sucsessfulAssociations_study2_BMI_high <- dat_study2_BMI_high$sucsessfulAssociations[!sapply(dat_study2_BMI_high$sucsessfulAssociations, is.null)]
ass_study2_BMI_high <- process_association_data(
association_list = sucsessfulAssociations_study2_BMI_high,
metadata_df = ques_study2_BMI_high
)
dim(ass_study2_BMI_high)[1] 690 11
setwd("outputs/associations")
# Save RDS
saveRDS(ass_study2_BMI_high, file = "ass_study2_BMI_high.rds")
# Save CSV
write.csv(ass_study2_BMI_high, file = "ass_study2_BMI_high.csv", row.names = FALSE)
# Save XLSX using writexl
writexl::write_xlsx(ass_study2_BMI_high, path = "ass_study2_BMI_high.xlsx")7.2 study 2 - BMI mixed
sucsessfulAssociations_study2_BMI_mixed <- dat_study2_BMI_mixed$sucsessfulAssociations[!sapply(dat_study2_BMI_mixed$sucsessfulAssociations, is.null)]
ass_study2_BMI_mixed <- process_association_data(
association_list = sucsessfulAssociations_study2_BMI_mixed,
metadata_df = ques_study2_BMI_mixed
)assoc_L2 not 25 but 23 for: 673b8b36a23f091c707518ae
assoc_L2 not 25 but 24 for: 6664dfc807e75330a8b54a8b
assoc_L2 not 25 but 24 for: 67bcdccadec7e4c73404a0ad
assoc_L2 not 25 but 24 for: 64f3036a5b5f2feae0b1f51b
assoc_L2 not 25 but 19 for: 65feaaac53eb219f09ad5ea0
assoc_L2 not 25 but 24 for: 5bb0cc738f3bd70001e513e3
assoc_L2 not 25 but 24 for: 67edb2c2157a46a0dcb9c5e3
assoc_L2 not 25 but 24 for: 667c744d940ae66df47ab873
assoc_L2 not 25 but 24 for: 634688daf4776ef459edf673
assoc_L2 not 25 but 22 for: 678a406859e436e14f1b1deb
assoc_L2 not 25 but 24 for: 6165d25922529904e5d7267d
assoc_L2 not 25 but 24 for: 6776815763e82f23fe4bcd40
dim(ass_study2_BMI_mixed)[1] 2980 11
setwd("outputs/associations")
# Save RDS
saveRDS(ass_study2_BMI_mixed, file = "ass_study2_BMI_mixed.rds")
# Save CSV
write.csv(ass_study2_BMI_mixed, file = "ass_study2_BMI_mixed.csv", row.names = FALSE)
# Save XLSX using writexl
writexl::write_xlsx(ass_study2_BMI_mixed, path = "ass_study2_BMI_mixed.xlsx")7.3 combine associations
ass_combined <- rbind(ass_study2_BMI_high, ass_study2_BMI_mixed)
dim(ass_combined)[1] 3670 11
names(table(ass_combined$participant_id))[table(ass_combined$participant_id) != 30] [1] "5bb0cc738f3bd70001e513e3" "6165d25922529904e5d7267d"
[3] "634688daf4776ef459edf673" "64f3036a5b5f2feae0b1f51b"
[5] "65feaaac53eb219f09ad5ea0" "6664dfc807e75330a8b54a8b"
[7] "667c744d940ae66df47ab873" "673b8b36a23f091c707518ae"
[9] "6776815763e82f23fe4bcd40" "678a406859e436e14f1b1deb"
[11] "67bcdccadec7e4c73404a0ad" "67edb2c2157a46a0dcb9c5e3"
setwd("outputs/associations")
# Save RDS
saveRDS(ass_combined, file = "ass_combined.rds")
# Save CSV
write.csv(ass_combined, file = "ass_combined.csv", row.names = FALSE)
# Save XLSX using writexl
writexl::write_xlsx(ass_combined, path = "ass_combined.xlsx")