<- FALSE createRawFiles
Data preperation all studies
1 Notes
2 global variables
Define your global variables (e.g., to reduce run time):
3 functions
########################################
# from JATOS to table
########################################
### argss:
# dataset = dat_secondPostCAM
# listvars = vec_ques
# notNumeric = vec_notNumeric
# verbose = TRUE
<- function(dataset,
questionnairetype listvars = ques_mixed,
notNumeric = vec_notNumeric,
verbose=FALSE){
<- data.frame(ID = unique(dataset$ID))
datasetques
for(c in 1:length(listvars)){
if(verbose){
print(c)
}
if(any(colnames(dataset) == listvars[c])){
if(verbose){
print(listvars[c])
}
## tmp IDs
<- dataset$ID[!is.na(dataset[, listvars[c]])]
tmpid ## tmp value variable
<- dataset[, listvars[c]][!is.na(dataset[, listvars[c]])]
tmpvalue <- NA
datasetques[listvars[c]]
if(listvars[c] %in% notNumeric){
$ID %in% tmpid, listvars[c]] <- tmpvalue
datasetques[datasetqueselse if(is.list(tmpvalue)){
}<- unique(tmpvalue)
tmpvalue_tmp <- c()
tmpvalue = 1
counter for(i in 1:length(tmpvalue_tmp)){
if(!is.null(tmpvalue_tmp[[i]])){
<- paste0(tmpvalue_tmp[[i]], collapse = " - ")
tmpvalue[counter] = counter + 1
counter
}
}$ID %in% tmpid, listvars[c]] <- tmpvalue
datasetques[datasetqueselse{
}$ID %in% tmpid, listvars[c]] <- as.numeric(tmpvalue)
datasetques[datasetques
}
}
}return(datasetques)
}
4 load packages
### load packages
require(pacman)
p_load('tidyverse', 'jsonlite',
'stargazer', 'DT', 'psych',
'writexl')
# devtools::install_github("samuelae/associatoR")
5 create raw data files
if(createRawFiles){
# Define root paths
<- "../../data"
source_root <- "outputs/raw"
output_dir
# Define studies with paths
<- list(
studies study1 = file.path(source_root, "study_1"),
study2_BMI_high = file.path(source_root, "study_2", "BMI_high"),
study2_BMI_mixed = file.path(source_root, "study_2", "BMI_mixed")
)
# Loop over each study
for (study_name in names(studies)) {
<- studies[[study_name]]
study_path
# Get folders matching pattern "study_result*"
<- list.files(study_path, pattern = "^study_result.*", full.names = TRUE)
tmp_folders
<- list()
results
for (folder in tmp_folders) {
# Look for a single subdirectory inside the folder
<- list.files(folder, full.names = TRUE)
inner_folders
if (length(inner_folders) == 1) {
<- file.path(inner_folders[1], "data.txt")
data_file
if (file.exists(data_file)) {
# Load and store the JSON data
<- fromJSON(data_file)
tmp length(results) + 1]] <- tmp
results[[
}
}
}
# Save as RDS
saveRDS(results, file = file.path(output_dir, paste0(study_name, ".rds")))
# Optionally also save as JSON Lines (.jsonl)
<- map_chr(results, ~ toJSON(.x, auto_unbox = TRUE))
json_lines writeLines(json_lines, con = file.path(output_dir, paste0(study_name, ".jsonl")))
}else{
}print("Raw files have been created in previous run!")
}
[1] "Raw files have been created in previous run!"
6 prepare data questionnaires
6.1 study 1
load data:
<- "outputs/raw"
output_dir
# Load the RDS files into a named list
<- readRDS(file.path(output_dir, "study1.rds"))
study1
# Combine into single data.frame
suppressMessages({
<- bind_rows(study1)
dat_study1
})
rm(study1)
counter variable:
$ID <- cumsum(dat_study1$sender == "Greetings" & !is.na(dat_study1$sender))
dat_study1
table(dat_study1$ID)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
5 2291 2 2291 5 2230 2291 2291 2291 2291 2291 2291 2291 2291 5 2291
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
2291 2291 2291 2291 2291 2291 2291 2230 5 2291 5 2291 2291 5 2291 2291
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
2291 2291 2291 2291 2230 2291 2291 2291 2291 2261 2 2291 2291 2291 2291 2291
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2 2291 2291 2291
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
2291 2291 2291 2 5 2291 2291 2291 2291 5 2291 2 2291 2291 2291 2291
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
2291 2291 2 2291 2291 2291 2291 2291 2291 2291 2291 5 2291 2291 2291 2291
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
2291 5 2291 2291 2291 2291 2291 2291 2291 2291 2 2291 5 2291 5 2291
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
2291 2291 2 2291 2291 2291 2291 5 5 2291 2291 5 5 2291 2291 2291
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
2291 2291 2291 2291 2291 2 2291 2291 5 2 2291 2291 2291 2291 2291 2291
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
2 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
5 2291 2 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
2291 2291 2291 2 2291 2291 5 2291 2291 2291 2291 2291 2291 2 2291 2291
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
2291 5 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291
225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
2291 2291 2291 2291 5 5 2291 2291 2291 5 2291 2291 2291 2291 5 2291
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
2291 5 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 5 2291 2291 2291
257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
2291 2291 2291 2291 5 2291 2291 2291 2291 2291 5 2291 5 2 2291 2291
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291 2291
289
5
filter:
sum(table(dat_study1$ID) != max(table(dat_study1$ID)))
[1] 49
sum(table(dat_study1$ID) == max(table(dat_study1$ID)))
[1] 240
length(unique(dat_study1$ID))
[1] 289
<-
dat_study1 $ID %in% names(table(dat_study1$ID))[table(dat_study1$ID) == max(table(dat_study1$ID))], ]
dat_study1[dat_study1length(unique(dat_study1$ID))
[1] 240
questionnaire:
some elements are lists:
colnames(dat_study1)[sapply(dat_study1, is.list)]
[1] "url" "meta" "key_assignment"
[4] "para_defocuscount"
add meta manually and remove it:
<- dat_study1$meta[, c("language", "screen_width", "screen_height", "userAgent")]
meta_df <- bind_cols(dat_study1, meta_df) dat_study1
New names:
• `...7` -> `...157`
rm(meta_df)
$meta <- NULL dat_study1
create questionnaire:
<- c("PROLIFIC_PID",
vec_notNumeric "sociodemo_residency", "sociodemo_gender", "sociodemo_sexualOrientation",
"sociodemo_weight_type", "sociodemo_height_type",
"feedback_critic",
"language", "screen_width", "screen_height", "userAgent",
str_subset(string = colnames(dat_study1), pattern = "^normalweight_R"),
str_subset(string = colnames(dat_study1), pattern = "^underweight_R"),
str_subset(string = colnames(dat_study1), pattern = "^obesity_R"),
str_subset(string = colnames(dat_study1), pattern = "^overweight_R"))
### get survey
<- c("PROLIFIC_PID",
vec_ques "dummy_informedconsent",
"commCheck", "attCheck", "feedback_conscientiousCompletion",
sort(str_subset(string = colnames(dat_study1), pattern = "^pair")),
sort(str_subset(string = colnames(dat_study1), pattern = "^PathogenDisgus")),
sort(str_subset(string = colnames(dat_study1), pattern = "^GermAversion")),
sort(str_subset(string = colnames(dat_study1), pattern = "^BeliefsAboutObesePersons")),
sort(str_subset(string = colnames(dat_study1), pattern = "^AttitudeTowardsObesePeople")),
sort(str_subset(string = colnames(dat_study1), pattern = "^PerceivedCausesofObesity")),
sort(str_subset(string = colnames(dat_study1), pattern = "^sociodemo")),
sort(str_subset(string = colnames(dat_study1), pattern = "^normalweight")),
sort(str_subset(string = colnames(dat_study1), pattern = "^underweigh")),
sort(str_subset(string = colnames(dat_study1), pattern = "^obesity")),
sort(str_subset(string = colnames(dat_study1), pattern = "^overweight")),
vec_notNumeric)
<- questionnairetype(
ques_study1 dataset = dat_study1,
listvars = vec_ques,
notNumeric = vec_notNumeric,
verbose = FALSE
)
dim(ques_study1)
[1] 240 130
setwd("outputs/questionnaire")
saveRDS(ques_study1, file = paste0("ques_study1", ".rds"))
6.2 study 2 - BMI high
load data:
<- "outputs/raw"
output_dir
# Load the RDS files into a named list
<- readRDS(file.path(output_dir, "study2_BMI_high.rds"))
study2_BMI_high
# Combine into single data.frame
suppressMessages({
<- bind_rows(study2_BMI_high)
dat_study2_BMI_high
})
rm(study2_BMI_high)
counter variable:
$ID <- cumsum(dat_study2_BMI_high$sender == "Greetings" & !is.na(dat_study2_BMI_high$sender))
dat_study2_BMI_high
table(dat_study2_BMI_high$ID)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
2922 2922 2922 2922 2922 2922 2922 2922 2922 2922 2922 4 2922 2922 2922 2922
17 18 19 20 21 22 23 24 25 26 27
2922 4 4 2922 2922 2922 2922 2922 2922 2922 2920
filter:
sum(table(dat_study2_BMI_high$ID) != max(table(dat_study2_BMI_high$ID)))
[1] 4
sum(table(dat_study2_BMI_high$ID) == max(table(dat_study2_BMI_high$ID)))
[1] 23
length(unique(dat_study2_BMI_high$ID))
[1] 27
<-
dat_study2_BMI_high $ID %in% names(table(dat_study2_BMI_high$ID))[table(dat_study2_BMI_high$ID) == max(table(dat_study2_BMI_high$ID))], ]
dat_study2_BMI_high[dat_study2_BMI_highlength(unique(dat_study2_BMI_high$ID))
[1] 23
questionnaire:
some elements are lists:
colnames(dat_study2_BMI_high)[sapply(dat_study2_BMI_high, is.list)]
[1] "url" "meta"
[3] "para_defocuscount" "key_assignment"
[5] "unsucsessfulAssociations" "sucsessfulAssociations"
add meta manually and remove it:
<- dat_study2_BMI_high$meta[, c("language", "screen_width", "screen_height", "userAgent")]
meta_df <- bind_cols(dat_study2_BMI_high, meta_df) dat_study2_BMI_high
New names:
• `...6` -> `...131`
rm(meta_df)
$meta <- NULL dat_study2_BMI_high
create questionnaire:
<- c("PROLIFIC_PID", "study_condition",
vec_notNumeric "sociodemo_residency", "sociodemo_gender",
"sociodemo_weight_type", "sociodemo_height_type",
"feedback_critic",
"language", "screen_width", "screen_height", "userAgent")
### get survey
<- c("PROLIFIC_PID", "study_condition",
vec_ques "dummy_informedconsent",
"commCheck", "attCheck", "feedback_conscientiousCompletion",
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^pair")),
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^PathogenDisgus")),
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^GermAversion")),
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^BeliefsAboutObesePersons")),
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^AttitudeTowardsObesePeople")),
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^PerceivedCausesofObesity")),
sort(str_subset(string = colnames(dat_study2_BMI_high), pattern = "^sociodemo")),
"weight_avatar_choice1", "weight_avatar_choice2",
vec_notNumeric)
<- questionnairetype(
ques_study2_BMI_high dataset = dat_study2_BMI_high,
listvars = vec_ques,
notNumeric = vec_notNumeric,
verbose = FALSE
)
dim(ques_study2_BMI_high)
[1] 23 95
setwd("outputs/questionnaire")
saveRDS(ques_study2_BMI_high, file = paste0("ques_study2_BMI_high", ".rds"))
6.3 study 2 - BMI mixed
load data:
<- "outputs/raw"
output_dir
# Load the RDS files into a named list
<- readRDS(file.path(output_dir, "study2_BMI_mixed.rds"))
study2_BMI_mixed
# Combine into single data.frame
suppressMessages({
<- bind_rows(study2_BMI_mixed)
dat_study2_BMI_mixed
})
rm(study2_BMI_mixed)
counter variable:
$ID <- cumsum(dat_study2_BMI_mixed$sender == "Greetings" & !is.na(dat_study2_BMI_mixed$sender))
dat_study2_BMI_mixed
table(dat_study2_BMI_mixed$ID)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
2920 2920 2920 2920 2920 2920 2 2920 2920 2 2 2920 2920 2920 2920 2920
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
2920 2920 2920 2920 2920 2920 2920 2920 2920 2920 2920 2920 2920 2 2920 2920
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
2920 2 2920 2920 2920 2920 2 2920 2 2920 2920 2920 2 2920 2 2
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
2920 2920 2920 2920 2920 2920 2 2 2920 2920 2920 2920 2920 2920 2920 2920
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
2920 2920 2920 2920 2920 2 2920 2920 2920 2920 2 2920 2920 2 2920 2920
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
2920 2920 2920 2920 2920 2920 2 2 2920 2 2920 2920 2920 2920 2920 2
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
2920 2920 2920 2 2 2 2920 2 2920 2920 2920 2 2920 2920 2 2920
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
2 2913 2 2 2920 2 2920 2920 2920 2920 2920 2920 2920 2 2920 2920
129 130 131 132 133 134
2 2 2 2920 2920 2920
filter:
sum(table(dat_study2_BMI_mixed$ID) != max(table(dat_study2_BMI_mixed$ID)))
[1] 34
sum(table(dat_study2_BMI_mixed$ID) == max(table(dat_study2_BMI_mixed$ID)))
[1] 100
length(unique(dat_study2_BMI_mixed$ID))
[1] 134
<-
dat_study2_BMI_mixed $ID %in% names(table(dat_study2_BMI_mixed$ID))[table(dat_study2_BMI_mixed$ID) == max(table(dat_study2_BMI_mixed$ID))], ]
dat_study2_BMI_mixed[dat_study2_BMI_mixedlength(unique(dat_study2_BMI_mixed$ID))
[1] 100
questionnaire:
some elements are lists:
colnames(dat_study2_BMI_mixed)[sapply(dat_study2_BMI_mixed, is.list)]
[1] "url" "meta"
[3] "key_assignment" "para_defocuscount"
[5] "unsucsessfulAssociations" "sucsessfulAssociations"
add meta manually and remove it:
<- dat_study2_BMI_mixed$meta[, c("language", "screen_width", "screen_height", "userAgent")]
meta_df <- bind_cols(dat_study2_BMI_mixed, meta_df) dat_study2_BMI_mixed
New names:
• `...7` -> `...130`
rm(meta_df)
$meta <- NULL dat_study2_BMI_mixed
create questionnaire:
<- c("PROLIFIC_PID", "study_condition",
vec_notNumeric "sociodemo_residency", "sociodemo_gender",
"sociodemo_weight_type", "sociodemo_height_type",
"feedback_critic",
"language", "screen_width", "screen_height", "userAgent")
### get survey
<- c("PROLIFIC_PID", "study_condition",
vec_ques "dummy_informedconsent",
"commCheck", "attCheck", "feedback_conscientiousCompletion",
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^pair")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^PathogenDisgus")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^GermAversion")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^BeliefsAboutObesePersons")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^AttitudeTowardsObesePeople")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^PerceivedCausesofObesity")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^sociodemo")),
"weight_avatar_choice1", "weight_avatar_choice2",
vec_notNumeric)
<- questionnairetype(
ques_study2_BMI_mixed dataset = dat_study2_BMI_mixed,
listvars = vec_ques,
notNumeric = vec_notNumeric,
verbose = FALSE
)
dim(ques_study2_BMI_mixed)
[1] 100 94
add BMI:
functions:
# Define the conversion functions (same as your second block)
<- function(weight, weight_type) {
convert_weight_to_kg if (weight_type == "pounds") {
return(weight * 0.45359237)
else if (weight_type == "kilograms") {
} return(weight)
else {
} return(NA_real_)
}
}
<- function(height_cm, height_ft, height_ftin, height_type) {
convert_height_to_cm if (height_type == "centimeters") {
return(height_cm)
else if (height_type == "feet-inches") {
} <- height_ft
feet_in_inches <- height_ftin
inches return(feet_in_inches * 30.48 + inches * 2.54)
else {
} return(NA_real_)
}
}
# Apply the computation using mutate + mapply
<- ques_study2_BMI_mixed %>%
ques_study2_BMI_mixed mutate(
weight_kg = mapply(convert_weight_to_kg,
weight = sociodemo_weight,
weight_type = sociodemo_weight_type),
height_cm_val = mapply(convert_height_to_cm,
height_cm = sociodemo_height_cm,
height_ft = sociodemo_height_ft,
height_ftin = sociodemo_height_ftin,
height_type = sociodemo_height_type),
sociodemo_BMI = ifelse(
!is.na(weight_kg) & !is.na(height_cm_val),
/ ((height_cm_val / 100)^2),
weight_kg NA_real_
) )
add BMI - ques_study2_BMI_mixed:
# Apply the computation using mutate + mapply
<- ques_study2_BMI_mixed %>%
ques_study2_BMI_mixed mutate(
weight_kg = mapply(convert_weight_to_kg,
weight = sociodemo_weight,
weight_type = sociodemo_weight_type),
height_cm_val = mapply(convert_height_to_cm,
height_cm = sociodemo_height_cm,
height_ft = sociodemo_height_ft,
height_ftin = sociodemo_height_ftin,
height_type = sociodemo_height_type),
sociodemo_BMI = ifelse(
!is.na(weight_kg) & !is.na(height_cm_val),
/ ((height_cm_val / 100)^2),
weight_kg NA_real_
)
)
$sociodemo_BMI[ques_study2_BMI_mixed$weight_kg < 40 | ques_study2_BMI_mixed$height_cm_val < 140 | ques_study2_BMI_mixed$height_cm_val > 230] <- NA
ques_study2_BMI_mixed# ques_study2_BMI_mixed$weight_kg[ques_study2_BMI_mixed$sociodemo_BMI < 10]
# ques_study2_BMI_mixed$height_cm_val[ques_study2_BMI_mixed$sociodemo_BMI < 10]
add BMI - ques_study1:
$sociodemo_height_ft <- NA
ques_study1$sociodemo_height_ftin <- NA
ques_study1
<- ques_study1 %>%
ques_study1 mutate(
weight_kg = mapply(convert_weight_to_kg,
weight = sociodemo_weight,
weight_type = sociodemo_weight_type),
height_cm_val = mapply(convert_height_to_cm,
height_cm = sociodemo_height,
height_ft = sociodemo_height_ft,
height_ftin = sociodemo_height_ftin,
height_type = sociodemo_height_type),
sociodemo_BMI = ifelse(
!is.na(weight_kg) & !is.na(height_cm_val),
/ ((height_cm_val / 100)^2),
weight_kg NA_real_
)
)
$sociodemo_BMI[ques_study1$weight_kg < 40 | ques_study1$height_cm_val < 140 | ques_study1$height_cm_val > 230] <- NA
ques_study1# ques_study1$weight_kg[ques_study1$sociodemo_BMI < 10]
# ques_study1$height_cm_val[ques_study1$sociodemo_BMI < 10]
summary(ques_study2_BMI_high$sociodemo_BMI)
Min. 1st Qu. Median Mean 3rd Qu. Max.
30.04 32.04 37.30 36.88 38.90 61.50
summary(ques_study2_BMI_mixed$sociodemo_BMI)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
16.04 22.34 24.45 26.56 28.14 53.37 4
summary(ques_study1$sociodemo_BMI)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
14.04 22.22 24.57 25.93 28.58 48.21 91
setwd("outputs/questionnaire")
saveRDS(ques_study2_BMI_mixed, file = paste0("ques_study2_BMI_mixed", ".rds"))
6.4 combine questionnaires
<- c(sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^pair")),
tmp_surveysVars sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^PathogenDisgus")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^GermAversion")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^BeliefsAboutObesePersons")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^AttitudeTowardsObesePeople")),
sort(str_subset(string = colnames(dat_study2_BMI_mixed), pattern = "^PerceivedCausesofObesity")))
$weight_avatar_choice2 <- NA
ques_study1
$source <- "study1"
ques_study1$source <- "study2_BMI_high"
ques_study2_BMI_high$source <- "study2_BMI_mixed"
ques_study2_BMI_mixed
<- rbind(ques_study1[, c("PROLIFIC_PID", "source", "sociodemo_age", "sociodemo_gender", "sociodemo_BMI", "weight_avatar_choice2", tmp_surveysVars)],
ques_combined c("PROLIFIC_PID", "source", "sociodemo_age", "sociodemo_gender", "sociodemo_BMI","weight_avatar_choice2", tmp_surveysVars)],
ques_study2_BMI_high[, c("PROLIFIC_PID", "source", "sociodemo_age", "sociodemo_gender", "sociodemo_BMI", "weight_avatar_choice2", tmp_surveysVars)])
ques_study2_BMI_mixed[,
dim(ques_combined)
[1] 363 77
names(table(ques_combined$PROLIFIC_PID))[table(ques_combined$PROLIFIC_PID) >= 2]
[1] "601129f77e0c21000b0c408a"
6.4.1 reverse code items
Fat Phobia Scale:
::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^pair")])) psych
# --- 1) Item metadata (from your list) ---
<- tibble::tribble(
items ~scale, ~left, ~right,
"pair01", "lazy", "industrious",
"pair02", "no will power", "has will power",
"pair03", "attractive", "unattractive",
"pair04", "good self-control", "poor self-control",
"pair05", "fast", "slow",
"pair06", "having endurance", "having no endurance",
"pair07", "active", "inactive",
"pair08", "weak", "strong",
"pair09", "self-indulgent", "self-sacrificing",
"pair10", "dislikes food", "likes food",
"pair11", "shapeless", "shapely",
"pair12", "undereats", "overeats",
"pair13", "insecure", "secure",
"pair14", "low self-esteem", "high self-esteem"
)
# --- 2) Define which *right* words are "positive" ---
# Edit this list as needed. Only items whose RIGHT word is in this list will be reversed.
<- c(
positive_right_terms "industrious",
"has will power",
"strong",
"self-sacrificing",
"shapely",
"secure",
"high self-esteem"
# NOTE: "likes food" and "overeats" are context-dependent; add/remove if needed.
)
# Items to reverse: those whose RIGHT term is positive
<- items$scale[items$right %in% positive_right_terms]
reverse_items
# --- 3) Reverse-code those columns in your data frame df ---
<- 1
min_val <- 5
max_val
# Safety check: only reverse variables that actually exist in df
<- intersect(reverse_items, names(ques_combined))
reverse_items_in_df
<- lapply(ques_combined[reverse_items_in_df], function(x) {
ques_combined[reverse_items_in_df] + min_val) - x
(max_val
})
# --- 4) (Optional) Inspect what was reversed ---
list(
reversed_by_right_positive = reverse_items_in_df,
not_found_in_df = setdiff(reverse_items, reverse_items_in_df)
)
$reversed_by_right_positive
[1] "pair01" "pair02" "pair08" "pair09" "pair11" "pair13" "pair14"
$not_found_in_df
character(0)
::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^pair")])) psych
Subscale Germ Aversion:
::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^GermAversion")])) psych
# with names like GermAversion1, GermAversion5r, GermAversion7r, etc.
<- 1
min_val <- 7
max_val
# 1. Identify all GermAversion columns
<- str_subset(colnames(ques_combined), pattern = "^GermAversion")
germ_cols
# 2. Find which of these end with "r"
<- str_subset(germ_cols, pattern = "r$")
germ_cols_rev
# 3. Reverse code the ones ending in "r"
<- lapply(ques_combined[germ_cols_rev], function(x) {
ques_combined[germ_cols_rev] + min_val) - x
(max_val })
::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^GermAversion")])) psych
Beliefs About Obese Persons Scale:
::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^BeliefsAboutObesePersons")])) psych
<- 1
min_val <- 6
max_val
# 1. Identify all BeliefsAboutObesePersons columns
<- str_subset(colnames(ques_combined), pattern = "^BeliefsAboutObesePersons")
beliefsAboutObe_cols
# 2. Find which of these end with "r"
<- str_subset(beliefsAboutObe_cols, pattern = "r$")
beliefsAboutObe_cols_rev
# 3. Reverse code the ones ending in "r"
<- lapply(ques_combined[beliefsAboutObe_cols_rev], function(x) {
ques_combined[beliefsAboutObe_cols_rev] + min_val) - x
(max_val })
::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^BeliefsAboutObesePersons")])) psych
Attitude Towards Obese People Scale:
::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^AttitudeTowardsObesePeople")])) psych
<- 1
min_val <- 6
max_val
# 1. Identify all AttitudeTowardsObesePeople columns
<- str_subset(colnames(ques_combined), pattern = "^AttitudeTowardsObesePeople")
attitudeTowObe_cols
# 2. Find which of these end with "r"
<- str_subset(attitudeTowObe_cols, pattern = "r$")
attitudeTowObe_cols_rev
# 3. Reverse code the ones ending in "r"
<- lapply(ques_combined[attitudeTowObe_cols_rev], function(x) {
ques_combined[attitudeTowObe_cols_rev] + min_val) - x
(max_val })
::cor.plot(r = cor(ques_combined[,str_subset(string = colnames(ques_combined), pattern = "^AttitudeTowardsObesePeople")])) psych
6.4.2 save questionnaire
setwd("outputs/questionnaire")
# Save RDS
saveRDS(ques_combined, file = "ques_combined.rds")
# Save CSV
write.csv(ques_combined, file = "ques_combined.csv", row.names = FALSE)
# Save XLSX using writexl
::write_xlsx(ques_combined, path = "ques_combined.xlsx") writexl
7 prepare data associations
colnames(dat_study2_BMI_high)[sapply(dat_study2_BMI_high, is.list)]
[1] "url" "para_defocuscount"
[3] "key_assignment" "unsucsessfulAssociations"
[5] "sucsessfulAssociations"
colnames(dat_study2_BMI_mixed)[sapply(dat_study2_BMI_mixed, is.list)]
[1] "url" "key_assignment"
[3] "para_defocuscount" "unsucsessfulAssociations"
[5] "sucsessfulAssociations"
Function:
# association_list = sucsessfulAssociations_study2_BMI_mixed
# metadata_df = ques_study2_BMI_mixed
<- function(association_list, metadata_df) {
process_association_data # Initialize output data frame
<- data.frame(
output_df participant_id = character(),
gender = character(),
age = integer(),
BMI = numeric(),
cue = character(),
response = character(),
response_position = integer(),
response_level = integer(),
timestamp = character(),
time_diff_sec = numeric(),
stringsAsFactors = FALSE
)
# Loop through all participants
for (i in seq_along(association_list)) {
# Get associations
<- association_list[[i]]
assoc <- assoc[1:5, ]
assoc_L1 <- assoc[6:nrow(assoc), ]
assoc_L2
# Get participant metadata
<- metadata_df[i, ]
meta <- meta$PROLIFIC_PID
pid <- meta$sociodemo_gender
gender <- meta$sociodemo_age
age <- meta$sociodemo_BMI
BMI <- meta$weight_avatar_choice2
avatar
if(nrow(assoc_L1) != 5){
cat("assoc_L1 not 5 for:", pid, "\n")
}if(nrow(assoc_L2) != 25){
cat("assoc_L2 not 25 but", nrow(assoc_L2), "for:", pid, "\n")
}
# --- Level 1: responses ---
<- data.frame(
df_level1 participant_id = pid,
gender = gender,
age = age,
BMI = BMI,
avatar = avatar,
cue = assoc_L1$cue,
response = assoc_L1$response,
response_position = seq_along(assoc_L1$cue),
response_level = 1,
timestamp = assoc_L1$timestamp,
stringsAsFactors = FALSE
)
# --- Level 2: responses ---
<- data.frame(
df_level2 participant_id = pid,
gender = gender,
age = age,
BMI = BMI,
avatar = avatar,
cue = assoc_L2$cue,
response = assoc_L2$response,
response_position = as.numeric(ave(assoc_L2$cue, assoc_L2$cue, FUN = seq_along)), #rep(1:5, times = 5)
response_level = 2,
timestamp = assoc_L2$timestamp,
stringsAsFactors = FALSE
)
# Combine both levels
<- rbind(df_level1, df_level2)
combined
# Convert timestamp and calculate time difference from first response
$timestamp <- as.POSIXct(combined$timestamp, format = "%Y-%m-%dT%H:%M:%OSZ", tz = "UTC")
combined$time_diff_sec <- as.numeric(difftime(combined$timestamp, combined$timestamp[1], units = "secs"))
combined
# Append to final output
<- rbind(output_df, combined)
output_df
}
return(output_df)
}
7.1 study 2 - BMI high
<- dat_study2_BMI_high$sucsessfulAssociations[!sapply(dat_study2_BMI_high$sucsessfulAssociations, is.null)]
sucsessfulAssociations_study2_BMI_high
<- process_association_data(
ass_study2_BMI_high association_list = sucsessfulAssociations_study2_BMI_high,
metadata_df = ques_study2_BMI_high
)
dim(ass_study2_BMI_high)
[1] 690 11
setwd("outputs/associations")
# Save RDS
saveRDS(ass_study2_BMI_high, file = "ass_study2_BMI_high.rds")
# Save CSV
write.csv(ass_study2_BMI_high, file = "ass_study2_BMI_high.csv", row.names = FALSE)
# Save XLSX using writexl
::write_xlsx(ass_study2_BMI_high, path = "ass_study2_BMI_high.xlsx") writexl
7.2 study 2 - BMI mixed
<- dat_study2_BMI_mixed$sucsessfulAssociations[!sapply(dat_study2_BMI_mixed$sucsessfulAssociations, is.null)]
sucsessfulAssociations_study2_BMI_mixed
<- process_association_data(
ass_study2_BMI_mixed association_list = sucsessfulAssociations_study2_BMI_mixed,
metadata_df = ques_study2_BMI_mixed
)
assoc_L2 not 25 but 23 for: 673b8b36a23f091c707518ae
assoc_L2 not 25 but 24 for: 6664dfc807e75330a8b54a8b
assoc_L2 not 25 but 24 for: 67bcdccadec7e4c73404a0ad
assoc_L2 not 25 but 24 for: 64f3036a5b5f2feae0b1f51b
assoc_L2 not 25 but 19 for: 65feaaac53eb219f09ad5ea0
assoc_L2 not 25 but 24 for: 5bb0cc738f3bd70001e513e3
assoc_L2 not 25 but 24 for: 67edb2c2157a46a0dcb9c5e3
assoc_L2 not 25 but 24 for: 667c744d940ae66df47ab873
assoc_L2 not 25 but 24 for: 634688daf4776ef459edf673
assoc_L2 not 25 but 22 for: 678a406859e436e14f1b1deb
assoc_L2 not 25 but 24 for: 6165d25922529904e5d7267d
assoc_L2 not 25 but 24 for: 6776815763e82f23fe4bcd40
dim(ass_study2_BMI_mixed)
[1] 2980 11
setwd("outputs/associations")
# Save RDS
saveRDS(ass_study2_BMI_mixed, file = "ass_study2_BMI_mixed.rds")
# Save CSV
write.csv(ass_study2_BMI_mixed, file = "ass_study2_BMI_mixed.csv", row.names = FALSE)
# Save XLSX using writexl
::write_xlsx(ass_study2_BMI_mixed, path = "ass_study2_BMI_mixed.xlsx") writexl
7.3 combine associations
<- rbind(ass_study2_BMI_high, ass_study2_BMI_mixed)
ass_combined
dim(ass_combined)
[1] 3670 11
names(table(ass_combined$participant_id))[table(ass_combined$participant_id) != 30]
[1] "5bb0cc738f3bd70001e513e3" "6165d25922529904e5d7267d"
[3] "634688daf4776ef459edf673" "64f3036a5b5f2feae0b1f51b"
[5] "65feaaac53eb219f09ad5ea0" "6664dfc807e75330a8b54a8b"
[7] "667c744d940ae66df47ab873" "673b8b36a23f091c707518ae"
[9] "6776815763e82f23fe4bcd40" "678a406859e436e14f1b1deb"
[11] "67bcdccadec7e4c73404a0ad" "67edb2c2157a46a0dcb9c5e3"
setwd("outputs/associations")
# Save RDS
saveRDS(ass_combined, file = "ass_combined.rds")
# Save CSV
write.csv(ass_combined, file = "ass_combined.csv", row.names = FALSE)
# Save XLSX using writexl
::write_xlsx(ass_combined, path = "ass_combined.xlsx") writexl