# sets the directory of location of this script as the current directory
# setwd(dirname(rstudioapi::getSourceEditorContext()$path))
### load packages
require(pacman)
p_load('tidyverse', 'jsonlite',
'stargazer', 'DT', 'psych',
'writexl')
### load socio-demographic data
setwd("data demographic")
# prolific <- read.csv(file = "prolific_export_65d4744b1373145896174eba.csv", header = TRUE)

### load JATOS data
# The results file is read as one string and split on "\n"; every remaining
# non-blank line is parsed as its own JSON document and the parsed records are
# row-bound into the data.frame `dat`.
setwd("../data")
dat <- suppressMessages(
  read_file('jatos_results_data_20250405122610.txt') %>%
    # ... split it into lines ...
    str_split('\n') %>%
    first() %>%
    # ... filter empty rows (empty strings and lines holding only "\r") ...
    discard(function(x) x == '') %>%
    discard(function(x) x == '\r') %>%
    # ... parse JSON into a data.frame
    map_dfr(fromJSON, flatten = TRUE)
)
# Read and parse each JSON line into a list
# json_data <- suppressMessages(
# read_file("jatos_results_data_20250405122610.txt") %>%
# str_split("\n") %>%
# first() %>%
# discard(~ .x == "" || .x == "\r") %>%
# map(~ fromJSON(.x, simplifyVector = FALSE)) # Keep full nested structure
# )
#> add ID counter
# Every row whose `sender` equals "Greetings" opens a new ID. A row's ID is the
# number of such rows seen up to and including that row, so rows before the
# first "Greetings" row get ID 0.
# `%in%` treats a missing `sender` as "no match" (never NA); as.numeric() keeps
# the column a double, as the former row-by-row counter produced.
dat$ID <- as.numeric(cumsum(dat$sender %in% "Greetings"))
### load functions
# Source every file found in the functions directory.
setwd("../functions")
# List the directory once; seq_along() makes the loop a no-op for an empty
# directory instead of iterating over c(1, 0).
tmp_functionFiles <- dir()
for (i in seq_along(tmp_functionFiles)) {
  # print(tmp_functionFiles[i])
  source(tmp_functionFiles[i], encoding = "utf-8")
}
rm(i, tmp_functionFiles)
### summary function
#' Group-wise mean and standard error of one variable
#'
#' @param data A data frame.
#' @param varname Name (character string) of the column to summarise.
#' @param groupnames Character vector with the names of the grouping columns.
#' @return A data frame with one row per group: the grouping columns, a column
#'   named after `varname` holding the group mean, and `se`, the standard error
#'   of that mean. Missing values are ignored in both statistics.
data_summary <- function(data, varname, groupnames) {
  # plyr is used through its namespace rather than attached: attaching it
  # after dplyr masks dplyr verbs such as summarise() and mutate().
  if (!requireNamespace("plyr", quietly = TRUE)) {
    stop("Package 'plyr' is required for data_summary().", call. = FALSE)
  }

  summary_func <- function(x, col) {
    values <- x[[col]]
    # sd() drops missing values, so the standard error must divide by the
    # number of non-missing observations, not by the raw group size.
    n_obs <- sum(!is.na(values))
    c(
      mean = mean(values, na.rm = TRUE),
      se = sd(values, na.rm = TRUE) / sqrt(n_obs)
    )
  }

  data_sum <- plyr::ddply(data, groupnames, .fun = summary_func, varname)
  # Rename the generic "mean" column to the name of the summarised variable.
  plyr::rename(data_sum, c("mean" = varname))
}
Data preparation pilot study
1 Notes
2 global variables
Define your global variables (can take some time to run):
3 create raw data files
4 set up data.frame questionnaires
### keep only complete data sets
# A data set counts as complete when its ID has the maximal number of rows
# observed for any ID. Lines prefixed with "#>" below are the console output
# recorded in the rendered report.
tmp_rowsPerID <- table(dat$ID)

sort(tmp_rowsPerID)
#> 2 6 13 15 25 26 35 44 57 34 1 3 4 5 7 8 9 10 11 12 14 16 17 18 19 20
#> 2 2 2 2 2 2 2 2 2 13 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17
#> 21 22 23 24 27 28 29 30 31 32 33 36 37 38 39 40 41 42 43 45 46 47 48 49 50 51
#> 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17
#> 52 53 54 55 56 58 59
#> 17 17 17 17 17 17 17

# number of IDs with fewer rows than the maximum
sum(tmp_rowsPerID != max(tmp_rowsPerID))
#> [1] 10

# number of IDs with the maximal number of rows
sum(tmp_rowsPerID == max(tmp_rowsPerID))
#> [1] 49

# keep only the rows that belong to IDs with the maximal number of rows
dat <- dat[
  dat$ID %in% names(tmp_rowsPerID)[tmp_rowsPerID == max(tmp_rowsPerID)],
]
rm(tmp_rowsPerID)
### json (from JATOS) to 2D data.frame
# > pre study
# add paradata
# Column names of `dat` starting with one of the listed prefixes ...
tmp_notNumeric <- str_subset(
  string = colnames(dat),
  pattern = "^meta|^sustainable|^bioinspired|^rationalCon|^feedback"
)
# ... minus every name that contains "labjs" or "location".
tmp_notNumeric <- str_subset(
  string = tmp_notNumeric,
  pattern = "labjs|location",
  negate = TRUE
)

### get survey
vec_ques <- c("PROLIFIC_PID", "dummy_informedconsent", tmp_notNumeric)
vec_notNumeric <- c("PROLIFIC_PID", tmp_notNumeric)

# questionnairetype() is not defined in this file; presumably it is one of the
# helpers sourced from the functions directory -- TODO confirm.
questionnaire <- questionnairetype(
  dataset = dat,
  listvars = vec_ques,
  notNumeric = vec_notNumeric,
  verbose = FALSE
)

# "#>" marks the console output recorded in the rendered report.
dim(questionnaire)
#> [1] 49 39
### get word list of associations
library(tidyverse)

# Reshape the association columns (names starting with "bioinspired_R" or
# "sustainable_R") to long format: one row per participant, word type and
# association position.
wordlistAssociations <- questionnaire %>%
  # First, select only relevant columns
  select(
    PROLIFIC_PID,
    starts_with("bioinspired_R"),
    starts_with("sustainable_R")
  ) %>%
  pivot_longer(
    cols = starts_with(c("bioinspired_R", "sustainable_R")),
    names_to = c("typeWord", "orderAssociation"),
    # first capture group -> typeWord, the single digit after "_R" ->
    # orderAssociation
    names_pattern = "(bioinspired|sustainable)_R(\\d)",
    values_to = "association"
  ) %>%
  mutate(orderAssociation = as.integer(orderAssociation)) %>%
  arrange(PROLIFIC_PID, typeWord, orderAssociation)
### get edge list of seen connections
# For every ID, take the first non-null entry of `drawnConnections`, attach the
# participant's "rationalCon" answers as column `rational`, and collect the
# resulting data frames in a list keyed by ID.
edge_list <- list()

for (i in unique(dat$ID)) {
  tmp_connections <- dat$drawnConnections[dat$ID == i]
  # vapply() always returns a logical vector, also for an empty selection
  # (sapply() would return list() there, which `!` cannot negate).
  tmp_connections <-
    tmp_connections[!vapply(tmp_connections, is.null, logical(1))]
  tmp_rational <- questionnaire[
    questionnaire$ID == i,
    str_subset(colnames(questionnaire), "rationalCon")
  ]

  # Only proceed if there is at least one set of connections
  if (length(tmp_connections) > 0 && !is.null(unlist(tmp_connections))) {
    # Assuming only one non-null element per ID (if not, loop or bind all)
    con_df <- tmp_connections[[1]]

    # Flatten tmp_rational into a character vector
    rat_vec <- as.character(tmp_rational[1, ])

    # Match the rational vector to the number of rows in con_df: indexing past
    # the end pads with NA, a shorter index truncates, and seq_len() stays
    # correct for a 0-row con_df.
    rat_vec <- rat_vec[seq_len(nrow(con_df))]

    con_df$rational <- rat_vec

    # Add to result list
    edge_list[[as.character(i)]] <- con_df
  }
}

# Combine all into a single data frame
edgelistAssociations <- bind_rows(edge_list, .id = "ID")
### save files
# Each of the three result tables is written twice into the "outputs"
# directory: as an Excel workbook and as a serialized R object.
setwd("outputs")

#> questionnaire
## save as .xlsx file
writexl::write_xlsx(x = questionnaire, path = "questionnaire.xlsx")
## save as R object
saveRDS(questionnaire, file = "questionnaire.rds")

#> wordlistAssociations
## save as .xlsx file
writexl::write_xlsx(x = wordlistAssociations, path = "wordlistAssociations.xlsx")
## save as R object
saveRDS(wordlistAssociations, file = "wordlistAssociations.rds")

#> edgelistAssociations
## save as .xlsx file
writexl::write_xlsx(x = edgelistAssociations, path = "edgelistAssociations.xlsx")
## save as R object
saveRDS(edgelistAssociations, file = "edgelistAssociations.rds")
5 show data
# Interactive tables of the three result data sets (5 rows per page). Lines
# prefixed with "#>" are the console output recorded in the rendered report.
DT::datatable(questionnaire, options = list(pageLength = 5))

# distribution of the number of edge-list rows per ID
summary(as.numeric(table(edgelistAssociations$ID)))
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 1.000 2.000 4.000 4.022 5.000 11.000

# number of edge-list rows for each ID
table(edgelistAssociations$ID)
#> 1 10 11 12 14 16 17 18 19 20 21 22 23 24 28 29 3 30 31 32 33 36 37 38 39 4
#> 2 4 5 4 5 5 2 3 8 11 5 4 2 8 6 2 9 5 2 4 3 1 3 1 1 6
#> 40 41 43 45 46 48 49 5 50 51 52 53 54 56 58 59 7 8 9
#> 4 3 6 2 7 2 4 6 1 3 2 5 4 4 2 4 5 3 3

DT::datatable(wordlistAssociations, options = list(pageLength = 5))
DT::datatable(edgelistAssociations, options = list(pageLength = 5))