# sets the directory of location of this script as the current directory
# setwd(dirname(rstudioapi::getSourceEditorContext()$path))
### load packages
require(pacman)
p_load('tidyverse', 'jsonlite',
'stargazer', 'DT', 'psych',
'writexl')
### load socio-demographic data
setwd("data demographic")
# prolific <- read.csv(file = "prolific_export_65d4744b1373145896174eba.csv", header = TRUE)

### load JATOS data
# The results file is read as one string, split into lines, and every
# non-empty line is parsed as its own JSON document; the parsed pieces are
# row-bound into a single data frame `dat`.
setwd("../data")
dat <- suppressMessages(
  map_dfr(
    discard(
      # split the raw text into lines (first() unwraps str_split's list)
      first(str_split(read_file("jatos_results_data_20250405122610.txt"), "\n")),
      # drop empty lines and bare carriage returns
      function(line) line == "" || line == "\r"
    ),
    fromJSON,
    flatten = TRUE
  )
)
# Read and parse each JSON line into a list
# json_data <- suppressMessages(
# read_file("jatos_results_data_20250405122610.txt") %>%
# str_split("\n") %>%
# first() %>%
# discard(~ .x == "" || .x == "\r") %>%
# map(~ fromJSON(.x, simplifyVector = FALSE)) # Keep full nested structure
# )
#> add ID counter
# Running ID: the counter goes up by one on every row whose `sender` equals
# "Greetings" (NA senders never count), and each row receives the counter
# value reached at that row. Rows before the first "Greetings" row get ID 0.
# cumsum() replaces the former row-by-row loop over 1:nrow(dat), which is
# unsafe for an empty data set; as.numeric() keeps the double type the loop
# produced.
dat$ID <- as.numeric(cumsum(!is.na(dat$sender) & dat$sender == "Greetings"))
### load functions
setwd("../functions")
# Source every R script in the functions folder. The folder is listed once
# and restricted to *.R / *.r files so that sub-directories or stray
# non-script files are not passed to source(); an empty folder simply
# results in zero iterations (1:length(dir()) would have looped over 1, 0).
for (i in list.files(pattern = "\\.[rR]$")) {
  # print(i)
  source(i, encoding = "utf-8")
}
rm(i)
### summary function
# Group-wise mean and standard error of one variable.
#
# Arguments:
#   data       data frame containing the variable and the grouping columns
#   varname    name (string) of the column to summarise
#   groupnames character vector of column names to group by
#
# Returns a data frame with the grouping columns, a column named after
# `varname` holding the group mean, and a column `se` holding the standard
# error of the mean. Missing values are ignored for the mean, the standard
# deviation and the sample size used in the standard error.
data_summary <- function(data, varname, groupnames) {
  # plyr is called through its namespace instead of being attached:
  # attaching plyr after dplyr masks dplyr verbs such as mutate()/arrange().
  if (!requireNamespace("plyr", quietly = TRUE)) {
    stop("Package 'plyr' is required for data_summary().", call. = FALSE)
  }

  summary_func <- function(x, col) {
    values <- x[[col]]
    # count only observed values so the SE matches the na.rm = TRUE sd
    n_valid <- sum(!is.na(values))
    c(
      mean = mean(values, na.rm = TRUE),
      se = sd(values, na.rm = TRUE) / sqrt(n_valid)
    )
  }

  data_sum <- plyr::ddply(data, groupnames, .fun = summary_func, varname)
  # expose the mean under the name of the summarised variable
  plyr::rename(data_sum, c("mean" = varname))
}
# Data preparation pilot study
1 Notes
2 global variables
Define your global variables (can take some time to run):
3 create raw data files
4 set up data.frame questionnaires
### keep only complete data sets
sort(table(dat$ID))
2 6 13 15 25 26 35 44 57 34 1 3 4 5 7 8 9 10 11 12 14 16 17 18 19 20
2 2 2 2 2 2 2 2 2 13 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17
21 22 23 24 27 28 29 30 31 32 33 36 37 38 39 40 41 42 43 45 46 47 48 49 50 51
17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17
52 53 54 55 56 58 59
17 17 17 17 17 17 17
sum(table(dat$ID) != max(table(dat$ID)))
[1] 10
sum(table(dat$ID) == max(table(dat$ID)))
[1] 49
# Retain only the rows of IDs whose number of rows equals the largest
# per-ID row count; IDs with fewer rows are dropped.
dat <- local({
  rows_per_id <- table(dat$ID)
  complete_ids <- names(rows_per_id)[rows_per_id == max(rows_per_id)]
  dat[dat$ID %in% complete_ids, ]
})
### json (from JATOS) to 2D data.frame
# > pre study
# add paradata
# Columns treated as non-numeric: names starting with one of the listed
# prefixes, excluding any name that contains "labjs" or "location".
tmp_notNumeric <- colnames(dat) %>%
  str_subset(pattern = "^meta|^sustainable|^bioinspired|^rationalCon|^feedback") %>%
  str_subset(pattern = "labjs|location", negate = TRUE)

### get survey
# Variables requested from the helper, and the subset of them flagged as
# non-numeric.
vec_notNumeric <- c("PROLIFIC_PID", tmp_notNumeric)
vec_ques <- c("PROLIFIC_PID", "dummy_informedconsent", tmp_notNumeric)

# questionnairetype() is not defined in this script -- presumably it is one of
# the helpers sourced from the functions folder (TODO confirm).
questionnaire <- questionnairetype(
  dataset = dat,
  listvars = vec_ques,
  notNumeric = vec_notNumeric,
  verbose = FALSE
)
dim(questionnaire)
[1] 49 39
### get word list of associations
library(tidyverse) # no-op when the tidyverse is already attached
# Reshape the association columns (bioinspired_R<k>, sustainable_R<k>) into
# long format: one row per PROLIFIC_PID x cue word x position, with the
# entered text in `association`.
wordlistAssociations <- questionnaire %>%
  # First, select only relevant columns
  select(PROLIFIC_PID, starts_with("bioinspired_R"), starts_with("sustainable_R")) %>%
  pivot_longer(
    cols = starts_with(c("bioinspired_R", "sustainable_R")),
    names_to = c("typeWord", "orderAssociation"),
    # `\\d+` rather than `\\d`: a multi-digit position such as R10 must be
    # captured whole instead of being truncated to its first digit
    names_pattern = "(bioinspired|sustainable)_R(\\d+)",
    values_to = "association"
  ) %>%
  mutate(orderAssociation = as.integer(orderAssociation)) %>%
  arrange(PROLIFIC_PID, typeWord, orderAssociation)
### get edge list of seen connections
# Build one edge list per ID from the `drawnConnections` entries in `dat`
# and attach the values of the questionnaire's "rationalCon" columns to the
# connections by position (first column value -> first connection row, ...).
edge_list <- list()

# The rationale column names do not depend on the ID; look them up once.
tmp_rationalCols <- str_subset(colnames(questionnaire), "rationalCon")

for (i in unique(dat$ID)) {
  # Filter() always returns a list, also for empty input, whereas
  # !sapply(list(), is.null) fails
  tmp_connections <- Filter(Negate(is.null), dat$drawnConnections[dat$ID == i])
  # drop = FALSE keeps a data frame even when only one column matches
  tmp_rational <- questionnaire[questionnaire$ID == i, tmp_rationalCols, drop = FALSE]

  # Only proceed if there is at least one set of connections
  if (length(tmp_connections) > 0 && !is.null(unlist(tmp_connections))) {
    # Assuming only one non-null element per ID (if not, loop or bind all)
    con_df <- tmp_connections[[1]]

    # Flatten the first questionnaire row into a character vector
    rat_vec <- as.character(tmp_rational[1, , drop = FALSE])

    # Pad with NA or truncate so there is exactly one entry per connection
    # row; unlike rat_vec[1:nrow(con_df)] this is also correct for zero rows
    length(rat_vec) <- nrow(con_df)

    con_df$rational <- rat_vec

    # Add to result list
    edge_list[[as.character(i)]] <- con_df
  }
}

# Combine all into a single data frame
edgelistAssociations <- bind_rows(edge_list, .id = "ID")
### save files
setwd("outputs")

# Each result table is written twice: as an Excel workbook (.xlsx) and as a
# serialized R object (.rds).

#> questionnaire
writexl::write_xlsx(questionnaire, "questionnaire.xlsx")
saveRDS(questionnaire, "questionnaire.rds")

#> wordlistAssociations
writexl::write_xlsx(wordlistAssociations, "wordlistAssociations.xlsx")
saveRDS(wordlistAssociations, "wordlistAssociations.rds")

#> edgelistAssociations
writexl::write_xlsx(edgelistAssociations, "edgelistAssociations.xlsx")
saveRDS(edgelistAssociations, "edgelistAssociations.rds")
# 5 show data
DT::datatable(questionnaire, options = list(pageLength = 5))
summary(as.numeric(table(edgelistAssociations$ID)))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
  1.000   2.000   4.000   4.022   5.000  11.000
table(edgelistAssociations$ID)
1 10 11 12 14 16 17 18 19 20 21 22 23 24 28 29 3 30 31 32 33 36 37 38 39 4
2 4 5 4 5 5 2 3 8 11 5 4 2 8 6 2 9 5 2 4 3 1 3 1 1 6
40 41 43 45 46 48 49 5 50 51 52 53 54 56 58 59 7 8 9
4 3 6 2 7 2 4 6 1 3 2 5 4 4 2 4 5 3 3
# Interactive previews (5 rows per page) of the two association tables.
DT::datatable(wordlistAssociations, options = list(pageLength = 5))
DT::datatable(edgelistAssociations, options = list(pageLength = 5))